Spaces:
Sleeping
Sleeping
| [ | |
| "tests/test_flatmate_rl.py::test_buyer_answers_diet_and_availability_when_broker_asks_for_both", | |
| "tests/test_flatmate_rl.py::test_conflict_check_calendar_slots_exposes_pre_booked_and_available", | |
| "tests/test_flatmate_rl.py::test_conflict_check_cannot_book_pre_booked_slot", | |
| "tests/test_flatmate_rl.py::test_conflict_check_heuristic_books_only_available_slot", | |
| "tests/test_flatmate_rl.py::test_heuristic_policy_progresses_after_confirmation_in_single_visit", | |
| "tests/test_flatmate_rl.py::test_heuristic_policy_recovers_from_strict_eval_feedback", | |
| "tests/test_flatmate_rl.py::test_hidden_flex_requires_alternative_slot_to_unlock_backup_availability", | |
| "tests/test_flatmate_rl.py::test_legal_non_canonical_tool_after_store_can_continue", | |
| "tests/test_flatmate_rl.py::test_multi_visit_scenario_books_two_visits", | |
| "tests/test_flatmate_rl.py::test_negotiation_heuristic_confirms_deal_with_agreed_rent", | |
| "tests/test_flatmate_rl.py::test_observation_surfaces_prerequisites_and_recent_tool_calls", | |
| "tests/test_flatmate_rl.py::test_redundant_successful_tool_call_gets_small_penalty_without_termination", | |
| "tests/test_flatmate_rl.py::test_reset_exposes_initial_buyer_message", | |
| "tests/test_flatmate_rl.py::test_scenarios_are_self_consistent", | |
| "tests/test_flatmate_rl.py::test_schema_valid_non_canonical_action_never_uses_legacy_flow_failure", | |
| "tests/test_flatmate_rl.py::test_search_before_store_user_details_fails", | |
| "tests/test_flatmate_rl.py::test_seeded_reset_varies_values_without_changing_episode_flow", | |
| "tests/test_flatmate_rl.py::test_seller_followup_accepts_expected_tool_with_different_arguments", | |
| "tests/test_flatmate_rl.py::test_seller_followup_accepts_paraphrased_assistant_message", | |
| "tests/test_flatmate_rl.py::test_seller_followup_match_tools_infer_dynamic_post_when_args_are_loose", | |
| "tests/test_flatmate_rl.py::test_seller_followup_non_canonical_tool_order_gets_small_penalty_without_termination", | |
| "tests/test_flatmate_rl.py::test_seller_followup_scenario_schedules_dynamic_visit", | |
| "tests/test_flatmate_rl.py::test_seller_followup_search_returns_no_visit_compatible_current_posts", | |
| "tests/test_flatmate_rl.py::test_single_visit_scenario_books_one_visit", | |
| "tests/test_flatmate_rl.py::test_store_user_details_does_not_return_expected_answers_payload", | |
| "tests/test_flatmate_rl.py::test_strict_eval_mode_hides_scenario_metadata_and_reward", | |
| "tests/test_inference_parsing.py::test_legacy_parse_can_coerce_tool_name_in_action_type", | |
| "tests/test_inference_parsing.py::test_malformed_action_feedback_is_recoverable", | |
| "tests/test_inference_parsing.py::test_model_call_error_does_not_fallback_to_heuristic", | |
| "tests/test_inference_parsing.py::test_strict_parse_rejects_tool_name_in_action_type", | |
| "tests/test_inference_parsing.py::test_strict_parse_reports_json_error", | |
| "tests/test_inference_parsing.py::test_user_prompt_renders_prerequisites_and_recent_tools", | |
| "tests/test_reward_regression.py::test_heuristic_reward_regression" | |
| ] |