Spaces:
Sleeping
Sleeping
File size: 2,985 Bytes
0594d27 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | [
"tests/test_flatmate_rl.py::test_buyer_answers_diet_and_availability_when_broker_asks_for_both",
"tests/test_flatmate_rl.py::test_conflict_check_calendar_slots_exposes_pre_booked_and_available",
"tests/test_flatmate_rl.py::test_conflict_check_cannot_book_pre_booked_slot",
"tests/test_flatmate_rl.py::test_conflict_check_heuristic_books_only_available_slot",
"tests/test_flatmate_rl.py::test_heuristic_policy_progresses_after_confirmation_in_single_visit",
"tests/test_flatmate_rl.py::test_heuristic_policy_recovers_from_strict_eval_feedback",
"tests/test_flatmate_rl.py::test_hidden_flex_requires_alternative_slot_to_unlock_backup_availability",
"tests/test_flatmate_rl.py::test_legal_non_canonical_tool_after_store_can_continue",
"tests/test_flatmate_rl.py::test_multi_visit_scenario_books_two_visits",
"tests/test_flatmate_rl.py::test_negotiation_heuristic_confirms_deal_with_agreed_rent",
"tests/test_flatmate_rl.py::test_observation_surfaces_prerequisites_and_recent_tool_calls",
"tests/test_flatmate_rl.py::test_redundant_successful_tool_call_gets_small_penalty_without_termination",
"tests/test_flatmate_rl.py::test_reset_exposes_initial_buyer_message",
"tests/test_flatmate_rl.py::test_scenarios_are_self_consistent",
"tests/test_flatmate_rl.py::test_schema_valid_non_canonical_action_never_uses_legacy_flow_failure",
"tests/test_flatmate_rl.py::test_search_before_store_user_details_fails",
"tests/test_flatmate_rl.py::test_seeded_reset_varies_values_without_changing_episode_flow",
"tests/test_flatmate_rl.py::test_seller_followup_accepts_expected_tool_with_different_arguments",
"tests/test_flatmate_rl.py::test_seller_followup_accepts_paraphrased_assistant_message",
"tests/test_flatmate_rl.py::test_seller_followup_match_tools_infer_dynamic_post_when_args_are_loose",
"tests/test_flatmate_rl.py::test_seller_followup_non_canonical_tool_order_gets_small_penalty_without_termination",
"tests/test_flatmate_rl.py::test_seller_followup_scenario_schedules_dynamic_visit",
"tests/test_flatmate_rl.py::test_seller_followup_search_returns_no_visit_compatible_current_posts",
"tests/test_flatmate_rl.py::test_single_visit_scenario_books_one_visit",
"tests/test_flatmate_rl.py::test_store_user_details_does_not_return_expected_answers_payload",
"tests/test_flatmate_rl.py::test_strict_eval_mode_hides_scenario_metadata_and_reward",
"tests/test_inference_parsing.py::test_legacy_parse_can_coerce_tool_name_in_action_type",
"tests/test_inference_parsing.py::test_malformed_action_feedback_is_recoverable",
"tests/test_inference_parsing.py::test_model_call_error_does_not_fallback_to_heuristic",
"tests/test_inference_parsing.py::test_strict_parse_rejects_tool_name_in_action_type",
"tests/test_inference_parsing.py::test_strict_parse_reports_json_error",
"tests/test_inference_parsing.py::test_user_prompt_renders_prerequisites_and_recent_tools",
"tests/test_reward_regression.py::test_heuristic_reward_regression"
] |