File size: 2,985 Bytes
0594d27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
[
  "tests/test_flatmate_rl.py::test_buyer_answers_diet_and_availability_when_broker_asks_for_both",
  "tests/test_flatmate_rl.py::test_conflict_check_calendar_slots_exposes_pre_booked_and_available",
  "tests/test_flatmate_rl.py::test_conflict_check_cannot_book_pre_booked_slot",
  "tests/test_flatmate_rl.py::test_conflict_check_heuristic_books_only_available_slot",
  "tests/test_flatmate_rl.py::test_heuristic_policy_progresses_after_confirmation_in_single_visit",
  "tests/test_flatmate_rl.py::test_heuristic_policy_recovers_from_strict_eval_feedback",
  "tests/test_flatmate_rl.py::test_hidden_flex_requires_alternative_slot_to_unlock_backup_availability",
  "tests/test_flatmate_rl.py::test_legal_non_canonical_tool_after_store_can_continue",
  "tests/test_flatmate_rl.py::test_multi_visit_scenario_books_two_visits",
  "tests/test_flatmate_rl.py::test_negotiation_heuristic_confirms_deal_with_agreed_rent",
  "tests/test_flatmate_rl.py::test_observation_surfaces_prerequisites_and_recent_tool_calls",
  "tests/test_flatmate_rl.py::test_redundant_successful_tool_call_gets_small_penalty_without_termination",
  "tests/test_flatmate_rl.py::test_reset_exposes_initial_buyer_message",
  "tests/test_flatmate_rl.py::test_scenarios_are_self_consistent",
  "tests/test_flatmate_rl.py::test_schema_valid_non_canonical_action_never_uses_legacy_flow_failure",
  "tests/test_flatmate_rl.py::test_search_before_store_user_details_fails",
  "tests/test_flatmate_rl.py::test_seeded_reset_varies_values_without_changing_episode_flow",
  "tests/test_flatmate_rl.py::test_seller_followup_accepts_expected_tool_with_different_arguments",
  "tests/test_flatmate_rl.py::test_seller_followup_accepts_paraphrased_assistant_message",
  "tests/test_flatmate_rl.py::test_seller_followup_match_tools_infer_dynamic_post_when_args_are_loose",
  "tests/test_flatmate_rl.py::test_seller_followup_non_canonical_tool_order_gets_small_penalty_without_termination",
  "tests/test_flatmate_rl.py::test_seller_followup_scenario_schedules_dynamic_visit",
  "tests/test_flatmate_rl.py::test_seller_followup_search_returns_no_visit_compatible_current_posts",
  "tests/test_flatmate_rl.py::test_single_visit_scenario_books_one_visit",
  "tests/test_flatmate_rl.py::test_store_user_details_does_not_return_expected_answers_payload",
  "tests/test_flatmate_rl.py::test_strict_eval_mode_hides_scenario_metadata_and_reward",
  "tests/test_inference_parsing.py::test_legacy_parse_can_coerce_tool_name_in_action_type",
  "tests/test_inference_parsing.py::test_malformed_action_feedback_is_recoverable",
  "tests/test_inference_parsing.py::test_model_call_error_does_not_fallback_to_heuristic",
  "tests/test_inference_parsing.py::test_strict_parse_rejects_tool_name_in_action_type",
  "tests/test_inference_parsing.py::test_strict_parse_reports_json_error",
  "tests/test_inference_parsing.py::test_user_prompt_renders_prerequisites_and_recent_tools",
  "tests/test_reward_regression.py::test_heuristic_reward_regression"
]