from __future__ import annotations import importlib from flatmate_rl.models import FlatmateRlAction from flatmate_rl.server.flatmate_rl_environment import FlatmateRlEnvironment from flatmate_rl.server.heuristic_policy import autopolicy_next_request, expected_policy_action from flatmate_rl.server.scenarios import POSTS, SCENARIOS def _tool(env: FlatmateRlEnvironment, name: str, **kwargs): scenario_id = env.state.scenario_id or getattr(getattr(env, "_episode", None), "_scenario", {}).get("task_id", "") if not kwargs and name == "store_user_details": kwargs = dict(SCENARIOS[scenario_id]["scenario_creation_config"]["expected_answers"]) if not kwargs and name == "store_seller_details": kwargs = dict(SCENARIOS[scenario_id]["scenario_creation_config"]["followup_seller_expected_answers"]) return env.step( FlatmateRlAction( action_type="tool_call", tool_name=name, tool_arguments=kwargs, ) ) def _msg(env: FlatmateRlEnvironment, text: str): return env.step( FlatmateRlAction( action_type="assistant_message", assistant_message=text, ) ) def test_scenarios_are_self_consistent() -> None: for scenario_id, scenario in SCENARIOS.items(): assert scenario["task_id"] == scenario_id assert scenario["label"] assert scenario["difficulty"] in {"medium", "hard"} assert scenario["initial_user_message"] assert scenario["task_post_ids"] assert len(scenario["task_post_ids"]) == len(set(scenario["task_post_ids"])) assert all(post_id in POSTS for post_id in scenario["task_post_ids"]) ground_truth = scenario["ground_truth"] expected_answers = scenario["scenario_creation_config"]["expected_answers"] assert ground_truth["required_bookings"] >= 1 assert ground_truth["required_tool_calls"] assert ground_truth["required_info"] assert ground_truth["optimal_posts"] assert set(ground_truth["optimal_posts"]).issubset(set(scenario["task_post_ids"]) | {"post_dynamic_followup_1"}) assert set(ground_truth["acceptable_posts"]).issubset(set(scenario["task_post_ids"])) assert set(ground_truth["dealbreaker_posts"]).issubset(set(scenario["task_post_ids"])) assert expected_answers["user_type"] == "buyer" assert expected_answers["user_sub_type"] == "flat" assert expected_answers["budget_max"] == scenario["buyer_profile"]["budget_max"] assert expected_answers["dietary"] == scenario["buyer_profile"]["dietary"] assert expected_answers["areas"] == scenario["buyer_profile"]["areas"] assert expected_answers["occupation"] == scenario["buyer_profile"]["occupation"] assert expected_answers["visit_availability"] == scenario["buyer_profile"]["visit_availability"] if scenario_id == "task_visit_single_seller_followup": assert scenario["seller_profile"] is not None assert ( scenario["scenario_creation_config"]["followup_seller_expected_answers"]["calendar_slots"] == scenario["seller_profile"]["calendar_slots"] ) assert scenario["scenario_creation_config"]["followup_seller_expected_answers"]["area"] == scenario["seller_profile"]["area"] else: assert scenario["seller_profile"] is None def test_reset_exposes_initial_buyer_message() -> None: env = FlatmateRlEnvironment() observation = env.reset(scenario_id="task_visit_single") assert observation.status == "ready" assert observation.scenario_id == "task_visit_single" assert observation.phase == "buyer" assert "budget is up to Rs. 20,000" in observation.last_user_message assert observation.remaining_required_fields == ["diet", "visit_availability"] def test_seeded_reset_varies_values_without_changing_episode_flow() -> None: default_env = FlatmateRlEnvironment() default_obs = default_env.reset(scenario_id="task_visit_single") seeded_env = FlatmateRlEnvironment() seeded_obs = seeded_env.reset(scenario_id="task_visit_single", seed=123) seeded_episode = seeded_env._episode # type: ignore[attr-defined] assert seeded_obs.scenario_id == default_obs.scenario_id assert seeded_obs.remaining_required_fields == default_obs.remaining_required_fields assert seeded_episode._scenario["task_post_ids"] == SCENARIOS["task_visit_single"]["task_post_ids"] assert seeded_episode._scenario["ground_truth"] == SCENARIOS["task_visit_single"]["ground_truth"] assert seeded_episode._scenario["buyer_profile"]["budget_max"] != SCENARIOS["task_visit_single"]["buyer_profile"]["budget_max"] assert ( seeded_episode._scenario["scenario_creation_config"]["expected_answers"]["budget_max"] == seeded_episode._scenario["buyer_profile"]["budget_max"] ) def test_search_before_store_user_details_fails() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single") result = _tool(env, "search_posts") assert result.last_tool_result["success"] is False assert "store_user_details must be called before search_posts" in result.last_tool_result["message"] def test_store_user_details_does_not_return_expected_answers_payload() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single") _msg(env, "Please share your dietary preference and visit availability.") result = _tool(env, "store_user_details") assert result.last_tool_result == { "tool": "store_user_details", "success": True, "message": "Buyer profile stored.", } def test_observation_surfaces_prerequisites_and_recent_tool_calls() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single") _msg(env, "Please share your dietary preference and visit availability.") stored = _tool(env, "store_user_details") assert stored.prerequisites_satisfied["details_stored"] is True assert stored.prerequisites_satisfied["posts_searched"] is False assert stored.recent_tool_calls[-1] == { "tool_name": "store_user_details", "tool_arguments_summary": SCENARIOS["task_visit_single"]["scenario_creation_config"]["expected_answers"], "success": True, } searched = _tool(env, "search_posts") assert searched.prerequisites_satisfied["posts_searched"] is True assert searched.recent_tool_calls[-1]["tool_name"] == "search_posts" assert searched.recent_tool_calls[-1]["success"] is True def test_strict_eval_mode_hides_scenario_metadata_and_reward(monkeypatch) -> None: monkeypatch.setenv("STRICT_EVAL_MODE", "1") environment_module = importlib.import_module("flatmate_rl.server.flatmate_rl_environment") environment_module = importlib.reload(environment_module) env = environment_module.FlatmateRlEnvironment() observation = env.reset(scenario_id="task_visit_single") assert observation.scenario_id == "" assert observation.scenario_label == "" assert observation.difficulty == "" assert observation.gathered_fields == [] assert observation.remaining_required_fields == [] assert observation.violations == [] assert observation.tool_trace == [] assert observation.total_reward == 0.0 assert "diet" in observation.feedback_summary assert "visit_availability" in observation.feedback_summary _msg(env, "Please share your dietary preference and visit availability.") result = _tool(env, "store_user_details") assert result.last_tool_result == { "tool": "store_user_details", "success": True, "message": "Buyer profile stored.", } assert result.total_reward == 0.0 assert result.tool_trace == [] monkeypatch.delenv("STRICT_EVAL_MODE", raising=False) importlib.reload(environment_module) def test_single_visit_scenario_books_one_visit() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single") _msg(env, "Please share your dietary preference and visit availability.") _tool(env, "store_user_details") _tool(env, "search_posts") _tool(env, "match_location_preference", post_ids=["post_023", "post_031"]) _tool(env, "get_commute_time", post_ids=["post_023", "post_031"]) _tool(env, "check_calendar_slots", post_ids=["post_023"]) _msg(env, "post_023 is available Saturday 11am. Please confirm Saturday 11am if that works.") _tool(env, "contact_poster", post_id="post_023", time_text="Saturday 11am") final_obs = _tool(env, "book_viewing", post_id="post_023", time_text="Saturday 11am") assert final_obs.done is True assert final_obs.booked_visits == [{"post_id": "post_023", "time": "Saturday 11am"}] assert len(final_obs.seller_conversation_history) >= 2 assert final_obs.seller_conversation_history[0]["role"] == "assistant" assert final_obs.seller_conversation_history[1]["role"] == "user" assert "buyer profile" in final_obs.seller_conversation_history[0]["content"] assert "budget up to Rs. 20000" in final_obs.seller_conversation_history[0]["content"] assert "Can you confirm the buyer profile is acceptable" in final_obs.seller_conversation_history[0]["content"] assert "Saturday 11am" in final_obs.seller_conversation_history[0]["content"] assert "buyer profile is acceptable" in final_obs.seller_conversation_history[1]["content"] assert "Saturday 11am works for the visit" in final_obs.seller_conversation_history[1]["content"] contact_result = next(result for result in final_obs.tool_results if result["tool"] == "contact_poster") assert contact_result["buyer_profile_shared"] is True assert contact_result["seller_profile_fit_confirmed"] is True def test_buyer_answers_diet_and_availability_when_broker_asks_for_both() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single") obs = _msg(env, "Please share your dietary preference and visit availability.") assert "non-vegetarian" in obs.last_user_message assert "visit availability" in obs.last_user_message assert "diet" in obs.gathered_fields assert "visit_availability" in obs.gathered_fields assert obs.remaining_required_fields == [] def test_heuristic_policy_progresses_after_confirmation_in_single_visit() -> None: env = FlatmateRlEnvironment() obs = env.reset(scenario_id="task_visit_single") for _ in range(12): payload = expected_policy_action("task_visit_single", obs.model_dump()) assert payload is not None obs = env.step(FlatmateRlAction.model_validate(payload)) if obs.done: break assert obs.done is True assert obs.booked_visits == [{"post_id": "post_023", "time": "Saturday 11am"}] def test_redundant_successful_tool_call_gets_small_penalty_without_termination() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single") _msg(env, "Please share your dietary preference and visit availability.") _tool(env, "store_user_details") _tool(env, "search_posts") redundant_step = _tool(env, "search_posts") assert redundant_step.done is False assert redundant_step.status == "tool_result" assert redundant_step.step_reward == -0.05 assert "redundant_tool_call" in redundant_step.message assert "redundant_tool_call" in redundant_step.violations def test_seller_followup_non_canonical_tool_order_gets_small_penalty_without_termination() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single_seller_followup") _msg(env, "Please share your dietary preference.") _tool(env, "store_user_details") _tool(env, "search_posts") wrong_transition = _tool(env, "match_location_preference", post_ids=["post_131"]) assert wrong_transition.done is False assert wrong_transition.status == "tool_result" assert wrong_transition.step_reward == -0.1 assert "non_canonical_order: expected close_buyer_conversation, got match_location_preference" in wrong_transition.message assert "non_canonical_order" in wrong_transition.violations def test_legal_non_canonical_tool_after_store_can_continue() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single") _msg(env, "Please share your dietary preference and visit availability.") _tool(env, "store_user_details") obs = _tool(env, "match_location_preference", post_ids=["post_023"]) assert obs.done is False assert obs.step_reward == -0.1 assert "non_canonical_order" in obs.violations def test_schema_valid_non_canonical_action_never_uses_legacy_flow_failure() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single") _msg(env, "Please share your dietary preference and visit availability.") _tool(env, "store_user_details") obs = env.step( FlatmateRlAction( action_type="tool_call", tool_name="match_location_preference", tool_arguments={"post_ids": ["post_023", "post_031"]}, ) ) assert obs.done is False assert obs.status == "tool_result" assert obs.step_reward == -0.1 assert "expected_flow_violation" not in obs.violations assert "Expected flow violation" not in obs.message assert "non_canonical_order" in obs.violations assert "non_canonical_order: expected search_posts, got match_location_preference" in obs.message def test_seller_followup_search_returns_no_visit_compatible_current_posts() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single_seller_followup") _msg(env, "Please share your dietary preference.") _tool(env, "store_user_details") obs = _tool(env, "search_posts") assert obs.last_tool_result["post_ids"] == [] assert obs.last_tool_result["rejected_for_slot_mismatch"] == ["post_131", "post_132"] assert "search_posts returned 0 results" in obs.feedback_summary def test_seller_followup_accepts_paraphrased_assistant_message() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single_seller_followup") obs = _msg( env, "Could you please let me know about your dietary preferences? This will help me find the best match for you.", ) assert obs.status == "user_response" assert obs.done is False assert obs.violations == [] assert "diet" in obs.gathered_fields def test_seller_followup_accepts_expected_tool_with_different_arguments() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single_seller_followup") _msg( env, "Could you please let me know about your dietary preferences? This will help me find the best match for you.", ) obs = _tool(env, "store_user_details", diet="non-vegetarian") assert obs.status == "tool_result" assert obs.done is False assert obs.violations == [] assert obs.last_tool_result["success"] is True def test_seller_followup_match_tools_infer_dynamic_post_when_args_are_loose() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single_seller_followup") _msg(env, "Please share your dietary preference.") _tool(env, "store_user_details") _tool(env, "search_posts") _tool(env, "close_buyer_conversation") _msg(env, "Please share the household dietary setup, who the flat is for, and available time slots.") _tool(env, "store_seller_details", dietary="non-vegetarian", occupation_requirement="working professionals") match_obs = _tool(env, "match_location_preference", area="Jogeshwari", rent=19500) assert match_obs.last_tool_result["matches"] == {"post_dynamic_followup_1": {"match": True}} slot_obs = _tool(env, "check_table_slot_matches", post_ids=["post_dynamic_followup_1"]) assert slot_obs.last_tool_result["slot_matches"] == { "post_dynamic_followup_1": ["Saturday 4pm", "Sunday 5pm"] } confirm_obs = _tool( env, "confirm_seller_match", post_id="post_dynamic_followup_1", time_text="Sunday 5pm", ) assert confirm_obs.last_tool_result["success"] is True assert confirm_obs.last_tool_result["time_text"] == "Sunday 5pm" offer_obs = _tool(env, "offer_matched_listing_to_buyer", post_id="post_dynamic_followup_1", time_text="Sunday 5pm") assert offer_obs.last_tool_result["success"] is True final_obs = _tool(env, "schedule_table_visit", post_id="post_dynamic_followup_1", time_text="Sunday 5pm") assert final_obs.done is True assert final_obs.booked_visits == [{"post_id": "post_dynamic_followup_1", "time": "Sunday 5pm"}] def test_heuristic_policy_recovers_from_strict_eval_feedback() -> None: sanitized_observation = { "done": False, "phase": "buyer", "buyer_profile_stored": False, "seller_profile_stored": False, "remaining_required_fields": [], "feedback_summary": "Ask the buyer for these missing fields before storing details: diet, visit_availability.", "message": "Missing buyer fields: diet, visit_availability.", "last_tool_result": { "tool": "store_user_details", "success": False, "message": "Missing buyer fields: diet, visit_availability.", }, "booked_visits": [], "selected_posts": [], "tool_trace": [], "buyer_conversation_history": [ { "role": "user", "content": "Hi, I'm looking for a flatmate-share near Goregaon East.", } ], "status": "tool_result", } action = autopolicy_next_request("task_visit_single", sanitized_observation) assert action == { "action_type": "assistant_message", "assistant_message": "Please share your dietary preference and visit availability.", } def test_hidden_flex_requires_alternative_slot_to_unlock_backup_availability() -> None: env = FlatmateRlEnvironment() obs = env.reset(scenario_id="task_visit_single_hidden_flex") assert "Tuesday after 6pm" in obs.last_user_message obs = _msg(env, "Please share your dietary preference.") assert obs.last_user_message == "I’m non-vegetarian." _tool(env, "store_user_details") _tool(env, "search_posts") _tool(env, "match_location_preference", post_ids=["post_023", "post_052"]) _tool(env, "get_commute_time", post_ids=["post_023", "post_052"]) _tool(env, "check_calendar_slots", post_ids=["post_023", "post_052"]) obs = _msg(env, "No Tuesday slot matches. I can offer Saturday 1pm or Sunday 5pm instead.") assert "confirm" in obs.last_user_message.lower() assert "Sunday 5pm" in obs.last_user_message or "Saturday 1pm" in obs.last_user_message _tool(env, "contact_poster", post_id="post_023", time_text="Sunday 5pm") obs = _tool(env, "book_viewing", post_id="post_023", time_text="Sunday 5pm") assert obs.done is True assert obs.booked_visits == [{"post_id": "post_023", "time": "Sunday 5pm"}] def test_multi_visit_scenario_books_two_visits() -> None: env = FlatmateRlEnvironment() obs = env.reset(scenario_id="task_visit_multi") for _ in range(20): payload = expected_policy_action("task_visit_multi", obs.model_dump()) assert payload is not None obs = env.step(FlatmateRlAction.model_validate(payload)) if obs.done: break assert obs.done is True assert len(obs.booked_visits) == 2 def test_seller_followup_scenario_schedules_dynamic_visit() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_single_seller_followup") _msg(env, "Please share your dietary preference.") _tool(env, "store_user_details") _tool(env, "search_posts") transition = _tool(env, "close_buyer_conversation") assert transition.phase == "seller" assert "I will follow up if a suitable listing comes in" in transition.buyer_conversation_history[-1]["content"] assert "listing a new flatmate-share opening" in transition.seller_conversation_history[-1]["content"] _msg(env, "Please share the household dietary setup, who the flat is for, and available time slots.") _tool(env, "store_seller_details") _tool(env, "match_location_preference", post_ids=["post_dynamic_followup_1"]) _tool(env, "check_table_slot_matches", post_ids=["post_dynamic_followup_1"]) _tool(env, "confirm_seller_match", post_id="post_dynamic_followup_1", time_text="Sunday 5pm") _tool(env, "offer_matched_listing_to_buyer", post_id="post_dynamic_followup_1", time_text="Sunday 5pm") final_obs = _tool(env, "schedule_table_visit", post_id="post_dynamic_followup_1", time_text="Sunday 5pm") assert final_obs.done is True assert final_obs.booked_visits == [{"post_id": "post_dynamic_followup_1", "time": "Sunday 5pm"}] def test_conflict_check_calendar_slots_exposes_pre_booked_and_available() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_conflict_check") _msg(env, "Please share your dietary preference and visit availability.") _tool(env, "store_user_details") _tool(env, "search_posts") _tool(env, "match_location_preference", post_ids=["post_142"]) _tool(env, "get_commute_time", post_ids=["post_142"]) obs = _tool(env, "check_calendar_slots", post_ids=["post_142"]) assert obs.last_tool_result["success"] is True assert obs.last_tool_result["calendar_slots"]["post_142"] == ["Sunday 5pm"] assert obs.last_tool_result["pre_booked_slots"]["post_142"] == ["Saturday 11am", "Saturday 4pm"] assert "already booked" in obs.last_tool_result["message"] def test_conflict_check_cannot_book_pre_booked_slot() -> None: env = FlatmateRlEnvironment() env.reset(scenario_id="task_visit_conflict_check") _msg(env, "Please share your dietary preference and visit availability.") _tool(env, "store_user_details") _tool(env, "search_posts") _tool(env, "match_location_preference", post_ids=["post_142"]) _tool(env, "get_commute_time", post_ids=["post_142"]) _tool(env, "check_calendar_slots", post_ids=["post_142"]) obs = _tool(env, "contact_poster", post_id="post_142", time_text="Saturday 11am") assert obs.last_tool_result["success"] is False assert obs.done is False def test_conflict_check_heuristic_books_only_available_slot() -> None: env = FlatmateRlEnvironment() obs = env.reset(scenario_id="task_visit_conflict_check") for _ in range(14): payload = expected_policy_action("task_visit_conflict_check", obs.model_dump()) assert payload is not None obs = env.step(FlatmateRlAction.model_validate(payload)) if obs.done: break assert obs.done is True assert obs.booked_visits == [{"post_id": "post_142", "time": "Sunday 5pm"}] def test_negotiation_heuristic_confirms_deal_with_agreed_rent() -> None: env = FlatmateRlEnvironment() obs = env.reset(scenario_id="task_negotiation_hidden_budget") for _ in range(14): payload = expected_policy_action("task_negotiation_hidden_budget", obs.model_dump()) assert payload is not None obs = env.step(FlatmateRlAction.model_validate(payload)) if obs.done: break assert obs.done is True assert obs.status == "completed" assert obs.booked_visits == [{"post_id": "post_155", "time": "negotiated_deal", "agreed_rent": 21000}] assert obs.last_tool_result["tool"] == "confirm_negotiated_deal" assert any("Would you accept Rs. 21000" in item["content"] for item in obs.seller_conversation_history) assert any("I can accept Rs. 21000" in item["content"] for item in obs.seller_conversation_history)