# Spaces: kushalExplores / flatmate_rl (Sleeping)
# File size: 23,882 Bytes

from __future__ import annotations

import importlib

from flatmate_rl.models import FlatmateRlAction
from flatmate_rl.server.flatmate_rl_environment import FlatmateRlEnvironment
from flatmate_rl.server.heuristic_policy import autopolicy_next_request, expected_policy_action
from flatmate_rl.server.scenarios import POSTS, SCENARIOS


def _tool(env: FlatmateRlEnvironment, name: str, **kwargs):
    """Step *env* with a ``tool_call`` action for tool *name* and return the result.

    When no keyword arguments are given for ``store_user_details`` or
    ``store_seller_details``, the tool arguments default to a copy of the
    matching expected-answers mapping from the scenario's
    ``scenario_creation_config`` in ``SCENARIOS``.
    """
    # Prefer the scenario id on the public state; otherwise fall back to the
    # task id held on the episode's private scenario mapping.
    scenario_id = env.state.scenario_id
    if not scenario_id:
        episode = getattr(env, "_episode", None)
        scenario_id = getattr(episode, "_scenario", {}).get("task_id", "")

    # Tool name -> key under ``scenario_creation_config`` holding its defaults.
    default_argument_keys = {
        "store_user_details": "expected_answers",
        "store_seller_details": "followup_seller_expected_answers",
    }
    if not kwargs and name in default_argument_keys:
        creation_config = SCENARIOS[scenario_id]["scenario_creation_config"]
        kwargs = dict(creation_config[default_argument_keys[name]])

    action = FlatmateRlAction(
        action_type="tool_call",
        tool_name=name,
        tool_arguments=kwargs,
    )
    return env.step(action)


def _msg(env: FlatmateRlEnvironment, text: str):
    """Step *env* with an ``assistant_message`` action carrying *text* and return the result."""
    action = FlatmateRlAction(action_type="assistant_message", assistant_message=text)
    return env.step(action)


def test_scenarios_are_self_consistent() -> None:
    """Check every entry in ``SCENARIOS`` for internal consistency.

    Covers basic metadata, post ids (non-empty, unique, present in ``POSTS``),
    ground-truth post sets, agreement between the expected answers and the
    buyer profile, and the seller profile (only expected for
    ``task_visit_single_seller_followup``).
    """
    for scenario_id, scenario in SCENARIOS.items():
        # Basic metadata.
        assert scenario["task_id"] == scenario_id
        assert scenario["label"]
        assert scenario["difficulty"] in {"medium", "hard"}
        assert scenario["initial_user_message"]

        # Post ids must be non-empty, unique, and known.
        post_ids = scenario["task_post_ids"]
        assert post_ids
        known_post_ids = set(post_ids)
        assert len(post_ids) == len(known_post_ids)
        assert all(post_id in POSTS for post_id in post_ids)

        truth = scenario["ground_truth"]
        creation_config = scenario["scenario_creation_config"]
        answers = creation_config["expected_answers"]

        assert truth["required_bookings"] >= 1
        for required_key in ("required_tool_calls", "required_info", "optimal_posts"):
            assert truth[required_key]
        # Optimal posts may additionally name the dynamic follow-up post.
        assert set(truth["optimal_posts"]) <= known_post_ids | {"post_dynamic_followup_1"}
        assert set(truth["acceptable_posts"]) <= known_post_ids
        assert set(truth["dealbreaker_posts"]) <= known_post_ids

        assert answers["user_type"] == "buyer"
        assert answers["user_sub_type"] == "flat"
        for field in ("budget_max", "dietary", "areas", "occupation", "visit_availability"):
            assert answers[field] == scenario["buyer_profile"][field]

        seller = scenario["seller_profile"]
        if scenario_id != "task_visit_single_seller_followup":
            assert seller is None
            continue

        assert seller is not None
        seller_answers = creation_config["followup_seller_expected_answers"]
        assert seller_answers["calendar_slots"] == seller["calendar_slots"]
        assert seller_answers["area"] == seller["area"]


def test_reset_exposes_initial_buyer_message() -> None:
    """Resetting ``task_visit_single`` yields a ready buyer-phase observation with the opening message."""
    flat_env = FlatmateRlEnvironment()
    initial = flat_env.reset(scenario_id="task_visit_single")

    assert initial.status == "ready"
    assert initial.scenario_id == "task_visit_single"
    assert initial.phase == "buyer"

    opening_message = initial.last_user_message
    assert "budget is up to Rs. 20,000" in opening_message

    still_missing = initial.remaining_required_fields
    assert still_missing == ["diet", "visit_availability"]


def test_seeded_reset_varies_values_without_changing_episode_flow() -> None:
    """A seeded reset changes ``budget_max`` but keeps posts, ground truth, and required fields."""
    baseline_env = FlatmateRlEnvironment()
    baseline_obs = baseline_env.reset(scenario_id="task_visit_single")

    seeded_env = FlatmateRlEnvironment()
    seeded_obs = seeded_env.reset(scenario_id="task_visit_single", seed=123)
    # Inspect the private per-episode scenario copy to see the seeded values.
    seeded_scenario = seeded_env._episode._scenario  # type: ignore[attr-defined]
    canonical = SCENARIOS["task_visit_single"]

    # The flow-defining parts are unchanged by the seed.
    assert seeded_obs.scenario_id == baseline_obs.scenario_id
    assert seeded_obs.remaining_required_fields == baseline_obs.remaining_required_fields
    assert seeded_scenario["task_post_ids"] == canonical["task_post_ids"]
    assert seeded_scenario["ground_truth"] == canonical["ground_truth"]

    # The budget differs from the canonical one but stays in sync with the expected answers.
    seeded_budget = seeded_scenario["buyer_profile"]["budget_max"]
    assert seeded_budget != canonical["buyer_profile"]["budget_max"]
    assert seeded_scenario["scenario_creation_config"]["expected_answers"]["budget_max"] == seeded_budget


def test_search_before_store_user_details_fails() -> None:
    """Calling ``search_posts`` first is rejected with a message naming the missing prerequisite."""
    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_single")

    outcome = _tool(flat_env, "search_posts").last_tool_result

    assert outcome["success"] is False
    assert "store_user_details must be called before search_posts" in outcome["message"]


def test_store_user_details_does_not_return_expected_answers_payload() -> None:
    """The ``store_user_details`` result contains only tool name, success flag, and message."""
    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_single")
    _msg(flat_env, "Please share your dietary preference and visit availability.")

    stored = _tool(flat_env, "store_user_details")

    expected_result = {
        "tool": "store_user_details",
        "success": True,
        "message": "Buyer profile stored.",
    }
    assert stored.last_tool_result == expected_result


def test_observation_surfaces_prerequisites_and_recent_tool_calls() -> None:
    """Observations track prerequisite flags and the most recent tool call after each step."""
    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_single")
    _msg(flat_env, "Please share your dietary preference and visit availability.")

    # After storing details: details flag is set, search flag is not.
    after_store = _tool(flat_env, "store_user_details")
    store_flags = after_store.prerequisites_satisfied
    assert store_flags["details_stored"] is True
    assert store_flags["posts_searched"] is False

    expected_call = {
        "tool_name": "store_user_details",
        "tool_arguments_summary": SCENARIOS["task_visit_single"]["scenario_creation_config"]["expected_answers"],
        "success": True,
    }
    assert after_store.recent_tool_calls[-1] == expected_call

    # After searching: search flag flips and the latest call is the search.
    after_search = _tool(flat_env, "search_posts")
    assert after_search.prerequisites_satisfied["posts_searched"] is True
    latest_call = after_search.recent_tool_calls[-1]
    assert latest_call["tool_name"] == "search_posts"
    assert latest_call["success"] is True


def test_strict_eval_mode_hides_scenario_metadata_and_reward(monkeypatch) -> None:
    """With ``STRICT_EVAL_MODE=1`` the observation hides scenario metadata, trace, and reward.

    The environment module is reloaded after the variable is set (presumably
    because the flag is read at import time — confirm against the module).
    The cleanup runs in a ``finally`` block so that a failing assertion does
    not leave the module reloaded in strict mode for later tests.

    Args:
        monkeypatch: pytest fixture used to set and remove the environment variable.
    """
    monkeypatch.setenv("STRICT_EVAL_MODE", "1")

    environment_module = importlib.import_module("flatmate_rl.server.flatmate_rl_environment")
    try:
        environment_module = importlib.reload(environment_module)
        env = environment_module.FlatmateRlEnvironment()

        observation = env.reset(scenario_id="task_visit_single")

        # Scenario metadata and progress fields are blanked out.
        assert observation.scenario_id == ""
        assert observation.scenario_label == ""
        assert observation.difficulty == ""
        assert observation.gathered_fields == []
        assert observation.remaining_required_fields == []
        assert observation.violations == []
        assert observation.tool_trace == []
        assert observation.total_reward == 0.0
        # The feedback summary still names the fields to ask for.
        assert "diet" in observation.feedback_summary
        assert "visit_availability" in observation.feedback_summary

        _msg(env, "Please share your dietary preference and visit availability.")
        result = _tool(env, "store_user_details")

        assert result.last_tool_result == {
            "tool": "store_user_details",
            "success": True,
            "message": "Buyer profile stored.",
        }
        assert result.total_reward == 0.0
        assert result.tool_trace == []
    finally:
        # Always restore the module to its non-strict state, even on failure.
        monkeypatch.delenv("STRICT_EVAL_MODE", raising=False)
        importlib.reload(environment_module)


def test_single_visit_scenario_books_one_visit() -> None:
    """Drive ``task_visit_single`` through to one booked visit and check the seller exchange.

    Verifies the final booking, the first two entries of the seller
    conversation history, and the flags on the ``contact_poster`` tool result.
    """
    env = FlatmateRlEnvironment()
    env.reset(scenario_id="task_visit_single")

    _msg(env, "Please share your dietary preference and visit availability.")
    _tool(env, "store_user_details")
    _tool(env, "search_posts")
    _tool(env, "match_location_preference", post_ids=["post_023", "post_031"])
    _tool(env, "get_commute_time", post_ids=["post_023", "post_031"])
    _tool(env, "check_calendar_slots", post_ids=["post_023"])
    _msg(env, "post_023 is available Saturday 11am. Please confirm Saturday 11am if that works.")
    _tool(env, "contact_poster", post_id="post_023", time_text="Saturday 11am")
    final_obs = _tool(env, "book_viewing", post_id="post_023", time_text="Saturday 11am")

    assert final_obs.done is True
    assert final_obs.booked_visits == [{"post_id": "post_023", "time": "Saturday 11am"}]

    seller_history = final_obs.seller_conversation_history
    assert len(seller_history) >= 2
    outreach, reply = seller_history[0], seller_history[1]
    assert outreach["role"] == "assistant"
    assert reply["role"] == "user"
    for fragment in (
        "buyer profile",
        "budget up to Rs. 20000",
        "Can you confirm the buyer profile is acceptable",
        "Saturday 11am",
    ):
        assert fragment in outreach["content"]
    assert "buyer profile is acceptable" in reply["content"]
    assert "Saturday 11am works for the visit" in reply["content"]

    # Use a default so a missing entry fails with a clear assertion message
    # instead of a bare StopIteration.
    contact_result = next(
        (result for result in final_obs.tool_results if result["tool"] == "contact_poster"),
        None,
    )
    assert contact_result is not None, "no contact_poster entry found in tool_results"
    assert contact_result["buyer_profile_shared"] is True
    assert contact_result["seller_profile_fit_confirmed"] is True


def test_buyer_answers_diet_and_availability_when_broker_asks_for_both() -> None:
    """Asking for diet and availability in one message gathers both fields from the reply."""
    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_single")

    reply_obs = _msg(flat_env, "Please share your dietary preference and visit availability.")

    buyer_reply = reply_obs.last_user_message
    assert "non-vegetarian" in buyer_reply
    assert "visit availability" in buyer_reply

    for field in ("diet", "visit_availability"):
        assert field in reply_obs.gathered_fields
    assert reply_obs.remaining_required_fields == []


def test_heuristic_policy_progresses_after_confirmation_in_single_visit() -> None:
    """Following ``expected_policy_action`` finishes ``task_visit_single`` within 12 steps."""
    scenario_id = "task_visit_single"
    flat_env = FlatmateRlEnvironment()
    current = flat_env.reset(scenario_id=scenario_id)

    for _step in range(12):
        next_payload = expected_policy_action(scenario_id, current.model_dump())
        assert next_payload is not None
        action = FlatmateRlAction.model_validate(next_payload)
        current = flat_env.step(action)
        if current.done:
            break

    assert current.done is True
    assert current.booked_visits == [{"post_id": "post_023", "time": "Saturday 11am"}]


def test_redundant_successful_tool_call_gets_small_penalty_without_termination() -> None:
    """A repeated ``search_posts`` call is penalised with -0.05 and flagged, but the episode continues."""
    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_single")

    _msg(flat_env, "Please share your dietary preference and visit availability.")
    _tool(flat_env, "store_user_details")
    _tool(flat_env, "search_posts")
    # Second identical search is the redundant call under test.
    repeat_obs = _tool(flat_env, "search_posts")

    assert repeat_obs.done is False
    assert repeat_obs.status == "tool_result"
    assert repeat_obs.step_reward == -0.05

    violation_tag = "redundant_tool_call"
    assert violation_tag in repeat_obs.message
    assert violation_tag in repeat_obs.violations


def test_seller_followup_non_canonical_tool_order_gets_small_penalty_without_termination() -> None:
    """Skipping ``close_buyer_conversation`` after search costs -0.1 and is flagged, without ending the episode."""
    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_single_seller_followup")

    _msg(flat_env, "Please share your dietary preference.")
    _tool(flat_env, "store_user_details")
    _tool(flat_env, "search_posts")
    # Out-of-order call: the message asserted below names the expected tool.
    out_of_order = _tool(flat_env, "match_location_preference", post_ids=["post_131"])

    assert out_of_order.done is False
    assert out_of_order.status == "tool_result"
    assert out_of_order.step_reward == -0.1

    expected_notice = "non_canonical_order: expected close_buyer_conversation, got match_location_preference"
    assert expected_notice in out_of_order.message
    assert "non_canonical_order" in out_of_order.violations


def test_legal_non_canonical_tool_after_store_can_continue() -> None:
    """Calling ``match_location_preference`` right after storing details is penalised but not terminal."""
    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_single")

    _msg(flat_env, "Please share your dietary preference and visit availability.")
    _tool(flat_env, "store_user_details")
    early_match = _tool(flat_env, "match_location_preference", post_ids=["post_023"])

    assert early_match.done is False
    assert early_match.step_reward == -0.1
    assert "non_canonical_order" in early_match.violations


def test_schema_valid_non_canonical_action_never_uses_legacy_flow_failure() -> None:
    """An out-of-order but well-formed tool call reports ``non_canonical_order``, not the legacy flow violation."""
    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_single")

    _msg(flat_env, "Please share your dietary preference and visit availability.")
    _tool(flat_env, "store_user_details")

    # Build the action explicitly rather than through the ``_tool`` helper.
    premature_match = FlatmateRlAction(
        action_type="tool_call",
        tool_name="match_location_preference",
        tool_arguments={"post_ids": ["post_023", "post_031"]},
    )
    outcome = flat_env.step(premature_match)

    assert outcome.done is False
    assert outcome.status == "tool_result"
    assert outcome.step_reward == -0.1

    # The legacy failure wording must not appear anywhere.
    assert "expected_flow_violation" not in outcome.violations
    assert "Expected flow violation" not in outcome.message

    assert "non_canonical_order" in outcome.violations
    assert "non_canonical_order: expected search_posts, got match_location_preference" in outcome.message


def test_seller_followup_search_returns_no_visit_compatible_current_posts() -> None:
    """In the seller follow-up scenario, search returns no posts and lists the slot-mismatch rejections."""
    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_single_seller_followup")

    _msg(flat_env, "Please share your dietary preference.")
    _tool(flat_env, "store_user_details")
    search_obs = _tool(flat_env, "search_posts")

    search_result = search_obs.last_tool_result
    assert search_result["post_ids"] == []
    assert search_result["rejected_for_slot_mismatch"] == ["post_131", "post_132"]
    assert "search_posts returned 0 results" in search_obs.feedback_summary


def test_seller_followup_accepts_paraphrased_assistant_message() -> None:
    """A reworded diet question still gets a user response, gathers ``diet``, and raises no violation."""
    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_single_seller_followup")

    paraphrased_question = (
        "Could you please let me know about your dietary preferences? This will help me find the best match for you."
    )
    reply_obs = _msg(flat_env, paraphrased_question)

    assert reply_obs.status == "user_response"
    assert reply_obs.done is False
    assert reply_obs.violations == []
    assert "diet" in reply_obs.gathered_fields


def test_seller_followup_accepts_expected_tool_with_different_arguments() -> None:
    """``store_user_details`` with only a ``diet`` argument succeeds without violations."""
    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_single_seller_followup")

    paraphrased_question = (
        "Could you please let me know about your dietary preferences? This will help me find the best match for you."
    )
    _msg(flat_env, paraphrased_question)
    # Explicit kwargs mean the helper does not substitute the scenario's expected answers.
    store_obs = _tool(flat_env, "store_user_details", diet="non-vegetarian")

    assert store_obs.status == "tool_result"
    assert store_obs.done is False
    assert store_obs.violations == []
    assert store_obs.last_tool_result["success"] is True


def test_seller_followup_match_tools_infer_dynamic_post_when_args_are_loose() -> None:
    """Loosely specified seller-phase tool arguments still resolve to ``post_dynamic_followup_1``.

    Runs the seller follow-up scenario with partial seller details and a
    ``match_location_preference`` call that passes ``area``/``rent`` instead of
    post ids, then checks each later tool result through to the booked visit.
    """
    dynamic_post = "post_dynamic_followup_1"
    chosen_slot = "Sunday 5pm"

    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_single_seller_followup")

    # Buyer phase.
    _msg(flat_env, "Please share your dietary preference.")
    _tool(flat_env, "store_user_details")
    _tool(flat_env, "search_posts")
    _tool(flat_env, "close_buyer_conversation")

    # Seller phase with only a subset of seller details supplied.
    _msg(flat_env, "Please share the household dietary setup, who the flat is for, and available time slots.")
    _tool(flat_env, "store_seller_details", dietary="non-vegetarian", occupation_requirement="working professionals")

    location_result = _tool(flat_env, "match_location_preference", area="Jogeshwari", rent=19500).last_tool_result
    assert location_result["matches"] == {dynamic_post: {"match": True}}

    slot_result = _tool(flat_env, "check_table_slot_matches", post_ids=[dynamic_post]).last_tool_result
    assert slot_result["slot_matches"] == {dynamic_post: ["Saturday 4pm", "Sunday 5pm"]}

    confirm_result = _tool(
        flat_env,
        "confirm_seller_match",
        post_id=dynamic_post,
        time_text=chosen_slot,
    ).last_tool_result
    assert confirm_result["success"] is True
    assert confirm_result["time_text"] == "Sunday 5pm"

    offer_result = _tool(
        flat_env,
        "offer_matched_listing_to_buyer",
        post_id=dynamic_post,
        time_text=chosen_slot,
    ).last_tool_result
    assert offer_result["success"] is True

    booked_obs = _tool(flat_env, "schedule_table_visit", post_id=dynamic_post, time_text=chosen_slot)
    assert booked_obs.done is True
    assert booked_obs.booked_visits == [{"post_id": "post_dynamic_followup_1", "time": "Sunday 5pm"}]


def test_heuristic_policy_recovers_from_strict_eval_feedback() -> None:
    """``autopolicy_next_request`` asks for the missing fields when only the feedback text names them.

    The observation has an empty ``remaining_required_fields`` list and a
    failed ``store_user_details`` result whose message lists the missing
    buyer fields.
    """
    missing_fields_message = "Missing buyer fields: diet, visit_availability."
    failed_store_result = {
        "tool": "store_user_details",
        "success": False,
        "message": missing_fields_message,
    }
    buyer_history = [
        {
            "role": "user",
            "content": "Hi, I'm looking for a flatmate-share near Goregaon East.",
        }
    ]
    sanitized_observation = {
        "done": False,
        "phase": "buyer",
        "buyer_profile_stored": False,
        "seller_profile_stored": False,
        "remaining_required_fields": [],
        "feedback_summary": "Ask the buyer for these missing fields before storing details: diet, visit_availability.",
        "message": missing_fields_message,
        "last_tool_result": failed_store_result,
        "booked_visits": [],
        "selected_posts": [],
        "tool_trace": [],
        "buyer_conversation_history": buyer_history,
        "status": "tool_result",
    }

    next_request = autopolicy_next_request("task_visit_single", sanitized_observation)

    expected_request = {
        "action_type": "assistant_message",
        "assistant_message": "Please share your dietary preference and visit availability.",
    }
    assert next_request == expected_request


def test_hidden_flex_requires_alternative_slot_to_unlock_backup_availability() -> None:
    """Offering alternative slots draws a confirmation from the buyer, after which Sunday 5pm is booked."""
    candidate_posts = ["post_023", "post_052"]

    flat_env = FlatmateRlEnvironment()
    step_obs = flat_env.reset(scenario_id="task_visit_single_hidden_flex")
    assert "Tuesday after 6pm" in step_obs.last_user_message

    step_obs = _msg(flat_env, "Please share your dietary preference.")
    assert step_obs.last_user_message == "I’m non-vegetarian."

    _tool(flat_env, "store_user_details")
    _tool(flat_env, "search_posts")
    _tool(flat_env, "match_location_preference", post_ids=["post_023", "post_052"])
    _tool(flat_env, "get_commute_time", post_ids=list(candidate_posts))
    _tool(flat_env, "check_calendar_slots", post_ids=list(candidate_posts))

    # Proposing alternatives should make the buyer confirm one of them.
    step_obs = _msg(flat_env, "No Tuesday slot matches. I can offer Saturday 1pm or Sunday 5pm instead.")
    buyer_reply = step_obs.last_user_message
    assert "confirm" in buyer_reply.lower()
    assert any(slot in buyer_reply for slot in ("Sunday 5pm", "Saturday 1pm"))

    _tool(flat_env, "contact_poster", post_id="post_023", time_text="Sunday 5pm")
    step_obs = _tool(flat_env, "book_viewing", post_id="post_023", time_text="Sunday 5pm")
    assert step_obs.done is True
    assert step_obs.booked_visits == [{"post_id": "post_023", "time": "Sunday 5pm"}]


def test_multi_visit_scenario_books_two_visits() -> None:
    """Following ``expected_policy_action`` finishes ``task_visit_multi`` within 20 steps with two bookings."""
    scenario_id = "task_visit_multi"
    flat_env = FlatmateRlEnvironment()
    current = flat_env.reset(scenario_id=scenario_id)

    for _step in range(20):
        next_payload = expected_policy_action(scenario_id, current.model_dump())
        assert next_payload is not None
        action = FlatmateRlAction.model_validate(next_payload)
        current = flat_env.step(action)
        if current.done:
            break

    assert current.done is True
    assert len(current.booked_visits) == 2


def test_seller_followup_scenario_schedules_dynamic_visit() -> None:
    """Run the seller follow-up scenario from buyer intake to a scheduled visit on the dynamic post."""
    dynamic_post = "post_dynamic_followup_1"
    chosen_slot = "Sunday 5pm"

    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_single_seller_followup")

    # Buyer phase ends with an explicit hand-over to the seller phase.
    _msg(flat_env, "Please share your dietary preference.")
    _tool(flat_env, "store_user_details")
    _tool(flat_env, "search_posts")
    handover = _tool(flat_env, "close_buyer_conversation")

    assert handover.phase == "seller"
    last_buyer_turn = handover.buyer_conversation_history[-1]["content"]
    last_seller_turn = handover.seller_conversation_history[-1]["content"]
    assert "I will follow up if a suitable listing comes in" in last_buyer_turn
    assert "listing a new flatmate-share opening" in last_seller_turn

    # Seller phase: collect details, match, confirm, offer, and schedule.
    _msg(flat_env, "Please share the household dietary setup, who the flat is for, and available time slots.")
    _tool(flat_env, "store_seller_details")
    _tool(flat_env, "match_location_preference", post_ids=[dynamic_post])
    _tool(flat_env, "check_table_slot_matches", post_ids=[dynamic_post])
    _tool(flat_env, "confirm_seller_match", post_id=dynamic_post, time_text=chosen_slot)
    _tool(flat_env, "offer_matched_listing_to_buyer", post_id=dynamic_post, time_text=chosen_slot)
    booked_obs = _tool(flat_env, "schedule_table_visit", post_id=dynamic_post, time_text=chosen_slot)

    assert booked_obs.done is True
    assert booked_obs.booked_visits == [{"post_id": "post_dynamic_followup_1", "time": "Sunday 5pm"}]


def test_conflict_check_calendar_slots_exposes_pre_booked_and_available() -> None:
    """``check_calendar_slots`` reports the free slot and the pre-booked slots for ``post_142``."""
    target_post = "post_142"

    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_conflict_check")

    _msg(flat_env, "Please share your dietary preference and visit availability.")
    _tool(flat_env, "store_user_details")
    _tool(flat_env, "search_posts")
    _tool(flat_env, "match_location_preference", post_ids=[target_post])
    _tool(flat_env, "get_commute_time", post_ids=[target_post])
    calendar_result = _tool(flat_env, "check_calendar_slots", post_ids=[target_post]).last_tool_result

    assert calendar_result["success"] is True
    assert calendar_result["calendar_slots"]["post_142"] == ["Sunday 5pm"]
    assert calendar_result["pre_booked_slots"]["post_142"] == ["Saturday 11am", "Saturday 4pm"]
    assert "already booked" in calendar_result["message"]


def test_conflict_check_cannot_book_pre_booked_slot() -> None:
    """Contacting the poster for ``Saturday 11am`` on ``post_142`` fails without ending the episode."""
    target_post = "post_142"

    flat_env = FlatmateRlEnvironment()
    flat_env.reset(scenario_id="task_visit_conflict_check")

    _msg(flat_env, "Please share your dietary preference and visit availability.")
    _tool(flat_env, "store_user_details")
    _tool(flat_env, "search_posts")
    _tool(flat_env, "match_location_preference", post_ids=[target_post])
    _tool(flat_env, "get_commute_time", post_ids=[target_post])
    _tool(flat_env, "check_calendar_slots", post_ids=[target_post])

    contact_obs = _tool(flat_env, "contact_poster", post_id=target_post, time_text="Saturday 11am")

    assert contact_obs.last_tool_result["success"] is False
    assert contact_obs.done is False


def test_conflict_check_heuristic_books_only_available_slot() -> None:
    """Following ``expected_policy_action`` books ``post_142`` at ``Sunday 5pm`` within 14 steps."""
    scenario_id = "task_visit_conflict_check"
    flat_env = FlatmateRlEnvironment()
    current = flat_env.reset(scenario_id=scenario_id)

    for _step in range(14):
        next_payload = expected_policy_action(scenario_id, current.model_dump())
        assert next_payload is not None
        action = FlatmateRlAction.model_validate(next_payload)
        current = flat_env.step(action)
        if current.done:
            break

    assert current.done is True
    assert current.booked_visits == [{"post_id": "post_142", "time": "Sunday 5pm"}]


def test_negotiation_heuristic_confirms_deal_with_agreed_rent() -> None:
    """Following ``expected_policy_action`` closes the negotiation scenario at Rs. 21000 within 14 steps."""
    scenario_id = "task_negotiation_hidden_budget"
    flat_env = FlatmateRlEnvironment()
    current = flat_env.reset(scenario_id=scenario_id)

    for _step in range(14):
        next_payload = expected_policy_action(scenario_id, current.model_dump())
        assert next_payload is not None
        action = FlatmateRlAction.model_validate(next_payload)
        current = flat_env.step(action)
        if current.done:
            break

    assert current.done is True
    assert current.status == "completed"
    assert current.booked_visits == [{"post_id": "post_155", "time": "negotiated_deal", "agreed_rent": 21000}]
    assert current.last_tool_result["tool"] == "confirm_negotiated_deal"

    # Both the offer and the seller's acceptance must appear in the seller transcript.
    seller_history = current.seller_conversation_history
    assert any("Would you accept Rs. 21000" in turn["content"] for turn in seller_history)
    assert any("I can accept Rs. 21000" in turn["content"] for turn in seller_history)