Spaces:
Sleeping
Sleeping
| from osint_env.domain.models import EnvironmentConfig, SwarmConfig | |
| from osint_env.env.environment import OSINTEnvironment | |
| from osint_env.eval.runner import run_evaluation | |
def test_eval_runner():
    """Run a 3-episode evaluation and check the expected keys are in the result."""
    environment = OSINTEnvironment(EnvironmentConfig(seed=17))
    summary = run_evaluation(environment, episodes=3)

    # Checked in a fixed order so the first missing key is the one reported.
    expected_keys = (
        "task_success_rate",
        "deanonymization_accuracy",
        "leaderboard_score",
        "avg_knowledge_indexing_reward",
    )
    for key in expected_keys:
        assert key in summary
def test_eval_runner_swarm_mode():
    """Run a 2-episode evaluation with swarm enabled and check the spawn keys."""
    swarm_settings = SwarmConfig(
        enabled=True,
        max_agents=3,
        max_breadth=2,
        max_width=2,
        max_depth=2,
    )
    config = EnvironmentConfig(seed=17, swarm=swarm_settings)
    environment = OSINTEnvironment(config)

    summary = run_evaluation(environment, episodes=2)

    # Both keys must be present in the result when swarm is enabled.
    for key in ("spawn_signal", "avg_spawn_count"):
        assert key in summary
def test_eval_runner_details_include_episode_answers():
    """Check that return_details=True adds one row per episode with answer keys."""
    environment = OSINTEnvironment(EnvironmentConfig(seed=17))
    detailed = run_evaluation(environment, episodes=2, return_details=True)

    # The per-episode rows live under "episodes"; two episodes were requested.
    assert "episodes" in detailed
    episode_rows = detailed["episodes"]
    assert len(episode_rows) == 2

    # Only the first row is inspected for the question/answer keys.
    first_row = episode_rows[0]
    for key in ("question", "task_answer", "agent_answer"):
        assert key in first_row