Spaces:
Paused
Paused
File size: 1,172 Bytes
d814291 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | from osint_env.domain.models import EnvironmentConfig, SwarmConfig
from osint_env.env.environment import OSINTEnvironment
from osint_env.eval.runner import run_evaluation
def test_eval_runner():
    """Run a three-episode evaluation and check the summary metric keys exist."""
    environment = OSINTEnvironment(EnvironmentConfig(seed=17))
    metrics = run_evaluation(environment, episodes=3)

    # Only key presence is checked here; the metric values are not inspected.
    expected_keys = (
        "task_success_rate",
        "deanonymization_accuracy",
        "leaderboard_score",
        "avg_knowledge_indexing_reward",
    )
    for key in expected_keys:
        assert key in metrics
def test_eval_runner_swarm_mode():
    """Evaluate with swarm mode enabled and check the spawn metric keys exist."""
    swarm_settings = SwarmConfig(
        enabled=True,
        max_agents=3,
        max_breadth=2,
        max_width=2,
        max_depth=2,
    )
    environment = OSINTEnvironment(
        EnvironmentConfig(seed=17, swarm=swarm_settings)
    )
    metrics = run_evaluation(environment, episodes=2)

    # Both spawn-related keys must be present in the result.
    for key in ("spawn_signal", "avg_spawn_count"):
        assert key in metrics
def test_eval_runner_details_include_episode_answers():
    """Check that return_details=True yields one detail row per episode.

    The first row must carry the question, the task answer and the agent
    answer keys.
    """
    environment = OSINTEnvironment(EnvironmentConfig(seed=17))
    report = run_evaluation(environment, episodes=2, return_details=True)

    assert "episodes" in report
    episode_rows = report["episodes"]
    # Two episodes were requested, so two detail rows are expected.
    assert len(episode_rows) == 2

    first_row = episode_rows[0]
    for field in ("question", "task_answer", "agent_answer"):
        assert field in first_row
|