File size: 1,172 Bytes
ce675d4
aa360de
 
 
 
 
 
 
 
 
ce675d4
 
 
 
 
 
 
 
 
 
 
9e6be29
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from osint_env.domain.models import EnvironmentConfig, SwarmConfig
from osint_env.env.environment import OSINTEnvironment
from osint_env.eval.runner import run_evaluation


def test_eval_runner():
    """Check that a 3-episode evaluation result contains the expected keys.

    Builds an environment from ``EnvironmentConfig(seed=17)``, runs
    ``run_evaluation`` for three episodes, and asserts that each expected
    key is present in the returned result.
    """
    config = EnvironmentConfig(seed=17)
    environment = OSINTEnvironment(config)
    metrics = run_evaluation(environment, episodes=3)

    # Checked in a fixed order so the first missing key is the one reported.
    expected_keys = (
        "task_success_rate",
        "deanonymization_accuracy",
        "leaderboard_score",
        "avg_knowledge_indexing_reward",
    )
    for key in expected_keys:
        assert key in metrics


def test_eval_runner_swarm_mode():
    """Check that evaluating with swarm enabled returns the spawn-related keys.

    The environment is configured with ``SwarmConfig(enabled=True, ...)`` and
    evaluated for two episodes; the result must contain ``"spawn_signal"``
    and ``"avg_spawn_count"``.
    """
    swarm_settings = SwarmConfig(
        enabled=True,
        max_agents=3,
        max_breadth=2,
        max_width=2,
        max_depth=2,
    )
    environment = OSINTEnvironment(EnvironmentConfig(seed=17, swarm=swarm_settings))
    metrics = run_evaluation(environment, episodes=2)

    for key in ("spawn_signal", "avg_spawn_count"):
        assert key in metrics


def test_eval_runner_details_include_episode_answers():
    """Check that ``return_details=True`` yields per-episode rows.

    Runs two episodes with details requested, asserts the result has an
    ``"episodes"`` entry of length 2, and asserts that the first row contains
    the ``"question"``, ``"task_answer"`` and ``"agent_answer"`` keys.
    """
    environment = OSINTEnvironment(EnvironmentConfig(seed=17))
    report = run_evaluation(environment, episodes=2, return_details=True)

    assert "episodes" in report
    episode_rows = report["episodes"]
    assert len(episode_rows) == 2

    # Only the first episode row is inspected for the expected fields.
    first_row = episode_rows[0]
    for field in ("question", "task_answer", "agent_answer"):
        assert field in first_row