# File: OSINT/tests/test_eval.py
# Author: siddeshwar-kagatikar
# Commit: fix(rewards): never crash GRPO on malformed completions
# Revision: d814291
from osint_env.domain.models import EnvironmentConfig, SwarmConfig
from osint_env.env.environment import OSINTEnvironment
from osint_env.eval.runner import run_evaluation
def test_eval_runner():
    """Check that a basic evaluation run reports the core metric keys.

    Builds an environment from a config seeded with 17, evaluates it for
    three episodes, and verifies each expected metric name is present in
    the returned result.
    """
    environment = OSINTEnvironment(EnvironmentConfig(seed=17))
    summary = run_evaluation(environment, episodes=3)

    expected_metrics = (
        "task_success_rate",
        "deanonymization_accuracy",
        "leaderboard_score",
        "avg_knowledge_indexing_reward",
    )
    # Checked in order so the first missing metric is the one that fails.
    for metric in expected_metrics:
        assert metric in summary
def test_eval_runner_swarm_mode():
    """Check that a swarm-enabled evaluation run reports the spawn metric keys.

    The environment config carries an enabled ``SwarmConfig``; the run is
    evaluated for two episodes and the result must contain both
    spawn-related keys.
    """
    swarm_settings = SwarmConfig(
        enabled=True,
        max_agents=3,
        max_breadth=2,
        max_width=2,
        max_depth=2,
    )
    environment = OSINTEnvironment(EnvironmentConfig(seed=17, swarm=swarm_settings))
    summary = run_evaluation(environment, episodes=2)

    for metric in ("spawn_signal", "avg_spawn_count"):
        assert metric in summary
def test_eval_runner_details_include_episode_answers():
    """Check that ``return_details=True`` adds per-episode rows to the result.

    Runs two episodes with details requested, then verifies the result has
    an ``"episodes"`` entry of length two whose first row contains the
    question, task answer, and agent answer fields.
    """
    environment = OSINTEnvironment(EnvironmentConfig(seed=17))
    report = run_evaluation(environment, episodes=2, return_details=True)

    assert "episodes" in report
    episode_rows = report["episodes"]
    assert len(episode_rows) == 2

    # Only the first row is inspected, matching the scope of this test.
    first_row = episode_rows[0]
    for field_name in ("question", "task_answer", "agent_answer"):
        assert field_name in first_row