OSINT / datasets /fixed_levels /leaderboard_fixed_levels.json
Siddeshwar1625's picture
Removed unintended embedded repo and added to gitignore
87f8562
[
{
"config": {
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 20,
"max_width": 2,
"seed": 2026,
"seeded_questions": 15,
"swarm_enabled": true
},
"created_at": "2026-04-01T18:48:39+00:00",
"episodes": 15,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.16666666666666666,
"avg_connectivity_reward": 0.16999999999999998,
"avg_diversity_reward": 0.1157777777777778,
"avg_entity_informativeness_reward": -0.08858065677817137,
"avg_format_reward": 0.14999999999999997,
"avg_graph_f1": 0.8492063492063492,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.052000000000000005,
"avg_relation_informativeness_reward": 0.07135858524047924,
"avg_reward": 4.197526826881651,
"avg_soft_shaping_reward": 0.24999999999999994,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 1.0,
"leaderboard_score": 0.8543934355282199,
"retrieval_signal": 0.6932,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5730889190257948,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0001",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-02T09:16:05+00:00",
"episodes": 30,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.2000000000000001,
"avg_connectivity_reward": 0.12999999999999998,
"avg_diversity_reward": 0.12433333333333325,
"avg_entity_informativeness_reward": 0.000700571890338102,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.2916528337385394,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.05070078042510192,
"avg_relation_informativeness_reward": 0.07853375358885142,
"avg_reward": 4.377456514967488,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6241912131110795,
"retrieval_signal": 0.6927452731487858,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5869968650958378,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0002",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-03T13:22:03+00:00",
"episodes": 3,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.20000000000000004,
"avg_connectivity_reward": -0.06666666666666667,
"avg_diversity_reward": 0.13444444444444445,
"avg_entity_informativeness_reward": -0.01010882862863417,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.5793650793650794,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.10372960372960373,
"avg_relation_informativeness_reward": 0.07108687894082726,
"avg_reward": 4.419313576918165,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6797400780463063,
"retrieval_signal": 0.7113053613053614,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5356956100624386,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0003",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-06T18:29:39+00:00",
"episodes": 30,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.2000000000000001,
"avg_connectivity_reward": 0.12999999999999998,
"avg_diversity_reward": 0.12433333333333325,
"avg_entity_informativeness_reward": -0.02515191749984708,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.2916528337385394,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.11539120363588044,
"avg_relation_informativeness_reward": 0.0769903534735767,
"avg_reward": 4.460667345528021,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6269168609961595,
"retrieval_signal": 0.7153869212725582,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5815176871947458,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0004",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-06T18:33:06+00:00",
"episodes": 2,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.2,
"avg_connectivity_reward": -0.15,
"avg_diversity_reward": 0.13833333333333334,
"avg_entity_informativeness_reward": -0.026628229842114173,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.6190476190476191,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.10681818181818181,
"avg_relation_informativeness_reward": 0.048120982127120335,
"avg_reward": 4.334953339016039,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.685242999396977,
"retrieval_signal": 0.7123863636363637,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5075485504570012,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0005",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"max_agents": 1,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-06T18:54:52+00:00",
"episodes": 1,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.1,
"avg_connectivity_reward": -0.3,
"avg_diversity_reward": 0.08,
"avg_entity_informativeness_reward": -0.02450859227728558,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.33333333333333337,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.08181818181818182,
"avg_relation_informativeness_reward": 0.04353540016904645,
"avg_reward": 3.037246438342494,
"avg_soft_shaping_reward": 0.15,
"avg_spawn_count": 2.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 5.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6201263424948862,
"retrieval_signal": 0.7036363636363637,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.45080536157835216,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0006",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"max_agents": 1,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-06T19:22:57+00:00",
"episodes": 1,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.1,
"avg_connectivity_reward": -0.3,
"avg_diversity_reward": 0.08,
"avg_entity_informativeness_reward": -0.005263146336646693,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.33333333333333337,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.08181818181818182,
"avg_relation_informativeness_reward": 0.044276243254877785,
"avg_reward": 3.057232727368964,
"avg_soft_shaping_reward": 0.15,
"avg_spawn_count": 2.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 5.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6205293479318178,
"retrieval_signal": 0.7036363636363637,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.4548026193836462,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0007",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"llm_model": "qwen3:1.7b",
"llm_provider": "ollama",
"max_agents": 1,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-06T19:48:33+00:00",
"episodes": 3,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.10000000000000002,
"avg_connectivity_reward": -0.09999999999999999,
"avg_diversity_reward": 0.08,
"avg_entity_informativeness_reward": -0.028683816517602444,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.15537340619307835,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.07932190760059611,
"avg_relation_informativeness_reward": 0.044225025032092045,
"avg_reward": 3.1324990406542437,
"avg_soft_shaping_reward": 0.15,
"avg_spawn_count": 2.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 5.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.5890485416309927,
"retrieval_signal": 0.7027626676602087,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5001082417028979,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0008",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"llm_model": "qwen3:1.7b",
"llm_provider": "ollama",
"max_agents": 1,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-06T19:55:08+00:00",
"episodes": 1,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.1,
"avg_connectivity_reward": -0.3,
"avg_diversity_reward": 0.08,
"avg_entity_informativeness_reward": -0.005263146336646693,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.33333333333333337,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.08181818181818182,
"avg_relation_informativeness_reward": 0.04406984773661544,
"avg_reward": 3.0570263318507016,
"avg_soft_shaping_reward": 0.15,
"avg_spawn_count": 2.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 5.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6205251901591228,
"retrieval_signal": 0.7036363636363637,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.45476134027999376,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0009",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"llm_model": "qwen3:1.7b",
"llm_provider": "ollama",
"max_agents": 1,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-06T20:01:34+00:00",
"episodes": 1,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.1,
"avg_connectivity_reward": -0.3,
"avg_diversity_reward": 0.08,
"avg_entity_informativeness_reward": -0.020826953461399098,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.33333333333333337,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.08181818181818182,
"avg_relation_informativeness_reward": 0.04348043923536236,
"avg_reward": 3.040873116224696,
"avg_soft_shaping_reward": 0.15,
"avg_spawn_count": 2.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 5.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6201995296517067,
"retrieval_signal": 0.7036363636363637,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.45153069715479266,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0010",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-06T20:46:11+00:00",
"episodes": 1,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.2,
"avg_connectivity_reward": -0.15,
"avg_diversity_reward": 0.12666666666666665,
"avg_entity_informativeness_reward": 0.019629386278697845,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.5714285714285715,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.12272727272727273,
"avg_relation_informativeness_reward": 0.08347928023822283,
"avg_reward": 1.829702015111513,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6715432845394145,
"retrieval_signal": 0.7179545454545455,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5221217333033842,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0011",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-06T20:49:44+00:00",
"episodes": 1,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.2,
"avg_connectivity_reward": -0.15,
"avg_diversity_reward": 0.12666666666666665,
"avg_entity_informativeness_reward": 0.019629386278697845,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.5714285714285715,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.12272727272727273,
"avg_relation_informativeness_reward": 0.08335372627068136,
"avg_reward": 0.7139904233885594,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6641542345113342,
"retrieval_signal": 0.7179545454545455,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5220966225098759,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0012",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-06T20:59:43+00:00",
"episodes": 1,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.2,
"avg_connectivity_reward": -0.15,
"avg_diversity_reward": 0.12666666666666665,
"avg_entity_informativeness_reward": 0.0036675120354726642,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.5714285714285715,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.12272727272727273,
"avg_relation_informativeness_reward": 0.08250745620050208,
"avg_reward": 0.7138056720677886,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6638424503476543,
"retrieval_signal": 0.7179545454545455,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.518734993647195,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0013",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"llm_model": "gpt-5.4-mini",
"llm_provider": "openai",
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-07T09:44:40+00:00",
"episodes": 1,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.2,
"avg_connectivity_reward": -0.15,
"avg_diversity_reward": 0.12666666666666665,
"avg_entity_informativeness_reward": -0.018704290877944903,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.5714285714285715,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.12272727272727273,
"avg_relation_informativeness_reward": 0.08056039127695382,
"avg_reward": 0.7135379106634446,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6633913226563717,
"retrieval_signal": 0.7179545454545455,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5138712200798018,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0014",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"llm_model": "gpt-5.4-mini",
"llm_provider": "openai",
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-07T09:55:19+00:00",
"episodes": 1,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.2,
"avg_connectivity_reward": -0.15,
"avg_diversity_reward": 0.12666666666666665,
"avg_entity_informativeness_reward": -0.018704290877944903,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.5714285714285715,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.12272727272727273,
"avg_relation_informativeness_reward": 0.08056039127695382,
"avg_reward": 0.7135379106634446,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6633913226563717,
"retrieval_signal": 0.7179545454545455,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5138712200798018,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0015",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"llm_model": "gpt-5.4-mini",
"llm_provider": "openai",
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-07T09:56:28+00:00",
"episodes": 30,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.2000000000000001,
"avg_connectivity_reward": 0.12999999999999998,
"avg_diversity_reward": 0.12433333333333325,
"avg_entity_informativeness_reward": -0.02515191749984708,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.2916528337385394,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.11539120363588044,
"avg_relation_informativeness_reward": 0.0769903534735767,
"avg_reward": 0.7150555461096118,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6132407715455404,
"retrieval_signal": 0.7153869212725582,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5815176871947458,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0016",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"llm_model": "gpt-5.4-mini",
"llm_provider": "openai",
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-07T10:02:32+00:00",
"episodes": 1,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.2,
"avg_connectivity_reward": -0.15,
"avg_diversity_reward": 0.12666666666666665,
"avg_entity_informativeness_reward": -0.018704290877944903,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.5714285714285715,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.12272727272727273,
"avg_relation_informativeness_reward": 0.08056039127695382,
"avg_reward": 0.7135379106634446,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6633913226563717,
"retrieval_signal": 0.7179545454545455,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5138712200798018,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0017",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"llm_model": "gpt-5.4-mini",
"llm_provider": "openai",
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-07T10:02:49+00:00",
"episodes": 3,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.20000000000000004,
"avg_connectivity_reward": -0.06666666666666667,
"avg_diversity_reward": 0.13444444444444445,
"avg_entity_informativeness_reward": -0.029992009599206938,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.5793650793650794,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.10372960372960373,
"avg_relation_informativeness_reward": 0.06898843512226,
"avg_reward": 0.7133699465240085,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6656078661080486,
"retrieval_signal": 0.7113053613053614,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5312992851046106,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0018",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"llm_model": "gpt-5.4-mini",
"llm_provider": "openai",
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-07T10:04:53+00:00",
"episodes": 3,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.20000000000000004,
"avg_connectivity_reward": -0.06666666666666667,
"avg_diversity_reward": 0.13444444444444445,
"avg_entity_informativeness_reward": -0.029992009599206938,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.5793650793650794,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.10372960372960373,
"avg_relation_informativeness_reward": 0.06898843512226,
"avg_reward": 0.7133699465240085,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6656078661080486,
"retrieval_signal": 0.7113053613053614,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5312992851046106,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0019",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"llm_model": "gpt-5.4-mini",
"llm_provider": "openai",
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-07T10:11:34+00:00",
"episodes": 3,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.20000000000000004,
"avg_connectivity_reward": -0.06666666666666667,
"avg_diversity_reward": 0.13444444444444445,
"avg_entity_informativeness_reward": -0.029992009599206938,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.5793650793650794,
"avg_knowledge_carrier_reward": 0.5,
"avg_knowledge_indexing_reward": 0.10372960372960373,
"avg_relation_informativeness_reward": 0.06898843512226,
"avg_reward": 0.7133699465240085,
"avg_soft_shaping_reward": 0.3,
"avg_spawn_count": 4.0,
"avg_spawn_critical_steps": 6.0,
"avg_steps_to_solution": 9.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.6656078661080486,
"retrieval_signal": 0.7113053613053614,
"spawn_completion_rate": 1.0,
"spawn_signal": 0.6666666666666666,
"structural_signal": 0.5312992851046106,
"task_success_rate": 1.0,
"tool_efficiency": 0.5
},
"run_id": "run_0020",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"llm_model": "gpt-5.4-mini",
"llm_provider": "openai",
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-07T10:29:54+00:00",
"episodes": 3,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.0,
"avg_connectivity_reward": 0.0,
"avg_diversity_reward": 0.0,
"avg_entity_informativeness_reward": 0.0,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.0,
"avg_knowledge_carrier_reward": 0.0,
"avg_knowledge_indexing_reward": 0.0,
"avg_relation_informativeness_reward": 0.0,
"avg_reward": 0.5519400198339021,
"avg_soft_shaping_reward": 0.0,
"avg_spawn_count": 0.0,
"avg_spawn_critical_steps": 0.0,
"avg_steps_to_solution": 1.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.2785970009916951,
"retrieval_signal": 0.5,
"spawn_completion_rate": 0.0,
"spawn_signal": 0.4,
"structural_signal": 0.5,
"task_success_rate": 0.0,
"tool_efficiency": 1.0
},
"run_id": "run_0021",
"run_name": "fixed_levels_qwen_swarm"
},
{
"config": {
"llm_model": "gpt-5.4-mini",
"llm_provider": "openai",
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"max_steps": 24,
"max_width": 2,
"seed": 2026,
"seeded_questions": 30,
"swarm_enabled": true
},
"created_at": "2026-04-07T15:59:20+00:00",
"episodes": 1,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.0,
"avg_connectivity_reward": 0.0,
"avg_diversity_reward": 0.0,
"avg_entity_informativeness_reward": 0.0,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.0,
"avg_knowledge_carrier_reward": 0.0,
"avg_knowledge_indexing_reward": 0.0,
"avg_relation_informativeness_reward": 0.0,
"avg_reward": 0.5519400198339021,
"avg_soft_shaping_reward": 0.0,
"avg_spawn_count": 0.0,
"avg_spawn_critical_steps": 0.0,
"avg_steps_to_solution": 1.0,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.2785970009916951,
"retrieval_signal": 0.5,
"spawn_completion_rate": 0.0,
"spawn_signal": 0.4,
"structural_signal": 0.5,
"task_success_rate": 0.0,
"tool_efficiency": 1.0
},
"run_id": "run_0022",
"run_name": "fixed_levels_qwen_swarm"
}
]