[ { "config": { "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 20, "max_width": 2, "seed": 2026, "seeded_questions": 15, "swarm_enabled": true }, "created_at": "2026-04-01T18:48:39+00:00", "episodes": 15, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.16666666666666666, "avg_connectivity_reward": 0.16999999999999998, "avg_diversity_reward": 0.1157777777777778, "avg_entity_informativeness_reward": -0.08858065677817137, "avg_format_reward": 0.14999999999999997, "avg_graph_f1": 0.8492063492063492, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.052000000000000005, "avg_relation_informativeness_reward": 0.07135858524047924, "avg_reward": 4.197526826881651, "avg_soft_shaping_reward": 0.24999999999999994, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 1.0, "leaderboard_score": 0.8543934355282199, "retrieval_signal": 0.6932, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5730889190257948, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0001", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-02T09:16:05+00:00", "episodes": 30, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.2000000000000001, "avg_connectivity_reward": 0.12999999999999998, "avg_diversity_reward": 0.12433333333333325, "avg_entity_informativeness_reward": 0.000700571890338102, "avg_format_reward": 0.15, "avg_graph_f1": 0.2916528337385394, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.05070078042510192, "avg_relation_informativeness_reward": 0.07853375358885142, "avg_reward": 4.377456514967488, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6241912131110795, "retrieval_signal": 0.6927452731487858, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5869968650958378, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0002", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-03T13:22:03+00:00", "episodes": 3, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.20000000000000004, "avg_connectivity_reward": -0.06666666666666667, "avg_diversity_reward": 0.13444444444444445, "avg_entity_informativeness_reward": -0.01010882862863417, "avg_format_reward": 0.15, "avg_graph_f1": 0.5793650793650794, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.10372960372960373, "avg_relation_informativeness_reward": 0.07108687894082726, "avg_reward": 4.419313576918165, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6797400780463063, "retrieval_signal": 0.7113053613053614, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5356956100624386, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0003", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-06T18:29:39+00:00", "episodes": 30, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.2000000000000001, "avg_connectivity_reward": 0.12999999999999998, "avg_diversity_reward": 0.12433333333333325, "avg_entity_informativeness_reward": -0.02515191749984708, "avg_format_reward": 0.15, "avg_graph_f1": 0.2916528337385394, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.11539120363588044, "avg_relation_informativeness_reward": 0.0769903534735767, "avg_reward": 4.460667345528021, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6269168609961595, "retrieval_signal": 0.7153869212725582, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5815176871947458, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0004", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-06T18:33:06+00:00", "episodes": 2, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.2, "avg_connectivity_reward": -0.15, "avg_diversity_reward": 0.13833333333333334, "avg_entity_informativeness_reward": -0.026628229842114173, "avg_format_reward": 0.15, "avg_graph_f1": 0.6190476190476191, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.10681818181818181, "avg_relation_informativeness_reward": 0.048120982127120335, "avg_reward": 4.334953339016039, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.685242999396977, "retrieval_signal": 0.7123863636363637, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5075485504570012, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0005", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "max_agents": 1, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-06T18:54:52+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.1, "avg_connectivity_reward": -0.3, "avg_diversity_reward": 0.08, "avg_entity_informativeness_reward": -0.02450859227728558, "avg_format_reward": 0.15, "avg_graph_f1": 0.33333333333333337, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.08181818181818182, "avg_relation_informativeness_reward": 0.04353540016904645, "avg_reward": 3.037246438342494, "avg_soft_shaping_reward": 0.15, "avg_spawn_count": 2.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 5.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6201263424948862, "retrieval_signal": 0.7036363636363637, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.45080536157835216, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0006", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "max_agents": 1, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-06T19:22:57+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.1, "avg_connectivity_reward": -0.3, "avg_diversity_reward": 0.08, "avg_entity_informativeness_reward": -0.005263146336646693, "avg_format_reward": 0.15, "avg_graph_f1": 0.33333333333333337, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.08181818181818182, "avg_relation_informativeness_reward": 0.044276243254877785, "avg_reward": 3.057232727368964, "avg_soft_shaping_reward": 0.15, "avg_spawn_count": 2.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 5.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6205293479318178, "retrieval_signal": 0.7036363636363637, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.4548026193836462, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0007", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "qwen3:1.7b", "llm_provider": "ollama", "max_agents": 1, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-06T19:48:33+00:00", "episodes": 3, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.10000000000000002, "avg_connectivity_reward": -0.09999999999999999, "avg_diversity_reward": 0.08, "avg_entity_informativeness_reward": -0.028683816517602444, "avg_format_reward": 0.15, "avg_graph_f1": 0.15537340619307835, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.07932190760059611, "avg_relation_informativeness_reward": 0.044225025032092045, "avg_reward": 3.1324990406542437, "avg_soft_shaping_reward": 0.15, "avg_spawn_count": 2.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 5.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.5890485416309927, "retrieval_signal": 0.7027626676602087, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5001082417028979, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0008", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "qwen3:1.7b", "llm_provider": "ollama", "max_agents": 1, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-06T19:55:08+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.1, "avg_connectivity_reward": -0.3, "avg_diversity_reward": 0.08, "avg_entity_informativeness_reward": -0.005263146336646693, "avg_format_reward": 0.15, "avg_graph_f1": 0.33333333333333337, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.08181818181818182, "avg_relation_informativeness_reward": 0.04406984773661544, "avg_reward": 3.0570263318507016, "avg_soft_shaping_reward": 0.15, "avg_spawn_count": 2.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 5.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6205251901591228, "retrieval_signal": 0.7036363636363637, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.45476134027999376, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0009", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "qwen3:1.7b", "llm_provider": "ollama", "max_agents": 1, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-06T20:01:34+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.1, "avg_connectivity_reward": -0.3, "avg_diversity_reward": 0.08, "avg_entity_informativeness_reward": -0.020826953461399098, "avg_format_reward": 0.15, "avg_graph_f1": 0.33333333333333337, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.08181818181818182, "avg_relation_informativeness_reward": 0.04348043923536236, "avg_reward": 3.040873116224696, "avg_soft_shaping_reward": 0.15, "avg_spawn_count": 2.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 5.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6201995296517067, "retrieval_signal": 0.7036363636363637, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.45153069715479266, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0010", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-06T20:46:11+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.2, "avg_connectivity_reward": -0.15, "avg_diversity_reward": 0.12666666666666665, "avg_entity_informativeness_reward": 0.019629386278697845, "avg_format_reward": 0.15, "avg_graph_f1": 0.5714285714285715, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.12272727272727273, "avg_relation_informativeness_reward": 0.08347928023822283, "avg_reward": 1.829702015111513, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6715432845394145, "retrieval_signal": 0.7179545454545455, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5221217333033842, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0011", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-06T20:49:44+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.2, "avg_connectivity_reward": -0.15, "avg_diversity_reward": 0.12666666666666665, "avg_entity_informativeness_reward": 0.019629386278697845, "avg_format_reward": 0.15, "avg_graph_f1": 0.5714285714285715, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.12272727272727273, "avg_relation_informativeness_reward": 0.08335372627068136, "avg_reward": 0.7139904233885594, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6641542345113342, "retrieval_signal": 0.7179545454545455, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5220966225098759, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0012", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-06T20:59:43+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.2, "avg_connectivity_reward": -0.15, "avg_diversity_reward": 0.12666666666666665, "avg_entity_informativeness_reward": 0.0036675120354726642, "avg_format_reward": 0.15, "avg_graph_f1": 0.5714285714285715, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.12272727272727273, "avg_relation_informativeness_reward": 0.08250745620050208, "avg_reward": 0.7138056720677886, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6638424503476543, "retrieval_signal": 0.7179545454545455, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.518734993647195, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0013", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-07T09:44:40+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.2, "avg_connectivity_reward": -0.15, "avg_diversity_reward": 0.12666666666666665, "avg_entity_informativeness_reward": -0.018704290877944903, "avg_format_reward": 0.15, "avg_graph_f1": 0.5714285714285715, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.12272727272727273, "avg_relation_informativeness_reward": 0.08056039127695382, "avg_reward": 0.7135379106634446, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6633913226563717, "retrieval_signal": 0.7179545454545455, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5138712200798018, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0014", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-07T09:55:19+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.2, "avg_connectivity_reward": -0.15, "avg_diversity_reward": 0.12666666666666665, "avg_entity_informativeness_reward": -0.018704290877944903, "avg_format_reward": 0.15, "avg_graph_f1": 0.5714285714285715, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.12272727272727273, "avg_relation_informativeness_reward": 0.08056039127695382, "avg_reward": 0.7135379106634446, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6633913226563717, "retrieval_signal": 0.7179545454545455, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5138712200798018, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0015", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-07T09:56:28+00:00", "episodes": 30, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.2000000000000001, "avg_connectivity_reward": 0.12999999999999998, "avg_diversity_reward": 0.12433333333333325, "avg_entity_informativeness_reward": -0.02515191749984708, "avg_format_reward": 0.15, "avg_graph_f1": 0.2916528337385394, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.11539120363588044, "avg_relation_informativeness_reward": 0.0769903534735767, "avg_reward": 0.7150555461096118, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6132407715455404, "retrieval_signal": 0.7153869212725582, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5815176871947458, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0016", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-07T10:02:32+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.2, "avg_connectivity_reward": -0.15, "avg_diversity_reward": 0.12666666666666665, "avg_entity_informativeness_reward": -0.018704290877944903, "avg_format_reward": 0.15, "avg_graph_f1": 0.5714285714285715, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.12272727272727273, "avg_relation_informativeness_reward": 0.08056039127695382, "avg_reward": 0.7135379106634446, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6633913226563717, "retrieval_signal": 0.7179545454545455, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5138712200798018, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0017", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-07T10:02:49+00:00", "episodes": 3, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.20000000000000004, "avg_connectivity_reward": -0.06666666666666667, "avg_diversity_reward": 0.13444444444444445, "avg_entity_informativeness_reward": -0.029992009599206938, "avg_format_reward": 0.15, "avg_graph_f1": 0.5793650793650794, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.10372960372960373, "avg_relation_informativeness_reward": 0.06898843512226, "avg_reward": 0.7133699465240085, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6656078661080486, "retrieval_signal": 0.7113053613053614, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5312992851046106, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0018", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-07T10:04:53+00:00", "episodes": 3, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.20000000000000004, "avg_connectivity_reward": -0.06666666666666667, "avg_diversity_reward": 0.13444444444444445, "avg_entity_informativeness_reward": -0.029992009599206938, "avg_format_reward": 0.15, "avg_graph_f1": 0.5793650793650794, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.10372960372960373, "avg_relation_informativeness_reward": 0.06898843512226, "avg_reward": 0.7133699465240085, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6656078661080486, "retrieval_signal": 0.7113053613053614, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5312992851046106, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0019", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-07T10:11:34+00:00", "episodes": 3, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.20000000000000004, "avg_connectivity_reward": -0.06666666666666667, "avg_diversity_reward": 0.13444444444444445, "avg_entity_informativeness_reward": -0.029992009599206938, "avg_format_reward": 0.15, "avg_graph_f1": 0.5793650793650794, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.10372960372960373, "avg_relation_informativeness_reward": 0.06898843512226, "avg_reward": 0.7133699465240085, "avg_soft_shaping_reward": 0.3, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.6656078661080486, "retrieval_signal": 0.7113053613053614, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.5312992851046106, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0020", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-07T10:29:54+00:00", "episodes": 3, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.0, "avg_connectivity_reward": 0.0, "avg_diversity_reward": 0.0, "avg_entity_informativeness_reward": 0.0, "avg_format_reward": 0.15, "avg_graph_f1": 0.0, "avg_knowledge_carrier_reward": 0.0, "avg_knowledge_indexing_reward": 0.0, "avg_relation_informativeness_reward": 0.0, "avg_reward": 0.5519400198339021, "avg_soft_shaping_reward": 0.0, "avg_spawn_count": 0.0, "avg_spawn_critical_steps": 0.0, "avg_steps_to_solution": 1.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.2785970009916951, "retrieval_signal": 0.5, "spawn_completion_rate": 0.0, "spawn_signal": 0.4, "structural_signal": 0.5, "task_success_rate": 0.0, "tool_efficiency": 1.0 }, "run_id": "run_0021", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-07T15:59:20+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.0, "avg_connectivity_reward": 0.0, "avg_diversity_reward": 0.0, "avg_entity_informativeness_reward": 0.0, "avg_format_reward": 0.15, "avg_graph_f1": 0.0, "avg_knowledge_carrier_reward": 0.0, "avg_knowledge_indexing_reward": 0.0, "avg_relation_informativeness_reward": 0.0, "avg_reward": 0.5519400198339021, "avg_soft_shaping_reward": 0.0, "avg_spawn_count": 0.0, "avg_spawn_critical_steps": 0.0, "avg_steps_to_solution": 1.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.2785970009916951, "retrieval_signal": 0.5, "spawn_completion_rate": 0.0, "spawn_signal": 0.4, "structural_signal": 0.5, "task_success_rate": 0.0, "tool_efficiency": 1.0 }, "run_id": "run_0022", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-08T04:25:00+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.0, "avg_connectivity_reward": 0.0, "avg_diversity_reward": 0.0, "avg_entity_informativeness_reward": 0.0, "avg_format_reward": 0.15, "avg_graph_f1": 0.0, "avg_knowledge_carrier_reward": 0.0, "avg_knowledge_indexing_reward": 0.0, "avg_relation_informativeness_reward": 0.0, "avg_reward": 0.5519400198339021, "avg_soft_shaping_reward": 0.0, "avg_spawn_count": 0.0, "avg_spawn_critical_steps": 0.0, "avg_steps_to_solution": 1.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.2785970009916951, "retrieval_signal": 0.5, "spawn_completion_rate": 0.0, "spawn_signal": 0.4, "structural_signal": 0.5, "task_success_rate": 0.0, "tool_efficiency": 1.0 }, "run_id": "run_0023", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-08T04:28:07+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.0, "avg_connectivity_reward": 0.0, "avg_diversity_reward": 0.0, "avg_entity_informativeness_reward": 0.0, "avg_format_reward": 0.15, "avg_graph_f1": 0.0, "avg_knowledge_carrier_reward": 0.0, "avg_knowledge_indexing_reward": 0.0, "avg_relation_informativeness_reward": 0.0, "avg_reward": 0.5519400198339021, "avg_soft_shaping_reward": 0.0, "avg_spawn_count": 0.0, "avg_spawn_critical_steps": 0.0, "avg_steps_to_solution": 1.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.2785970009916951, "retrieval_signal": 0.5, "spawn_completion_rate": 0.0, "spawn_signal": 0.4, "structural_signal": 0.5, "task_success_rate": 0.0, "tool_efficiency": 1.0 }, "run_id": "run_0024", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-08T04:39:32+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.0, "avg_connectivity_reward": 0.0, "avg_diversity_reward": 0.0, "avg_entity_informativeness_reward": 0.0, "avg_format_reward": 0.15, "avg_graph_f1": 0.0, "avg_knowledge_carrier_reward": 0.0, "avg_knowledge_indexing_reward": 0.0, "avg_relation_informativeness_reward": 0.0, "avg_reward": 0.5519400198339021, "avg_soft_shaping_reward": 0.0, "avg_spawn_count": 0.0, "avg_spawn_critical_steps": 0.0, "avg_steps_to_solution": 1.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.2785970009916951, "retrieval_signal": 0.5, "spawn_completion_rate": 0.0, "spawn_signal": 0.4, "structural_signal": 0.5, "task_success_rate": 0.0, "tool_efficiency": 1.0 }, "run_id": "run_0025", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-08T04:40:21+00:00", "episodes": 30, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.13333333333333336, "avg_connectivity_reward": 0.09999999999999999, "avg_diversity_reward": 0.03911111111111111, "avg_entity_informativeness_reward": -0.00951758755541623, "avg_format_reward": 0.15, "avg_graph_f1": 0.08482743691314255, "avg_knowledge_carrier_reward": 0.3333333333333333, "avg_knowledge_indexing_reward": 0.0832325289772058, "avg_relation_informativeness_reward": 0.024842289016879314, "avg_reward": 0.6636425017249088, "avg_soft_shaping_reward": 0.19999999999999993, "avg_spawn_count": 2.6666666666666665, "avg_spawn_critical_steps": 4.0, "avg_steps_to_solution": 6.333333333333333, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.4644798510150634, "retrieval_signal": 0.6457980518086888, "spawn_completion_rate": 1.0, "spawn_signal": 0.7, "structural_signal": 0.5472649402922927, "task_success_rate": 0.6666666666666666, "tool_efficiency": 0.5 }, "run_id": "run_0026", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-08T05:01:16+00:00", "episodes": 10, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.0, "avg_connectivity_reward": 0.0, "avg_diversity_reward": 0.0, "avg_entity_informativeness_reward": 0.0, "avg_format_reward": 0.14999999999999997, "avg_graph_f1": 0.0, "avg_knowledge_carrier_reward": 0.0, "avg_knowledge_indexing_reward": 0.0, "avg_relation_informativeness_reward": 0.0, "avg_reward": 0.5519400198339021, "avg_soft_shaping_reward": 0.0, "avg_spawn_count": 0.0, "avg_spawn_critical_steps": 0.0, "avg_steps_to_solution": 1.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.2785970009916951, "retrieval_signal": 0.5, "spawn_completion_rate": 0.0, "spawn_signal": 0.4, "structural_signal": 0.5, "task_success_rate": 0.0, "tool_efficiency": 1.0 }, "run_id": "run_0027", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-08T05:01:29+00:00", "episodes": 10, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.19999999999999998, "avg_connectivity_reward": 0.06, "avg_diversity_reward": 0.0, "avg_entity_informativeness_reward": 0.0, "avg_format_reward": 0.14999999999999997, "avg_graph_f1": 0.18535980927285275, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.09575879120879122, "avg_relation_informativeness_reward": 0.0, "avg_reward": 0.7109638031154166, "avg_soft_shaping_reward": 0.29999999999999993, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.5866289994462388, "retrieval_signal": 0.708515576923077, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.535, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0028", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-08T05:01:43+00:00", "episodes": 10, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.19999999999999998, "avg_connectivity_reward": 0.24, "avg_diversity_reward": 0.11733333333333333, "avg_entity_informativeness_reward": -0.028552762666248687, "avg_format_reward": 0.14999999999999997, "avg_graph_f1": 0.06912250146657492, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.15393879572282626, "avg_relation_informativeness_reward": 0.07452686705063795, "avg_reward": 0.7171006884027153, "avg_soft_shaping_reward": 0.29999999999999993, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.5730007362494549, "retrieval_signal": 0.7288785785029892, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.6067948208768779, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0029", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-08T15:57:03+00:00", "episodes": 10, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.0, "avg_connectivity_reward": 0.0, "avg_diversity_reward": 0.0, "avg_entity_informativeness_reward": 0.0, "avg_format_reward": 0.14999999999999997, "avg_graph_f1": 0.0, "avg_knowledge_carrier_reward": 0.0, "avg_knowledge_indexing_reward": 0.0, "avg_relation_informativeness_reward": 0.0, "avg_reward": 0.5519400198339021, "avg_soft_shaping_reward": 0.0, "avg_spawn_count": 0.0, "avg_spawn_critical_steps": 0.0, "avg_steps_to_solution": 1.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.2785970009916951, "retrieval_signal": 0.5, "spawn_completion_rate": 0.0, "spawn_signal": 0.4, "structural_signal": 0.5, "task_success_rate": 0.0, "tool_efficiency": 1.0 }, "run_id": "run_0030", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-08T15:57:18+00:00", "episodes": 10, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.19999999999999998, "avg_connectivity_reward": 0.06, "avg_diversity_reward": 0.0, "avg_entity_informativeness_reward": 0.0, "avg_format_reward": 0.14999999999999997, "avg_graph_f1": 0.18535980927285275, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.09575879120879122, "avg_relation_informativeness_reward": 0.0, "avg_reward": 0.7109638031154166, "avg_soft_shaping_reward": 0.29999999999999993, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.5866289994462388, "retrieval_signal": 0.708515576923077, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.535, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0031", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "gpt-5.4-mini", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 24, "max_width": 2, "seed": 2026, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-08T15:57:32+00:00", "episodes": 10, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.19999999999999998, "avg_connectivity_reward": 0.24, "avg_diversity_reward": 0.11733333333333333, "avg_entity_informativeness_reward": -0.028552762666248687, "avg_format_reward": 0.14999999999999997, "avg_graph_f1": 0.06912250146657492, "avg_knowledge_carrier_reward": 0.5, "avg_knowledge_indexing_reward": 0.15393879572282626, "avg_relation_informativeness_reward": 0.07452686705063795, "avg_reward": 0.7171006884027153, "avg_soft_shaping_reward": 0.29999999999999993, "avg_spawn_count": 4.0, "avg_spawn_critical_steps": 6.0, "avg_steps_to_solution": 9.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.5730007362494549, "retrieval_signal": 0.7288785785029892, "spawn_completion_rate": 1.0, "spawn_signal": 0.6666666666666666, "structural_signal": 0.6067948208768779, "task_success_rate": 1.0, "tool_efficiency": 0.5 }, "run_id": "run_0032", "run_name": "fixed_levels_qwen_swarm" }, { "config": { "llm_model": "qwen3:8b", "llm_provider": "openai", "max_agents": 3, "max_breadth": 2, "max_depth": 2, "max_steps": 18, "max_width": 2, "seed": 7, "seeded_questions": 30, "swarm_enabled": true }, "created_at": "2026-04-20T19:46:04+00:00", "episodes": 1, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.0, "avg_connectivity_reward": 0.0, "avg_diversity_reward": 0.0, "avg_entity_informativeness_reward": 0.0, "avg_format_reward": 0.15, "avg_graph_f1": 0.0, "avg_knowledge_carrier_reward": 0.0, "avg_knowledge_indexing_reward": 0.0, "avg_relation_informativeness_reward": 0.0, "avg_reward": 0.5519400198339021, "avg_soft_shaping_reward": 0.0, "avg_spawn_count": 0.0, "avg_spawn_critical_steps": 0.0, "avg_steps_to_solution": 1.0, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.2785970009916951, "retrieval_signal": 0.5, "spawn_completion_rate": 0.0, "spawn_signal": 0.4, "structural_signal": 0.5, "task_success_rate": 0.0, "tool_efficiency": 1.0 }, "run_id": "run_0033", "run_name": "fixed_levels_qwen_swarm" } ]