Spaces:
Paused
Paused
| [ | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 20, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 15, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-01T18:48:39+00:00", | |
| "episodes": 15, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.16666666666666666, | |
| "avg_connectivity_reward": 0.16999999999999998, | |
| "avg_diversity_reward": 0.1157777777777778, | |
| "avg_entity_informativeness_reward": -0.08858065677817137, | |
| "avg_format_reward": 0.14999999999999997, | |
| "avg_graph_f1": 0.8492063492063492, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.052000000000000005, | |
| "avg_relation_informativeness_reward": 0.07135858524047924, | |
| "avg_reward": 4.197526826881651, | |
| "avg_soft_shaping_reward": 0.24999999999999994, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 1.0, | |
| "leaderboard_score": 0.8543934355282199, | |
| "retrieval_signal": 0.6932, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5730889190257948, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0001", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-02T09:16:05+00:00", | |
| "episodes": 30, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.2000000000000001, | |
| "avg_connectivity_reward": 0.12999999999999998, | |
| "avg_diversity_reward": 0.12433333333333325, | |
| "avg_entity_informativeness_reward": 0.000700571890338102, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.2916528337385394, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.05070078042510192, | |
| "avg_relation_informativeness_reward": 0.07853375358885142, | |
| "avg_reward": 4.377456514967488, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6241912131110795, | |
| "retrieval_signal": 0.6927452731487858, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5869968650958378, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0002", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-03T13:22:03+00:00", | |
| "episodes": 3, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.20000000000000004, | |
| "avg_connectivity_reward": -0.06666666666666667, | |
| "avg_diversity_reward": 0.13444444444444445, | |
| "avg_entity_informativeness_reward": -0.01010882862863417, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.5793650793650794, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.10372960372960373, | |
| "avg_relation_informativeness_reward": 0.07108687894082726, | |
| "avg_reward": 4.419313576918165, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6797400780463063, | |
| "retrieval_signal": 0.7113053613053614, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5356956100624386, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0003", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-06T18:29:39+00:00", | |
| "episodes": 30, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.2000000000000001, | |
| "avg_connectivity_reward": 0.12999999999999998, | |
| "avg_diversity_reward": 0.12433333333333325, | |
| "avg_entity_informativeness_reward": -0.02515191749984708, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.2916528337385394, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.11539120363588044, | |
| "avg_relation_informativeness_reward": 0.0769903534735767, | |
| "avg_reward": 4.460667345528021, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6269168609961595, | |
| "retrieval_signal": 0.7153869212725582, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5815176871947458, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0004", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-06T18:33:06+00:00", | |
| "episodes": 2, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.2, | |
| "avg_connectivity_reward": -0.15, | |
| "avg_diversity_reward": 0.13833333333333334, | |
| "avg_entity_informativeness_reward": -0.026628229842114173, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.6190476190476191, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.10681818181818181, | |
| "avg_relation_informativeness_reward": 0.048120982127120335, | |
| "avg_reward": 4.334953339016039, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.685242999396977, | |
| "retrieval_signal": 0.7123863636363637, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5075485504570012, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0005", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 1, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-06T18:54:52+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.1, | |
| "avg_connectivity_reward": -0.3, | |
| "avg_diversity_reward": 0.08, | |
| "avg_entity_informativeness_reward": -0.02450859227728558, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.33333333333333337, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.08181818181818182, | |
| "avg_relation_informativeness_reward": 0.04353540016904645, | |
| "avg_reward": 3.037246438342494, | |
| "avg_soft_shaping_reward": 0.15, | |
| "avg_spawn_count": 2.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 5.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6201263424948862, | |
| "retrieval_signal": 0.7036363636363637, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.45080536157835216, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0006", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 1, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-06T19:22:57+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.1, | |
| "avg_connectivity_reward": -0.3, | |
| "avg_diversity_reward": 0.08, | |
| "avg_entity_informativeness_reward": -0.005263146336646693, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.33333333333333337, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.08181818181818182, | |
| "avg_relation_informativeness_reward": 0.044276243254877785, | |
| "avg_reward": 3.057232727368964, | |
| "avg_soft_shaping_reward": 0.15, | |
| "avg_spawn_count": 2.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 5.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6205293479318178, | |
| "retrieval_signal": 0.7036363636363637, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.4548026193836462, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0007", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "qwen3:1.7b", | |
| "llm_provider": "ollama", | |
| "max_agents": 1, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-06T19:48:33+00:00", | |
| "episodes": 3, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.10000000000000002, | |
| "avg_connectivity_reward": -0.09999999999999999, | |
| "avg_diversity_reward": 0.08, | |
| "avg_entity_informativeness_reward": -0.028683816517602444, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.15537340619307835, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.07932190760059611, | |
| "avg_relation_informativeness_reward": 0.044225025032092045, | |
| "avg_reward": 3.1324990406542437, | |
| "avg_soft_shaping_reward": 0.15, | |
| "avg_spawn_count": 2.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 5.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.5890485416309927, | |
| "retrieval_signal": 0.7027626676602087, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5001082417028979, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0008", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "qwen3:1.7b", | |
| "llm_provider": "ollama", | |
| "max_agents": 1, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-06T19:55:08+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.1, | |
| "avg_connectivity_reward": -0.3, | |
| "avg_diversity_reward": 0.08, | |
| "avg_entity_informativeness_reward": -0.005263146336646693, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.33333333333333337, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.08181818181818182, | |
| "avg_relation_informativeness_reward": 0.04406984773661544, | |
| "avg_reward": 3.0570263318507016, | |
| "avg_soft_shaping_reward": 0.15, | |
| "avg_spawn_count": 2.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 5.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6205251901591228, | |
| "retrieval_signal": 0.7036363636363637, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.45476134027999376, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0009", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "qwen3:1.7b", | |
| "llm_provider": "ollama", | |
| "max_agents": 1, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-06T20:01:34+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.1, | |
| "avg_connectivity_reward": -0.3, | |
| "avg_diversity_reward": 0.08, | |
| "avg_entity_informativeness_reward": -0.020826953461399098, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.33333333333333337, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.08181818181818182, | |
| "avg_relation_informativeness_reward": 0.04348043923536236, | |
| "avg_reward": 3.040873116224696, | |
| "avg_soft_shaping_reward": 0.15, | |
| "avg_spawn_count": 2.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 5.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6201995296517067, | |
| "retrieval_signal": 0.7036363636363637, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.45153069715479266, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0010", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-06T20:46:11+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.2, | |
| "avg_connectivity_reward": -0.15, | |
| "avg_diversity_reward": 0.12666666666666665, | |
| "avg_entity_informativeness_reward": 0.019629386278697845, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.5714285714285715, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.12272727272727273, | |
| "avg_relation_informativeness_reward": 0.08347928023822283, | |
| "avg_reward": 1.829702015111513, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6715432845394145, | |
| "retrieval_signal": 0.7179545454545455, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5221217333033842, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0011", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-06T20:49:44+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.2, | |
| "avg_connectivity_reward": -0.15, | |
| "avg_diversity_reward": 0.12666666666666665, | |
| "avg_entity_informativeness_reward": 0.019629386278697845, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.5714285714285715, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.12272727272727273, | |
| "avg_relation_informativeness_reward": 0.08335372627068136, | |
| "avg_reward": 0.7139904233885594, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6641542345113342, | |
| "retrieval_signal": 0.7179545454545455, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5220966225098759, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0012", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-06T20:59:43+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.2, | |
| "avg_connectivity_reward": -0.15, | |
| "avg_diversity_reward": 0.12666666666666665, | |
| "avg_entity_informativeness_reward": 0.0036675120354726642, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.5714285714285715, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.12272727272727273, | |
| "avg_relation_informativeness_reward": 0.08250745620050208, | |
| "avg_reward": 0.7138056720677886, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6638424503476543, | |
| "retrieval_signal": 0.7179545454545455, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.518734993647195, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0013", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-07T09:44:40+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.2, | |
| "avg_connectivity_reward": -0.15, | |
| "avg_diversity_reward": 0.12666666666666665, | |
| "avg_entity_informativeness_reward": -0.018704290877944903, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.5714285714285715, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.12272727272727273, | |
| "avg_relation_informativeness_reward": 0.08056039127695382, | |
| "avg_reward": 0.7135379106634446, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6633913226563717, | |
| "retrieval_signal": 0.7179545454545455, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5138712200798018, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0014", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-07T09:55:19+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.2, | |
| "avg_connectivity_reward": -0.15, | |
| "avg_diversity_reward": 0.12666666666666665, | |
| "avg_entity_informativeness_reward": -0.018704290877944903, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.5714285714285715, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.12272727272727273, | |
| "avg_relation_informativeness_reward": 0.08056039127695382, | |
| "avg_reward": 0.7135379106634446, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6633913226563717, | |
| "retrieval_signal": 0.7179545454545455, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5138712200798018, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0015", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-07T09:56:28+00:00", | |
| "episodes": 30, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.2000000000000001, | |
| "avg_connectivity_reward": 0.12999999999999998, | |
| "avg_diversity_reward": 0.12433333333333325, | |
| "avg_entity_informativeness_reward": -0.02515191749984708, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.2916528337385394, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.11539120363588044, | |
| "avg_relation_informativeness_reward": 0.0769903534735767, | |
| "avg_reward": 0.7150555461096118, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6132407715455404, | |
| "retrieval_signal": 0.7153869212725582, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5815176871947458, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0016", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-07T10:02:32+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.2, | |
| "avg_connectivity_reward": -0.15, | |
| "avg_diversity_reward": 0.12666666666666665, | |
| "avg_entity_informativeness_reward": -0.018704290877944903, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.5714285714285715, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.12272727272727273, | |
| "avg_relation_informativeness_reward": 0.08056039127695382, | |
| "avg_reward": 0.7135379106634446, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6633913226563717, | |
| "retrieval_signal": 0.7179545454545455, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5138712200798018, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0017", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-07T10:02:49+00:00", | |
| "episodes": 3, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.20000000000000004, | |
| "avg_connectivity_reward": -0.06666666666666667, | |
| "avg_diversity_reward": 0.13444444444444445, | |
| "avg_entity_informativeness_reward": -0.029992009599206938, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.5793650793650794, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.10372960372960373, | |
| "avg_relation_informativeness_reward": 0.06898843512226, | |
| "avg_reward": 0.7133699465240085, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6656078661080486, | |
| "retrieval_signal": 0.7113053613053614, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5312992851046106, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0018", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-07T10:04:53+00:00", | |
| "episodes": 3, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.20000000000000004, | |
| "avg_connectivity_reward": -0.06666666666666667, | |
| "avg_diversity_reward": 0.13444444444444445, | |
| "avg_entity_informativeness_reward": -0.029992009599206938, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.5793650793650794, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.10372960372960373, | |
| "avg_relation_informativeness_reward": 0.06898843512226, | |
| "avg_reward": 0.7133699465240085, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6656078661080486, | |
| "retrieval_signal": 0.7113053613053614, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5312992851046106, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0019", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-07T10:11:34+00:00", | |
| "episodes": 3, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.20000000000000004, | |
| "avg_connectivity_reward": -0.06666666666666667, | |
| "avg_diversity_reward": 0.13444444444444445, | |
| "avg_entity_informativeness_reward": -0.029992009599206938, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.5793650793650794, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.10372960372960373, | |
| "avg_relation_informativeness_reward": 0.06898843512226, | |
| "avg_reward": 0.7133699465240085, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6656078661080486, | |
| "retrieval_signal": 0.7113053613053614, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5312992851046106, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0020", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-07T10:29:54+00:00", | |
| "episodes": 3, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.0, | |
| "avg_connectivity_reward": 0.0, | |
| "avg_diversity_reward": 0.0, | |
| "avg_entity_informativeness_reward": 0.0, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.0, | |
| "avg_knowledge_carrier_reward": 0.0, | |
| "avg_knowledge_indexing_reward": 0.0, | |
| "avg_relation_informativeness_reward": 0.0, | |
| "avg_reward": 0.5519400198339021, | |
| "avg_soft_shaping_reward": 0.0, | |
| "avg_spawn_count": 0.0, | |
| "avg_spawn_critical_steps": 0.0, | |
| "avg_steps_to_solution": 1.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.2785970009916951, | |
| "retrieval_signal": 0.5, | |
| "spawn_completion_rate": 0.0, | |
| "spawn_signal": 0.4, | |
| "structural_signal": 0.5, | |
| "task_success_rate": 0.0, | |
| "tool_efficiency": 1.0 | |
| }, | |
| "run_id": "run_0021", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-07T15:59:20+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.0, | |
| "avg_connectivity_reward": 0.0, | |
| "avg_diversity_reward": 0.0, | |
| "avg_entity_informativeness_reward": 0.0, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.0, | |
| "avg_knowledge_carrier_reward": 0.0, | |
| "avg_knowledge_indexing_reward": 0.0, | |
| "avg_relation_informativeness_reward": 0.0, | |
| "avg_reward": 0.5519400198339021, | |
| "avg_soft_shaping_reward": 0.0, | |
| "avg_spawn_count": 0.0, | |
| "avg_spawn_critical_steps": 0.0, | |
| "avg_steps_to_solution": 1.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.2785970009916951, | |
| "retrieval_signal": 0.5, | |
| "spawn_completion_rate": 0.0, | |
| "spawn_signal": 0.4, | |
| "structural_signal": 0.5, | |
| "task_success_rate": 0.0, | |
| "tool_efficiency": 1.0 | |
| }, | |
| "run_id": "run_0022", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-08T04:25:00+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.0, | |
| "avg_connectivity_reward": 0.0, | |
| "avg_diversity_reward": 0.0, | |
| "avg_entity_informativeness_reward": 0.0, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.0, | |
| "avg_knowledge_carrier_reward": 0.0, | |
| "avg_knowledge_indexing_reward": 0.0, | |
| "avg_relation_informativeness_reward": 0.0, | |
| "avg_reward": 0.5519400198339021, | |
| "avg_soft_shaping_reward": 0.0, | |
| "avg_spawn_count": 0.0, | |
| "avg_spawn_critical_steps": 0.0, | |
| "avg_steps_to_solution": 1.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.2785970009916951, | |
| "retrieval_signal": 0.5, | |
| "spawn_completion_rate": 0.0, | |
| "spawn_signal": 0.4, | |
| "structural_signal": 0.5, | |
| "task_success_rate": 0.0, | |
| "tool_efficiency": 1.0 | |
| }, | |
| "run_id": "run_0023", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-08T04:28:07+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.0, | |
| "avg_connectivity_reward": 0.0, | |
| "avg_diversity_reward": 0.0, | |
| "avg_entity_informativeness_reward": 0.0, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.0, | |
| "avg_knowledge_carrier_reward": 0.0, | |
| "avg_knowledge_indexing_reward": 0.0, | |
| "avg_relation_informativeness_reward": 0.0, | |
| "avg_reward": 0.5519400198339021, | |
| "avg_soft_shaping_reward": 0.0, | |
| "avg_spawn_count": 0.0, | |
| "avg_spawn_critical_steps": 0.0, | |
| "avg_steps_to_solution": 1.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.2785970009916951, | |
| "retrieval_signal": 0.5, | |
| "spawn_completion_rate": 0.0, | |
| "spawn_signal": 0.4, | |
| "structural_signal": 0.5, | |
| "task_success_rate": 0.0, | |
| "tool_efficiency": 1.0 | |
| }, | |
| "run_id": "run_0024", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-08T04:39:32+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.0, | |
| "avg_connectivity_reward": 0.0, | |
| "avg_diversity_reward": 0.0, | |
| "avg_entity_informativeness_reward": 0.0, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.0, | |
| "avg_knowledge_carrier_reward": 0.0, | |
| "avg_knowledge_indexing_reward": 0.0, | |
| "avg_relation_informativeness_reward": 0.0, | |
| "avg_reward": 0.5519400198339021, | |
| "avg_soft_shaping_reward": 0.0, | |
| "avg_spawn_count": 0.0, | |
| "avg_spawn_critical_steps": 0.0, | |
| "avg_steps_to_solution": 1.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.2785970009916951, | |
| "retrieval_signal": 0.5, | |
| "spawn_completion_rate": 0.0, | |
| "spawn_signal": 0.4, | |
| "structural_signal": 0.5, | |
| "task_success_rate": 0.0, | |
| "tool_efficiency": 1.0 | |
| }, | |
| "run_id": "run_0025", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-08T04:40:21+00:00", | |
| "episodes": 30, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.13333333333333336, | |
| "avg_connectivity_reward": 0.09999999999999999, | |
| "avg_diversity_reward": 0.03911111111111111, | |
| "avg_entity_informativeness_reward": -0.00951758755541623, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.08482743691314255, | |
| "avg_knowledge_carrier_reward": 0.3333333333333333, | |
| "avg_knowledge_indexing_reward": 0.0832325289772058, | |
| "avg_relation_informativeness_reward": 0.024842289016879314, | |
| "avg_reward": 0.6636425017249088, | |
| "avg_soft_shaping_reward": 0.19999999999999993, | |
| "avg_spawn_count": 2.6666666666666665, | |
| "avg_spawn_critical_steps": 4.0, | |
| "avg_steps_to_solution": 6.333333333333333, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.4644798510150634, | |
| "retrieval_signal": 0.6457980518086888, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.7, | |
| "structural_signal": 0.5472649402922927, | |
| "task_success_rate": 0.6666666666666666, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0026", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-08T05:01:16+00:00", | |
| "episodes": 10, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.0, | |
| "avg_connectivity_reward": 0.0, | |
| "avg_diversity_reward": 0.0, | |
| "avg_entity_informativeness_reward": 0.0, | |
| "avg_format_reward": 0.14999999999999997, | |
| "avg_graph_f1": 0.0, | |
| "avg_knowledge_carrier_reward": 0.0, | |
| "avg_knowledge_indexing_reward": 0.0, | |
| "avg_relation_informativeness_reward": 0.0, | |
| "avg_reward": 0.5519400198339021, | |
| "avg_soft_shaping_reward": 0.0, | |
| "avg_spawn_count": 0.0, | |
| "avg_spawn_critical_steps": 0.0, | |
| "avg_steps_to_solution": 1.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.2785970009916951, | |
| "retrieval_signal": 0.5, | |
| "spawn_completion_rate": 0.0, | |
| "spawn_signal": 0.4, | |
| "structural_signal": 0.5, | |
| "task_success_rate": 0.0, | |
| "tool_efficiency": 1.0 | |
| }, | |
| "run_id": "run_0027", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-08T05:01:29+00:00", | |
| "episodes": 10, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.19999999999999998, | |
| "avg_connectivity_reward": 0.06, | |
| "avg_diversity_reward": 0.0, | |
| "avg_entity_informativeness_reward": 0.0, | |
| "avg_format_reward": 0.14999999999999997, | |
| "avg_graph_f1": 0.18535980927285275, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.09575879120879122, | |
| "avg_relation_informativeness_reward": 0.0, | |
| "avg_reward": 0.7109638031154166, | |
| "avg_soft_shaping_reward": 0.29999999999999993, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.5866289994462388, | |
| "retrieval_signal": 0.708515576923077, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.535, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0028", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-08T05:01:43+00:00", | |
| "episodes": 10, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.19999999999999998, | |
| "avg_connectivity_reward": 0.24, | |
| "avg_diversity_reward": 0.11733333333333333, | |
| "avg_entity_informativeness_reward": -0.028552762666248687, | |
| "avg_format_reward": 0.14999999999999997, | |
| "avg_graph_f1": 0.06912250146657492, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.15393879572282626, | |
| "avg_relation_informativeness_reward": 0.07452686705063795, | |
| "avg_reward": 0.7171006884027153, | |
| "avg_soft_shaping_reward": 0.29999999999999993, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.5730007362494549, | |
| "retrieval_signal": 0.7288785785029892, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.6067948208768779, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0029", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-08T15:57:03+00:00", | |
| "episodes": 10, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.0, | |
| "avg_connectivity_reward": 0.0, | |
| "avg_diversity_reward": 0.0, | |
| "avg_entity_informativeness_reward": 0.0, | |
| "avg_format_reward": 0.14999999999999997, | |
| "avg_graph_f1": 0.0, | |
| "avg_knowledge_carrier_reward": 0.0, | |
| "avg_knowledge_indexing_reward": 0.0, | |
| "avg_relation_informativeness_reward": 0.0, | |
| "avg_reward": 0.5519400198339021, | |
| "avg_soft_shaping_reward": 0.0, | |
| "avg_spawn_count": 0.0, | |
| "avg_spawn_critical_steps": 0.0, | |
| "avg_steps_to_solution": 1.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.2785970009916951, | |
| "retrieval_signal": 0.5, | |
| "spawn_completion_rate": 0.0, | |
| "spawn_signal": 0.4, | |
| "structural_signal": 0.5, | |
| "task_success_rate": 0.0, | |
| "tool_efficiency": 1.0 | |
| }, | |
| "run_id": "run_0030", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-08T15:57:18+00:00", | |
| "episodes": 10, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.19999999999999998, | |
| "avg_connectivity_reward": 0.06, | |
| "avg_diversity_reward": 0.0, | |
| "avg_entity_informativeness_reward": 0.0, | |
| "avg_format_reward": 0.14999999999999997, | |
| "avg_graph_f1": 0.18535980927285275, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.09575879120879122, | |
| "avg_relation_informativeness_reward": 0.0, | |
| "avg_reward": 0.7109638031154166, | |
| "avg_soft_shaping_reward": 0.29999999999999993, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.5866289994462388, | |
| "retrieval_signal": 0.708515576923077, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.535, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0031", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "gpt-5.4-mini", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 24, | |
| "max_width": 2, | |
| "seed": 2026, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-08T15:57:32+00:00", | |
| "episodes": 10, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.19999999999999998, | |
| "avg_connectivity_reward": 0.24, | |
| "avg_diversity_reward": 0.11733333333333333, | |
| "avg_entity_informativeness_reward": -0.028552762666248687, | |
| "avg_format_reward": 0.14999999999999997, | |
| "avg_graph_f1": 0.06912250146657492, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.15393879572282626, | |
| "avg_relation_informativeness_reward": 0.07452686705063795, | |
| "avg_reward": 0.7171006884027153, | |
| "avg_soft_shaping_reward": 0.29999999999999993, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.5730007362494549, | |
| "retrieval_signal": 0.7288785785029892, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.6067948208768779, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0032", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| }, | |
| { | |
| "config": { | |
| "llm_model": "qwen3:8b", | |
| "llm_provider": "openai", | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 18, | |
| "max_width": 2, | |
| "seed": 7, | |
| "seeded_questions": 30, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-20T19:46:04+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.0, | |
| "avg_connectivity_reward": 0.0, | |
| "avg_diversity_reward": 0.0, | |
| "avg_entity_informativeness_reward": 0.0, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.0, | |
| "avg_knowledge_carrier_reward": 0.0, | |
| "avg_knowledge_indexing_reward": 0.0, | |
| "avg_relation_informativeness_reward": 0.0, | |
| "avg_reward": 0.5519400198339021, | |
| "avg_soft_shaping_reward": 0.0, | |
| "avg_spawn_count": 0.0, | |
| "avg_spawn_critical_steps": 0.0, | |
| "avg_steps_to_solution": 1.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.2785970009916951, | |
| "retrieval_signal": 0.5, | |
| "spawn_completion_rate": 0.0, | |
| "spawn_signal": 0.4, | |
| "structural_signal": 0.5, | |
| "task_success_rate": 0.0, | |
| "tool_efficiency": 1.0 | |
| }, | |
| "run_id": "run_0033", | |
| "run_name": "fixed_levels_qwen_swarm" | |
| } | |
| ] |