Spaces:
Paused
Paused
| [ | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 18, | |
| "max_width": 2, | |
| "seed": 7, | |
| "seeded_questions": 1, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-01T12:03:13+00:00", | |
| "episodes": 2, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.1, | |
| "avg_connectivity_reward": 0.3, | |
| "avg_diversity_reward": 0.08, | |
| "avg_entity_informativeness_reward": 0.024705877237863647, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 1.0, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.15, | |
| "avg_relation_informativeness_reward": 0.03137141693971891, | |
| "avg_reward": 3.534162700533434, | |
| "avg_soft_shaping_reward": 0.15, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 1.0, | |
| "leaderboard_score": 0.8618382743087459, | |
| "retrieval_signal": 0.7275, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.6082154588355165, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.25 | |
| }, | |
| "run_id": "run_0001", | |
| "run_name": "swarm_seed_smoke" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 18, | |
| "max_width": 2, | |
| "seed": 7, | |
| "seeded_questions": 1, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-01T12:16:28+00:00", | |
| "episodes": 2, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.1, | |
| "avg_connectivity_reward": 0.3, | |
| "avg_diversity_reward": 0.08, | |
| "avg_entity_informativeness_reward": 0.024705877237863647, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 1.0, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.15, | |
| "avg_relation_informativeness_reward": 0.03137141693971891, | |
| "avg_reward": 3.534162700533434, | |
| "avg_soft_shaping_reward": 0.15, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 1.0, | |
| "leaderboard_score": 0.8618382743087459, | |
| "retrieval_signal": 0.7275, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.6082154588355165, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.25 | |
| }, | |
| "run_id": "run_0002", | |
| "run_name": "swarm_seed_smoke" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 18, | |
| "max_width": 2, | |
| "seed": 7, | |
| "seeded_questions": 0, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-01T12:25:15+00:00", | |
| "episodes": 20, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.10000000000000002, | |
| "avg_connectivity_reward": 0.23999999999999994, | |
| "avg_diversity_reward": 0.08000000000000002, | |
| "avg_entity_informativeness_reward": -0.00983642442912193, | |
| "avg_format_reward": 0.14999999999999997, | |
| "avg_graph_f1": 1.0, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.1125, | |
| "avg_relation_informativeness_reward": 0.007185245326892638, | |
| "avg_reward": 3.351267560586956, | |
| "avg_soft_shaping_reward": 0.14999999999999997, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 1.0, | |
| "leaderboard_score": 0.8573187614039594, | |
| "retrieval_signal": 0.7143750000000001, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5814697641795541, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.25 | |
| }, | |
| "run_id": "run_0003", | |
| "run_name": "baseline_swarm" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 18, | |
| "max_width": 2, | |
| "seed": 7, | |
| "seeded_questions": 1, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-01T17:27:30+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.1, | |
| "avg_connectivity_reward": 0.3, | |
| "avg_diversity_reward": 0.08, | |
| "avg_entity_informativeness_reward": 0.06128386989162576, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 1.0, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.3, | |
| "avg_relation_informativeness_reward": 0.12, | |
| "avg_reward": 3.916035942914144, | |
| "avg_soft_shaping_reward": 0.15, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 1.0, | |
| "leaderboard_score": 0.8718832338515622, | |
| "retrieval_signal": 0.78, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.6332567739783251, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.25 | |
| }, | |
| "run_id": "run_0004", | |
| "run_name": "ollama_qwen_smoke" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 18, | |
| "max_width": 2, | |
| "seed": 7, | |
| "seeded_questions": 1, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-01T17:29:12+00:00", | |
| "episodes": 1, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.1, | |
| "avg_connectivity_reward": 0.3, | |
| "avg_diversity_reward": 0.08, | |
| "avg_entity_informativeness_reward": 0.06128386989162576, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 1.0, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.3, | |
| "avg_relation_informativeness_reward": 0.12, | |
| "avg_reward": 4.059369276247478, | |
| "avg_soft_shaping_reward": 0.15, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 1.0, | |
| "leaderboard_score": 0.9020114237119466, | |
| "retrieval_signal": 0.78, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.6332567739783251, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0005", | |
| "run_name": "ollama_qwen_smoke2" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 18, | |
| "max_width": 2, | |
| "seed": 7, | |
| "seeded_questions": 0, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-01T17:39:15+00:00", | |
| "episodes": 2, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.2, | |
| "avg_connectivity_reward": 0.0, | |
| "avg_diversity_reward": 0.0683333333333333, | |
| "avg_entity_informativeness_reward": -0.07397348480982455, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.6666666666666667, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.14884615384615385, | |
| "avg_relation_informativeness_reward": -0.00860389783205907, | |
| "avg_reward": 4.351764433970379, | |
| "avg_soft_shaping_reward": 0.3, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.6973935600514568, | |
| "retrieval_signal": 0.7270961538461539, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5137345234716233, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0006", | |
| "run_name": "high_timeout_shared_ctx" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 18, | |
| "max_width": 2, | |
| "seed": 7, | |
| "seeded_questions": 0, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-01T18:57:40+00:00", | |
| "episodes": 3, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.13333333333333333, | |
| "avg_connectivity_reward": 0.09999999999999999, | |
| "avg_diversity_reward": 0.056666666666666664, | |
| "avg_entity_informativeness_reward": -0.020478979694240708, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.8148148148148149, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.27, | |
| "avg_relation_informativeness_reward": 0.07174291752145656, | |
| "avg_reward": 4.0269419367756605, | |
| "avg_soft_shaping_reward": 0.19999999999999998, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.7366215569569294, | |
| "retrieval_signal": 0.7695000000000001, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5570861208987765, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0007", | |
| "run_name": "episode_selector_check" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 18, | |
| "max_width": 2, | |
| "seed": 7, | |
| "seeded_questions": 15, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-01T19:11:44+00:00", | |
| "episodes": 3, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.10000000000000002, | |
| "avg_connectivity_reward": 0.3, | |
| "avg_diversity_reward": 0.08, | |
| "avg_entity_informativeness_reward": -0.02722031691758704, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 1.0, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.0, | |
| "avg_relation_informativeness_reward": -0.00011920119799207429, | |
| "avg_reward": 3.444079221573606, | |
| "avg_soft_shaping_reward": 0.15, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 1.0, | |
| "leaderboard_score": 0.8828572592896698, | |
| "retrieval_signal": 0.675, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5915320963768841, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0008", | |
| "run_name": "qwen_rerun" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 18, | |
| "max_width": 2, | |
| "seed": 7, | |
| "seeded_questions": 15, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-01T19:19:34+00:00", | |
| "episodes": 3, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.10000000000000002, | |
| "avg_connectivity_reward": 0.3, | |
| "avg_diversity_reward": 0.08, | |
| "avg_entity_informativeness_reward": -0.024861029515896544, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 1.0, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.0, | |
| "avg_relation_informativeness_reward": -0.0024320085090966614, | |
| "avg_reward": 3.4441257016641917, | |
| "avg_soft_shaping_reward": 0.15, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 1.0, | |
| "leaderboard_score": 0.8828581656226586, | |
| "retrieval_signal": 0.675, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5915413923950014, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0009", | |
| "run_name": "qwen_episode_fix" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 18, | |
| "max_width": 2, | |
| "seed": 7, | |
| "seeded_questions": 15, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-01T19:24:37+00:00", | |
| "episodes": 3, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.10000000000000002, | |
| "avg_connectivity_reward": 0.3, | |
| "avg_diversity_reward": 0.08, | |
| "avg_entity_informativeness_reward": -0.02722031691758704, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 1.0, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.0, | |
| "avg_relation_informativeness_reward": -0.0030604289114462002, | |
| "avg_reward": 3.4411379938601514, | |
| "avg_soft_shaping_reward": 0.15, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 1.0, | |
| "leaderboard_score": 0.8827999009847504, | |
| "retrieval_signal": 0.675, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5909438508341933, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0010", | |
| "run_name": "qwen_rerun_graph_fix" | |
| }, | |
| { | |
| "config": { | |
| "max_agents": 3, | |
| "max_breadth": 2, | |
| "max_depth": 2, | |
| "max_steps": 18, | |
| "max_width": 2, | |
| "seed": 7, | |
| "seeded_questions": 15, | |
| "swarm_enabled": true | |
| }, | |
| "created_at": "2026-04-01T19:31:54+00:00", | |
| "episodes": 15, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.16666666666666666, | |
| "avg_connectivity_reward": 0.16999999999999998, | |
| "avg_diversity_reward": 0.1157777777777778, | |
| "avg_entity_informativeness_reward": -0.0181244777358718, | |
| "avg_format_reward": 0.14999999999999997, | |
| "avg_graph_f1": 0.8492063492063492, | |
| "avg_knowledge_carrier_reward": 0.5, | |
| "avg_knowledge_indexing_reward": 0.012000000000000002, | |
| "avg_relation_informativeness_reward": 0.05935837081627929, | |
| "avg_reward": 4.201760569277529, | |
| "avg_soft_shaping_reward": 0.24999999999999994, | |
| "avg_spawn_count": 4.0, | |
| "avg_spawn_critical_steps": 6.0, | |
| "avg_steps_to_solution": 9.0, | |
| "deanonymization_accuracy": 1.0, | |
| "leaderboard_score": 0.8534887252258901, | |
| "retrieval_signal": 0.6792, | |
| "spawn_completion_rate": 1.0, | |
| "spawn_signal": 0.6666666666666666, | |
| "structural_signal": 0.5847801119494148, | |
| "task_success_rate": 1.0, | |
| "tool_efficiency": 0.5 | |
| }, | |
| "run_id": "run_0011", | |
| "run_name": "qwen_rerun_graph_fix" | |
| } | |
| ] |