[ { "config": { "max_steps": 8, "model": "gpt-4.1-nano", "provider": "openai", "seed": 7, "seed_file": "datasets/fixed_levels/seed_fixed_levels.json", "shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json" }, "created_at": "2026-04-02T05:13:26+00:00", "episodes": 3, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.0, "avg_connectivity_reward": -0.19999999999999998, "avg_diversity_reward": 0.0, "avg_entity_informativeness_reward": 0.0, "avg_format_reward": 0.09999999999999999, "avg_graph_f1": 0.0, "avg_knowledge_carrier_reward": -0.16666666666666666, "avg_knowledge_indexing_reward": 0.09999999999999999, "avg_relation_informativeness_reward": 0.0, "avg_reward": -0.6288888888888888, "avg_soft_shaping_reward": 0.0, "avg_spawn_count": 0.0, "avg_spawn_critical_steps": 0.0, "avg_steps_to_solution": 5.333333333333333, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.2606214594106657, "retrieval_signal": 0.4766666666666666, "spawn_completion_rate": 0.0, "spawn_signal": 0.4, "structural_signal": 0.45, "task_success_rate": 0.0, "tool_efficiency": 1.0 }, "run_id": "run_0001", "run_name": "openai_fixed_levels_baseline" }, { "config": { "max_steps": 8, "model": "gpt-4.1-nano", "provider": "openai", "seed": 7, "seed_file": "datasets/fixed_levels/seed_fixed_levels.json", "shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json" }, "created_at": "2026-04-02T05:47:22+00:00", "episodes": 30, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.010000000000000002, "avg_connectivity_reward": -0.029999999999999995, "avg_diversity_reward": 0.008, "avg_entity_informativeness_reward": -0.000503934120399963, "avg_format_reward": 0.014999999999999998, "avg_graph_f1": 0.0, "avg_knowledge_carrier_reward": -0.025, "avg_knowledge_indexing_reward": 0.003193930313179388, "avg_relation_informativeness_reward": -0.0038133420811587043, "avg_reward": -0.35025037651763485, "avg_soft_shaping_reward": -0.013000000000000001, "avg_spawn_count": 0.0, "avg_spawn_critical_steps": 0.0, "avg_steps_to_solution": 7.9, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.2523805392887, "retrieval_signal": 0.4923678756096128, "spawn_completion_rate": 0.0, "spawn_signal": 0.4, "structural_signal": 0.49383654475968825, "task_success_rate": 0.0, "tool_efficiency": 0.8528138528138528 }, "run_id": "run_0002", "run_name": "openai_fixed_levels_baseline" }, { "config": { "max_steps": 8, "model": "gpt-4.1-nano", "provider": "openai", "seed": 7, "seed_file": "datasets/fixed_levels/seed_fixed_levels.json", "shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json" }, "created_at": "2026-04-02T10:15:11+00:00", "episodes": 3, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.0, "avg_connectivity_reward": -0.19999999999999998, "avg_diversity_reward": 0.0, "avg_entity_informativeness_reward": 0.0, "avg_format_reward": 0.09999999999999999, "avg_graph_f1": 0.0, "avg_knowledge_carrier_reward": -0.16666666666666666, "avg_knowledge_indexing_reward": 0.18776223776223777, "avg_relation_informativeness_reward": 0.0, "avg_reward": 0.26679098679098673, "avg_soft_shaping_reward": 0.0, "avg_spawn_count": 0.0, "avg_spawn_critical_steps": 0.0, "avg_steps_to_solution": 6.666666666666667, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.3691822627132265, "retrieval_signal": 0.5073834498834499, "spawn_completion_rate": 0.0, "spawn_signal": 0.4, "structural_signal": 0.45, "task_success_rate": 0.3333333333333333, "tool_efficiency": 1.0 }, "run_id": "run_0003", "run_name": "openai_fixed_levels_baseline" }, { "config": { "max_steps": 8, "model": "gpt-5.4-mini", "provider": "openai", "seed": 7, "seed_file": "datasets/fixed_levels/seed_fixed_levels.json", "shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json" }, "created_at": "2026-04-02T10:23:56+00:00", "episodes": 3, "metrics": { "avg_compactness_reward": 0.0, "avg_connectivity_gain_reward": 0.0, "avg_connectivity_reward": -0.3, "avg_diversity_reward": 0.0, "avg_entity_informativeness_reward": 0.0, "avg_format_reward": 0.15, "avg_graph_f1": 0.0, "avg_knowledge_carrier_reward": -0.25, "avg_knowledge_indexing_reward": 0.27412587412587414, "avg_relation_informativeness_reward": 0.0, "avg_reward": 0.6447878047878046, "avg_soft_shaping_reward": 0.0, "avg_spawn_count": 0.0, "avg_spawn_critical_steps": 0.0, "avg_steps_to_solution": 5.666666666666667, "deanonymization_accuracy": 0.0, "leaderboard_score": 0.46489058185132603, "retrieval_signal": 0.508444055944056, "spawn_completion_rate": 0.0, "spawn_signal": 0.4, "structural_signal": 0.425, "task_success_rate": 0.6666666666666666, "tool_efficiency": 1.0 }, "run_id": "run_0004", "run_name": "openai_fixed_levels_baseline" } ]