Spaces:
Paused
Paused
| [ | |
| { | |
| "config": { | |
| "max_steps": 8, | |
| "model": "gpt-4.1-nano", | |
| "provider": "openai", | |
| "seed": 7, | |
| "seed_file": "datasets/fixed_levels/seed_fixed_levels.json", | |
| "shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json" | |
| }, | |
| "created_at": "2026-04-02T05:13:26+00:00", | |
| "episodes": 3, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.0, | |
| "avg_connectivity_reward": -0.19999999999999998, | |
| "avg_diversity_reward": 0.0, | |
| "avg_entity_informativeness_reward": 0.0, | |
| "avg_format_reward": 0.09999999999999999, | |
| "avg_graph_f1": 0.0, | |
| "avg_knowledge_carrier_reward": -0.16666666666666666, | |
| "avg_knowledge_indexing_reward": 0.09999999999999999, | |
| "avg_relation_informativeness_reward": 0.0, | |
| "avg_reward": -0.6288888888888888, | |
| "avg_soft_shaping_reward": 0.0, | |
| "avg_spawn_count": 0.0, | |
| "avg_spawn_critical_steps": 0.0, | |
| "avg_steps_to_solution": 5.333333333333333, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.2606214594106657, | |
| "retrieval_signal": 0.4766666666666666, | |
| "spawn_completion_rate": 0.0, | |
| "spawn_signal": 0.4, | |
| "structural_signal": 0.45, | |
| "task_success_rate": 0.0, | |
| "tool_efficiency": 1.0 | |
| }, | |
| "run_id": "run_0001", | |
| "run_name": "openai_fixed_levels_baseline" | |
| }, | |
| { | |
| "config": { | |
| "max_steps": 8, | |
| "model": "gpt-4.1-nano", | |
| "provider": "openai", | |
| "seed": 7, | |
| "seed_file": "datasets/fixed_levels/seed_fixed_levels.json", | |
| "shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json" | |
| }, | |
| "created_at": "2026-04-02T05:47:22+00:00", | |
| "episodes": 30, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.010000000000000002, | |
| "avg_connectivity_reward": -0.029999999999999995, | |
| "avg_diversity_reward": 0.008, | |
| "avg_entity_informativeness_reward": -0.000503934120399963, | |
| "avg_format_reward": 0.014999999999999998, | |
| "avg_graph_f1": 0.0, | |
| "avg_knowledge_carrier_reward": -0.025, | |
| "avg_knowledge_indexing_reward": 0.003193930313179388, | |
| "avg_relation_informativeness_reward": -0.0038133420811587043, | |
| "avg_reward": -0.35025037651763485, | |
| "avg_soft_shaping_reward": -0.013000000000000001, | |
| "avg_spawn_count": 0.0, | |
| "avg_spawn_critical_steps": 0.0, | |
| "avg_steps_to_solution": 7.9, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.2523805392887, | |
| "retrieval_signal": 0.4923678756096128, | |
| "spawn_completion_rate": 0.0, | |
| "spawn_signal": 0.4, | |
| "structural_signal": 0.49383654475968825, | |
| "task_success_rate": 0.0, | |
| "tool_efficiency": 0.8528138528138528 | |
| }, | |
| "run_id": "run_0002", | |
| "run_name": "openai_fixed_levels_baseline" | |
| }, | |
| { | |
| "config": { | |
| "max_steps": 8, | |
| "model": "gpt-4.1-nano", | |
| "provider": "openai", | |
| "seed": 7, | |
| "seed_file": "datasets/fixed_levels/seed_fixed_levels.json", | |
| "shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json" | |
| }, | |
| "created_at": "2026-04-02T10:15:11+00:00", | |
| "episodes": 3, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.0, | |
| "avg_connectivity_reward": -0.19999999999999998, | |
| "avg_diversity_reward": 0.0, | |
| "avg_entity_informativeness_reward": 0.0, | |
| "avg_format_reward": 0.09999999999999999, | |
| "avg_graph_f1": 0.0, | |
| "avg_knowledge_carrier_reward": -0.16666666666666666, | |
| "avg_knowledge_indexing_reward": 0.18776223776223777, | |
| "avg_relation_informativeness_reward": 0.0, | |
| "avg_reward": 0.26679098679098673, | |
| "avg_soft_shaping_reward": 0.0, | |
| "avg_spawn_count": 0.0, | |
| "avg_spawn_critical_steps": 0.0, | |
| "avg_steps_to_solution": 6.666666666666667, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.3691822627132265, | |
| "retrieval_signal": 0.5073834498834499, | |
| "spawn_completion_rate": 0.0, | |
| "spawn_signal": 0.4, | |
| "structural_signal": 0.45, | |
| "task_success_rate": 0.3333333333333333, | |
| "tool_efficiency": 1.0 | |
| }, | |
| "run_id": "run_0003", | |
| "run_name": "openai_fixed_levels_baseline" | |
| }, | |
| { | |
| "config": { | |
| "max_steps": 8, | |
| "model": "gpt-5.4-mini", | |
| "provider": "openai", | |
| "seed": 7, | |
| "seed_file": "datasets/fixed_levels/seed_fixed_levels.json", | |
| "shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json" | |
| }, | |
| "created_at": "2026-04-02T10:23:56+00:00", | |
| "episodes": 3, | |
| "metrics": { | |
| "avg_compactness_reward": 0.0, | |
| "avg_connectivity_gain_reward": 0.0, | |
| "avg_connectivity_reward": -0.3, | |
| "avg_diversity_reward": 0.0, | |
| "avg_entity_informativeness_reward": 0.0, | |
| "avg_format_reward": 0.15, | |
| "avg_graph_f1": 0.0, | |
| "avg_knowledge_carrier_reward": -0.25, | |
| "avg_knowledge_indexing_reward": 0.27412587412587414, | |
| "avg_relation_informativeness_reward": 0.0, | |
| "avg_reward": 0.6447878047878046, | |
| "avg_soft_shaping_reward": 0.0, | |
| "avg_spawn_count": 0.0, | |
| "avg_spawn_critical_steps": 0.0, | |
| "avg_steps_to_solution": 5.666666666666667, | |
| "deanonymization_accuracy": 0.0, | |
| "leaderboard_score": 0.46489058185132603, | |
| "retrieval_signal": 0.508444055944056, | |
| "spawn_completion_rate": 0.0, | |
| "spawn_signal": 0.4, | |
| "structural_signal": 0.425, | |
| "task_success_rate": 0.6666666666666666, | |
| "tool_efficiency": 1.0 | |
| }, | |
| "run_id": "run_0004", | |
| "run_name": "openai_fixed_levels_baseline" | |
| } | |
| ] |