Spaces:
Paused
Paused
File size: 5,640 Bytes
957b4b2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | [
{
"config": {
"max_steps": 8,
"model": "gpt-4.1-nano",
"provider": "openai",
"seed": 7,
"seed_file": "datasets/fixed_levels/seed_fixed_levels.json",
"shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json"
},
"created_at": "2026-04-02T05:13:26+00:00",
"episodes": 3,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.0,
"avg_connectivity_reward": -0.19999999999999998,
"avg_diversity_reward": 0.0,
"avg_entity_informativeness_reward": 0.0,
"avg_format_reward": 0.09999999999999999,
"avg_graph_f1": 0.0,
"avg_knowledge_carrier_reward": -0.16666666666666666,
"avg_knowledge_indexing_reward": 0.09999999999999999,
"avg_relation_informativeness_reward": 0.0,
"avg_reward": -0.6288888888888888,
"avg_soft_shaping_reward": 0.0,
"avg_spawn_count": 0.0,
"avg_spawn_critical_steps": 0.0,
"avg_steps_to_solution": 5.333333333333333,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.2606214594106657,
"retrieval_signal": 0.4766666666666666,
"spawn_completion_rate": 0.0,
"spawn_signal": 0.4,
"structural_signal": 0.45,
"task_success_rate": 0.0,
"tool_efficiency": 1.0
},
"run_id": "run_0001",
"run_name": "openai_fixed_levels_baseline"
},
{
"config": {
"max_steps": 8,
"model": "gpt-4.1-nano",
"provider": "openai",
"seed": 7,
"seed_file": "datasets/fixed_levels/seed_fixed_levels.json",
"shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json"
},
"created_at": "2026-04-02T05:47:22+00:00",
"episodes": 30,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.010000000000000002,
"avg_connectivity_reward": -0.029999999999999995,
"avg_diversity_reward": 0.008,
"avg_entity_informativeness_reward": -0.000503934120399963,
"avg_format_reward": 0.014999999999999998,
"avg_graph_f1": 0.0,
"avg_knowledge_carrier_reward": -0.025,
"avg_knowledge_indexing_reward": 0.003193930313179388,
"avg_relation_informativeness_reward": -0.0038133420811587043,
"avg_reward": -0.35025037651763485,
"avg_soft_shaping_reward": -0.013000000000000001,
"avg_spawn_count": 0.0,
"avg_spawn_critical_steps": 0.0,
"avg_steps_to_solution": 7.9,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.2523805392887,
"retrieval_signal": 0.4923678756096128,
"spawn_completion_rate": 0.0,
"spawn_signal": 0.4,
"structural_signal": 0.49383654475968825,
"task_success_rate": 0.0,
"tool_efficiency": 0.8528138528138528
},
"run_id": "run_0002",
"run_name": "openai_fixed_levels_baseline"
},
{
"config": {
"max_steps": 8,
"model": "gpt-4.1-nano",
"provider": "openai",
"seed": 7,
"seed_file": "datasets/fixed_levels/seed_fixed_levels.json",
"shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json"
},
"created_at": "2026-04-02T10:15:11+00:00",
"episodes": 3,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.0,
"avg_connectivity_reward": -0.19999999999999998,
"avg_diversity_reward": 0.0,
"avg_entity_informativeness_reward": 0.0,
"avg_format_reward": 0.09999999999999999,
"avg_graph_f1": 0.0,
"avg_knowledge_carrier_reward": -0.16666666666666666,
"avg_knowledge_indexing_reward": 0.18776223776223777,
"avg_relation_informativeness_reward": 0.0,
"avg_reward": 0.26679098679098673,
"avg_soft_shaping_reward": 0.0,
"avg_spawn_count": 0.0,
"avg_spawn_critical_steps": 0.0,
"avg_steps_to_solution": 6.666666666666667,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.3691822627132265,
"retrieval_signal": 0.5073834498834499,
"spawn_completion_rate": 0.0,
"spawn_signal": 0.4,
"structural_signal": 0.45,
"task_success_rate": 0.3333333333333333,
"tool_efficiency": 1.0
},
"run_id": "run_0003",
"run_name": "openai_fixed_levels_baseline"
},
{
"config": {
"max_steps": 8,
"model": "gpt-5.4-mini",
"provider": "openai",
"seed": 7,
"seed_file": "datasets/fixed_levels/seed_fixed_levels.json",
"shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json"
},
"created_at": "2026-04-02T10:23:56+00:00",
"episodes": 3,
"metrics": {
"avg_compactness_reward": 0.0,
"avg_connectivity_gain_reward": 0.0,
"avg_connectivity_reward": -0.3,
"avg_diversity_reward": 0.0,
"avg_entity_informativeness_reward": 0.0,
"avg_format_reward": 0.15,
"avg_graph_f1": 0.0,
"avg_knowledge_carrier_reward": -0.25,
"avg_knowledge_indexing_reward": 0.27412587412587414,
"avg_relation_informativeness_reward": 0.0,
"avg_reward": 0.6447878047878046,
"avg_soft_shaping_reward": 0.0,
"avg_spawn_count": 0.0,
"avg_spawn_critical_steps": 0.0,
"avg_steps_to_solution": 5.666666666666667,
"deanonymization_accuracy": 0.0,
"leaderboard_score": 0.46489058185132603,
"retrieval_signal": 0.508444055944056,
"spawn_completion_rate": 0.0,
"spawn_signal": 0.4,
"structural_signal": 0.425,
"task_success_rate": 0.6666666666666666,
"tool_efficiency": 1.0
},
"run_id": "run_0004",
"run_name": "openai_fixed_levels_baseline"
}
] |