File size: 5,640 Bytes
957b4b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
[
  {
    "config": {
      "max_steps": 8,
      "model": "gpt-4.1-nano",
      "provider": "openai",
      "seed": 7,
      "seed_file": "datasets/fixed_levels/seed_fixed_levels.json",
      "shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json"
    },
    "created_at": "2026-04-02T05:13:26+00:00",
    "episodes": 3,
    "metrics": {
      "avg_compactness_reward": 0.0,
      "avg_connectivity_gain_reward": 0.0,
      "avg_connectivity_reward": -0.19999999999999998,
      "avg_diversity_reward": 0.0,
      "avg_entity_informativeness_reward": 0.0,
      "avg_format_reward": 0.09999999999999999,
      "avg_graph_f1": 0.0,
      "avg_knowledge_carrier_reward": -0.16666666666666666,
      "avg_knowledge_indexing_reward": 0.09999999999999999,
      "avg_relation_informativeness_reward": 0.0,
      "avg_reward": -0.6288888888888888,
      "avg_soft_shaping_reward": 0.0,
      "avg_spawn_count": 0.0,
      "avg_spawn_critical_steps": 0.0,
      "avg_steps_to_solution": 5.333333333333333,
      "deanonymization_accuracy": 0.0,
      "leaderboard_score": 0.2606214594106657,
      "retrieval_signal": 0.4766666666666666,
      "spawn_completion_rate": 0.0,
      "spawn_signal": 0.4,
      "structural_signal": 0.45,
      "task_success_rate": 0.0,
      "tool_efficiency": 1.0
    },
    "run_id": "run_0001",
    "run_name": "openai_fixed_levels_baseline"
  },
  {
    "config": {
      "max_steps": 8,
      "model": "gpt-4.1-nano",
      "provider": "openai",
      "seed": 7,
      "seed_file": "datasets/fixed_levels/seed_fixed_levels.json",
      "shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json"
    },
    "created_at": "2026-04-02T05:47:22+00:00",
    "episodes": 30,
    "metrics": {
      "avg_compactness_reward": 0.0,
      "avg_connectivity_gain_reward": 0.010000000000000002,
      "avg_connectivity_reward": -0.029999999999999995,
      "avg_diversity_reward": 0.008,
      "avg_entity_informativeness_reward": -0.000503934120399963,
      "avg_format_reward": 0.014999999999999998,
      "avg_graph_f1": 0.0,
      "avg_knowledge_carrier_reward": -0.025,
      "avg_knowledge_indexing_reward": 0.003193930313179388,
      "avg_relation_informativeness_reward": -0.0038133420811587043,
      "avg_reward": -0.35025037651763485,
      "avg_soft_shaping_reward": -0.013000000000000001,
      "avg_spawn_count": 0.0,
      "avg_spawn_critical_steps": 0.0,
      "avg_steps_to_solution": 7.9,
      "deanonymization_accuracy": 0.0,
      "leaderboard_score": 0.2523805392887,
      "retrieval_signal": 0.4923678756096128,
      "spawn_completion_rate": 0.0,
      "spawn_signal": 0.4,
      "structural_signal": 0.49383654475968825,
      "task_success_rate": 0.0,
      "tool_efficiency": 0.8528138528138528
    },
    "run_id": "run_0002",
    "run_name": "openai_fixed_levels_baseline"
  },
  {
    "config": {
      "max_steps": 8,
      "model": "gpt-4.1-nano",
      "provider": "openai",
      "seed": 7,
      "seed_file": "datasets/fixed_levels/seed_fixed_levels.json",
      "shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json"
    },
    "created_at": "2026-04-02T10:15:11+00:00",
    "episodes": 3,
    "metrics": {
      "avg_compactness_reward": 0.0,
      "avg_connectivity_gain_reward": 0.0,
      "avg_connectivity_reward": -0.19999999999999998,
      "avg_diversity_reward": 0.0,
      "avg_entity_informativeness_reward": 0.0,
      "avg_format_reward": 0.09999999999999999,
      "avg_graph_f1": 0.0,
      "avg_knowledge_carrier_reward": -0.16666666666666666,
      "avg_knowledge_indexing_reward": 0.18776223776223777,
      "avg_relation_informativeness_reward": 0.0,
      "avg_reward": 0.26679098679098673,
      "avg_soft_shaping_reward": 0.0,
      "avg_spawn_count": 0.0,
      "avg_spawn_critical_steps": 0.0,
      "avg_steps_to_solution": 6.666666666666667,
      "deanonymization_accuracy": 0.0,
      "leaderboard_score": 0.3691822627132265,
      "retrieval_signal": 0.5073834498834499,
      "spawn_completion_rate": 0.0,
      "spawn_signal": 0.4,
      "structural_signal": 0.45,
      "task_success_rate": 0.3333333333333333,
      "tool_efficiency": 1.0
    },
    "run_id": "run_0003",
    "run_name": "openai_fixed_levels_baseline"
  },
  {
    "config": {
      "max_steps": 8,
      "model": "gpt-5.4-mini",
      "provider": "openai",
      "seed": 7,
      "seed_file": "datasets/fixed_levels/seed_fixed_levels.json",
      "shared_config_path": "datasets/fixed_levels/shared_config_fixed_levels.json"
    },
    "created_at": "2026-04-02T10:23:56+00:00",
    "episodes": 3,
    "metrics": {
      "avg_compactness_reward": 0.0,
      "avg_connectivity_gain_reward": 0.0,
      "avg_connectivity_reward": -0.3,
      "avg_diversity_reward": 0.0,
      "avg_entity_informativeness_reward": 0.0,
      "avg_format_reward": 0.15,
      "avg_graph_f1": 0.0,
      "avg_knowledge_carrier_reward": -0.25,
      "avg_knowledge_indexing_reward": 0.27412587412587414,
      "avg_relation_informativeness_reward": 0.0,
      "avg_reward": 0.6447878047878046,
      "avg_soft_shaping_reward": 0.0,
      "avg_spawn_count": 0.0,
      "avg_spawn_critical_steps": 0.0,
      "avg_steps_to_solution": 5.666666666666667,
      "deanonymization_accuracy": 0.0,
      "leaderboard_score": 0.46489058185132603,
      "retrieval_signal": 0.508444055944056,
      "spawn_completion_rate": 0.0,
      "spawn_signal": 0.4,
      "structural_signal": 0.425,
      "task_success_rate": 0.6666666666666666,
      "tool_efficiency": 1.0
    },
    "run_id": "run_0004",
    "run_name": "openai_fixed_levels_baseline"
  }
]