Siddeshwar1625 commited on
Commit
d3955b4
·
verified ·
1 Parent(s): a2c2d08

Upload self-play summary

Browse files
self_play_hf_l40s_full/self_play_summary.json ADDED
@@ -0,0 +1,1196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "canonical_graph_mode": "fixed",
3
+ "dry_run": false,
4
+ "final_models": {
5
+ "answerer": "artifacts/self_play_hf_l40s_full/round_006/answerer_train/final_model",
6
+ "generator": "artifacts/self_play_hf_l40s_full/round_006/answerer_train/final_model"
7
+ },
8
+ "initial_models": {
9
+ "answerer": "Qwen/Qwen2.5-0.5B-Instruct",
10
+ "generator": "Qwen/Qwen2.5-0.5B-Instruct"
11
+ },
12
+ "kimi_objective_mapping": {
13
+ "grouped_rollouts": "TRL GRPO num_generations",
14
+ "mean_centered_advantage": "GRPO relative reward baseline",
15
+ "parallel_orchestration": "PARL-inspired auxiliary reward over generator and answerer swarms",
16
+ "reference_regularization": "GRPO beta KL term",
17
+ "toggle_self_play": "Alternating generator and answerer rounds",
18
+ "token_level_clipping": "GRPO epsilon clipping over policy ratios"
19
+ },
20
+ "model_topology": "shared",
21
+ "output_dir": "artifacts/self_play_hf_l40s_full",
22
+ "phase_schedule": "generator_answerer",
23
+ "pipeline_mode": "swarm_v2",
24
+ "post_training_evaluation": {
25
+ "answerer_models": {
26
+ "finetuned_answerer": "artifacts/self_play_hf_l40s_full/round_006/answerer_train/final_model",
27
+ "original_answerer": "Qwen/Qwen2.5-0.5B-Instruct"
28
+ },
29
+ "generated_tasks_path": "artifacts/self_play_hf_l40s_full/post_training_eval_generated_tasks.json",
30
+ "generator_model": "artifacts/self_play_hf_l40s_full/round_006/answerer_train/final_model",
31
+ "model_evaluations": {
32
+ "finetuned_answerer": {
33
+ "episodes": [
34
+ {
35
+ "agent_answer": "user_25",
36
+ "completion_length": 589,
37
+ "graph_f1": 0.0,
38
+ "predicted_edge_count": 0,
39
+ "question": "If you start at alias_30_572 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
40
+ "reward": 0.9323133138030703,
41
+ "success": 1,
42
+ "support_edge_count": 2,
43
+ "task_answer": "user_25",
44
+ "task_id": "swarm_v2_r7_0",
45
+ "task_type": "swarm_v2_2hop_trace"
46
+ },
47
+ {
48
+ "agent_answer": "loc_pune",
49
+ "completion_length": 581,
50
+ "graph_f1": 0.0,
51
+ "predicted_edge_count": 0,
52
+ "question": "If you start at user_37 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
53
+ "reward": 0.9323133138030703,
54
+ "success": 1,
55
+ "support_edge_count": 2,
56
+ "task_answer": "loc_pune",
57
+ "task_id": "swarm_v2_r7_1",
58
+ "task_type": "swarm_v2_2hop_trace"
59
+ },
60
+ {
61
+ "agent_answer": "loc_hyderabad",
62
+ "completion_length": 553,
63
+ "graph_f1": 0.0,
64
+ "predicted_edge_count": 0,
65
+ "question": "If you start at alias_24_458 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
66
+ "reward": 0.9323133138030703,
67
+ "success": 1,
68
+ "support_edge_count": 2,
69
+ "task_answer": "loc_hyderabad",
70
+ "task_id": "swarm_v2_r7_2",
71
+ "task_type": "swarm_v2_2hop_trace"
72
+ },
73
+ {
74
+ "agent_answer": "loc_bengaluru",
75
+ "completion_length": 548,
76
+ "graph_f1": 0.0,
77
+ "predicted_edge_count": 0,
78
+ "question": "If you start at alias_4_664 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
79
+ "reward": 0.9323133138030703,
80
+ "success": 1,
81
+ "support_edge_count": 2,
82
+ "task_answer": "loc_bengaluru",
83
+ "task_id": "swarm_v2_r7_3",
84
+ "task_type": "swarm_v2_2hop_trace"
85
+ },
86
+ {
87
+ "agent_answer": "user_3",
88
+ "completion_length": 563,
89
+ "graph_f1": 0.0,
90
+ "predicted_edge_count": 0,
91
+ "question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
92
+ "reward": 0.9323133138030703,
93
+ "success": 1,
94
+ "support_edge_count": 2,
95
+ "task_answer": "user_3",
96
+ "task_id": "swarm_v2_r7_4",
97
+ "task_type": "swarm_v2_2hop_trace"
98
+ },
99
+ {
100
+ "agent_answer": "org_northbridge",
101
+ "completion_length": 584,
102
+ "graph_f1": 0.0,
103
+ "predicted_edge_count": 0,
104
+ "question": "If you start at alias_20_174 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
105
+ "reward": 0.9323133138030703,
106
+ "success": 1,
107
+ "support_edge_count": 2,
108
+ "task_answer": "org_northbridge",
109
+ "task_id": "swarm_v2_r7_5",
110
+ "task_type": "swarm_v2_2hop_trace"
111
+ },
112
+ {
113
+ "agent_answer": "org_helios_labs",
114
+ "completion_length": 546,
115
+ "graph_f1": 0.0,
116
+ "predicted_edge_count": 0,
117
+ "question": "If you start at alias_34_511 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
118
+ "reward": 0.9323133138030703,
119
+ "success": 1,
120
+ "support_edge_count": 2,
121
+ "task_answer": "org_helios_labs",
122
+ "task_id": "swarm_v2_r7_6",
123
+ "task_type": "swarm_v2_2hop_trace"
124
+ },
125
+ {
126
+ "agent_answer": "user_12",
127
+ "completion_length": 547,
128
+ "graph_f1": 0.0,
129
+ "predicted_edge_count": 0,
130
+ "question": "If you start at alias_3_544 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
131
+ "reward": 0.9323133138030703,
132
+ "success": 1,
133
+ "support_edge_count": 2,
134
+ "task_answer": "user_12",
135
+ "task_id": "swarm_v2_r7_7",
136
+ "task_type": "swarm_v2_2hop_trace"
137
+ },
138
+ {
139
+ "agent_answer": "loc_pune",
140
+ "completion_length": 581,
141
+ "graph_f1": 0.0,
142
+ "predicted_edge_count": 0,
143
+ "question": "If you start at alias_3_544 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
144
+ "reward": 0.9323133138030703,
145
+ "success": 1,
146
+ "support_edge_count": 2,
147
+ "task_answer": "loc_pune",
148
+ "task_id": "swarm_v2_r7_8",
149
+ "task_type": "swarm_v2_2hop_trace"
150
+ },
151
+ {
152
+ "agent_answer": "user_12",
153
+ "completion_length": 552,
154
+ "graph_f1": 0.0,
155
+ "predicted_edge_count": 0,
156
+ "question": "If you start at user_39 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
157
+ "reward": 0.9323133138030703,
158
+ "success": 1,
159
+ "support_edge_count": 2,
160
+ "task_answer": "user_12",
161
+ "task_id": "swarm_v2_r7_9",
162
+ "task_type": "swarm_v2_2hop_trace"
163
+ },
164
+ {
165
+ "agent_answer": "user_1",
166
+ "completion_length": 592,
167
+ "graph_f1": 0.0,
168
+ "predicted_edge_count": 0,
169
+ "question": "If you start at alias_4_664 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
170
+ "reward": 0.6704928651250475,
171
+ "success": 0,
172
+ "support_edge_count": 3,
173
+ "task_answer": "loc_bengaluru",
174
+ "task_id": "swarm_v2_r7_10",
175
+ "task_type": "swarm_v2_3hop_trace"
176
+ },
177
+ {
178
+ "agent_answer": "org_apex_dynamics",
179
+ "completion_length": 556,
180
+ "graph_f1": 0.0,
181
+ "predicted_edge_count": 0,
182
+ "question": "If you start at user_16 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
183
+ "reward": 0.9323133138030703,
184
+ "success": 1,
185
+ "support_edge_count": 2,
186
+ "task_answer": "org_apex_dynamics",
187
+ "task_id": "swarm_v2_r7_11",
188
+ "task_type": "swarm_v2_2hop_trace"
189
+ },
190
+ {
191
+ "agent_answer": "org_northbridge",
192
+ "completion_length": 555,
193
+ "graph_f1": 0.0,
194
+ "predicted_edge_count": 0,
195
+ "question": "If you start at user_3 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
196
+ "reward": 0.9323133138030703,
197
+ "success": 1,
198
+ "support_edge_count": 2,
199
+ "task_answer": "org_northbridge",
200
+ "task_id": "swarm_v2_r7_12",
201
+ "task_type": "swarm_v2_2hop_trace"
202
+ },
203
+ {
204
+ "agent_answer": "org_northbridge",
205
+ "completion_length": 606,
206
+ "graph_f1": 0.0,
207
+ "predicted_edge_count": 0,
208
+ "question": "If you start at user_30 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
209
+ "reward": 0.9323133138030703,
210
+ "success": 1,
211
+ "support_edge_count": 2,
212
+ "task_answer": "org_northbridge",
213
+ "task_id": "swarm_v2_r7_13",
214
+ "task_type": "swarm_v2_2hop_trace"
215
+ },
216
+ {
217
+ "agent_answer": "org_northbridge",
218
+ "completion_length": 587,
219
+ "graph_f1": 0.0,
220
+ "predicted_edge_count": 0,
221
+ "question": "If you start at alias_11_684 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
222
+ "reward": 0.9323133138030703,
223
+ "success": 1,
224
+ "support_edge_count": 2,
225
+ "task_answer": "org_northbridge",
226
+ "task_id": "swarm_v2_r7_14",
227
+ "task_type": "swarm_v2_2hop_trace"
228
+ },
229
+ {
230
+ "agent_answer": "user_35",
231
+ "completion_length": 577,
232
+ "graph_f1": 0.0,
233
+ "predicted_edge_count": 0,
234
+ "question": "If you start at alias_29_495 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
235
+ "reward": 0.6704928651250475,
236
+ "success": 0,
237
+ "support_edge_count": 3,
238
+ "task_answer": "loc_hyderabad",
239
+ "task_id": "swarm_v2_r7_15",
240
+ "task_type": "swarm_v2_3hop_trace"
241
+ },
242
+ {
243
+ "agent_answer": "loc_bengaluru",
244
+ "completion_length": 538,
245
+ "graph_f1": 0.0,
246
+ "predicted_edge_count": 0,
247
+ "question": "If you start at alias_38_337 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
248
+ "reward": 0.9323133138030703,
249
+ "success": 1,
250
+ "support_edge_count": 2,
251
+ "task_answer": "loc_bengaluru",
252
+ "task_id": "swarm_v2_r7_16",
253
+ "task_type": "swarm_v2_2hop_trace"
254
+ },
255
+ {
256
+ "agent_answer": "user_12",
257
+ "completion_length": 590,
258
+ "graph_f1": 0.0,
259
+ "predicted_edge_count": 0,
260
+ "question": "If you start at user_38 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
261
+ "reward": 0.9323133138030703,
262
+ "success": 1,
263
+ "support_edge_count": 2,
264
+ "task_answer": "user_12",
265
+ "task_id": "swarm_v2_r7_17",
266
+ "task_type": "swarm_v2_2hop_trace"
267
+ },
268
+ {
269
+ "agent_answer": "org_northbridge",
270
+ "completion_length": 555,
271
+ "graph_f1": 0.0,
272
+ "predicted_edge_count": 0,
273
+ "question": "If you start at alias_39_951 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
274
+ "reward": 0.9323133138030703,
275
+ "success": 1,
276
+ "support_edge_count": 2,
277
+ "task_answer": "org_northbridge",
278
+ "task_id": "swarm_v2_r7_18",
279
+ "task_type": "swarm_v2_2hop_trace"
280
+ },
281
+ {
282
+ "agent_answer": "loc_pune",
283
+ "completion_length": 555,
284
+ "graph_f1": 0.0,
285
+ "predicted_edge_count": 0,
286
+ "question": "If you start at user_34 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
287
+ "reward": 0.9323133138030703,
288
+ "success": 1,
289
+ "support_edge_count": 2,
290
+ "task_answer": "loc_pune",
291
+ "task_id": "swarm_v2_r7_19",
292
+ "task_type": "swarm_v2_2hop_trace"
293
+ },
294
+ {
295
+ "agent_answer": "loc_bengaluru",
296
+ "completion_length": 538,
297
+ "graph_f1": 0.0,
298
+ "predicted_edge_count": 0,
299
+ "question": "If you start at alias_32_394 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
300
+ "reward": 0.9323133138030703,
301
+ "success": 1,
302
+ "support_edge_count": 2,
303
+ "task_answer": "loc_bengaluru",
304
+ "task_id": "swarm_v2_r7_20",
305
+ "task_type": "swarm_v2_2hop_trace"
306
+ },
307
+ {
308
+ "agent_answer": "org_northbridge",
309
+ "completion_length": 569,
310
+ "graph_f1": 0.0,
311
+ "predicted_edge_count": 0,
312
+ "question": "If you start at alias_30_572 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
313
+ "reward": 0.9323133138030703,
314
+ "success": 1,
315
+ "support_edge_count": 2,
316
+ "task_answer": "org_northbridge",
317
+ "task_id": "swarm_v2_r7_21",
318
+ "task_type": "swarm_v2_2hop_trace"
319
+ },
320
+ {
321
+ "agent_answer": "user_3",
322
+ "completion_length": 553,
323
+ "graph_f1": 0.0,
324
+ "predicted_edge_count": 0,
325
+ "question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
326
+ "reward": 0.6704928651250475,
327
+ "success": 0,
328
+ "support_edge_count": 3,
329
+ "task_answer": "loc_pune",
330
+ "task_id": "swarm_v2_r7_22",
331
+ "task_type": "swarm_v2_3hop_trace"
332
+ },
333
+ {
334
+ "agent_answer": "org_helios_labs",
335
+ "completion_length": 546,
336
+ "graph_f1": 0.0,
337
+ "predicted_edge_count": 0,
338
+ "question": "If you start at alias_32_394 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
339
+ "reward": 0.9323133138030703,
340
+ "success": 1,
341
+ "support_edge_count": 2,
342
+ "task_answer": "org_helios_labs",
343
+ "task_id": "swarm_v2_r7_23",
344
+ "task_type": "swarm_v2_2hop_trace"
345
+ }
346
+ ],
347
+ "model_path": "artifacts/self_play_hf_l40s_full/round_006/answerer_train/final_model",
348
+ "summary": {
349
+ "avg_completion_length": 565.0416666666666,
350
+ "avg_graph_f1": 0.0,
351
+ "avg_reward": 0.8995857577183174,
352
+ "episodes": 24,
353
+ "task_success_rate": 0.875
354
+ }
355
+ },
356
+ "original_answerer": {
357
+ "episodes": [
358
+ {
359
+ "agent_answer": "{\"answer\":\"user_30\",\"supporting_edges\":[{\"src\":\"alias_30_572\",\"rel\":\"alias_of\",\"dst\":\"user_30\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"loc_delhi\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst",
360
+ "completion_length": 388,
361
+ "graph_f1": 0.6666666666666666,
362
+ "predicted_edge_count": 1,
363
+ "question": "If you start at alias_30_572 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
364
+ "reward": 0.4462659972989426,
365
+ "success": 0,
366
+ "support_edge_count": 2,
367
+ "task_answer": "user_25",
368
+ "task_id": "swarm_v2_r7_0",
369
+ "task_type": "swarm_v2_2hop_trace"
370
+ },
371
+ {
372
+ "agent_answer": "user_3",
373
+ "completion_length": 414,
374
+ "graph_f1": 1.0,
375
+ "predicted_edge_count": 2,
376
+ "question": "If you start at user_37 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
377
+ "reward": 0.5717814630149074,
378
+ "success": 0,
379
+ "support_edge_count": 2,
380
+ "task_answer": "loc_pune",
381
+ "task_id": "swarm_v2_r7_1",
382
+ "task_type": "swarm_v2_2hop_trace"
383
+ },
384
+ {
385
+ "agent_answer": "{\"answer\":\"user_24\",\"supporting_edges\":[{\"src\":\"alias_24_458\",\"rel\":\"alias_of\",\"dst\":\"user_24\",\"confidence\":1.0},{\"src\":\"user_24\",\"rel\":\"located_in\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"loc_hyderabad\",\"rel\":\"located_in\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_24\",\"confidence\":1.0},{\"src\":\"user_24\",\"rel\":\"located_in\",",
386
+ "completion_length": 387,
387
+ "graph_f1": 1.0,
388
+ "predicted_edge_count": 2,
389
+ "question": "If you start at alias_24_458 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
390
+ "reward": 0.5018390028775422,
391
+ "success": 0,
392
+ "support_edge_count": 2,
393
+ "task_answer": "loc_hyderabad",
394
+ "task_id": "swarm_v2_r7_2",
395
+ "task_type": "swarm_v2_2hop_trace"
396
+ },
397
+ {
398
+ "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_4_664\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",",
399
+ "completion_length": 381,
400
+ "graph_f1": 1.0,
401
+ "predicted_edge_count": 2,
402
+ "question": "If you start at alias_4_664 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
403
+ "reward": 0.5000528755950792,
404
+ "success": 0,
405
+ "support_edge_count": 2,
406
+ "task_answer": "loc_bengaluru",
407
+ "task_id": "swarm_v2_r7_3",
408
+ "task_type": "swarm_v2_2hop_trace"
409
+ },
410
+ {
411
+ "agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_39_951\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"user_39\",",
412
+ "completion_length": 396,
413
+ "graph_f1": 1.0,
414
+ "predicted_edge_count": 2,
415
+ "question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
416
+ "reward": 0.5017743415980812,
417
+ "success": 0,
418
+ "support_edge_count": 2,
419
+ "task_answer": "user_3",
420
+ "task_id": "swarm_v2_r7_4",
421
+ "task_type": "swarm_v2_2hop_trace"
422
+ },
423
+ {
424
+ "agent_answer": "user_7",
425
+ "completion_length": 387,
426
+ "graph_f1": 0.0,
427
+ "predicted_edge_count": 2,
428
+ "question": "If you start at alias_20_174 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
429
+ "reward": 0.7089737433490155,
430
+ "success": 0,
431
+ "support_edge_count": 2,
432
+ "task_answer": "org_northbridge",
433
+ "task_id": "swarm_v2_r7_5",
434
+ "task_type": "swarm_v2_2hop_trace"
435
+ },
436
+ {
437
+ "agent_answer": "{\"answer\":\"user_34\",\"supporting_edges\":[{\"src\":\"alias_34_511\",\"rel\":\"alias_of\",\"dst\":\"user_34\",\"confidence\":1.0},{\"src\":\"user_34\",\"rel\":\"works_at\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",\"dst\":\"user_0\",\"confidence\":1.0},{\"src\":\"user_0\",\"rel\":\"works_at\",\"dst\":\"user_16\",\"confidence\":1.0},{\"src\":\"user_16\",\"rel\":\"works",
438
+ "completion_length": 361,
439
+ "graph_f1": 1.0,
440
+ "predicted_edge_count": 2,
441
+ "question": "If you start at alias_34_511 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
442
+ "reward": 0.49966549250864506,
443
+ "success": 0,
444
+ "support_edge_count": 2,
445
+ "task_answer": "org_helios_labs",
446
+ "task_id": "swarm_v2_r7_6",
447
+ "task_type": "swarm_v2_2hop_trace"
448
+ },
449
+ {
450
+ "agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_3_544\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":",
451
+ "completion_length": 408,
452
+ "graph_f1": 0.6666666666666666,
453
+ "predicted_edge_count": 1,
454
+ "question": "If you start at alias_3_544 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
455
+ "reward": 0.4454139519345446,
456
+ "success": 0,
457
+ "support_edge_count": 2,
458
+ "task_answer": "user_12",
459
+ "task_id": "swarm_v2_r7_7",
460
+ "task_type": "swarm_v2_2hop_trace"
461
+ },
462
+ {
463
+ "agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_3_544\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"loc_pune\",\"rel\":\"located_in\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"located_in\",\"dst\":\"user_6\",\"confidence\":1.0},{\"src\":\"user_6\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1",
464
+ "completion_length": 386,
465
+ "graph_f1": 1.0,
466
+ "predicted_edge_count": 2,
467
+ "question": "If you start at alias_3_544 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
468
+ "reward": 0.4994635369242434,
469
+ "success": 0,
470
+ "support_edge_count": 2,
471
+ "task_answer": "loc_pune",
472
+ "task_id": "swarm_v2_r7_8",
473
+ "task_type": "swarm_v2_2hop_trace"
474
+ },
475
+ {
476
+ "agent_answer": "user_3",
477
+ "completion_length": 423,
478
+ "graph_f1": 0.5,
479
+ "predicted_edge_count": 2,
480
+ "question": "If you start at user_39 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
481
+ "reward": 0.6989023123676081,
482
+ "success": 0,
483
+ "support_edge_count": 2,
484
+ "task_answer": "user_12",
485
+ "task_id": "swarm_v2_r7_9",
486
+ "task_type": "swarm_v2_2hop_trace"
487
+ },
488
+ {
489
+ "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_4_664\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"connected_to\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",",
490
+ "completion_length": 383,
491
+ "graph_f1": 0.0,
492
+ "predicted_edge_count": 0,
493
+ "question": "If you start at alias_4_664 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
494
+ "reward": 0.39524653939368076,
495
+ "success": 0,
496
+ "support_edge_count": 3,
497
+ "task_answer": "loc_bengaluru",
498
+ "task_id": "swarm_v2_r7_10",
499
+ "task_type": "swarm_v2_3hop_trace"
500
+ },
501
+ {
502
+ "agent_answer": "{\"answer\":\"org_apex_dynamics\",\"supporting_edges\":[{\"src\":\"user_16\",\"rel\":\"connected_to\",\"dst\":\"org_apex_dynamics\",\"confidence\":1.0},{\"src\":\"org_apex_dynamics\",\"rel\":\"works_at\",\"dst\":\"user_18\",\"confidence\":1.0},{\"src\":\"user_18\",\"rel\":\"connected_to\",\"dst\":\"org_apex_dynamics\",\"confidence\":1.0},{\"src\":\"org_apex_dynamics\",\"rel\":\"works_at\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"connected_to\",\"dst",
503
+ "completion_length": 411,
504
+ "graph_f1": 1.0,
505
+ "predicted_edge_count": 2,
506
+ "question": "If you start at user_16 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
507
+ "reward": 0.5363758482799911,
508
+ "success": 0,
509
+ "support_edge_count": 2,
510
+ "task_answer": "org_apex_dynamics",
511
+ "task_id": "swarm_v2_r7_11",
512
+ "task_type": "swarm_v2_2hop_trace"
513
+ },
514
+ {
515
+ "agent_answer": "{\"answer\":\"org_northbridge\",\"supporting_edges\":[{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":",
516
+ "completion_length": 431,
517
+ "graph_f1": 0.0,
518
+ "predicted_edge_count": 0,
519
+ "question": "If you start at user_3 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
520
+ "reward": 0.39524653939368076,
521
+ "success": 0,
522
+ "support_edge_count": 2,
523
+ "task_answer": "org_northbridge",
524
+ "task_id": "swarm_v2_r7_12",
525
+ "task_type": "swarm_v2_2hop_trace"
526
+ },
527
+ {
528
+ "agent_answer": "user_7",
529
+ "completion_length": 418,
530
+ "graph_f1": 0.0,
531
+ "predicted_edge_count": 2,
532
+ "question": "If you start at user_30 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
533
+ "reward": 0.7410081838633457,
534
+ "success": 0,
535
+ "support_edge_count": 2,
536
+ "task_answer": "org_northbridge",
537
+ "task_id": "swarm_v2_r7_13",
538
+ "task_type": "swarm_v2_2hop_trace"
539
+ },
540
+ {
541
+ "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_11_684\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"loc_pune\",\"rel\":\"located_in\",\"dst\":\"org_north",
542
+ "completion_length": 387,
543
+ "graph_f1": 0.0,
544
+ "predicted_edge_count": 0,
545
+ "question": "If you start at alias_11_684 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
546
+ "reward": 0.39524653939368076,
547
+ "success": 0,
548
+ "support_edge_count": 2,
549
+ "task_answer": "org_northbridge",
550
+ "task_id": "swarm_v2_r7_14",
551
+ "task_type": "swarm_v2_2hop_trace"
552
+ },
553
+ {
554
+ "agent_answer": "{\"answer\":\"user_35\",\"supporting_edges\":[{\"src\":\"alias_29_495\",\"rel\":\"alias_of\",\"dst\":\"user_35\",\"confidence\":1.0},{\"src\":\"user_35\",\"rel\":\"connected_to\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"loc_hyderabad\",\"rel\":\"located_in\",\"dst\":\"user_35\",\"confidence\":1.0},{\"src\":\"user_35\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel",
555
+ "completion_length": 375,
556
+ "graph_f1": 0.5,
557
+ "predicted_edge_count": 1,
558
+ "question": "If you start at alias_29_495 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
559
+ "reward": 0.42825354331930193,
560
+ "success": 0,
561
+ "support_edge_count": 3,
562
+ "task_answer": "loc_hyderabad",
563
+ "task_id": "swarm_v2_r7_15",
564
+ "task_type": "swarm_v2_3hop_trace"
565
+ },
566
+ {
567
+ "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_38_337\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"located_in\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",\"dst\":\"user_5",
568
+ "completion_length": 378,
569
+ "graph_f1": 0.0,
570
+ "predicted_edge_count": 0,
571
+ "question": "If you start at alias_38_337 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
572
+ "reward": 0.39524653939368076,
573
+ "success": 0,
574
+ "support_edge_count": 2,
575
+ "task_answer": "loc_bengaluru",
576
+ "task_id": "swarm_v2_r7_16",
577
+ "task_type": "swarm_v2_2hop_trace"
578
+ },
579
+ {
580
+ "agent_answer": "user_3",
581
+ "completion_length": 391,
582
+ "graph_f1": 0.5,
583
+ "predicted_edge_count": 2,
584
+ "question": "If you start at user_38 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
585
+ "reward": 0.6997365142465289,
586
+ "success": 0,
587
+ "support_edge_count": 2,
588
+ "task_answer": "user_12",
589
+ "task_id": "swarm_v2_r7_17",
590
+ "task_type": "swarm_v2_2hop_trace"
591
+ },
592
+ {
593
+ "agent_answer": "{\"answer\":\"user_39\",\"supporting_edges\":[{\"src\":\"alias_39_951\",\"rel\":\"alias_of\",\"dst\":\"user_39\",\"confidence\":1.0},{\"src\":\"user_39\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"works_at\",\"dst\":",
594
+ "completion_length": 373,
595
+ "graph_f1": 1.0,
596
+ "predicted_edge_count": 2,
597
+ "question": "If you start at alias_39_951 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
598
+ "reward": 0.49900727548994456,
599
+ "success": 0,
600
+ "support_edge_count": 2,
601
+ "task_answer": "org_northbridge",
602
+ "task_id": "swarm_v2_r7_18",
603
+ "task_type": "swarm_v2_2hop_trace"
604
+ },
605
+ {
606
+ "agent_answer": "user_6",
607
+ "completion_length": 415,
608
+ "graph_f1": 0.0,
609
+ "predicted_edge_count": 2,
610
+ "question": "If you start at user_34 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
611
+ "reward": 0.7402569345038308,
612
+ "success": 0,
613
+ "support_edge_count": 2,
614
+ "task_answer": "loc_pune",
615
+ "task_id": "swarm_v2_r7_19",
616
+ "task_type": "swarm_v2_2hop_trace"
617
+ },
618
+ {
619
+ "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_32_394\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"located_in\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",\"dst\":\"user_5",
620
+ "completion_length": 378,
621
+ "graph_f1": 0.0,
622
+ "predicted_edge_count": 0,
623
+ "question": "If you start at alias_32_394 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
624
+ "reward": 0.39524653939368076,
625
+ "success": 0,
626
+ "support_edge_count": 2,
627
+ "task_answer": "loc_bengaluru",
628
+ "task_id": "swarm_v2_r7_20",
629
+ "task_type": "swarm_v2_2hop_trace"
630
+ },
631
+ {
632
+ "agent_answer": "{\"answer\":\"user_30\",\"supporting_edges\":[{\"src\":\"alias_30_572\",\"rel\":\"alias_of\",\"dst\":\"user_30\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"works_at\",\"dst\":",
633
+ "completion_length": 373,
634
+ "graph_f1": 1.0,
635
+ "predicted_edge_count": 2,
636
+ "question": "If you start at alias_30_572 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
637
+ "reward": 0.4998888700517808,
638
+ "success": 0,
639
+ "support_edge_count": 2,
640
+ "task_answer": "org_northbridge",
641
+ "task_id": "swarm_v2_r7_21",
642
+ "task_type": "swarm_v2_2hop_trace"
643
+ },
644
+ {
645
+ "agent_answer": "user_3",
646
+ "completion_length": 401,
647
+ "graph_f1": 0.0,
648
+ "predicted_edge_count": 3,
649
+ "question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
650
+ "reward": 0.468212688181137,
651
+ "success": 0,
652
+ "support_edge_count": 3,
653
+ "task_answer": "loc_pune",
654
+ "task_id": "swarm_v2_r7_22",
655
+ "task_type": "swarm_v2_3hop_trace"
656
+ },
657
+ {
658
+ "agent_answer": "user_32",
659
+ "completion_length": 434,
660
+ "graph_f1": 1.0,
661
+ "predicted_edge_count": 2,
662
+ "question": "If you start at alias_32_394 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
663
+ "reward": 0.7753896096874865,
664
+ "success": 0,
665
+ "support_edge_count": 2,
666
+ "task_answer": "org_helios_labs",
667
+ "task_id": "swarm_v2_r7_23",
668
+ "task_type": "swarm_v2_2hop_trace"
669
+ }
670
+ ],
671
+ "model_path": "Qwen/Qwen2.5-0.5B-Instruct",
672
+ "summary": {
673
+ "avg_completion_length": 394.9583333333333,
674
+ "avg_graph_f1": 0.5347222222222222,
675
+ "avg_reward": 0.5307706200858483,
676
+ "episodes": 24,
677
+ "task_success_rate": 0.0
678
+ }
679
+ }
680
+ },
681
+ "path": "artifacts/self_play_hf_l40s_full/post_training_evaluation.json",
682
+ "pipeline_mode": "swarm_v2",
683
+ "skipped": false,
684
+ "summary": {
685
+ "compared_models": [
686
+ "finetuned_answerer",
687
+ "original_answerer"
688
+ ],
689
+ "delta_vs_original": {
690
+ "avg_graph_f1": -0.5347222222222222,
691
+ "avg_reward": 0.3688151376324691,
692
+ "task_success_rate": 0.875
693
+ },
694
+ "finetuned_answerer": {
695
+ "avg_completion_length": 565.0416666666666,
696
+ "avg_graph_f1": 0.0,
697
+ "avg_reward": 0.8995857577183174,
698
+ "episodes": 24,
699
+ "task_success_rate": 0.875
700
+ },
701
+ "generated_task_count": 24,
702
+ "generator_valid_rate": 0.5,
703
+ "original_answerer": {
704
+ "avg_completion_length": 394.9583333333333,
705
+ "avg_graph_f1": 0.5347222222222222,
706
+ "avg_reward": 0.5307706200858483,
707
+ "episodes": 24,
708
+ "task_success_rate": 0.0
709
+ },
710
+ "top_generator_invalid_reasons": [
711
+ [
712
+ "duplicate_or_near_duplicate",
713
+ 3
714
+ ]
715
+ ]
716
+ },
717
+ "validation_reports_path": "artifacts/self_play_hf_l40s_full/post_training_eval_validation_reports.json"
718
+ },
719
+ "rounds": [
720
+ {
721
+ "answerer": {
722
+ "checkpoint_dirs": [
723
+ "artifacts/self_play_hf_l40s_full/round_001/answerer_train/checkpoint-120"
724
+ ],
725
+ "diagnostics": {
726
+ "entropy_max": 1.1653341054916382,
727
+ "entropy_min": 0.3167879283428192,
728
+ "fingerprint_changed_count": 31,
729
+ "fingerprint_param_count": 32,
730
+ "grad_norm_max": 1250253952.0,
731
+ "kl_max": 23265218.0,
732
+ "loss_abs_max": 58163.08203125,
733
+ "nonzero_grad_tensors": 0,
734
+ "params_with_grad": 0,
735
+ "reward_max": 0.9646978974342346,
736
+ "reward_min": 0.5316684246063232,
737
+ "reward_std_max": 0.22473102807998657,
738
+ "total_param_count": 494032768,
739
+ "trainable_fraction": 1.0,
740
+ "trainable_param_count": 494032768
741
+ },
742
+ "final_model_path": "artifacts/self_play_hf_l40s_full/round_001/answerer_train/final_model",
743
+ "global_step": 120,
744
+ "is_full_finetune": true,
745
+ "model_path": "artifacts/self_play_hf_l40s_full/round_001/answerer_train/final_model",
746
+ "phase_output_dir": "artifacts/self_play_hf_l40s_full/round_001/answerer_train",
747
+ "train_rows": 40,
748
+ "training_loss": 484.692720271176,
749
+ "tuning_mode": "full"
750
+ },
751
+ "answerer_pre": null,
752
+ "answerer_pre_task_count": 0,
753
+ "answerer_task_count": 40,
754
+ "artifacts": {
755
+ "answerer_dataset": "artifacts/self_play_hf_l40s_full/round_001/answerer_dataset.json",
756
+ "answerer_pre_dataset": "",
757
+ "canonical_graph_candidates": "artifacts/self_play_hf_l40s_full/round_001/canonical_graph_candidates.json",
758
+ "generated_tasks": "artifacts/self_play_hf_l40s_full/round_001/generated_tasks.json",
759
+ "generator_dataset": "artifacts/self_play_hf_l40s_full/round_001/generator_dataset.json",
760
+ "replay_traces": "artifacts/self_play_hf_l40s_full/round_001/replay_traces.json",
761
+ "validation_reports": "artifacts/self_play_hf_l40s_full/round_001/validation_reports.json"
762
+ },
763
+ "dry_run": false,
764
+ "generated_task_count": 24,
765
+ "generator": {
766
+ "checkpoint_dirs": [
767
+ "artifacts/self_play_hf_l40s_full/round_001/generator_train/checkpoint-120"
768
+ ],
769
+ "diagnostics": {
770
+ "entropy_max": 3.6932995319366455,
771
+ "entropy_min": 1.0433989763259888,
772
+ "fingerprint_changed_count": 31,
773
+ "fingerprint_param_count": 32,
774
+ "grad_norm_max": 6.948690414428711,
775
+ "kl_max": 0.2944556176662445,
776
+ "loss_abs_max": 0.32165396213531494,
777
+ "nonzero_grad_tensors": 0,
778
+ "params_with_grad": 0,
779
+ "reward_max": -0.07656250149011612,
780
+ "reward_min": -0.4352343678474426,
781
+ "reward_std_max": 0.306624174118042,
782
+ "total_param_count": 494032768,
783
+ "trainable_fraction": 1.0,
784
+ "trainable_param_count": 494032768
785
+ },
786
+ "final_model_path": "artifacts/self_play_hf_l40s_full/round_001/generator_train/final_model",
787
+ "global_step": 120,
788
+ "is_full_finetune": true,
789
+ "model_path": "artifacts/self_play_hf_l40s_full/round_001/generator_train/final_model",
790
+ "phase_output_dir": "artifacts/self_play_hf_l40s_full/round_001/generator_train",
791
+ "train_rows": 24,
792
+ "training_loss": 0.0,
793
+ "tuning_mode": "full"
794
+ },
795
+ "phase_schedule": "generator_answerer",
796
+ "pipeline_mode": "swarm_v2",
797
+ "round": 1
798
+ },
799
+ {
800
+ "answerer": {
801
+ "checkpoint_dirs": [
802
+ "artifacts/self_play_hf_l40s_full/round_002/answerer_train/checkpoint-120"
803
+ ],
804
+ "diagnostics": {
805
+ "entropy_max": 1.6676212549209595,
806
+ "entropy_min": 0.769734263420105,
807
+ "fingerprint_changed_count": 31,
808
+ "fingerprint_param_count": 32,
809
+ "grad_norm_max": 4.319361686706543,
810
+ "kl_max": 0.32789888978004456,
811
+ "loss_abs_max": 0.17892315983772278,
812
+ "nonzero_grad_tensors": 0,
813
+ "params_with_grad": 0,
814
+ "reward_max": 0.9358674883842468,
815
+ "reward_min": 0.4761159420013428,
816
+ "reward_std_max": 0.19161619246006012,
817
+ "total_param_count": 494032768,
818
+ "trainable_fraction": 1.0,
819
+ "trainable_param_count": 494032768
820
+ },
821
+ "final_model_path": "artifacts/self_play_hf_l40s_full/round_002/answerer_train/final_model",
822
+ "global_step": 120,
823
+ "is_full_finetune": true,
824
+ "model_path": "artifacts/self_play_hf_l40s_full/round_002/answerer_train/final_model",
825
+ "phase_output_dir": "artifacts/self_play_hf_l40s_full/round_002/answerer_train",
826
+ "train_rows": 40,
827
+ "training_loss": 0.0007389950291932716,
828
+ "tuning_mode": "full"
829
+ },
830
+ "answerer_pre": null,
831
+ "answerer_pre_task_count": 0,
832
+ "answerer_task_count": 40,
833
+ "artifacts": {
834
+ "answerer_dataset": "artifacts/self_play_hf_l40s_full/round_002/answerer_dataset.json",
835
+ "answerer_pre_dataset": "",
836
+ "canonical_graph_candidates": "artifacts/self_play_hf_l40s_full/round_002/canonical_graph_candidates.json",
837
+ "generated_tasks": "artifacts/self_play_hf_l40s_full/round_002/generated_tasks.json",
838
+ "generator_dataset": "artifacts/self_play_hf_l40s_full/round_002/generator_dataset.json",
839
+ "replay_traces": "artifacts/self_play_hf_l40s_full/round_002/replay_traces.json",
840
+ "validation_reports": "artifacts/self_play_hf_l40s_full/round_002/validation_reports.json"
841
+ },
842
+ "dry_run": false,
843
+ "generated_task_count": 24,
844
+ "generator": {
845
+ "checkpoint_dirs": [
846
+ "artifacts/self_play_hf_l40s_full/round_002/generator_train/checkpoint-120"
847
+ ],
848
+ "diagnostics": {
849
+ "entropy_max": 3.5546140670776367,
850
+ "entropy_min": 0.9028136134147644,
851
+ "fingerprint_changed_count": 32,
852
+ "fingerprint_param_count": 32,
853
+ "grad_norm_max": 4.169064521789551,
854
+ "kl_max": 0.514514148235321,
855
+ "loss_abs_max": 0.24258767068386078,
856
+ "nonzero_grad_tensors": 0,
857
+ "params_with_grad": 0,
858
+ "reward_max": -0.05000000074505806,
859
+ "reward_min": -0.1990624964237213,
860
+ "reward_std_max": 0.27848079800605774,
861
+ "total_param_count": 494032768,
862
+ "trainable_fraction": 1.0,
863
+ "trainable_param_count": 494032768
864
+ },
865
+ "final_model_path": "artifacts/self_play_hf_l40s_full/round_002/generator_train/final_model",
866
+ "global_step": 120,
867
+ "is_full_finetune": true,
868
+ "model_path": "artifacts/self_play_hf_l40s_full/round_002/generator_train/final_model",
869
+ "phase_output_dir": "artifacts/self_play_hf_l40s_full/round_002/generator_train",
870
+ "train_rows": 24,
871
+ "training_loss": 0.0009096094763663132,
872
+ "tuning_mode": "full"
873
+ },
874
+ "phase_schedule": "generator_answerer",
875
+ "pipeline_mode": "swarm_v2",
876
+ "round": 2
877
+ },
878
+ {
879
+ "answerer": {
880
+ "checkpoint_dirs": [
881
+ "artifacts/self_play_hf_l40s_full/round_003/answerer_train/checkpoint-120"
882
+ ],
883
+ "diagnostics": {
884
+ "entropy_max": 2.224254846572876,
885
+ "entropy_min": 1.4734899997711182,
886
+ "fingerprint_changed_count": 32,
887
+ "fingerprint_param_count": 32,
888
+ "grad_norm_max": 13.311189651489258,
889
+ "kl_max": 1.1347599029541016,
890
+ "loss_abs_max": 0.18968576192855835,
891
+ "nonzero_grad_tensors": 0,
892
+ "params_with_grad": 0,
893
+ "reward_max": 0.9411008358001709,
894
+ "reward_min": 0.7219144701957703,
895
+ "reward_std_max": 0.18893879652023315,
896
+ "total_param_count": 494032768,
897
+ "trainable_fraction": 1.0,
898
+ "trainable_param_count": 494032768
899
+ },
900
+ "final_model_path": "artifacts/self_play_hf_l40s_full/round_003/answerer_train/final_model",
901
+ "global_step": 120,
902
+ "is_full_finetune": true,
903
+ "model_path": "artifacts/self_play_hf_l40s_full/round_003/answerer_train/final_model",
904
+ "phase_output_dir": "artifacts/self_play_hf_l40s_full/round_003/answerer_train",
905
+ "train_rows": 40,
906
+ "training_loss": 0.000411013352277223,
907
+ "tuning_mode": "full"
908
+ },
909
+ "answerer_pre": null,
910
+ "answerer_pre_task_count": 0,
911
+ "answerer_task_count": 40,
912
+ "artifacts": {
913
+ "answerer_dataset": "artifacts/self_play_hf_l40s_full/round_003/answerer_dataset.json",
914
+ "answerer_pre_dataset": "",
915
+ "canonical_graph_candidates": "artifacts/self_play_hf_l40s_full/round_003/canonical_graph_candidates.json",
916
+ "generated_tasks": "artifacts/self_play_hf_l40s_full/round_003/generated_tasks.json",
917
+ "generator_dataset": "artifacts/self_play_hf_l40s_full/round_003/generator_dataset.json",
918
+ "replay_traces": "artifacts/self_play_hf_l40s_full/round_003/replay_traces.json",
919
+ "validation_reports": "artifacts/self_play_hf_l40s_full/round_003/validation_reports.json"
920
+ },
921
+ "dry_run": false,
922
+ "generated_task_count": 24,
923
+ "generator": {
924
+ "checkpoint_dirs": [
925
+ "artifacts/self_play_hf_l40s_full/round_003/generator_train/checkpoint-120"
926
+ ],
927
+ "diagnostics": {
928
+ "entropy_max": 4.359671115875244,
929
+ "entropy_min": 3.150116205215454,
930
+ "fingerprint_changed_count": 32,
931
+ "fingerprint_param_count": 32,
932
+ "grad_norm_max": 3.8442986011505127,
933
+ "kl_max": 0.6575136780738831,
934
+ "loss_abs_max": 0.16846270859241486,
935
+ "nonzero_grad_tensors": 0,
936
+ "params_with_grad": 0,
937
+ "reward_max": -0.05000000074505806,
938
+ "reward_min": -0.16500000655651093,
939
+ "reward_std_max": 0.2437519133090973,
940
+ "total_param_count": 494032768,
941
+ "trainable_fraction": 1.0,
942
+ "trainable_param_count": 494032768
943
+ },
944
+ "final_model_path": "artifacts/self_play_hf_l40s_full/round_003/generator_train/final_model",
945
+ "global_step": 120,
946
+ "is_full_finetune": true,
947
+ "model_path": "artifacts/self_play_hf_l40s_full/round_003/generator_train/final_model",
948
+ "phase_output_dir": "artifacts/self_play_hf_l40s_full/round_003/generator_train",
949
+ "train_rows": 24,
950
+ "training_loss": 0.0006241449679995033,
951
+ "tuning_mode": "full"
952
+ },
953
+ "phase_schedule": "generator_answerer",
954
+ "pipeline_mode": "swarm_v2",
955
+ "round": 3
956
+ },
957
+ {
958
+ "answerer": {
959
+ "checkpoint_dirs": [
960
+ "artifacts/self_play_hf_l40s_full/round_004/answerer_train/checkpoint-120"
961
+ ],
962
+ "diagnostics": {
963
+ "entropy_max": 2.322800397872925,
964
+ "entropy_min": 1.5768123865127563,
965
+ "fingerprint_changed_count": 31,
966
+ "fingerprint_param_count": 32,
967
+ "grad_norm_max": 4.225009918212891,
968
+ "kl_max": 0.24286241829395294,
969
+ "loss_abs_max": 0.1410004049539566,
970
+ "nonzero_grad_tensors": 0,
971
+ "params_with_grad": 0,
972
+ "reward_max": 0.9533497095108032,
973
+ "reward_min": 0.6900650858879089,
974
+ "reward_std_max": 0.15437306463718414,
975
+ "total_param_count": 494032768,
976
+ "trainable_fraction": 1.0,
977
+ "trainable_param_count": 494032768
978
+ },
979
+ "final_model_path": "artifacts/self_play_hf_l40s_full/round_004/answerer_train/final_model",
980
+ "global_step": 120,
981
+ "is_full_finetune": true,
982
+ "model_path": "artifacts/self_play_hf_l40s_full/round_004/answerer_train/final_model",
983
+ "phase_output_dir": "artifacts/self_play_hf_l40s_full/round_004/answerer_train",
984
+ "train_rows": 40,
985
+ "training_loss": 0.0005180203434368498,
986
+ "tuning_mode": "full"
987
+ },
988
+ "answerer_pre": null,
989
+ "answerer_pre_task_count": 0,
990
+ "answerer_task_count": 40,
991
+ "artifacts": {
992
+ "answerer_dataset": "artifacts/self_play_hf_l40s_full/round_004/answerer_dataset.json",
993
+ "answerer_pre_dataset": "",
994
+ "canonical_graph_candidates": "artifacts/self_play_hf_l40s_full/round_004/canonical_graph_candidates.json",
995
+ "generated_tasks": "artifacts/self_play_hf_l40s_full/round_004/generated_tasks.json",
996
+ "generator_dataset": "artifacts/self_play_hf_l40s_full/round_004/generator_dataset.json",
997
+ "replay_traces": "artifacts/self_play_hf_l40s_full/round_004/replay_traces.json",
998
+ "validation_reports": "artifacts/self_play_hf_l40s_full/round_004/validation_reports.json"
999
+ },
1000
+ "dry_run": false,
1001
+ "generated_task_count": 24,
1002
+ "generator": {
1003
+ "checkpoint_dirs": [
1004
+ "artifacts/self_play_hf_l40s_full/round_004/generator_train/checkpoint-120"
1005
+ ],
1006
+ "diagnostics": {
1007
+ "entropy_max": 4.859399318695068,
1008
+ "entropy_min": 3.430957794189453,
1009
+ "fingerprint_changed_count": 32,
1010
+ "fingerprint_param_count": 32,
1011
+ "grad_norm_max": 3.653455972671509,
1012
+ "kl_max": 0.3064752519130707,
1013
+ "loss_abs_max": 0.18890579044818878,
1014
+ "nonzero_grad_tensors": 0,
1015
+ "params_with_grad": 0,
1016
+ "reward_max": -0.05000000074505806,
1017
+ "reward_min": -0.27531248331069946,
1018
+ "reward_std_max": 0.29975247383117676,
1019
+ "total_param_count": 494032768,
1020
+ "trainable_fraction": 1.0,
1021
+ "trainable_param_count": 494032768
1022
+ },
1023
+ "final_model_path": "artifacts/self_play_hf_l40s_full/round_004/generator_train/final_model",
1024
+ "global_step": 120,
1025
+ "is_full_finetune": true,
1026
+ "model_path": "artifacts/self_play_hf_l40s_full/round_004/generator_train/final_model",
1027
+ "phase_output_dir": "artifacts/self_play_hf_l40s_full/round_004/generator_train",
1028
+ "train_rows": 24,
1029
+ "training_loss": 0.0005565226826244422,
1030
+ "tuning_mode": "full"
1031
+ },
1032
+ "phase_schedule": "generator_answerer",
1033
+ "pipeline_mode": "swarm_v2",
1034
+ "round": 4
1035
+ },
1036
+ {
1037
+ "answerer": {
1038
+ "checkpoint_dirs": [
1039
+ "artifacts/self_play_hf_l40s_full/round_005/answerer_train/checkpoint-120"
1040
+ ],
1041
+ "diagnostics": {
1042
+ "entropy_max": 2.336609125137329,
1043
+ "entropy_min": 1.333341360092163,
1044
+ "fingerprint_changed_count": 32,
1045
+ "fingerprint_param_count": 32,
1046
+ "grad_norm_max": 3.576796531677246,
1047
+ "kl_max": 0.15152397751808167,
1048
+ "loss_abs_max": 0.15017201006412506,
1049
+ "nonzero_grad_tensors": 0,
1050
+ "params_with_grad": 0,
1051
+ "reward_max": 0.9394216537475586,
1052
+ "reward_min": 0.7121027708053589,
1053
+ "reward_std_max": 0.17226041853427887,
1054
+ "total_param_count": 494032768,
1055
+ "trainable_fraction": 1.0,
1056
+ "trainable_param_count": 494032768
1057
+ },
1058
+ "final_model_path": "artifacts/self_play_hf_l40s_full/round_005/answerer_train/final_model",
1059
+ "global_step": 120,
1060
+ "is_full_finetune": true,
1061
+ "model_path": "artifacts/self_play_hf_l40s_full/round_005/answerer_train/final_model",
1062
+ "phase_output_dir": "artifacts/self_play_hf_l40s_full/round_005/answerer_train",
1063
+ "train_rows": 40,
1064
+ "training_loss": 0.0002945329675640096,
1065
+ "tuning_mode": "full"
1066
+ },
1067
+ "answerer_pre": null,
1068
+ "answerer_pre_task_count": 0,
1069
+ "answerer_task_count": 40,
1070
+ "artifacts": {
1071
+ "answerer_dataset": "artifacts/self_play_hf_l40s_full/round_005/answerer_dataset.json",
1072
+ "answerer_pre_dataset": "",
1073
+ "canonical_graph_candidates": "artifacts/self_play_hf_l40s_full/round_005/canonical_graph_candidates.json",
1074
+ "generated_tasks": "artifacts/self_play_hf_l40s_full/round_005/generated_tasks.json",
1075
+ "generator_dataset": "artifacts/self_play_hf_l40s_full/round_005/generator_dataset.json",
1076
+ "replay_traces": "artifacts/self_play_hf_l40s_full/round_005/replay_traces.json",
1077
+ "validation_reports": "artifacts/self_play_hf_l40s_full/round_005/validation_reports.json"
1078
+ },
1079
+ "dry_run": false,
1080
+ "generated_task_count": 24,
1081
+ "generator": {
1082
+ "checkpoint_dirs": [
1083
+ "artifacts/self_play_hf_l40s_full/round_005/generator_train/checkpoint-120"
1084
+ ],
1085
+ "diagnostics": {
1086
+ "entropy_max": 4.6490559577941895,
1087
+ "entropy_min": 3.2972638607025146,
1088
+ "fingerprint_changed_count": 31,
1089
+ "fingerprint_param_count": 32,
1090
+ "grad_norm_max": 2.7098262310028076,
1091
+ "kl_max": 0.22837483882904053,
1092
+ "loss_abs_max": 0.10853039473295212,
1093
+ "nonzero_grad_tensors": 0,
1094
+ "params_with_grad": 0,
1095
+ "reward_max": -0.05000000074505806,
1096
+ "reward_min": -0.16249999403953552,
1097
+ "reward_std_max": 0.24186772108078003,
1098
+ "total_param_count": 494032768,
1099
+ "trainable_fraction": 1.0,
1100
+ "trainable_param_count": 494032768
1101
+ },
1102
+ "final_model_path": "artifacts/self_play_hf_l40s_full/round_005/generator_train/final_model",
1103
+ "global_step": 120,
1104
+ "is_full_finetune": true,
1105
+ "model_path": "artifacts/self_play_hf_l40s_full/round_005/generator_train/final_model",
1106
+ "phase_output_dir": "artifacts/self_play_hf_l40s_full/round_005/generator_train",
1107
+ "train_rows": 24,
1108
+ "training_loss": 0.0004231600946695835,
1109
+ "tuning_mode": "full"
1110
+ },
1111
+ "phase_schedule": "generator_answerer",
1112
+ "pipeline_mode": "swarm_v2",
1113
+ "round": 5
1114
+ },
1115
+ {
1116
+ "answerer": {
1117
+ "checkpoint_dirs": [
1118
+ "artifacts/self_play_hf_l40s_full/round_006/answerer_train/checkpoint-120"
1119
+ ],
1120
+ "diagnostics": {
1121
+ "entropy_max": 2.569056272506714,
1122
+ "entropy_min": 0.6120401620864868,
1123
+ "fingerprint_changed_count": 31,
1124
+ "fingerprint_param_count": 32,
1125
+ "grad_norm_max": 296.2247314453125,
1126
+ "kl_max": 79.00199890136719,
1127
+ "loss_abs_max": 0.23638710379600525,
1128
+ "nonzero_grad_tensors": 0,
1129
+ "params_with_grad": 0,
1130
+ "reward_max": 0.9358674883842468,
1131
+ "reward_min": 0.457000195980072,
1132
+ "reward_std_max": 0.23860794305801392,
1133
+ "total_param_count": 494032768,
1134
+ "trainable_fraction": 1.0,
1135
+ "trainable_param_count": 494032768
1136
+ },
1137
+ "final_model_path": "artifacts/self_play_hf_l40s_full/round_006/answerer_train/final_model",
1138
+ "global_step": 120,
1139
+ "is_full_finetune": true,
1140
+ "model_path": "artifacts/self_play_hf_l40s_full/round_006/answerer_train/final_model",
1141
+ "phase_output_dir": "artifacts/self_play_hf_l40s_full/round_006/answerer_train",
1142
+ "train_rows": 40,
1143
+ "training_loss": 0.0024079756675443303,
1144
+ "tuning_mode": "full"
1145
+ },
1146
+ "answerer_pre": null,
1147
+ "answerer_pre_task_count": 0,
1148
+ "answerer_task_count": 40,
1149
+ "artifacts": {
1150
+ "answerer_dataset": "artifacts/self_play_hf_l40s_full/round_006/answerer_dataset.json",
1151
+ "answerer_pre_dataset": "",
1152
+ "canonical_graph_candidates": "artifacts/self_play_hf_l40s_full/round_006/canonical_graph_candidates.json",
1153
+ "generated_tasks": "artifacts/self_play_hf_l40s_full/round_006/generated_tasks.json",
1154
+ "generator_dataset": "artifacts/self_play_hf_l40s_full/round_006/generator_dataset.json",
1155
+ "replay_traces": "artifacts/self_play_hf_l40s_full/round_006/replay_traces.json",
1156
+ "validation_reports": "artifacts/self_play_hf_l40s_full/round_006/validation_reports.json"
1157
+ },
1158
+ "dry_run": false,
1159
+ "generated_task_count": 24,
1160
+ "generator": {
1161
+ "checkpoint_dirs": [
1162
+ "artifacts/self_play_hf_l40s_full/round_006/generator_train/checkpoint-120"
1163
+ ],
1164
+ "diagnostics": {
1165
+ "entropy_max": 4.700138092041016,
1166
+ "entropy_min": 4.030674457550049,
1167
+ "fingerprint_changed_count": 32,
1168
+ "fingerprint_param_count": 32,
1169
+ "grad_norm_max": 2.6713056564331055,
1170
+ "kl_max": 0.15328484773635864,
1171
+ "loss_abs_max": 0.09415566176176071,
1172
+ "nonzero_grad_tensors": 0,
1173
+ "params_with_grad": 0,
1174
+ "reward_max": -0.05000000074505806,
1175
+ "reward_min": -0.125,
1176
+ "reward_std_max": 0.20493900775909424,
1177
+ "total_param_count": 494032768,
1178
+ "trainable_fraction": 1.0,
1179
+ "trainable_param_count": 494032768
1180
+ },
1181
+ "final_model_path": "artifacts/self_play_hf_l40s_full/round_006/generator_train/final_model",
1182
+ "global_step": 120,
1183
+ "is_full_finetune": true,
1184
+ "model_path": "artifacts/self_play_hf_l40s_full/round_006/generator_train/final_model",
1185
+ "phase_output_dir": "artifacts/self_play_hf_l40s_full/round_006/generator_train",
1186
+ "train_rows": 24,
1187
+ "training_loss": 0.0002774861710387692,
1188
+ "tuning_mode": "full"
1189
+ },
1190
+ "phase_schedule": "generator_answerer",
1191
+ "pipeline_mode": "swarm_v2",
1192
+ "round": 6
1193
+ }
1194
+ ],
1195
+ "tuning_mode": "full"
1196
+ }