Siddeshwar1625 commited on
Commit
0c292f8
·
verified ·
1 Parent(s): 5822325

Upload self-play summary

Browse files
self_play_hf_a10g_train/self_play_summary.json ADDED
@@ -0,0 +1,880 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "canonical_graph_mode": "fixed",
3
+ "dry_run": false,
4
+ "final_models": {
5
+ "answerer": "artifacts/self_play_hf_a10g_train/round_002/answerer_train/final_model",
6
+ "generator": "artifacts/self_play_hf_a10g_train/round_002/answerer_train/final_model"
7
+ },
8
+ "initial_models": {
9
+ "answerer": "Qwen/Qwen2.5-0.5B-Instruct",
10
+ "generator": "Qwen/Qwen2.5-0.5B-Instruct"
11
+ },
12
+ "kimi_objective_mapping": {
13
+ "grouped_rollouts": "TRL GRPO num_generations",
14
+ "mean_centered_advantage": "GRPO relative reward baseline",
15
+ "parallel_orchestration": "PARL-inspired auxiliary reward over generator and answerer swarms",
16
+ "reference_regularization": "GRPO beta KL term",
17
+ "toggle_self_play": "Alternating generator and answerer rounds",
18
+ "token_level_clipping": "GRPO epsilon clipping over policy ratios"
19
+ },
20
+ "model_topology": "shared",
21
+ "output_dir": "artifacts/self_play_hf_a10g_train",
22
+ "phase_schedule": "generator_answerer",
23
+ "pipeline_mode": "swarm_v2",
24
+ "post_training_evaluation": {
25
+ "answerer_models": {
26
+ "finetuned_answerer": "artifacts/self_play_hf_a10g_train/round_002/answerer_train/final_model",
27
+ "original_answerer": "Qwen/Qwen2.5-0.5B-Instruct"
28
+ },
29
+ "generated_tasks_path": "artifacts/self_play_hf_a10g_train/post_training_eval_generated_tasks.json",
30
+ "generator_model": "artifacts/self_play_hf_a10g_train/round_002/answerer_train/final_model",
31
+ "model_evaluations": {
32
+ "finetuned_answerer": {
33
+ "episodes": [
34
+ {
35
+ "agent_answer": "finished_subtasks\": 2",
36
+ "completion_length": 433,
37
+ "graph_f1": 0.6666666666666666,
38
+ "predicted_edge_count": 1,
39
+ "question": "If you start at alias_30_572 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
40
+ "reward": 0.4462659972989426,
41
+ "success": 0,
42
+ "support_edge_count": 2,
43
+ "task_answer": "user_25",
44
+ "task_id": "swarm_v2_r3_0",
45
+ "task_type": "swarm_v2_2hop_trace"
46
+ },
47
+ {
48
+ "agent_answer": "finished_subtasks\": 2",
49
+ "completion_length": 434,
50
+ "graph_f1": 0.0,
51
+ "predicted_edge_count": 0,
52
+ "question": "If you start at user_37 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
53
+ "reward": 0.39524653939368076,
54
+ "success": 0,
55
+ "support_edge_count": 2,
56
+ "task_answer": "loc_pune",
57
+ "task_id": "swarm_v2_r3_1",
58
+ "task_type": "swarm_v2_2hop_trace"
59
+ },
60
+ {
61
+ "agent_answer": "finished_subtasks\": 2",
62
+ "completion_length": 434,
63
+ "graph_f1": 1.0,
64
+ "predicted_edge_count": 2,
65
+ "question": "If you start at alias_24_458 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
66
+ "reward": 0.5018390028775422,
67
+ "success": 0,
68
+ "support_edge_count": 2,
69
+ "task_answer": "loc_hyderabad",
70
+ "task_id": "swarm_v2_r3_2",
71
+ "task_type": "swarm_v2_2hop_trace"
72
+ },
73
+ {
74
+ "agent_answer": "finished_subtasks\": 2,",
75
+ "completion_length": 440,
76
+ "graph_f1": 1.0,
77
+ "predicted_edge_count": 2,
78
+ "question": "If you start at alias_4_664 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
79
+ "reward": 0.5000528755950792,
80
+ "success": 0,
81
+ "support_edge_count": 2,
82
+ "task_answer": "loc_bengaluru",
83
+ "task_id": "swarm_v2_r3_3",
84
+ "task_type": "swarm_v2_2hop_trace"
85
+ },
86
+ {
87
+ "agent_answer": "finished_sub",
88
+ "completion_length": 426,
89
+ "graph_f1": 1.0,
90
+ "predicted_edge_count": 2,
91
+ "question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
92
+ "reward": 0.5017743415980812,
93
+ "success": 0,
94
+ "support_edge_count": 2,
95
+ "task_answer": "user_3",
96
+ "task_id": "swarm_v2_r3_4",
97
+ "task_type": "swarm_v2_2hop_trace"
98
+ },
99
+ {
100
+ "agent_answer": "finished_subtasks\":",
101
+ "completion_length": 435,
102
+ "graph_f1": 0.6666666666666666,
103
+ "predicted_edge_count": 1,
104
+ "question": "If you start at alias_20_174 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
105
+ "reward": 0.4462659972989426,
106
+ "success": 0,
107
+ "support_edge_count": 2,
108
+ "task_answer": "org_northbridge",
109
+ "task_id": "swarm_v2_r3_5",
110
+ "task_type": "swarm_v2_2hop_trace"
111
+ },
112
+ {
113
+ "agent_answer": "finished_subtasks",
114
+ "completion_length": 431,
115
+ "graph_f1": 1.0,
116
+ "predicted_edge_count": 2,
117
+ "question": "If you start at alias_34_511 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
118
+ "reward": 0.49966549250864506,
119
+ "success": 0,
120
+ "support_edge_count": 2,
121
+ "task_answer": "org_helios_labs",
122
+ "task_id": "swarm_v2_r3_6",
123
+ "task_type": "swarm_v2_2hop_trace"
124
+ },
125
+ {
126
+ "agent_answer": "critical",
127
+ "completion_length": 447,
128
+ "graph_f1": 0.6666666666666666,
129
+ "predicted_edge_count": 1,
130
+ "question": "If you start at alias_3_544 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
131
+ "reward": 0.4454139519345446,
132
+ "success": 0,
133
+ "support_edge_count": 2,
134
+ "task_answer": "user_12",
135
+ "task_id": "swarm_v2_r3_7",
136
+ "task_type": "swarm_v2_2hop_trace"
137
+ },
138
+ {
139
+ "agent_answer": "critical",
140
+ "completion_length": 445,
141
+ "graph_f1": 1.0,
142
+ "predicted_edge_count": 2,
143
+ "question": "If you start at alias_3_544 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
144
+ "reward": 0.4994635369242434,
145
+ "success": 0,
146
+ "support_edge_count": 2,
147
+ "task_answer": "loc_pune",
148
+ "task_id": "swarm_v2_r3_8",
149
+ "task_type": "swarm_v2_2hop_trace"
150
+ },
151
+ {
152
+ "agent_answer": "finished_subtasks\": 2,",
153
+ "completion_length": 431,
154
+ "graph_f1": 1.0,
155
+ "predicted_edge_count": 2,
156
+ "question": "If you start at user_39 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
157
+ "reward": 0.530983823504946,
158
+ "success": 0,
159
+ "support_edge_count": 2,
160
+ "task_answer": "user_12",
161
+ "task_id": "swarm_v2_r3_9",
162
+ "task_type": "swarm_v2_2hop_trace"
163
+ },
164
+ {
165
+ "agent_answer": "dst\": \"user",
166
+ "completion_length": 452,
167
+ "graph_f1": 0.5,
168
+ "predicted_edge_count": 1,
169
+ "question": "If you start at alias_4_664 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
170
+ "reward": 0.4353546524684235,
171
+ "success": 0,
172
+ "support_edge_count": 3,
173
+ "task_answer": "loc_bengaluru",
174
+ "task_id": "swarm_v2_r3_10",
175
+ "task_type": "swarm_v2_3hop_trace"
176
+ },
177
+ {
178
+ "agent_answer": "finished_subtasks\": 2,",
179
+ "completion_length": 448,
180
+ "graph_f1": 0.0,
181
+ "predicted_edge_count": 0,
182
+ "question": "If you start at user_16 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
183
+ "reward": 0.39524653939368076,
184
+ "success": 0,
185
+ "support_edge_count": 2,
186
+ "task_answer": "org_apex_dynamics",
187
+ "task_id": "swarm_v2_r3_11",
188
+ "task_type": "swarm_v2_2hop_trace"
189
+ },
190
+ {
191
+ "agent_answer": "critical_steps\":",
192
+ "completion_length": 459,
193
+ "graph_f1": 0.0,
194
+ "predicted_edge_count": 0,
195
+ "question": "If you start at user_3 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
196
+ "reward": 0.39524653939368076,
197
+ "success": 0,
198
+ "support_edge_count": 2,
199
+ "task_answer": "org_northbridge",
200
+ "task_id": "swarm_v2_r3_12",
201
+ "task_type": "swarm_v2_2hop_trace"
202
+ },
203
+ {
204
+ "agent_answer": "",
205
+ "completion_length": 446,
206
+ "graph_f1": 0.6666666666666666,
207
+ "predicted_edge_count": 1,
208
+ "question": "If you start at user_30 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
209
+ "reward": 0.36246822117289235,
210
+ "success": 0,
211
+ "support_edge_count": 2,
212
+ "task_answer": "org_northbridge",
213
+ "task_id": "swarm_v2_r3_13",
214
+ "task_type": "swarm_v2_2hop_trace"
215
+ },
216
+ {
217
+ "agent_answer": "finished_subtasks\": 2",
218
+ "completion_length": 428,
219
+ "graph_f1": 0.6666666666666666,
220
+ "predicted_edge_count": 1,
221
+ "question": "If you start at alias_11_684 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
222
+ "reward": 0.4462659972989426,
223
+ "success": 0,
224
+ "support_edge_count": 2,
225
+ "task_answer": "org_northbridge",
226
+ "task_id": "swarm_v2_r3_14",
227
+ "task_type": "swarm_v2_2hop_trace"
228
+ },
229
+ {
230
+ "agent_answer": "dst\": \"loc",
231
+ "completion_length": 447,
232
+ "graph_f1": 0.5,
233
+ "predicted_edge_count": 1,
234
+ "question": "If you start at alias_29_495 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
235
+ "reward": 0.4353546524684235,
236
+ "success": 0,
237
+ "support_edge_count": 3,
238
+ "task_answer": "loc_hyderabad",
239
+ "task_id": "swarm_v2_r3_15",
240
+ "task_type": "swarm_v2_3hop_trace"
241
+ },
242
+ {
243
+ "agent_answer": "finished_subtasks\":",
244
+ "completion_length": 433,
245
+ "graph_f1": 1.0,
246
+ "predicted_edge_count": 2,
247
+ "question": "If you start at alias_38_337 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
248
+ "reward": 0.5000528755950792,
249
+ "success": 0,
250
+ "support_edge_count": 2,
251
+ "task_answer": "loc_bengaluru",
252
+ "task_id": "swarm_v2_r3_16",
253
+ "task_type": "swarm_v2_2hop_trace"
254
+ },
255
+ {
256
+ "agent_answer": "finished_subtasks\": 2,",
257
+ "completion_length": 437,
258
+ "graph_f1": 0.6666666666666666,
259
+ "predicted_edge_count": 1,
260
+ "question": "If you start at user_38 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
261
+ "reward": 0.44122448096324046,
262
+ "success": 0,
263
+ "support_edge_count": 2,
264
+ "task_answer": "user_12",
265
+ "task_id": "swarm_v2_r3_17",
266
+ "task_type": "swarm_v2_2hop_trace"
267
+ },
268
+ {
269
+ "agent_answer": "finished_subtasks\":",
270
+ "completion_length": 435,
271
+ "graph_f1": 0.6666666666666666,
272
+ "predicted_edge_count": 1,
273
+ "question": "If you start at alias_39_951 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
274
+ "reward": 0.4454139519345446,
275
+ "success": 0,
276
+ "support_edge_count": 2,
277
+ "task_answer": "org_northbridge",
278
+ "task_id": "swarm_v2_r3_18",
279
+ "task_type": "swarm_v2_2hop_trace"
280
+ },
281
+ {
282
+ "agent_answer": "critical_steps\":",
283
+ "completion_length": 456,
284
+ "graph_f1": 0.0,
285
+ "predicted_edge_count": 0,
286
+ "question": "If you start at user_34 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
287
+ "reward": 0.39524653939368076,
288
+ "success": 0,
289
+ "support_edge_count": 2,
290
+ "task_answer": "loc_pune",
291
+ "task_id": "swarm_v2_r3_19",
292
+ "task_type": "swarm_v2_2hop_trace"
293
+ },
294
+ {
295
+ "agent_answer": "finished_subtasks\":",
296
+ "completion_length": 433,
297
+ "graph_f1": 1.0,
298
+ "predicted_edge_count": 2,
299
+ "question": "If you start at alias_32_394 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
300
+ "reward": 0.5000528755950792,
301
+ "success": 0,
302
+ "support_edge_count": 2,
303
+ "task_answer": "loc_bengaluru",
304
+ "task_id": "swarm_v2_r3_20",
305
+ "task_type": "swarm_v2_2hop_trace"
306
+ },
307
+ {
308
+ "agent_answer": "finished_subtasks\":",
309
+ "completion_length": 435,
310
+ "graph_f1": 0.6666666666666666,
311
+ "predicted_edge_count": 1,
312
+ "question": "If you start at alias_30_572 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
313
+ "reward": 0.4462659972989426,
314
+ "success": 0,
315
+ "support_edge_count": 2,
316
+ "task_answer": "org_northbridge",
317
+ "task_id": "swarm_v2_r3_21",
318
+ "task_type": "swarm_v2_2hop_trace"
319
+ },
320
+ {
321
+ "agent_answer": "dst\": \"user_",
322
+ "completion_length": 446,
323
+ "graph_f1": 1.0,
324
+ "predicted_edge_count": 3,
325
+ "question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
326
+ "reward": 0.4993624365793478,
327
+ "success": 0,
328
+ "support_edge_count": 3,
329
+ "task_answer": "loc_pune",
330
+ "task_id": "swarm_v2_r3_22",
331
+ "task_type": "swarm_v2_3hop_trace"
332
+ },
333
+ {
334
+ "agent_answer": "finished_subtasks",
335
+ "completion_length": 431,
336
+ "graph_f1": 1.0,
337
+ "predicted_edge_count": 2,
338
+ "question": "If you start at alias_32_394 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
339
+ "reward": 0.5001432839561198,
340
+ "success": 0,
341
+ "support_edge_count": 2,
342
+ "task_answer": "org_helios_labs",
343
+ "task_id": "swarm_v2_r3_23",
344
+ "task_type": "swarm_v2_2hop_trace"
345
+ }
346
+ ],
347
+ "model_path": "artifacts/self_play_hf_a10g_train/round_002/answerer_train/final_model",
348
+ "summary": {
349
+ "avg_completion_length": 439.25,
350
+ "avg_graph_f1": 0.6805555555555555,
351
+ "avg_reward": 0.4568612751019469,
352
+ "episodes": 24,
353
+ "task_success_rate": 0.0
354
+ }
355
+ },
356
+ "original_answerer": {
357
+ "episodes": [
358
+ {
359
+ "agent_answer": "{\"answer\":\"user_30\",\"supporting_edges\":[{\"src\":\"alias_30_572\",\"rel\":\"alias_of\",\"dst\":\"user_30\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"loc_delhi\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst",
360
+ "completion_length": 388,
361
+ "graph_f1": 0.6666666666666666,
362
+ "predicted_edge_count": 1,
363
+ "question": "If you start at alias_30_572 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
364
+ "reward": 0.4462659972989426,
365
+ "success": 0,
366
+ "support_edge_count": 2,
367
+ "task_answer": "user_25",
368
+ "task_id": "swarm_v2_r3_0",
369
+ "task_type": "swarm_v2_2hop_trace"
370
+ },
371
+ {
372
+ "agent_answer": "user_3",
373
+ "completion_length": 414,
374
+ "graph_f1": 1.0,
375
+ "predicted_edge_count": 2,
376
+ "question": "If you start at user_37 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
377
+ "reward": 0.5717814630149074,
378
+ "success": 0,
379
+ "support_edge_count": 2,
380
+ "task_answer": "loc_pune",
381
+ "task_id": "swarm_v2_r3_1",
382
+ "task_type": "swarm_v2_2hop_trace"
383
+ },
384
+ {
385
+ "agent_answer": "{\"answer\":\"user_24\",\"supporting_edges\":[{\"src\":\"alias_24_458\",\"rel\":\"alias_of\",\"dst\":\"user_24\",\"confidence\":1.0},{\"src\":\"user_24\",\"rel\":\"located_in\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"loc_hyderabad\",\"rel\":\"located_in\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_24\",\"confidence\":1.0},{\"src\":\"user_24\",\"rel\":\"located_in\",",
386
+ "completion_length": 387,
387
+ "graph_f1": 1.0,
388
+ "predicted_edge_count": 2,
389
+ "question": "If you start at alias_24_458 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
390
+ "reward": 0.5018390028775422,
391
+ "success": 0,
392
+ "support_edge_count": 2,
393
+ "task_answer": "loc_hyderabad",
394
+ "task_id": "swarm_v2_r3_2",
395
+ "task_type": "swarm_v2_2hop_trace"
396
+ },
397
+ {
398
+ "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_4_664\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",",
399
+ "completion_length": 381,
400
+ "graph_f1": 1.0,
401
+ "predicted_edge_count": 2,
402
+ "question": "If you start at alias_4_664 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
403
+ "reward": 0.5000528755950792,
404
+ "success": 0,
405
+ "support_edge_count": 2,
406
+ "task_answer": "loc_bengaluru",
407
+ "task_id": "swarm_v2_r3_3",
408
+ "task_type": "swarm_v2_2hop_trace"
409
+ },
410
+ {
411
+ "agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_39_951\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"user_39\",",
412
+ "completion_length": 396,
413
+ "graph_f1": 1.0,
414
+ "predicted_edge_count": 2,
415
+ "question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
416
+ "reward": 0.5017743415980812,
417
+ "success": 0,
418
+ "support_edge_count": 2,
419
+ "task_answer": "user_3",
420
+ "task_id": "swarm_v2_r3_4",
421
+ "task_type": "swarm_v2_2hop_trace"
422
+ },
423
+ {
424
+ "agent_answer": "user_7",
425
+ "completion_length": 387,
426
+ "graph_f1": 0.0,
427
+ "predicted_edge_count": 2,
428
+ "question": "If you start at alias_20_174 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
429
+ "reward": 0.7089737433490155,
430
+ "success": 0,
431
+ "support_edge_count": 2,
432
+ "task_answer": "org_northbridge",
433
+ "task_id": "swarm_v2_r3_5",
434
+ "task_type": "swarm_v2_2hop_trace"
435
+ },
436
+ {
437
+ "agent_answer": "{\"answer\":\"user_34\",\"supporting_edges\":[{\"src\":\"alias_34_511\",\"rel\":\"alias_of\",\"dst\":\"user_34\",\"confidence\":1.0},{\"src\":\"user_34\",\"rel\":\"works_at\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",\"dst\":\"user_0\",\"confidence\":1.0},{\"src\":\"user_0\",\"rel\":\"works_at\",\"dst\":\"user_16\",\"confidence\":1.0},{\"src\":\"user_16\",\"rel\":\"works",
438
+ "completion_length": 361,
439
+ "graph_f1": 1.0,
440
+ "predicted_edge_count": 2,
441
+ "question": "If you start at alias_34_511 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
442
+ "reward": 0.49966549250864506,
443
+ "success": 0,
444
+ "support_edge_count": 2,
445
+ "task_answer": "org_helios_labs",
446
+ "task_id": "swarm_v2_r3_6",
447
+ "task_type": "swarm_v2_2hop_trace"
448
+ },
449
+ {
450
+ "agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_3_544\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":",
451
+ "completion_length": 408,
452
+ "graph_f1": 0.6666666666666666,
453
+ "predicted_edge_count": 1,
454
+ "question": "If you start at alias_3_544 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
455
+ "reward": 0.4454139519345446,
456
+ "success": 0,
457
+ "support_edge_count": 2,
458
+ "task_answer": "user_12",
459
+ "task_id": "swarm_v2_r3_7",
460
+ "task_type": "swarm_v2_2hop_trace"
461
+ },
462
+ {
463
+ "agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_3_544\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"loc_pune\",\"rel\":\"located_in\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"located_in\",\"dst\":\"user_6\",\"confidence\":1.0},{\"src\":\"user_6\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1",
464
+ "completion_length": 386,
465
+ "graph_f1": 1.0,
466
+ "predicted_edge_count": 2,
467
+ "question": "If you start at alias_3_544 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
468
+ "reward": 0.4994635369242434,
469
+ "success": 0,
470
+ "support_edge_count": 2,
471
+ "task_answer": "loc_pune",
472
+ "task_id": "swarm_v2_r3_8",
473
+ "task_type": "swarm_v2_2hop_trace"
474
+ },
475
+ {
476
+ "agent_answer": "user_3",
477
+ "completion_length": 423,
478
+ "graph_f1": 0.5,
479
+ "predicted_edge_count": 2,
480
+ "question": "If you start at user_39 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
481
+ "reward": 0.6989023123676081,
482
+ "success": 0,
483
+ "support_edge_count": 2,
484
+ "task_answer": "user_12",
485
+ "task_id": "swarm_v2_r3_9",
486
+ "task_type": "swarm_v2_2hop_trace"
487
+ },
488
+ {
489
+ "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_4_664\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"connected_to\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",",
490
+ "completion_length": 383,
491
+ "graph_f1": 0.0,
492
+ "predicted_edge_count": 0,
493
+ "question": "If you start at alias_4_664 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
494
+ "reward": 0.39524653939368076,
495
+ "success": 0,
496
+ "support_edge_count": 3,
497
+ "task_answer": "loc_bengaluru",
498
+ "task_id": "swarm_v2_r3_10",
499
+ "task_type": "swarm_v2_3hop_trace"
500
+ },
501
+ {
502
+ "agent_answer": "{\"answer\":\"org_apex_dynamics\",\"supporting_edges\":[{\"src\":\"user_16\",\"rel\":\"connected_to\",\"dst\":\"org_apex_dynamics\",\"confidence\":1.0},{\"src\":\"org_apex_dynamics\",\"rel\":\"works_at\",\"dst\":\"user_18\",\"confidence\":1.0},{\"src\":\"user_18\",\"rel\":\"connected_to\",\"dst\":\"org_apex_dynamics\",\"confidence\":1.0},{\"src\":\"org_apex_dynamics\",\"rel\":\"works_at\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"connected_to\",\"dst",
503
+ "completion_length": 411,
504
+ "graph_f1": 1.0,
505
+ "predicted_edge_count": 2,
506
+ "question": "If you start at user_16 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
507
+ "reward": 0.5363758482799911,
508
+ "success": 0,
509
+ "support_edge_count": 2,
510
+ "task_answer": "org_apex_dynamics",
511
+ "task_id": "swarm_v2_r3_11",
512
+ "task_type": "swarm_v2_2hop_trace"
513
+ },
514
+ {
515
+ "agent_answer": "{\"answer\":\"org_northbridge\",\"supporting_edges\":[{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":",
516
+ "completion_length": 431,
517
+ "graph_f1": 0.0,
518
+ "predicted_edge_count": 0,
519
+ "question": "If you start at user_3 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
520
+ "reward": 0.39524653939368076,
521
+ "success": 0,
522
+ "support_edge_count": 2,
523
+ "task_answer": "org_northbridge",
524
+ "task_id": "swarm_v2_r3_12",
525
+ "task_type": "swarm_v2_2hop_trace"
526
+ },
527
+ {
528
+ "agent_answer": "user_7",
529
+ "completion_length": 418,
530
+ "graph_f1": 0.0,
531
+ "predicted_edge_count": 2,
532
+ "question": "If you start at user_30 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
533
+ "reward": 0.7410081838633457,
534
+ "success": 0,
535
+ "support_edge_count": 2,
536
+ "task_answer": "org_northbridge",
537
+ "task_id": "swarm_v2_r3_13",
538
+ "task_type": "swarm_v2_2hop_trace"
539
+ },
540
+ {
541
+ "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_11_684\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"loc_pune\",\"rel\":\"located_in\",\"dst\":\"org_north",
542
+ "completion_length": 387,
543
+ "graph_f1": 0.0,
544
+ "predicted_edge_count": 0,
545
+ "question": "If you start at alias_11_684 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
546
+ "reward": 0.39524653939368076,
547
+ "success": 0,
548
+ "support_edge_count": 2,
549
+ "task_answer": "org_northbridge",
550
+ "task_id": "swarm_v2_r3_14",
551
+ "task_type": "swarm_v2_2hop_trace"
552
+ },
553
+ {
554
+ "agent_answer": "{\"answer\":\"user_35\",\"supporting_edges\":[{\"src\":\"alias_29_495\",\"rel\":\"alias_of\",\"dst\":\"user_35\",\"confidence\":1.0},{\"src\":\"user_35\",\"rel\":\"connected_to\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"loc_hyderabad\",\"rel\":\"located_in\",\"dst\":\"user_35\",\"confidence\":1.0},{\"src\":\"user_35\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel",
555
+ "completion_length": 375,
556
+ "graph_f1": 0.5,
557
+ "predicted_edge_count": 1,
558
+ "question": "If you start at alias_29_495 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
559
+ "reward": 0.42825354331930193,
560
+ "success": 0,
561
+ "support_edge_count": 3,
562
+ "task_answer": "loc_hyderabad",
563
+ "task_id": "swarm_v2_r3_15",
564
+ "task_type": "swarm_v2_3hop_trace"
565
+ },
566
+ {
567
+ "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_38_337\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"located_in\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",\"dst\":\"user_5",
568
+ "completion_length": 378,
569
+ "graph_f1": 0.0,
570
+ "predicted_edge_count": 0,
571
+ "question": "If you start at alias_38_337 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
572
+ "reward": 0.39524653939368076,
573
+ "success": 0,
574
+ "support_edge_count": 2,
575
+ "task_answer": "loc_bengaluru",
576
+ "task_id": "swarm_v2_r3_16",
577
+ "task_type": "swarm_v2_2hop_trace"
578
+ },
579
+ {
580
+ "agent_answer": "user_3",
581
+ "completion_length": 391,
582
+ "graph_f1": 0.5,
583
+ "predicted_edge_count": 2,
584
+ "question": "If you start at user_38 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
585
+ "reward": 0.6997365142465289,
586
+ "success": 0,
587
+ "support_edge_count": 2,
588
+ "task_answer": "user_12",
589
+ "task_id": "swarm_v2_r3_17",
590
+ "task_type": "swarm_v2_2hop_trace"
591
+ },
592
+ {
593
+ "agent_answer": "{\"answer\":\"user_39\",\"supporting_edges\":[{\"src\":\"alias_39_951\",\"rel\":\"alias_of\",\"dst\":\"user_39\",\"confidence\":1.0},{\"src\":\"user_39\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"works_at\",\"dst\":",
594
+ "completion_length": 373,
595
+ "graph_f1": 1.0,
596
+ "predicted_edge_count": 2,
597
+ "question": "If you start at alias_39_951 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
598
+ "reward": 0.49900727548994456,
599
+ "success": 0,
600
+ "support_edge_count": 2,
601
+ "task_answer": "org_northbridge",
602
+ "task_id": "swarm_v2_r3_18",
603
+ "task_type": "swarm_v2_2hop_trace"
604
+ },
605
+ {
606
+ "agent_answer": "user_6",
607
+ "completion_length": 415,
608
+ "graph_f1": 0.0,
609
+ "predicted_edge_count": 2,
610
+ "question": "If you start at user_34 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
611
+ "reward": 0.7402569345038308,
612
+ "success": 0,
613
+ "support_edge_count": 2,
614
+ "task_answer": "loc_pune",
615
+ "task_id": "swarm_v2_r3_19",
616
+ "task_type": "swarm_v2_2hop_trace"
617
+ },
618
+ {
619
+ "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_32_394\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"located_in\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",\"dst\":\"user_5",
620
+ "completion_length": 378,
621
+ "graph_f1": 0.0,
622
+ "predicted_edge_count": 0,
623
+ "question": "If you start at alias_32_394 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
624
+ "reward": 0.39524653939368076,
625
+ "success": 0,
626
+ "support_edge_count": 2,
627
+ "task_answer": "loc_bengaluru",
628
+ "task_id": "swarm_v2_r3_20",
629
+ "task_type": "swarm_v2_2hop_trace"
630
+ },
631
+ {
632
+ "agent_answer": "{\"answer\":\"user_30\",\"supporting_edges\":[{\"src\":\"alias_30_572\",\"rel\":\"alias_of\",\"dst\":\"user_30\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"works_at\",\"dst\":",
633
+ "completion_length": 373,
634
+ "graph_f1": 1.0,
635
+ "predicted_edge_count": 2,
636
+ "question": "If you start at alias_30_572 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
637
+ "reward": 0.4998888700517808,
638
+ "success": 0,
639
+ "support_edge_count": 2,
640
+ "task_answer": "org_northbridge",
641
+ "task_id": "swarm_v2_r3_21",
642
+ "task_type": "swarm_v2_2hop_trace"
643
+ },
644
+ {
645
+ "agent_answer": "user_3",
646
+ "completion_length": 401,
647
+ "graph_f1": 0.0,
648
+ "predicted_edge_count": 3,
649
+ "question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
650
+ "reward": 0.468212688181137,
651
+ "success": 0,
652
+ "support_edge_count": 3,
653
+ "task_answer": "loc_pune",
654
+ "task_id": "swarm_v2_r3_22",
655
+ "task_type": "swarm_v2_3hop_trace"
656
+ },
657
+ {
658
+ "agent_answer": "user_32",
659
+ "completion_length": 434,
660
+ "graph_f1": 1.0,
661
+ "predicted_edge_count": 2,
662
+ "question": "If you start at alias_32_394 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
663
+ "reward": 0.7753896096874865,
664
+ "success": 0,
665
+ "support_edge_count": 2,
666
+ "task_answer": "org_helios_labs",
667
+ "task_id": "swarm_v2_r3_23",
668
+ "task_type": "swarm_v2_2hop_trace"
669
+ }
670
+ ],
671
+ "model_path": "Qwen/Qwen2.5-0.5B-Instruct",
672
+ "summary": {
673
+ "avg_completion_length": 394.9583333333333,
674
+ "avg_graph_f1": 0.5347222222222222,
675
+ "avg_reward": 0.5307706200858483,
676
+ "episodes": 24,
677
+ "task_success_rate": 0.0
678
+ }
679
+ }
680
+ },
681
+ "path": "artifacts/self_play_hf_a10g_train/post_training_evaluation.json",
682
+ "pipeline_mode": "swarm_v2",
683
+ "skipped": false,
684
+ "summary": {
685
+ "compared_models": [
686
+ "finetuned_answerer",
687
+ "original_answerer"
688
+ ],
689
+ "delta_vs_original": {
690
+ "avg_graph_f1": 0.14583333333333326,
691
+ "avg_reward": -0.07390934498390139,
692
+ "task_success_rate": 0.0
693
+ },
694
+ "finetuned_answerer": {
695
+ "avg_completion_length": 439.25,
696
+ "avg_graph_f1": 0.6805555555555555,
697
+ "avg_reward": 0.4568612751019469,
698
+ "episodes": 24,
699
+ "task_success_rate": 0.0
700
+ },
701
+ "generated_task_count": 24,
702
+ "generator_valid_rate": 0.5,
703
+ "original_answerer": {
704
+ "avg_completion_length": 394.9583333333333,
705
+ "avg_graph_f1": 0.5347222222222222,
706
+ "avg_reward": 0.5307706200858483,
707
+ "episodes": 24,
708
+ "task_success_rate": 0.0
709
+ },
710
+ "top_generator_invalid_reasons": [
711
+ [
712
+ "duplicate_or_near_duplicate",
713
+ 3
714
+ ]
715
+ ]
716
+ },
717
+ "validation_reports_path": "artifacts/self_play_hf_a10g_train/post_training_eval_validation_reports.json"
718
+ },
719
+ "rounds": [
720
+ {
721
+ "answerer": {
722
+ "checkpoint_dirs": [
723
+ "artifacts/self_play_hf_a10g_train/round_001/answerer_train/checkpoint-50"
724
+ ],
725
+ "diagnostics": {
726
+ "entropy_max": 0.5364719033241272,
727
+ "entropy_min": 0.05320966988801956,
728
+ "fingerprint_changed_count": 29,
729
+ "fingerprint_param_count": 32,
730
+ "grad_norm_max": 1074.104736328125,
731
+ "kl_max": 138.4082489013672,
732
+ "loss_abs_max": 0.35473087430000305,
733
+ "nonzero_grad_tensors": 0,
734
+ "params_with_grad": 0,
735
+ "reward_max": 0.8143881559371948,
736
+ "reward_min": 0.49031317234039307,
737
+ "reward_std_max": 0.1820799559354782,
738
+ "total_param_count": 494032768,
739
+ "trainable_fraction": 1.0,
740
+ "trainable_param_count": 494032768
741
+ },
742
+ "final_model_path": "artifacts/self_play_hf_a10g_train/round_001/answerer_train/final_model",
743
+ "global_step": 50,
744
+ "is_full_finetune": true,
745
+ "model_path": "artifacts/self_play_hf_a10g_train/round_001/answerer_train/final_model",
746
+ "phase_output_dir": "artifacts/self_play_hf_a10g_train/round_001/answerer_train",
747
+ "train_rows": 40,
748
+ "training_loss": 0.004957557299057953,
749
+ "tuning_mode": "full"
750
+ },
751
+ "answerer_pre": null,
752
+ "answerer_pre_task_count": 0,
753
+ "answerer_task_count": 40,
754
+ "artifacts": {
755
+ "answerer_dataset": "artifacts/self_play_hf_a10g_train/round_001/answerer_dataset.json",
756
+ "answerer_pre_dataset": "",
757
+ "canonical_graph_candidates": "artifacts/self_play_hf_a10g_train/round_001/canonical_graph_candidates.json",
758
+ "generated_tasks": "artifacts/self_play_hf_a10g_train/round_001/generated_tasks.json",
759
+ "generator_dataset": "artifacts/self_play_hf_a10g_train/round_001/generator_dataset.json",
760
+ "replay_traces": "artifacts/self_play_hf_a10g_train/round_001/replay_traces.json",
761
+ "validation_reports": "artifacts/self_play_hf_a10g_train/round_001/validation_reports.json"
762
+ },
763
+ "dry_run": false,
764
+ "generated_task_count": 24,
765
+ "generator": {
766
+ "checkpoint_dirs": [
767
+ "artifacts/self_play_hf_a10g_train/round_001/generator_train/checkpoint-50"
768
+ ],
769
+ "diagnostics": {
770
+ "entropy_max": 2.1167566776275635,
771
+ "entropy_min": 0.8998836874961853,
772
+ "fingerprint_changed_count": 32,
773
+ "fingerprint_param_count": 32,
774
+ "grad_norm_max": 4.9195966720581055,
775
+ "kl_max": 0.18827898800373077,
776
+ "loss_abs_max": 0.2862913906574249,
777
+ "nonzero_grad_tensors": 0,
778
+ "params_with_grad": 0,
779
+ "reward_max": -0.07312500476837158,
780
+ "reward_min": -0.45585936307907104,
781
+ "reward_std_max": 0.3070923686027527,
782
+ "total_param_count": 494032768,
783
+ "trainable_fraction": 1.0,
784
+ "trainable_param_count": 494032768
785
+ },
786
+ "final_model_path": "artifacts/self_play_hf_a10g_train/round_001/generator_train/final_model",
787
+ "global_step": 50,
788
+ "is_full_finetune": true,
789
+ "model_path": "artifacts/self_play_hf_a10g_train/round_001/generator_train/final_model",
790
+ "phase_output_dir": "artifacts/self_play_hf_a10g_train/round_001/generator_train",
791
+ "train_rows": 24,
792
+ "training_loss": 0.0014148461786680855,
793
+ "tuning_mode": "full"
794
+ },
795
+ "phase_schedule": "generator_answerer",
796
+ "pipeline_mode": "swarm_v2",
797
+ "round": 1
798
+ },
799
+ {
800
+ "answerer": {
801
+ "checkpoint_dirs": [
802
+ "artifacts/self_play_hf_a10g_train/round_002/answerer_train/checkpoint-50"
803
+ ],
804
+ "diagnostics": {
805
+ "entropy_max": 0.23017549514770508,
806
+ "entropy_min": 0.04105038195848465,
807
+ "fingerprint_changed_count": 28,
808
+ "fingerprint_param_count": 32,
809
+ "grad_norm_max": 13.60930347442627,
810
+ "kl_max": 3.5134291648864746,
811
+ "loss_abs_max": 0.1710355579853058,
812
+ "nonzero_grad_tensors": 0,
813
+ "params_with_grad": 0,
814
+ "reward_max": 0.8078757524490356,
815
+ "reward_min": 0.5161759257316589,
816
+ "reward_std_max": 0.19121909141540527,
817
+ "total_param_count": 494032768,
818
+ "trainable_fraction": 1.0,
819
+ "trainable_param_count": 494032768
820
+ },
821
+ "final_model_path": "artifacts/self_play_hf_a10g_train/round_002/answerer_train/final_model",
822
+ "global_step": 50,
823
+ "is_full_finetune": true,
824
+ "model_path": "artifacts/self_play_hf_a10g_train/round_002/answerer_train/final_model",
825
+ "phase_output_dir": "artifacts/self_play_hf_a10g_train/round_002/answerer_train",
826
+ "train_rows": 40,
827
+ "training_loss": -0.0006817242898978292,
828
+ "tuning_mode": "full"
829
+ },
830
+ "answerer_pre": null,
831
+ "answerer_pre_task_count": 0,
832
+ "answerer_task_count": 40,
833
+ "artifacts": {
834
+ "answerer_dataset": "artifacts/self_play_hf_a10g_train/round_002/answerer_dataset.json",
835
+ "answerer_pre_dataset": "",
836
+ "canonical_graph_candidates": "artifacts/self_play_hf_a10g_train/round_002/canonical_graph_candidates.json",
837
+ "generated_tasks": "artifacts/self_play_hf_a10g_train/round_002/generated_tasks.json",
838
+ "generator_dataset": "artifacts/self_play_hf_a10g_train/round_002/generator_dataset.json",
839
+ "replay_traces": "artifacts/self_play_hf_a10g_train/round_002/replay_traces.json",
840
+ "validation_reports": "artifacts/self_play_hf_a10g_train/round_002/validation_reports.json"
841
+ },
842
+ "dry_run": false,
843
+ "generated_task_count": 24,
844
+ "generator": {
845
+ "checkpoint_dirs": [
846
+ "artifacts/self_play_hf_a10g_train/round_002/generator_train/checkpoint-50"
847
+ ],
848
+ "diagnostics": {
849
+ "entropy_max": 1.4437135457992554,
850
+ "entropy_min": 0.5105040073394775,
851
+ "fingerprint_changed_count": 31,
852
+ "fingerprint_param_count": 32,
853
+ "grad_norm_max": 2.945613384246826,
854
+ "kl_max": 0.15856948494911194,
855
+ "loss_abs_max": 0.21216821670532227,
856
+ "nonzero_grad_tensors": 0,
857
+ "params_with_grad": 0,
858
+ "reward_max": 0.02702206000685692,
859
+ "reward_min": -0.24335937201976776,
860
+ "reward_std_max": 0.42619889974594116,
861
+ "total_param_count": 494032768,
862
+ "trainable_fraction": 1.0,
863
+ "trainable_param_count": 494032768
864
+ },
865
+ "final_model_path": "artifacts/self_play_hf_a10g_train/round_002/generator_train/final_model",
866
+ "global_step": 50,
867
+ "is_full_finetune": true,
868
+ "model_path": "artifacts/self_play_hf_a10g_train/round_002/generator_train/final_model",
869
+ "phase_output_dir": "artifacts/self_play_hf_a10g_train/round_002/generator_train",
870
+ "train_rows": 24,
871
+ "training_loss": 0.00024927092301368247,
872
+ "tuning_mode": "full"
873
+ },
874
+ "phase_schedule": "generator_answerer",
875
+ "pipeline_mode": "swarm_v2",
876
+ "round": 2
877
+ }
878
+ ],
879
+ "tuning_mode": "full"
880
+ }