osint-checkpoints / self_play_hf_a10g_train /post_training_evaluation.json
Siddeshwar1625's picture
Upload post-training evaluation
42d592d verified
{
"answerer_models": {
"finetuned_answerer": "artifacts/self_play_hf_a10g_train/round_002/answerer_train/final_model",
"original_answerer": "Qwen/Qwen2.5-0.5B-Instruct"
},
"generated_tasks_path": "artifacts/self_play_hf_a10g_train/post_training_eval_generated_tasks.json",
"generator_model": "artifacts/self_play_hf_a10g_train/round_002/answerer_train/final_model",
"model_evaluations": {
"finetuned_answerer": {
"episodes": [
{
"agent_answer": "finished_subtasks\": 2",
"completion_length": 433,
"graph_f1": 0.6666666666666666,
"predicted_edge_count": 1,
"question": "If you start at alias_30_572 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.4462659972989426,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_25",
"task_id": "swarm_v2_r3_0",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "finished_subtasks\": 2",
"completion_length": 434,
"graph_f1": 0.0,
"predicted_edge_count": 0,
"question": "If you start at user_37 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r3_1",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "finished_subtasks\": 2",
"completion_length": 434,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_24_458 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.5018390028775422,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_hyderabad",
"task_id": "swarm_v2_r3_2",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "finished_subtasks\": 2,",
"completion_length": 440,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_4_664 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.5000528755950792,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r3_3",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "finished_sub",
"completion_length": 426,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.5017743415980812,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_3",
"task_id": "swarm_v2_r3_4",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "finished_subtasks\":",
"completion_length": 435,
"graph_f1": 0.6666666666666666,
"predicted_edge_count": 1,
"question": "If you start at alias_20_174 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.4462659972989426,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r3_5",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "finished_subtasks",
"completion_length": 431,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_34_511 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.49966549250864506,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_helios_labs",
"task_id": "swarm_v2_r3_6",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "critical",
"completion_length": 447,
"graph_f1": 0.6666666666666666,
"predicted_edge_count": 1,
"question": "If you start at alias_3_544 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.4454139519345446,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_12",
"task_id": "swarm_v2_r3_7",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "critical",
"completion_length": 445,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_3_544 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.4994635369242434,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r3_8",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "finished_subtasks\": 2,",
"completion_length": 431,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at user_39 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.530983823504946,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_12",
"task_id": "swarm_v2_r3_9",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "dst\": \"user",
"completion_length": 452,
"graph_f1": 0.5,
"predicted_edge_count": 1,
"question": "If you start at alias_4_664 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
"reward": 0.4353546524684235,
"success": 0,
"support_edge_count": 3,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r3_10",
"task_type": "swarm_v2_3hop_trace"
},
{
"agent_answer": "finished_subtasks\": 2,",
"completion_length": 448,
"graph_f1": 0.0,
"predicted_edge_count": 0,
"question": "If you start at user_16 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_apex_dynamics",
"task_id": "swarm_v2_r3_11",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "critical_steps\":",
"completion_length": 459,
"graph_f1": 0.0,
"predicted_edge_count": 0,
"question": "If you start at user_3 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r3_12",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "",
"completion_length": 446,
"graph_f1": 0.6666666666666666,
"predicted_edge_count": 1,
"question": "If you start at user_30 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
"reward": 0.36246822117289235,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r3_13",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "finished_subtasks\": 2",
"completion_length": 428,
"graph_f1": 0.6666666666666666,
"predicted_edge_count": 1,
"question": "If you start at alias_11_684 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.4462659972989426,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r3_14",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "dst\": \"loc",
"completion_length": 447,
"graph_f1": 0.5,
"predicted_edge_count": 1,
"question": "If you start at alias_29_495 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
"reward": 0.4353546524684235,
"success": 0,
"support_edge_count": 3,
"task_answer": "loc_hyderabad",
"task_id": "swarm_v2_r3_15",
"task_type": "swarm_v2_3hop_trace"
},
{
"agent_answer": "finished_subtasks\":",
"completion_length": 433,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_38_337 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.5000528755950792,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r3_16",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "finished_subtasks\": 2,",
"completion_length": 437,
"graph_f1": 0.6666666666666666,
"predicted_edge_count": 1,
"question": "If you start at user_38 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.44122448096324046,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_12",
"task_id": "swarm_v2_r3_17",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "finished_subtasks\":",
"completion_length": 435,
"graph_f1": 0.6666666666666666,
"predicted_edge_count": 1,
"question": "If you start at alias_39_951 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.4454139519345446,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r3_18",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "critical_steps\":",
"completion_length": 456,
"graph_f1": 0.0,
"predicted_edge_count": 0,
"question": "If you start at user_34 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r3_19",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "finished_subtasks\":",
"completion_length": 433,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_32_394 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.5000528755950792,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r3_20",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "finished_subtasks\":",
"completion_length": 435,
"graph_f1": 0.6666666666666666,
"predicted_edge_count": 1,
"question": "If you start at alias_30_572 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.4462659972989426,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r3_21",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "dst\": \"user_",
"completion_length": 446,
"graph_f1": 1.0,
"predicted_edge_count": 3,
"question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
"reward": 0.4993624365793478,
"success": 0,
"support_edge_count": 3,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r3_22",
"task_type": "swarm_v2_3hop_trace"
},
{
"agent_answer": "finished_subtasks",
"completion_length": 431,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_32_394 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.5001432839561198,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_helios_labs",
"task_id": "swarm_v2_r3_23",
"task_type": "swarm_v2_2hop_trace"
}
],
"model_path": "artifacts/self_play_hf_a10g_train/round_002/answerer_train/final_model",
"summary": {
"avg_completion_length": 439.25,
"avg_graph_f1": 0.6805555555555555,
"avg_reward": 0.4568612751019469,
"episodes": 24,
"task_success_rate": 0.0
}
},
"original_answerer": {
"episodes": [
{
"agent_answer": "{\"answer\":\"user_30\",\"supporting_edges\":[{\"src\":\"alias_30_572\",\"rel\":\"alias_of\",\"dst\":\"user_30\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"loc_delhi\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst",
"completion_length": 388,
"graph_f1": 0.6666666666666666,
"predicted_edge_count": 1,
"question": "If you start at alias_30_572 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.4462659972989426,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_25",
"task_id": "swarm_v2_r3_0",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "user_3",
"completion_length": 414,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at user_37 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
"reward": 0.5717814630149074,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r3_1",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "{\"answer\":\"user_24\",\"supporting_edges\":[{\"src\":\"alias_24_458\",\"rel\":\"alias_of\",\"dst\":\"user_24\",\"confidence\":1.0},{\"src\":\"user_24\",\"rel\":\"located_in\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"loc_hyderabad\",\"rel\":\"located_in\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_24\",\"confidence\":1.0},{\"src\":\"user_24\",\"rel\":\"located_in\",",
"completion_length": 387,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_24_458 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.5018390028775422,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_hyderabad",
"task_id": "swarm_v2_r3_2",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_4_664\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",",
"completion_length": 381,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_4_664 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.5000528755950792,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r3_3",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_39_951\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"user_39\",",
"completion_length": 396,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.5017743415980812,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_3",
"task_id": "swarm_v2_r3_4",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "user_7",
"completion_length": 387,
"graph_f1": 0.0,
"predicted_edge_count": 2,
"question": "If you start at alias_20_174 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.7089737433490155,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r3_5",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "{\"answer\":\"user_34\",\"supporting_edges\":[{\"src\":\"alias_34_511\",\"rel\":\"alias_of\",\"dst\":\"user_34\",\"confidence\":1.0},{\"src\":\"user_34\",\"rel\":\"works_at\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",\"dst\":\"user_0\",\"confidence\":1.0},{\"src\":\"user_0\",\"rel\":\"works_at\",\"dst\":\"user_16\",\"confidence\":1.0},{\"src\":\"user_16\",\"rel\":\"works",
"completion_length": 361,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_34_511 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.49966549250864506,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_helios_labs",
"task_id": "swarm_v2_r3_6",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_3_544\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":",
"completion_length": 408,
"graph_f1": 0.6666666666666666,
"predicted_edge_count": 1,
"question": "If you start at alias_3_544 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.4454139519345446,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_12",
"task_id": "swarm_v2_r3_7",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_3_544\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"loc_pune\",\"rel\":\"located_in\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"located_in\",\"dst\":\"user_6\",\"confidence\":1.0},{\"src\":\"user_6\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1",
"completion_length": 386,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_3_544 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.4994635369242434,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r3_8",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "user_3",
"completion_length": 423,
"graph_f1": 0.5,
"predicted_edge_count": 2,
"question": "If you start at user_39 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.6989023123676081,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_12",
"task_id": "swarm_v2_r3_9",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_4_664\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"connected_to\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",",
"completion_length": 383,
"graph_f1": 0.0,
"predicted_edge_count": 0,
"question": "If you start at alias_4_664 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 3,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r3_10",
"task_type": "swarm_v2_3hop_trace"
},
{
"agent_answer": "{\"answer\":\"org_apex_dynamics\",\"supporting_edges\":[{\"src\":\"user_16\",\"rel\":\"connected_to\",\"dst\":\"org_apex_dynamics\",\"confidence\":1.0},{\"src\":\"org_apex_dynamics\",\"rel\":\"works_at\",\"dst\":\"user_18\",\"confidence\":1.0},{\"src\":\"user_18\",\"rel\":\"connected_to\",\"dst\":\"org_apex_dynamics\",\"confidence\":1.0},{\"src\":\"org_apex_dynamics\",\"rel\":\"works_at\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"connected_to\",\"dst",
"completion_length": 411,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at user_16 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
"reward": 0.5363758482799911,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_apex_dynamics",
"task_id": "swarm_v2_r3_11",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "{\"answer\":\"org_northbridge\",\"supporting_edges\":[{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":",
"completion_length": 431,
"graph_f1": 0.0,
"predicted_edge_count": 0,
"question": "If you start at user_3 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r3_12",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "user_7",
"completion_length": 418,
"graph_f1": 0.0,
"predicted_edge_count": 2,
"question": "If you start at user_30 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
"reward": 0.7410081838633457,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r3_13",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_11_684\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"loc_pune\",\"rel\":\"located_in\",\"dst\":\"org_north",
"completion_length": 387,
"graph_f1": 0.0,
"predicted_edge_count": 0,
"question": "If you start at alias_11_684 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r3_14",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "{\"answer\":\"user_35\",\"supporting_edges\":[{\"src\":\"alias_29_495\",\"rel\":\"alias_of\",\"dst\":\"user_35\",\"confidence\":1.0},{\"src\":\"user_35\",\"rel\":\"connected_to\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"loc_hyderabad\",\"rel\":\"located_in\",\"dst\":\"user_35\",\"confidence\":1.0},{\"src\":\"user_35\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel",
"completion_length": 375,
"graph_f1": 0.5,
"predicted_edge_count": 1,
"question": "If you start at alias_29_495 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
"reward": 0.42825354331930193,
"success": 0,
"support_edge_count": 3,
"task_answer": "loc_hyderabad",
"task_id": "swarm_v2_r3_15",
"task_type": "swarm_v2_3hop_trace"
},
{
"agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_38_337\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"located_in\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",\"dst\":\"user_5",
"completion_length": 378,
"graph_f1": 0.0,
"predicted_edge_count": 0,
"question": "If you start at alias_38_337 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r3_16",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "user_3",
"completion_length": 391,
"graph_f1": 0.5,
"predicted_edge_count": 2,
"question": "If you start at user_38 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.6997365142465289,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_12",
"task_id": "swarm_v2_r3_17",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "{\"answer\":\"user_39\",\"supporting_edges\":[{\"src\":\"alias_39_951\",\"rel\":\"alias_of\",\"dst\":\"user_39\",\"confidence\":1.0},{\"src\":\"user_39\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"works_at\",\"dst\":",
"completion_length": 373,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_39_951 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.49900727548994456,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r3_18",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "user_6",
"completion_length": 415,
"graph_f1": 0.0,
"predicted_edge_count": 2,
"question": "If you start at user_34 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
"reward": 0.7402569345038308,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r3_19",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_32_394\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"located_in\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",\"dst\":\"user_5",
"completion_length": 378,
"graph_f1": 0.0,
"predicted_edge_count": 0,
"question": "If you start at alias_32_394 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r3_20",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "{\"answer\":\"user_30\",\"supporting_edges\":[{\"src\":\"alias_30_572\",\"rel\":\"alias_of\",\"dst\":\"user_30\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"works_at\",\"dst\":",
"completion_length": 373,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_30_572 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.4998888700517808,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r3_21",
"task_type": "swarm_v2_2hop_trace"
},
{
"agent_answer": "user_3",
"completion_length": 401,
"graph_f1": 0.0,
"predicted_edge_count": 3,
"question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
"reward": 0.468212688181137,
"success": 0,
"support_edge_count": 3,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r3_22",
"task_type": "swarm_v2_3hop_trace"
},
{
"agent_answer": "user_32",
"completion_length": 434,
"graph_f1": 1.0,
"predicted_edge_count": 2,
"question": "If you start at alias_32_394 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.7753896096874865,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_helios_labs",
"task_id": "swarm_v2_r3_23",
"task_type": "swarm_v2_2hop_trace"
}
],
"model_path": "Qwen/Qwen2.5-0.5B-Instruct",
"summary": {
"avg_completion_length": 394.9583333333333,
"avg_graph_f1": 0.5347222222222222,
"avg_reward": 0.5307706200858483,
"episodes": 24,
"task_success_rate": 0.0
}
}
},
"pipeline_mode": "swarm_v2",
"skipped": false,
"summary": {
"compared_models": [
"finetuned_answerer",
"original_answerer"
],
"delta_vs_original": {
"avg_graph_f1": 0.14583333333333326,
"avg_reward": -0.07390934498390139,
"task_success_rate": 0.0
},
"finetuned_answerer": {
"avg_completion_length": 439.25,
"avg_graph_f1": 0.6805555555555555,
"avg_reward": 0.4568612751019469,
"episodes": 24,
"task_success_rate": 0.0
},
"generated_task_count": 24,
"generator_valid_rate": 0.5,
"original_answerer": {
"avg_completion_length": 394.9583333333333,
"avg_graph_f1": 0.5347222222222222,
"avg_reward": 0.5307706200858483,
"episodes": 24,
"task_success_rate": 0.0
},
"top_generator_invalid_reasons": [
[
"duplicate_or_near_duplicate",
3
]
]
},
"validation_reports_path": "artifacts/self_play_hf_a10g_train/post_training_eval_validation_reports.json"
}