OSINT / artifacts /local_hf_eval_round_006 /post_training_evaluation.json
ritishshrirao's picture
Add evaluation, minor updates to HF space
8ad6382
{
"answerer_models": {
"finetuned_answerer": "artifacts/hf_downloads/self_play_hf_l40s_full/round_006/answerer_train/final_model",
"original_answerer": "Qwen/Qwen2.5-0.5B-Instruct"
},
"generated_tasks_path": "artifacts/local_hf_eval_round_006/post_training_eval_generated_tasks.json",
"generator_model": "artifacts/hf_downloads/self_play_hf_l40s_full/round_006/answerer_train/final_model",
"model_evaluations": {
"finetuned_answerer": {
"episodes": [
{
"agent_answer": "user_25",
"completion_length": 547,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_30_572 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "user_25",
"task_id": "swarm_v2_r7_0",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_30",
"rel": "alias_of",
"src": "alias_30_572"
},
{
"confidence": 0.8,
"dst": "user_25",
"rel": "connected_to",
"src": "user_30"
}
]
},
{
"agent_answer": "loc_pune",
"completion_length": 552,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at user_37 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r7_1",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_11",
"rel": "connected_to",
"src": "user_37"
},
{
"confidence": 1.0,
"dst": "loc_pune",
"rel": "located_in",
"src": "user_11"
}
]
},
{
"agent_answer": "loc_hyderabad",
"completion_length": 553,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_24_458 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "loc_hyderabad",
"task_id": "swarm_v2_r7_2",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_24",
"rel": "alias_of",
"src": "alias_24_458"
},
{
"confidence": 1.0,
"dst": "loc_hyderabad",
"rel": "located_in",
"src": "user_24"
}
]
},
{
"agent_answer": "loc_bengaluru",
"completion_length": 599,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_4_664 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r7_3",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_4",
"rel": "alias_of",
"src": "alias_4_664"
},
{
"confidence": 1.0,
"dst": "loc_bengaluru",
"rel": "located_in",
"src": "user_4"
}
]
},
{
"agent_answer": "user_3",
"completion_length": 563,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "user_3",
"task_id": "swarm_v2_r7_4",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_39",
"rel": "alias_of",
"src": "alias_39_951"
},
{
"confidence": 0.8,
"dst": "user_3",
"rel": "connected_to",
"src": "user_39"
}
]
},
{
"agent_answer": "org_northbridge",
"completion_length": 595,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_20_174 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r7_5",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_20",
"rel": "alias_of",
"src": "alias_20_174"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_20"
}
]
},
{
"agent_answer": "org_helios_labs",
"completion_length": 556,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_34_511 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "org_helios_labs",
"task_id": "swarm_v2_r7_6",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_34",
"rel": "alias_of",
"src": "alias_34_511"
},
{
"confidence": 1.0,
"dst": "org_helios_labs",
"rel": "works_at",
"src": "user_34"
}
]
},
{
"agent_answer": "user_12",
"completion_length": 577,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_3_544 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "user_12",
"task_id": "swarm_v2_r7_7",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_3",
"rel": "alias_of",
"src": "alias_3_544"
},
{
"confidence": 0.8,
"dst": "user_12",
"rel": "connected_to",
"src": "user_3"
}
]
},
{
"agent_answer": "loc_pune",
"completion_length": 582,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_3_544 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r7_8",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_3",
"rel": "alias_of",
"src": "alias_3_544"
},
{
"confidence": 1.0,
"dst": "loc_pune",
"rel": "located_in",
"src": "user_3"
}
]
},
{
"agent_answer": "user_12",
"completion_length": 595,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at user_39 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "user_12",
"task_id": "swarm_v2_r7_9",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_3",
"rel": "connected_to",
"src": "user_39"
},
{
"confidence": 0.8,
"dst": "user_12",
"rel": "connected_to",
"src": "user_3"
}
]
},
{
"agent_answer": "user_1",
"completion_length": 579,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_4_664 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
"reward": 0.6704928651250475,
"success": 0,
"support_edge_count": 3,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r7_10",
"task_type": "swarm_v2_3hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_4",
"rel": "alias_of",
"src": "alias_4_664"
},
{
"confidence": 0.8,
"dst": "user_13",
"rel": "connected_to",
"src": "user_4"
},
{
"confidence": 1.0,
"dst": "loc_bengaluru",
"rel": "located_in",
"src": "user_13"
}
]
},
{
"agent_answer": "org_apex_dynamics",
"completion_length": 552,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at user_16 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "org_apex_dynamics",
"task_id": "swarm_v2_r7_11",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_18",
"rel": "connected_to",
"src": "user_16"
},
{
"confidence": 1.0,
"dst": "org_apex_dynamics",
"rel": "works_at",
"src": "user_18"
}
]
},
{
"agent_answer": "org_northbridge",
"completion_length": 555,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at user_3 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r7_12",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_12",
"rel": "connected_to",
"src": "user_3"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_12"
}
]
},
{
"agent_answer": "org_northbridge",
"completion_length": 603,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at user_30 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r7_13",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_25",
"rel": "connected_to",
"src": "user_30"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_25"
}
]
},
{
"agent_answer": "org_northbridge",
"completion_length": 568,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_11_684 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r7_14",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_11",
"rel": "alias_of",
"src": "alias_11_684"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_11"
}
]
},
{
"agent_answer": "user_35",
"completion_length": 547,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_29_495 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
"reward": 0.6704928651250475,
"success": 0,
"support_edge_count": 3,
"task_answer": "loc_hyderabad",
"task_id": "swarm_v2_r7_15",
"task_type": "swarm_v2_3hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_29",
"rel": "alias_of",
"src": "alias_29_495"
},
{
"confidence": 0.8,
"dst": "user_35",
"rel": "connected_to",
"src": "user_29"
},
{
"confidence": 1.0,
"dst": "loc_hyderabad",
"rel": "located_in",
"src": "user_35"
}
]
},
{
"agent_answer": "loc_bengaluru",
"completion_length": 548,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_38_337 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r7_16",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_38",
"rel": "alias_of",
"src": "alias_38_337"
},
{
"confidence": 1.0,
"dst": "loc_bengaluru",
"rel": "located_in",
"src": "user_38"
}
]
},
{
"agent_answer": "user_12",
"completion_length": 547,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at user_38 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "user_12",
"task_id": "swarm_v2_r7_17",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_3",
"rel": "connected_to",
"src": "user_38"
},
{
"confidence": 0.8,
"dst": "user_12",
"rel": "connected_to",
"src": "user_3"
}
]
},
{
"agent_answer": "org_northbridge",
"completion_length": 555,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_39_951 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r7_18",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_39",
"rel": "alias_of",
"src": "alias_39_951"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_39"
}
]
},
{
"agent_answer": "loc_pune",
"completion_length": 555,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at user_34 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r7_19",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_6",
"rel": "connected_to",
"src": "user_34"
},
{
"confidence": 1.0,
"dst": "loc_pune",
"rel": "located_in",
"src": "user_6"
}
]
},
{
"agent_answer": "loc_bengaluru",
"completion_length": 599,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_32_394 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r7_20",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_32",
"rel": "alias_of",
"src": "alias_32_394"
},
{
"confidence": 1.0,
"dst": "loc_bengaluru",
"rel": "located_in",
"src": "user_32"
}
]
},
{
"agent_answer": "org_northbridge",
"completion_length": 561,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_30_572 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r7_21",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_30",
"rel": "alias_of",
"src": "alias_30_572"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_30"
}
]
},
{
"agent_answer": "user_3",
"completion_length": 553,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
"reward": 0.6704928651250475,
"success": 0,
"support_edge_count": 3,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r7_22",
"task_type": "swarm_v2_3hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_39",
"rel": "alias_of",
"src": "alias_39_951"
},
{
"confidence": 0.8,
"dst": "user_3",
"rel": "connected_to",
"src": "user_39"
},
{
"confidence": 1.0,
"dst": "loc_pune",
"rel": "located_in",
"src": "user_3"
}
]
},
{
"agent_answer": "org_helios_labs",
"completion_length": 556,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_32_394 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.9323133138030703,
"success": 1,
"support_edge_count": 2,
"task_answer": "org_helios_labs",
"task_id": "swarm_v2_r7_23",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_32",
"rel": "alias_of",
"src": "alias_32_394"
},
{
"confidence": 1.0,
"dst": "org_helios_labs",
"rel": "works_at",
"src": "user_32"
}
]
}
],
"model_path": "artifacts/hf_downloads/self_play_hf_l40s_full/round_006/answerer_train/final_model",
"summary": {
"avg_completion_length": 566.5416666666666,
"avg_graph_f1": 0.0,
"avg_reward": 0.8995857577183174,
"episodes": 24,
"task_success_rate": 0.875
}
},
"original_answerer": {
"episodes": [
{
"agent_answer": "{\"answer\":\"user_30\",\"supporting_edges\":[{\"src\":\"alias_30_572\",\"rel\":\"alias_of\",\"dst\":\"user_30\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"loc_delhi\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst",
"completion_length": 388,
"graph_f1": 0.6666666666666666,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_30",
"rel": "alias_of",
"src": "alias_30_572"
}
],
"predicted_edge_count": 1,
"question": "If you start at alias_30_572 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.4462659972989426,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_25",
"task_id": "swarm_v2_r7_0",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_30",
"rel": "alias_of",
"src": "alias_30_572"
},
{
"confidence": 0.8,
"dst": "user_25",
"rel": "connected_to",
"src": "user_30"
}
]
},
{
"agent_answer": "user_3",
"completion_length": 445,
"graph_f1": 0.0,
"pred_edges": [
{
"confidence": 1.0,
"dst": "loc_pune",
"rel": "connected_to",
"src": "user_37"
},
{
"confidence": 1.0,
"dst": "user_3",
"rel": "located_in",
"src": "loc_pune"
}
],
"predicted_edge_count": 2,
"question": "If you start at user_37 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
"reward": 0.5084758288369232,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r7_1",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_11",
"rel": "connected_to",
"src": "user_37"
},
{
"confidence": 1.0,
"dst": "loc_pune",
"rel": "located_in",
"src": "user_11"
}
]
},
{
"agent_answer": "{\"answer\":\"user_24\",\"supporting_edges\":[{\"src\":\"alias_24_458\",\"rel\":\"alias_of\",\"dst\":\"user_24\",\"confidence\":1.0},{\"src\":\"user_24\",\"rel\":\"located_in\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"loc_hyderabad\",\"rel\":\"located_in\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_24\",\"confidence\":1.0},{\"src\":\"user_24\",\"rel\":\"located_in\",",
"completion_length": 387,
"graph_f1": 1.0,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_24",
"rel": "alias_of",
"src": "alias_24_458"
},
{
"confidence": 1.0,
"dst": "loc_hyderabad",
"rel": "located_in",
"src": "user_24"
}
],
"predicted_edge_count": 2,
"question": "If you start at alias_24_458 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.5018390028775422,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_hyderabad",
"task_id": "swarm_v2_r7_2",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_24",
"rel": "alias_of",
"src": "alias_24_458"
},
{
"confidence": 1.0,
"dst": "loc_hyderabad",
"rel": "located_in",
"src": "user_24"
}
]
},
{
"agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_4_664\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",",
"completion_length": 381,
"graph_f1": 1.0,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_4",
"rel": "alias_of",
"src": "alias_4_664"
},
{
"confidence": 1.0,
"dst": "loc_bengaluru",
"rel": "located_in",
"src": "user_4"
}
],
"predicted_edge_count": 2,
"question": "If you start at alias_4_664 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.5000528755950792,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r7_3",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_4",
"rel": "alias_of",
"src": "alias_4_664"
},
{
"confidence": 1.0,
"dst": "loc_bengaluru",
"rel": "located_in",
"src": "user_4"
}
]
},
{
"agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_39_951\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"user_39\",",
"completion_length": 396,
"graph_f1": 1.0,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_39",
"rel": "alias_of",
"src": "alias_39_951"
},
{
"confidence": 0.8,
"dst": "user_3",
"rel": "connected_to",
"src": "user_39"
}
],
"predicted_edge_count": 2,
"question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.5017743415980812,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_3",
"task_id": "swarm_v2_r7_4",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_39",
"rel": "alias_of",
"src": "alias_39_951"
},
{
"confidence": 0.8,
"dst": "user_3",
"rel": "connected_to",
"src": "user_39"
}
]
},
{
"agent_answer": "user_7",
"completion_length": 387,
"graph_f1": 0.0,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_7",
"rel": "alias_of",
"src": "alias_20_174"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_7"
}
],
"predicted_edge_count": 2,
"question": "If you start at alias_20_174 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.7089737433490155,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r7_5",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_20",
"rel": "alias_of",
"src": "alias_20_174"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_20"
}
]
},
{
"agent_answer": "{\"answer\":\"user_34\",\"supporting_edges\":[{\"src\":\"alias_34_511\",\"rel\":\"alias_of\",\"dst\":\"user_34\",\"confidence\":1.0},{\"src\":\"user_34\",\"rel\":\"works_at\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",\"dst\":\"user_0\",\"confidence\":1.0},{\"src\":\"user_0\",\"rel\":\"works_at\",\"dst\":\"user_16\",\"confidence\":1.0},{\"src\":\"user_16\",\"rel\":\"works",
"completion_length": 361,
"graph_f1": 1.0,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_34",
"rel": "alias_of",
"src": "alias_34_511"
},
{
"confidence": 1.0,
"dst": "org_helios_labs",
"rel": "works_at",
"src": "user_34"
}
],
"predicted_edge_count": 2,
"question": "If you start at alias_34_511 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.49966549250864506,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_helios_labs",
"task_id": "swarm_v2_r7_6",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_34",
"rel": "alias_of",
"src": "alias_34_511"
},
{
"confidence": 1.0,
"dst": "org_helios_labs",
"rel": "works_at",
"src": "user_34"
}
]
},
{
"agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_3_544\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":",
"completion_length": 408,
"graph_f1": 0.6666666666666666,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_3",
"rel": "alias_of",
"src": "alias_3_544"
}
],
"predicted_edge_count": 1,
"question": "If you start at alias_3_544 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.4454139519345446,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_12",
"task_id": "swarm_v2_r7_7",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_3",
"rel": "alias_of",
"src": "alias_3_544"
},
{
"confidence": 0.8,
"dst": "user_12",
"rel": "connected_to",
"src": "user_3"
}
]
},
{
"agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_3_544\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"loc_pune\",\"rel\":\"located_in\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"located_in\",\"dst\":\"user_6\",\"confidence\":",
"completion_length": 398,
"graph_f1": 1.0,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_3",
"rel": "alias_of",
"src": "alias_3_544"
},
{
"confidence": 1.0,
"dst": "loc_pune",
"rel": "located_in",
"src": "user_3"
}
],
"predicted_edge_count": 2,
"question": "If you start at alias_3_544 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.4994635369242434,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r7_8",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_3",
"rel": "alias_of",
"src": "alias_3_544"
},
{
"confidence": 1.0,
"dst": "loc_pune",
"rel": "located_in",
"src": "user_3"
}
]
},
{
"agent_answer": "user_3",
"completion_length": 423,
"graph_f1": 0.5,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_3",
"rel": "connected_to",
"src": "user_39"
},
{
"confidence": 1.0,
"dst": "user_39",
"rel": "connected_to",
"src": "user_3"
}
],
"predicted_edge_count": 2,
"question": "If you start at user_39 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.6989023123676081,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_12",
"task_id": "swarm_v2_r7_9",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_3",
"rel": "connected_to",
"src": "user_39"
},
{
"confidence": 0.8,
"dst": "user_12",
"rel": "connected_to",
"src": "user_3"
}
]
},
{
"agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_4_664\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"connected_to\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",",
"completion_length": 383,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_4_664 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 3,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r7_10",
"task_type": "swarm_v2_3hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_4",
"rel": "alias_of",
"src": "alias_4_664"
},
{
"confidence": 0.8,
"dst": "user_13",
"rel": "connected_to",
"src": "user_4"
},
{
"confidence": 1.0,
"dst": "loc_bengaluru",
"rel": "located_in",
"src": "user_13"
}
]
},
{
"agent_answer": "{\"answer\":\"org_apex_dynamics\",\"supporting_edges\":[{\"src\":\"user_16\",\"rel\":\"connected_to\",\"dst\":\"org_apex_dynamics\",\"confidence\":1.0},{\"src\":\"org_apex_dynamics\",\"rel\":\"works_at\",\"dst\":\"user_18\",\"confidence\":1.0},{\"src\":\"user_18\",\"rel\":\"connected_to\",\"dst\":\"org_apex_dynamics\",\"confidence\":1.0},{\"src\":\"org_apex_dynamics\",\"rel\":\"works_at\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"connected_to\",\"dst",
"completion_length": 411,
"graph_f1": 1.0,
"pred_edges": [
{
"confidence": 0.8,
"dst": "user_18",
"rel": "connected_to",
"src": "user_16"
},
{
"confidence": 1.0,
"dst": "org_apex_dynamics",
"rel": "works_at",
"src": "user_18"
}
],
"predicted_edge_count": 2,
"question": "If you start at user_16 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
"reward": 0.5363758482799911,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_apex_dynamics",
"task_id": "swarm_v2_r7_11",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_18",
"rel": "connected_to",
"src": "user_16"
},
{
"confidence": 1.0,
"dst": "org_apex_dynamics",
"rel": "works_at",
"src": "user_18"
}
]
},
{
"agent_answer": "{\"answer\":\"org_northbridge\",\"supporting_edges\":[{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":",
"completion_length": 431,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at user_3 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r7_12",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_12",
"rel": "connected_to",
"src": "user_3"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_12"
}
]
},
{
"agent_answer": "user_7",
"completion_length": 418,
"graph_f1": 0.0,
"pred_edges": [
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "connected_to",
"src": "user_30"
},
{
"confidence": 1.0,
"dst": "user_7",
"rel": "works_at",
"src": "org_northbridge"
}
],
"predicted_edge_count": 2,
"question": "If you start at user_30 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?",
"reward": 0.7410081838633457,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r7_13",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_25",
"rel": "connected_to",
"src": "user_30"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_25"
}
]
},
{
"agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_11_684\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"loc_pune\",\"rel\":\"located_in\",\"dst\":\"org_north",
"completion_length": 387,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_11_684 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r7_14",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_11",
"rel": "alias_of",
"src": "alias_11_684"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_11"
}
]
},
{
"agent_answer": "{\"answer\":\"user_35\",\"supporting_edges\":[{\"src\":\"alias_29_495\",\"rel\":\"alias_of\",\"dst\":\"user_35\",\"confidence\":1.0},{\"src\":\"user_35\",\"rel\":\"connected_to\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"loc_hyderabad\",\"rel\":\"located_in\",\"dst\":\"user_35\",\"confidence\":1.0},{\"src\":\"user_35\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel",
"completion_length": 375,
"graph_f1": 0.5,
"pred_edges": [
{
"confidence": 1.0,
"dst": "loc_hyderabad",
"rel": "located_in",
"src": "user_35"
}
],
"predicted_edge_count": 1,
"question": "If you start at alias_29_495 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
"reward": 0.42825354331930193,
"success": 0,
"support_edge_count": 3,
"task_answer": "loc_hyderabad",
"task_id": "swarm_v2_r7_15",
"task_type": "swarm_v2_3hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_29",
"rel": "alias_of",
"src": "alias_29_495"
},
{
"confidence": 0.8,
"dst": "user_35",
"rel": "connected_to",
"src": "user_29"
},
{
"confidence": 1.0,
"dst": "loc_hyderabad",
"rel": "located_in",
"src": "user_35"
}
]
},
{
"agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_38_337\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"located_in\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",\"dst\":\"user_5",
"completion_length": 378,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_38_337 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r7_16",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_38",
"rel": "alias_of",
"src": "alias_38_337"
},
{
"confidence": 1.0,
"dst": "loc_bengaluru",
"rel": "located_in",
"src": "user_38"
}
]
},
{
"agent_answer": "user_3",
"completion_length": 391,
"graph_f1": 0.5,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_3",
"rel": "connected_to",
"src": "user_38"
},
{
"confidence": 1.0,
"dst": "user_38",
"rel": "connected_to",
"src": "user_3"
}
],
"predicted_edge_count": 2,
"question": "If you start at user_38 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?",
"reward": 0.6997365142465289,
"success": 0,
"support_edge_count": 2,
"task_answer": "user_12",
"task_id": "swarm_v2_r7_17",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_3",
"rel": "connected_to",
"src": "user_38"
},
{
"confidence": 0.8,
"dst": "user_12",
"rel": "connected_to",
"src": "user_3"
}
]
},
{
"agent_answer": "{\"answer\":\"user_39\",\"supporting_edges\":[{\"src\":\"alias_39_951\",\"rel\":\"alias_of\",\"dst\":\"user_39\",\"confidence\":1.0},{\"src\":\"user_39\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"works_at\",\"dst\":",
"completion_length": 373,
"graph_f1": 1.0,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_39",
"rel": "alias_of",
"src": "alias_39_951"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_39"
}
],
"predicted_edge_count": 2,
"question": "If you start at alias_39_951 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.49900727548994456,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r7_18",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_39",
"rel": "alias_of",
"src": "alias_39_951"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_39"
}
]
},
{
"agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"user_34\",\"rel\":\"connected_to\",\"dst\":\"user_6\",\"confidence\":1.0},{\"src\":\"user_6\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"loc_pune\",\"rel\":\"located_in\",\"dst\":\"org_apex_dynamics\",\"confidence\":1.0},{\"src\":\"org_apex_dynamics\",\"rel\":\"works_at\",\"dst\":\"user_6\",\"confidence\":1.0}],\"orchestrator\":{\"spawn_count\":1,\"finished_subtasks\":0,\"critical_steps\":0",
"completion_length": 412,
"graph_f1": 1.0,
"pred_edges": [
{
"confidence": 0.8,
"dst": "user_6",
"rel": "connected_to",
"src": "user_34"
},
{
"confidence": 1.0,
"dst": "loc_pune",
"rel": "located_in",
"src": "user_6"
}
],
"predicted_edge_count": 2,
"question": "If you start at user_34 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?",
"reward": 0.5347671871919794,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r7_19",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 0.8,
"dst": "user_6",
"rel": "connected_to",
"src": "user_34"
},
{
"confidence": 1.0,
"dst": "loc_pune",
"rel": "located_in",
"src": "user_6"
}
]
},
{
"agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_32_394\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"located_in\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",\"dst\":\"user_5",
"completion_length": 378,
"graph_f1": 0.0,
"pred_edges": [],
"predicted_edge_count": 0,
"question": "If you start at alias_32_394 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?",
"reward": 0.39524653939368076,
"success": 0,
"support_edge_count": 2,
"task_answer": "loc_bengaluru",
"task_id": "swarm_v2_r7_20",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_32",
"rel": "alias_of",
"src": "alias_32_394"
},
{
"confidence": 1.0,
"dst": "loc_bengaluru",
"rel": "located_in",
"src": "user_32"
}
]
},
{
"agent_answer": "{\"answer\":\"user_30\",\"supporting_edges\":[{\"src\":\"alias_30_572\",\"rel\":\"alias_of\",\"dst\":\"user_30\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"works_at\",\"dst\":",
"completion_length": 373,
"graph_f1": 1.0,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_30",
"rel": "alias_of",
"src": "alias_30_572"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_30"
}
],
"predicted_edge_count": 2,
"question": "If you start at alias_30_572 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.4998888700517808,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_northbridge",
"task_id": "swarm_v2_r7_21",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_30",
"rel": "alias_of",
"src": "alias_30_572"
},
{
"confidence": 1.0,
"dst": "org_northbridge",
"rel": "works_at",
"src": "user_30"
}
]
},
{
"agent_answer": "user_3",
"completion_length": 431,
"graph_f1": 0.0,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_3",
"rel": "alias_of",
"src": "alias_39_951"
},
{
"confidence": 1.0,
"dst": "loc_pune",
"rel": "connected_to",
"src": "user_3"
},
{
"confidence": 1.0,
"dst": "user_3",
"rel": "located_in",
"src": "loc_pune"
}
],
"predicted_edge_count": 3,
"question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?",
"reward": 0.468212688181137,
"success": 0,
"support_edge_count": 3,
"task_answer": "loc_pune",
"task_id": "swarm_v2_r7_22",
"task_type": "swarm_v2_3hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_39",
"rel": "alias_of",
"src": "alias_39_951"
},
{
"confidence": 0.8,
"dst": "user_3",
"rel": "connected_to",
"src": "user_39"
},
{
"confidence": 1.0,
"dst": "loc_pune",
"rel": "located_in",
"src": "user_3"
}
]
},
{
"agent_answer": "user_32",
"completion_length": 434,
"graph_f1": 1.0,
"pred_edges": [
{
"confidence": 1.0,
"dst": "user_32",
"rel": "alias_of",
"src": "alias_32_394"
},
{
"confidence": 1.0,
"dst": "org_helios_labs",
"rel": "works_at",
"src": "user_32"
}
],
"predicted_edge_count": 2,
"question": "If you start at alias_32_394 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?",
"reward": 0.7753896096874865,
"success": 0,
"support_edge_count": 2,
"task_answer": "org_helios_labs",
"task_id": "swarm_v2_r7_23",
"task_type": "swarm_v2_2hop_trace",
"truth_edges": [
{
"confidence": 1.0,
"dst": "user_32",
"rel": "alias_of",
"src": "alias_32_394"
},
{
"confidence": 1.0,
"dst": "org_helios_labs",
"rel": "works_at",
"src": "user_32"
}
]
}
],
"model_path": "Qwen/Qwen2.5-0.5B-Instruct",
"summary": {
"avg_completion_length": 397.875,
"avg_graph_f1": 0.5347222222222222,
"avg_reward": 0.5195708125237718,
"episodes": 24,
"task_success_rate": 0.0
}
}
},
"pipeline_mode": "swarm_v2",
"skipped": false,
"summary": {
"compared_models": [
"finetuned_answerer",
"original_answerer"
],
"delta_vs_original": {
"avg_graph_f1": -0.5347222222222222,
"avg_reward": 0.3800149451945456,
"task_success_rate": 0.875
},
"finetuned_answerer": {
"avg_completion_length": 566.5416666666666,
"avg_graph_f1": 0.0,
"avg_reward": 0.8995857577183174,
"episodes": 24,
"task_success_rate": 0.875
},
"generated_task_count": 24,
"generator_valid_rate": 0.5,
"original_answerer": {
"avg_completion_length": 397.875,
"avg_graph_f1": 0.5347222222222222,
"avg_reward": 0.5195708125237718,
"episodes": 24,
"task_success_rate": 0.0
},
"top_generator_invalid_reasons": [
[
"duplicate_or_near_duplicate",
3
]
]
},
"validation_reports_path": "artifacts/local_hf_eval_round_006/post_training_eval_validation_reports.json"
}