| { |
| "answerer_models": { |
| "finetuned_answerer": "artifacts/self_play_hf_a10g_train/round_002/answerer_train/final_model", |
| "original_answerer": "Qwen/Qwen2.5-0.5B-Instruct" |
| }, |
| "generated_tasks_path": "artifacts/self_play_hf_a10g_train/post_training_eval_generated_tasks.json", |
| "generator_model": "artifacts/self_play_hf_a10g_train/round_002/answerer_train/final_model", |
| "model_evaluations": { |
| "finetuned_answerer": { |
| "episodes": [ |
| { |
| "agent_answer": "finished_subtasks\": 2", |
| "completion_length": 433, |
| "graph_f1": 0.6666666666666666, |
| "predicted_edge_count": 1, |
| "question": "If you start at alias_30_572 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?", |
| "reward": 0.4462659972989426, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "user_25", |
| "task_id": "swarm_v2_r3_0", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks\": 2", |
| "completion_length": 434, |
| "graph_f1": 0.0, |
| "predicted_edge_count": 0, |
| "question": "If you start at user_37 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.39524653939368076, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_pune", |
| "task_id": "swarm_v2_r3_1", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks\": 2", |
| "completion_length": 434, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_24_458 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.5018390028775422, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_hyderabad", |
| "task_id": "swarm_v2_r3_2", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks\": 2,", |
| "completion_length": 440, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_4_664 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.5000528755950792, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_bengaluru", |
| "task_id": "swarm_v2_r3_3", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "finished_sub", |
| "completion_length": 426, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?", |
| "reward": 0.5017743415980812, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "user_3", |
| "task_id": "swarm_v2_r3_4", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks\":", |
| "completion_length": 435, |
| "graph_f1": 0.6666666666666666, |
| "predicted_edge_count": 1, |
| "question": "If you start at alias_20_174 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.4462659972989426, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_northbridge", |
| "task_id": "swarm_v2_r3_5", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks", |
| "completion_length": 431, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_34_511 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.49966549250864506, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_helios_labs", |
| "task_id": "swarm_v2_r3_6", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "critical", |
| "completion_length": 447, |
| "graph_f1": 0.6666666666666666, |
| "predicted_edge_count": 1, |
| "question": "If you start at alias_3_544 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?", |
| "reward": 0.4454139519345446, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "user_12", |
| "task_id": "swarm_v2_r3_7", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "critical", |
| "completion_length": 445, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_3_544 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.4994635369242434, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_pune", |
| "task_id": "swarm_v2_r3_8", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks\": 2,", |
| "completion_length": 431, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at user_39 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?", |
| "reward": 0.530983823504946, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "user_12", |
| "task_id": "swarm_v2_r3_9", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "dst\": \"user", |
| "completion_length": 452, |
| "graph_f1": 0.5, |
| "predicted_edge_count": 1, |
| "question": "If you start at alias_4_664 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?", |
| "reward": 0.4353546524684235, |
| "success": 0, |
| "support_edge_count": 3, |
| "task_answer": "loc_bengaluru", |
| "task_id": "swarm_v2_r3_10", |
| "task_type": "swarm_v2_3hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks\": 2,", |
| "completion_length": 448, |
| "graph_f1": 0.0, |
| "predicted_edge_count": 0, |
| "question": "If you start at user_16 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.39524653939368076, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_apex_dynamics", |
| "task_id": "swarm_v2_r3_11", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "critical_steps\":", |
| "completion_length": 459, |
| "graph_f1": 0.0, |
| "predicted_edge_count": 0, |
| "question": "If you start at user_3 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.39524653939368076, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_northbridge", |
| "task_id": "swarm_v2_r3_12", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "", |
| "completion_length": 446, |
| "graph_f1": 0.6666666666666666, |
| "predicted_edge_count": 1, |
| "question": "If you start at user_30 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.36246822117289235, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_northbridge", |
| "task_id": "swarm_v2_r3_13", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks\": 2", |
| "completion_length": 428, |
| "graph_f1": 0.6666666666666666, |
| "predicted_edge_count": 1, |
| "question": "If you start at alias_11_684 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.4462659972989426, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_northbridge", |
| "task_id": "swarm_v2_r3_14", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "dst\": \"loc", |
| "completion_length": 447, |
| "graph_f1": 0.5, |
| "predicted_edge_count": 1, |
| "question": "If you start at alias_29_495 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?", |
| "reward": 0.4353546524684235, |
| "success": 0, |
| "support_edge_count": 3, |
| "task_answer": "loc_hyderabad", |
| "task_id": "swarm_v2_r3_15", |
| "task_type": "swarm_v2_3hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks\":", |
| "completion_length": 433, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_38_337 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.5000528755950792, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_bengaluru", |
| "task_id": "swarm_v2_r3_16", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks\": 2,", |
| "completion_length": 437, |
| "graph_f1": 0.6666666666666666, |
| "predicted_edge_count": 1, |
| "question": "If you start at user_38 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?", |
| "reward": 0.44122448096324046, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "user_12", |
| "task_id": "swarm_v2_r3_17", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks\":", |
| "completion_length": 435, |
| "graph_f1": 0.6666666666666666, |
| "predicted_edge_count": 1, |
| "question": "If you start at alias_39_951 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.4454139519345446, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_northbridge", |
| "task_id": "swarm_v2_r3_18", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "critical_steps\":", |
| "completion_length": 456, |
| "graph_f1": 0.0, |
| "predicted_edge_count": 0, |
| "question": "If you start at user_34 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.39524653939368076, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_pune", |
| "task_id": "swarm_v2_r3_19", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks\":", |
| "completion_length": 433, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_32_394 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.5000528755950792, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_bengaluru", |
| "task_id": "swarm_v2_r3_20", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks\":", |
| "completion_length": 435, |
| "graph_f1": 0.6666666666666666, |
| "predicted_edge_count": 1, |
| "question": "If you start at alias_30_572 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.4462659972989426, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_northbridge", |
| "task_id": "swarm_v2_r3_21", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "dst\": \"user_", |
| "completion_length": 446, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 3, |
| "question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?", |
| "reward": 0.4993624365793478, |
| "success": 0, |
| "support_edge_count": 3, |
| "task_answer": "loc_pune", |
| "task_id": "swarm_v2_r3_22", |
| "task_type": "swarm_v2_3hop_trace" |
| }, |
| { |
| "agent_answer": "finished_subtasks", |
| "completion_length": 431, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_32_394 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.5001432839561198, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_helios_labs", |
| "task_id": "swarm_v2_r3_23", |
| "task_type": "swarm_v2_2hop_trace" |
| } |
| ], |
| "model_path": "artifacts/self_play_hf_a10g_train/round_002/answerer_train/final_model", |
| "summary": { |
| "avg_completion_length": 439.25, |
| "avg_graph_f1": 0.6805555555555555, |
| "avg_reward": 0.4568612751019469, |
| "episodes": 24, |
| "task_success_rate": 0.0 |
| } |
| }, |
| "original_answerer": { |
| "episodes": [ |
| { |
| "agent_answer": "{\"answer\":\"user_30\",\"supporting_edges\":[{\"src\":\"alias_30_572\",\"rel\":\"alias_of\",\"dst\":\"user_30\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"loc_delhi\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"connected_to\",\"dst", |
| "completion_length": 388, |
| "graph_f1": 0.6666666666666666, |
| "predicted_edge_count": 1, |
| "question": "If you start at alias_30_572 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?", |
| "reward": 0.4462659972989426, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "user_25", |
| "task_id": "swarm_v2_r3_0", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "user_3", |
| "completion_length": 414, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at user_37 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.5717814630149074, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_pune", |
| "task_id": "swarm_v2_r3_1", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"user_24\",\"supporting_edges\":[{\"src\":\"alias_24_458\",\"rel\":\"alias_of\",\"dst\":\"user_24\",\"confidence\":1.0},{\"src\":\"user_24\",\"rel\":\"located_in\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"loc_hyderabad\",\"rel\":\"located_in\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_24\",\"confidence\":1.0},{\"src\":\"user_24\",\"rel\":\"located_in\",", |
| "completion_length": 387, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_24_458 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.5018390028775422, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_hyderabad", |
| "task_id": "swarm_v2_r3_2", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_4_664\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",", |
| "completion_length": 381, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_4_664 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.5000528755950792, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_bengaluru", |
| "task_id": "swarm_v2_r3_3", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_39_951\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"user_39\",", |
| "completion_length": 396, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?", |
| "reward": 0.5017743415980812, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "user_3", |
| "task_id": "swarm_v2_r3_4", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "user_7", |
| "completion_length": 387, |
| "graph_f1": 0.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_20_174 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.7089737433490155, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_northbridge", |
| "task_id": "swarm_v2_r3_5", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"user_34\",\"supporting_edges\":[{\"src\":\"alias_34_511\",\"rel\":\"alias_of\",\"dst\":\"user_34\",\"confidence\":1.0},{\"src\":\"user_34\",\"rel\":\"works_at\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",\"dst\":\"user_0\",\"confidence\":1.0},{\"src\":\"user_0\",\"rel\":\"works_at\",\"dst\":\"user_16\",\"confidence\":1.0},{\"src\":\"user_16\",\"rel\":\"works", |
| "completion_length": 361, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_34_511 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.49966549250864506, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_helios_labs", |
| "task_id": "swarm_v2_r3_6", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_3_544\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"loc_pune\",\"confidence\":", |
| "completion_length": 408, |
| "graph_f1": 0.6666666666666666, |
| "predicted_edge_count": 1, |
| "question": "If you start at alias_3_544 and follow the relation path alias_of -> connected_to, which entity do you reach after 2 hops?", |
| "reward": 0.4454139519345446, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "user_12", |
| "task_id": "swarm_v2_r3_7", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"user_3\",\"supporting_edges\":[{\"src\":\"alias_3_544\",\"rel\":\"alias_of\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"loc_pune\",\"rel\":\"located_in\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"located_in\",\"dst\":\"user_6\",\"confidence\":1.0},{\"src\":\"user_6\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1", |
| "completion_length": 386, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_3_544 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.4994635369242434, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_pune", |
| "task_id": "swarm_v2_r3_8", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "user_3", |
| "completion_length": 423, |
| "graph_f1": 0.5, |
| "predicted_edge_count": 2, |
| "question": "If you start at user_39 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?", |
| "reward": 0.6989023123676081, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "user_12", |
| "task_id": "swarm_v2_r3_9", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_4_664\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"connected_to\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel\":\"works_at\",", |
| "completion_length": 383, |
| "graph_f1": 0.0, |
| "predicted_edge_count": 0, |
| "question": "If you start at alias_4_664 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?", |
| "reward": 0.39524653939368076, |
| "success": 0, |
| "support_edge_count": 3, |
| "task_answer": "loc_bengaluru", |
| "task_id": "swarm_v2_r3_10", |
| "task_type": "swarm_v2_3hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"org_apex_dynamics\",\"supporting_edges\":[{\"src\":\"user_16\",\"rel\":\"connected_to\",\"dst\":\"org_apex_dynamics\",\"confidence\":1.0},{\"src\":\"org_apex_dynamics\",\"rel\":\"works_at\",\"dst\":\"user_18\",\"confidence\":1.0},{\"src\":\"user_18\",\"rel\":\"connected_to\",\"dst\":\"org_apex_dynamics\",\"confidence\":1.0},{\"src\":\"org_apex_dynamics\",\"rel\":\"works_at\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"connected_to\",\"dst", |
| "completion_length": 411, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at user_16 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.5363758482799911, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_apex_dynamics", |
| "task_id": "swarm_v2_r3_11", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"org_northbridge\",\"supporting_edges\":[{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"connected_to\",\"dst\":\"org_northbridge\",\"confidence\":", |
| "completion_length": 431, |
| "graph_f1": 0.0, |
| "predicted_edge_count": 0, |
| "question": "If you start at user_3 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.39524653939368076, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_northbridge", |
| "task_id": "swarm_v2_r3_12", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "user_7", |
| "completion_length": 418, |
| "graph_f1": 0.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at user_30 and follow the relation path connected_to -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.7410081838633457, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_northbridge", |
| "task_id": "swarm_v2_r3_13", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_11_684\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"located_in\",\"dst\":\"loc_pune\",\"confidence\":1.0},{\"src\":\"loc_pune\",\"rel\":\"located_in\",\"dst\":\"org_north", |
| "completion_length": 387, |
| "graph_f1": 0.0, |
| "predicted_edge_count": 0, |
| "question": "If you start at alias_11_684 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.39524653939368076, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_northbridge", |
| "task_id": "swarm_v2_r3_14", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"user_35\",\"supporting_edges\":[{\"src\":\"alias_29_495\",\"rel\":\"alias_of\",\"dst\":\"user_35\",\"confidence\":1.0},{\"src\":\"user_35\",\"rel\":\"connected_to\",\"dst\":\"loc_hyderabad\",\"confidence\":1.0},{\"src\":\"loc_hyderabad\",\"rel\":\"located_in\",\"dst\":\"user_35\",\"confidence\":1.0},{\"src\":\"user_35\",\"rel\":\"located_in\",\"dst\":\"org_helios_labs\",\"confidence\":1.0},{\"src\":\"org_helios_labs\",\"rel", |
| "completion_length": 375, |
| "graph_f1": 0.5, |
| "predicted_edge_count": 1, |
| "question": "If you start at alias_29_495 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?", |
| "reward": 0.42825354331930193, |
| "success": 0, |
| "support_edge_count": 3, |
| "task_answer": "loc_hyderabad", |
| "task_id": "swarm_v2_r3_15", |
| "task_type": "swarm_v2_3hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_38_337\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"located_in\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",\"dst\":\"user_5", |
| "completion_length": 378, |
| "graph_f1": 0.0, |
| "predicted_edge_count": 0, |
| "question": "If you start at alias_38_337 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.39524653939368076, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_bengaluru", |
| "task_id": "swarm_v2_r3_16", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "user_3", |
| "completion_length": 391, |
| "graph_f1": 0.5, |
| "predicted_edge_count": 2, |
| "question": "If you start at user_38 and follow the relation path connected_to -> connected_to, which entity do you reach after 2 hops?", |
| "reward": 0.6997365142465289, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "user_12", |
| "task_id": "swarm_v2_r3_17", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"user_39\",\"supporting_edges\":[{\"src\":\"alias_39_951\",\"rel\":\"alias_of\",\"dst\":\"user_39\",\"confidence\":1.0},{\"src\":\"user_39\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"works_at\",\"dst\":", |
| "completion_length": 373, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_39_951 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.49900727548994456, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_northbridge", |
| "task_id": "swarm_v2_r3_18", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "user_6", |
| "completion_length": 415, |
| "graph_f1": 0.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at user_34 and follow the relation path connected_to -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.7402569345038308, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_pune", |
| "task_id": "swarm_v2_r3_19", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"user_7\",\"supporting_edges\":[{\"src\":\"alias_32_394\",\"rel\":\"alias_of\",\"dst\":\"user_7\",\"confidence\":1.0},{\"src\":\"user_7\",\"rel\":\"located_in\",\"dst\":\"loc_bengaluru\",\"confidence\":1.0},{\"src\":\"loc_bengaluru\",\"rel\":\"located_in\",\"dst\":\"user_1\",\"confidence\":1.0},{\"src\":\"user_1\",\"rel\":\"located_in\",\"dst\":\"user_4\",\"confidence\":1.0},{\"src\":\"user_4\",\"rel\":\"located_in\",\"dst\":\"user_5", |
| "completion_length": 378, |
| "graph_f1": 0.0, |
| "predicted_edge_count": 0, |
| "question": "If you start at alias_32_394 and follow the relation path alias_of -> located_in, which entity do you reach after 2 hops?", |
| "reward": 0.39524653939368076, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "loc_bengaluru", |
| "task_id": "swarm_v2_r3_20", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "{\"answer\":\"user_30\",\"supporting_edges\":[{\"src\":\"alias_30_572\",\"rel\":\"alias_of\",\"dst\":\"user_30\",\"confidence\":1.0},{\"src\":\"user_30\",\"rel\":\"works_at\",\"dst\":\"org_northbridge\",\"confidence\":1.0},{\"src\":\"org_northbridge\",\"rel\":\"works_at\",\"dst\":\"user_3\",\"confidence\":1.0},{\"src\":\"user_3\",\"rel\":\"works_at\",\"dst\":\"user_10\",\"confidence\":1.0},{\"src\":\"user_10\",\"rel\":\"works_at\",\"dst\":", |
| "completion_length": 373, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_30_572 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.4998888700517808, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_northbridge", |
| "task_id": "swarm_v2_r3_21", |
| "task_type": "swarm_v2_2hop_trace" |
| }, |
| { |
| "agent_answer": "user_3", |
| "completion_length": 401, |
| "graph_f1": 0.0, |
| "predicted_edge_count": 3, |
| "question": "If you start at alias_39_951 and follow the relation path alias_of -> connected_to -> located_in, which entity do you reach after 3 hops?", |
| "reward": 0.468212688181137, |
| "success": 0, |
| "support_edge_count": 3, |
| "task_answer": "loc_pune", |
| "task_id": "swarm_v2_r3_22", |
| "task_type": "swarm_v2_3hop_trace" |
| }, |
| { |
| "agent_answer": "user_32", |
| "completion_length": 434, |
| "graph_f1": 1.0, |
| "predicted_edge_count": 2, |
| "question": "If you start at alias_32_394 and follow the relation path alias_of -> works_at, which entity do you reach after 2 hops?", |
| "reward": 0.7753896096874865, |
| "success": 0, |
| "support_edge_count": 2, |
| "task_answer": "org_helios_labs", |
| "task_id": "swarm_v2_r3_23", |
| "task_type": "swarm_v2_2hop_trace" |
| } |
| ], |
| "model_path": "Qwen/Qwen2.5-0.5B-Instruct", |
| "summary": { |
| "avg_completion_length": 394.9583333333333, |
| "avg_graph_f1": 0.5347222222222222, |
| "avg_reward": 0.5307706200858483, |
| "episodes": 24, |
| "task_success_rate": 0.0 |
| } |
| } |
| }, |
| "pipeline_mode": "swarm_v2", |
| "skipped": false, |
| "summary": { |
| "compared_models": [ |
| "finetuned_answerer", |
| "original_answerer" |
| ], |
| "delta_vs_original": { |
| "avg_graph_f1": 0.14583333333333326, |
| "avg_reward": -0.07390934498390139, |
| "task_success_rate": 0.0 |
| }, |
| "finetuned_answerer": { |
| "avg_completion_length": 439.25, |
| "avg_graph_f1": 0.6805555555555555, |
| "avg_reward": 0.4568612751019469, |
| "episodes": 24, |
| "task_success_rate": 0.0 |
| }, |
| "generated_task_count": 24, |
| "generator_valid_rate": 0.5, |
| "original_answerer": { |
| "avg_completion_length": 394.9583333333333, |
| "avg_graph_f1": 0.5347222222222222, |
| "avg_reward": 0.5307706200858483, |
| "episodes": 24, |
| "task_success_rate": 0.0 |
| }, |
| "top_generator_invalid_reasons": [ |
| [ |
| "duplicate_or_near_duplicate", |
| 3 |
| ] |
| ] |
| }, |
| "validation_reports_path": "artifacts/self_play_hf_a10g_train/post_training_eval_validation_reports.json" |
| } |