{
"rounds": 3,
"output_dir": "artifacts/self_play",
"dry_run": true,
"canonical_graph_mode": "generate",
"pipeline_mode": "swarm_v2",
"model_topology": "dual",
"phase_schedule": "generator_answerer",
"tuning_mode": "full",
"shared_model_name_or_path": "",
"seed_tasks_per_round": 16,
"generated_tasks_per_round": 24,
"generator_prompts_per_round": 24,
"max_graph_context_nodes": 100,
"max_graph_context_edges": 100,
"max_support_edges": 8,
"answerer_judge_max_new_tokens": 48,
"generator_reward_weights": {
"validity": 0.35,
"hardness": 0.45,
"diversity": 0.1,
"consistency": 0.1
},
"lora": {
"r": 16,
"alpha": 32,
"dropout": 0.05,
"target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
"bias": "none",
"task_type": "CAUSAL_LM"
},
"swarm_v2": {
"generator_swarm": {
"shared_context": true,
"max_agents": 4,
"max_breadth": 3,
"max_depth": 2,
"planner_rounds": 2,
"tools_per_agent": 2
},
"answerer_swarm": {
"shared_context": true,
"max_agents": 3,
"max_breadth": 2,
"max_depth": 2,
"planner_rounds": 2,
"tools_per_agent": 2
},
"validation": {
"max_support_edges": 8,
"max_path_hops": 4,
"max_context_nodes": 14,
"max_context_edges": 8,
"duplicate_similarity_threshold": 0.8
},
"shared_context": {
"shared_by_default": true,
"max_nodes": 14,
"max_edges": 8,
"target_pressure": 0.85
}
},
"generator_phase": {
"model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
"learning_rate": 1e-06,
"max_steps": 64,
"per_device_train_batch_size": 2,
"gradient_accumulation_steps": 4,
"num_generations": 4,
"max_completion_length": 256,
"temperature": 1.0,
"top_p": 1.0,
"beta": 0.01,
"epsilon": 0.2,
"num_iterations": 1,
"loss_type": "dapo",
"scale_rewards": "none",
"logging_steps": 10,
"save_steps": 50,
"output_subdir": "generator",
"use_vllm": false,
"vllm_mode": "colocate"
},
"answerer_phase": {
"model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
"learning_rate": 1e-06,
"max_steps": 64,
"per_device_train_batch_size": 2,
"gradient_accumulation_steps": 4,
"num_generations": 4,
"max_completion_length": 192,
"temperature": 1.0,
"top_p": 1.0,
"beta": 0.01,
"epsilon": 0.2,
"num_iterations": 1,
"loss_type": "dapo",
"scale_rewards": "none",
"logging_steps": 10,
"save_steps": 50,
"output_subdir": "answerer",
"use_vllm": false,
"vllm_mode": "colocate"
}
}