{
  "rounds": 3,
  "output_dir": "artifacts/self_play",
  "dry_run": true,
  "canonical_graph_mode": "generate",
  "pipeline_mode": "swarm_v2",
  "model_topology": "dual",
  "phase_schedule": "generator_answerer",
  "tuning_mode": "full",
  "shared_model_name_or_path": "",
  "seed_tasks_per_round": 16,
  "generated_tasks_per_round": 24,
  "generator_prompts_per_round": 24,
  "max_graph_context_nodes": 100,
  "max_graph_context_edges": 100,
  "max_support_edges": 8,
  "answerer_judge_max_new_tokens": 48,
  "generated_task_max_new_tokens": 512,
  "post_training_eval_questions": 24,
  "post_training_eval_answer_max_new_tokens": 128,
  "generator_reward_weights": {
    "validity": 0.45,
    "hardness": 0.2,
    "diversity": 0.15,
    "consistency": 0.2
  },
  "lora": {
    "r": 16,
    "alpha": 32,
    "dropout": 0.05,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
    "bias": "none",
    "task_type": "CAUSAL_LM"
  },
  "swarm_v2": {
    "generator_swarm": {
      "shared_context": true,
      "max_agents": 4,
      "max_breadth": 3,
      "max_depth": 2,
      "planner_rounds": 2,
      "tools_per_agent": 2
    },
    "answerer_swarm": {
      "shared_context": true,
      "max_agents": 3,
      "max_breadth": 2,
      "max_depth": 2,
      "planner_rounds": 2,
      "tools_per_agent": 2
    },
    "validation": {
      "max_support_edges": 8,
      "max_path_hops": 4,
      "max_context_nodes": 14,
      "max_context_edges": 8,
      "duplicate_similarity_threshold": 0.8
    },
    "shared_context": {
      "shared_by_default": true,
      "max_nodes": 14,
      "max_edges": 8,
      "target_pressure": 0.85
    }
  },
  "generator_phase": {
    "model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
    "learning_rate": 5e-06,
    "max_steps": 64,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "num_generations": 4,
    "max_completion_length": 384,
    "max_prompt_length": 1024,
    "generation_batch_size": 8,
    "temperature": 1.0,
    "top_p": 1.0,
    "beta": 0.01,
    "epsilon": 0.2,
    "num_iterations": 1,
    "loss_type": "dapo",
    "scale_rewards": "none",
    "logging_steps": 10,
    "save_steps": 50,
    "save_total_limit": 2,
    "optim": "adamw_torch_fused",
    "bf16": true,
    "tf32": true,
    "gradient_checkpointing": false,
    "dataloader_num_workers": 2,
    "dataloader_persistent_workers": true,
    "dataloader_prefetch_factor": 2,
    "output_subdir": "generator",
    "use_vllm": false,
    "vllm_mode": "colocate"
  },
  "answerer_phase": {
    "model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
    "learning_rate": 3e-06,
    "max_steps": 64,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "num_generations": 4,
    "max_completion_length": 192,
    "max_prompt_length": 1024,
    "generation_batch_size": 8,
    "temperature": 1.0,
    "top_p": 1.0,
    "beta": 0.01,
    "epsilon": 0.2,
    "num_iterations": 1,
    "loss_type": "dapo",
    "scale_rewards": "none",
    "logging_steps": 10,
    "save_steps": 50,
    "save_total_limit": 2,
    "optim": "adamw_torch_fused",
    "bf16": true,
    "tf32": true,
    "gradient_checkpointing": false,
    "dataloader_num_workers": 2,
    "dataloader_persistent_workers": true,
    "dataloader_prefetch_factor": 2,
    "output_subdir": "answerer",
    "use_vllm": false,
    "vllm_mode": "colocate"
  }
}