{
  "rounds": 3,
  "output_dir": "artifacts/self_play",
  "dry_run": true,
  "canonical_graph_mode": "generate",
  "pipeline_mode": "swarm_v2",
  "model_topology": "dual",
  "phase_schedule": "generator_answerer",
  "tuning_mode": "full",
  "shared_model_name_or_path": "",
  "seed_tasks_per_round": 16,
  "generated_tasks_per_round": 24,
  "generator_prompts_per_round": 24,
  "max_graph_context_nodes": 100,
  "max_graph_context_edges": 100,
  "max_support_edges": 8,
  "answerer_judge_max_new_tokens": 48,
  "generator_reward_weights": {
    "validity": 0.35,
    "hardness": 0.45,
    "diversity": 0.1,
    "consistency": 0.1
  },
  "lora": {
    "r": 16,
    "alpha": 32,
    "dropout": 0.05,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
    "bias": "none",
    "task_type": "CAUSAL_LM"
  },
  "swarm_v2": {
    "generator_swarm": {
      "shared_context": true,
      "max_agents": 4,
      "max_breadth": 3,
      "max_depth": 2,
      "planner_rounds": 2,
      "tools_per_agent": 2
    },
    "answerer_swarm": {
      "shared_context": true,
      "max_agents": 3,
      "max_breadth": 2,
      "max_depth": 2,
      "planner_rounds": 2,
      "tools_per_agent": 2
    },
    "validation": {
      "max_support_edges": 8,
      "max_path_hops": 4,
      "max_context_nodes": 14,
      "max_context_edges": 8,
      "duplicate_similarity_threshold": 0.8
    },
    "shared_context": {
      "shared_by_default": true,
      "max_nodes": 14,
      "max_edges": 8,
      "target_pressure": 0.85
    }
  },
  "generator_phase": {
    "model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
    "learning_rate": 1e-06,
    "max_steps": 64,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "num_generations": 4,
    "max_completion_length": 256,
    "temperature": 1.0,
    "top_p": 1.0,
    "beta": 0.01,
    "epsilon": 0.2,
    "num_iterations": 1,
    "loss_type": "dapo",
    "scale_rewards": "none",
    "logging_steps": 10,
    "save_steps": 50,
    "output_subdir": "generator",
    "use_vllm": false,
    "vllm_mode": "colocate"
  },
  "answerer_phase": {
    "model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
    "learning_rate": 1e-06,
    "max_steps": 64,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "num_generations": 4,
    "max_completion_length": 192,
    "temperature": 1.0,
    "top_p": 1.0,
    "beta": 0.01,
    "epsilon": 0.2,
    "num_iterations": 1,
    "loss_type": "dapo",
    "scale_rewards": "none",
    "logging_steps": 10,
    "save_steps": 50,
    "output_subdir": "answerer",
    "use_vllm": false,
    "vllm_mode": "colocate"
  }
}