{
  "rounds": 3,
  "output_dir": "artifacts/self_play",
  "dry_run": true,
  "canonical_graph_mode": "generate",
  "pipeline_mode": "swarm_v2",
  "model_topology": "dual",
  "phase_schedule": "generator_answerer",
  "tuning_mode": "full",
  "shared_model_name_or_path": "",
  "seed_tasks_per_round": 16,
  "generated_tasks_per_round": 24,
  "generator_prompts_per_round": 24,
  "max_graph_context_nodes": 100,
  "max_graph_context_edges": 100,
  "max_support_edges": 8,
  "answerer_judge_max_new_tokens": 48,
  "generated_task_max_new_tokens": 512,
  "post_training_eval_questions": 24,
  "post_training_eval_answer_max_new_tokens": 128,
  "generator_reward_weights": {
    "validity": 0.45,
    "hardness": 0.2,
    "diversity": 0.15,
    "consistency": 0.2
  },
  "lora": {
    "r": 16,
    "alpha": 32,
    "dropout": 0.05,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
    "bias": "none",
    "task_type": "CAUSAL_LM"
  },
  "swarm_v2": {
    "generator_swarm": {
      "shared_context": true,
      "max_agents": 4,
      "max_breadth": 3,
      "max_depth": 2,
      "planner_rounds": 2,
      "tools_per_agent": 2
    },
    "answerer_swarm": {
      "shared_context": true,
      "max_agents": 3,
      "max_breadth": 2,
      "max_depth": 2,
      "planner_rounds": 2,
      "tools_per_agent": 2
    },
    "validation": {
      "max_support_edges": 8,
      "max_path_hops": 4,
      "max_context_nodes": 14,
      "max_context_edges": 8,
      "duplicate_similarity_threshold": 0.8
    },
    "shared_context": {
      "shared_by_default": true,
      "max_nodes": 14,
      "max_edges": 8,
      "target_pressure": 0.85
    }
  },
  "generator_phase": {
    "model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
    "learning_rate": 5e-06,
    "max_steps": 64,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "num_generations": 4,
    "max_completion_length": 384,
    "max_prompt_length": 1024,
    "generation_batch_size": 8,
    "temperature": 1.0,
    "top_p": 1.0,
    "beta": 0.01,
    "epsilon": 0.2,
    "num_iterations": 1,
    "loss_type": "dapo",
    "scale_rewards": "none",
    "logging_steps": 10,
    "save_steps": 50,
    "save_total_limit": 2,
    "optim": "adamw_torch_fused",
    "bf16": true,
    "tf32": true,
    "gradient_checkpointing": false,
    "dataloader_num_workers": 2,
    "dataloader_persistent_workers": true,
    "dataloader_prefetch_factor": 2,
    "output_subdir": "generator",
    "use_vllm": false,
    "vllm_mode": "colocate"
  },
  "answerer_phase": {
    "model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
    "learning_rate": 3e-06,
    "max_steps": 64,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "num_generations": 4,
    "max_completion_length": 192,
    "max_prompt_length": 1024,
    "generation_batch_size": 8,
    "temperature": 1.0,
    "top_p": 1.0,
    "beta": 0.01,
    "epsilon": 0.2,
    "num_iterations": 1,
    "loss_type": "dapo",
    "scale_rewards": "none",
    "logging_steps": 10,
    "save_steps": 50,
    "save_total_limit": 2,
    "optim": "adamw_torch_fused",
    "bf16": true,
    "tf32": true,
    "gradient_checkpointing": false,
    "dataloader_num_workers": 2,
    "dataloader_persistent_workers": true,
    "dataloader_prefetch_factor": 2,
    "output_subdir": "answerer",
    "use_vllm": false,
    "vllm_mode": "colocate"
  }
}