{
  "rounds": 3,
  "output_dir": "artifacts/self_play",
  "dry_run": true,
  "canonical_graph_mode": "generate",
  "pipeline_mode": "swarm_v2",
  "model_topology": "dual",
  "phase_schedule": "generator_answerer",
  "tuning_mode": "full",
  "shared_model_name_or_path": "",
  "seed_tasks_per_round": 16,
  "generated_tasks_per_round": 24,
  "generator_prompts_per_round": 24,
  "max_graph_context_nodes": 100,
  "max_graph_context_edges": 100,
  "max_support_edges": 8,
  "answerer_judge_max_new_tokens": 48,
  "generator_reward_weights": {
    "validity": 0.35,
    "hardness": 0.45,
    "diversity": 0.1,
    "consistency": 0.1
  },
  "lora": {
    "r": 16,
    "alpha": 32,
    "dropout": 0.05,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
    "bias": "none",
    "task_type": "CAUSAL_LM"
  },
  "swarm_v2": {
    "generator_swarm": {
      "shared_context": true,
      "max_agents": 4,
      "max_breadth": 3,
      "max_depth": 2,
      "planner_rounds": 2,
      "tools_per_agent": 2
    },
    "answerer_swarm": {
      "shared_context": true,
      "max_agents": 3,
      "max_breadth": 2,
      "max_depth": 2,
      "planner_rounds": 2,
      "tools_per_agent": 2
    },
    "validation": {
      "max_support_edges": 8,
      "max_path_hops": 4,
      "max_context_nodes": 14,
      "max_context_edges": 8,
      "duplicate_similarity_threshold": 0.8
    },
    "shared_context": {
      "shared_by_default": true,
      "max_nodes": 14,
      "max_edges": 8,
      "target_pressure": 0.85
    }
  },
  "generator_phase": {
    "model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
    "learning_rate": 1e-06,
    "max_steps": 64,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "num_generations": 4,
    "max_completion_length": 256,
    "temperature": 1.0,
    "top_p": 1.0,
    "beta": 0.01,
    "epsilon": 0.2,
    "num_iterations": 1,
    "loss_type": "dapo",
    "scale_rewards": "none",
    "logging_steps": 10,
    "save_steps": 50,
    "output_subdir": "generator",
    "use_vllm": false,
    "vllm_mode": "colocate"
  },
  "answerer_phase": {
    "model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
    "learning_rate": 1e-06,
    "max_steps": 64,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "num_generations": 4,
    "max_completion_length": 192,
    "temperature": 1.0,
    "top_p": 1.0,
    "beta": 0.01,
    "epsilon": 0.2,
    "num_iterations": 1,
    "loss_type": "dapo",
    "scale_rewards": "none",
    "logging_steps": 10,
    "save_steps": 50,
    "output_subdir": "answerer",
    "use_vllm": false,
    "vllm_mode": "colocate"
  }
}