{
  "rounds": 3,
  "output_dir": "artifacts/self_play",
  "dry_run": true,
  "canonical_graph_mode": "generate",
  "pipeline_mode": "swarm_v2",
  "model_topology": "dual",
  "phase_schedule": "generator_answerer",
  "tuning_mode": "full",
  "shared_model_name_or_path": "",
  "seed_tasks_per_round": 16,
  "generated_tasks_per_round": 24,
  "generator_prompts_per_round": 24,
  "max_graph_context_nodes": 100,
  "max_graph_context_edges": 100,
  "max_support_edges": 8,
  "answerer_judge_max_new_tokens": 48,
  "generator_reward_weights": {
    "validity": 0.35,
    "hardness": 0.45,
    "diversity": 0.1,
    "consistency": 0.1
  },
  "lora": {
    "r": 16,
    "alpha": 32,
    "dropout": 0.05,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
    "bias": "none",
    "task_type": "CAUSAL_LM"
  },
  "swarm_v2": {
    "generator_swarm": {
      "shared_context": true,
      "max_agents": 4,
      "max_breadth": 3,
      "max_depth": 2,
      "planner_rounds": 2,
      "tools_per_agent": 2
    },
    "answerer_swarm": {
      "shared_context": true,
      "max_agents": 3,
      "max_breadth": 2,
      "max_depth": 2,
      "planner_rounds": 2,
      "tools_per_agent": 2
    },
    "validation": {
      "max_support_edges": 8,
      "max_path_hops": 4,
      "max_context_nodes": 14,
      "max_context_edges": 8,
      "duplicate_similarity_threshold": 0.8
    },
    "shared_context": {
      "shared_by_default": true,
      "max_nodes": 14,
      "max_edges": 8,
      "target_pressure": 0.85
    }
  },
  "generator_phase": {
    "model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
    "learning_rate": 1e-06,
    "max_steps": 64,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "num_generations": 4,
    "max_completion_length": 256,
    "temperature": 1.0,
    "top_p": 1.0,
    "beta": 0.01,
    "epsilon": 0.2,
    "num_iterations": 1,
    "loss_type": "dapo",
    "scale_rewards": "none",
    "logging_steps": 10,
    "save_steps": 50,
    "output_subdir": "generator",
    "use_vllm": false,
    "vllm_mode": "colocate"
  },
  "answerer_phase": {
    "model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
    "learning_rate": 1e-06,
    "max_steps": 64,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "num_generations": 4,
    "max_completion_length": 192,
    "temperature": 1.0,
    "top_p": 1.0,
    "beta": 0.01,
    "epsilon": 0.2,
    "num_iterations": 1,
    "loss_type": "dapo",
    "scale_rewards": "none",
    "logging_steps": 10,
    "save_steps": 50,
    "output_subdir": "answerer",
    "use_vllm": false,
    "vllm_mode": "colocate"
  }
}