{
  "model_name": "Qwen/Qwen3-0.6B",
  "questions_path": "data/questions/questions_train.json",
  "db_dir": "data/databases",
  "output_dir": "outputs/grpo_run",
  "num_train_epochs": 1,
  "per_device_train_batch_size": 8,
  "num_generations": 8,
  "max_completion_length": 512,
  "step_budget": 10,
  "logging_steps": 10,
  "precision": "bf16",
  "enable_thinking": false,
  "num_completions_to_print": 1
}