{
  "model_name": "Qwen/Qwen3-0.6B",
  "questions_path": "data/questions/questions_train.json",
  "db_dir": "data/databases",
  "output_dir": "outputs/grpo_run",
  "num_train_epochs": 1,
  "per_device_train_batch_size": 8,
  "num_generations": 8,
  "max_completion_length": 512,
  "step_budget": 10,
  "logging_steps": 10,
  "precision": "bf16",
  "enable_thinking": false,
  "num_completions_to_print": 1
}