{ "sft_checkpoint": "./sft_checkpoint", "rollouts_per_difficulty": 16, "difficulty_mix": [ 1, 1, 2, 2, 3 ], "num_train_epochs": 10.0, "per_device_batch": 12, "grad_accum": 4, "num_generations": 8, "lr": 1e-06, "beta": 0.05, "gamma": 0.98, "seed": 42, "n_unique_prompts": 247, "n_state_snapshots": 612, "use_vllm": false }