salespath-grpo / training_metadata.json
Lomesh7777's picture
Upload folder using huggingface_hub
88d2321 verified
{
"sft_checkpoint": "./sft_checkpoint",
"rollouts_per_difficulty": 16,
"difficulty_mix": [
1,
1,
2,
2,
3
],
"num_train_epochs": 10.0,
"per_device_batch": 12,
"grad_accum": 4,
"num_generations": 8,
"lr": 1e-06,
"beta": 0.05,
"gamma": 0.98,
"seed": 42,
"n_unique_prompts": 247,
"n_state_snapshots": 612,
"use_vllm": false
}