| .\venv-train\Scripts\python -m training.grpo_train ` | |
| --model models/parlay-sft ` | |
| --data data/episodes.jsonl ` | |
| --output models/parlay-grpo ` | |
| --steps 500 | |
| .\venv-train\Scripts\python -m training.grpo_train ` | |
| --model models/parlay-sft ` | |
| --data data/episodes.jsonl ` | |
| --output models/parlay-grpo ` | |
| --steps 500 | |