| # Optional Unsloth dependencies: install these after running `uv sync --extra train`. | |
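| # Unsloth declares trl<=0.24, but this project requires trl>=0.29 for training_script.py. | |
| # Install with --no-deps so that trl is not downgraded (Unsloth works with TRL 0.29 in practice). | |
| # Because --no-deps skips all of Unsloth's dependencies, run the `uv sync` step first. | |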
| # | |
| # Install: | |
| # uv sync --extra train | |
| # uv pip install "unsloth>=2025.10.14" unsloth_zoo --no-deps | |
| # | |
| # Run training with Qwen3-4B-Base: | |
| # uv run python training_unsloth.py --model-id Qwen/Qwen3-4B-Base --output-dir training/grpo-unsloth-qwen3-4b | |
| # | |
| unsloth>=2025.10.14 | |
| unsloth_zoo | |