Spaces:
Sleeping
Sleeping
# Training Space Dockerfile - JupyterLab + auto-run on port 7860.
#
# Two modes (controlled by the START_MODE env var in Space settings):
#   START_MODE=jupyter -> opens JupyterLab so you can run train_grpo.ipynb manually
#   START_MODE=auto    -> runs train.py immediately, no interaction needed
# NOTE(review): the mode dispatch is presumably implemented in
# train-entrypoint.sh, which is not part of this file - confirm there.
#
# HF Space secrets to set:
#   HF_TOKEN, HF_USERNAME, WANDB_API_KEY
#   GRPO_STEPS (optional, default 100)

# Pinned PyTorch base image (not a floating "latest" tag).
# unsloth-zoo pulls torchao>=0.13 which needs torch.utils._pytree.register_constant -
# available from torch 2.6.0+. cu126 is the matching CUDA toolkit.
FROM pytorch/pytorch:2.6.0-cuda12.6-cudnn9-devel

# Build-time only: keeps apt-get non-interactive during the build without
# baking the setting into the runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

# Flush Python stdout/stderr immediately so training logs show up live.
ENV PYTHONUNBUFFERED=1

# OS-level build tooling; the apt lists are removed in the same layer so they
# never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Hugging Face Spaces run the container as UID 1000. Create that user up front
# and hand it the working directory and the data directories so JupyterLab and
# train.py can write there at runtime.
# Persistent storage is expected at /data (attach a 20 GB disk in Space settings).
RUN useradd --create-home --uid 1000 user \
    && mkdir -p /workspace /data/checkpoints /data/scenarios /data/sft_dataset \
    && chown -R 1000:1000 /workspace /data

WORKDIR /workspace

# 1. Install unsloth exactly as their docs recommend.
#    --force-reinstall ensures no stale cached wheels.
#    NOTE(review): --force-reinstall also reinstalls dependencies, so pip may
#    replace the torch build shipped with the base image - verify the resulting
#    torch version is the one intended.
RUN pip install --upgrade --force-reinstall --no-cache-dir unsloth unsloth_zoo

# 2. transformers v5 from git - required for Qwen3 support.
#    The stable 4.x release eagerly imports torchao's quantizer which cascades
#    into a hard import failure across peft -> trl -> SFTConfig.
#    TRANSFORMERS_REF defaults to the moving "main" branch; pass
#    --build-arg TRANSFORMERS_REF=<commit-or-tag> for a reproducible build.
#    Declared here (not at the top) so changing it only invalidates the layers
#    from this point on.
ARG TRANSFORMERS_REF=main
RUN pip install --no-cache-dir \
        "git+https://github.com/huggingface/transformers.git@${TRANSFORMERS_REF}"

# 3. Install project deps (editable install with the "data" and "training" extras).
#    NOTE(review): if pyproject.toml constrains transformers, this step could
#    replace the git build installed above - confirm against pyproject.toml.
COPY --chown=1000:1000 pyproject.toml README.md ./
COPY --chown=1000:1000 src/ src/
RUN pip install --no-cache-dir -e ".[data,training]"

# 4. JupyterLab for interactive mode.
RUN pip install --no-cache-dir jupyterlab ipywidgets

# 5. Copy notebooks and training scripts.
COPY --chown=1000:1000 notebooks/ notebooks/
COPY --chown=1000:1000 train.py ./

# Port the Space serves on (documentation only; does not publish the port).
EXPOSE 7860

# Default mode; override START_MODE in the Space settings.
ENV START_MODE=jupyter

# Install the entrypoint already executable instead of a separate chmod layer.
COPY --chmod=755 train-entrypoint.sh /train-entrypoint.sh

# Drop root for runtime. All steps that need root (apt, pip into the system
# environment) are above this line.
ENV HOME=/home/user
USER user

ENTRYPOINT ["/train-entrypoint.sh"]