YashashMathur committed on
Commit
9e1ad05
·
verified ·
1 Parent(s): ff9091d

fix: force torch 2.5.1+cu121 after unsloth to prevent colab-new downgrade

Browse files
Files changed (1) hide show
  1. hf_training/Dockerfile +8 -11
hf_training/Dockerfile CHANGED
@@ -5,24 +5,21 @@ RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
5
 
6
  WORKDIR /app
7
 
8
- # Upgrade PyTorch to 2.5.1 (cu121) — unsloth_zoo requires torch._inductor.config
9
- # which only became an accessible attribute in PyTorch 2.4+
10
- RUN pip install --upgrade --no-cache-dir \
11
- torch==2.5.1 torchvision torchaudio \
12
- --index-url https://download.pytorch.org/whl/cu121
13
-
14
- # Install Unsloth and training dependencies
15
  RUN pip install --no-cache-dir \
16
- "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" \
17
  && pip install --no-cache-dir --no-deps xformers \
18
  && pip install --no-cache-dir \
19
  trl peft accelerate bitsandbytes huggingface_hub safetensors
20
 
21
- # Copy training script and dataset
 
 
 
 
 
22
  COPY train.py .
23
  COPY aegis_training_data_500.json .
24
 
25
  EXPOSE 7860
26
-
27
- # -u for unbuffered stdout so logs appear in real time in HF Space console
28
  CMD ["python", "-u", "train.py"]
 
5
 
6
  WORKDIR /app
7
 
8
+ # Install Unsloth (no [colab-new] extra — that extra pins old PyTorch and downgrades torch)
 
 
 
 
 
 
9
  RUN pip install --no-cache-dir \
10
+ "unsloth @ git+https://github.com/unslothai/unsloth.git" \
11
  && pip install --no-cache-dir --no-deps xformers \
12
  && pip install --no-cache-dir \
13
  trl peft accelerate bitsandbytes huggingface_hub safetensors
14
 
15
+ # Force PyTorch 2.5.1+cu121 LAST — unsloth_zoo requires torch._inductor.config (PyTorch 2.4+)
16
+ # Must be after all other installs to prevent any package from downgrading it
17
+ RUN pip install --force-reinstall --no-cache-dir \
18
+ "torch==2.5.1" "torchvision==0.20.1" "torchaudio==2.5.1" \
19
+ --index-url https://download.pytorch.org/whl/cu121
20
+
21
  COPY train.py .
22
  COPY aegis_training_data_500.json .
23
 
24
  EXPOSE 7860
 
 
25
  CMD ["python", "-u", "train.py"]