Drop unsloth: use standard bitsandbytes 4-bit + peft LoRA + TRL GRPOTrainer
- Dockerfile +3 -10
- requirements-training.txt +1 -1
- run_training.py +25 -24
Dockerfile
CHANGED

@@ -21,7 +21,6 @@ ENV CXX=/usr/bin/g++
 RUN useradd -m -u 1000 user
 USER user
 ENV PATH="/home/user/.local/bin:$PATH"
-ENV LD_LIBRARY_PATH="/home/user/.local/lib/python3.10/site-packages/nvidia/nvjitlink/lib:/home/user/.local/lib/python3.10/site-packages/nvidia/cuda_runtime/lib:$LD_LIBRARY_PATH"
 
 WORKDIR /app
 
@@ -29,19 +28,13 @@ WORKDIR /app
 COPY --chown=user requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-# 2. PyTorch
-RUN pip install --no-cache-dir torch
+# 2. PyTorch with CUDA
+RUN pip install --no-cache-dir torch --extra-index-url https://download.pytorch.org/whl/cu121
 
-# 3. Training deps (no unsloth)
+# 3. Training deps (standard stack, no unsloth)
 COPY --chown=user requirements-training.txt .
 RUN pip install --no-cache-dir -r requirements-training.txt
 
-# 4. Unsloth --no-deps (avoids torchao>=0.13 conflict)
-RUN pip install --no-cache-dir --no-deps unsloth==2025.11.1 unsloth_zoo==2025.11.1
-
-# 5. Remove torchao if pulled in (incompatible with torch 2.6, crashes transformers)
-RUN pip uninstall -y torchao 2>/dev/null; true
-
 # --- App code ---
 COPY --chown=user src/ /app/src/
 COPY --chown=user training/ /app/training/
requirements-training.txt
CHANGED

@@ -1,4 +1,4 @@
-# Training deps
+# Training deps — standard stack, no unsloth
 transformers>=4.46.0,<4.52
 trl>=0.12.0,<0.16
 peft>=0.13.0
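The upper bounds presumably guard against API churn in transformers and trl. A hedged sketch of a startup check that fails fast if the installed stack drifts outside these pins; it assumes `packaging` is available (it ships as a transformers dependency):

# check_pins.py (hypothetical): mirror the requirements pins at runtime.
from importlib.metadata import version
from packaging.specifiers import SpecifierSet

PINS = {
    "transformers": ">=4.46.0,<4.52",
    "trl": ">=0.12.0,<0.16",
    "peft": ">=0.13.0",
}

for pkg, spec in PINS.items():
    installed = version(pkg)
    assert installed in SpecifierSet(spec), f"{pkg} {installed} violates {spec}"
    print(f"{pkg} {installed} OK ({spec})")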
run_training.py
CHANGED

@@ -58,26 +58,30 @@ def run_grpo_training():
     )
 
     # ── 1. Load model ──
-    print("\n[1/6] Loading model with …")
-    …  (19 more deleted lines: the old unsloth loading block)
+    print("\n[1/6] Loading model with bitsandbytes 4-bit...")
+    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+    from peft import LoraConfig, get_peft_model
+
+    MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
+        bnb_4bit_use_double_quant=True,
+    )
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME, quantization_config=bnb_config, device_map="auto",
+    )
+    lora_config = LoraConfig(
+        r=16, lora_alpha=16, lora_dropout=0,
+        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
+                        "gate_proj", "up_proj", "down_proj"],
+        task_type="CAUSAL_LM",
+    )
+    model = get_peft_model(model, lora_config)
+    print(f"  Model: {MODEL_NAME}")
+    print(f"  Trainable params: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")
 
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
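The diff jumps from step [1/6] to step [5/6]; the trainer construction in between is unchanged by this commit and not shown. For orientation, here is a hedged sketch of how a 4-bit + LoRA model like the one above typically plugs into TRL's GRPOTrainer (GRPOTrainer landed in trl 0.14, within the pinned range). The dataset, reward function, and hyperparameters are illustrative, not from this repo:

# Sketch only: GRPOTrainer wiring under the bitsandbytes + peft stack.
from datasets import Dataset
from trl import GRPOConfig, GRPOTrainer

def length_reward(completions, **kwargs):
    # Hypothetical reward: prefer shorter completions.
    return [-float(len(c)) for c in completions]

train_dataset = Dataset.from_dict({"prompt": ["Explain GRPO in one sentence."]})

training_args = GRPOConfig(
    output_dir="outputs",
    per_device_train_batch_size=8,
    num_generations=8,          # completions sampled per prompt; must divide the batch
    max_prompt_length=256,
    max_completion_length=256,
    max_steps=100,
    learning_rate=5e-6,
)

trainer = GRPOTrainer(
    model=model,                # the 4-bit + LoRA model from step [1/6]
    reward_funcs=length_reward,
    args=training_args,
    train_dataset=train_dataset,
    processing_class=tokenizer,
)
trainer.train()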
@@ -239,10 +243,7 @@ def run_grpo_training():
 
     # ── 5. Post-training evaluation ──
     print("\n[5/6] Evaluating trained model...")
-    try:
-        FastLanguageModel.for_inference(model)
-    except Exception:
-        pass
+    model.eval()
 
     def trained_generate(prompt):
         messages = [
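The hunk cuts off at the start of trained_generate. A minimal sketch of what such a helper usually looks like with this stack (chat-template the prompt, generate, then decode only the new tokens); the sampling settings are assumptions, not the repo's:

import torch

def trained_generate(prompt):
    # Sketch: the exact body is not shown in the diff.
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    with torch.inference_mode():
        out = model.generate(input_ids, max_new_tokens=256, do_sample=False)
    # Strip the prompt tokens before decoding.
    return tokenizer.decode(out[0, input_ids.shape[1]:], skip_special_tokens=True)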