K446 committed on
Commit
6072ace
·
1 Parent(s): b724812

Drop unsloth: use standard bitsandbytes 4-bit + peft LoRA + TRL GRPOTrainer

Browse files
Files changed (3) hide show
  1. Dockerfile +3 -10
  2. requirements-training.txt +1 -1
  3. run_training.py +25 -24
Dockerfile CHANGED
@@ -21,7 +21,6 @@ ENV CXX=/usr/bin/g++
21
  RUN useradd -m -u 1000 user
22
  USER user
23
  ENV PATH="/home/user/.local/bin:$PATH"
24
- ENV LD_LIBRARY_PATH="/home/user/.local/lib/python3.10/site-packages/nvidia/nvjitlink/lib:/home/user/.local/lib/python3.10/site-packages/nvidia/cuda_runtime/lib:$LD_LIBRARY_PATH"
25
 
26
  WORKDIR /app
27
 
@@ -29,19 +28,13 @@ WORKDIR /app
29
  COPY --chown=user requirements.txt .
30
  RUN pip install --no-cache-dir -r requirements.txt
31
 
32
- # 2. PyTorch 2.6.0 + CUDA 12.1
33
- RUN pip install --no-cache-dir torch==2.6.0 --extra-index-url https://download.pytorch.org/whl/cu121
34
 
35
- # 3. Training deps (no unsloth here)
36
  COPY --chown=user requirements-training.txt .
37
  RUN pip install --no-cache-dir -r requirements-training.txt
38
 
39
- # 4. Unsloth --no-deps (avoids torchao>=0.13 conflict)
40
- RUN pip install --no-cache-dir --no-deps unsloth==2025.11.1 unsloth_zoo==2025.11.1
41
-
42
- # 5. Remove torchao if pulled in (incompatible with torch 2.6, crashes transformers)
43
- RUN pip uninstall -y torchao 2>/dev/null; true
44
-
45
  # --- App code ---
46
  COPY --chown=user src/ /app/src/
47
  COPY --chown=user training/ /app/training/
 
21
  RUN useradd -m -u 1000 user
22
  USER user
23
  ENV PATH="/home/user/.local/bin:$PATH"
 
24
 
25
  WORKDIR /app
26
 
 
28
  COPY --chown=user requirements.txt .
29
  RUN pip install --no-cache-dir -r requirements.txt
30
 
31
+ # 2. PyTorch with CUDA
32
+ RUN pip install --no-cache-dir torch --extra-index-url https://download.pytorch.org/whl/cu121
33
 
34
+ # 3. Training deps (standard stack, no unsloth)
35
  COPY --chown=user requirements-training.txt .
36
  RUN pip install --no-cache-dir -r requirements-training.txt
37
 
 
 
 
 
 
 
38
  # --- App code ---
39
  COPY --chown=user src/ /app/src/
40
  COPY --chown=user training/ /app/training/
requirements-training.txt CHANGED
@@ -1,4 +1,4 @@
1
- # Training deps (torch, unsloth installed separately in Dockerfile)
2
  transformers>=4.46.0,<4.52
3
  trl>=0.12.0,<0.16
4
  peft>=0.13.0
 
1
+ # Training deps (standard stack, no unsloth)
2
  transformers>=4.46.0,<4.52
3
  trl>=0.12.0,<0.16
4
  peft>=0.13.0
run_training.py CHANGED
@@ -58,26 +58,30 @@ def run_grpo_training():
58
  )
59
 
60
  # ── 1. Load model ──
61
- print("\n[1/6] Loading model with Unsloth...")
62
- try:
63
- from unsloth import FastLanguageModel
64
- MODEL_NAME = "unsloth/Qwen2.5-1.5B-Instruct-bnb-4bit"
65
- model, tokenizer = FastLanguageModel.from_pretrained(
66
- model_name=MODEL_NAME, max_seq_length=2048, load_in_4bit=True,
67
- )
68
- model = FastLanguageModel.get_peft_model(
69
- model, r=16, lora_alpha=16, lora_dropout=0,
70
- target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
71
- "gate_proj", "up_proj", "down_proj"],
72
- )
73
- print(f" Model: {MODEL_NAME}")
74
- print(f" Trainable params: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")
75
- except ImportError:
76
- print("WARNING: Unsloth not available, using standard loading")
77
- from transformers import AutoTokenizer, AutoModelForCausalLM
78
- MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
79
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
80
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
 
 
 
 
81
 
82
  if tokenizer.pad_token is None:
83
  tokenizer.pad_token = tokenizer.eos_token
@@ -239,10 +243,7 @@ def run_grpo_training():
239
 
240
  # ── 5. Post-training evaluation ──
241
  print("\n[5/6] Evaluating trained model...")
242
- try:
243
- FastLanguageModel.for_inference(model)
244
- except Exception:
245
- pass
246
 
247
  def trained_generate(prompt):
248
  messages = [
 
58
  )
59
 
60
  # ── 1. Load model ──
61
+ print("\n[1/6] Loading model with bitsandbytes 4-bit...")
62
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
63
+ from peft import LoraConfig, get_peft_model
64
+
65
+ MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
66
+ bnb_config = BitsAndBytesConfig(
67
+ load_in_4bit=True,
68
+ bnb_4bit_quant_type="nf4",
69
+ bnb_4bit_compute_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
70
+ bnb_4bit_use_double_quant=True,
71
+ )
72
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
73
+ model = AutoModelForCausalLM.from_pretrained(
74
+ MODEL_NAME, quantization_config=bnb_config, device_map="auto",
75
+ )
76
+ lora_config = LoraConfig(
77
+ r=16, lora_alpha=16, lora_dropout=0,
78
+ target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
79
+ "gate_proj", "up_proj", "down_proj"],
80
+ task_type="CAUSAL_LM",
81
+ )
82
+ model = get_peft_model(model, lora_config)
83
+ print(f" Model: {MODEL_NAME}")
84
+ print(f" Trainable params: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")
85
 
86
  if tokenizer.pad_token is None:
87
  tokenizer.pad_token = tokenizer.eos_token
 
243
 
244
  # ── 5. Post-training evaluation ──
245
  print("\n[5/6] Evaluating trained model...")
246
+ model.eval()
 
 
 
247
 
248
  def trained_generate(prompt):
249
  messages = [