Spaces:

Prasham1710
/

ci-triage-training

Sleeping

Prasham.Jain Claude Sonnet 4.6 committed about 1 month ago

Commit

68277e2

1 Parent(s): e3da0da

fix(training): drop unsloth, use bitsandbytes+PEFT for SFT

Unsloth requires transformers>=4.51 (for CompileConfig) but torch 2.4.1
in the Docker image forces transformers<=4.46 to avoid the torchao
version conflict. Remove unsloth entirely; use AutoModelForCausalLM +
BitsAndBytesConfig (nf4 4-bit) + PEFT LoRA instead — works identically
on 46 GB VRAM with no version conflicts.

Also fixes:
- MODEL_NAME: Qwen/Qwen3.5-4B → Qwen/Qwen3-4B (correct model ID)
- grpo.py: hp.pop() was called before the hp dict was constructed (UnboundLocalError, a NameError subclass)
- Dockerfile.train: remove unsloth install step, add bitsandbytes>=0.43.0 requirement (a lower bound, not an exact pin)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (3) hide show

Dockerfile.train +6 -9
src/ci_triage_env/training/grpo.py +4 -4
src/ci_triage_env/training/sft.py +35 -15

Dockerfile.train CHANGED Viewed

@@ -20,21 +20,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 WORKDIR /workspace
-# 1. Pin torchao BEFORE installing anything else.
-#    Latest torchao requires torch>=2.11 but this image ships torch 2.4.
-#    transformers>=4.47 pulls torchao as a dep, so we must pin transformers too.
 RUN pip install --no-cache-dir \
     "torchao==0.5.0" \
     "transformers==4.46.3" \
     "trl==0.11.4" \
     "peft==0.13.2" \
-    "accelerate==0.34.2"
-# 2. Install unsloth (must come after torch)
-RUN pip install --no-cache-dir \
-    "unsloth[cu121-torch240] @ git+https://github.com/unslothai/unsloth.git"
-# 3. Install project deps (transformers/trl/peft already pinned above, won't be overridden)
 COPY pyproject.toml README.md ./
 COPY src/ src/
 RUN pip install --no-cache-dir -e ".[data,training]"

 WORKDIR /workspace
+# 1. Pin versions compatible with torch 2.4.1 in this image.
+#    torchao latest requires torch>=2.11; transformers>=4.47 pulls torchao as dep.
+#    bitsandbytes replaces unsloth for 4-bit quantisation.
 RUN pip install --no-cache-dir \
     "torchao==0.5.0" \
     "transformers==4.46.3" \
     "trl==0.11.4" \
     "peft==0.13.2" \
+    "accelerate==0.34.2" \
+    "bitsandbytes>=0.43.0"
+# 2. Install project deps (versions pinned above won't be overridden)
 COPY pyproject.toml README.md ./
 COPY src/ src/
 RUN pip install --no-cache-dir -e ".[data,training]"

src/ci_triage_env/training/grpo.py CHANGED Viewed

@@ -64,6 +64,10 @@ def run_grpo(
     train_dir = Path(scenarios_train_path)
     scenario_ids = [p.stem for p in train_dir.rglob("*.json")] if train_dir.exists() else []
     max_turns = hp.pop("max_turns", 4)   # short episodes for faster GRPO
     rollout = TrainingRollout(
         env_client=env_client,
@@ -74,10 +78,6 @@ def run_grpo(
     model, tokenizer = load_model_for_sft(model_name=sft_checkpoint_dir)
-    hp = dict(GRPO_HYPERPARAMS)
-    if hyperparams:
-        hp.update(hyperparams)
     config = GRPOConfig(
         output_dir=output_dir,
         max_steps=total_steps,

     train_dir = Path(scenarios_train_path)
     scenario_ids = [p.stem for p in train_dir.rglob("*.json")] if train_dir.exists() else []
+    hp = dict(GRPO_HYPERPARAMS)
+    if hyperparams:
+        hp.update(hyperparams)
     max_turns = hp.pop("max_turns", 4)   # short episodes for faster GRPO
     rollout = TrainingRollout(
         env_client=env_client,
     model, tokenizer = load_model_for_sft(model_name=sft_checkpoint_dir)
     config = GRPOConfig(
         output_dir=output_dir,
         max_steps=total_steps,

src/ci_triage_env/training/sft.py CHANGED Viewed

@@ -1,12 +1,12 @@
-"""SFT warmstart trainer — Qwen3.5-4B + LoRA on the C3 trajectory dataset.
-All GPU-heavy imports (unsloth, trl, torch) are lazy so the module is
 importable without a GPU for testing.
 """
 from __future__ import annotations
-MODEL_NAME = "Qwen/Qwen3.5-4B"
 MAX_SEQ_LEN = 8192
@@ -14,28 +14,48 @@ def load_model_for_sft(
     model_name: str = MODEL_NAME,
     max_seq_length: int = MAX_SEQ_LEN,
 ):
-    """Load Qwen model with Unsloth + LoRA. Requires GPU and unsloth installed."""
-    from unsloth import FastLanguageModel  # type: ignore[import]
-    model, tokenizer = FastLanguageModel.from_pretrained(
-        model_name=model_name,
-        max_seq_length=max_seq_length,
         load_in_4bit=True,
-        dtype=None,
     )
-    model = FastLanguageModel.get_peft_model(
-        model,
         r=16,
         target_modules=[
             "q_proj", "k_proj", "v_proj", "o_proj",
             "gate_proj", "up_proj", "down_proj",
         ],
-        lora_alpha=32,
         lora_dropout=0.0,
         bias="none",
-        use_gradient_checkpointing="unsloth",
-        random_state=3407,
     )
     return model, tokenizer
@@ -57,7 +77,7 @@ def run_sft(
     gradient_accumulation_steps: int = 4,
     model_name: str = MODEL_NAME,
 ) -> str:
-    """Train the SFT warmstart model. Requires GPU + unsloth + trl installed.
     Args:
         dataset_path: Path to a HF Dataset saved by trajectory_gen (save_to_disk).

+"""SFT warmstart trainer — Qwen3-4B + LoRA on the C3 trajectory dataset.
+All GPU-heavy imports (trl, torch, peft) are lazy so the module is
 importable without a GPU for testing.
 """
 from __future__ import annotations
+MODEL_NAME = "Qwen/Qwen3-4B"
 MAX_SEQ_LEN = 8192
     model_name: str = MODEL_NAME,
     max_seq_length: int = MAX_SEQ_LEN,
 ):
+    """Load Qwen3-4B in 4-bit via bitsandbytes + LoRA via PEFT. Requires GPU."""
+    import torch
+    from peft import LoraConfig, TaskType, get_peft_model  # type: ignore[import]
+    from transformers import (  # type: ignore[import]
+        AutoModelForCausalLM,
+        AutoTokenizer,
+        BitsAndBytesConfig,
+    )
+    bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.bfloat16,
+        bnb_4bit_use_double_quant=True,
+    )
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        quantization_config=bnb_config,
+        device_map="auto",
+        trust_remote_code=True,
     )
+    model.gradient_checkpointing_enable()
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+    tokenizer.model_max_length = max_seq_length
+    lora_config = LoraConfig(
         r=16,
+        lora_alpha=32,
         target_modules=[
             "q_proj", "k_proj", "v_proj", "o_proj",
             "gate_proj", "up_proj", "down_proj",
         ],
         lora_dropout=0.0,
         bias="none",
+        task_type=TaskType.CAUSAL_LM,
     )
+    model = get_peft_model(model, lora_config)
+    model.print_trainable_parameters()
     return model, tokenizer
     gradient_accumulation_steps: int = 4,
     model_name: str = MODEL_NAME,
 ) -> str:
+    """Train the SFT warmstart model. Requires GPU + trl + peft + bitsandbytes.
     Args:
         dataset_path: Path to a HF Dataset saved by trajectory_gen (save_to_disk).