aamrinder committed on
Commit
70346e7
·
verified ·
1 Parent(s): 37818f1

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. train/hour1_smoke.py +3 -1
  2. train/train_grpo.py +3 -1
train/hour1_smoke.py CHANGED
@@ -83,17 +83,19 @@ def main():
83
  # 5. Load Qwen2.5-3B-Instruct + LoRA
84
  print("\n[5/6] loading Qwen2.5-3B-Instruct (4-bit + LoRA)")
85
  try:
 
86
  model, tokenizer = FastLanguageModel.from_pretrained(
87
  model_name="unsloth/Qwen2.5-3B-Instruct",
88
  max_seq_length=2048, # smaller than full 4096 for speed
89
  load_in_4bit=True,
 
90
  )
91
  model = FastLanguageModel.get_peft_model(
92
  model,
93
  r=8, # smaller r for the smoke test
94
  lora_alpha=16,
95
  target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
96
- use_gradient_checkpointing="unsloth",
97
  )
98
  n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
99
  print(f" ✓ model loaded; {n_trainable / 1e6:.1f}M LoRA params trainable")
 
83
  # 5. Load Qwen2.5-3B-Instruct + LoRA
84
  print("\n[5/6] loading Qwen2.5-3B-Instruct (4-bit + LoRA)")
85
  try:
86
+ import torch as _t
87
  model, tokenizer = FastLanguageModel.from_pretrained(
88
  model_name="unsloth/Qwen2.5-3B-Instruct",
89
  max_seq_length=2048, # smaller than full 4096 for speed
90
  load_in_4bit=True,
91
+ dtype=_t.bfloat16, # avoid LoRA dtype mismatch on L4
92
  )
93
  model = FastLanguageModel.get_peft_model(
94
  model,
95
  r=8, # smaller r for the smoke test
96
  lora_alpha=16,
97
  target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
98
+ use_gradient_checkpointing=True, # plain torch GC, not "unsloth" custom
99
  )
100
  n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
101
  print(f" ✓ model loaded; {n_trainable / 1e6:.1f}M LoRA params trainable")
train/train_grpo.py CHANGED
@@ -283,17 +283,19 @@ def main():
283
  from trl import GRPOTrainer, GRPOConfig
284
 
285
  print(f"[load] {args.model}, 4-bit, max_seq_length={args.seq_length}")
 
286
  model, tokenizer = FastLanguageModel.from_pretrained(
287
  model_name=args.model,
288
  max_seq_length=args.seq_length,
289
  load_in_4bit=True,
 
290
  )
291
  model = FastLanguageModel.get_peft_model(
292
  model,
293
  r=args.lora_r,
294
  lora_alpha=args.lora_r,
295
  target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
296
- use_gradient_checkpointing="unsloth",
297
  )
298
 
299
  config = GRPOConfig(
 
283
  from trl import GRPOTrainer, GRPOConfig
284
 
285
  print(f"[load] {args.model}, 4-bit, max_seq_length={args.seq_length}")
286
+ import torch as _t
287
  model, tokenizer = FastLanguageModel.from_pretrained(
288
  model_name=args.model,
289
  max_seq_length=args.seq_length,
290
  load_in_4bit=True,
291
+ dtype=_t.bfloat16, # explicit dtype prevents LoRA Half/Float mismatch
292
  )
293
  model = FastLanguageModel.get_peft_model(
294
  model,
295
  r=args.lora_r,
296
  lora_alpha=args.lora_r,
297
  target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
298
+ use_gradient_checkpointing=True, # plain torch GC; avoids unsloth-zoo dtype bug
299
  )
300
 
301
  config = GRPOConfig(