Lomesh2000 committed
Commit 87db122 · 1 Parent(s): d1fd1c6

fix: prevent Unsloth from attaching adapters twice during GRPO

Files changed (1)
  1. training/train_grpo.py +19 -11
training/train_grpo.py CHANGED
@@ -84,17 +84,25 @@ if USE_UNSLOTH:
         dtype=None,
         load_in_4bit=True,
     )
-    model = FastLanguageModel.get_peft_model(
-        model,
-        r=LORA_R,
-        lora_alpha=LORA_ALPHA,
-        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
-                        "gate_proj", "up_proj", "down_proj"],
-        lora_dropout=0.0,
-        bias="none",
-        use_gradient_checkpointing="unsloth",
-        random_state=42,
-    )
+
+    # If the model is already a PEFT model (e.g. loaded from SFT checkpoint),
+    # we don't need to add new LoRA adapters. Unsloth will throw an error if we try.
+    is_peft = hasattr(model, "peft_config") or "PeftModel" in str(type(model))
+
+    if not is_peft:
+        model = FastLanguageModel.get_peft_model(
+            model,
+            r=LORA_R,
+            lora_alpha=LORA_ALPHA,
+            target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
+                            "gate_proj", "up_proj", "down_proj"],
+            lora_dropout=0.0,
+            bias="none",
+            use_gradient_checkpointing="unsloth",
+            random_state=42,
+        )
+    else:
+        print("✅ Loaded existing PEFT adapters from checkpoint.")
 else:
     from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
     from peft import get_peft_model, LoraConfig, TaskType
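
Note on the detection heuristic (not part of the commit itself): a plain transformers model exposes no peft_config attribute, while a PEFT-wrapped model has both that attribute and a PeftModel class name, which is exactly what the guard above checks. A minimal standalone sketch, assuming only the transformers and peft APIs; the tiny model name and LoRA values below are placeholders for illustration, not values from train_grpo.py:

# Standalone sketch of the guard's heuristic; model name and LoRA settings are
# placeholders, not taken from this repository.
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

base = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")  # any small causal LM works
print(hasattr(base, "peft_config"))        # False -> the guard would attach fresh LoRA adapters

wrapped = get_peft_model(base, LoraConfig(r=8, lora_alpha=16, target_modules=["c_attn"]))
print(hasattr(wrapped, "peft_config"))     # True
print("PeftModel" in str(type(wrapped)))   # True -> the guard skips get_peft_model

A model resumed from an SFT checkpoint that already carries adapters behaves like the wrapped case, so the new branch prints the confirmation message instead of calling get_peft_model a second time.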