shank committed on
Commit
bdec91d
·
1 Parent(s): db12eaa

Fix: Removed BitsandBytes

Browse files
Files changed (1) hide show
  1. training/train_grpo.py +10 -16
training/train_grpo.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
  AgentDebuggerEnv — GRPO Training Script
3
- Model: Qwen2.5-Coder-7B-Instruct (4-bit quantized via bitsandbytes)
4
  Algorithm: GRPO (Group Relative Policy Optimization) via HuggingFace TRL
5
  GPU: auto-detected at runtime (A100/H100 → bfloat16+large batch, T4/V100 → float16+small batch)
6
 
@@ -50,7 +50,6 @@ if not args.test_local:
50
  "accelerate==1.0.1",
51
  "trl==0.15.2",
52
  "peft==0.13.2",
53
- "bitsandbytes==0.45.5",
54
  ]
55
  print("Installing training dependencies...", flush=True)
56
  ret = os.system(
@@ -67,7 +66,7 @@ if not args.test_local:
67
  import wandb
68
  from datasets import Dataset
69
  from transformers import (
70
- AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainerCallback
71
  )
72
  from peft import get_peft_model, LoraConfig, TaskType
73
  from trl import GRPOTrainer, GRPOConfig
@@ -86,7 +85,7 @@ if not args.test_local:
86
  f"trl={_pkg_ver('trl')} "
87
  f"accelerate={_pkg_ver('accelerate')} "
88
  f"peft={_pkg_ver('peft')} "
89
- f"bitsandbytes={_pkg_ver('bitsandbytes')}"
90
  )
91
 
92
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -291,12 +290,12 @@ if _gpu_vram_gb >= 40: # A100 40GB / A100 80GB
291
  _num_gen = 8
292
  _max_comp = 256
293
  _lora_r = 16
294
- elif _gpu_vram_gb >= 20: # V100 32GB
295
  _batch = 1
296
  _grad_accum = 8
297
- _num_gen = 6
298
- _max_comp = 220
299
- _lora_r = 12
300
  else: # T4 15GB / anything smaller
301
  _batch = 1
302
  _grad_accum = 8
@@ -309,20 +308,15 @@ print(f"Training config: batch={_batch} grad_accum={_grad_accum} "
309
  f"dtype={COMPUTE_DTYPE}")
310
 
311
  # ── Load model ────────────────────────────────────────────────────────────────
312
- print(f"Loading {MODEL_NAME}...")
313
- bnb_config = BitsAndBytesConfig(
314
- load_in_4bit=True,
315
- bnb_4bit_quant_type="nf4",
316
- bnb_4bit_compute_dtype=COMPUTE_DTYPE,
317
- bnb_4bit_use_double_quant=True,
318
- )
319
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
320
  tokenizer.pad_token = tokenizer.eos_token
321
  tokenizer.padding_side = "left"
322
 
323
  model = AutoModelForCausalLM.from_pretrained(
324
  MODEL_NAME,
325
- quantization_config=bnb_config,
326
  device_map="auto",
327
  trust_remote_code=True,
328
  torch_dtype=COMPUTE_DTYPE,
 
1
  """
2
  AgentDebuggerEnv — GRPO Training Script
3
+ Model: Qwen2.5-Coder-7B-Instruct (float16/bfloat16 + LoRA, no quantization)
4
  Algorithm: GRPO (Group Relative Policy Optimization) via HuggingFace TRL
5
  GPU: auto-detected at runtime (A100/H100 → bfloat16+large batch, T4/V100 → float16+small batch)
6
 
 
50
  "accelerate==1.0.1",
51
  "trl==0.15.2",
52
  "peft==0.13.2",
 
53
  ]
54
  print("Installing training dependencies...", flush=True)
55
  ret = os.system(
 
66
  import wandb
67
  from datasets import Dataset
68
  from transformers import (
69
+ AutoModelForCausalLM, AutoTokenizer, TrainerCallback
70
  )
71
  from peft import get_peft_model, LoraConfig, TaskType
72
  from trl import GRPOTrainer, GRPOConfig
 
85
  f"trl={_pkg_ver('trl')} "
86
  f"accelerate={_pkg_ver('accelerate')} "
87
  f"peft={_pkg_ver('peft')} "
88
+ f"dtype={COMPUTE_DTYPE}"
89
  )
90
 
91
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
290
  _num_gen = 8
291
  _max_comp = 256
292
  _lora_r = 16
293
+ elif _gpu_vram_gb >= 20: # A10G 24GB / V100 32GB — float16 model ~14GB
294
  _batch = 1
295
  _grad_accum = 8
296
+ _num_gen = 4
297
+ _max_comp = 192
298
+ _lora_r = 8
299
  else: # T4 15GB / anything smaller
300
  _batch = 1
301
  _grad_accum = 8
 
308
  f"dtype={COMPUTE_DTYPE}")
309
 
310
  # ── Load model ────────────────────────────────────────────────────────────────
311
+ # Load in native float16/bfloat16 — no bitsandbytes needed.
312
+ # A10G (24GB) fits Qwen2.5-7B in float16 (~14GB) with room for LoRA + activations.
313
+ print(f"Loading {MODEL_NAME} in {COMPUTE_DTYPE}...")
 
 
 
 
314
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
315
  tokenizer.pad_token = tokenizer.eos_token
316
  tokenizer.padding_side = "left"
317
 
318
  model = AutoModelForCausalLM.from_pretrained(
319
  MODEL_NAME,
 
320
  device_map="auto",
321
  trust_remote_code=True,
322
  torch_dtype=COMPUTE_DTYPE,