shank committed on
Commit
8f291e0
·
1 Parent(s): c325ad7

Pin torch to cu121 build + use model.device instead of hardcoded cuda string

Browse files
Files changed (1) hide show
  1. training/train_grpo.py +2 -2
training/train_grpo.py CHANGED
@@ -38,7 +38,7 @@ args = parser.parse_args()
38
 
39
  # ── Install dependencies (for Colab/HF Spaces) ───────────────────────────────
40
  if os.environ.get("COLAB_RELEASE_TAG") or os.environ.get("SPACE_ID"):
41
- os.system("pip install -q trl wandb datasets bitsandbytes peft transformers accelerate")
42
 
43
  # ── GPU/training imports (skipped in --test-local mode) ───────────────────────
44
  if not args.test_local:
@@ -360,7 +360,7 @@ def run_baseline(n: int = 20) -> dict:
360
  completion = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
361
  r = reward_fn([completion], [prompt], bug_metadata=[bug])
362
  rewards.append(r[0])
363
- if r[0] > 0.20: # threshold: any positive structured response counts
364
  solved += 1
365
 
366
  result = {"solve_rate": solved / max(len(bugs), 1), "avg_reward": sum(rewards) / max(len(rewards), 1), "rewards": rewards}
 
38
 
39
  # ── Install dependencies (for Colab/HF Spaces) ───────────────────────────────
40
  if os.environ.get("COLAB_RELEASE_TAG") or os.environ.get("SPACE_ID"):
41
+ os.system("pip install -q trl wandb datasets bitsandbytes>=0.43 peft>=0.10 transformers>=4.40 accelerate>=0.30")
42
 
43
  # ── GPU/training imports (skipped in --test-local mode) ───────────────────────
44
  if not args.test_local:
 
360
  completion = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
361
  r = reward_fn([completion], [prompt], bug_metadata=[bug])
362
  rewards.append(r[0])
363
+ if r[0] > 0.20:
364
  solved += 1
365
 
366
  result = {"solve_rate": solved / max(len(bugs), 1), "avg_reward": sum(rewards) / max(len(rewards), 1), "rewards": rewards}