shank committed on
Commit ·
8f291e0
1
Parent(s): c325ad7
Pin torch to cu121 build + use model.device instead of hardcoded cuda string
Browse files- training/train_grpo.py +2 -2
training/train_grpo.py
CHANGED
|
@@ -38,7 +38,7 @@ args = parser.parse_args()
|
|
| 38 |
|
| 39 |
# ── Install dependencies (for Colab/HF Spaces) ───────────────────────────────
|
| 40 |
if os.environ.get("COLAB_RELEASE_TAG") or os.environ.get("SPACE_ID"):
|
| 41 |
-
os.system("pip install -q trl wandb datasets bitsandbytes peft transformers accelerate")
|
| 42 |
|
| 43 |
# ── GPU/training imports (skipped in --test-local mode) ───────────────────────
|
| 44 |
if not args.test_local:
|
|
@@ -360,7 +360,7 @@ def run_baseline(n: int = 20) -> dict:
|
|
| 360 |
completion = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
|
| 361 |
r = reward_fn([completion], [prompt], bug_metadata=[bug])
|
| 362 |
rewards.append(r[0])
|
| 363 |
-
if r[0] > 0.20:
|
| 364 |
solved += 1
|
| 365 |
|
| 366 |
result = {"solve_rate": solved / max(len(bugs), 1), "avg_reward": sum(rewards) / max(len(rewards), 1), "rewards": rewards}
|
|
|
|
| 38 |
|
| 39 |
# ── Install dependencies (for Colab/HF Spaces) ───────────────────────────────
|
| 40 |
if os.environ.get("COLAB_RELEASE_TAG") or os.environ.get("SPACE_ID"):
|
| 41 |
+
os.system("pip install -q trl wandb datasets bitsandbytes>=0.43 peft>=0.10 transformers>=4.40 accelerate>=0.30")
|
| 42 |
|
| 43 |
# ── GPU/training imports (skipped in --test-local mode) ───────────────────────
|
| 44 |
if not args.test_local:
|
|
|
|
| 360 |
completion = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
|
| 361 |
r = reward_fn([completion], [prompt], bug_metadata=[bug])
|
| 362 |
rewards.append(r[0])
|
| 363 |
+
if r[0] > 0.20:
|
| 364 |
solved += 1
|
| 365 |
|
| 366 |
result = {"solve_rate": solved / max(len(bugs), 1), "avg_reward": sum(rewards) / max(len(rewards), 1), "rewards": rewards}
|