shank committed on
Commit ·
a2fa47a
1
Parent(s): 2bfaf77
fix: upgrade bitsandbytes>=0.49.0 (triton.ops), switch to Qwen2.5-Coder-3B
Browse files- training/train_grpo.py +2 -2
training/train_grpo.py
CHANGED
|
@@ -50,7 +50,7 @@ if not args.test_local:
|
|
| 50 |
"accelerate==1.0.1",
|
| 51 |
"trl==0.14.0",
|
| 52 |
"peft==0.13.2",
|
| 53 |
-
"bitsandbytes=
|
| 54 |
]
|
| 55 |
print("Installing training dependencies...", flush=True)
|
| 56 |
ret = os.system(
|
|
@@ -94,7 +94,7 @@ from server.reward_calculator import DebugRewardCalculator
|
|
| 94 |
from server.models import parse_agent_output
|
| 95 |
|
| 96 |
# ── Configuration ─────────────────────────────────────────────────────────────
|
| 97 |
-
MODEL_NAME = "Qwen/Qwen2.5-Coder-
|
| 98 |
HF_REPO = "shashaank0707/AgentDebugger-trained"
|
| 99 |
MAX_STEPS = 10 if args.test else args.max_steps
|
| 100 |
CHECKPOINT_DIR = "./checkpoints"
|
|
|
|
| 50 |
"accelerate==1.0.1",
|
| 51 |
"trl==0.14.0",
|
| 52 |
"peft==0.13.2",
|
| 53 |
+
"bitsandbytes>=0.49.0",
|
| 54 |
]
|
| 55 |
print("Installing training dependencies...", flush=True)
|
| 56 |
ret = os.system(
|
|
|
|
| 94 |
from server.models import parse_agent_output
|
| 95 |
|
| 96 |
# ── Configuration ─────────────────────────────────────────────────────────────
|
| 97 |
+
MODEL_NAME = "Qwen/Qwen2.5-Coder-3B-Instruct"
|
| 98 |
HF_REPO = "shashaank0707/AgentDebugger-trained"
|
| 99 |
MAX_STEPS = 10 if args.test else args.max_steps
|
| 100 |
CHECKPOINT_DIR = "./checkpoints"
|