Spaces:
Runtime error
Runtime error
Commit ·
5edec00
1
Parent(s): b8ede5e
Update blog with 0.5B results and project metrics
Browse files- training/traingrpo.ipynb +0 -80
training/traingrpo.ipynb
CHANGED
|
@@ -320,86 +320,6 @@
|
|
| 320 |
"from IPython.display import Image\n",
|
| 321 |
"display(Image(\"/content/salespath_out/reward_graph.png\"))"
|
| 322 |
]
|
| 323 |
-
},
|
| 324 |
-
{
|
| 325 |
-
"cell_type": "code",
|
| 326 |
-
"execution_count": null,
|
| 327 |
-
"id": "8c0ddbf2",
|
| 328 |
-
"metadata": {},
|
| 329 |
-
"outputs": [],
|
| 330 |
-
"source": [
|
| 331 |
-
"# ============================================================\n",
|
| 332 |
-
"# CELL 3 — (Optional) Rollout Smoke Test\n",
|
| 333 |
-
"# ============================================================\n",
|
| 334 |
-
"# Skip this to save time if you already validated the environment.\n",
|
| 335 |
-
"# import os\n",
|
| 336 |
-
"# os.chdir(\"/content/salespath_env\")\n",
|
| 337 |
-
"# !python -m training.test_rollout"
|
| 338 |
-
]
|
| 339 |
-
},
|
| 340 |
-
{
|
| 341 |
-
"cell_type": "code",
|
| 342 |
-
"execution_count": null,
|
| 343 |
-
"id": "970d9bc7",
|
| 344 |
-
"metadata": {},
|
| 345 |
-
"outputs": [],
|
| 346 |
-
"source": [
|
| 347 |
-
"# ============================================================\n",
|
| 348 |
-
"# CELL 4 — (Optional) Curriculum Loop\n",
|
| 349 |
-
"# ============================================================\n",
|
| 350 |
-
"# Skip this. We will jump straight to GRPO for the 7B model.\n"
|
| 351 |
-
]
|
| 352 |
-
},
|
| 353 |
-
{
|
| 354 |
-
"cell_type": "code",
|
| 355 |
-
"execution_count": null,
|
| 356 |
-
"id": "3643fb56",
|
| 357 |
-
"metadata": {},
|
| 358 |
-
"outputs": [
|
| 359 |
-
{
|
| 360 |
-
"name": "stdout",
|
| 361 |
-
"output_type": "stream",
|
| 362 |
-
"text": [
|
| 363 |
-
"Launching TRL GRPO mode...\n",
|
| 364 |
-
"2026-04-26 07:11:29.087629: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
|
| 365 |
-
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
| 366 |
-
"E0000 00:00:1777187489.326359 17310 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
|
| 367 |
-
"E0000 00:00:1777187489.397054 17310 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
|
| 368 |
-
"W0000 00:00:1777187489.873542 17310 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
|
| 369 |
-
"W0000 00:00:1777187489.873617 17310 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
|
| 370 |
-
"W0000 00:00:1777187489.873622 17310 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
|
| 371 |
-
"W0000 00:00:1777187489.873627 17310 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
|
| 372 |
-
"2026-04-26 07:11:29.920915: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
|
| 373 |
-
"To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
|
| 374 |
-
"Skipping import of cpp extensions due to incompatible torch version. Please upgrade to torch >= 2.11.0 (found 2.10.0+cu128).\n",
|
| 375 |
-
"Loading checkpoint shards: 100% 4/4 [00:51<00:00, 12.78s/it]\n",
|
| 376 |
-
"Some parameters are on the meta device because they were offloaded to the cpu.\n",
|
| 377 |
-
"Loading checkpoint shards: 25% 1/4 [00:20<01:00, 20.18s/it]^C\n"
|
| 378 |
-
]
|
| 379 |
-
}
|
| 380 |
-
],
|
| 381 |
-
"source": [
|
| 382 |
-
"# ============================================================\n",
|
| 383 |
-
"# CELL 5 — GRPO Training (7B Model, Memory Safe)\n",
|
| 384 |
-
"# Initial Health Check: 150 Steps\n",
|
| 385 |
-
"# ============================================================\n",
|
| 386 |
-
"import os\n",
|
| 387 |
-
"os.environ[\"PYTORCH_ALLOC_CONF\"] = \"expandable_segments:True\"\n",
|
| 388 |
-
"os.chdir(\"/content/salespath_env\")\n",
|
| 389 |
-
"\n",
|
| 390 |
-
"!PYTORCH_ALLOC_CONF=expandable_segments:True \\\n",
|
| 391 |
-
"python -m training.grpo_train \\\n",
|
| 392 |
-
" --mode grpo \\\n",
|
| 393 |
-
" --model-name unsloth/Qwen2.5-7B-Instruct \\\n",
|
| 394 |
-
" --grpo-steps 150 \\\n",
|
| 395 |
-
" --grpo-dataset-size 128 \\\n",
|
| 396 |
-
" --num-generations 2 \\\n",
|
| 397 |
-
" --max-completion-length 128 \\\n",
|
| 398 |
-
" --per-device-train-batch-size 1 \\\n",
|
| 399 |
-
" --gradient-accumulation-steps 8 \\\n",
|
| 400 |
-
" --output-dir /content/salespath_out \\\n",
|
| 401 |
-
" --logging-steps 10"
|
| 402 |
-
]
|
| 403 |
}
|
| 404 |
],
|
| 405 |
"metadata": {
|
|
|
|
| 320 |
"from IPython.display import Image\n",
|
| 321 |
"display(Image(\"/content/salespath_out/reward_graph.png\"))"
|
| 322 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
}
|
| 324 |
],
|
| 325 |
"metadata": {
|