Spaces:
Runtime error
Runtime error
Commit ·
b8ede5e
1
Parent(s): dd9667a
Update blog with 0.5B results and project metrics
Browse files
- training/traingrpo.ipynb +0 -102
training/traingrpo.ipynb
CHANGED
|
@@ -19,26 +19,6 @@
|
|
| 19 |
"7. Run **Cell 5** (reward graph)"
|
| 20 |
]
|
| 21 |
},
|
| 22 |
-
{
|
| 23 |
-
"cell_type": "markdown",
|
| 24 |
-
"id": "f9d908a8",
|
| 25 |
-
"metadata": {},
|
| 26 |
-
"source": [
|
| 27 |
-
"# SalesPath — Colab Training Notebook (7B Scale-Up)\n",
|
| 28 |
-
"\n",
|
| 29 |
-
"**Stack:** OpenEnv + GRPO (TRL) + Unsloth + Qwen 2.5 7B\n",
|
| 30 |
-
"\n",
|
| 31 |
-
"**Instructions:**\n",
|
| 32 |
-
"1. Runtime → Change runtime type → **T4 GPU**\n",
|
| 33 |
-
"2. Add `HF_TOKEN` in Colab Secrets (left sidebar 🔑)\n",
|
| 34 |
-
"3. Run **Cell 1** once (installs + clones)\n",
|
| 35 |
-
"4. Run **Cell 2** (starts server + validates)\n",
|
| 36 |
-
"5. Skip Cell 3 & 4 (already validated with 0.5B)\n",
|
| 37 |
-
"6. Run **Cell 5** (GRPO training - 150 steps health check)\n",
|
| 38 |
-
"7. Run **Cell 6** (reward graph)\n",
|
| 39 |
-
"8. Run **Cell 7** (Push to HF)"
|
| 40 |
-
]
|
| 41 |
-
},
|
| 42 |
{
|
| 43 |
"cell_type": "code",
|
| 44 |
"execution_count": 1,
|
|
@@ -420,88 +400,6 @@
|
|
| 420 |
" --output-dir /content/salespath_out \\\n",
|
| 421 |
" --logging-steps 10"
|
| 422 |
]
|
| 423 |
-
},
|
| 424 |
-
{
|
| 425 |
-
"cell_type": "code",
|
| 426 |
-
"execution_count": null,
|
| 427 |
-
"id": "13db57ec",
|
| 428 |
-
"metadata": {},
|
| 429 |
-
"outputs": [],
|
| 430 |
-
"source": [
|
| 431 |
-
"# ============================================================\n",
|
| 432 |
-
"# CELL 5 — GRPO Training (gradient updates via TRL)\n",
|
| 433 |
-
"# ============================================================\n",
|
| 434 |
-
"# import os\n",
|
| 435 |
-
"# os.chdir(\"/content/salespath_env\")\n",
|
| 436 |
-
"\n",
|
| 437 |
-
"# grpo_cmd = (\n",
|
| 438 |
-
"# \"python -m training.grpo_train \"\n",
|
| 439 |
-
"# \"--mode grpo \"\n",
|
| 440 |
-
"# \"--model-name Qwen/Qwen2.5-0.5B-Instruct \"\n",
|
| 441 |
-
"# \"--grpo-steps 100 \"\n",
|
| 442 |
-
"# \"--grpo-dataset-size 256 \"\n",
|
| 443 |
-
"# \"--num-generations 4 \"\n",
|
| 444 |
-
"# \"--max-completion-length 64 \"\n",
|
| 445 |
-
"# \"--output-dir /content/salespath_out \"\n",
|
| 446 |
-
"# \"--logging-steps 5\"\n",
|
| 447 |
-
"# )\n",
|
| 448 |
-
"# !{grpo_cmd}\n",
|
| 449 |
-
"\n"
|
| 450 |
-
]
|
| 451 |
-
},
|
| 452 |
-
{
|
| 453 |
-
"cell_type": "markdown",
|
| 454 |
-
"id": "e33864a1",
|
| 455 |
-
"metadata": {},
|
| 456 |
-
"source": [
|
| 457 |
-
"## Final Push to HuggingFace\n",
|
| 458 |
-
"Run this after you have confirmed the 150 (or 300+) steps look good."
|
| 459 |
-
]
|
| 460 |
-
},
|
| 461 |
-
{
|
| 462 |
-
"cell_type": "code",
|
| 463 |
-
"execution_count": null,
|
| 464 |
-
"id": "b2a78334",
|
| 465 |
-
"metadata": {},
|
| 466 |
-
"outputs": [
|
| 467 |
-
{
|
| 468 |
-
"ename": "SyntaxError",
|
| 469 |
-
"evalue": "incomplete input (1350301498.py, line 27)",
|
| 470 |
-
"output_type": "error",
|
| 471 |
-
"traceback": [
|
| 472 |
-
"\u001b[0;36m File \u001b[0;32m\"/tmp/ipykernel_17054/1350301498.py\"\u001b[0;36m, line \u001b[0;32m27\u001b[0m\n\u001b[0;31m \"\"\"\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m incomplete input\n"
|
| 473 |
-
]
|
| 474 |
-
}
|
| 475 |
-
],
|
| 476 |
-
"source": [
|
| 477 |
-
"# ============================================================\n",
|
| 478 |
-
"# CELL 7 — Push Merged Model to HuggingFace\n",
|
| 479 |
-
"# ============================================================\n",
|
| 480 |
-
"import os\n",
|
| 481 |
-
"os.chdir(\"/content/salespath_env\")\n",
|
| 482 |
-
"\n",
|
| 483 |
-
"# We load the final checkpoint and push it.\n",
|
| 484 |
-
"hf_token = os.environ.get(\"HF_TOKEN\")\n",
|
| 485 |
-
"if not hf_token:\n",
|
| 486 |
-
" print(\"⚠️ HF_TOKEN not found in secrets. Cannot push.\")\n",
|
| 487 |
-
"else:\n",
|
| 488 |
-
" !python -c \"\"\"\n",
|
| 489 |
-
"import os\n",
|
| 490 |
-
"from unsloth import FastLanguageModel\n",
|
| 491 |
-
"model, tokenizer = FastLanguageModel.from_pretrained(\n",
|
| 492 |
-
" '/content/salespath_out/grpo_final',\n",
|
| 493 |
-
" max_seq_length=2048,\n",
|
| 494 |
-
" load_in_4bit=True,\n",
|
| 495 |
-
")\n",
|
| 496 |
-
"model.push_to_hub_merged(\n",
|
| 497 |
-
" 'Imsachin010/salespath-qwen25-7b',\n",
|
| 498 |
-
" tokenizer,\n",
|
| 499 |
-
" save_method='merged_16bit',\n",
|
| 500 |
-
" token=os.environ.get('HF_TOKEN')\n",
|
| 501 |
-
")\n",
|
| 502 |
-
"print('✅ Successfully pushed to HF!')\n",
|
| 503 |
-
"\"\"\""
|
| 504 |
-
]
|
| 505 |
}
|
| 506 |
],
|
| 507 |
"metadata": {
|
|
|
|
| 19 |
"7. Run **Cell 5** (reward graph)"
|
| 20 |
]
|
| 21 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
{
|
| 23 |
"cell_type": "code",
|
| 24 |
"execution_count": 1,
|
|
|
|
| 400 |
" --output-dir /content/salespath_out \\\n",
|
| 401 |
" --logging-steps 10"
|
| 402 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
}
|
| 404 |
],
|
| 405 |
"metadata": {
|