Spaces:

Imsachin010
/

salespath-env

Runtime error

App Files Community

Imsachin010 committed 12 days ago

Commit

5edec00

1 Parent(s): b8ede5e

Update blog with 0.5B results and project metrics

Browse files

Files changed (1) hide show

training/traingrpo.ipynb +0 -80

training/traingrpo.ipynb CHANGED Viewed

@@ -320,86 +320,6 @@
         "from IPython.display import Image\n",
         "display(Image(\"/content/salespath_out/reward_graph.png\"))"
       ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "8c0ddbf2",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ============================================================\n",
-        "# CELL 3 — (Optional) Rollout Smoke Test\n",
-        "# ============================================================\n",
-        "# Skip this to save time if you already validated the environment.\n",
-        "# import os\n",
-        "# os.chdir(\"/content/salespath_env\")\n",
-        "# !python -m training.test_rollout"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "970d9bc7",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ============================================================\n",
-        "# CELL 4 — (Optional) Curriculum Loop\n",
-        "# ============================================================\n",
-        "# Skip this. We will jump straight to GRPO for the 7B model.\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "3643fb56",
-      "metadata": {},
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Launching TRL GRPO mode...\n",
-            "2026-04-26 07:11:29.087629: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
-            "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
-            "E0000 00:00:1777187489.326359   17310 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
-            "E0000 00:00:1777187489.397054   17310 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
-            "W0000 00:00:1777187489.873542   17310 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
-            "W0000 00:00:1777187489.873617   17310 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
-            "W0000 00:00:1777187489.873622   17310 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
-            "W0000 00:00:1777187489.873627   17310 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
-            "2026-04-26 07:11:29.920915: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
-            "To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-            "Skipping import of cpp extensions due to incompatible torch version. Please upgrade to torch >= 2.11.0 (found 2.10.0+cu128).\n",
-            "Loading checkpoint shards: 100% 4/4 [00:51<00:00, 12.78s/it]\n",
-            "Some parameters are on the meta device because they were offloaded to the cpu.\n",
-            "Loading checkpoint shards:  25% 1/4 [00:20<01:00, 20.18s/it]^C\n"
-          ]
-        }
-      ],
-      "source": [
-        "# ============================================================\n",
-        "# CELL 5 — GRPO Training (7B Model, Memory Safe)\n",
-        "# Initial Health Check: 150 Steps\n",
-        "# ============================================================\n",
-        "import os\n",
-        "os.environ[\"PYTORCH_ALLOC_CONF\"] = \"expandable_segments:True\"\n",
-        "os.chdir(\"/content/salespath_env\")\n",
-        "\n",
-        "!PYTORCH_ALLOC_CONF=expandable_segments:True \\\n",
-        "python -m training.grpo_train \\\n",
-        "    --mode grpo \\\n",
-        "    --model-name unsloth/Qwen2.5-7B-Instruct \\\n",
-        "    --grpo-steps 150 \\\n",
-        "    --grpo-dataset-size 128 \\\n",
-        "    --num-generations 2 \\\n",
-        "    --max-completion-length 128 \\\n",
-        "    --per-device-train-batch-size 1 \\\n",
-        "    --gradient-accumulation-steps 8 \\\n",
-        "    --output-dir /content/salespath_out \\\n",
-        "    --logging-steps 10"
-      ]
     }
   ],
   "metadata": {

         "from IPython.display import Image\n",
         "display(Image(\"/content/salespath_out/reward_graph.png\"))"
       ]
     }
   ],
   "metadata": {