Imsachin010 committed on
Commit
5edec00
·
1 Parent(s): b8ede5e

Update blog with 0.5B results and project metrics

Browse files
Files changed (1) hide show
  1. training/traingrpo.ipynb +0 -80
training/traingrpo.ipynb CHANGED
@@ -320,86 +320,6 @@
320
  "from IPython.display import Image\n",
321
  "display(Image(\"/content/salespath_out/reward_graph.png\"))"
322
  ]
323
- },
324
- {
325
- "cell_type": "code",
326
- "execution_count": null,
327
- "id": "8c0ddbf2",
328
- "metadata": {},
329
- "outputs": [],
330
- "source": [
331
- "# ============================================================\n",
332
- "# CELL 3 — (Optional) Rollout Smoke Test\n",
333
- "# ============================================================\n",
334
- "# Skip this to save time if you already validated the environment.\n",
335
- "# import os\n",
336
- "# os.chdir(\"/content/salespath_env\")\n",
337
- "# !python -m training.test_rollout"
338
- ]
339
- },
340
- {
341
- "cell_type": "code",
342
- "execution_count": null,
343
- "id": "970d9bc7",
344
- "metadata": {},
345
- "outputs": [],
346
- "source": [
347
- "# ============================================================\n",
348
- "# CELL 4 — (Optional) Curriculum Loop\n",
349
- "# ============================================================\n",
350
- "# Skip this. We will jump straight to GRPO for the 7B model.\n"
351
- ]
352
- },
353
- {
354
- "cell_type": "code",
355
- "execution_count": null,
356
- "id": "3643fb56",
357
- "metadata": {},
358
- "outputs": [
359
- {
360
- "name": "stdout",
361
- "output_type": "stream",
362
- "text": [
363
- "Launching TRL GRPO mode...\n",
364
- "2026-04-26 07:11:29.087629: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
365
- "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
366
- "E0000 00:00:1777187489.326359 17310 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
367
- "E0000 00:00:1777187489.397054 17310 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
368
- "W0000 00:00:1777187489.873542 17310 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
369
- "W0000 00:00:1777187489.873617 17310 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
370
- "W0000 00:00:1777187489.873622 17310 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
371
- "W0000 00:00:1777187489.873627 17310 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
372
- "2026-04-26 07:11:29.920915: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
373
- "To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
374
- "Skipping import of cpp extensions due to incompatible torch version. Please upgrade to torch >= 2.11.0 (found 2.10.0+cu128).\n",
375
- "Loading checkpoint shards: 100% 4/4 [00:51<00:00, 12.78s/it]\n",
376
- "Some parameters are on the meta device because they were offloaded to the cpu.\n",
377
- "Loading checkpoint shards: 25% 1/4 [00:20<01:00, 20.18s/it]^C\n"
378
- ]
379
- }
380
- ],
381
- "source": [
382
- "# ============================================================\n",
383
- "# CELL 5 — GRPO Training (7B Model, Memory Safe)\n",
384
- "# Initial Health Check: 150 Steps\n",
385
- "# ============================================================\n",
386
- "import os\n",
387
- "os.environ[\"PYTORCH_ALLOC_CONF\"] = \"expandable_segments:True\"\n",
388
- "os.chdir(\"/content/salespath_env\")\n",
389
- "\n",
390
- "!PYTORCH_ALLOC_CONF=expandable_segments:True \\\n",
391
- "python -m training.grpo_train \\\n",
392
- " --mode grpo \\\n",
393
- " --model-name unsloth/Qwen2.5-7B-Instruct \\\n",
394
- " --grpo-steps 150 \\\n",
395
- " --grpo-dataset-size 128 \\\n",
396
- " --num-generations 2 \\\n",
397
- " --max-completion-length 128 \\\n",
398
- " --per-device-train-batch-size 1 \\\n",
399
- " --gradient-accumulation-steps 8 \\\n",
400
- " --output-dir /content/salespath_out \\\n",
401
- " --logging-steps 10"
402
- ]
403
  }
404
  ],
405
  "metadata": {
 
320
  "from IPython.display import Image\n",
321
  "display(Image(\"/content/salespath_out/reward_graph.png\"))"
322
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  }
324
  ],
325
  "metadata": {