helloAK96 committed on
Commit 5aa47dc · verified · 1 Parent(s): 7a6ae0c

Notebook: Phase 8c cleanup cell to free GPU before Phase 9 mini-train (OOM fix)

Files changed (1)
  1. notebooks/colab_train.ipynb +71 -17
notebooks/colab_train.ipynb CHANGED
@@ -403,6 +403,43 @@
  "KL to base 0.595, sustained.\n"
  ]
  },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Phase 8c \u2014 Free GPU memory before mini-training\n",
+ "\n",
+ "Phase 6 loaded the trained Qwen-3B (~7 GB) into the kernel's GPU memory\n",
+ "and Phase 8 ran a separate eval subprocess. Before we launch the mini\n",
+ "GRPO retrain (which spawns *another* subprocess that loads Qwen-1.5B\n",
+ "for training), we have to free the parent kernel's GPU references \u2014\n",
+ "otherwise the subprocess sees < 100 MB free on a T4 and OOMs\n",
+ "immediately. Skip this cell only if Phase 9 won't be run.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {},
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "# Drop every reference to the loaded trained model + LoRA so the\n",
+ "# subprocess in Phase 9 has the full GPU to itself.\n",
+ "import gc, torch\n",
+ "for name in ('trained', 'policy', 'result_trained'):\n",
+ "    if name in globals():\n",
+ "        try:\n",
+ "            del globals()[name]\n",
+ "        except Exception:\n",
+ "            pass\n",
+ "gc.collect()\n",
+ "if torch.cuda.is_available():\n",
+ "    torch.cuda.empty_cache()\n",
+ "    torch.cuda.ipc_collect()\n",
+ "    free_mb, total_mb = (m // (1024**2) for m in torch.cuda.mem_get_info())\n",
+ "    print(f'GPU free: {free_mb} MB / {total_mb} MB')\n"
+ ]
+ },
  {
  "cell_type": "markdown",
  "metadata": {},
 
@@ -428,12 +465,16 @@
  "import os\n",
  "os.chdir('/content/chaosops_src')\n",
  "os.makedirs('/content/artifacts/mini-grpo', exist_ok=True)\n",
- "!PYTHONPATH=/tmp python -m chaosops.train.grpo_train \\\n",
+ "# Sized for free-tier T4 (16 GB) AFTER Phase 8c cleanup.\n",
+ "# lora_rank=8, max_seq_length=768, group=2 keeps peak VRAM ~6 GB.\n",
+ "!PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \\\n",
+ " PYTHONPATH=/tmp python -m chaosops.train.grpo_train \\\n",
  " --model-name Qwen/Qwen2.5-1.5B-Instruct \\\n",
  " --backend transformers \\\n",
  " --total-episodes 20 \\\n",
  " --group-size 2 \\\n",
- " --lora-rank 16 \\\n",
+ " --lora-rank 8 \\\n",
+ " --max-seq-length 768 \\\n",
  " --learning-rate 2e-5 \\\n",
  " --temperature 0.8 \\\n",
  " --rogue-bonus-multiplier 2.0 \\\n",
 
@@ -447,21 +488,34 @@
  "execution_count": null,
  "outputs": [],
  "source": [
- "# Plot the mini-run reward curve so judges can see live signal\n",
- "import json, matplotlib.pyplot as plt\n",
- "log = json.load(open('/content/artifacts/mini-grpo/training_metrics.json'))\n",
- "xs = [e['episode'] for e in log]\n",
- "ys = [e['mean_combined_reward'] for e in log]\n",
- "plt.figure(figsize=(8, 4))\n",
- "plt.plot(xs, ys, 'o-', color='#8e44ad', linewidth=2)\n",
- "plt.axhline(0, color='#888', linewidth=0.6)\n",
- "plt.xlabel('Training step')\n",
- "plt.ylabel('Mean combined reward (0.6 \u00b7 team + 0.4 \u00b7 oversight)')\n",
- "plt.title('Mini-GRPO reward curve (Qwen 2.5-1.5B, 20 steps, T4)')\n",
- "plt.grid(True, linestyle=':', alpha=0.4)\n",
- "plt.tight_layout()\n",
- "plt.savefig('/content/artifacts/mini-grpo/learning_curve.png', dpi=150)\n",
- "plt.show()\n"
+ "# Plot the mini-run reward curve so judges can see live signal.\n",
+ "# If training crashed (OOM, etc.) we print a hint instead of throwing.\n",
+ "import json, os, matplotlib.pyplot as plt\n",
+ "metrics_path = '/content/artifacts/mini-grpo/training_metrics.json'\n",
+ "if not os.path.exists(metrics_path):\n",
+ "    print('No training_metrics.json found \u2014 Phase 9 training did not complete.')\n",
+ "    print('Common causes:')\n",
+ "    print(' \u2022 Phase 8c memory cleanup was skipped \u2192 mini-train OOMed.')\n",
+ "    print(' \u2022 Colab kernel ran out of GPU before training started.')\n",
+ "    print('Phase 8 already proved the trained adapter beats baselines \u2014')\n",
+ "    print('Phase 9 is OPTIONAL training-pipeline reproducibility evidence.')\n",
+ "else:\n",
+ "    log = json.load(open(metrics_path))\n",
+ "    if not log:\n",
+ "        print('training_metrics.json is empty \u2014 no log points were captured.')\n",
+ "    else:\n",
+ "        xs = [e['episode'] for e in log]\n",
+ "        ys = [e['mean_combined_reward'] for e in log]\n",
+ "        plt.figure(figsize=(8, 4))\n",
+ "        plt.plot(xs, ys, 'o-', color='#8e44ad', linewidth=2)\n",
+ "        plt.axhline(0, color='#888', linewidth=0.6)\n",
+ "        plt.xlabel('Training step')\n",
+ "        plt.ylabel('Mean combined reward (0.6 \u00b7 team + 0.4 \u00b7 oversight)')\n",
+ "        plt.title('Mini-GRPO reward curve (Qwen 2.5-1.5B, 20 steps, T4)')\n",
+ "        plt.grid(True, linestyle=':', alpha=0.4)\n",
+ "        plt.tight_layout()\n",
+ "        plt.savefig('/content/artifacts/mini-grpo/learning_curve.png', dpi=150)\n",
+ "        plt.show()\n"
  ]
  },
  {