anuragredbus committed on
Commit
d8bb03f
·
1 Parent(s): 17149c8

train_grpo: rename monthly_* tasks to weekly_* (with env alias)

Browse files

- training/train_grpo.ipynb: rename the TASKS list entries and the matching
plot-label prefix strips from monthly_* to weekly_*.
- server/viraltest_environment.py: add _TASK_ALIASES so the renamed
weekly_* identifiers route to the existing monthly_* graders /
baselines without breaking external callers. Also picks up the
TASK_HORIZON env-var override that was already staged locally.

Why: the configurable horizon (default 15 days) is closer to a weekly
than a monthly cycle, so the task names should reflect that. The alias
keeps run-output JSON, validate-submission scripts, and the dashboard
working unchanged.

Made-with: Cursor

server/viraltest_environment.py CHANGED
@@ -13,6 +13,7 @@ Multi-day creator optimization with:
13
 
14
  import json
15
  import math
 
16
  import random
17
  from collections import defaultdict
18
  from dataclasses import dataclass, field
@@ -102,7 +103,8 @@ _FOLLOWERS_BY_ARCHETYPE: Dict[str, int] = {
102
  # ---------------------------------------------------------------------------
103
 
104
  # Episode length in daily env steps. Graders and UI should stay consistent with this value.
105
- TASK_HORIZON = 15
 
106
 
107
  # Distinct positive tags for full tag_discovery score in strategic/competitive graders.
108
  # Caps at 30 (original month-scale bar); scales down only for very short horizons.
@@ -149,6 +151,16 @@ INTENT_MULTIPLIER = {
149
 
150
  VALID_TASKS = ("monthly_engage", "monthly_strategic", "monthly_competitive")
151
 
 
 
 
 
 
 
 
 
 
 
152
  INITIAL_FOLLOWERS = 10000
153
  REST_RECOVERY = 0.12
154
  CREATE_CONTENT_COST = 0.05
@@ -1182,6 +1194,8 @@ class ViraltestEnvironment(Environment):
1182
 
1183
  def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> ViraltestObservation:
1184
  self._task = kwargs.get("task", "monthly_engage")
 
 
1185
  if self._task not in VALID_TASKS:
1186
  self._task = "monthly_engage"
1187
 
 
13
 
14
  import json
15
  import math
16
+ import os
17
  import random
18
  from collections import defaultdict
19
  from dataclasses import dataclass, field
 
103
  # ---------------------------------------------------------------------------
104
 
105
  # Episode length in daily env steps. Graders and UI should stay consistent with this value.
106
+ # Override via env var TASK_HORIZON (e.g. TASK_HORIZON=1 for ultra-fast local debug runs).
107
+ TASK_HORIZON = int(os.environ.get("TASK_HORIZON", "15"))
108
 
109
  # Distinct positive tags for full tag_discovery score in strategic/competitive graders.
110
  # Caps at 30 (original month-scale bar); scales down only for very short horizons.
 
151
 
152
  VALID_TASKS = ("monthly_engage", "monthly_strategic", "monthly_competitive")
153
 
154
+ # Backward-compatible aliases. The training notebook now uses `weekly_*` task names
155
+ # (the configurable TASK_HORIZON defaults to 15 days, which is closer to a weekly
156
+ # horizon than a monthly one). They route to the same graders / baselines as the
157
+ # canonical `monthly_*` names, so external callers using either spelling work.
158
+ _TASK_ALIASES: Dict[str, str] = {
159
+ "weekly_engage": "monthly_engage",
160
+ "weekly_strategic": "monthly_strategic",
161
+ "weekly_competitive": "monthly_competitive",
162
+ }
163
+
164
  INITIAL_FOLLOWERS = 10000
165
  REST_RECOVERY = 0.12
166
  CREATE_CONTENT_COST = 0.05
 
1194
 
1195
  def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> ViraltestObservation:
1196
  self._task = kwargs.get("task", "monthly_engage")
1197
+ # Accept the renamed `weekly_*` task identifiers used by the training notebook.
1198
+ self._task = _TASK_ALIASES.get(self._task, self._task)
1199
  if self._task not in VALID_TASKS:
1200
  self._task = "monthly_engage"
1201
 
training/train_grpo.ipynb CHANGED
@@ -25,7 +25,9 @@
25
  },
26
  {
27
  "cell_type": "code",
 
28
  "metadata": {},
 
29
  "source": [
30
  "# Cell 1: Install dependencies (quote versions — zsh treats `>` as redirect otherwise)\n",
31
  "!pip install -q torch torchvision torchaudio\n",
@@ -37,13 +39,13 @@
37
  "# This avoids the from-source build that fails when the container has no nvcc / CUDA_HOME.\n",
38
  "# Falls back to sdpa if the wheel install fails (e.g. on a different env).\n",
39
  "!pip install -q \"https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl\" || pip install -q flash-attn --no-build-isolation || echo \"flash-attn install skipped; will use sdpa\""
40
- ],
41
- "execution_count": null,
42
- "outputs": []
43
  },
44
  {
45
  "cell_type": "code",
 
46
  "metadata": {},
 
47
  "source": [
48
  "# Cell 2: Resolve repo path (Colab / Kaggle: fresh clone. Local: auto-detect project root)\n",
49
  "import os\n",
@@ -126,13 +128,13 @@
126
  "print(f\"Branch: {REPO_BRANCH}\")\n",
127
  "print(f\"Commit: {commit}\")\n",
128
  "print(f\"Plots dir: {PLOTS_DIR}\")"
129
- ],
130
- "execution_count": null,
131
- "outputs": []
132
  },
133
  {
134
  "cell_type": "code",
 
135
  "metadata": {},
 
136
  "source": [
137
  "# Cell 3: Imports (with runtime validation)\n",
138
  "import json, random, time, textwrap, copy, os, sys\n",
@@ -171,7 +173,7 @@
171
  "NICHES = list(TOPIC_CATEGORIES.keys())\n",
172
  "CONTENT_TYPES = [\"reel\", \"carousel\", \"story\", \"text_post\"]\n",
173
  "INTENTS = [\"send_bait\", \"save_bait\", \"watch_bait\", \"like_bait\"]\n",
174
- "TASKS = [\"monthly_engage\", \"monthly_strategic\", \"monthly_competitive\"]\n",
175
  "\n",
176
  "print(f\"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}\")\n",
177
  "print(f\"Tags: {len(TAG_POOL)}, Topics: {len(ALL_TOPICS)}, Horizon: {TASK_HORIZON} days\")\n",
@@ -198,9 +200,7 @@
198
  "# hint stays on for both (current behaviour preserved).\n",
199
  "HINT_ALWAYS = not TEST_ONLY\n",
200
  "print(f\"SMOKE_MODE={SMOKE_MODE} | TEST_ONLY={TEST_ONLY} | HINT_ALWAYS={HINT_ALWAYS}\")"
201
- ],
202
- "execution_count": null,
203
- "outputs": []
204
  },
205
  {
206
  "cell_type": "markdown",
@@ -213,7 +213,9 @@
213
  },
214
  {
215
  "cell_type": "code",
 
216
  "metadata": {},
 
217
  "source": [
218
  "# Cell 4: Define heuristic agents + episode runner\n",
219
  "_rng = random.Random(42)\n",
@@ -289,13 +291,13 @@
289
  " \"rewards\": rewards, \"energies\": energies}\n",
290
  "\n",
291
  "print(\"Agents and episode runner defined.\")"
292
- ],
293
- "execution_count": null,
294
- "outputs": []
295
  },
296
  {
297
  "cell_type": "code",
 
298
  "metadata": {},
 
299
  "source": [
300
  "# Cell 5: Run baselines (safe)\n",
301
  "print(\"Running heuristic baselines (5 agents × 3 tasks)...\")\n",
@@ -330,13 +332,13 @@
330
  "for name in BASELINE_AGENTS:\n",
331
  " scores = [baseline_results[name][t][\"grader_score\"] for t in TASKS]\n",
332
  " print(f\"{name:<14s} {scores[0]:>10.4f} {scores[1]:>12.4f} {scores[2]:>14.4f} {sum(scores)/3:>8.4f}\")"
333
- ],
334
- "execution_count": null,
335
- "outputs": []
336
  },
337
  {
338
  "cell_type": "code",
 
339
  "metadata": {},
 
340
  "source": [
341
  "# Cell 6: Baseline plots\n",
342
  "fig, axes = plt.subplots(1, 3, figsize=(16, 5), sharey=True)\n",
@@ -345,7 +347,7 @@
345
  "for i, task in enumerate(TASKS):\n",
346
  " scores = [baseline_results[a][task][\"grader_score\"] for a in agent_names]\n",
347
  " bars = axes[i].barh(agent_names, scores, color=colors)\n",
348
- " axes[i].set_title(task.replace(\"monthly_\", \"\").title(), fontsize=13, fontweight='bold')\n",
349
  " for bar, score in zip(bars, scores):\n",
350
  " axes[i].text(bar.get_width() + 0.005, bar.get_y() + bar.get_height()/2,\n",
351
  " f\"{score:.4f}\", va='center', fontsize=9)\n",
@@ -354,9 +356,7 @@
354
  "fig.tight_layout()\n",
355
  "fig.savefig(f\"{PLOTS_DIR}/baseline_leaderboard.png\", dpi=150, bbox_inches='tight')\n",
356
  "plt.show()"
357
- ],
358
- "execution_count": null,
359
- "outputs": []
360
  },
361
  {
362
  "cell_type": "markdown",
@@ -369,7 +369,9 @@
369
  },
370
  {
371
  "cell_type": "code",
 
372
  "metadata": {},
 
373
  "source": [
374
  "# Cell 7: Load model (Qwen2.5-3B bf16 on CUDA + flash-attn-2; fp16/fp32 fallback)\n",
375
  "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
@@ -413,13 +415,13 @@
413
  "print(f\"Model loaded. dtype={next(model.parameters()).dtype} device={next(model.parameters()).device}\")\n",
414
  "if torch.cuda.is_available():\n",
415
  " print(f\"CUDA memory: {torch.cuda.memory_allocated()/1e9:.2f} GB\")"
416
- ],
417
- "execution_count": null,
418
- "outputs": []
419
  },
420
  {
421
  "cell_type": "code",
 
422
  "metadata": {},
 
423
  "source": [
424
  "# Cell 8: LLM agent functions\n",
425
  "_SYSTEM_BASE = textwrap.dedent(\"\"\"\\\n",
@@ -762,9 +764,7 @@
762
  "\n",
763
  "\n",
764
  "print(\"LLM agent functions defined (batched).\")"
765
- ],
766
- "execution_count": null,
767
- "outputs": []
768
  },
769
  {
770
  "cell_type": "markdown",
@@ -777,7 +777,9 @@
777
  },
778
  {
779
  "cell_type": "code",
 
780
  "metadata": {},
 
781
  "source": [
782
  "# Cell 9: Run untrained model (batched: all 3 tasks in parallel envs)\n",
783
  "print(\"Running UNTRAINED base model on all tasks (batched)...\")\n",
@@ -791,9 +793,7 @@
791
  "print(f\"BEFORE TRAINING (took {time.time()-t0:.1f}s):\")\n",
792
  "for t in TASKS:\n",
793
  " print(f\" {t}: grader={before_results[t]['grader_score']:.4f}\")"
794
- ],
795
- "execution_count": null,
796
- "outputs": []
797
  },
798
  {
799
  "cell_type": "markdown",
@@ -812,7 +812,9 @@
812
  },
813
  {
814
  "cell_type": "code",
 
815
  "metadata": {},
 
816
  "source": [
817
  "# Cell 10: Attach LoRA adapter\n",
818
  "from peft import LoraConfig, get_peft_model, TaskType\n",
@@ -834,13 +836,13 @@
834
  "model.enable_input_require_grads()\n",
835
  "peft_model = get_peft_model(model, lora_config)\n",
836
  "peft_model.print_trainable_parameters()"
837
- ],
838
- "execution_count": null,
839
- "outputs": []
840
  },
841
  {
842
  "cell_type": "code",
 
843
  "metadata": {},
 
844
  "source": [
845
  "# Cell 11: Two-phase training loop (timing -> content)\n",
846
  "# Each phase: 3 rounds (round 0 = hardcoded peak-hours hint, rounds 1-2 = normal prompt).\n",
@@ -986,9 +988,7 @@
986
  "elapsed = time.time() - t_start\n",
987
  "print(f\"\\nTwo-phase training complete in {elapsed/60:.1f} min\")\n",
988
  "print(pd.DataFrame(training_log).to_string(index=False))"
989
- ],
990
- "execution_count": null,
991
- "outputs": []
992
  },
993
  {
994
  "cell_type": "markdown",
@@ -1001,7 +1001,9 @@
1001
  },
1002
  {
1003
  "cell_type": "code",
 
1004
  "metadata": {},
 
1005
  "source": [
1006
  "# Cell 12: Run trained model (batched)\n",
1007
  "print(\"Running TRAINED model on all tasks (batched)...\")\n",
@@ -1045,13 +1047,13 @@
1045
  " print(f\" {t}: {a:.4f} -> {new_a:.4f} (was delta={a-b:+.4f}, now {new_a-b:+.4f})\")\n",
1046
  " else:\n",
1047
  " print(f\" {t}: {a:.4f} (organic delta {a-b:+.4f}, no boost needed)\")"
1048
- ],
1049
- "execution_count": null,
1050
- "outputs": []
1051
  },
1052
  {
1053
  "cell_type": "code",
 
1054
  "metadata": {},
 
1055
  "source": [
1056
  "# Cell 12.5: Debug — analyse io_log.jsonl (before vs after, tool error rate, hint usage)\n",
1057
  "import re\n",
@@ -1112,9 +1114,7 @@
1112
  " if bk and ak:\n",
1113
  " print(\"BEFORE response head:\", bk[\"response\"][:300].replace(\"\\n\", \" \"))\n",
1114
  " print(\"AFTER response head:\", ak[\"response\"][:300].replace(\"\\n\", \" \"))"
1115
- ],
1116
- "execution_count": null,
1117
- "outputs": []
1118
  },
1119
  {
1120
  "cell_type": "markdown",
@@ -1125,7 +1125,9 @@
1125
  },
1126
  {
1127
  "cell_type": "code",
 
1128
  "metadata": {},
 
1129
  "source": [
1130
  "# Cell 13: Training curves (two-phase)\n",
1131
  "fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
@@ -1153,16 +1155,16 @@
1153
  "fig.tight_layout()\n",
1154
  "fig.savefig(f'{PLOTS_DIR}/reward_curve.png', dpi=150, bbox_inches='tight')\n",
1155
  "plt.show()"
1156
- ],
1157
- "execution_count": null,
1158
- "outputs": []
1159
  },
1160
  {
1161
  "cell_type": "code",
 
1162
  "metadata": {},
 
1163
  "source": [
1164
  "# Cell 14: Before vs After\n",
1165
- "task_labels = [t.replace('monthly_', '').title() for t in TASKS]\n",
1166
  "x = np.arange(len(TASKS))\n",
1167
  "w = 0.25\n",
1168
  "\n",
@@ -1189,13 +1191,13 @@
1189
  "fig.tight_layout()\n",
1190
  "fig.savefig(f'{PLOTS_DIR}/before_after.png', dpi=150, bbox_inches='tight')\n",
1191
  "plt.show()"
1192
- ],
1193
- "execution_count": null,
1194
- "outputs": []
1195
  },
1196
  {
1197
  "cell_type": "code",
 
1198
  "metadata": {},
 
1199
  "source": [
1200
  "# Cell 15: Trajectory comparison\n",
1201
  "fig, axes = plt.subplots(2, 3, figsize=(16, 8))\n",
@@ -1211,7 +1213,7 @@
1211
  " sr = baseline_results[\"smart\"][task]\n",
1212
  " axes[0, i].plot(sr[\"rewards\"], label=\"Smart\", color='#9E9E9E', lw=1, ls=':')\n",
1213
  " axes[1, i].plot(sr[\"energies\"], label=\"Smart\", color='#9E9E9E', lw=1, ls=':')\n",
1214
- " t_name = task.replace('monthly_', '').title()\n",
1215
  " axes[0, i].set_title(f\"{t_name} — Rewards\"); axes[0, i].grid(True, alpha=0.3)\n",
1216
  " axes[1, i].set_title(f\"{t_name} — Energy\"); axes[1, i].grid(True, alpha=0.3)\n",
1217
  "axes[0, 2].legend(bbox_to_anchor=(1.05, 1), loc='upper left')\n",
@@ -1219,9 +1221,7 @@
1219
  "fig.tight_layout()\n",
1220
  "fig.savefig(f'{PLOTS_DIR}/training_trajectories.png', dpi=150, bbox_inches='tight')\n",
1221
  "plt.show()"
1222
- ],
1223
- "execution_count": null,
1224
- "outputs": []
1225
  },
1226
  {
1227
  "cell_type": "markdown",
@@ -1232,7 +1232,9 @@
1232
  },
1233
  {
1234
  "cell_type": "code",
 
1235
  "metadata": {},
 
1236
  "source": [
1237
  "# Cell 16: Final summary\n",
1238
  "print(\"=\" * 67)\n",
@@ -1271,13 +1273,13 @@
1271
  "\n",
1272
  "print(f\"\\nSaved to {PLOTS_DIR}/\")\n",
1273
  "print(\"All results are from real LoRA weight updates on real environment runs.\")"
1274
- ],
1275
- "execution_count": null,
1276
- "outputs": []
1277
  },
1278
  {
1279
  "cell_type": "code",
 
1280
  "metadata": {},
 
1281
  "source": [
1282
  "# Cell 17: Save adapter\n",
1283
  "save_path = \"./viraltest_trained_adapter\"\n",
@@ -1285,9 +1287,7 @@
1285
  "tokenizer.save_pretrained(save_path)\n",
1286
  "print(f\"LoRA adapter saved to {save_path}\")\n",
1287
  "print(\"Load with: PeftModel.from_pretrained(base_model, save_path)\")"
1288
- ],
1289
- "execution_count": null,
1290
- "outputs": []
1291
  }
1292
  ],
1293
  "metadata": {
@@ -1313,4 +1313,4 @@
1313
  },
1314
  "nbformat": 4,
1315
  "nbformat_minor": 4
1316
- }
 
25
  },
26
  {
27
  "cell_type": "code",
28
+ "execution_count": null,
29
  "metadata": {},
30
+ "outputs": [],
31
  "source": [
32
  "# Cell 1: Install dependencies (quote versions — zsh treats `>` as redirect otherwise)\n",
33
  "!pip install -q torch torchvision torchaudio\n",
 
39
  "# This avoids the from-source build that fails when the container has no nvcc / CUDA_HOME.\n",
40
  "# Falls back to sdpa if the wheel install fails (e.g. on a different env).\n",
41
  "!pip install -q \"https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl\" || pip install -q flash-attn --no-build-isolation || echo \"flash-attn install skipped; will use sdpa\""
42
+ ]
 
 
43
  },
44
  {
45
  "cell_type": "code",
46
+ "execution_count": null,
47
  "metadata": {},
48
+ "outputs": [],
49
  "source": [
50
  "# Cell 2: Resolve repo path (Colab / Kaggle: fresh clone. Local: auto-detect project root)\n",
51
  "import os\n",
 
128
  "print(f\"Branch: {REPO_BRANCH}\")\n",
129
  "print(f\"Commit: {commit}\")\n",
130
  "print(f\"Plots dir: {PLOTS_DIR}\")"
131
+ ]
 
 
132
  },
133
  {
134
  "cell_type": "code",
135
+ "execution_count": null,
136
  "metadata": {},
137
+ "outputs": [],
138
  "source": [
139
  "# Cell 3: Imports (with runtime validation)\n",
140
  "import json, random, time, textwrap, copy, os, sys\n",
 
173
  "NICHES = list(TOPIC_CATEGORIES.keys())\n",
174
  "CONTENT_TYPES = [\"reel\", \"carousel\", \"story\", \"text_post\"]\n",
175
  "INTENTS = [\"send_bait\", \"save_bait\", \"watch_bait\", \"like_bait\"]\n",
176
+ "TASKS = [\"weekly_engage\", \"weekly_strategic\", \"weekly_competitive\"]\n",
177
  "\n",
178
  "print(f\"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}\")\n",
179
  "print(f\"Tags: {len(TAG_POOL)}, Topics: {len(ALL_TOPICS)}, Horizon: {TASK_HORIZON} days\")\n",
 
200
  "# hint stays on for both (current behaviour preserved).\n",
201
  "HINT_ALWAYS = not TEST_ONLY\n",
202
  "print(f\"SMOKE_MODE={SMOKE_MODE} | TEST_ONLY={TEST_ONLY} | HINT_ALWAYS={HINT_ALWAYS}\")"
203
+ ]
 
 
204
  },
205
  {
206
  "cell_type": "markdown",
 
213
  },
214
  {
215
  "cell_type": "code",
216
+ "execution_count": null,
217
  "metadata": {},
218
+ "outputs": [],
219
  "source": [
220
  "# Cell 4: Define heuristic agents + episode runner\n",
221
  "_rng = random.Random(42)\n",
 
291
  " \"rewards\": rewards, \"energies\": energies}\n",
292
  "\n",
293
  "print(\"Agents and episode runner defined.\")"
294
+ ]
 
 
295
  },
296
  {
297
  "cell_type": "code",
298
+ "execution_count": null,
299
  "metadata": {},
300
+ "outputs": [],
301
  "source": [
302
  "# Cell 5: Run baselines (safe)\n",
303
  "print(\"Running heuristic baselines (5 agents × 3 tasks)...\")\n",
 
332
  "for name in BASELINE_AGENTS:\n",
333
  " scores = [baseline_results[name][t][\"grader_score\"] for t in TASKS]\n",
334
  " print(f\"{name:<14s} {scores[0]:>10.4f} {scores[1]:>12.4f} {scores[2]:>14.4f} {sum(scores)/3:>8.4f}\")"
335
+ ]
 
 
336
  },
337
  {
338
  "cell_type": "code",
339
+ "execution_count": null,
340
  "metadata": {},
341
+ "outputs": [],
342
  "source": [
343
  "# Cell 6: Baseline plots\n",
344
  "fig, axes = plt.subplots(1, 3, figsize=(16, 5), sharey=True)\n",
 
347
  "for i, task in enumerate(TASKS):\n",
348
  " scores = [baseline_results[a][task][\"grader_score\"] for a in agent_names]\n",
349
  " bars = axes[i].barh(agent_names, scores, color=colors)\n",
350
+ " axes[i].set_title(task.replace(\"weekly_\", \"\").title(), fontsize=13, fontweight='bold')\n",
351
  " for bar, score in zip(bars, scores):\n",
352
  " axes[i].text(bar.get_width() + 0.005, bar.get_y() + bar.get_height()/2,\n",
353
  " f\"{score:.4f}\", va='center', fontsize=9)\n",
 
356
  "fig.tight_layout()\n",
357
  "fig.savefig(f\"{PLOTS_DIR}/baseline_leaderboard.png\", dpi=150, bbox_inches='tight')\n",
358
  "plt.show()"
359
+ ]
 
 
360
  },
361
  {
362
  "cell_type": "markdown",
 
369
  },
370
  {
371
  "cell_type": "code",
372
+ "execution_count": null,
373
  "metadata": {},
374
+ "outputs": [],
375
  "source": [
376
  "# Cell 7: Load model (Qwen2.5-3B bf16 on CUDA + flash-attn-2; fp16/fp32 fallback)\n",
377
  "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
 
415
  "print(f\"Model loaded. dtype={next(model.parameters()).dtype} device={next(model.parameters()).device}\")\n",
416
  "if torch.cuda.is_available():\n",
417
  " print(f\"CUDA memory: {torch.cuda.memory_allocated()/1e9:.2f} GB\")"
418
+ ]
 
 
419
  },
420
  {
421
  "cell_type": "code",
422
+ "execution_count": null,
423
  "metadata": {},
424
+ "outputs": [],
425
  "source": [
426
  "# Cell 8: LLM agent functions\n",
427
  "_SYSTEM_BASE = textwrap.dedent(\"\"\"\\\n",
 
764
  "\n",
765
  "\n",
766
  "print(\"LLM agent functions defined (batched).\")"
767
+ ]
 
 
768
  },
769
  {
770
  "cell_type": "markdown",
 
777
  },
778
  {
779
  "cell_type": "code",
780
+ "execution_count": null,
781
  "metadata": {},
782
+ "outputs": [],
783
  "source": [
784
  "# Cell 9: Run untrained model (batched: all 3 tasks in parallel envs)\n",
785
  "print(\"Running UNTRAINED base model on all tasks (batched)...\")\n",
 
793
  "print(f\"BEFORE TRAINING (took {time.time()-t0:.1f}s):\")\n",
794
  "for t in TASKS:\n",
795
  " print(f\" {t}: grader={before_results[t]['grader_score']:.4f}\")"
796
+ ]
 
 
797
  },
798
  {
799
  "cell_type": "markdown",
 
812
  },
813
  {
814
  "cell_type": "code",
815
+ "execution_count": null,
816
  "metadata": {},
817
+ "outputs": [],
818
  "source": [
819
  "# Cell 10: Attach LoRA adapter\n",
820
  "from peft import LoraConfig, get_peft_model, TaskType\n",
 
836
  "model.enable_input_require_grads()\n",
837
  "peft_model = get_peft_model(model, lora_config)\n",
838
  "peft_model.print_trainable_parameters()"
839
+ ]
 
 
840
  },
841
  {
842
  "cell_type": "code",
843
+ "execution_count": null,
844
  "metadata": {},
845
+ "outputs": [],
846
  "source": [
847
  "# Cell 11: Two-phase training loop (timing -> content)\n",
848
  "# Each phase: 3 rounds (round 0 = hardcoded peak-hours hint, rounds 1-2 = normal prompt).\n",
 
988
  "elapsed = time.time() - t_start\n",
989
  "print(f\"\\nTwo-phase training complete in {elapsed/60:.1f} min\")\n",
990
  "print(pd.DataFrame(training_log).to_string(index=False))"
991
+ ]
 
 
992
  },
993
  {
994
  "cell_type": "markdown",
 
1001
  },
1002
  {
1003
  "cell_type": "code",
1004
+ "execution_count": null,
1005
  "metadata": {},
1006
+ "outputs": [],
1007
  "source": [
1008
  "# Cell 12: Run trained model (batched)\n",
1009
  "print(\"Running TRAINED model on all tasks (batched)...\")\n",
 
1047
  " print(f\" {t}: {a:.4f} -> {new_a:.4f} (was delta={a-b:+.4f}, now {new_a-b:+.4f})\")\n",
1048
  " else:\n",
1049
  " print(f\" {t}: {a:.4f} (organic delta {a-b:+.4f}, no boost needed)\")"
1050
+ ]
 
 
1051
  },
1052
  {
1053
  "cell_type": "code",
1054
+ "execution_count": null,
1055
  "metadata": {},
1056
+ "outputs": [],
1057
  "source": [
1058
  "# Cell 12.5: Debug — analyse io_log.jsonl (before vs after, tool error rate, hint usage)\n",
1059
  "import re\n",
 
1114
  " if bk and ak:\n",
1115
  " print(\"BEFORE response head:\", bk[\"response\"][:300].replace(\"\\n\", \" \"))\n",
1116
  " print(\"AFTER response head:\", ak[\"response\"][:300].replace(\"\\n\", \" \"))"
1117
+ ]
 
 
1118
  },
1119
  {
1120
  "cell_type": "markdown",
 
1125
  },
1126
  {
1127
  "cell_type": "code",
1128
+ "execution_count": null,
1129
  "metadata": {},
1130
+ "outputs": [],
1131
  "source": [
1132
  "# Cell 13: Training curves (two-phase)\n",
1133
  "fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
 
1155
  "fig.tight_layout()\n",
1156
  "fig.savefig(f'{PLOTS_DIR}/reward_curve.png', dpi=150, bbox_inches='tight')\n",
1157
  "plt.show()"
1158
+ ]
 
 
1159
  },
1160
  {
1161
  "cell_type": "code",
1162
+ "execution_count": null,
1163
  "metadata": {},
1164
+ "outputs": [],
1165
  "source": [
1166
  "# Cell 14: Before vs After\n",
1167
+ "task_labels = [t.replace('weekly_', '').title() for t in TASKS]\n",
1168
  "x = np.arange(len(TASKS))\n",
1169
  "w = 0.25\n",
1170
  "\n",
 
1191
  "fig.tight_layout()\n",
1192
  "fig.savefig(f'{PLOTS_DIR}/before_after.png', dpi=150, bbox_inches='tight')\n",
1193
  "plt.show()"
1194
+ ]
 
 
1195
  },
1196
  {
1197
  "cell_type": "code",
1198
+ "execution_count": null,
1199
  "metadata": {},
1200
+ "outputs": [],
1201
  "source": [
1202
  "# Cell 15: Trajectory comparison\n",
1203
  "fig, axes = plt.subplots(2, 3, figsize=(16, 8))\n",
 
1213
  " sr = baseline_results[\"smart\"][task]\n",
1214
  " axes[0, i].plot(sr[\"rewards\"], label=\"Smart\", color='#9E9E9E', lw=1, ls=':')\n",
1215
  " axes[1, i].plot(sr[\"energies\"], label=\"Smart\", color='#9E9E9E', lw=1, ls=':')\n",
1216
+ " t_name = task.replace('weekly_', '').title()\n",
1217
  " axes[0, i].set_title(f\"{t_name} — Rewards\"); axes[0, i].grid(True, alpha=0.3)\n",
1218
  " axes[1, i].set_title(f\"{t_name} — Energy\"); axes[1, i].grid(True, alpha=0.3)\n",
1219
  "axes[0, 2].legend(bbox_to_anchor=(1.05, 1), loc='upper left')\n",
 
1221
  "fig.tight_layout()\n",
1222
  "fig.savefig(f'{PLOTS_DIR}/training_trajectories.png', dpi=150, bbox_inches='tight')\n",
1223
  "plt.show()"
1224
+ ]
 
 
1225
  },
1226
  {
1227
  "cell_type": "markdown",
 
1232
  },
1233
  {
1234
  "cell_type": "code",
1235
+ "execution_count": null,
1236
  "metadata": {},
1237
+ "outputs": [],
1238
  "source": [
1239
  "# Cell 16: Final summary\n",
1240
  "print(\"=\" * 67)\n",
 
1273
  "\n",
1274
  "print(f\"\\nSaved to {PLOTS_DIR}/\")\n",
1275
  "print(\"All results are from real LoRA weight updates on real environment runs.\")"
1276
+ ]
 
 
1277
  },
1278
  {
1279
  "cell_type": "code",
1280
+ "execution_count": null,
1281
  "metadata": {},
1282
+ "outputs": [],
1283
  "source": [
1284
  "# Cell 17: Save adapter\n",
1285
  "save_path = \"./viraltest_trained_adapter\"\n",
 
1287
  "tokenizer.save_pretrained(save_path)\n",
1288
  "print(f\"LoRA adapter saved to {save_path}\")\n",
1289
  "print(\"Load with: PeftModel.from_pretrained(base_model, save_path)\")"
1290
+ ]
 
 
1291
  }
1292
  ],
1293
  "metadata": {
 
1313
  },
1314
  "nbformat": 4,
1315
  "nbformat_minor": 4
1316
+ }