rtferraz
/

tucano2-commerce

Model card Files Files and versions

xet

Community

rtferraz committed 5 days ago

Commit

c641edb

verified ·

1 Parent(s): 0c9199c

add: notebook cell insertion script for base vs tuned comparison

Browse files

Files changed (1) hide show

scripts/insert_comparison_cell.py +66 -0

scripts/insert_comparison_cell.py ADDED Viewed

	@@ -0,0 +1,66 @@

"""Append the base-vs-tuned comparison cells to the GRPO training notebook.

Run this on the workbench to insert the comparison cell into the notebook.

Usage: python scripts/insert_comparison_cell.py
"""
import json
from pathlib import Path

# Notebook that receives the new cells. Adjust if your notebook is elsewhere.
NOTEBOOK_PATH = Path("/home/jupyter/tucano2/notebooks/v4_2_instruct_grpo.ipynb")

# Preferred source of the cell code: <repo>/notebooks/, resolved relative to
# this script's own location (scripts/ -> repo root -> notebooks/).
CELL_SCRIPT_PATH = Path(__file__).parent.parent / "notebooks" / "cell_comparison_base_vs_tuned.py"
# Fall back to the fixed workbench location when the repo-relative file is
# not present.
if not CELL_SCRIPT_PATH.exists():
    CELL_SCRIPT_PATH = Path("/home/jupyter/tucano2/notebooks/cell_comparison_base_vs_tuned.py")


def _build_cells(cell_code):
    """Return the ``(markdown_cell, code_cell)`` pair to append to the notebook.

    Args:
        cell_code: Full text of the comparison cell's Python source.

    Returns:
        A tuple of two notebook cell dicts: the markdown header cell and the
        code cell whose ``source`` holds ``cell_code`` split into lines.
    """
    md_cell = {
        "cell_type": "markdown",
        "metadata": {},
        "source": [
            "---\n",
            "\n",
            "## Cell 15: Base vs Tuned Comparison (Final Evaluation)\n",
            "\n",
            "**Purpose:** Definitive A/B test — same 65 eval prompts, same generation config, \n",
            "comparing the raw base model against the GRPO-tuned best checkpoint (step 1100).\n",
            "\n",
            "**Prerequisites:** Cells 1-5 + Cell 7 + Cell 10 run. Best checkpoint at:\n",
            "`models/tucano2-0.5B-instruct-grpo-v4.2-seed42/best_checkpoint/`\n",
            "\n",
            "**Output:** Per-task reward comparison table with Wilcoxon significance test + sample outputs.\n",
            "\n",
            "**Gate:** This is the final cell. No gate — it produces the experiment's conclusion.",
        ],
    }
    code_cell = {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        # keepends=True keeps each line's newline and, unlike split("\n"),
        # does not leave an empty trailing entry when the file ends with a
        # newline. Joining the list reproduces cell_code exactly.
        "source": cell_code.splitlines(keepends=True),
    }
    return md_cell, code_cell


def main(notebook_path=None, cell_script_path=None):
    """Append the comparison markdown and code cells to the notebook.

    The notebook is read, extended with two cells, and written back in place.

    Args:
        notebook_path: Notebook to modify. Defaults to ``NOTEBOOK_PATH``.
        cell_script_path: Python file whose text becomes the code cell.
            Defaults to ``CELL_SCRIPT_PATH``.

    Raises:
        FileNotFoundError: If either input file does not exist.
        json.JSONDecodeError: If the notebook is not valid JSON.
        KeyError: If the notebook JSON has no ``"cells"`` entry.
    """
    nb_path = Path(NOTEBOOK_PATH if notebook_path is None else notebook_path)
    script_path = Path(CELL_SCRIPT_PATH if cell_script_path is None else cell_script_path)

    # Be explicit about UTF-8 rather than relying on the locale default: the
    # inserted markdown contains non-ASCII characters and ensure_ascii=False
    # writes them verbatim.
    with open(nb_path, encoding="utf-8") as f:
        nb = json.load(f)
    with open(script_path, encoding="utf-8") as f:
        cell_code = f.read()

    # Insert at end of notebook
    nb["cells"].extend(_build_cells(cell_code))

    with open(nb_path, "w", encoding="utf-8") as f:
        json.dump(nb, f, ensure_ascii=False, indent=1)
        # End the file with a newline, as text files conventionally do.
        f.write("\n")

    print(f"✓ Inserted comparison cell at end of notebook ({len(nb['cells'])} cells total)")
    print(f"  Notebook: {nb_path}")


if __name__ == "__main__":
    main()