File size: 2,279 Bytes
"""
Run this on the workbench to append the base-vs-tuned comparison cells
(a markdown header cell plus a code cell) to the end of the notebook.

Usage: python scripts/insert_comparison_cell.py
"""
import json
from pathlib import Path
# Notebook that receives the new cells; edit this if your notebook lives elsewhere.
NOTEBOOK_PATH = Path("/home/jupyter/tucano2/notebooks/v4_2_instruct_grpo.ipynb")

# First choice for the cell script: the "notebooks" directory located two
# levels up from this file's path. If nothing exists there, use the absolute
# workbench location instead.
CELL_SCRIPT_PATH = Path(__file__).parent.parent.joinpath(
    "notebooks", "cell_comparison_base_vs_tuned.py"
)
CELL_SCRIPT_PATH = (
    CELL_SCRIPT_PATH
    if CELL_SCRIPT_PATH.exists()
    else Path("/home/jupyter/tucano2/notebooks/cell_comparison_base_vs_tuned.py")
)
def main(notebook_path=None, cell_script_path=None):
    """Append the base-vs-tuned comparison cells to the end of a notebook.

    Loads the notebook JSON, appends one markdown cell (the "Cell 15" header
    and description) followed by one code cell containing the text of the
    cell script, rewrites the notebook in place, and prints a confirmation.

    Args:
        notebook_path: Notebook file to modify. Defaults to the module-level
            ``NOTEBOOK_PATH``.
        cell_script_path: File whose contents become the code cell. Defaults
            to the module-level ``CELL_SCRIPT_PATH``.

    Raises:
        OSError: If either file cannot be opened, or the notebook cannot be
            written back.
        json.JSONDecodeError: If the notebook is not valid JSON.
        KeyError: If the notebook JSON has no top-level ``"cells"`` entry.
    """
    if notebook_path is None:
        notebook_path = NOTEBOOK_PATH
    if cell_script_path is None:
        cell_script_path = CELL_SCRIPT_PATH

    # Always read and write as UTF-8: the notebook is dumped with
    # ensure_ascii=False and contains non-ASCII text, so relying on the
    # platform's locale encoding could corrupt it or raise an encoding error.
    with open(notebook_path, encoding="utf-8") as f:
        nb = json.load(f)

    with open(cell_script_path, encoding="utf-8") as f:
        cell_code = f.read()

    md_cell = {
        "cell_type": "markdown",
        "metadata": {},
        "source": [
            "---\n",
            "\n",
            "## Cell 15: Base vs Tuned Comparison (Final Evaluation)\n",
            "\n",
            "**Purpose:** Definitive A/B test — same 65 eval prompts, same generation config, \n",
            "comparing the raw base model against the GRPO-tuned best checkpoint (step 1100).\n",
            "\n",
            "**Prerequisites:** Cells 1-5 + Cell 7 + Cell 10 run. Best checkpoint at:\n",
            "`models/tucano2-0.5B-instruct-grpo-v4.2-seed42/best_checkpoint/`\n",
            "\n",
            "**Output:** Per-task reward comparison table with Wilcoxon significance test + sample outputs.\n",
            "\n",
            "**Gate:** This is the final cell. No gate — it produces the experiment's conclusion."
        ]
    }

    # The cell "source" is stored as a list of lines, each keeping its
    # trailing newline. Split once; when the script ends with a newline the
    # final piece is empty and is dropped, so no stray "" entry is stored
    # (joining the list still reproduces the script text exactly).
    pieces = cell_code.split("\n")
    source_lines = [piece + "\n" for piece in pieces[:-1]]
    if pieces[-1]:
        source_lines.append(pieces[-1])

    code_cell = {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        "source": source_lines,
    }

    # Insert at end of notebook: markdown header first, then the code cell.
    nb["cells"].append(md_cell)
    nb["cells"].append(code_cell)

    with open(notebook_path, "w", encoding="utf-8") as f:
        json.dump(nb, f, ensure_ascii=False, indent=1)

    print(f"✓ Inserted comparison cell at end of notebook ({len(nb['cells'])} cells total)")
    print(f" Notebook: {notebook_path}")
# Run the insertion only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
|