# Source: tucano2-commerce / scripts/insert_comparison_cell.py
# Commit c641edb (rtferraz): "add: notebook cell insertion script for base vs
# tuned comparison"
"""
Run this on the workbench to insert the comparison cell into the notebook.
Usage: python scripts/insert_comparison_cell.py
"""
import json
from pathlib import Path
# Notebook that receives the comparison cells. Adjust if your notebook lives
# elsewhere.
NOTEBOOK_PATH = Path("/home/jupyter/tucano2/notebooks/v4_2_instruct_grpo.ipynb")

# Cell source. Prefer the copy inside this repo checkout (<repo>/notebooks/),
# located relative to this script. `resolve()` makes that lookup independent
# of the current working directory: with a relative `__file__`, plain
# `.parent.parent` collapses to "." and points at the wrong directory when the
# script is launched from inside `scripts/`.
CELL_SCRIPT_PATH = (
    Path(__file__).resolve().parent.parent
    / "notebooks"
    / "cell_comparison_base_vs_tuned.py"
)

# Fall back to the fixed workbench location when the repo-relative copy is
# missing.
if not CELL_SCRIPT_PATH.exists():
    CELL_SCRIPT_PATH = Path("/home/jupyter/tucano2/notebooks/cell_comparison_base_vs_tuned.py")
def main(notebook_path=None, cell_script_path=None) -> None:
    """Append the base-vs-tuned comparison cells to the end of a notebook.

    Adds one markdown cell (heading and description) followed by one code cell
    whose source is the text of the cell script, then rewrites the notebook
    file in place. If a markdown cell with the same heading is already present
    the notebook is left untouched, so running the script twice does not
    duplicate the cells.

    Args:
        notebook_path: Notebook file to modify. Defaults to the module-level
            ``NOTEBOOK_PATH``.
        cell_script_path: Python file whose content becomes the code cell.
            Defaults to the module-level ``CELL_SCRIPT_PATH``.

    Raises:
        OSError: If either file cannot be read or the notebook cannot be
            written.
        json.JSONDecodeError: If the notebook is not valid JSON.
        KeyError: If the notebook has no top-level ``"cells"`` entry.
    """
    if notebook_path is None:
        notebook_path = NOTEBOOK_PATH
    if cell_script_path is None:
        cell_script_path = CELL_SCRIPT_PATH

    heading = "## Cell 15: Base vs Tuned Comparison (Final Evaluation)\n"

    # Notebooks are UTF-8 JSON; never rely on the locale's default encoding.
    with open(notebook_path, encoding="utf-8") as f:
        nb = json.load(f)
    cells = nb["cells"]

    # Idempotence guard: a previous run already inserted the section.
    # `"".join` copes with both list-of-lines and single-string cell sources.
    already_present = any(
        cell.get("cell_type") == "markdown"
        and heading.strip() in "".join(cell.get("source", ""))
        for cell in cells
    )
    if already_present:
        print(f"Comparison cell already present; notebook left unchanged ({len(cells)} cells total)")
        print(f" Notebook: {notebook_path}")
        return

    with open(cell_script_path, encoding="utf-8") as f:
        cell_code = f.read()

    # Notebook sources are stored as a list of lines, each keeping its "\n".
    # Split once, and skip the empty tail that a trailing newline produces so
    # no stray "" entry is written; "".join(code_source) still equals the
    # script text exactly.
    script_lines = cell_code.split("\n")
    code_source = [line + "\n" for line in script_lines[:-1]]
    if script_lines[-1]:
        code_source.append(script_lines[-1])

    md_cell = {
        "cell_type": "markdown",
        "metadata": {},
        "source": [
            "---\n",
            "\n",
            heading,
            "\n",
            "**Purpose:** Definitive A/B test — same 65 eval prompts, same generation config, \n",
            "comparing the raw base model against the GRPO-tuned best checkpoint (step 1100).\n",
            "\n",
            "**Prerequisites:** Cells 1-5 + Cell 7 + Cell 10 run. Best checkpoint at:\n",
            "`models/tucano2-0.5B-instruct-grpo-v4.2-seed42/best_checkpoint/`\n",
            "\n",
            "**Output:** Per-task reward comparison table with Wilcoxon significance test + sample outputs.\n",
            "\n",
            "**Gate:** This is the final cell. No gate — it produces the experiment's conclusion.",
        ],
    }
    code_cell = {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        "source": code_source,
    }

    # Insert at end of notebook.
    cells.append(md_cell)
    cells.append(code_cell)

    # Serialize before opening the file for writing: opening with "w"
    # truncates immediately, so a serialization error must not be able to
    # leave an empty notebook behind.
    payload = json.dumps(nb, ensure_ascii=False, indent=1)
    with open(notebook_path, "w", encoding="utf-8") as f:
        f.write(payload)

    print(f"✓ Inserted comparison cell at end of notebook ({len(cells)} cells total)")
    print(f" Notebook: {notebook_path}")
# Run the insertion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()