rtferraz committed on
Commit
c641edb
·
verified ·
1 Parent(s): 0c9199c

add: notebook cell insertion script for base vs tuned comparison

Browse files
Files changed (1) hide show
  1. scripts/insert_comparison_cell.py +66 -0
scripts/insert_comparison_cell.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Run this on the workbench to insert the comparison cell into the notebook.
3
+ Usage: python scripts/insert_comparison_cell.py
4
+ """
5
+ import json
6
+ from pathlib import Path
7
+
8
# Notebook that receives the comparison cells.
# Adjust if your notebook is elsewhere.
NOTEBOOK_PATH = Path("/home/jupyter/tucano2/notebooks/v4_2_instruct_grpo.ipynb")

# Preferred location of the cell source: <repo>/notebooks/, derived from this
# script's own location. resolve() makes the lookup independent of the current
# working directory when __file__ happens to be a relative path (otherwise
# Path("x.py").parent.parent collapses to "." and points at the wrong folder).
CELL_SCRIPT_PATH = (
    Path(__file__).resolve().parent.parent
    / "notebooks"
    / "cell_comparison_base_vs_tuned.py"
)

# Fall back to the fixed workbench checkout when the repo-relative file is missing.
if not CELL_SCRIPT_PATH.exists():
    CELL_SCRIPT_PATH = Path(
        "/home/jupyter/tucano2/notebooks/cell_comparison_base_vs_tuned.py"
    )
16
+
17
def main(notebook_path=None, cell_script_path=None):
    """Append the base-vs-tuned comparison cells to the end of a notebook.

    Two cells are appended: a markdown cell describing the comparison and a
    code cell whose source is the content of the cell script. The notebook
    file is rewritten in place.

    Args:
        notebook_path: Notebook to modify. Defaults to ``NOTEBOOK_PATH``.
        cell_script_path: Python file whose text becomes the code cell.
            Defaults to ``CELL_SCRIPT_PATH``.

    Raises:
        FileNotFoundError: If the notebook or the cell script does not exist.
        json.JSONDecodeError: If the notebook is not valid JSON.
        KeyError: If the notebook has no top-level ``"cells"`` list.
    """
    nb_path = Path(NOTEBOOK_PATH if notebook_path is None else notebook_path)
    script_path = Path(
        CELL_SCRIPT_PATH if cell_script_path is None else cell_script_path
    )

    # Read both files as UTF-8 explicitly: the notebook is written back with
    # ensure_ascii=False and contains non-ASCII text, so relying on the locale
    # default encoding could corrupt it or raise on non-UTF-8 systems.
    nb = json.loads(nb_path.read_text(encoding="utf-8"))
    cell_code = script_path.read_text(encoding="utf-8")

    heading = "## Cell 15: Base vs Tuned Comparison (Final Evaluation)"

    def _source_text(cell):
        # A cell's "source" may be stored as a list of lines or a single string.
        source = cell.get("source", "")
        return "".join(source) if isinstance(source, list) else source

    # Re-running the script must not stack duplicate copies of the cells.
    already_inserted = any(
        cell.get("cell_type") == "markdown" and heading in _source_text(cell)
        for cell in nb["cells"]
    )
    if already_inserted:
        print(f"Comparison cell already present, nothing to do: {nb_path}")
        return

    md_cell = {
        "cell_type": "markdown",
        "metadata": {},
        "source": [
            "---\n",
            "\n",
            heading + "\n",
            "\n",
            "**Purpose:** Definitive A/B test — same 65 eval prompts, same generation config, \n",
            "comparing the raw base model against the GRPO-tuned best checkpoint (step 1100).\n",
            "\n",
            "**Prerequisites:** Cells 1-5 + Cell 7 + Cell 10 run. Best checkpoint at:\n",
            "`models/tucano2-0.5B-instruct-grpo-v4.2-seed42/best_checkpoint/`\n",
            "\n",
            "**Output:** Per-task reward comparison table with Wilcoxon significance test + sample outputs.\n",
            "\n",
            "**Gate:** This is the final cell. No gate — it produces the experiment's conclusion.",
        ],
    }

    code_cell = {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        # Keep line endings so the joined source equals the script text, and
        # avoid a dangling empty entry when the script ends with a newline.
        "source": cell_code.splitlines(keepends=True),
    }

    # Insert at end of notebook.
    nb["cells"].extend([md_cell, code_cell])

    # Serialise before touching the file so a serialisation error cannot
    # leave a truncated notebook on disk.
    payload = json.dumps(nb, ensure_ascii=False, indent=1)
    nb_path.write_text(payload, encoding="utf-8")

    print(f"✓ Inserted comparison cell at end of notebook ({len(nb['cells'])} cells total)")
    print(f" Notebook: {nb_path}")


if __name__ == "__main__":
    main()