File size: 2,279 Bytes
c641edb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""
Run this on the workbench to insert the comparison cell into the notebook.
Usage: python scripts/insert_comparison_cell.py
"""
import json
from pathlib import Path

# Notebook that receives the new cells; edit this if yours lives elsewhere.
NOTEBOOK_PATH = Path("/home/jupyter/tucano2/notebooks/v4_2_instruct_grpo.ipynb")

# Source of the code cell. Prefer the copy located relative to this script
# (<script dir>/../notebooks/); if that file does not exist, fall back to the
# fixed workbench location.
_RELATIVE_CELL_SCRIPT = Path(__file__).parent.parent.joinpath(
    "notebooks", "cell_comparison_base_vs_tuned.py"
)
_WORKBENCH_CELL_SCRIPT = Path("/home/jupyter/tucano2/notebooks/cell_comparison_base_vs_tuned.py")
CELL_SCRIPT_PATH = (
    _RELATIVE_CELL_SCRIPT if _RELATIVE_CELL_SCRIPT.exists() else _WORKBENCH_CELL_SCRIPT
)

def main(notebook_path=None, cell_script_path=None):
    """Append the comparison markdown + code cells to the end of a notebook.

    The notebook is read as JSON, two cells are appended to its ``"cells"``
    list, and the file is rewritten in place. Each call appends a new pair of
    cells; there is no check for cells added by a previous run.

    Args:
        notebook_path: Notebook (``.ipynb`` JSON) file to modify. Defaults to
            the module-level ``NOTEBOOK_PATH``.
        cell_script_path: Python file whose text becomes the source of the
            appended code cell. Defaults to the module-level
            ``CELL_SCRIPT_PATH``.

    Raises:
        FileNotFoundError: If either file does not exist.
        json.JSONDecodeError: If the notebook file is not valid JSON.
        KeyError: If the notebook JSON has no top-level ``"cells"`` entry.
    """
    notebook_path = Path(NOTEBOOK_PATH if notebook_path is None else notebook_path)
    cell_script_path = Path(
        CELL_SCRIPT_PATH if cell_script_path is None else cell_script_path
    )

    # Notebooks are UTF-8 JSON; be explicit so the result does not depend on
    # the platform's default locale encoding.
    nb = json.loads(notebook_path.read_text(encoding="utf-8"))
    cell_code = cell_script_path.read_text(encoding="utf-8")

    # Create markdown + code cells
    md_cell = {
        "cell_type": "markdown",
        "metadata": {},
        "source": [
            "---\n",
            "\n",
            "## Cell 15: Base vs Tuned Comparison (Final Evaluation)\n",
            "\n",
            "**Purpose:** Definitive A/B test — same 65 eval prompts, same generation config, \n",
            "comparing the raw base model against the GRPO-tuned best checkpoint (step 1100).\n",
            "\n",
            "**Prerequisites:** Cells 1-5 + Cell 7 + Cell 10 run. Best checkpoint at:\n",
            "`models/tucano2-0.5B-instruct-grpo-v4.2-seed42/best_checkpoint/`\n",
            "\n",
            "**Output:** Per-task reward comparison table with Wilcoxon significance test + sample outputs.\n",
            "\n",
            "**Gate:** This is the final cell. No gate — it produces the experiment's conclusion.",
        ],
    }

    code_cell = {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        # One list entry per line, each keeping its own line ending. Joining
        # the entries reproduces the script text exactly, and a trailing
        # newline in the script does not produce an extra empty entry.
        "source": cell_code.splitlines(keepends=True),
    }

    # Insert at end of notebook
    nb["cells"].extend([md_cell, code_cell])

    # ensure_ascii=False emits non-ASCII characters verbatim, so the output
    # file must be opened as UTF-8 as well.
    with open(notebook_path, "w", encoding="utf-8") as f:
        json.dump(nb, f, ensure_ascii=False, indent=1)

    print(f"✓ Inserted comparison cell at end of notebook ({len(nb['cells'])} cells total)")
    print(f"  Notebook: {notebook_path}")

# Run the insertion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()