""" Convert grpo_vertex_v3.md → grpo_vertex_v3.ipynb Parses markdown with ```python fenced code blocks into Jupyter notebook cells. - ```python blocks → code cells - Everything else → markdown cells - Consecutive markdown sections are merged into single cells - Empty cells are skipped """ import json import re import sys from pathlib import Path def md_to_notebook(md_text: str) -> dict: """Parse markdown into notebook cells.""" cells = [] # Split on ```python and ``` boundaries # We need to track whether we're inside a code block lines = md_text.split('\n') current_type = 'markdown' # or 'code' current_lines = [] for line in lines: if line.strip() == '```python': # Flush current markdown if current_lines and current_type == 'markdown': text = '\n'.join(current_lines).strip() if text: cells.append(make_markdown_cell(text)) current_lines = [] current_type = 'code' elif line.strip() == '```' and current_type == 'code': # Flush current code if current_lines: code = '\n'.join(current_lines) # Remove trailing whitespace but keep structure code = code.rstrip() if code: cells.append(make_code_cell(code)) current_lines = [] current_type = 'markdown' else: current_lines.append(line) # Flush remaining if current_lines: text = '\n'.join(current_lines).strip() if text: if current_type == 'code': cells.append(make_code_cell(text)) else: cells.append(make_markdown_cell(text)) # Build notebook notebook = { "nbformat": 4, "nbformat_minor": 5, "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "name": "python", "version": "3.10.0", "mimetype": "text/x-python", "file_extension": ".py" } }, "cells": cells } return notebook def make_code_cell(source: str) -> dict: """Create a code cell.""" return { "cell_type": "code", "execution_count": None, "metadata": {}, "outputs": [], "source": source.split('\n') # Will be joined with \n later } def make_markdown_cell(source: str) -> dict: """Create a markdown cell.""" return { "cell_type": "markdown", "metadata": {}, "source": source.split('\n') } def format_notebook(notebook: dict) -> str: """ Format notebook JSON with proper source line handling. Each line in source needs a trailing \n except the last. """ for cell in notebook["cells"]: lines = cell["source"] if lines: # Add \n to all lines except the last cell["source"] = [line + '\n' for line in lines[:-1]] + [lines[-1]] return json.dumps(notebook, indent=1, ensure_ascii=False) def main(): input_path = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("grpo_vertex_v3.md") output_path = input_path.with_suffix('.ipynb') if not input_path.exists(): print(f"Error: {input_path} not found") sys.exit(1) md_text = input_path.read_text(encoding='utf-8') notebook = md_to_notebook(md_text) # Stats code_cells = sum(1 for c in notebook["cells"] if c["cell_type"] == "code") md_cells = sum(1 for c in notebook["cells"] if c["cell_type"] == "markdown") output_path.write_text(format_notebook(notebook), encoding='utf-8') print(f"✓ Converted {input_path} → {output_path}") print(f" {code_cells} code cells, {md_cells} markdown cells") print(f" Size: {output_path.stat().st_size / 1024:.1f} KB") if __name__ == "__main__": main()