| """ |
Convert grpo_vertex_v3.md → grpo_vertex_v3.ipynb

Parses markdown with ```python fenced code blocks into Jupyter notebook cells.
- ```python blocks → code cells
- Everything else → markdown cells
| - Consecutive markdown sections are merged into single cells |
| - Empty cells are skipped |
| """ |
|
|
| import json |
| import re |
| import sys |
| from pathlib import Path |
|
|
|
|
def md_to_notebook(md_text: str) -> dict:
    """Parse markdown text into an nbformat 4.5 notebook dictionary.

    Lines between a ```python fence and the next bare ``` fence become a
    code cell. Everything else, including fenced blocks for other
    languages, is collected into markdown cells. Cells that are empty after
    trimming are skipped.

    A ```python fence met while a python block is still open closes that
    block implicitly, so buffered code is never discarded. A python block
    left open at the end of the input is still emitted as a code cell.

    Args:
        md_text: The full markdown document.

    Returns:
        A dict with ``nbformat``, ``nbformat_minor``, ``metadata`` and
        ``cells`` keys. Every cell carries a unique ``id``, which the
        nbformat 4.5 schema (declared via ``nbformat_minor``) requires.
    """
    cells = []

    def _flush(kind, buffered):
        """Append a cell of *kind* built from *buffered* unless it is empty."""
        body = '\n'.join(buffered)
        if kind == 'code':
            # Trim only the tail: leading blank lines and indentation are
            # part of the code and must survive.
            body = body.rstrip()
            if body:
                cells.append(make_code_cell(body))
        else:
            body = body.strip()
            if body:
                cells.append(make_markdown_cell(body))

    current_type = 'markdown'
    current_lines = []

    for line in md_text.split('\n'):
        marker = line.strip()
        if marker == '```python':
            # Emit whatever was pending (markdown, or an unclosed code
            # block) before starting the new code cell.
            _flush(current_type, current_lines)
            current_lines = []
            current_type = 'code'
        elif marker == '```' and current_type == 'code':
            _flush('code', current_lines)
            current_lines = []
            current_type = 'markdown'
        else:
            # Fence lines themselves are never stored; any other line,
            # including fences of non-python blocks, is kept verbatim.
            current_lines.append(line)

    # Trailing content, using the same trimming rules as inside the loop.
    _flush(current_type, current_lines)

    # nbformat 4.5 requires a unique id per cell (1-64 chars from
    # [a-zA-Z0-9-_]). Positional ids keep the output deterministic.
    # setdefault keeps an id if the cell factory already supplied one.
    for index, cell in enumerate(cells):
        cell.setdefault("id", f"cell-{index}")

    return {
        "nbformat": 4,
        "nbformat_minor": 5,
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3 (ipykernel)",
                "language": "python",
                "name": "python3",
            },
            "language_info": {
                "name": "python",
                "version": "3.10.0",
                "mimetype": "text/x-python",
                "file_extension": ".py",
            },
        },
        "cells": cells,
    }
|
|
|
|
def make_code_cell(source: str) -> dict:
    """Build an unexecuted code cell whose source is *source* split on newlines."""
    cell = dict(
        cell_type="code",
        execution_count=None,  # never run, so no execution counter yet
        metadata={},
        outputs=[],
    )
    # Source is stored line by line, without line terminators.
    cell["source"] = source.split('\n')
    return cell
|
|
|
|
def make_markdown_cell(source: str) -> dict:
    """Build a markdown cell whose source is *source* split on newlines."""
    cell = dict(cell_type="markdown", metadata={})
    # Source is stored line by line, without line terminators.
    cell["source"] = source.split('\n')
    return cell
|
|
|
|
def format_notebook(notebook: dict) -> str:
    """Serialize a notebook dict to JSON with newline-terminated source lines.

    In the serialized form every entry of a cell's ``source`` list ends
    with a newline except the last one. The input is not modified: the
    terminated lines are written to copies, so calling this function
    repeatedly on the same notebook always yields the same text.

    Args:
        notebook: Notebook dict with a ``cells`` list. Each cell must have
            a ``source`` entry, either a list of lines without terminators
            or a single string.

    Returns:
        The notebook as JSON text (``indent=1``, non-ASCII kept verbatim).

    Raises:
        KeyError: If the notebook has no ``cells`` or a cell has no
            ``source``.
    """
    formatted_cells = []
    for cell in notebook["cells"]:
        lines = cell["source"]
        # A plain string source is passed through untouched; slicing it
        # like a list would split it into single characters.
        if lines and not isinstance(lines, str):
            lines = [line + '\n' for line in lines[:-1]] + [lines[-1]]
        # Shallow copy keeps the key order and leaves the caller's cell as is.
        formatted_cells.append({**cell, "source": lines})

    return json.dumps(
        {**notebook, "cells": formatted_cells},
        indent=1,
        ensure_ascii=False,
    )
|
|
|
|
def main():
    """Convert a markdown file to a notebook next to it and print a summary.

    The input path is the first command-line argument, defaulting to
    ``grpo_vertex_v3.md``. The output path is the input path with its
    suffix replaced by ``.ipynb``.

    Exits with status 1, with a message on stderr, when the input does not
    exist or when the output path would be the input file itself.
    """
    input_path = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("grpo_vertex_v3.md")
    output_path = input_path.with_suffix('.ipynb')

    if not input_path.exists():
        print(f"Error: {input_path} not found", file=sys.stderr)
        sys.exit(1)

    # An input that already ends in .ipynb maps onto itself; writing would
    # replace the source file with its own conversion.
    if output_path == input_path:
        print(
            f"Error: {input_path} already has the .ipynb suffix; "
            "refusing to overwrite the input",
            file=sys.stderr,
        )
        sys.exit(1)

    md_text = input_path.read_text(encoding='utf-8')
    notebook = md_to_notebook(md_text)

    # Tally cells per type for the summary line.
    code_cells = sum(1 for c in notebook["cells"] if c["cell_type"] == "code")
    md_cells = sum(1 for c in notebook["cells"] if c["cell_type"] == "markdown")

    output_path.write_text(format_notebook(notebook), encoding='utf-8')

    # NOTE(review): the "β" glyphs below look like mis-decoded symbols
    # (probably a check mark and "→"); confirm the intended characters
    # before changing this user-facing output.
    print(f"β Converted {input_path} β {output_path}")
    print(f" {code_cells} code cells, {md_cells} markdown cells")
    print(f" Size: {output_path.stat().st_size / 1024:.1f} KB")
|
|
|
|
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|
|