infy / scripts /convert_slides_to_pptx.py
shourya
Add generated PPTX slides and improved converter script
bbaad34
#!/usr/bin/env python3
"""
Convert session markdown slides to styled PowerPoint presentations.

Run from the infy/ directory:

    python3 scripts/convert_slides_to_pptx.py

Requires: pip install python-pptx
"""
import re
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN
# ── Brand colours ───────────────────────────────────────────────────────────
# Palette shared by every slide.  Role colours that reuse a brand colour are
# declared as aliases so each RGB value is written down exactly once.
HF_ORANGE = RGBColor(0xFF, 0x6B, 0x35)  # Hugging Face orange (title bar, bullet markers, quote bar)
HF_DARK = RGBColor(0x1C, 0x1C, 0x1C)  # Near-black (title rule, table and quote text)
HF_BLUE = RGBColor(0x00, 0x62, 0xCC)  # Accent blue
CODE_BG = RGBColor(0xF0, 0xF0, 0xF0)  # Light grey (code bg)
CODE_FG = RGBColor(0x2D, 0x2D, 0x2D)  # Dark (code text)
SUBHEAD_CLR = HF_BLUE  # Blue subheadings (same value as the accent blue)
TEXT_CLR = RGBColor(0x33, 0x33, 0x33)  # Body text
WHITE = RGBColor(0xFF, 0xFF, 0xFF)  # Slide background and title text
INLINE_CODE = RGBColor(0xC7, 0x26, 0x3B)  # Red-ish for inline code
QUOTE_BG = RGBColor(0xFF, 0xF3, 0xED)  # Light orange (blockquote bg)
TABLE_HDR = HF_ORANGE  # Header row colour (same value as the brand orange)
BORDER_CLR = RGBColor(0xCC, 0xCC, 0xCC)  # Light grey border
# ── Slide dimensions (widescreen 16:9) ──────────────────────────────────────
# Every measurement is built with Inches(); the layout code adds and compares
# these values directly, so new measurements should use the same helper.
SW = Inches(13.33)  # slide width
SH = Inches(7.5)  # slide height
MARGIN = Inches(0.5)  # left/right margin used by the title and body
TITLE_H = Inches(1.2)  # height of the coloured title bar
BODY_TOP = Inches(1.45)  # y at which body content starts (below the title bar and its rule)
BODY_BOTTOM = Inches(7.2)  # lowest y body content may reach; items that would cross it are skipped
BODY_W = SW - 2 * MARGIN  # usable body width between the two margins
# ── Low-level helpers ────────────────────────────────────────────────────────
def _set_bg(slide, rgb: RGBColor):
    """Fill the background of ``slide`` with the solid colour ``rgb``."""
    background_fill = slide.background.fill
    # The fill has to be switched to "solid" before a foreground colour
    # can be assigned to it.
    background_fill.solid()
    background_fill.fore_color.rgb = rgb
def _add_rect(slide, x, y, w, h, rgb: RGBColor, border_rgb=None):
    """Draw a solid-filled rectangle on ``slide`` and return the new shape.

    Args:
        slide: Slide the rectangle is added to.
        x, y: Position of the top-left corner.
        w, h: Width and height.
        rgb: Fill colour.
        border_rgb: Outline colour.  When omitted the outline is painted in
            the fill colour, which makes it invisible.
    """
    rect = slide.shapes.add_shape(1, x, y, w, h)  # 1 = rectangle
    rect.fill.solid()
    rect.fill.fore_color.rgb = rgb

    outline = rect.line
    if not border_rgb:
        # Same colour as the fill -> the border blends into the shape.
        outline.color.rgb = rgb
        return rect

    outline.color.rgb = border_rgb
    outline.width = Pt(0.5)
    return rect
def _add_runs(para, text: str, base_size, base_color: RGBColor, base_bold=False):
    """Append inline-formatted runs to an existing paragraph.

    Recognised inline markup: ``**bold**``, `` `code` `` and ``*italic*``.
    Everything else is emitted as plain text.

    Args:
        para: Paragraph the runs are appended to (must offer ``add_run()``).
        text: Source text, possibly containing inline markup.
        base_size: Font size for normal text; inline code is drawn 2pt
            smaller, but never below 10pt.
        base_color: Colour for everything except inline code.
        base_bold: Whether surrounding text is bold.  Plain and italic runs
            inherit this; ``**bold**`` runs are always bold.
    """
    # The capturing group makes re.split() keep the markup tokens: even
    # indices are literal text, odd indices are matched tokens.  Classifying
    # by position (instead of by first/last character) keeps stray
    # delimiters such as a lone "*" or "``" as visible text rather than
    # turning them into empty runs.
    pieces = re.split(r'(\*\*[^*]+?\*\*|`[^`]+?`|\*[^*]+?\*)', text)
    for idx, piece in enumerate(pieces):
        if not piece:
            continue
        is_markup = idx % 2 == 1
        run = para.add_run()
        font = run.font
        if is_markup and piece.startswith('**'):
            run.text = piece[2:-2]
            font.bold = True
            font.size = base_size
            font.color.rgb = base_color
        elif is_markup and piece.startswith('`'):
            run.text = piece[1:-1]
            font.name = 'Courier New'
            font.size = Pt(max(base_size.pt - 2, 10))
            font.color.rgb = INLINE_CODE
        elif is_markup:
            # *italic*: keep the weight of the surrounding text so emphasis
            # inside a bold subheading or quote does not lose its bold.
            run.text = piece[1:-1]
            font.italic = True
            font.bold = base_bold
            font.size = base_size
            font.color.rgb = base_color
        else:
            run.text = piece
            font.bold = base_bold
            font.size = base_size
            font.color.rgb = base_color
# ── Markdown parser ──────────────────────────────────────────────────────────
def parse_slides(content: str):
    """Split markdown ``content`` into one ``(title, items)`` tuple per slide.

    The text is cut at lines that consist of exactly ``---``.  A new slide
    begins whenever a resulting block starts with a ``#``/``##`` heading.
    Blocks without a heading are continuation blocks and are merged into the
    previous slide, so that e.g. presenter details on a title slide don't
    become a separate slide.

    A ``---`` line inside a fenced code block (``` ... ```) is part of the
    code sample, not a slide break, and is left in place.  Only fences that
    are actually closed protect their contents; an unpaired trailing fence
    protects nothing, so one missing fence at the end cannot swallow the rest
    of the deck.

    Args:
        content: Full markdown source of the deck.

    Returns:
        A list with the result of ``_parse_one_slide`` for every slide.
    """
    lines = content.split('\n')

    # Indices of all lines enclosed by a closed pair of code fences
    # (fence lines included).  zip() drops an unpaired trailing opener.
    fence_rows = [i for i, ln in enumerate(lines)
                  if ln.strip().startswith('```')]
    fenced = set()
    for opener, closer in zip(fence_rows[0::2], fence_rows[1::2]):
        fenced.update(range(opener, closer + 1))

    # Cut at '---' lines.  The extra conditions reproduce what splitting on
    # the pattern "\n---\n" did: the first and last line never act as a
    # break, and of two directly consecutive '---' lines only the first one
    # does (the second stays in the slide as a decorative separator).
    raw_blocks = []
    pending = []
    last = len(lines) - 1
    prev_was_break = False
    for i, line in enumerate(lines):
        is_break = (line == '---' and 0 < i < last
                    and i not in fenced and not prev_was_break)
        if is_break:
            raw_blocks.append('\n'.join(pending))
            pending = []
        else:
            pending.append(line)
        prev_was_break = is_break
    raw_blocks.append('\n'.join(pending))

    # Merge heading-less continuation blocks into the slide before them.
    merged = []
    current = None
    for block in raw_blocks:
        block = block.strip()
        if not block:
            continue
        if re.match(r'^#{1,2} ', block):
            if current is not None:
                merged.append(current)
            current = block
        elif current is not None:
            current += '\n' + block
        else:
            # Content before the first heading becomes its own slide.
            current = block
    if current:
        merged.append(current)
    return [_parse_one_slide(s) for s in merged]
def _parse_one_slide(raw: str):
    """Parse one slide's markdown text into ``(title, items)``.

    The title is the first ``#`` or ``##`` heading, with a leading
    ``Slide <n>.`` / ``Slide <n>:`` prefix removed.  Every other non-empty
    line becomes one item.

    Item shapes:
        {'type': 'subheading', 'text': str}
        {'type': 'bullet', 'text': str, 'level': int}
        {'type': 'code', 'lines': list[str]}
        {'type': 'quote', 'text': str}
        {'type': 'table_row', 'cells': list[str], 'header': bool}
        {'type': 'text', 'text': str}
        {'type': 'separator'}

    Notes:
        * Bullets may start with ``-``, ``*``, the bullet ``\u2022``, the
          figure dash ``\u2012`` or a number followed by a dot; ``level`` is
          the indentation divided by two spaces.
        * The first row of every table is flagged as its header.  Any line
          that is not a table row ends the current table.
        * A code fence that is never closed still yields a ``code`` item
          with the lines collected so far.
    """
    title = ''
    items = []
    title_found = False
    in_code = False
    code_buf = []
    table_next_header = True

    for line in raw.strip().split('\n'):
        stripped = line.strip()

        # ── Code fence ──
        if stripped.startswith('```'):
            if in_code:
                items.append({'type': 'code', 'lines': list(code_buf)})
                code_buf.clear()
            in_code = not in_code
            table_next_header = True
            continue
        if in_code:
            code_buf.append(line)
            continue

        # Anything that is not a table row terminates the table in progress,
        # so the next table row seen is a header again.
        if not stripped.startswith('|'):
            table_next_header = True

        # ── Slide title (first # or ## heading) ──
        if not title_found and (line.startswith('## ') or line.startswith('# ')):
            raw_title = re.sub(r'^#{1,2} ', '', line).strip()
            title = re.sub(r'^Slide \d+[\.:]\s*', '', raw_title)
            title_found = True
            continue

        # ── Decorative separator inside a slide ──
        if stripped in ('---', '***'):
            items.append({'type': 'separator'})
            continue

        # ── ### subheading ──
        if line.startswith('### '):
            items.append({'type': 'subheading', 'text': line[4:].strip()})
            continue

        # ── Blockquote ──
        if line.startswith('> '):
            items.append({'type': 'quote', 'text': line[2:].strip()})
            continue

        # ── Table row ──
        if stripped.startswith('|'):
            if re.match(r'^\|[\-\s\|:]+\|$', stripped):
                table_next_header = False
                continue  # |---|---| separator row carries no content
            cells = [c.strip() for c in stripped.strip('|').split('|')]
            items.append({'type': 'table_row', 'cells': cells,
                          'header': table_next_header})
            table_next_header = False
            continue

        # ── Bullet / numbered list ──
        # The non-ASCII markers are written as escapes so the pattern
        # survives a re-encoding of this file.
        m = re.match(r'^(\s*)([-*\u2022\u2012]|\d+\.)\s+(.+)$', line)
        if m:
            level = len(m.group(1)) // 2
            items.append({'type': 'bullet', 'text': m.group(3), 'level': level})
            continue

        # ── Empty line ──
        if not stripped:
            continue

        # ── Plain / bold text ──
        items.append({'type': 'text', 'text': stripped})

    # Unterminated fence: keep what was collected instead of dropping it.
    if in_code and code_buf:
        items.append({'type': 'code', 'lines': list(code_buf)})

    return title, items
# ── Slide renderer ───────────────────────────────────────────────────────────
def _get_blank_layout(prs):
    """Return the slide layout to build slides on.

    The first layout whose name contains "blank" (case-insensitive) is
    chosen; if no layout matches, the layout at index 6 is used instead.
    """
    match = next(
        (candidate for candidate in prs.slide_layouts
         if 'blank' in candidate.name.lower()),
        None,
    )
    if match is not None:
        return match
    # The fallback is only looked up when the name search found nothing.
    return prs.slide_layouts[6]
def render_slide(prs: Presentation, title: str, items: list):
    """Add one fully-styled slide to the presentation.

    The slide gets a white background, an orange title bar with a thin dark
    rule underneath, and then the parsed ``items`` stacked top to bottom
    starting at ``BODY_TOP``.

    Args:
        prs: Presentation the new slide is appended to.
        title: Slide title; an empty title is shown as ``'Slide'``.
        items: Item dicts with a ``'type'`` of ``code``, ``table_row``,
            ``subheading``, ``quote``, ``bullet``, ``separator`` or ``text``.

    Layout is best-effort: every item has a fixed height, and an item that
    would extend below ``BODY_BOTTOM`` is skipped.  Code blocks are the
    exception; they are cut down to the space that is left as long as at
    least 0.4in remains.
    """
    slide = prs.slides.add_slide(_get_blank_layout(prs))
    _set_bg(slide, WHITE)

    # ─ Orange title bar with a thin dark rule below it ─
    _add_rect(slide, 0, 0, SW, TITLE_H, HF_ORANGE)
    _add_rect(slide, 0, TITLE_H, SW, Inches(0.045), HF_DARK)

    # ─ Title text ─
    tb = slide.shapes.add_textbox(MARGIN, Inches(0.22), BODY_W, Inches(0.92))
    tf = tb.text_frame
    tf.word_wrap = True
    p = tf.paragraphs[0]
    run = p.add_run()
    run.text = title or 'Slide'
    run.font.size = Pt(30)
    run.font.bold = True
    run.font.color.rgb = WHITE

    # ─ Body ─
    y = BODY_TOP      # running vertical cursor; each rendered item advances it
    table_rows = []   # consecutive table rows, buffered until the table ends

    def flush_table():
        """Draw the buffered table rows as one boxed block and empty the buffer."""
        nonlocal y
        if not table_rows:
            return
        row_h = Inches(0.32)
        h = row_h * len(table_rows) + Inches(0.15)
        if y + h > BODY_BOTTOM:
            # Not enough room: drop the table rather than overflow the slide.
            table_rows.clear()
            return
        _add_rect(slide, MARGIN, y, BODY_W, h,
                  RGBColor(0xF8, 0xF8, 0xF8), border_rgb=BORDER_CLR)
        ttb = slide.shapes.add_textbox(
            MARGIN + Inches(0.12), y + Inches(0.06),
            BODY_W - Inches(0.24), h - Inches(0.1))
        ttf = ttb.text_frame
        ttf.word_wrap = True
        for i, row_item in enumerate(table_rows):
            # A new text frame already holds one empty paragraph; reuse it.
            tp = ttf.paragraphs[0] if i == 0 else ttf.add_paragraph()
            is_hdr = row_item.get('header', False)
            run = tp.add_run()
            # Cells are joined with U+2502 (box-drawing vertical bar).
            run.text = ' \u2502 '.join(row_item['cells'])
            run.font.size = Pt(12)
            run.font.name = 'Courier New'
            run.font.bold = is_hdr
            run.font.color.rgb = TABLE_HDR if is_hdr else HF_DARK
        y += h + Inches(0.1)
        table_rows.clear()

    for item in items:
        # flush pending table if moving to a non-table item
        if item['type'] != 'table_row' and table_rows:
            flush_table()

        itype = item['type']

        # ── Code block ──────────────────────────────────────────────────────
        if itype == 'code':
            code_lines = item['lines']
            n = len(code_lines)
            if n == 0:
                continue
            h = Inches(0.26) * n + Inches(0.22)
            h = min(h, BODY_BOTTOM - y)  # shrink to the space that is left
            if y + Inches(0.4) > BODY_BOTTOM:
                continue
            _add_rect(slide, MARGIN, y, BODY_W, h, CODE_BG, border_rgb=BORDER_CLR)
            ctb = slide.shapes.add_textbox(
                MARGIN + Inches(0.15), y + Inches(0.1),
                BODY_W - Inches(0.3), h - Inches(0.15))
            ctf = ctb.text_frame
            ctf.word_wrap = False
            # Only as many lines as fit into the (possibly shrunk) box.
            visible = max(1, int((h - Inches(0.15)) / Inches(0.26)))
            for i, cl in enumerate(code_lines[:visible]):
                cp = ctf.paragraphs[0] if i == 0 else ctf.add_paragraph()
                run = cp.add_run()
                run.text = cl
                run.font.name = 'Courier New'
                run.font.size = Pt(11)
                run.font.color.rgb = CODE_FG
            y += h + Inches(0.12)

        # ── Table row ───────────────────────────────────────────────────────
        elif itype == 'table_row':
            table_rows.append(item)

        # ── ### Subheading ──────────────────────────────────────────────────
        elif itype == 'subheading':
            h = Inches(0.44)
            if y + h > BODY_BOTTOM:
                continue
            stb = slide.shapes.add_textbox(MARGIN, y, BODY_W, h)
            stf = stb.text_frame
            stf.word_wrap = True
            p = stf.paragraphs[0]
            _add_runs(p, item['text'], Pt(19), SUBHEAD_CLR, base_bold=True)
            y += h

        # ── Blockquote ──────────────────────────────────────────────────────
        elif itype == 'quote':
            h = Inches(0.5)
            if y + h > BODY_BOTTOM:
                continue
            # Narrow orange bar on the left, tinted panel for the text.
            _add_rect(slide, MARGIN, y, Inches(0.08), h, HF_ORANGE)
            _add_rect(slide, MARGIN + Inches(0.08), y,
                      BODY_W - Inches(0.08), h, QUOTE_BG)
            qtb = slide.shapes.add_textbox(
                MARGIN + Inches(0.22), y + Inches(0.07),
                BODY_W - Inches(0.3), h - Inches(0.1))
            qtf = qtb.text_frame
            qtf.word_wrap = True
            qp = qtf.paragraphs[0]
            _add_runs(qp, item['text'], Pt(17), HF_DARK, base_bold=True)
            y += h + Inches(0.06)

        # ── Bullet ──────────────────────────────────────────────────────────
        elif itype == 'bullet':
            level = item.get('level', 0)
            indent = Inches(0.36 * (level + 1))
            h = Inches(0.34)
            if y + h > BODY_BOTTOM:
                continue
            # U+25B8 (small right-pointing triangle) for top-level bullets,
            # U+25E6 (white bullet) for nested ones.
            dot = '\u25b8 ' if level == 0 else '\u25e6 '
            btb = slide.shapes.add_textbox(
                MARGIN + indent, y, BODY_W - indent, h)
            btf = btb.text_frame
            btf.word_wrap = True
            bp = btf.paragraphs[0]
            mr = bp.add_run()
            mr.text = dot
            mr.font.size = Pt(15)
            mr.font.color.rgb = HF_ORANGE
            mr.font.bold = True
            _add_runs(bp, item['text'], Pt(15), TEXT_CLR)
            y += h

        # ── Separator ───────────────────────────────────────────────────────
        elif itype == 'separator':
            if y + Inches(0.15) <= BODY_BOTTOM:
                _add_rect(slide, MARGIN, y + Inches(0.06),
                          BODY_W, Inches(0.02), BORDER_CLR)
                y += Inches(0.2)

        # ── Plain / bold text ───────────────────────────────────────────────
        elif itype == 'text':
            text = item['text']
            h = Inches(0.36)
            if y + h > BODY_BOTTOM:
                continue
            txtb = slide.shapes.add_textbox(MARGIN, y, BODY_W, h)
            txtf = txtb.text_frame
            txtf.word_wrap = True
            p = txtf.paragraphs[0]
            _add_runs(p, text, Pt(15), TEXT_CLR)
            y += h

    # flush any remaining table
    if table_rows:
        flush_table()
# ── Main ─────────────────────────────────────────────────────────────────────
def markdown_to_pptx(md_file: str, pptx_file: str):
    """Convert one markdown slide deck into a PowerPoint file.

    Reads ``md_file`` as UTF-8, parses it with ``parse_slides``, renders
    every slide into a new presentation sized ``SW`` x ``SH`` and saves the
    result to ``pptx_file``.  A one-line summary is printed on success.

    Args:
        md_file: Path of the markdown source.
        pptx_file: Path the presentation is written to.
    """
    with open(md_file, 'r', encoding='utf-8') as f:
        content = f.read()

    parsed = parse_slides(content)

    prs = Presentation()
    prs.slide_width = SW
    prs.slide_height = SH
    for title, items in parsed:
        render_slide(prs, title, items)
    prs.save(pptx_file)

    # U+2705 check mark, U+2192 right arrow (written as escapes so the
    # message survives a re-encoding of this file).
    print(f'\u2705 {md_file} \u2192 {pptx_file} ({len(parsed)} slides)')
if __name__ == '__main__':
    # Decks to convert: (markdown source, PowerPoint output).  Paths are
    # relative to the directory the script is run from.
    jobs = [
        ('slides/SESSION1_SLIDES.md', 'slides/SESSION1_SLIDES.pptx'),
        ('slides/SESSION2_SLIDES.md', 'slides/SESSION2_SLIDES.pptx'),
    ]

    print('=' * 60)
    # U+1F4CA bar chart, U+2192 right arrow
    print('\U0001F4CA Converting Markdown Slides \u2192 PowerPoint')
    print('=' * 60 + '\n')
    try:
        for md_path, pptx_path in jobs:
            markdown_to_pptx(md_path, pptx_path)
        print('\n\u2705 All done!')
        for _, pptx_path in jobs:
            print(f' {pptx_path}')
    except ImportError as e:
        # NOTE: python-pptx itself is imported at the top of the file, so a
        # missing installation fails before this handler is reached; this
        # branch only covers imports that happen during conversion.
        print(f'\u274c Missing dependency: {e}')
        print('\n Install with: pip3 install python-pptx')
        # Exit non-zero so callers (shell scripts, CI) can detect the failure.
        raise SystemExit(1)
    except Exception as e:
        import traceback
        traceback.print_exc()
        print(f'\n\u274c Error: {e}')
        raise SystemExit(1)