Spaces:

Shouryahere
/

infy

Running

infy / scripts /convert_slides_to_pptx.py

shourya

Add generated PPTX slides and improved converter script

bbaad34 22 days ago

16.1 kB

	#!/usr/bin/env python3
	"""
	Convert session markdown slides to styled PowerPoint presentations.

	Run from the infy/ directory:
	python3 scripts/convert_slides_to_pptx.py

	Requires: pip install python-pptx
	"""

	import re
	from pptx import Presentation
	from pptx.util import Inches, Pt
	from pptx.dml.color import RGBColor
	from pptx.enum.text import PP_ALIGN

	# ── Brand colours ───────────────────────────────────────────────────────────
	# Palette, written as RRGGBB hex strings. Entries that reuse an existing
	# colour are aliases of it, so the shared value is spelled out only once.
	HF_ORANGE = RGBColor.from_string('FF6B35')    # Hugging Face orange
	HF_DARK = RGBColor.from_string('1C1C1C')      # Near-black
	HF_BLUE = RGBColor.from_string('0062CC')      # Accent blue
	CODE_BG = RGBColor.from_string('F0F0F0')      # Light grey (code bg)
	CODE_FG = RGBColor.from_string('2D2D2D')      # Dark (code text)
	SUBHEAD_CLR = HF_BLUE                         # Blue subheadings
	TEXT_CLR = RGBColor.from_string('333333')     # Body text
	WHITE = RGBColor.from_string('FFFFFF')
	INLINE_CODE = RGBColor.from_string('C7263B')  # Red-ish for inline code
	QUOTE_BG = RGBColor.from_string('FFF3ED')     # Light orange (blockquote bg)
	TABLE_HDR = HF_ORANGE                         # Header row colour
	BORDER_CLR = RGBColor.from_string('CCCCCC')   # Light grey border

	# ── Slide dimensions (widescreen 16:9) ──────────────────────────────────────
	SW, SH = Inches(13.33), Inches(7.5)                 # slide width / height
	MARGIN = Inches(0.5)                                # left and right page margin
	TITLE_H = Inches(1.2)                               # height of the title bar
	BODY_TOP, BODY_BOTTOM = Inches(1.45), Inches(7.2)   # vertical extent of the body area
	BODY_W = SW - MARGIN - MARGIN                       # usable width between the margins

	# ── Low-level helpers ────────────────────────────────────────────────────────

	def _set_bg(slide, rgb: RGBColor):
	    """Fill the background of ``slide`` with the single solid colour ``rgb``."""
	    background_fill = slide.background.fill
	    background_fill.solid()
	    background_fill.fore_color.rgb = rgb


	def _add_rect(slide, x, y, w, h, rgb: RGBColor, border_rgb=None):
	    """
	    Add a solid-filled rectangle to ``slide`` and return the new shape.

	    ``x``, ``y``, ``w`` and ``h`` are passed straight to ``add_shape``. When
	    ``border_rgb`` is given the outline is drawn in that colour at 0.5 pt;
	    otherwise the outline takes the fill colour so that no border is visible.
	    """
	    rect = slide.shapes.add_shape(1, x, y, w, h)  # 1 = rectangle
	    interior = rect.fill
	    interior.solid()
	    interior.fore_color.rgb = rgb

	    if not border_rgb:
	        # Outline in the fill colour → invisible border.
	        rect.line.color.rgb = rgb
	        return rect

	    rect.line.color.rgb = border_rgb
	    rect.line.width = Pt(0.5)
	    return rect


	def _add_runs(para, text: str, base_size, base_color: RGBColor, base_bold=False):
	    """
	    Append inline-formatted runs to an existing paragraph.

	    ``text`` is split on three Markdown inline markers and every resulting
	    piece is added to ``para`` as its own run:

	    - ``**bold**``  → bold run, ``base_size``, ``base_color``
	    - `` `code` ``  → Courier New run in ``INLINE_CODE``, 2 pt smaller than
	      ``base_size`` but never below 10 pt
	    - ``*italic*``  → italic run, ``base_size``, ``base_color``
	    - anything else → plain run, bold only when ``base_bold`` is true

	    Args:
	        para: paragraph object exposing ``add_run()``.
	        text: source text, possibly containing the inline markers above.
	        base_size: font size for non-code runs; must expose ``.pt``.
	        base_color: colour for non-code runs.
	        base_bold: whether unmarked text is rendered bold.
	    """
	    # The bold alternative comes first so that `**x**` is not consumed as an
	    # italic `*…*` span surrounded by stray asterisks.
	    pieces = re.split(r'(\*\*[^*]+?\*\*|`[^`]+?`|\*[^*]+?\*)', text)
	    for piece in pieces:
	        if not piece:
	            continue
	        run = para.add_run()
	        # The length guards keep unmatched markers (a lone "*", "**" or "`")
	        # as literal text instead of stripping them down to an empty run.
	        if len(piece) > 4 and piece.startswith('**') and piece.endswith('**'):
	            run.text = piece[2:-2]
	            run.font.bold = True
	            run.font.size = base_size
	            run.font.color.rgb = base_color
	        elif len(piece) > 2 and piece.startswith('`') and piece.endswith('`'):
	            run.text = piece[1:-1]
	            run.font.name = 'Courier New'
	            run.font.size = Pt(max(base_size.pt - 2, 10))
	            run.font.color.rgb = INLINE_CODE
	        elif len(piece) > 2 and piece.startswith('*') and piece.endswith('*'):
	            run.text = piece[1:-1]
	            run.font.italic = True
	            run.font.size = base_size
	            run.font.color.rgb = base_color
	        else:
	            run.text = piece
	            run.font.bold = base_bold
	            run.font.size = base_size
	            run.font.color.rgb = base_color


	# ── Markdown parser ──────────────────────────────────────────────────────────

	def parse_slides(content: str):
	"""
	Split content into a list of (title, items[]) tuples, one per slide.

	A new slide begins whenever a `---`-separated block starts with a #/##
	heading. Continuation blocks (no heading) are merged into the previous
	slide so that e.g. presenter details on a title slide don't become a
	separate slide.
	"""
	raw_blocks = re.split(r'\n---\n', content)
	merged = []
	current = None
	for block in raw_blocks:
	block = block.strip()
	if not block:
	continue
	if re.match(r'^#{1,2} ', block):
	if current is not None:
	merged.append(current)
	current = block
	else:
	if current is not None:
	current += '\n' + block
	else:
	current = block
	if current:
	merged.append(current)

	return [_parse_one_slide(s) for s in merged]


	def _parse_one_slide(raw: str):
	"""
	Parse one slide's text into (title, items[]).

	Item shapes:
	{'type': 'subheading', 'text': str}
	{'type': 'bullet', 'text': str, 'level': int}
	{'type': 'code', 'lines': list[str]}
	{'type': 'quote', 'text': str}
	{'type': 'table_row', 'cells': list[str], 'header': bool}
	{'type': 'text', 'text': str}
	{'type': 'separator'}
	"""
	lines = raw.strip().split('\n')
	title = ''
	items = []
	title_found = False
	in_code = False
	code_buf = []
	table_next_header = True

	for line in lines:
	# ── Code fence ──
	if line.strip().startswith('```'):
	if in_code:
	items.append({'type': 'code', 'lines': list(code_buf)})
	code_buf.clear()
	in_code = False
	else:
	in_code = True
	continue
	if in_code:
	code_buf.append(line)
	continue

	# ── Slide title (first # or ## heading) ──
	if not title_found:
	if line.startswith('## ') or line.startswith('# '):
	raw_title = re.sub(r'^#{1,2} ', '', line).strip()
	title = re.sub(r'^Slide \d+[\.:]\s*', '', raw_title)
	title_found = True
	continue

	# ── Decorative separator inside a slide ──
	if line.strip() in ('---', '***'):
	items.append({'type': 'separator'})
	continue

	# ── ### subheading ──
	if line.startswith('### '):
	items.append({'type': 'subheading', 'text': line[4:].strip()})
	table_next_header = True
	continue

	# ── Blockquote ──
	if line.startswith('> '):
	items.append({'type': 'quote', 'text': line[2:].strip()})
	continue

	# ── Table row ──
	if line.strip().startswith('\|'):
	if re.match(r'^\\|[\-\s\\|:]+\\|$', line.strip()):
	table_next_header = False
	continue # separator row
	cells = [c.strip() for c in line.strip().strip('\|').split('\|')]
	items.append({'type': 'table_row', 'cells': cells,
	'header': table_next_header})
	table_next_header = False
	continue

	# ── Bullet / numbered list ──
	m = re.match(r'^(\s)([-•]\|\d+\.)\s+(.+)$', line)
	if m:
	level = len(m.group(1)) // 2
	items.append({'type': 'bullet', 'text': m.group(3), 'level': level})
	table_next_header = True
	continue

	# ── Empty line ──
	if not line.strip():
	continue

	# ── Plain / bold text ──
	items.append({'type': 'text', 'text': line.strip()})
	table_next_header = True

	return title, items


	# ── Slide renderer ───────────────────────────────────────────────────────────

	def _get_blank_layout(prs):
	for layout in prs.slide_layouts:
	if 'blank' in layout.name.lower():
	return layout
	return prs.slide_layouts[6]


	def render_slide(prs: Presentation, title: str, items: list):
	    """
	    Append one styled slide to ``prs``.

	    The slide gets a white background, an orange title bar with a thin dark
	    rule under it, and the title in white ('Slide' when ``title`` is empty).
	    ``items`` are then laid out top-to-bottom starting at ``BODY_TOP``; each
	    item type has a fixed height, and an item that would extend past
	    ``BODY_BOTTOM`` is skipped. Consecutive ``table_row`` items are buffered
	    and drawn together as one box. Items of an unknown type are ignored.
	    """
	    slide = prs.slides.add_slide(_get_blank_layout(prs))
	    _set_bg(slide, WHITE)

	    def new_frame(left, top, width, height, wrap=True):
	        """Add a text box to the slide and return its text frame."""
	        frame = slide.shapes.add_textbox(left, top, width, height).text_frame
	        frame.word_wrap = wrap
	        return frame

	    # ─ Orange title bar with a dark rule beneath it ─
	    _add_rect(slide, 0, 0, SW, TITLE_H, HF_ORANGE)
	    _add_rect(slide, 0, TITLE_H, SW, Inches(0.045), HF_DARK)

	    # ─ Title text ─
	    title_frame = new_frame(MARGIN, Inches(0.22), BODY_W, Inches(0.92))
	    heading = title_frame.paragraphs[0].add_run()
	    heading.text = title or 'Slide'
	    heading.font.size = Pt(30)
	    heading.font.bold = True
	    heading.font.color.rgb = WHITE

	    # ─ Body ─
	    cursor = BODY_TOP      # top edge of the next item to be placed
	    pending_rows = []      # table rows collected since the last flush

	    def flush_table():
	        """Draw the buffered table rows as one box, or drop them if too tall."""
	        nonlocal cursor
	        if not pending_rows:
	            return
	        box_h = Inches(0.32) * len(pending_rows) + Inches(0.15)
	        if cursor + box_h <= BODY_BOTTOM:
	            _add_rect(slide, MARGIN, cursor, BODY_W, box_h,
	                      RGBColor(0xF8, 0xF8, 0xF8), border_rgb=BORDER_CLR)
	            table_frame = new_frame(
	                MARGIN + Inches(0.12), cursor + Inches(0.06),
	                BODY_W - Inches(0.24), box_h - Inches(0.1))
	            for idx, row in enumerate(pending_rows):
	                # A fresh text frame already holds one empty paragraph.
	                row_para = table_frame.add_paragraph() if idx else table_frame.paragraphs[0]
	                header_row = row.get('header', False)
	                row_run = row_para.add_run()
	                row_run.text = ' │ '.join(row['cells'])
	                row_run.font.size = Pt(12)
	                row_run.font.name = 'Courier New'
	                row_run.font.bold = header_row
	                row_run.font.color.rgb = TABLE_HDR if header_row else HF_DARK
	            cursor += box_h + Inches(0.1)
	        pending_rows.clear()

	    for item in items:
	        kind = item['type']

	        # A non-table item terminates whatever table was being collected.
	        if kind != 'table_row' and pending_rows:
	            flush_table()

	        # ── Table row ───────────────────────────────────────────────────────
	        if kind == 'table_row':
	            pending_rows.append(item)

	        # ── Code block ──────────────────────────────────────────────────────
	        elif kind == 'code':
	            source_lines = item['lines']
	            if not source_lines:
	                continue
	            # Clamp the box to the remaining space; lines that no longer fit
	            # are left out below.
	            box_h = min(Inches(0.26) * len(source_lines) + Inches(0.22),
	                        BODY_BOTTOM - cursor)
	            if cursor + Inches(0.4) > BODY_BOTTOM:
	                continue
	            _add_rect(slide, MARGIN, cursor, BODY_W, box_h,
	                      CODE_BG, border_rgb=BORDER_CLR)
	            code_frame = new_frame(
	                MARGIN + Inches(0.15), cursor + Inches(0.1),
	                BODY_W - Inches(0.3), box_h - Inches(0.15),
	                wrap=False)
	            shown = max(1, int((box_h - Inches(0.15)) / Inches(0.26)))
	            for idx, code_line in enumerate(source_lines[:shown]):
	                code_para = code_frame.add_paragraph() if idx else code_frame.paragraphs[0]
	                code_run = code_para.add_run()
	                code_run.text = code_line
	                code_run.font.name = 'Courier New'
	                code_run.font.size = Pt(11)
	                code_run.font.color.rgb = CODE_FG
	            cursor += box_h + Inches(0.12)

	        # ── ### Subheading ──────────────────────────────────────────────────
	        elif kind == 'subheading':
	            row_h = Inches(0.44)
	            if cursor + row_h > BODY_BOTTOM:
	                continue
	            sub_frame = new_frame(MARGIN, cursor, BODY_W, row_h)
	            _add_runs(sub_frame.paragraphs[0], item['text'],
	                      Pt(19), SUBHEAD_CLR, base_bold=True)
	            cursor += row_h

	        # ── Blockquote ──────────────────────────────────────────────────────
	        elif kind == 'quote':
	            row_h = Inches(0.5)
	            if cursor + row_h > BODY_BOTTOM:
	                continue
	            # Narrow orange stripe on the left, tinted panel to its right.
	            _add_rect(slide, MARGIN, cursor, Inches(0.08), row_h, HF_ORANGE)
	            _add_rect(slide, MARGIN + Inches(0.08), cursor,
	                      BODY_W - Inches(0.08), row_h, QUOTE_BG)
	            quote_frame = new_frame(
	                MARGIN + Inches(0.22), cursor + Inches(0.07),
	                BODY_W - Inches(0.3), row_h - Inches(0.1))
	            _add_runs(quote_frame.paragraphs[0], item['text'],
	                      Pt(17), HF_DARK, base_bold=True)
	            cursor += row_h + Inches(0.06)

	        # ── Bullet ──────────────────────────────────────────────────────────
	        elif kind == 'bullet':
	            depth = item.get('level', 0)
	            inset = Inches(0.36 * (depth + 1))
	            row_h = Inches(0.34)
	            if cursor + row_h > BODY_BOTTOM:
	                continue
	            marker = '▸ ' if depth == 0 else '◦ '
	            bullet_para = new_frame(
	                MARGIN + inset, cursor, BODY_W - inset, row_h).paragraphs[0]
	            marker_run = bullet_para.add_run()
	            marker_run.text = marker
	            marker_run.font.size = Pt(15)
	            marker_run.font.color.rgb = HF_ORANGE
	            marker_run.font.bold = True
	            _add_runs(bullet_para, item['text'], Pt(15), TEXT_CLR)
	            cursor += row_h

	        # ── Separator ───────────────────────────────────────────────────────
	        elif kind == 'separator':
	            if cursor + Inches(0.15) <= BODY_BOTTOM:
	                _add_rect(slide, MARGIN, cursor + Inches(0.06),
	                          BODY_W, Inches(0.02), BORDER_CLR)
	                cursor += Inches(0.2)

	        # ── Plain / bold text ───────────────────────────────────────────────
	        elif kind == 'text':
	            row_h = Inches(0.36)
	            if cursor + row_h > BODY_BOTTOM:
	                continue
	            text_frame = new_frame(MARGIN, cursor, BODY_W, row_h)
	            _add_runs(text_frame.paragraphs[0], item['text'], Pt(15), TEXT_CLR)
	            cursor += row_h

	    # A table that runs to the end of the slide has not been drawn yet.
	    flush_table()


	# ── Main ─────────────────────────────────────────────────────────────────────

	def markdown_to_pptx(md_file: str, pptx_file: str):
	    """
	    Convert the markdown slide deck at ``md_file`` into a PowerPoint file.

	    The markdown is read as UTF-8, split into slides, rendered one slide at a
	    time into a new presentation sized ``SW`` × ``SH``, and saved to
	    ``pptx_file``. A one-line summary is printed afterwards.
	    """
	    with open(md_file, 'r', encoding='utf-8') as source:
	        markdown_text = source.read()
	    parsed = parse_slides(markdown_text)

	    prs = Presentation()
	    prs.slide_width, prs.slide_height = SW, SH
	    for slide_title, slide_items in parsed:
	        render_slide(prs, slide_title, slide_items)

	    prs.save(pptx_file)
	    print(f'✅ {md_file} → {pptx_file} ({len(parsed)} slides)')


	if __name__ == '__main__':
	    # (markdown source, PowerPoint destination) pairs; the relative paths
	    # expect the script to be started from the infy/ directory.
	    jobs = [
	        ('slides/SESSION1_SLIDES.md', 'slides/SESSION1_SLIDES.pptx'),
	        ('slides/SESSION2_SLIDES.md', 'slides/SESSION2_SLIDES.pptx'),
	    ]

	    print('=' * 60)
	    print('📊 Converting Markdown Slides → PowerPoint')
	    print('=' * 60 + '\n')

	    try:
	        for md_path, pptx_path in jobs:
	            markdown_to_pptx(md_path, pptx_path)
	        print('\n✅ All done!')
	        for _, pptx_path in jobs:
	            print(f' {pptx_path}')
	    except ImportError as e:
	        print(f'❌ Missing dependency: {e}')
	        print('\n Install with: pip3 install python-pptx')
	        # Exit non-zero so shells and CI can tell the conversion failed.
	        raise SystemExit(1)
	    except Exception as e:
	        import traceback
	        traceback.print_exc()
	        print(f'\n❌ Error: {e}')
	        # Exit non-zero so shells and CI can tell the conversion failed.
	        raise SystemExit(1)