shourya committed on
Commit
bbaad34
·
1 Parent(s): d153152

Add generated PPTX slides and improved converter script

Browse files
scripts/convert_slides_to_pptx.py CHANGED
@@ -1,128 +1,419 @@
1
  #!/usr/bin/env python3
2
  """
3
- Convert markdown slides to PowerPoint (PPTX) format
4
- Requires: python-pptx
5
 
6
- Install: pip install python-pptx
 
 
 
7
  """
8
 
 
9
  from pptx import Presentation
10
  from pptx.util import Inches, Pt
11
- from pptx.enum.text import PP_ALIGN
12
  from pptx.dml.color import RGBColor
13
- import re
14
- import sys
15
 
16
- def markdown_to_pptx(md_file, pptx_file):
17
- """Convert markdown slides to PPTX format."""
18
-
19
- # Read markdown file
20
- with open(md_file, 'r') as f:
21
- content = f.read()
22
-
23
- # Split by slide delimiter (---)
24
- slides_text = content.split('\n---\n')
25
-
26
- # Create presentation
27
- prs = Presentation()
28
- prs.slide_width = Inches(10)
29
- prs.slide_height = Inches(7.5)
30
-
31
- # Define color scheme
32
- TITLE_COLOR = RGBColor(0, 102, 204) # Blue
33
- TEXT_COLOR = RGBColor(50, 50, 50) # Dark gray
34
-
35
- for slide_content in slides_text:
36
- slide_content = slide_content.strip()
37
- if not slide_content:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  continue
39
-
40
- # Add blank slide
41
- blank_slide_layout = prs.slide_layouts[6]
42
- slide = prs.slides.add_slide(blank_slide_layout)
43
-
44
- # Add background
45
- background = slide.background
46
- fill = background.fill
47
- fill.solid()
48
- fill.fore_color.rgb = RGBColor(255, 255, 255)
49
-
50
- # Parse slide content
51
- lines = slide_content.split('\n')
52
-
53
- # Add title (first line with #)
54
- title_found = False
55
- current_y = Inches(0.5)
56
-
57
- for line in lines:
58
- if line.startswith('# '):
59
- title_box = slide.shapes.add_textbox(Inches(0.5), current_y, Inches(9), Inches(1))
60
- title_frame = title_box.text_frame
61
- title_frame.word_wrap = True
62
- p = title_frame.paragraphs[0]
63
- p.text = line.replace('# ', '')
64
- p.font.size = Pt(44)
65
- p.font.bold = True
66
- p.font.color.rgb = TITLE_COLOR
67
- current_y += Inches(1.2)
68
- title_found = True
69
- break
70
- elif line.startswith('## '):
71
- title_box = slide.shapes.add_textbox(Inches(0.5), current_y, Inches(9), Inches(1))
72
- title_frame = title_box.text_frame
73
- title_frame.word_wrap = True
74
- p = title_frame.paragraphs[0]
75
- p.text = line.replace('## ', '')
76
- p.font.size = Pt(36)
77
- p.font.bold = True
78
- p.font.color.rgb = TITLE_COLOR
79
- current_y += Inches(0.8)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  title_found = True
81
- break
82
-
83
- # Add content (remaining lines)
84
- if title_found:
85
- lines = lines[1:]
86
-
87
- content_text = '\n'.join(lines).strip()
88
- if content_text:
89
- # Remove markdown formatting
90
- content_text = re.sub(r'\*\*(.*?)\*\*', r'\1', content_text) # Bold
91
- content_text = re.sub(r'\*(.*?)\*', r'\1', content_text) # Italic
92
- content_text = re.sub(r'`(.*?)`', r'\1', content_text) # Code
93
-
94
- content_box = slide.shapes.add_textbox(Inches(0.5), current_y, Inches(9), Inches(5.5))
95
- text_frame = content_box.text_frame
96
- text_frame.word_wrap = True
97
-
98
- p = text_frame.paragraphs[0]
99
- p.text = content_text
100
- p.font.size = Pt(18)
101
- p.font.color.rgb = TEXT_COLOR
102
- p.level = 0
103
-
104
- # Save presentation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  prs.save(pptx_file)
106
- print(f"βœ… Converted: {md_file} β†’ {pptx_file}")
107
-
108
- if __name__ == "__main__":
109
- # Convert both sessions
110
- print("=" * 60)
111
- print("πŸ“Š Converting Markdown Slides to PowerPoint")
112
- print("=" * 60 + "\n")
113
-
114
  try:
115
- markdown_to_pptx("slides/SESSION1_SLIDES.md", "slides/SESSION1_SLIDES.pptx")
116
- markdown_to_pptx("slides/SESSION2_SLIDES.md", "slides/SESSION2_SLIDES.pptx")
117
-
118
- print("\n" + "=" * 60)
119
- print("βœ… Conversion complete!")
120
- print("=" * 60)
121
- print("\nπŸ“ Generated files:")
122
- print(" - slides/SESSION1_SLIDES.pptx")
123
- print(" - slides/SESSION2_SLIDES.pptx")
124
- print("\nπŸ’‘ Tip: Open in PowerPoint/LibreOffice and adjust formatting as needed")
125
  except Exception as e:
126
- print(f"❌ Error: {str(e)}")
127
- print("\nπŸ’‘ Make sure you have python-pptx installed:")
128
- print(" pip install python-pptx")
 
1
  #!/usr/bin/env python3
2
  """
3
+ Convert session markdown slides to styled PowerPoint presentations.
 
4
 
5
+ Run from the infy/ directory:
6
+ python3 scripts/convert_slides_to_pptx.py
7
+
8
+ Requires: pip install python-pptx
9
  """
10
 
11
+ import re
12
  from pptx import Presentation
13
  from pptx.util import Inches, Pt
 
14
  from pptx.dml.color import RGBColor
15
+ from pptx.enum.text import PP_ALIGN
 
16
 
17
# ── Brand colours ───────────────────────────────────────────────────────────
# Shared palette for every generated slide; each colour is given as hex bytes.
HF_ORANGE = RGBColor(0xFF, 0x6B, 0x35)  # Hugging Face orange
HF_DARK = RGBColor(0x1C, 0x1C, 0x1C)  # Near-black
HF_BLUE = RGBColor(0x00, 0x62, 0xCC)  # Accent blue
CODE_BG = RGBColor(0xF0, 0xF0, 0xF0)  # Light grey (code bg)
CODE_FG = RGBColor(0x2D, 0x2D, 0x2D)  # Dark (code text)
SUBHEAD_CLR = RGBColor(0x00, 0x62, 0xCC)  # Blue subheadings (same value as HF_BLUE)
TEXT_CLR = RGBColor(0x33, 0x33, 0x33)  # Body text
WHITE = RGBColor(0xFF, 0xFF, 0xFF)
INLINE_CODE = RGBColor(0xC7, 0x26, 0x3B)  # Red-ish for inline code
QUOTE_BG = RGBColor(0xFF, 0xF3, 0xED)  # Light orange (blockquote bg)
TABLE_HDR = RGBColor(0xFF, 0x6B, 0x35)  # Header row colour (same value as HF_ORANGE)
BORDER_CLR = RGBColor(0xCC, 0xCC, 0xCC)  # Light grey border

# ── Slide dimensions (widescreen 16:9) ──────────────────────────────────────
SW = Inches(13.33)  # slide width
SH = Inches(7.5)  # slide height
MARGIN = Inches(0.5)  # left edge of the title and body text boxes
TITLE_H = Inches(1.2)  # height of the coloured title bar
BODY_TOP = Inches(1.45)  # y position where body items start
BODY_BOTTOM = Inches(7.2)  # body items may not extend below this y position
BODY_W = SW - 2 * MARGIN  # usable width between the left and right margins
39
+
40
+ # ── Low-level helpers ────────────────────────────────────────────────────────
41
+
42
def _set_bg(slide, rgb: RGBColor):
    """Fill the background of *slide* with the single solid colour *rgb*."""
    background_fill = slide.background.fill
    # Switch the fill to "solid" first, then assign its foreground colour.
    background_fill.solid()
    background_fill.fore_color.rgb = rgb
46
+
47
+
48
def _add_rect(slide, x, y, w, h, rgb: RGBColor, border_rgb=None):
    """Draw a solid-filled rectangle on *slide* and return the new shape.

    Args:
        slide: Slide that receives the shape.
        x, y, w, h: Position and size passed straight to ``add_shape``.
        rgb: Fill colour.
        border_rgb: Optional outline colour. When given, a 0.5 pt outline in
            that colour is drawn; otherwise the outline takes the fill colour.
    """
    rect = slide.shapes.add_shape(1, x, y, w, h)  # 1 = rectangle
    rect.fill.solid()
    rect.fill.fore_color.rgb = rgb

    if not border_rgb:
        # same colour as fill → invisible border
        rect.line.color.rgb = rgb
        return rect

    rect.line.color.rgb = border_rgb
    rect.line.width = Pt(0.5)
    return rect
58
+
59
+
60
def _add_runs(para, text: str, base_size, base_color: RGBColor, base_bold=False):
    """Append inline-formatted runs to an existing paragraph.

    ``**bold**``, ``*italic*`` and `` `code` `` spans in *text* become
    separately styled runs; everything else becomes a plain run.

    Args:
        para: Paragraph object that provides ``add_run()``.
        text: Source text, possibly containing inline markdown.
        base_size: Font size for plain/bold/italic runs; inline code is drawn
            2 pt smaller (never below 10 pt).
        base_color: Font colour for everything except inline code.
        base_bold: Whether plain and italic runs are bold.
    """
    # Because the pattern is one capturing group, re.split() returns plain text
    # at even indices and the matched markup spans at odd indices.  Classifying
    # by index (rather than by the segment's first/last characters) keeps stray
    # markers such as a lone "**" as literal text instead of an empty run.
    parts = re.split(r'(\*\*[^*]+?\*\*|`[^`]+?`|\*[^*]+?\*)', text)
    for index, part in enumerate(parts):
        if not part:
            continue
        is_markup = index % 2 == 1
        run = para.add_run()
        if is_markup and part.startswith('**'):
            run.text = part[2:-2]
            run.font.bold = True
            run.font.size = base_size
            run.font.color.rgb = base_color
        elif is_markup and part.startswith('`'):
            run.text = part[1:-1]
            run.font.name = 'Courier New'
            run.font.size = Pt(max(base_size.pt - 2, 10))
            run.font.color.rgb = INLINE_CODE
        elif is_markup:
            # Remaining alternative of the pattern: *italic*.
            run.text = part[1:-1]
            run.font.italic = True
            # Keep the surrounding weight so italics inside a bold line
            # (subheading, quote) stay bold like the plain text around them.
            run.font.bold = base_bold
            run.font.size = base_size
            run.font.color.rgb = base_color
        else:
            run.text = part
            run.font.bold = base_bold
            run.font.size = base_size
            run.font.color.rgb = base_color
87
+
88
+
89
+ # ── Markdown parser ──────────────────────────────────────────────────────────
90
+
91
def parse_slides(content: str):
    """
    Turn a whole markdown deck into a list of (title, items[]) tuples.

    The text is cut at lines consisting of `---`. A chunk that opens with a
    # or ## heading starts a new slide; a chunk without such a heading is
    appended to the slide before it (so e.g. presenter details following a
    title slide stay on that slide). Chunks that are empty after stripping
    are ignored.
    """
    slide_texts = []   # completed raw slide texts, in order
    pending = None     # raw text of the slide currently being assembled

    for chunk in re.split(r'\n---\n', content):
        chunk = chunk.strip()
        if not chunk:
            continue

        opens_slide = re.match(r'^#{1,2} ', chunk) is not None
        if opens_slide and pending is not None:
            slide_texts.append(pending)

        if opens_slide or pending is None:
            pending = chunk
        else:
            # Continuation chunk: glue it onto the slide in progress.
            pending += '\n' + chunk

    if pending:
        slide_texts.append(pending)

    return [_parse_one_slide(slide_text) for slide_text in slide_texts]
120
+
121
+
122
def _parse_one_slide(raw: str):
    """
    Parse one slide's text into (title, items[]).

    The first line starting with "# " or "## " becomes the title (a leading
    "Slide N:" / "Slide N." prefix is removed). Every other non-blank line is
    classified into one item.

    Item shapes:
      {'type': 'subheading', 'text': str}
      {'type': 'bullet',     'text': str, 'level': int}
      {'type': 'code',       'lines': list[str]}
      {'type': 'quote',      'text': str}
      {'type': 'table_row',  'cells': list[str], 'header': bool}
      {'type': 'text',       'text': str}
      {'type': 'separator'}

    Returns:
      A ``(title, items)`` tuple; ``title`` is '' when no heading was found.
    """
    lines = raw.strip().split('\n')
    title = ''
    items = []
    title_found = False
    in_code = False
    code_buf = []
    # True while the next table row encountered is the first row of a table.
    table_next_header = True

    for line in lines:
        stripped = line.strip()

        # ── Code fence ──
        if stripped.startswith('```'):
            if in_code:
                items.append({'type': 'code', 'lines': code_buf})
                code_buf = []
            in_code = not in_code
            # A code block ends any table that preceded it.
            table_next_header = True
            continue
        if in_code:
            code_buf.append(line)
            continue

        # Any non-blank line that is not a table row ends the current table,
        # so the next table row seen starts a new table with its own header.
        if stripped and not stripped.startswith('|'):
            table_next_header = True

        # ── Slide title (first # or ## heading) ──
        if not title_found and line.startswith(('# ', '## ')):
            raw_title = re.sub(r'^#{1,2} ', '', line).strip()
            title = re.sub(r'^Slide \d+[\.:]\s*', '', raw_title)
            title_found = True
            continue

        # ── Decorative separator inside a slide ──
        if stripped in ('---', '***'):
            items.append({'type': 'separator'})
            continue

        # ── ### subheading ──
        if line.startswith('### '):
            items.append({'type': 'subheading', 'text': line[4:].strip()})
            continue

        # ── Blockquote ──
        if line.startswith('> '):
            items.append({'type': 'quote', 'text': line[2:].strip()})
            continue

        # ── Table row ──
        if stripped.startswith('|'):
            if re.match(r'^\|[\-\s\|:]+\|$', stripped):
                table_next_header = False
                continue  # separator row (|---|---|) carries no content
            cells = [c.strip() for c in stripped.strip('|').split('|')]
            items.append({'type': 'table_row', 'cells': cells,
                          'header': table_next_header})
            table_next_header = False
            continue

        # ── Bullet / numbered list ──
        m = re.match(r'^(\s*)([-*•]|\d+\.)\s+(.+)$', line)
        if m:
            level = len(m.group(1)) // 2  # two leading spaces per nesting level
            items.append({'type': 'bullet', 'text': m.group(3), 'level': level})
            continue

        # ── Empty line ──
        if not stripped:
            continue

        # ── Plain / bold text ──
        items.append({'type': 'text', 'text': stripped})

    # An unterminated code fence would otherwise lose everything after it.
    if in_code and code_buf:
        items.append({'type': 'code', 'lines': code_buf})

    return title, items
209
+
210
+
211
+ # ── Slide renderer ───────────────────────────────────────────────────────────
212
+
213
def _get_blank_layout(prs):
    """Return the first slide layout whose name contains "blank".

    The match is case-insensitive. When no layout name matches, the layout at
    index 6 of ``prs.slide_layouts`` is returned instead.
    """
    blank = next(
        (candidate for candidate in prs.slide_layouts
         if 'blank' in candidate.name.lower()),
        None,
    )
    if blank is not None:
        return blank
    return prs.slide_layouts[6]
218
+
219
+
220
def render_slide(prs: Presentation, title: str, items: list):
    """Add one fully-styled slide to the presentation.

    The slide gets a white background and an orange title bar, then every
    entry of *items* is laid out top-to-bottom starting at ``BODY_TOP``.
    Each item type has a fixed height. An item that would extend below
    ``BODY_BOTTOM`` is skipped; code blocks and tables are clipped to the
    lines/rows that still fit.

    Args:
        prs: Presentation the new slide is appended to.
        title: Text for the title bar; 'Slide' is used when it is empty.
        items: Item dicts in the shapes documented on ``_parse_one_slide``
            ('code', 'table_row', 'subheading', 'quote', 'bullet',
            'separator', 'text'). Other types are ignored.
    """
    slide = prs.slides.add_slide(_get_blank_layout(prs))
    _set_bg(slide, WHITE)

    # ─ Orange title bar with a thin dark rule underneath ─
    _add_rect(slide, 0, 0, SW, TITLE_H, HF_ORANGE)
    _add_rect(slide, 0, TITLE_H, SW, Inches(0.045), HF_DARK)

    # ─ Title text ─
    tb = slide.shapes.add_textbox(MARGIN, Inches(0.22), BODY_W, Inches(0.92))
    tf = tb.text_frame
    tf.word_wrap = True
    p = tf.paragraphs[0]
    run = p.add_run()
    run.text = title or 'Slide'
    run.font.size = Pt(30)
    run.font.bold = True
    run.font.color.rgb = WHITE

    # ─ Body ─
    y = BODY_TOP        # running vertical cursor for the next item
    table_rows = []     # consecutive table_row items waiting to be drawn

    def flush_table():
        """Draw the buffered table rows as one monospace box and clear them."""
        nonlocal y
        if not table_rows:
            return
        row_h = Inches(0.32)
        pad = Inches(0.15)
        # Keep the rows that fit above BODY_BOTTOM (as code blocks do with
        # their lines) instead of discarding a table that is slightly too tall.
        fitting = max(int((BODY_BOTTOM - y - pad) // row_h), 0)
        rows = table_rows[:fitting]
        table_rows.clear()
        if not rows:
            return
        h = row_h * len(rows) + pad
        _add_rect(slide, MARGIN, y, BODY_W, h,
                  RGBColor(0xF8, 0xF8, 0xF8), border_rgb=BORDER_CLR)
        ttb = slide.shapes.add_textbox(
            MARGIN + Inches(0.12), y + Inches(0.06),
            BODY_W - Inches(0.24), h - Inches(0.1))
        ttf = ttb.text_frame
        ttf.word_wrap = True
        for i, row_item in enumerate(rows):
            # A new text frame already holds one empty paragraph; reuse it.
            tp = ttf.paragraphs[0] if i == 0 else ttf.add_paragraph()
            is_hdr = row_item.get('header', False)
            run = tp.add_run()
            run.text = ' │ '.join(row_item['cells'])
            run.font.size = Pt(12)
            run.font.name = 'Courier New'
            run.font.bold = is_hdr
            run.font.color.rgb = TABLE_HDR if is_hdr else HF_DARK
        y += h + Inches(0.1)

    for item in items:
        # flush pending table if moving to a non-table item
        if item['type'] != 'table_row' and table_rows:
            flush_table()

        itype = item['type']

        # ── Code block ──────────────────────────────────────────────────────
        if itype == 'code':
            code_lines = item['lines']
            n = len(code_lines)
            if n == 0:
                continue
            h = Inches(0.26) * n + Inches(0.22)
            h = min(h, BODY_BOTTOM - y)  # clip the box to the remaining space
            if y + Inches(0.4) > BODY_BOTTOM:
                continue
            _add_rect(slide, MARGIN, y, BODY_W, h, CODE_BG, border_rgb=BORDER_CLR)
            ctb = slide.shapes.add_textbox(
                MARGIN + Inches(0.15), y + Inches(0.1),
                BODY_W - Inches(0.3), h - Inches(0.15))
            ctf = ctb.text_frame
            ctf.word_wrap = False
            # Number of code lines that fit inside the (possibly clipped) box.
            visible = max(1, int((h - Inches(0.15)) / Inches(0.26)))
            for i, cl in enumerate(code_lines[:visible]):
                cp = ctf.paragraphs[0] if i == 0 else ctf.add_paragraph()
                run = cp.add_run()
                run.text = cl
                run.font.name = 'Courier New'
                run.font.size = Pt(11)
                run.font.color.rgb = CODE_FG
            y += h + Inches(0.12)

        # ── Table row ───────────────────────────────────────────────────────
        elif itype == 'table_row':
            # Rows are buffered and drawn together by flush_table().
            table_rows.append(item)

        # ── ### Subheading ──────────────────────────────────────────────────
        elif itype == 'subheading':
            h = Inches(0.44)
            if y + h > BODY_BOTTOM:
                continue
            stb = slide.shapes.add_textbox(MARGIN, y, BODY_W, h)
            stf = stb.text_frame
            stf.word_wrap = True
            p = stf.paragraphs[0]
            _add_runs(p, item['text'], Pt(19), SUBHEAD_CLR, base_bold=True)
            y += h

        # ── Blockquote ──────────────────────────────────────────────────────
        elif itype == 'quote':
            h = Inches(0.5)
            if y + h > BODY_BOTTOM:
                continue
            # Narrow orange accent strip on the left, tinted panel beside it.
            _add_rect(slide, MARGIN, y, Inches(0.08), h, HF_ORANGE)
            _add_rect(slide, MARGIN + Inches(0.08), y,
                      BODY_W - Inches(0.08), h, QUOTE_BG)
            qtb = slide.shapes.add_textbox(
                MARGIN + Inches(0.22), y + Inches(0.07),
                BODY_W - Inches(0.3), h - Inches(0.1))
            qtf = qtb.text_frame
            qtf.word_wrap = True
            qp = qtf.paragraphs[0]
            _add_runs(qp, item['text'], Pt(17), HF_DARK, base_bold=True)
            y += h + Inches(0.06)

        # ── Bullet ──────────────────────────────────────────────────────────
        elif itype == 'bullet':
            level = item.get('level', 0)
            indent = Inches(0.36 * (level + 1))
            h = Inches(0.34)
            if y + h > BODY_BOTTOM:
                continue
            dot = '▸ ' if level == 0 else '◦ '
            btb = slide.shapes.add_textbox(
                MARGIN + indent, y, BODY_W - indent, h)
            btf = btb.text_frame
            btf.word_wrap = True
            bp = btf.paragraphs[0]
            # The marker is its own run so it can be coloured independently.
            mr = bp.add_run()
            mr.text = dot
            mr.font.size = Pt(15)
            mr.font.color.rgb = HF_ORANGE
            mr.font.bold = True
            _add_runs(bp, item['text'], Pt(15), TEXT_CLR)
            y += h

        # ── Separator ───────────────────────────────────────────────────────
        elif itype == 'separator':
            if y + Inches(0.15) <= BODY_BOTTOM:
                _add_rect(slide, MARGIN, y + Inches(0.06),
                          BODY_W, Inches(0.02), BORDER_CLR)
            # The cursor advances even when the rule itself was not drawn.
            y += Inches(0.2)

        # ── Plain / bold text ───────────────────────────────────────────────
        elif itype == 'text':
            text = item['text']
            h = Inches(0.36)
            if y + h > BODY_BOTTOM:
                continue
            txtb = slide.shapes.add_textbox(MARGIN, y, BODY_W, h)
            txtf = txtb.text_frame
            txtf.word_wrap = True
            p = txtf.paragraphs[0]
            _add_runs(p, text, Pt(15), TEXT_CLR)
            y += h

    # flush any remaining table
    if table_rows:
        flush_table()
382
+
383
+
384
+ # ── Main ─────────────────────────────────────────────────────────────────────
385
+
386
def markdown_to_pptx(md_file: str, pptx_file: str):
    """Convert one markdown slide deck into a styled PowerPoint file.

    Reads *md_file* as UTF-8, parses it with ``parse_slides``, renders each
    parsed slide with ``render_slide`` into a new presentation sized
    ``SW`` x ``SH``, saves it to *pptx_file* and prints a one-line summary.

    Args:
        md_file: Path of the markdown source.
        pptx_file: Path the .pptx file is written to.
    """
    with open(md_file, 'r', encoding='utf-8') as f:
        content = f.read()

    parsed = parse_slides(content)
    prs = Presentation()
    prs.slide_width = SW
    prs.slide_height = SH

    for title, items in parsed:
        render_slide(prs, title, items)

    prs.save(pptx_file)
    print(f'✅ {md_file} → {pptx_file} ({len(parsed)} slides)')
400
+
401
+
402
if __name__ == '__main__':
    # Script entry point: convert both session decks, relative to the current
    # working directory, and exit non-zero if anything fails so that shells
    # and CI can detect the failure.
    import sys
    import traceback

    print('=' * 60)
    print('📊 Converting Markdown Slides → PowerPoint')
    print('=' * 60 + '\n')

    try:
        markdown_to_pptx('slides/SESSION1_SLIDES.md', 'slides/SESSION1_SLIDES.pptx')
        markdown_to_pptx('slides/SESSION2_SLIDES.md', 'slides/SESSION2_SLIDES.pptx')
        print('\n✅ All done!')
        print(' slides/SESSION1_SLIDES.pptx')
        print(' slides/SESSION2_SLIDES.pptx')
    except ImportError as e:
        # NOTE: this only catches imports performed during conversion; a
        # missing python-pptx already fails at the module-level
        # `from pptx import ...` before this block is reached.
        print(f'❌ Missing dependency: {e}')
        print('\n Install with: pip3 install python-pptx')
        sys.exit(1)
    except Exception as e:
        traceback.print_exc()
        print(f'\n❌ Error: {e}')
        sys.exit(1)
slides/SESSION1_SLIDES.pptx ADDED
Binary file (58.9 kB). View file
 
slides/SESSION2_SLIDES.pptx ADDED
Binary file (75 kB). View file