""" md2pptx_plus.py A minimal Markdown → PPTX converter with decent code block support. - Slide breaks: explicit lines containing only '---' create a new slide. - Titles: the first heading (# or ##) in a slide becomes the title. - Content: paragraphs, bullet lists, numbered lists rendered as body text. - Code blocks (```lang ... ```): rendered in a shaded monospace box preserving indentation. - Tables (| a | b |): rendered as monospace blocks (simple). Usage: python md2pptx_plus.py input.md output.pptx Dependencies: pip install python-pptx """ from pathlib import Path from pptx import Presentation from pptx.util import Inches, Pt from pptx.enum.text import PP_ALIGN from pptx.dml.color import RGBColor from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE from pptx.enum.text import MSO_AUTO_SIZE import re import sys # --------------- helpers --------------- def split_slides(md_text: str): # Normalize newlines md_text = md_text.replace('\r\n', '\n').replace('\r', '\n').strip() # Split on lines containing only '---' parts = re.split(r'^\s*---\s*$', md_text, flags=re.MULTILINE) return [p.strip() for p in parts if p.strip()] def parse_blocks(slide_md: str): """ Yields ('type', payload) for each block in the slide. Types: 'heading', 'code', 'ul', 'ol', 'para', 'table' """ lines = slide_md.split('\n') i = 0 while i < len(lines): line = lines[i] # fenced code ```lang m = re.match(r'^\s*```(\w+)?\s*$', line) if m: lang = m.group(1) or '' i += 1 buf = [] while i < len(lines) and not re.match(r'^\s*```\s*$', lines[i]): buf.append(lines[i]) i += 1 # skip closing ``` i += 1 yield ('code', {'lang': lang, 'code': '\n'.join(buf)}) continue # heading m = re.match(r'^\s*(#{1,6})\s+(.*)\s*$', line) if m: level = len(m.group(1)) text = m.group(2).strip() yield ('heading', {'level': level, 'text': text}) i += 1 continue # table (simple detect: starts with '|' and has another '|' later) if re.match(r'^\s*\|.*\|\s*$', line): buf = [line] i += 1 while i < len(lines) and re.match(r'^\s*\|.*\|\s*$', lines[i]): buf.append(lines[i]) i += 1 yield ('table', {'text': '\n'.join(buf)}) continue # unordered list if re.match(r'^\s*[-*]\s+', line): buf = [] while i < len(lines) and re.match(r'^\s*[-*]\s+', lines[i]): buf.append(re.sub(r'^\s*[-*]\s+', '', lines[i]).rstrip()) i += 1 yield ('ul', {'items': buf}) continue # ordered list if re.match(r'^\s*\d+\.\s+', line): buf = [] while i < len(lines) and re.match(r'^\s*\d+\.\s+', lines[i]): buf.append(re.sub(r'^\s*\d+\.\s+', '', lines[i]).rstrip()) i += 1 yield ('ol', {'items': buf}) continue # paragraph (collect until blank) if line.strip(): buf = [line.rstrip()] i += 1 while i < len(lines) and lines[i].strip() and not re.match(r'^\s*(?:```|#{1,6}\s+|[-*]\s+|\d+\.\s+|\|)', lines[i]): buf.append(lines[i].rstrip()) i += 1 yield ('para', {'text': ' '.join(buf)}) continue # blank i += 1 def add_title_body_slide(prs, title_text): layout = prs.slide_layouts[1] # Title and Content slide = prs.slides.add_slide(layout) slide.shapes.title.text = title_text return slide def textbox(slide, text, left, top, width, height, font_size=18, bold=False, monospace=False, shaded=False): shape = slide.shapes.add_textbox(left, top, width, height) tf = shape.text_frame tf.word_wrap = True tf.clear() p = tf.paragraphs[0] run = p.add_run() run.text = text p.space_after = 0 p.space_before = 0 if monospace: run.font.name = 'Consolas' run.font.size = Pt(font_size) else: run.font.size = Pt(font_size) run.font.bold = bold if shaded: fill = shape.fill fill.solid() fill.fore_color.rgb = RGBColor(245, 245, 245) # light gray line = shape.line line.color.rgb = RGBColor(200, 200, 200) return shape def render_slide(prs, slide_md): # find first heading as title blocks = list(parse_blocks(slide_md)) title = None for btype, payload in blocks: if btype == 'heading' and payload['level'] <= 2: title = payload['text'] break if title is None: # fallback to first non-empty line first_line = next((ln for ln in slide_md.split('\n') if ln.strip()), 'Slide') title = first_line.strip('# ').strip() slide = add_title_body_slide(prs, title) # content area geometry left = Inches(1.0) top = Inches(1.8) width = Inches(8.5) cur_top = top # render remaining blocks for btype, payload in blocks: # skip the title heading we already used if btype == 'heading' and payload['text'] == title: continue if btype == 'para': shape = textbox(slide, payload['text'], left, cur_top, width, Inches(0.8), font_size=20) cur_top = Inches(shape.top.inches + shape.height.inches + 0.05) elif btype == 'ul': # bullet list shape = slide.shapes.add_textbox(left, cur_top, width, Inches(1.0)) tf = shape.text_frame tf.clear() for idx, item in enumerate(payload['items']): p = tf.add_paragraph() if idx > 0 else tf.paragraphs[0] p.text = item p.level = 0 p.font.size = Pt(20) cur_top = Inches(shape.top.inches + shape.height.inches + 0.05) elif btype == 'ol': # numbered list (simulate with bullets + numbers) shape = slide.shapes.add_textbox(left, cur_top, width, Inches(1.0)) tf = shape.text_frame tf.clear() for idx, item in enumerate(payload['items'], start=1): p = tf.add_paragraph() if idx > 1 else tf.paragraphs[0] p.text = f"{idx}. {item}" p.level = 0 p.font.size = Pt(20) cur_top = Inches(shape.top.inches + shape.height.inches + 0.05) elif btype == 'code': # code box (monospace, shaded) code_text = payload['code'].replace('\t', ' ') lines = code_text.split('\n') # estimate height: 0.33 inch per ~3 lines line_height = 0.22 h = max(0.6, min(5.0, 0.22 * max(1, len(lines)) + 0.2)) shape = textbox(slide, code_text, left, cur_top, width, Inches(h), font_size=16, monospace=True, shaded=True) cur_top = Inches(shape.top.inches + shape.height.inches + 0.05) elif btype == 'table': # simple: render as monospace pre block tbl_text = payload['text'] lines = tbl_text.split('\n') h = max(0.6, min(3.5, 0.25 * len(lines) + 0.2)) shape = textbox(slide, tbl_text, left, cur_top, width, Inches(h), font_size=16, monospace=True, shaded=True) cur_top = Inches(shape.top.inches + shape.height.inches + 0.05) elif btype == 'heading': # subheading in body shape = textbox(slide, payload['text'], left, cur_top, width, Inches(0.5), font_size=22, bold=True) cur_top = Inches(shape.top.inches + shape.height.inches + 0.05) return slide def md_to_pptx(md_path: Path, pptx_path: Path): text = Path(md_path).read_text(encoding='utf-8') slides = split_slides(text) prs = Presentation() # remove default slide if present while len(prs.slides) > 0: rId = prs.slides._sldIdLst[-1].rId prs.part.drop_rel(rId) del prs.slides._sldIdLst[-1] for slide_md in slides: render_slide(prs, slide_md) prs.save(pptx_path) if __name__ == "__main__": if len(sys.argv) < 3: print("Usage: python md2pptx_plus.py input.md output.pptx") sys.exit(1) md_to_pptx(Path(sys.argv[1]), Path(sys.argv[2]))