| import gradio as gr |
| import fitz |
| import tempfile |
| import zipfile |
| from pathlib import Path |
| import re |
|
|
|
|
def sanitize_filename(text: str) -> str:
    """Turn arbitrary text into a filename-friendly token.

    Every character that is not a word character, whitespace, or a hyphen
    is removed. Each remaining run of whitespace is then replaced by a
    single underscore, and underscores at either end of the result are
    trimmed.

    Args:
        text: The raw text to convert (e.g. a bookmark title).

    Returns:
        The cleaned text; an empty string when nothing usable remains.
    """
    without_punctuation = re.sub(r"[^\w\s-]", "", text)
    underscored = re.sub(r"\s+", "_", without_punctuation)
    return underscored.strip("_")
|
|
|
|
def _chapter_page_ranges(chapters, page_count):
    """Map level-1 TOC entries to 0-based, inclusive page ranges.

    Args:
        chapters: TOC entries shaped ``[level, title, page, ...]`` where
            ``page`` is 1-based.
        page_count: Number of pages in the source document.

    Returns:
        A list of ``(number, title, start_page, end_page)`` tuples, where
        ``number`` is the entry's 1-based position in ``chapters``.
    """
    # Drop bookmarks that do not point at a page of this document first,
    # so they cannot distort the end page of the chapter before them.
    valid = [
        (number, entry[1], entry[2] - 1)
        for number, entry in enumerate(chapters, start=1)
        if 1 <= entry[2] <= page_count
    ]

    ranges = []
    for pos, (number, title, start_page) in enumerate(valid):
        if pos + 1 < len(valid):
            # A chapter ends on the page before the next chapter starts.
            end_page = valid[pos + 1][2] - 1
        else:
            end_page = page_count - 1
        # When the next chapter starts on the same (or an earlier) page,
        # keep at least the chapter's own first page instead of dropping it.
        ranges.append((number, title, start_page, max(start_page, end_page)))
    return ranges


def split_pdf(pdf_file):
    """Split a bookmarked PDF into one PDF per level-1 chapter, zipped.

    Args:
        pdf_file: The upload as delivered by the Gradio file input: a path
            (``str`` or ``Path``), an object exposing the path as ``.name``,
            or ``None`` when nothing was uploaded.

    Returns:
        A ``(zip_path, status)`` tuple. ``zip_path`` is the path of the
        generated ZIP archive as a string, or ``None`` when nothing could
        be extracted; ``status`` is a user-facing message.
    """
    if pdf_file is None:
        return None, "❌ No PDF uploaded."

    # Accept a plain path as well as a temp-file wrapper with `.name`.
    if isinstance(pdf_file, (str, Path)):
        pdf_path = str(pdf_file)
    else:
        pdf_path = pdf_file.name

    doc = fitz.open(pdf_path)
    try:
        toc = doc.get_toc()
        if not toc:
            return None, "❌ No bookmarks found in this PDF."

        chapters = [item for item in toc if item[0] == 1]
        if not chapters:
            return None, "❌ No level-1 chapters found."

        ranges = _chapter_page_ranges(chapters, doc.page_count)
        if not ranges:
            # Avoid handing back an empty archive with a success message.
            return None, "❌ No level-1 chapter points to a page in this PDF."

        # The directory must outlive this call: the ZIP is served from it.
        temp_dir = Path(tempfile.mkdtemp())
        zip_path = temp_dir / "chapters.zip"

        with zipfile.ZipFile(zip_path, "w") as zf:
            for number, title, start_page, end_page in ranges:
                name = f"Chapter_{number:02d}_{sanitize_filename(title)}.pdf"
                output = temp_dir / name

                new_doc = fitz.open()
                try:
                    new_doc.insert_pdf(
                        doc, from_page=start_page, to_page=end_page
                    )
                    new_doc.save(output)
                finally:
                    new_doc.close()

                zf.write(output, arcname=name)
                # The chapter now lives in the archive; drop the loose copy.
                output.unlink()
    finally:
        # Release the source document on every exit path.
        doc.close()

    return str(zip_path), "✅ Chapters extracted successfully!"
|
|
|
|
# Build the web UI: an upload field, a download slot for the resulting ZIP,
# a status line, and a button that triggers the split.
with gr.Blocks(title="📚 Smart PDF Chapter Splitter") as demo:
    gr.Markdown("## 📚 Smart PDF Chapter Splitter")
    gr.Markdown(
        "Upload a PDF with bookmarks and get clean chapter files — fast and deterministic."
    )

    pdf_input = gr.File(label="📄 Upload PDF", file_types=[".pdf"])
    output_zip = gr.File(label="📦 Download Chapters (ZIP)")
    status = gr.Markdown()

    split_btn = gr.Button("✂️ Split PDF")

    # split_pdf returns (zip path or None, status message), matching the
    # order of the two output components.
    split_btn.click(
        fn=split_pdf,
        inputs=pdf_input,
        outputs=[output_zip, status],
    )


# Runs at module level, so the app starts as soon as this file is executed.
demo.launch()
|
|