| import os |
| import tempfile |
| from typing import List |
| import gradio as gr |
| from PyPDF2 import PdfReader, PdfWriter |
|
|
| |
# Load the stylesheet that sits next to this module. The encoding is given
# explicitly so decoding does not depend on the platform's default locale.
with open(os.path.join(os.path.dirname(__file__), "fancy.css"), encoding="utf-8") as f:
    custom_css = f.read()
|
|
| |
# Upper bound on accepted upload size, in bytes (1024^3).
MAX_SIZE_BYTES = 1024 ** 3
|
|
def parse_page_ranges(ranges: str, num_pages: int) -> List[int]:
    """
    Convert a string like "1-3,5,8-10" into a sorted list of zero-based page indices.

    Page numbers in ``ranges`` are one-based. Ranges are inclusive and are
    clamped to ``1..num_pages``; single pages outside that interval and
    descending ranges (e.g. "5-3") contribute nothing. Empty items, such as
    the one left by a trailing comma, are skipped.

    Args:
        ranges: Comma-separated page numbers and ``start-end`` ranges.
        num_pages: Total number of pages in the document.

    Returns:
        Sorted, de-duplicated zero-based page indices.

    Raises:
        ValueError: If an item is neither an integer nor a ``start-end``
            pair of integers.
    """
    pages = set()
    for part in ranges.split(','):
        part = part.strip()
        if not part:
            # Tolerate stray separators ("1-3,", "1,,2") rather than
            # failing on int("").
            continue
        if '-' in part:
            start_str, end_str = part.split('-', 1)
            # Clamp both ends into the document; range() is empty when the
            # clamped start lies past the clamped end.
            start = max(1, int(start_str))
            end = min(num_pages, int(end_str))
            pages.update(range(start - 1, end))
        else:
            p = int(part)
            if 1 <= p <= num_pages:
                pages.add(p - 1)
    return sorted(pages)
|
|
|
|
def split_pdf(file, page_ranges: str):
    """
    Extract the pages named by ``page_ranges`` from a PDF into a new file.

    Args:
        file: The uploaded PDF, either an object exposing its on-disk path
            as ``.name`` or the path itself.
        page_ranges: One-based page selection such as "1-3,5", in the
            format accepted by ``parse_page_ranges``.

    Returns:
        ``(out_path, None)`` on success, where ``out_path`` is a temporary
        ``.pdf`` file that is not deleted automatically; ``(None, message)``
        when the file is too large, cannot be read, or no valid page is
        selected.
    """
    # Accept both an upload wrapper with a ``.name`` path and a bare path.
    path = getattr(file, "name", file)

    file_size = os.path.getsize(path)
    if file_size > MAX_SIZE_BYTES:
        return None, f"File size exceeds 1 GB limit ({file_size / (1024*1024*1024):.2f} GB)."

    # Report unreadable input as an error tuple, the same way merge_pdfs
    # does, instead of letting the exception escape to the caller.
    try:
        reader = PdfReader(path)
        num_pages = len(reader.pages)
    except Exception as e:
        return None, f"Error reading PDF: {e}"

    try:
        page_indices = parse_page_ranges(page_ranges, num_pages)
    except Exception as e:
        return None, f"Error parsing page ranges: {e}"

    if not page_indices:
        return None, "No valid pages selected."

    writer = PdfWriter()
    try:
        for idx in page_indices:
            writer.add_page(reader.pages[idx])
    except Exception as e:
        return None, f"Error reading PDF: {e}"

    # Write through the handle NamedTemporaryFile already opened so it is
    # closed deterministically; delete=False keeps the file for download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f_out:
        writer.write(f_out)
        out_path = f_out.name

    return out_path, None
|
|
|
|
def merge_pdfs(files):
    """
    Concatenate the pages of several PDFs, in the order given, into one file.

    Args:
        files: Sequence of uploaded PDFs; each item is either an object
            exposing its on-disk path as ``.name`` or the path itself.

    Returns:
        ``(out_path, None)`` on success, where ``out_path`` is a temporary
        ``.pdf`` file that is not deleted automatically; ``(None, message)``
        when fewer than two files are given, the combined size exceeds the
        limit, or an input cannot be read.
    """
    # The message promises "at least two", so reject a single file as well
    # as an empty or missing list.
    if not files or len(files) < 2:
        return None, "Please upload at least two PDF files to merge."

    # Accept both upload wrappers with a ``.name`` path and bare paths.
    paths = [getattr(f, "name", f) for f in files]

    writer = PdfWriter()
    try:
        total_size = sum(os.path.getsize(p) for p in paths)
        if total_size > MAX_SIZE_BYTES:
            return None, f"Total file size exceeds 1 GB limit ({total_size / (1024*1024*1024):.2f} GB)."

        for p in paths:
            reader = PdfReader(p)
            for page in reader.pages:
                writer.add_page(page)
    except Exception as e:
        return None, f"Error reading PDFs: {e}"

    # Write through the handle NamedTemporaryFile already opened so it is
    # closed deterministically; delete=False keeps the file for download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f_out:
        writer.write(f_out)
        out_path = f_out.name

    return out_path, None
|
|
| |
# Build the two-tab UI. Each handler returns the download file plus one
# update for its error textbox that sets both the message and the
# visibility. Passing a bare boolean to the textbox would set its value,
# not its visibility, so errors would never be shown.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1 id='header'>PDF Splitter & Merger</h1>")
    gr.Markdown(
        "**Free & Quick Solution**: Process your PDFs in-memory instantly without storage.\n"
        "Use the **Split** tab to extract specific pages or the **Merge** tab to combine multiple PDFs into one.\n\n"
        "Maximum file size: **1 GB**. If you'd like to increase the limit, feel free to clone the space and adjust the code yourself."
    )

    with gr.Tabs():
        # ---- Split tab -------------------------------------------------
        with gr.TabItem("Split PDF"):
            with gr.Row(elem_classes="input-row"):
                pdf_input = gr.File(label="Select PDF to split", file_types=['.pdf'])
                page_input = gr.Textbox(label="Page ranges", placeholder="e.g. 1-3,5,7-9")
            with gr.Row(elem_classes="button-row"):
                split_button = gr.Button("Split PDF", variant="primary")
            output_split = gr.File(label="Download Split PDF")
            # Hidden until a handler reports an error.
            error_split = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_split(file, ranges):
                """Validate the inputs, run split_pdf, and return (file, error-box update)."""
                if file is None or not ranges:
                    return None, gr.update(
                        value="Please upload a PDF and specify page ranges.", visible=True
                    )
                out_path, error = split_pdf(file, ranges)
                if error:
                    return None, gr.update(value=error, visible=True)
                # Success: clear and hide any message left from a previous run.
                return out_path, gr.update(value="", visible=False)

            split_button.click(
                fn=run_split,
                inputs=[pdf_input, page_input],
                outputs=[output_split, error_split],
                api_name="split_pdf"
            )

        # ---- Merge tab -------------------------------------------------
        with gr.TabItem("Merge PDF"):
            with gr.Row(elem_classes="input-row"):
                merge_inputs = gr.Files(label="Select PDF files to merge", file_types=['.pdf'])
            with gr.Row(elem_classes="button-row"):
                merge_button = gr.Button("Merge PDFs", variant="primary")
            output_merge = gr.File(label="Download Merged PDF")
            # Hidden until a handler reports an error.
            error_merge = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_merge(files):
                """Validate the inputs, run merge_pdfs, and return (file, error-box update)."""
                if not files or len(files) < 2:
                    return None, gr.update(
                        value="Please upload at least two PDF files.", visible=True
                    )
                out_path, error = merge_pdfs(files)
                if error:
                    return None, gr.update(value=error, visible=True)
                # Success: clear and hide any message left from a previous run.
                return out_path, gr.update(value="", visible=False)

            merge_button.click(
                fn=run_merge,
                inputs=[merge_inputs],
                outputs=[output_merge, error_merge],
                api_name="merge_pdfs"
            )
|
|
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT when set,
    # otherwise 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)