Spaces:
Running on Zero
Running on Zero
| """PDF I/O utilities for loading and rendering PDFs.""" | |
| from PIL import Image | |
def load_pdf_pages(pdf_bytes: bytes, dpi: int = 150, max_pages: int = 10) -> list[Image.Image]:
    """Load a PDF from memory and render its leading pages to PIL images.

    Args:
        pdf_bytes: PDF file as bytes.
        dpi: Resolution for rendering; each page is scaled by ``dpi / 72``.
        max_pages: Maximum number of pages to render.

    Returns:
        List of PIL Images, one per rendered page. When PyMuPDF cannot be
        imported, grey 800x1000 placeholder images are returned instead:
        at most three, and never more than ``max_pages``. Each placeholder
        is a distinct image object.
    """
    # Only the import is guarded, so an ImportError raised later during
    # rendering is not misreported as "PyMuPDF not available".
    try:
        import fitz  # PyMuPDF
    except ImportError:
        print("PyMuPDF not available, returning placeholder")
        # Build separate images rather than repeating one object, so that
        # editing one placeholder cannot silently change the others.
        return [
            Image.new("RGB", (800, 1000), color=(200, 200, 200))
            for _ in range(min(3, max_pages))
        ]

    # PDF page units are 1/72 inch, so dpi / 72 is the zoom factor.
    # The matrix is loop-invariant and built once.
    zoom = dpi / 72
    matrix = fitz.Matrix(zoom, zoom)

    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    pages = []
    try:
        for i in range(min(len(doc), max_pages)):
            # alpha=False keeps the sample buffer at 3 bytes per pixel,
            # matching the "RGB" mode handed to Image.frombytes.
            pix = doc[i].get_pixmap(matrix=matrix, alpha=False)
            pages.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
    finally:
        # Always release the document, even if rendering a page fails.
        doc.close()
    return pages
def get_page_count(pdf_bytes: bytes) -> int:
    """Get total page count of a PDF.

    This is a best-effort helper: any failure (PyMuPDF not installed,
    unreadable or malformed data, an error while closing) is swallowed
    and reported as a count of 0 rather than raised.

    Args:
        pdf_bytes: PDF file as bytes.

    Returns:
        Total number of pages, or 0 on error.
    """
    try:
        import fitz  # PyMuPDF

        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            return len(doc)
        finally:
            # Release the document even when len(doc) raises, so a failed
            # count does not leak the open handle.
            doc.close()
    except Exception:  # noqa: BLE001
        # Deliberate catch-all: callers rely on 0 meaning "could not count".
        return 0