# Hosting-page residue (not part of the module): Spaces - Running on Zero
# File size: 1,500 Bytes - revision 49574d5
"""PDF I/O utilities for loading and rendering PDFs."""
from PIL import Image
def load_pdf_pages(pdf_bytes: bytes, dpi: int = 150, max_pages: int = 10) -> list[Image.Image]:
    """Load a PDF and render its leading pages to PIL images.

    Args:
        pdf_bytes: PDF file as bytes.
        dpi: Resolution for rendering; each page is scaled by ``dpi / 72``
            on both axes.
        max_pages: Maximum number of pages to render.

    Returns:
        List of PIL Images in "RGB" mode, one per rendered page. When
        PyMuPDF cannot be imported, a message is printed and up to three
        grey 800x1000 placeholder images are returned instead (never more
        than ``max_pages``).
    """
    # Guard only the import: an ImportError raised while opening or
    # rendering must not be mistaken for "PyMuPDF is not installed".
    try:
        import fitz  # PyMuPDF
    except ImportError:
        print("PyMuPDF not available, returning placeholder")
        # Build a separate image per slot so that editing one placeholder
        # page does not silently edit the others.
        return [
            Image.new("RGB", (800, 1000), color=(200, 200, 200))
            for _ in range(min(3, max_pages))
        ]

    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        # The scale matrix is the same for every page, so build it once.
        zoom = dpi / 72
        matrix = fitz.Matrix(zoom, zoom)
        pages = []
        for index in range(min(len(doc), max_pages)):
            # No alpha channel, so the sample buffer lines up with the "RGB"
            # mode used below (assumes PyMuPDF's default RGB colorspace).
            pix = doc[index].get_pixmap(matrix=matrix, alpha=False)
            pages.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
    finally:
        # Always release the document, even if rendering a page fails.
        doc.close()
    return pages
def get_page_count(pdf_bytes: bytes) -> int:
    """Get total page count of a PDF.

    Args:
        pdf_bytes: PDF file as bytes.

    Returns:
        Total number of pages, or 0 on any error (including PyMuPDF not
        being importable or the bytes failing to open as a PDF).
    """
    try:
        import fitz  # PyMuPDF

        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            return len(doc)
        finally:
            # Close even when counting fails so the document is not leaked.
            doc.close()
    except Exception:  # noqa: BLE001
        # Deliberate best-effort: callers get 0 instead of an exception.
        return 0
|