Deploybot
Deploy from stable branch
49574d5
"""PDF I/O utilities for loading and rendering PDFs."""
from PIL import Image
def load_pdf_pages(pdf_bytes: bytes, dpi: int = 150, max_pages: int = 10) -> list[Image.Image]:
    """Load a PDF and render its leading pages to PIL Images.

    Args:
        pdf_bytes: PDF file as bytes.
        dpi: Resolution for rendering; each page is scaled by ``dpi / 72``.
        max_pages: Maximum number of pages to render. Values <= 0 yield an
            empty list.

    Returns:
        List of RGB PIL Images, one per rendered page, in page order.
        If PyMuPDF is not installed, a notice is printed and up to three
        independent grey 800x1000 placeholder images are returned instead
        (never more than ``max_pages``).
    """
    # Guard only the import: an ImportError raised while rendering should
    # not be mistaken for "PyMuPDF is missing".
    try:
        import fitz  # PyMuPDF
    except ImportError:
        print("PyMuPDF not available, returning placeholder")
        placeholder_count = max(0, min(3, max_pages))
        # Build a fresh image per slot so callers that modify one page do
        # not silently modify the others.
        return [
            Image.new("RGB", (800, 1000), color=(200, 200, 200))
            for _ in range(placeholder_count)
        ]

    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        # The scale matrix is the same for every page; build it once.
        zoom = dpi / 72
        matrix = fitz.Matrix(zoom, zoom)
        pages = []
        for index in range(min(len(doc), max_pages)):
            # alpha=False keeps the sample buffer at 3 bytes per pixel,
            # which is what the "RGB" mode passed to frombytes expects.
            pix = doc[index].get_pixmap(matrix=matrix, alpha=False)
            pages.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
        return pages
    finally:
        # Always release the document, even if rendering a page fails.
        doc.close()
def get_page_count(pdf_bytes: bytes) -> int:
    """Get the total page count of a PDF.

    This is a best-effort helper: any failure (PyMuPDF not installed,
    unreadable or malformed data, ...) is reported as a count of 0 rather
    than raised.

    Args:
        pdf_bytes: PDF file as bytes.

    Returns:
        Total number of pages, or 0 on error.
    """
    try:
        import fitz  # PyMuPDF

        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            return len(doc)
        finally:
            # Close the document even if reading the page count fails, so
            # the handle is not leaked on the error path.
            doc.close()
    except Exception:  # noqa: BLE001
        return 0