# pdf-to-image / dump / pdf_to_image.py
# algorembrant's picture
# Upload 10 files
# 2212506 verified
import fitz # PyMuPDF
import argparse
import os
def convert_pdf_to_images(pdf_path, output_dir, dpi=300, image_format="png"):
    """
    Convert a PDF to a series of images, one file per page.

    Each page is rendered at ``dpi`` with an opaque background and saved as
    ``<output_dir>/page_NN.<image_format>``, where ``NN`` is the 1-based page
    number zero-padded to at least two digits. Progress is reported on
    standard output via ``print``.

    Args:
        pdf_path: Path of the PDF file to open.
        output_dir: Directory that receives the images; created if missing.
        dpi: Rendering resolution passed to ``page.get_pixmap``.
        image_format: File extension used for the output names (e.g. "png").

    Returns:
        None.

    Raises:
        OSError: If ``output_dir`` does not exist and cannot be created.
            Failures while opening, rendering or saving are NOT raised; they
            are caught and printed as "Error processing PDF: ...".
    """
    # The guard keeps the original behaviour when the path already exists;
    # exist_ok=True only covers the case where something else creates the
    # directory between the check and the makedirs call.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)

    try:
        # Open via a context manager so the document (and its file handle)
        # is closed even when rendering or saving a page fails part-way.
        with fitz.open(pdf_path) as doc:
            print(f"Opened {pdf_path}. Total pages: {len(doc)}")
            for page_num, page in enumerate(doc, start=1):
                # alpha=False ensures a white background instead of transparent
                pix = page.get_pixmap(dpi=dpi, alpha=False)
                output_file = os.path.join(
                    output_dir, f"page_{page_num:02d}.{image_format}"
                )
                pix.save(output_file)
                print(f"Saved {output_file}")
        print("Conversion complete.")
    except Exception as e:
        # Deliberate best-effort boundary: report the problem, do not crash.
        print(f"Error processing PDF: {e}")
if __name__ == "__main__":
    # Command-line entry point: one positional input path plus optional
    # output directory, rendering resolution and image format.
    cli = argparse.ArgumentParser(description="Convert a PDF to images.")
    cli.add_argument("pdf_path", help="Path to the input PDF file")
    cli.add_argument("--output", "-o", default="output", help="Output directory")
    cli.add_argument(
        "--dpi", type=int, default=300, help="Output image DPI (default: 300)"
    )
    cli.add_argument(
        "--format", "-f", default="png", help="Output image format (default: png)"
    )
    options = cli.parse_args()

    # Hand the parsed options to the converter by keyword so the mapping
    # between CLI flags and function parameters is explicit.
    convert_pdf_to_images(
        pdf_path=options.pdf_path,
        output_dir=options.output,
        dpi=options.dpi,
        image_format=options.format,
    )