Shivakafle038 committed on
Commit
01b1aa1
·
1 Parent(s): 574d24e

Enhanced PDF to Images: ultra-high DPI up to 600, sharper text rendering

Browse files
Files changed (2) hide show
  1. server.py +18 -6
  2. static/index.html +9 -5
server.py CHANGED
@@ -1309,11 +1309,19 @@ async def api_split_pdf(
1309
  async def api_pdf_to_images(
1310
  file: UploadFile = File(...),
1311
  format: str = Form("png"),
1312
- dpi: int = Form(150),
1313
  pages: str = Form(""),
1314
  output_name: str = Form("pages"),
1315
  ):
1316
- """Convert PDF pages to images (PNG or JPG)"""
 
 
 
 
 
 
 
 
1317
  data = await file.read()
1318
  validate_pdf_upload(data)
1319
 
@@ -1321,7 +1329,9 @@ async def api_pdf_to_images(
1321
  format = format.lower() if format.lower() in ["png", "jpg", "jpeg"] else "png"
1322
  if format == "jpeg":
1323
  format = "jpg"
1324
- dpi = max(72, min(300, dpi))
 
 
1325
 
1326
  try:
1327
  src_doc = fitz.open(stream=data, filetype="pdf")
@@ -1339,13 +1349,15 @@ async def api_pdf_to_images(
1339
  job_id = generate_job_id()
1340
  zip_path = OUT_DIR / f"{job_id}_{output_name}.zip"
1341
 
 
1342
  zoom = dpi / 72.0
1343
  matrix = fitz.Matrix(zoom, zoom)
1344
 
1345
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
1346
  for i in sorted(page_set):
1347
  page = src_doc.load_page(i)
1348
- pix = page.get_pixmap(matrix=matrix)
 
1349
 
1350
  if format == "png":
1351
  img_bytes = pix.tobytes("png")
@@ -1353,13 +1365,13 @@ async def api_pdf_to_images(
1353
  # Convert to JPG via PIL
1354
  img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
1355
  buffer = io.BytesIO()
1356
- img.save(buffer, format="JPEG", quality=90)
1357
  img_bytes = buffer.getvalue()
1358
 
1359
  zf.writestr(f"{output_name}_page_{i+1}.{format}", img_bytes)
1360
 
1361
  src_doc.close()
1362
- logger.info(f"Converted {len(page_set)} pages to {format.upper()}")
1363
 
1364
  return FileResponse(
1365
  path=str(zip_path),
 
1309
  async def api_pdf_to_images(
1310
  file: UploadFile = File(...),
1311
  format: str = Form("png"),
1312
+ dpi: int = Form(200),
1313
  pages: str = Form(""),
1314
  output_name: str = Form("pages"),
1315
  ):
1316
+ """Convert PDF pages to high-quality images (PNG or JPG)
1317
+
1318
+ DPI Guide:
1319
+ - 150: Fast, small files (web preview)
1320
+ - 200: Good quality (default)
1321
+ - 300: Print quality
1322
+ - 400: Ultra sharp (presentations)
1323
+ - 600: Maximum quality (OCR/archive)
1324
+ """
1325
  data = await file.read()
1326
  validate_pdf_upload(data)
1327
 
 
1329
  format = format.lower() if format.lower() in ["png", "jpg", "jpeg"] else "png"
1330
  if format == "jpeg":
1331
  format = "jpg"
1332
+
1333
+ # Allow higher DPI for ultra-sharp output (up to 600)
1334
+ dpi = max(72, min(600, dpi))
1335
 
1336
  try:
1337
  src_doc = fitz.open(stream=data, filetype="pdf")
 
1349
  job_id = generate_job_id()
1350
  zip_path = OUT_DIR / f"{job_id}_{output_name}.zip"
1351
 
1352
+ # Vector render at specified DPI - handles portrait/landscape automatically
1353
  zoom = dpi / 72.0
1354
  matrix = fitz.Matrix(zoom, zoom)
1355
 
1356
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
1357
  for i in sorted(page_set):
1358
  page = src_doc.load_page(i)
1359
+ # alpha=False removes transparency for sharper text
1360
+ pix = page.get_pixmap(matrix=matrix, alpha=False)
1361
 
1362
  if format == "png":
1363
  img_bytes = pix.tobytes("png")
 
1365
  # Convert to JPG via PIL
1366
  img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
1367
  buffer = io.BytesIO()
1368
+ img.save(buffer, format="JPEG", quality=95, optimize=True)
1369
  img_bytes = buffer.getvalue()
1370
 
1371
  zf.writestr(f"{output_name}_page_{i+1}.{format}", img_bytes)
1372
 
1373
  src_doc.close()
1374
+ logger.info(f"Converted {len(page_set)} pages to {format.upper()} at {dpi} DPI")
1375
 
1376
  return FileResponse(
1377
  path=str(zip_path),
static/index.html CHANGED
@@ -980,12 +980,16 @@
980
  </select>
981
  </div>
982
  <div class="form-group">
983
- <label class="form-label">Quality (DPI)</label>
 
 
 
984
  <select id="pdf2img_dpi" class="form-select">
985
- <option value="72">72 DPI (Fast)</option>
986
- <option value="150" selected>150 DPI (Balanced)</option>
987
- <option value="200">200 DPI (Quality)</option>
988
- <option value="300">300 DPI (Best)</option>
 
989
  </select>
990
  </div>
991
  <div class="form-group">
 
980
  </select>
981
  </div>
982
  <div class="form-group">
983
+ <label class="form-label">
984
+ Quality (DPI)
985
+ <span class="help-tip tooltip" data-tooltip="Higher DPI = sharper images but larger files">?</span>
986
+ </label>
987
  <select id="pdf2img_dpi" class="form-select">
988
+ <option value="150">150 DPI (Web preview)</option>
989
+ <option value="200" selected>200 DPI (Good quality)</option>
990
+ <option value="300">300 DPI (Print quality)</option>
991
+ <option value="400">400 DPI (Ultra sharp)</option>
992
+ <option value="600">600 DPI (Maximum - OCR/Archive)</option>
993
  </select>
994
  </div>
995
  <div class="form-group">