Commit ·
01b1aa1
1
Parent(s): 574d24e
Enhanced PDF to Images: ultra-high DPI up to 600, sharper text rendering
Browse files
- server.py +18 -6
- static/index.html +9 -5
server.py
CHANGED
|
@@ -1309,11 +1309,19 @@ async def api_split_pdf(
|
|
| 1309 |
async def api_pdf_to_images(
|
| 1310 |
file: UploadFile = File(...),
|
| 1311 |
format: str = Form("png"),
|
| 1312 |
-
dpi: int = Form(
|
| 1313 |
pages: str = Form(""),
|
| 1314 |
output_name: str = Form("pages"),
|
| 1315 |
):
|
| 1316 |
-
"""Convert PDF pages to images (PNG or JPG)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1317 |
data = await file.read()
|
| 1318 |
validate_pdf_upload(data)
|
| 1319 |
|
|
@@ -1321,7 +1329,9 @@ async def api_pdf_to_images(
|
|
| 1321 |
format = format.lower() if format.lower() in ["png", "jpg", "jpeg"] else "png"
|
| 1322 |
if format == "jpeg":
|
| 1323 |
format = "jpg"
|
| 1324 |
-
|
|
|
|
|
|
|
| 1325 |
|
| 1326 |
try:
|
| 1327 |
src_doc = fitz.open(stream=data, filetype="pdf")
|
|
@@ -1339,13 +1349,15 @@ async def api_pdf_to_images(
|
|
| 1339 |
job_id = generate_job_id()
|
| 1340 |
zip_path = OUT_DIR / f"{job_id}_{output_name}.zip"
|
| 1341 |
|
|
|
|
| 1342 |
zoom = dpi / 72.0
|
| 1343 |
matrix = fitz.Matrix(zoom, zoom)
|
| 1344 |
|
| 1345 |
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
|
| 1346 |
for i in sorted(page_set):
|
| 1347 |
page = src_doc.load_page(i)
|
| 1348 |
-
|
|
|
|
| 1349 |
|
| 1350 |
if format == "png":
|
| 1351 |
img_bytes = pix.tobytes("png")
|
|
@@ -1353,13 +1365,13 @@ async def api_pdf_to_images(
|
|
| 1353 |
# Convert to JPG via PIL
|
| 1354 |
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 1355 |
buffer = io.BytesIO()
|
| 1356 |
-
img.save(buffer, format="JPEG", quality=
|
| 1357 |
img_bytes = buffer.getvalue()
|
| 1358 |
|
| 1359 |
zf.writestr(f"{output_name}_page_{i+1}.{format}", img_bytes)
|
| 1360 |
|
| 1361 |
src_doc.close()
|
| 1362 |
-
logger.info(f"Converted {len(page_set)} pages to {format.upper()}")
|
| 1363 |
|
| 1364 |
return FileResponse(
|
| 1365 |
path=str(zip_path),
|
|
|
|
| 1309 |
async def api_pdf_to_images(
|
| 1310 |
file: UploadFile = File(...),
|
| 1311 |
format: str = Form("png"),
|
| 1312 |
+
dpi: int = Form(200),
|
| 1313 |
pages: str = Form(""),
|
| 1314 |
output_name: str = Form("pages"),
|
| 1315 |
):
|
| 1316 |
+
"""Convert PDF pages to high-quality images (PNG or JPG)
|
| 1317 |
+
|
| 1318 |
+
DPI Guide:
|
| 1319 |
+
- 150: Fast, small files (web preview)
|
| 1320 |
+
- 200: Good quality (default)
|
| 1321 |
+
- 300: Print quality
|
| 1322 |
+
- 400: Ultra sharp (presentations)
|
| 1323 |
+
- 600: Maximum quality (OCR/archive)
|
| 1324 |
+
"""
|
| 1325 |
data = await file.read()
|
| 1326 |
validate_pdf_upload(data)
|
| 1327 |
|
|
|
|
| 1329 |
format = format.lower() if format.lower() in ["png", "jpg", "jpeg"] else "png"
|
| 1330 |
if format == "jpeg":
|
| 1331 |
format = "jpg"
|
| 1332 |
+
|
| 1333 |
+
# Allow higher DPI for ultra-sharp output (up to 600)
|
| 1334 |
+
dpi = max(72, min(600, dpi))
|
| 1335 |
|
| 1336 |
try:
|
| 1337 |
src_doc = fitz.open(stream=data, filetype="pdf")
|
|
|
|
| 1349 |
job_id = generate_job_id()
|
| 1350 |
zip_path = OUT_DIR / f"{job_id}_{output_name}.zip"
|
| 1351 |
|
| 1352 |
+
# Vector render at specified DPI - handles portrait/landscape automatically
|
| 1353 |
zoom = dpi / 72.0
|
| 1354 |
matrix = fitz.Matrix(zoom, zoom)
|
| 1355 |
|
| 1356 |
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
|
| 1357 |
for i in sorted(page_set):
|
| 1358 |
page = src_doc.load_page(i)
|
| 1359 |
+
# alpha=False removes transparency for sharper text
|
| 1360 |
+
pix = page.get_pixmap(matrix=matrix, alpha=False)
|
| 1361 |
|
| 1362 |
if format == "png":
|
| 1363 |
img_bytes = pix.tobytes("png")
|
|
|
|
| 1365 |
# Convert to JPG via PIL
|
| 1366 |
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 1367 |
buffer = io.BytesIO()
|
| 1368 |
+
img.save(buffer, format="JPEG", quality=95, optimize=True)
|
| 1369 |
img_bytes = buffer.getvalue()
|
| 1370 |
|
| 1371 |
zf.writestr(f"{output_name}_page_{i+1}.{format}", img_bytes)
|
| 1372 |
|
| 1373 |
src_doc.close()
|
| 1374 |
+
logger.info(f"Converted {len(page_set)} pages to {format.upper()} at {dpi} DPI")
|
| 1375 |
|
| 1376 |
return FileResponse(
|
| 1377 |
path=str(zip_path),
|
static/index.html
CHANGED
|
@@ -980,12 +980,16 @@
|
|
| 980 |
</select>
|
| 981 |
</div>
|
| 982 |
<div class="form-group">
|
| 983 |
-
<label class="form-label">
|
|
|
|
|
|
|
|
|
|
| 984 |
<select id="pdf2img_dpi" class="form-select">
|
| 985 |
-
<option value="
|
| 986 |
-
<option value="
|
| 987 |
-
<option value="
|
| 988 |
-
<option value="
|
|
|
|
| 989 |
</select>
|
| 990 |
</div>
|
| 991 |
<div class="form-group">
|
|
|
|
| 980 |
</select>
|
| 981 |
</div>
|
| 982 |
<div class="form-group">
|
| 983 |
+
<label class="form-label">
|
| 984 |
+
Quality (DPI)
|
| 985 |
+
<span class="help-tip tooltip" data-tooltip="Higher DPI = sharper images but larger files">?</span>
|
| 986 |
+
</label>
|
| 987 |
<select id="pdf2img_dpi" class="form-select">
|
| 988 |
+
<option value="150">150 DPI (Web preview)</option>
|
| 989 |
+
<option value="200" selected>200 DPI (Good quality)</option>
|
| 990 |
+
<option value="300">300 DPI (Print quality)</option>
|
| 991 |
+
<option value="400">400 DPI (Ultra sharp)</option>
|
| 992 |
+
<option value="600">600 DPI (Maximum - OCR/Archive)</option>
|
| 993 |
</select>
|
| 994 |
</div>
|
| 995 |
<div class="form-group">
|