JM-EH-Downloader / utils.py
jscmp4's picture
Update utils.py
599b9bf verified
import os
import re
import io
import img2pdf
from PIL import Image # 用于图像压缩
# --- Global configuration ---
# Relative working directory for cached downloads (resolved against the
# process's current working directory).
BASE_DIR = "download_cache"

# Sub-directory of BASE_DIR that holds the finished PDF files.
PDF_FINAL_DIR = os.path.join(BASE_DIR, "final_pdfs")

# Create the PDF directory (and BASE_DIR with it) as soon as the module is
# imported; exist_ok=True makes this a no-op when it is already there.
os.makedirs(PDF_FINAL_DIR, exist_ok=True)
def natural_sort_key(s):
    """Build a sort key that orders embedded numbers by value ("2" before "10").

    The string is split on runs of decimal digits.  Because the pattern uses a
    capturing group, ``re.split`` returns text and digit runs in strict
    alternation: even positions hold (possibly empty) text, odd positions hold
    the digits that matched.

    :param s: String to build a key for (e.g. a file name or path).
    :return: List alternating lower-cased text chunks and ``int`` values.
    """
    chunks = re.split(r'(\d+)', s)
    # Decide by position rather than str.isdigit(): isdigit() is also true for
    # characters such as "²" that \d never matches and int() cannot parse,
    # which used to raise ValueError for names like "1²".
    return [int(chunk) if index % 2 else chunk.lower()
            for index, chunk in enumerate(chunks)]
# Matches any single character that sanitize_filename() replaces:
# backslash, slash, asterisk, question mark, colon, double quote, <, >, |.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/*?:"<>|]')


def sanitize_filename(name):
    r"""Return *name* with every ``\ / * ? : " < > |`` replaced by ``_``.

    :param name: Proposed file name.
    :return: Copy of *name* with each listed character turned into an
        underscore; all other characters are left as they are.
    """
    return _UNSAFE_FILENAME_CHARS.sub('_', name)
# Pillow modes that can be saved as JPEG without conversion; images in any
# other mode (RGBA, P, LA, ...) are converted to RGB first.
_JPEG_SAVE_MODES = ("1", "L", "RGB", "CMYK", "YCbCr")


def _collect_image_paths(album_dir):
    """Return the paths of all .jpg/.jpeg/.png/.webp files under *album_dir*."""
    return [
        os.path.join(root, file_name)
        for root, _, files in os.walk(album_dir)
        for file_name in files
        if file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.webp'))
    ]


def _compress_to_jpeg(img_path, quality):
    """Return the image at *img_path* re-encoded as JPEG bytes at *quality*."""
    with Image.open(img_path) as img:
        # JPEG cannot store alpha or palette data, so convert when needed.
        frame = img if img.mode in _JPEG_SAVE_MODES else img.convert("RGB")
        buffer = io.BytesIO()
        # optimize=True asks the encoder for a slightly smaller file.
        frame.save(buffer, format="JPEG", quality=quality, optimize=True)
        return buffer.getvalue()


def manual_merge_pdf(album_dir, output_pdf_path, quality=100):
    """Merge every image found under *album_dir* into a single PDF file.

    Images are collected recursively and ordered with ``natural_sort_key``
    applied to their full paths.

    :param album_dir: Directory that is walked recursively for images.
    :param output_pdf_path: Path of the PDF file to write.
    :param quality: Image quality (1-100).  With 100 or more the original
        files are handed to img2pdf unchanged (fast, larger output); lower
        values re-encode each image as JPEG first (slower, smaller output).
    :raises Exception: If no image is found under *album_dir*.
    """
    image_paths = _collect_image_paths(album_dir)
    if not image_paths:
        raise Exception("目录中未找到任何图片")
    image_paths.sort(key=natural_sort_key)

    if quality >= 100:
        # Branch 1: pass the original files straight through.
        pdf_sources = image_paths
    else:
        # Branch 2: load each image, re-encode it in memory, keep the bytes.
        pdf_sources = []
        for img_path in image_paths:
            try:
                pdf_sources.append(_compress_to_jpeg(img_path, quality))
            except Exception as e:
                print(f"压缩图片出错 {img_path}: {e}")
                # Best effort: if one image cannot be compressed, include the
                # original bytes instead of aborting the whole merge.
                with open(img_path, "rb") as f:
                    pdf_sources.append(f.read())

    # Build the PDF before touching the output path, so a conversion failure
    # does not leave an empty or truncated file behind.
    pdf_bytes = img2pdf.convert(pdf_sources)
    with open(output_pdf_path, "wb") as f:
        f.write(pdf_bytes)