import os
import re
import io
import img2pdf
from PIL import Image  # 用于图像压缩

# --- Global configuration ---
# Base directory, given as a relative path (so it resolves against the
# current working directory of the process).
BASE_DIR = "download_cache"
# Sub-directory of BASE_DIR for final PDFs.
PDF_FINAL_DIR = os.path.join(BASE_DIR, "final_pdfs")
# Import-time side effect: create the directory (and BASE_DIR) if missing.
os.makedirs(PDF_FINAL_DIR, exist_ok=True)

def natural_sort_key(s):
    """Build a sort key that orders embedded numbers by value ("p2" < "p10").

    The string is split into alternating text and digit runs; digit runs are
    compared as integers and text runs case-insensitively.

    :param s: the string (e.g. a file name or path) to build a key for.
    :return: a list alternating lower-cased ``str`` and ``int`` items; it
        always starts and ends with a (possibly empty) text item.
    """
    # re.split with a single capturing group places the captured digit runs
    # at the odd indices. Deciding by position instead of str.isdigit()
    # matters: isdigit() is also true for characters such as a superscript
    # two, which \d does not match and int() cannot parse (ValueError).
    return [int(part) if idx % 2 else part.lower()
            for idx, part in enumerate(re.split(r'(\d+)', s))]

def sanitize_filename(name):
    """Return *name* with each of the characters \\ / * ? : " < > | replaced by "_".

    :param name: the proposed file name.
    :return: the name with every listed character swapped for an underscore;
        all other characters are left untouched.
    """
    forbidden_chars = r'[\\/*?:"<>|]'
    cleaned = re.sub(forbidden_chars, '_', name)
    return cleaned

# File extensions (compared lower-cased) treated as images when scanning.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.webp')

# Image modes handed to the JPEG encoder unchanged. Every other mode
# (RGBA, P, LA, ...) is converted to RGB first; if encoding still fails the
# caller falls back to the original file.
_JPEG_READY_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")


def _collect_image_paths(album_dir):
    """Return the paths of all image files found recursively under *album_dir*."""
    return [
        os.path.join(root, file)
        for root, _, files in os.walk(album_dir)
        for file in files
        if file.lower().endswith(_IMAGE_EXTENSIONS)
    ]


def _compress_to_jpeg(img_path, quality):
    """Re-encode the image at *img_path* as JPEG and return the encoded bytes."""
    with Image.open(img_path) as img:
        # JPEG cannot store alpha or palette data, so such images are
        # converted to RGB before saving.
        frame = img if img.mode in _JPEG_READY_MODES else img.convert("RGB")
        buffer = io.BytesIO()
        # optimize=True asks the encoder to squeeze out a little more space.
        frame.save(buffer, format="JPEG", quality=quality, optimize=True)
        return buffer.getvalue()


def manual_merge_pdf(album_dir, output_pdf_path, quality=100):
    """Merge every image found under a directory into a single PDF.

    Images (.jpg, .jpeg, .png, .webp; case-insensitive) are collected
    recursively and ordered by ``natural_sort_key`` applied to their paths.

    :param album_dir: directory to scan for images.
    :param output_pdf_path: path of the PDF file to write.
    :param quality: image quality. With ``quality >= 100`` the original files
        are handed to img2pdf without being re-encoded here; lower values
        re-encode each image as JPEG at that quality to reduce size.
    :raises Exception: if no image is found under *album_dir*. Errors from
        img2pdf or from writing the output file propagate to the caller.
    """
    image_paths = _collect_image_paths(album_dir)
    if not image_paths:
        raise Exception("目录中未找到任何图片")

    image_paths.sort(key=natural_sort_key)

    if quality >= 100:
        # Lossless mode: pass the files straight through.
        pdf_sources = image_paths
    else:
        # Compression mode: re-encode each image in memory.
        pdf_sources = []
        for img_path in image_paths:
            try:
                pdf_sources.append(_compress_to_jpeg(img_path, quality))
            except Exception as e:
                print(f"压缩图片出错 {img_path}: {e}")
                # Best effort: if one image cannot be compressed, use the
                # original file bytes so the whole merge does not fail.
                with open(img_path, "rb") as f:
                    pdf_sources.append(f.read())

    # Build the PDF before opening the output file, so a conversion failure
    # neither leaves an empty file behind nor truncates an existing one.
    pdf_bytes = img2pdf.convert(pdf_sources)
    with open(output_pdf_path, "wb") as f:
        f.write(pdf_bytes)