'''
Test pipeline, step 1: run MTCNN face detection on the 50 "wild" images and
crop the detected faces, producing the 50 "cropped" dataset.
'''
import os
import torch
from facenet_pytorch import MTCNN
from PIL import Image
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

# Build the MTCNN detector, on the GPU when CUDA is available and on the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# keep_all=False is meant to keep a single face per image.
# NOTE(review): this script calls mtcnn.detect(); confirm keep_all has any effect there.
mtcnn = MTCNN(
    keep_all=False,
    device=device,
)

# Locations of the input images, the cropped output and the error log.
data_dir = '../../../datasets/classification/LFWPairs/lfw-py/lfw_test_template_50_wild'  # directory holding the source LFW images
save_dir = '../../../datasets/classification/LFWPairs/lfw-py/lfw_test_template_50_cropped'  # directory the cropped faces are written to
error_log_path = '../../../datasets/classification/LFWPairs/lfw-py/lfw_error_log_selected_50.txt'  # file that collects failure messages

# Make sure the output directory exists before any crop is written.
os.makedirs(save_dir, exist_ok=True)

# Face cropping helpers.
def _append_error_log(message):
    """Append ``message`` to the error log, always encoded as UTF-8."""
    # An explicit encoding keeps the non-ASCII log text from failing on
    # platforms whose default locale encoding cannot represent it.
    with open(error_log_path, 'a', encoding='utf-8') as f:
        f.write(message)


def crop_and_save_faces(image_path, save_path):
    """Detect a face in one image with MTCNN and save the cropped region.

    ``mtcnn.detect`` can return several boxes for a single image, but only one
    crop can be stored at ``save_path``. The valid box with the largest area
    is therefore kept; writing every box to the same path would let the last
    one silently overwrite the others.

    Args:
        image_path: Path of the source image.
        save_path: Path the cropped face is written to. Missing parent
            directories are created.

    Returns:
        None. Failures (no usable face, or any exception) are appended to
        ``error_log_path`` instead of being raised.
    """
    try:
        # convert() returns an independent image, so the file handle can be
        # released as soon as the pixels have been loaded.
        with Image.open(image_path) as source:
            image = source.convert('RGB')

        boxes, _ = mtcnn.detect(image)

        best_box = None
        best_area = 0
        for box in (boxes if boxes is not None else ()):
            x1, y1, x2, y2 = map(int, box)
            # Clamp to the image so the crop never contains padding from
            # outside the picture.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, image.width), min(y2, image.height)
            if x2 <= x1 or y2 <= y1:
                continue  # degenerate box, nothing to crop
            area = (x2 - x1) * (y2 - y1)
            if area > best_area:
                best_box = (x1, y1, x2, y2)
                best_area = area

        if best_box is None:
            # Covers both "no detection" and "only degenerate boxes", so that
            # every image without an output file leaves a trace in the log.
            _append_error_log(f"未检测到人脸: {image_path}\n")
            return

        parent_dir = os.path.dirname(save_path)
        if parent_dir:  # os.makedirs('') raises, so skip bare file names
            os.makedirs(parent_dir, exist_ok=True)
        image.crop(best_box).save(save_path)
    except Exception as e:
        # Best-effort batch job: record the failure and carry on with the
        # remaining images.
        _append_error_log(f"处理 {image_path} 时出错: {e}\n")

# Walk the LFW dataset and extract a face from every image.
def _collect_image_paths(root_dir):
    """Return the paths of all images below ``root_dir`` that should be processed.

    A file qualifies when its name ends in .jpg/.jpeg/.png (case-insensitive)
    and the directory path it lives in contains 'test' or 'template'.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    found = []
    for root, _dirs, files in os.walk(root_dir):
        if 'test' not in root and 'template' not in root:
            continue
        for file in files:
            if file.lower().endswith(image_extensions):
                found.append(os.path.join(root, file))
    return found


def _process_image(image_path):
    """Crop one image, mirroring its path relative to data_dir under save_dir."""
    relative_path = os.path.relpath(image_path, data_dir)
    crop_and_save_faces(image_path, os.path.join(save_dir, relative_path))


image_paths = _collect_image_paths(data_dir)

# One pool and one progress bar for the whole dataset. Creating a pool per
# file with a single-item work list would run the images one at a time.
with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
    for _ in tqdm(executor.map(_process_image, image_paths), total=len(image_paths)):
        pass  # results are None; iterating only drives the progress bar

print("所有人脸提取完成并保存到: ", save_dir)
print("错误日志已保存到: ", error_log_path)