| |
| |
| import os |
|
|
| import matplotlib.pyplot as plt |
|
|
# When SPACES_ZERO_GPU holds the literal "true", rewrite it to "1" before the
# `spaces` package is imported further down (presumably the form that package
# expects -- confirm).
if os.environ.get("SPACES_ZERO_GPU") == "true":
    os.environ["SPACES_ZERO_GPU"] = "1"
# NOTE(review): assumed to be set unconditionally (not part of the branch
# above), since it is unrelated to the ZeroGPU flag -- confirm.
os.environ["K_DIFFUSION_USE_COMPILE"] = "0"
|
|
| import spaces |
| import cv2 |
| from tqdm import tqdm |
| import gradio as gr |
| import random |
| import torch |
| from basicsr.archs.rrdbnet_arch import RRDBNet |
| from basicsr.utils import img2tensor, tensor2img |
| from facexlib.utils.face_restoration_helper import FaceRestoreHelper |
| from realesrgan.utils import RealESRGANer |
|
|
| from lightning_models.mmse_rectified_flow import MMSERectifiedFlow |
|
|
# Largest seed value: upper bound of the seed slider and of randomly drawn seeds.
MAX_SEED = 10000
# Run on CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
|
# Fetch the Real-ESRGAN checkpoint into the local weights directory, but only
# when it is not already on disk.
# NOTE(review): the URL points at realesr-general-x4v3.pth while the file is
# stored as RealESRGAN_x4plus.pth, and nothing visible in this file loads it
# (set_realesrgan() passes its own URL) -- confirm whether this download is
# still needed.
realesr_model_path = "pretrained_models/RealESRGAN_x4plus.pth"
os.makedirs("pretrained_models", exist_ok=True)
weights_present = os.path.exists(realesr_model_path)
if not weights_present:
    os.system(
        "wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth -O pretrained_models/RealESRGAN_x4plus.pth"
    )
|
|
|
|
| |
| |
| |
| |
| |
|
|
|
|
def set_realesrgan():
    """Build the Real-ESRGAN x2 upsampler used for the image background.

    Half precision is enabled only when CUDA is available and the name of
    GPU 0 contains neither "1650" nor "1660".

    Returns:
        A ``RealESRGANer`` wrapping a 23-block ``RRDBNet`` (scale 2) that
        processes the image in 400-pixel tiles with 40 pixels of padding.
    """
    # Substrings of GPU names for which fp16 inference is switched off.
    no_half_gpu_list = ("1650", "1660")

    use_half = False
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        use_half = not any(gpu in gpu_name for gpu in no_half_gpu_list)

    model = RRDBNet(
        num_in_ch=3,
        num_out_ch=3,
        num_feat=64,
        num_block=23,
        num_grow_ch=32,
        scale=2,
    )
    upsampler = RealESRGANer(
        scale=2,
        model_path="https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/RealESRGAN_x2plus.pth",
        model=model,
        tile=400,
        tile_pad=40,
        pre_pad=0,
        half=use_half,
    )
    return upsampler
|
|
|
|
# Module-level models used by enhance_face().
# Background upsampler (Real-ESRGAN).
upsampler = set_realesrgan()
# PMRF restoration model: load the published checkpoint, then move it to `device`.
pmrf = MMSERectifiedFlow.from_pretrained("ohayonguy/PMRF_blind_face_image_restoration")
pmrf = pmrf.to(device=device)
|
|
|
|
def generate_reconstructions(pmrf_model, x, y, non_noisy_z0, num_flow_steps, device):
    """Integrate the PMRF flow with fixed-size Euler steps.

    The state starts at
    ``pmrf_model.create_source_distribution_samples(x, y, non_noisy_z0)`` and
    is advanced ``num_flow_steps`` times by
    ``state + pmrf_model(x_t=state, t=t, y=y) * dt``, where ``t`` starts at
    ``pmrf_model.hparams.eps`` and grows linearly towards 1.

    Args:
        pmrf_model: Model exposing ``create_source_distribution_samples``,
            ``hparams.eps`` and a call interface ``(x_t=..., t=..., y=...)``.
        x: Tensor forwarded to the source sampler; its first dimension sets
            the length of the per-sample time vector.
        y: Input forwarded to the source sampler and to the model at every
            step.
        non_noisy_z0: Forwarded unchanged to the source sampler (the caller
            in this file passes ``None``).
        num_flow_steps: Number of Euler steps; must be at least 1.
        device: Device on which the time vector is created.

    Returns:
        The final state, clipped to the range [0, 1].

    Raises:
        ValueError: If ``num_flow_steps`` is smaller than 1.
    """
    if num_flow_steps < 1:
        # Without this guard a zero step count fails with a bare ZeroDivisionError.
        raise ValueError(f"num_flow_steps must be >= 1, got {num_flow_steps}")

    source_dist_samples = pmrf_model.create_source_distribution_samples(
        x, y, non_noisy_z0
    )
    # Loop-invariant quantities, read once.
    eps = pmrf_model.hparams.eps
    time_span = 1.0 - eps
    dt = (1.0 / num_flow_steps) * time_span

    x_t_next = source_dist_samples.clone()
    t_one = torch.ones(x.shape[0], device=device)
    for i in tqdm(range(num_flow_steps)):
        num_t = (i / num_flow_steps) * time_span + eps
        v_t_next = pmrf_model(x_t=x_t_next, t=t_one * num_t, y=y).to(x_t_next.dtype)
        # The out-of-place addition already produces a new tensor, so no
        # extra clone of the current state is needed.
        x_t_next = x_t_next + v_t_next * dt

    return x_t_next.clip(0, 1)
|
|
|
|
def resize(img, size):
    """Rescale ``img`` so that its shorter side is scaled to ``size`` pixels.

    The aspect ratio is kept (both sides are multiplied by the same factor and
    truncated to integers). Area interpolation is used when shrinking and
    linear interpolation otherwise.

    Args:
        img: Image array whose first two dimensions are height and width.
        size: Target length of the shorter side.

    Returns:
        The resized image as returned by ``cv2.resize``.
    """
    height, width = img.shape[:2]
    factor = size / min(height, width)
    # cv2.resize takes the target size as (width, height).
    target_size = (int(width * factor), int(height * factor))
    if factor < 1:
        interpolation = cv2.INTER_AREA
    else:
        interpolation = cv2.INTER_LINEAR
    return cv2.resize(img, target_size, interpolation=interpolation)
|
|
|
|
@torch.inference_mode()
@spaces.GPU(duration=25)
def enhance_face(img, face_helper, has_aligned, num_flow_steps, scale=2):
    """Restore every face found in ``img`` with the PMRF model.

    Args:
        img: Image array (the caller in this file loads it with
            ``cv2.imread``).
        face_helper: ``FaceRestoreHelper`` used for detection, alignment and
            pasting; it is reset with ``clean_all()`` first.
        has_aligned: When true, ``img`` is treated as one already aligned
            face: it is resized to 512x512 and used directly as the only
            crop, without face detection.
        num_flow_steps: Number of flow steps passed to
            ``generate_reconstructions``.
        scale: Output scale handed to the background upsampler (only used
            when ``has_aligned`` is false).

    Returns:
        A tuple ``(cropped_faces, restored_faces, restored_img)``.
        ``restored_img`` is the full image with the restored faces pasted
        onto the upsampled background, or ``None`` when ``has_aligned`` is
        true.

    Raises:
        gr.Error: If face detection finds no face in a non-aligned image.
    """
    face_helper.clean_all()
    if has_aligned:
        # The aligned path always yields exactly one crop, so no face-count
        # validation is needed here.
        img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_LINEAR)
        face_helper.cropped_faces = [img]
    else:
        face_helper.read_image(img)
        # Detection runs on a copy whose shorter side is scaled to 640 px.
        face_helper.input_img = resize(face_helper.input_img, 640)
        face_helper.get_face_landmarks_5(only_center_face=False, eye_dist_threshold=5)
        face_helper.align_warp_face()
        if not face_helper.cropped_faces:
            raise gr.Error("Could not identify any face in the image.")

    # tqdm wraps the list itself (not an enumerate object) so the progress bar
    # knows how many faces there are.
    for cropped_face in tqdm(face_helper.cropped_faces):
        cropped_face_t = img2tensor(cropped_face / 255.0, bgr2rgb=True, float32=True)
        cropped_face_t = cropped_face_t.unsqueeze(0).to(device)

        output = generate_reconstructions(
            pmrf,
            torch.zeros_like(cropped_face_t),
            cropped_face_t,
            None,
            num_flow_steps,
            device,
        )
        restored_face = tensor2img(
            output.to(torch.float32).squeeze(0), rgb2bgr=True, min_max=(0, 1)
        )
        restored_face = restored_face.astype("uint8")
        face_helper.add_restored_face(restored_face)

    if has_aligned:
        return face_helper.cropped_faces, face_helper.restored_faces, None

    # Upsample the background with Real-ESRGAN, then paste each restored face
    # back into it.
    bg_img = upsampler.enhance(img, outscale=scale)[0]
    face_helper.get_inverse_affine(None)
    restored_img = face_helper.paste_faces_to_input_image(upsample_img=bg_img)
    return face_helper.cropped_faces, face_helper.restored_faces, restored_img
|
|
|
|
@torch.inference_mode()
@spaces.GPU(duration=25)
def inference(
    img,
    randomize_seed,
    aligned,
    scale,
    num_flow_steps,
    seed,
    progress=gr.Progress(track_tqdm=True),
):
    """Gradio entry point: restore the faces in an uploaded image.

    Args:
        img: Path of the uploaded image file, or ``None`` when nothing was
            uploaded.
        randomize_seed: When true, ``seed`` is replaced by a random integer
            in ``[0, MAX_SEED]``.
        aligned: Whether the input is a single aligned, square face image.
        scale: Upscaling factor for non-aligned images.
        num_flow_steps: Number of flow steps for the PMRF sampler.
        seed: Seed for ``torch.manual_seed`` (ignored when
            ``randomize_seed`` is true).
        progress: Gradio progress tracker that mirrors tqdm bars.

    Returns:
        A tuple ``(output, gallery)``. ``output`` is the restored RGB image
        (the single restored face when ``aligned`` is true). ``gallery`` is
        the list of restored RGB faces when more than one face was restored,
        otherwise ``None``.

    Raises:
        gr.Error: If no image was uploaded, the file cannot be decoded, or
            either side of the image exceeds 4500 pixels.
    """
    if img is None:
        raise gr.Error("Please upload an image before submitting.")
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI sliders / API clients may deliver whole numbers as floats.
    torch.manual_seed(int(seed))
    num_flow_steps = int(num_flow_steps)

    image = cv2.imread(img, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread signals an unreadable or unsupported file by returning None.
        raise gr.Error(
            "Could not read the uploaded image. Please upload a valid image file."
        )
    height, width = image.shape[0:2]
    if height > 4500 or width > 4500:
        raise gr.Error("Image size too large.")

    face_helper = FaceRestoreHelper(
        scale,
        face_size=512,
        crop_ratio=(1, 1),
        det_model="retinaface_resnet50",
        save_ext="png",
        use_parse=True,
        device=device,
        model_rootpath=None,
    )

    has_aligned = aligned
    cropped_face, restored_faces, restored_img = enhance_face(
        image, face_helper, has_aligned, num_flow_steps=num_flow_steps, scale=scale
    )
    output = restored_faces[0] if has_aligned else restored_img

    # OpenCV works in BGR; the Gradio outputs are converted to RGB.
    output = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
    for i, restored_face in enumerate(restored_faces):
        restored_faces[i] = cv2.cvtColor(restored_face, cv2.COLOR_BGR2RGB)
    torch.cuda.empty_cache()

    # The gallery is only populated when there is more than one face.
    gallery = restored_faces if len(restored_faces) > 1 else None
    return output, gallery
|
|
|
|
# Page title shown at the top of the demo.
title = "Posterior-Mean Rectified Flow: Towards Minimum MSE Photo-Realistic Image Restoration"

# HTML/Markdown description rendered under the title.
intro = """
<h3 style="margin-bottom: 10px; text-align: center;">
<a href="https://ohayonguy.github.io/">Guy Ohayon</a> ,
<a href="https://tomer.net.technion.ac.il/">Tomer Michaeli</a> ,
<a href="https://elad.cs.technion.ac.il/">Michael Elad</a>
</h3>
<h3 style="margin-bottom: 10px; text-align: center;">
<a href="https://arxiv.org/abs/2410.00418">[Paper]</a> |
<a href="https://pmrf-ml.github.io/">[Project Page]</a> |
<a href="https://github.com/ohayonguy/PMRF">[Code]</a>
</h3>

Gradio demo for the blind face image restoration version of [Posterior-Mean Rectified Flow: Towards Minimum MSE Photo-Realistic Image Restoration](https://arxiv.org/abs/2410.00418).
You may use this demo to enhance the quality of any image which contains faces.

PMRF is a novel photo-realistic image restoration algorithm. It (provably) approximates the optimal estimator that minimizes the Mean Squared Error (MSE) under a perfect perceptual quality constraint. Our model in this demo is specifically tailored for blind face image restoration. Please refer to our project's page for more details: https://pmrf-ml.github.io/.

*Notes* :

1. Our original model is designed to restore low-quality face images, where the image is square, there is *only one* face in the image, and the face is centered and aligned. In this demo, however, we incorporate mechanisms that allow restoring the quality of *any* image that contains *any* number of faces. Thus, the resulting quality of such general images is not guaranteed.
2. If your image is not an aligned and square face image, make sure that the checkbox "The input is an aligned and square face image" is *not* marked.
3. Too large images may result in out-of-memory error.
"""

# Footer rendered below the interface (citation, license, contact).
# NOTE(review): the `[](...)` link below has no link text, so it renders
# nothing -- presumably a badge image was meant to go inside; confirm.
article = r"""

If you find our work useful, please ⭐ our <a href='https://github.com/ohayonguy/PMRF' target='_blank'>GitHub repository</a>. Thanks!
[](https://github.com/ohayonguy/PMRF)

📝 **Citation**

```bibtex
@article{ohayon2024pmrf,
author = {Guy Ohayon and Tomer Michaeli and Michael Elad},
title = {Posterior-Mean Rectified Flow: Towards Minimum MSE Photo-Realistic Image Restoration},
journal = {arXiv preprint arXiv:2410.00418},
year = {2024},
url = {https://arxiv.org/abs/2410.00418}
}
```

📋 **License**

This project is released under the <a rel="license" href="https://github.com/ohayonguy/PMRF/blob/master/LICENSE">MIT license</a>.

📧 **Contact**

If you have any questions, please feel free to contact me at <b>guyoep@gmail.com</b>.
"""
|
|
# Input widgets, listed in the positional order of `inference`'s parameters:
# img, randomize_seed, aligned, scale, num_flow_steps, seed.
_inputs = [
    gr.Image(label="Input", type="filepath", show_label=True),
    gr.Checkbox(label="Randomize seed", value=True),
    gr.Checkbox(label="The input is an aligned and square face image", value=False),
    gr.Slider(
        label="Scale factor (applicable to non-aligned face images)",
        minimum=1,
        maximum=4,
        step=0.1,
        value=1,
        scale=1,
    ),
    gr.Slider(
        label="Number of inference steps (a larger number should lead to better image quality)",
        minimum=1,
        maximum=200,
        step=1,
        value=25,
        scale=1,
    ),
    gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, scale=1),
]

# Output widgets matching the two values returned by `inference`.
_outputs = [
    gr.Image(label="Output", type="numpy", show_label=True, format="png"),
    gr.Gallery(
        label="Restored faces gallery",
        type="numpy",
        show_label=True,
        format="png",
    ),
]

# One row per example; the columns follow the order of `_inputs`.
_examples = [
    ["examples/01.png", False, False, 1, 25, 42],
    ["examples/03.jpg", False, False, 2, 25, 42],
    ["examples/00000055.png", False, True, 1, 25, 42],
    ["examples/00000085.png", False, True, 1, 25, 42],
    ["examples/00000113.png", False, True, 1, 25, 42],
    ["examples/00000137.png", False, True, 1, 25, 42],
]

demo = gr.Interface(
    fn=inference,
    inputs=_inputs,
    outputs=_outputs,
    title=title,
    description=intro,
    article=article,
    examples=_examples,
    theme=gr.themes.Soft(),
)

# Enable the request queue, then start the server with the API exposed and
# errors shown in the UI.
demo.queue(api_open=True).launch(show_error=True, show_api=True)
|
|