Spaces:

dreamlessx
/

LandmarkDiff

Running

App Files Community

dreamlessx committed on Mar 15

Commit

bb003e6

verified ·

1 Parent(s): 92264a1

Update landmarkdiff/postprocess.py to v0.3.2

Browse files

Files changed (1) hide show

landmarkdiff/postprocess.py +36 -18

landmarkdiff/postprocess.py CHANGED Viewed

@@ -17,9 +17,9 @@ from __future__ import annotations
 import cv2
 import numpy as np
-# Singleton model caches — load once, reuse across calls
 _CODEFORMER_MODEL = None
-_CODEFORMER_HELPER = None
 _REALESRGAN_UPSAMPLER = None
 _ARCFACE_APP = None
@@ -149,7 +149,8 @@ def frequency_aware_sharpen(
     Returns:
         Sharpened BGR image.
     """
-    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB).astype(np.float32)
     l_channel = lab[:, :, 0]
     # Unsharp mask on luminance only
@@ -183,18 +184,22 @@ def restore_face_gfpgan(
     except ImportError:
         return image
     try:
-        global _CODEFORMER_HELPER
         # Singleton: avoid reloading ~300MB GFPGAN model on every call
-        if _CODEFORMER_HELPER is None:
-            _CODEFORMER_HELPER = GFPGANer(
                 model_path="https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth",
                 upscale=upscale,
                 arch="clean",
                 channel_multiplier=2,
                 bg_upsampler=None,
             )
-        _, _, restored = _CODEFORMER_HELPER.enhance(
             image,
             has_aligned=False,
             only_center_face=True,
@@ -237,8 +242,12 @@ def restore_face_codeformer(
     except ImportError:
         return image
     try:
-        global _CODEFORMER_MODEL, _CODEFORMER_HELPER
         from codeformer.basicsr.archs.codeformer_arch import CodeFormer as CodeFormerArch
         from codeformer.inference_codeformer import set_realesrgan as _unused  # noqa: F401
@@ -350,8 +359,10 @@ def enhance_background_realesrgan(
         mask_3ch = np.stack([mask_f] * 3, axis=-1) if mask_f.ndim == 2 else mask_f
         # Keep face region from original, use enhanced for background
-        result = (
-            image.astype(np.float32) * mask_3ch + enhanced.astype(np.float32) * (1.0 - mask_3ch)
         ).astype(np.uint8)
         return result
     except Exception:
@@ -363,19 +374,19 @@ def enhance_background_realesrgan(
 def verify_identity_arcface(
     original: np.ndarray,
     result: np.ndarray,
-    threshold: float = 0.6,
 ) -> dict:
     """Verify output preserves input identity using ArcFace neural net.
     Computes cosine similarity between ArcFace embeddings of the original
     and result images. If similarity drops below threshold, flags identity
-    drift — meaning the postprocessing or diffusion altered the person's
     appearance too much.
     Args:
         original: BGR original face image.
         result: BGR post-processed output image.
-        threshold: Minimum cosine similarity to pass (0.6 = same person).
     Returns:
         Dict with 'similarity' (float), 'passed' (bool), 'message' (str).
@@ -460,14 +471,21 @@ def histogram_match_skin(
     Returns:
         Color-matched BGR image.
     """
-    mask_bool = mask > 0.3 if mask.dtype == np.float32 else mask > 76
     if not np.any(mask_bool):
         return source
-    source.copy()
-    src_lab = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype(np.float32)
-    ref_lab = cv2.cvtColor(reference, cv2.COLOR_BGR2LAB).astype(np.float32)
     for ch in range(3):
         src_vals = src_lab[:, :, ch][mask_bool]
@@ -509,7 +527,7 @@ def full_postprocess(
     use_laplacian_blend: bool = True,
     sharpen_strength: float = 0.25,
     verify_identity: bool = True,
-    identity_threshold: float = 0.6,
 ) -> dict:
     """Full neural net + classical post-processing pipeline for maximum photorealism.

 import cv2
 import numpy as np
+# Singleton model caches -- load once, reuse across calls
 _CODEFORMER_MODEL = None
+_GFPGAN_HELPER = None
 _REALESRGAN_UPSAMPLER = None
 _ARCFACE_APP = None
     Returns:
         Sharpened BGR image.
     """
+    image_u8 = np.clip(image, 0, 255).astype(np.uint8)
+    lab = cv2.cvtColor(image_u8, cv2.COLOR_BGR2LAB).astype(np.float32)
     l_channel = lab[:, :, 0]
     # Unsharp mask on luminance only
     except ImportError:
         return image
+    # GFPGAN requires 3-channel BGR input
+    if image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):
+        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
     try:
+        global _GFPGAN_HELPER
         # Singleton: avoid reloading ~300MB GFPGAN model on every call
+        if _GFPGAN_HELPER is None:
+            _GFPGAN_HELPER = GFPGANer(
                 model_path="https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth",
                 upscale=upscale,
                 arch="clean",
                 channel_multiplier=2,
                 bg_upsampler=None,
             )
+        _, _, restored = _GFPGAN_HELPER.enhance(
             image,
             has_aligned=False,
             only_center_face=True,
     except ImportError:
         return image
+    # CodeFormer requires 3-channel BGR input
+    if image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):
+        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
     try:
+        global _CODEFORMER_MODEL
         from codeformer.basicsr.archs.codeformer_arch import CodeFormer as CodeFormerArch
         from codeformer.inference_codeformer import set_realesrgan as _unused  # noqa: F401
         mask_3ch = np.stack([mask_f] * 3, axis=-1) if mask_f.ndim == 2 else mask_f
         # Keep face region from original, use enhanced for background
+        result = np.clip(
+            image.astype(np.float32) * mask_3ch + enhanced.astype(np.float32) * (1.0 - mask_3ch),
+            0,
+            255,
         ).astype(np.uint8)
         return result
     except Exception:
 def verify_identity_arcface(
     original: np.ndarray,
     result: np.ndarray,
+    threshold: float = 0.5,
 ) -> dict:
     """Verify output preserves input identity using ArcFace neural net.
     Computes cosine similarity between ArcFace embeddings of the original
     and result images. If similarity drops below threshold, flags identity
+    drift -- meaning the postprocessing or diffusion altered the person's
     appearance too much.
     Args:
         original: BGR original face image.
         result: BGR post-processed output image.
+        threshold: Minimum cosine similarity to pass (0.5 = same person).
     Returns:
         Dict with 'similarity' (float), 'passed' (bool), 'message' (str).
     Returns:
         Color-matched BGR image.
     """
+    # Ensure 2D mask for per-channel indexing
+    m = mask
+    if m.ndim == 3:
+        m = m[:, :, 0]
+    mask_bool = m > 0.3 if m.dtype == np.float32 else m > 76
     if not np.any(mask_bool):
         return source
+    # Clip to valid uint8 range before LAB conversion to prevent overflow
+    # on images with saturated or out-of-range pixel values
+    src_u8 = np.clip(source, 0, 255).astype(np.uint8)
+    ref_u8 = np.clip(reference, 0, 255).astype(np.uint8)
+    src_lab = cv2.cvtColor(src_u8, cv2.COLOR_BGR2LAB).astype(np.float32)
+    ref_lab = cv2.cvtColor(ref_u8, cv2.COLOR_BGR2LAB).astype(np.float32)
     for ch in range(3):
         src_vals = src_lab[:, :, ch][mask_bool]
     use_laplacian_blend: bool = True,
     sharpen_strength: float = 0.25,
     verify_identity: bool = True,
+    identity_threshold: float = 0.5,
 ) -> dict:
     """Full neural net + classical post-processing pipeline for maximum photorealism.