Spaces:

dreamlessx
/

LandmarkDiff

Running

App Files Files Community

dreamlessx committed on Mar 14

Commit

83b71db

verified ·

1 Parent(s): 57d9540

Upload landmarkdiff/postprocess.py with huggingface_hub

Browse files

Files changed (1) hide show

landmarkdiff/postprocess.py +452 -0

landmarkdiff/postprocess.py ADDED Viewed

	@@ -0,0 +1,452 @@

+"""Post-processing: CodeFormer/GFPGAN face restore, Real-ESRGAN bg,
+Laplacian blend, sharpening, histogram matching, ArcFace identity gate.
+"""
+from __future__ import annotations
+import cv2
+import numpy as np
def laplacian_pyramid_blend(
    source: np.ndarray,
    target: np.ndarray,
    mask: np.ndarray,
    levels: int = 6,
) -> np.ndarray:
    """Laplacian pyramid blend - kills the 'pasted on' look from alpha blending.

    Args:
        source: Image to blend in where the mask is high. Resized to the
            target's height/width if they differ.
        target: Image kept where the mask is low; defines the output size.
        mask: Blend weights, either in [0, 1] or in [0, 255] (values above 1
            trigger a division by 255). May be (H, W), (H, W, 1) or carry one
            weight per image channel.
        levels: Number of pyramid levels below full resolution.

    Returns:
        The blended image as uint8, with the same height/width as ``target``.

    Raises:
        ValueError: If ``levels`` is negative.
    """
    if levels < 0:
        raise ValueError(f"levels must be non-negative, got {levels}")

    h, w = target.shape[:2]
    if source.shape[:2] != (h, w):
        source = cv2.resize(source, (w, h))

    # Normalize the mask to float32 weights in [0, 1].
    mask_f = mask.astype(np.float32)
    if mask_f.size and mask_f.max() > 1.0:
        mask_f = mask_f / 255.0
    # OpenCV routines return 2-D arrays for single-channel input, so collapse
    # an (H, W, 1) mask now and re-expand it explicitly below; otherwise the
    # coarser pyramid levels would no longer broadcast against the images.
    if mask_f.ndim == 3 and mask_f.shape[2] == 1:
        mask_f = mask_f[:, :, 0]
    if mask_f.shape[:2] != (h, w):
        mask_f = cv2.resize(mask_f, (w, h))
    if mask_f.ndim == 2 and target.ndim == 3:
        mask_f = np.repeat(mask_f[:, :, np.newaxis], target.shape[2], axis=2)

    src_f = source.astype(np.float32)
    tgt_f = target.astype(np.float32)

    # Pad (rather than stretch) up to a multiple of 2^levels so that every
    # pyrDown/pyrUp pair round-trips exactly. Padding keeps the original
    # pixels in place, which makes the final [:h, :w] crop geometrically
    # correct; resizing here would leave the output scaled and cropped.
    factor = 2 ** levels
    pad_h = -h % factor
    pad_w = -w % factor
    if pad_h or pad_w:
        src_f, tgt_f, mask_f = (
            cv2.copyMakeBorder(arr, 0, pad_h, 0, pad_w, cv2.BORDER_REFLECT_101)
            for arr in (src_f, tgt_f, mask_f)
        )

    # Gaussian pyramid of the mask: one weight map per Laplacian level.
    mask_pyr = [mask_f]
    for _ in range(levels):
        mask_pyr.append(cv2.pyrDown(mask_pyr[-1]))

    src_lap = _build_laplacian_pyramid(src_f, levels)
    tgt_lap = _build_laplacian_pyramid(tgt_f, levels)

    # Blend each frequency band with the mask at the matching resolution.
    blended_lap = []
    for src_band, tgt_band, weight in zip(src_lap, tgt_lap, mask_pyr):
        if weight.shape[:2] != src_band.shape[:2]:
            weight = cv2.resize(weight, (src_band.shape[1], src_band.shape[0]))
        blended_lap.append(src_band * weight + tgt_band * (1.0 - weight))

    # Collapse the pyramid and drop the padding again.
    result = _reconstruct_from_laplacian(blended_lap)[:h, :w]
    # Round before the cast so the conversion to uint8 does not bias darker.
    return np.clip(np.rint(result), 0, 255).astype(np.uint8)
+def _build_laplacian_pyramid(
+    image: np.ndarray,
+    levels: int,
+) -> list[np.ndarray]:
+    """Build Laplacian pyramid from an image."""
+    gaussian = [image.copy()]
+    for _ in range(levels):
+        gaussian.append(cv2.pyrDown(gaussian[-1]))
+    laplacian = []
+    for i in range(levels):
+        upsampled = cv2.pyrUp(gaussian[i + 1])
+        # Match sizes (pyrUp can add a pixel)
+        gh, gw = gaussian[i].shape[:2]
+        upsampled = upsampled[:gh, :gw]
+        laplacian.append(gaussian[i] - upsampled)
+    laplacian.append(gaussian[-1])  # coarsest level
+    return laplacian
+def _reconstruct_from_laplacian(pyramid: list[np.ndarray]) -> np.ndarray:
+    """Reconstruct image from Laplacian pyramid."""
+    image = pyramid[-1].copy()
+    for i in range(len(pyramid) - 2, -1, -1):
+        image = cv2.pyrUp(image)
+        lh, lw = pyramid[i].shape[:2]
+        image = image[:lh, :lw]
+        image = image + pyramid[i]
+    return image
def frequency_aware_sharpen(
    image: np.ndarray,
    strength: float = 0.3,
    radius: int = 3,
) -> np.ndarray:
    """Unsharp mask on LAB luminance only - sharpens skin texture without color fringe.

    Args:
        image: BGR image (converted with ``cv2.COLOR_BGR2LAB``).
        strength: Multiplier applied to the (luminance - blurred) detail term.
        radius: Gaussian blur radius; the kernel size is ``2 * radius + 1``.
            Negative values are treated as 0.

    Returns:
        The sharpened image as uint8 BGR.
    """
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB).astype(np.float32)
    l_channel = lab[:, :, 0]

    # Kernel size must be odd and positive for cv2.GaussianBlur.
    ksize = 2 * max(int(radius), 0) + 1
    blurred = cv2.GaussianBlur(l_channel, (ksize, ksize), 0)
    sharpened = l_channel + strength * (l_channel - blurred)

    # Round before clipping/casting: a plain astype(uint8) truncates and would
    # shift the luminance slightly darker on every call.
    lab[:, :, 0] = np.clip(np.rint(sharpened), 0, 255)
    return cv2.cvtColor(lab.astype(np.uint8), cv2.COLOR_LAB2BGR)
def restore_face_gfpgan(
    image: np.ndarray,
    upscale: int = 1,
) -> np.ndarray:
    """GFPGAN face restore. Returns original if not installed.

    Best-effort: the very same ``image`` object is returned when ``gfpgan``
    cannot be imported, when restoration raises, or when the restorer yields
    no output. Callers can therefore use an identity check (``is``) to tell
    whether restoration actually happened.

    Args:
        image: Input image passed to ``GFPGANer.enhance``.
        upscale: Upscale factor forwarded to ``GFPGANer``.

    Returns:
        The restored image, or ``image`` itself on any failure.
    """
    try:
        from gfpgan import GFPGANer
    except ImportError:
        return image

    try:
        restorer = GFPGANer(
            model_path="https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth",
            upscale=upscale,
            arch="clean",
            channel_multiplier=2,
            bg_upsampler=None,
        )
        _, _, restored = restorer.enhance(
            image,
            has_aligned=False,
            only_center_face=True,
            paste_back=True,
        )
    except Exception:
        # Keep the best-effort contract, but leave a trace instead of hiding
        # model-download / inference failures completely.
        import logging

        logging.getLogger(__name__).warning(
            "GFPGAN face restoration failed; returning input unchanged",
            exc_info=True,
        )
        return image

    return image if restored is None else restored
def restore_face_codeformer(
    image: np.ndarray,
    fidelity: float = 0.7,
    upscale: int = 1,
) -> np.ndarray:
    """CodeFormer face restore. fidelity: 0=quality, 1=identity. Returns original if not installed.

    Best-effort: the very same ``image`` object is returned when the
    CodeFormer / torch imports fail, when any step raises, or when pasting
    back yields no image. Callers can use an identity check (``is``) to tell
    whether restoration actually happened.

    Args:
        image: Input image handed to ``FaceRestoreHelper.read_image``.
        fidelity: Passed to the model as ``w``.
        upscale: Upscale factor forwarded to ``FaceRestoreHelper``.

    Returns:
        The image with restored face(s) pasted back, or ``image`` itself on
        any failure.
    """
    try:
        from codeformer.basicsr.utils import img2tensor, tensor2img
        from codeformer.facelib.utils.face_restoration_helper import FaceRestoreHelper
        from codeformer.basicsr.utils.download_util import load_file_from_url
        import torch
        from torchvision.transforms.functional import normalize as tv_normalize
    except ImportError:
        return image

    try:
        from codeformer.inference_codeformer import set_realesrgan as _unused  # noqa: F401
        from codeformer.basicsr.archs.codeformer_arch import CodeFormer as CodeFormerArch

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = CodeFormerArch(
            dim_embd=512, codebook_size=1024, n_head=8, n_layers=9,
            connect_list=["32", "64", "128", "256"],
        ).to(device)
        ckpt_path = load_file_from_url(
            url="https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth",
            model_dir="weights/CodeFormer",
            progress=True,
        )
        # SECURITY NOTE(review): weights_only=False lets torch.load unpickle
        # arbitrary objects from the downloaded checkpoint. Consider
        # weights_only=True once confirmed that this checkpoint loads with it.
        checkpoint = torch.load(ckpt_path, map_location=device, weights_only=False)
        model.load_state_dict(checkpoint["params_ema"])
        model.eval()

        face_helper = FaceRestoreHelper(
            upscale,
            face_size=512,
            crop_ratio=(1, 1),
            det_model="retinaface_resnet50",
            save_ext="png",
            device=device,
        )
        face_helper.read_image(image)
        face_helper.get_face_landmarks_5(only_center_face=True)
        face_helper.align_warp_face()

        for cropped_face in face_helper.cropped_faces:
            # Scale to [0, 1], then normalize to [-1, 1] (mean 0.5, std 0.5);
            # tensor2img below maps the output back from that range.
            face_t = img2tensor(cropped_face / 255.0, bgr2rgb=True, float32=True)
            tv_normalize(face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
            face_t = face_t.unsqueeze(0).to(device)
            with torch.no_grad():
                output = model(face_t, w=fidelity, adain=True)[0]
                restored = tensor2img(output, rgb2bgr=True, min_max=(-1, 1))
            restored = restored.astype(np.uint8)
            face_helper.add_restored_face(restored)

        face_helper.get_inverse_affine(None)
        restored_img = face_helper.paste_faces_to_image()
    except Exception:
        # Keep the best-effort contract, but leave a trace instead of hiding
        # download / checkpoint / inference failures completely.
        import logging

        logging.getLogger(__name__).warning(
            "CodeFormer face restoration failed; returning input unchanged",
            exc_info=True,
        )
        return image

    return image if restored_img is None else restored_img
def enhance_background_realesrgan(
    image: np.ndarray,
    mask: np.ndarray,
    outscale: int = 2,
) -> np.ndarray:
    """Real-ESRGAN on background only (outside mask). Returns original if not installed.

    The whole image is enhanced, resized back to the input resolution, and
    then composited so that the masked region keeps the input pixels while
    the rest takes the enhanced pixels.

    Args:
        image: Input image.
        mask: Region to protect, in [0, 1] or [0, 255] (values above 1
            trigger a division by 255). (H, W) masks are expanded to three
            channels.
        outscale: Output scale forwarded to ``RealESRGANer.enhance``.

    Returns:
        The composited uint8 image, or ``image`` itself when the optional
        packages cannot be imported or enhancement raises.
    """
    try:
        from realesrgan import RealESRGANer
        from basicsr.archs.rrdbnet_arch import RRDBNet
        import torch
    except ImportError:
        return image

    try:
        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
        upsampler = RealESRGANer(
            scale=4,
            model_path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
            model=model,
            tile=400,
            tile_pad=10,
            pre_pad=0,
            half=torch.cuda.is_available(),
        )
        enhanced, _ = upsampler.enhance(image, outscale=outscale)

        # Downscale back to original size
        h, w = image.shape[:2]
        enhanced = cv2.resize(enhanced, (w, h), interpolation=cv2.INTER_LANCZOS4)

        # Normalize the mask to float weights in [0, 1] at image resolution.
        mask_f = mask.astype(np.float32)
        if mask_f.size and mask_f.max() > 1.0:
            mask_f /= 255.0
        if mask_f.shape[:2] != (h, w):
            mask_f = cv2.resize(mask_f, (w, h))
        if mask_f.ndim == 2:
            mask_3ch = np.stack([mask_f] * 3, axis=-1)
        else:
            mask_3ch = mask_f

        # Keep face region from original, use enhanced for background
        composite = (
            image.astype(np.float32) * mask_3ch
            + enhanced.astype(np.float32) * (1.0 - mask_3ch)
        )
        # Round and clip so the uint8 cast neither truncates nor wraps.
        return np.clip(np.rint(composite), 0, 255).astype(np.uint8)
    except Exception:
        # Keep the best-effort contract, but leave a trace instead of hiding
        # download / inference failures completely.
        import logging

        logging.getLogger(__name__).warning(
            "Real-ESRGAN background enhancement failed; returning input unchanged",
            exc_info=True,
        )
        return image
def verify_identity_arcface(
    original: np.ndarray,
    result: np.ndarray,
    threshold: float = 0.6,
) -> dict:
    """Compare the first detected face of both images via embedding cosine similarity.

    Returns a dict with ``similarity`` (clipped to [0, 1], or -1.0 when the
    check could not run), ``passed`` and a human-readable ``message``. The
    check never blocks: a missing InsightFace install, an undetected face or
    any exception all report ``passed=True`` with similarity -1.0.
    """

    def _not_checked(message: str) -> dict:
        # Shared shape of every "could not run the check" outcome.
        return {
            "similarity": -1.0,
            "passed": True,
            "message": message,
        }

    try:
        from insightface.app import FaceAnalysis
    except ImportError:
        return _not_checked("InsightFace not installed - identity check skipped")

    try:
        analyzer = FaceAnalysis(
            name="buffalo_l",
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        analyzer.prepare(ctx_id=0 if _has_cuda() else -1, det_size=(320, 320))

        faces_before = analyzer.get(original)
        faces_after = analyzer.get(result)
        if not (faces_before and faces_after):
            return _not_checked(
                "Could not detect face in one/both images - check skipped"
            )

        emb_before = faces_before[0].embedding
        emb_after = faces_after[0].embedding

        # Cosine similarity; the epsilon guards against a zero-norm embedding.
        norm_product = np.linalg.norm(emb_before) * np.linalg.norm(emb_after) + 1e-8
        sim = float(np.dot(emb_before, emb_after) / norm_product)
        sim = float(np.clip(sim, 0, 1))

        passed = sim >= threshold
        msg = (
            f"Identity preserved (similarity={sim:.3f})"
            if passed
            else f"WARNING: Identity drift detected (similarity={sim:.3f} < {threshold})"
        )
        return {"similarity": sim, "passed": passed, "message": msg}
    except Exception as e:
        return _not_checked(f"Identity check failed: {e}")
+def _has_cuda() -> bool:
+    try:
+        import torch
+        return torch.cuda.is_available()
+    except ImportError:
+        return False
def histogram_match_skin(
    source: np.ndarray,
    reference: np.ndarray,
    mask: np.ndarray,
) -> np.ndarray:
    """CDF-based histogram matching in LAB space. Better than mean/std for skin.

    Pixels of ``source`` inside the mask are remapped, per LAB channel, so
    that their value distribution follows the distribution of ``reference``
    inside the same mask. Pixels outside the mask keep their LAB values.

    Args:
        source: BGR image to adjust.
        reference: BGR image providing the target distribution. Resized to
            the source's height/width if they differ.
        mask: Region of interest, in [0, 1] or [0, 255] (values above 1
            trigger a division by 255). Any dtype; may be (H, W) or
            (H, W, C). A pixel is selected when its normalized weight
            exceeds 0.3.

    Returns:
        The adjusted uint8 BGR image, or ``source`` itself (same object) when
        the mask selects no pixels.
    """
    mask_arr = np.asarray(mask)
    if mask_arr.size == 0:
        return source

    # Normalize every mask dtype the same way the rest of this module does,
    # instead of guessing the value range from the dtype alone (which sent
    # float64 / bool / 0-1 integer masks down the 0-255 branch).
    mask_f = mask_arr.astype(np.float32)
    if mask_f.max() > 1.0:
        mask_f /= 255.0
    if mask_f.ndim == 3:
        # Boolean indexing below needs a 2-D mask; take the strongest channel.
        mask_f = mask_f.max(axis=2)

    h, w = source.shape[:2]
    if mask_f.shape != (h, w):
        mask_f = cv2.resize(mask_f, (w, h))

    mask_bool = mask_f > 0.3
    if not mask_bool.any():
        return source

    if reference.shape[:2] != (h, w):
        reference = cv2.resize(reference, (w, h))

    src_lab = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype(np.float32)
    ref_lab = cv2.cvtColor(reference, cv2.COLOR_BGR2LAB).astype(np.float32)

    for ch in range(3):
        src_vals = src_lab[mask_bool, ch]
        # Both samples come from the same mask, so they have equal length and
        # the k-th smallest source value maps to the k-th smallest reference
        # value: the sorted reference values are the quantile mapping.
        src_sorted = np.sort(src_vals)
        ref_sorted = np.sort(ref_lab[mask_bool, ch])
        # Only masked pixels are rewritten, so only they need to be mapped.
        src_lab[mask_bool, ch] = np.interp(src_vals, src_sorted, ref_sorted)

    result_lab = np.clip(src_lab, 0, 255).astype(np.uint8)
    return cv2.cvtColor(result_lab, cv2.COLOR_LAB2BGR)
def full_postprocess(
    generated: np.ndarray,
    original: np.ndarray,
    mask: np.ndarray,
    restore_mode: str = "codeformer",
    codeformer_fidelity: float = 0.7,
    use_realesrgan: bool = True,
    use_laplacian_blend: bool = True,
    sharpen_strength: float = 0.25,
    verify_identity: bool = True,
    identity_threshold: float = 0.6,
) -> dict:
    """Full pipeline: restore -> bg enhance -> histogram match -> sharpen -> blend -> identity check.

    Args:
        generated: Generated image to post-process (not modified; a copy is used).
        original: Original image, used as color reference, blend target and
            identity reference.
        mask: Blend/region mask, in [0, 1] or [0, 255].
        restore_mode: ``"codeformer"`` (falls back to GFPGAN when CodeFormer
            leaves the image untouched), ``"gfpgan"``, or anything else to
            skip face restoration.
        codeformer_fidelity: Fidelity forwarded to ``restore_face_codeformer``.
        use_realesrgan: Run ``enhance_background_realesrgan`` when True.
        use_laplacian_blend: Use ``laplacian_pyramid_blend`` when True,
            otherwise a plain alpha composite.
        sharpen_strength: Strength for ``frequency_aware_sharpen``; values
            <= 0 skip sharpening.
        verify_identity: Run ``verify_identity_arcface`` on the composite.
        identity_threshold: Threshold forwarded to the identity check.

    Returns:
        Dict with ``"image"`` (the composite), ``"identity_check"`` (result
        dict of the identity check, or a "skipped" placeholder) and
        ``"restore_used"`` (``"codeformer"``, ``"gfpgan"`` or ``"none"``).
    """
    result = generated.copy()
    restore_used = "none"

    # Step 1: Neural face restoration (CodeFormer > GFPGAN > skip).
    # Restorers in priority order; each returns its input object unchanged
    # when it could not run, so identity against the image it was handed is
    # the "did anything happen" signal.
    if restore_mode == "codeformer":
        restorers = [
            (
                "codeformer",
                lambda img: restore_face_codeformer(img, fidelity=codeformer_fidelity),
            ),
            ("gfpgan", restore_face_gfpgan),
        ]
    elif restore_mode == "gfpgan":
        restorers = [("gfpgan", restore_face_gfpgan)]
    else:
        restorers = []

    for name, restorer in restorers:
        restored = restorer(result)
        # Compare against the image passed in, not against `generated`:
        # `result` starts as a copy, so it is never `generated` itself and
        # that comparison would always claim a restoration took place.
        if restored is not result:
            result = restored
            restore_used = name
            break

    # Step 2: Neural background enhancement
    if use_realesrgan:
        result = enhance_background_realesrgan(result, mask)

    # Step 3: Skin tone histogram matching (classical)
    result = histogram_match_skin(result, original, mask)

    # Step 4: Sharpen texture (classical)
    if sharpen_strength > 0:
        result = frequency_aware_sharpen(result, strength=sharpen_strength)

    # Step 5: Blend into original (classical)
    if use_laplacian_blend:
        composited = laplacian_pyramid_blend(result, original, mask)
    else:
        mask_f = mask.astype(np.float32)
        if mask_f.size and mask_f.max() > 1.0:
            mask_f /= 255.0
        if mask_f.ndim == 2:
            mask_3ch = np.stack([mask_f] * 3, axis=-1)
        else:
            mask_3ch = mask_f
        alpha_mix = (
            result.astype(np.float32) * mask_3ch
            + original.astype(np.float32) * (1.0 - mask_3ch)
        )
        # Round and clip so the uint8 cast neither truncates nor wraps.
        composited = np.clip(np.rint(alpha_mix), 0, 255).astype(np.uint8)

    # Step 6: Neural identity verification
    identity_check = {"similarity": -1.0, "passed": True, "message": "skipped"}
    if verify_identity:
        identity_check = verify_identity_arcface(
            original, composited, threshold=identity_threshold,
        )

    return {
        "image": composited,
        "identity_check": identity_check,
        "restore_used": restore_used,
    }