Spaces:
Running
Running
Update landmarkdiff/postprocess.py to v0.3.2
Browse files
- landmarkdiff/postprocess.py +36 -18
landmarkdiff/postprocess.py
CHANGED
|
@@ -17,9 +17,9 @@ from __future__ import annotations
|
|
| 17 |
import cv2
|
| 18 |
import numpy as np
|
| 19 |
|
| 20 |
-
# Singleton model caches
|
| 21 |
_CODEFORMER_MODEL = None
|
| 22 |
-
|
| 23 |
_REALESRGAN_UPSAMPLER = None
|
| 24 |
_ARCFACE_APP = None
|
| 25 |
|
|
@@ -149,7 +149,8 @@ def frequency_aware_sharpen(
|
|
| 149 |
Returns:
|
| 150 |
Sharpened BGR image.
|
| 151 |
"""
|
| 152 |
-
|
|
|
|
| 153 |
l_channel = lab[:, :, 0]
|
| 154 |
|
| 155 |
# Unsharp mask on luminance only
|
|
@@ -183,18 +184,22 @@ def restore_face_gfpgan(
|
|
| 183 |
except ImportError:
|
| 184 |
return image
|
| 185 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
try:
|
| 187 |
-
global
|
| 188 |
# Singleton: avoid reloading ~300MB GFPGAN model on every call
|
| 189 |
-
if
|
| 190 |
-
|
| 191 |
model_path="https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth",
|
| 192 |
upscale=upscale,
|
| 193 |
arch="clean",
|
| 194 |
channel_multiplier=2,
|
| 195 |
bg_upsampler=None,
|
| 196 |
)
|
| 197 |
-
_, _, restored =
|
| 198 |
image,
|
| 199 |
has_aligned=False,
|
| 200 |
only_center_face=True,
|
|
@@ -237,8 +242,12 @@ def restore_face_codeformer(
|
|
| 237 |
except ImportError:
|
| 238 |
return image
|
| 239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
try:
|
| 241 |
-
global _CODEFORMER_MODEL
|
| 242 |
from codeformer.basicsr.archs.codeformer_arch import CodeFormer as CodeFormerArch
|
| 243 |
from codeformer.inference_codeformer import set_realesrgan as _unused # noqa: F401
|
| 244 |
|
|
@@ -350,8 +359,10 @@ def enhance_background_realesrgan(
|
|
| 350 |
mask_3ch = np.stack([mask_f] * 3, axis=-1) if mask_f.ndim == 2 else mask_f
|
| 351 |
|
| 352 |
# Keep face region from original, use enhanced for background
|
| 353 |
-
result = (
|
| 354 |
-
image.astype(np.float32) * mask_3ch + enhanced.astype(np.float32) * (1.0 - mask_3ch)
|
|
|
|
|
|
|
| 355 |
).astype(np.uint8)
|
| 356 |
return result
|
| 357 |
except Exception:
|
|
@@ -363,19 +374,19 @@ def enhance_background_realesrgan(
|
|
| 363 |
def verify_identity_arcface(
|
| 364 |
original: np.ndarray,
|
| 365 |
result: np.ndarray,
|
| 366 |
-
threshold: float = 0.
|
| 367 |
) -> dict:
|
| 368 |
"""Verify output preserves input identity using ArcFace neural net.
|
| 369 |
|
| 370 |
Computes cosine similarity between ArcFace embeddings of the original
|
| 371 |
and result images. If similarity drops below threshold, flags identity
|
| 372 |
-
drift
|
| 373 |
appearance too much.
|
| 374 |
|
| 375 |
Args:
|
| 376 |
original: BGR original face image.
|
| 377 |
result: BGR post-processed output image.
|
| 378 |
-
threshold: Minimum cosine similarity to pass (0.
|
| 379 |
|
| 380 |
Returns:
|
| 381 |
Dict with 'similarity' (float), 'passed' (bool), 'message' (str).
|
|
@@ -460,14 +471,21 @@ def histogram_match_skin(
|
|
| 460 |
Returns:
|
| 461 |
Color-matched BGR image.
|
| 462 |
"""
|
| 463 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 464 |
|
| 465 |
if not np.any(mask_bool):
|
| 466 |
return source
|
| 467 |
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
|
|
|
|
|
|
|
|
|
| 471 |
|
| 472 |
for ch in range(3):
|
| 473 |
src_vals = src_lab[:, :, ch][mask_bool]
|
|
@@ -509,7 +527,7 @@ def full_postprocess(
|
|
| 509 |
use_laplacian_blend: bool = True,
|
| 510 |
sharpen_strength: float = 0.25,
|
| 511 |
verify_identity: bool = True,
|
| 512 |
-
identity_threshold: float = 0.
|
| 513 |
) -> dict:
|
| 514 |
"""Full neural net + classical post-processing pipeline for maximum photorealism.
|
| 515 |
|
|
|
|
| 17 |
import cv2
|
| 18 |
import numpy as np
|
| 19 |
|
| 20 |
+
# Singleton model caches -- load once, reuse across calls
|
| 21 |
_CODEFORMER_MODEL = None
|
| 22 |
+
_GFPGAN_HELPER = None
|
| 23 |
_REALESRGAN_UPSAMPLER = None
|
| 24 |
_ARCFACE_APP = None
|
| 25 |
|
|
|
|
| 149 |
Returns:
|
| 150 |
Sharpened BGR image.
|
| 151 |
"""
|
| 152 |
+
image_u8 = np.clip(image, 0, 255).astype(np.uint8)
|
| 153 |
+
lab = cv2.cvtColor(image_u8, cv2.COLOR_BGR2LAB).astype(np.float32)
|
| 154 |
l_channel = lab[:, :, 0]
|
| 155 |
|
| 156 |
# Unsharp mask on luminance only
|
|
|
|
| 184 |
except ImportError:
|
| 185 |
return image
|
| 186 |
|
| 187 |
+
# GFPGAN requires 3-channel BGR input
|
| 188 |
+
if image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):
|
| 189 |
+
image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
|
| 190 |
+
|
| 191 |
try:
|
| 192 |
+
global _GFPGAN_HELPER
|
| 193 |
# Singleton: avoid reloading ~300MB GFPGAN model on every call
|
| 194 |
+
if _GFPGAN_HELPER is None:
|
| 195 |
+
_GFPGAN_HELPER = GFPGANer(
|
| 196 |
model_path="https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth",
|
| 197 |
upscale=upscale,
|
| 198 |
arch="clean",
|
| 199 |
channel_multiplier=2,
|
| 200 |
bg_upsampler=None,
|
| 201 |
)
|
| 202 |
+
_, _, restored = _GFPGAN_HELPER.enhance(
|
| 203 |
image,
|
| 204 |
has_aligned=False,
|
| 205 |
only_center_face=True,
|
|
|
|
| 242 |
except ImportError:
|
| 243 |
return image
|
| 244 |
|
| 245 |
+
# CodeFormer requires 3-channel BGR input
|
| 246 |
+
if image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):
|
| 247 |
+
image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
|
| 248 |
+
|
| 249 |
try:
|
| 250 |
+
global _CODEFORMER_MODEL
|
| 251 |
from codeformer.basicsr.archs.codeformer_arch import CodeFormer as CodeFormerArch
|
| 252 |
from codeformer.inference_codeformer import set_realesrgan as _unused # noqa: F401
|
| 253 |
|
|
|
|
| 359 |
mask_3ch = np.stack([mask_f] * 3, axis=-1) if mask_f.ndim == 2 else mask_f
|
| 360 |
|
| 361 |
# Keep face region from original, use enhanced for background
|
| 362 |
+
result = np.clip(
|
| 363 |
+
image.astype(np.float32) * mask_3ch + enhanced.astype(np.float32) * (1.0 - mask_3ch),
|
| 364 |
+
0,
|
| 365 |
+
255,
|
| 366 |
).astype(np.uint8)
|
| 367 |
return result
|
| 368 |
except Exception:
|
|
|
|
| 374 |
def verify_identity_arcface(
|
| 375 |
original: np.ndarray,
|
| 376 |
result: np.ndarray,
|
| 377 |
+
threshold: float = 0.5,
|
| 378 |
) -> dict:
|
| 379 |
"""Verify output preserves input identity using ArcFace neural net.
|
| 380 |
|
| 381 |
Computes cosine similarity between ArcFace embeddings of the original
|
| 382 |
and result images. If similarity drops below threshold, flags identity
|
| 383 |
+
drift -- meaning the postprocessing or diffusion altered the person's
|
| 384 |
appearance too much.
|
| 385 |
|
| 386 |
Args:
|
| 387 |
original: BGR original face image.
|
| 388 |
result: BGR post-processed output image.
|
| 389 |
+
threshold: Minimum cosine similarity to pass (0.5 = same person).
|
| 390 |
|
| 391 |
Returns:
|
| 392 |
Dict with 'similarity' (float), 'passed' (bool), 'message' (str).
|
|
|
|
| 471 |
Returns:
|
| 472 |
Color-matched BGR image.
|
| 473 |
"""
|
| 474 |
+
# Ensure 2D mask for per-channel indexing
|
| 475 |
+
m = mask
|
| 476 |
+
if m.ndim == 3:
|
| 477 |
+
m = m[:, :, 0]
|
| 478 |
+
mask_bool = m > 0.3 if m.dtype == np.float32 else m > 76
|
| 479 |
|
| 480 |
if not np.any(mask_bool):
|
| 481 |
return source
|
| 482 |
|
| 483 |
+
# Clip to valid uint8 range before LAB conversion to prevent overflow
|
| 484 |
+
# on images with saturated or out-of-range pixel values
|
| 485 |
+
src_u8 = np.clip(source, 0, 255).astype(np.uint8)
|
| 486 |
+
ref_u8 = np.clip(reference, 0, 255).astype(np.uint8)
|
| 487 |
+
src_lab = cv2.cvtColor(src_u8, cv2.COLOR_BGR2LAB).astype(np.float32)
|
| 488 |
+
ref_lab = cv2.cvtColor(ref_u8, cv2.COLOR_BGR2LAB).astype(np.float32)
|
| 489 |
|
| 490 |
for ch in range(3):
|
| 491 |
src_vals = src_lab[:, :, ch][mask_bool]
|
|
|
|
| 527 |
use_laplacian_blend: bool = True,
|
| 528 |
sharpen_strength: float = 0.25,
|
| 529 |
verify_identity: bool = True,
|
| 530 |
+
identity_threshold: float = 0.5,
|
| 531 |
) -> dict:
|
| 532 |
"""Full neural net + classical post-processing pipeline for maximum photorealism.
|
| 533 |
|