Update landmarkdiff/inference.py to v0.3.2
landmarkdiff/inference.py  CHANGED  (+162 -71)
@@ -4,13 +4,15 @@ Four modes:
 1. ControlNet: CrucibleAI/ControlNetMediaPipeFace + SD1.5 (requires HF auth + GPU)
 2. ControlNet + IP-Adapter: ControlNet with identity preservation via face embeddings
 3. Img2Img: SD1.5 img2img with mask compositing (runs on MPS, no auth needed)
-4. TPS-only: Pure geometric warp
+4. TPS-only: Pure geometric warp -- no diffusion model, instant results
 
 Supports MPS (Apple Silicon), CUDA, and CPU backends.
 """
 
 from __future__ import annotations
 
+import logging
+import os
 import sys
 from pathlib import Path
 from typing import TYPE_CHECKING
@@ -28,6 +30,8 @@ from landmarkdiff.synthetic.tps_warp import warp_image_tps
 if TYPE_CHECKING:
     from landmarkdiff.clinical import ClinicalFlags
 
+logger = logging.getLogger(__name__)
+
 
 def get_device() -> torch.device:
     if torch.backends.mps.is_available():
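Note: the module now routes console output through the stdlib logging package (the new `logger` above), so callers opt in to messages. A minimal sketch of enabling them in a driver script:

    import logging

    # Surface the pipeline's INFO/WARNING messages
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")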
@@ -71,6 +75,16 @@ PROCEDURE_PROMPTS: dict[str, str] = {
         "realistic skin pores and texture, sharp focus, studio lighting, "
         "DSLR quality, natural skin color"
     ),
+    "brow_lift": (
+        "clinical photograph, patient face, elevated brow position, smooth forehead, "
+        "realistic skin pores and texture, sharp focus, studio lighting, "
+        "DSLR quality, natural skin color"
+    ),
+    "mentoplasty": (
+        "clinical photograph, patient face, refined chin contour, balanced lower face, "
+        "realistic skin pores and texture, sharp focus, studio lighting, "
+        "DSLR quality, natural skin color"
+    ),
 }
 
 NEGATIVE_PROMPT = (
@@ -81,6 +95,21 @@ NEGATIVE_PROMPT = (
     "plastic skin, waxy, smooth skin, airbrushed, oversaturated"
 )
 
+# Skin tone matching: minimum mask alpha to include in LAB stats transfer
+_SKIN_TONE_MASK_THRESHOLD = 0.3
+# Epsilon to avoid division by zero in std normalization
+_STD_EPSILON = 1e-6
+# Default SD1.5 resolution (all pipelines resize to this)
+_SD15_RESOLUTION = 512
+# Intensity mapping: UI scale (0-100) to displacement model scale (0-2)
+_INTENSITY_UI_TO_MODEL = 50.0
+# Face view classification thresholds (degrees)
+_YAW_FRONTAL_MAX = 15
+_YAW_THREE_QUARTER_MAX = 45
+_YAW_WARNING_THRESHOLD = 30
+# Max pitch scale factor (maps pitch ratio to degrees)
+_PITCH_SCALE = 45
+
 
 def mask_composite(
     warped: np.ndarray,
@@ -107,7 +136,7 @@ def mask_composite(
 
         return laplacian_pyramid_blend(corrected, original, mask_f)
     except Exception:
-
+        logger.debug("Laplacian blend failed, using alpha blend", exc_info=True)
 
         # Fallback: simple alpha blend
         mask_3ch = mask_to_3channel(mask_f)
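Note: the fallback branch is a plain per-pixel alpha blend. A self-contained sketch of the same idea (the helper name here is hypothetical; the real code uses the project's mask_to_3channel):

    import numpy as np

    def alpha_blend(corrected: np.ndarray, original: np.ndarray, mask: np.ndarray) -> np.ndarray:
        """Blend two HxWx3 float images with an HxW mask in [0, 1]."""
        mask_3ch = np.repeat(mask[:, :, None], 3, axis=2)  # HxW -> HxWx3
        return mask_3ch * corrected + (1.0 - mask_3ch) * original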
@@ -124,7 +153,7 @@ def _match_skin_tone(source: np.ndarray, target: np.ndarray, mask: np.ndarray) -> np.ndarray:
     Works in LAB space: transfers L (luminance) and AB (color) statistics
     from the original to the warped image so skin tone is preserved exactly.
     """
-    mask_bool = mask > 0.3
+    mask_bool = mask > _SKIN_TONE_MASK_THRESHOLD
     if not np.any(mask_bool):
         return source
 
@@ -136,8 +165,8 @@ def _match_skin_tone(source: np.ndarray, target: np.ndarray, mask: np.ndarray) -> np.ndarray:
         src_vals = src_lab[:, :, ch][mask_bool]
         tgt_vals = tgt_lab[:, :, ch][mask_bool]
 
-        src_mean, src_std = np.mean(src_vals), np.std(src_vals) + 1e-6
-        tgt_mean, tgt_std = np.mean(tgt_vals), np.std(tgt_vals) + 1e-6
+        src_mean, src_std = np.mean(src_vals), np.std(src_vals) + _STD_EPSILON
+        tgt_mean, tgt_std = np.mean(tgt_vals), np.std(tgt_vals) + _STD_EPSILON
 
         # Normalize source to match target's distribution
         src_lab[:, :, ch] = np.where(
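Note: this is the classic Reinhard-style statistics transfer -- shift and scale each LAB channel so its masked mean/std match the target's, guarded by the epsilon above. A simplified standalone sketch (the module applies the result selectively via np.where; this version rescales the whole channel):

    import numpy as np

    def transfer_channel_stats(src_lab, tgt_lab, mask_bool, ch, eps=1e-6):
        src_vals = src_lab[:, :, ch][mask_bool]
        tgt_vals = tgt_lab[:, :, ch][mask_bool]
        src_mean, src_std = np.mean(src_vals), np.std(src_vals) + eps
        tgt_mean, tgt_std = np.mean(tgt_vals), np.std(tgt_vals) + eps
        # Zero-mean/unit-std normalize, then rescale to the target's statistics
        src_lab[:, :, ch] = (src_lab[:, :, ch] - src_mean) / src_std * tgt_std + tgt_mean
        return src_lab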
@@ -154,7 +183,8 @@ class LandmarkDiffPipeline:
     """End-to-end pipeline: image -> landmarks -> manipulate -> generate.
 
     Modes:
-    - 'controlnet': CrucibleAI/ControlNetMediaPipeFace + SD1.5
+    - 'controlnet': CrucibleAI/ControlNetMediaPipeFace + SD1.5 (30 steps)
+    - 'controlnet_fast': ControlNet + LCM-LoRA (4 steps, CPU-viable)
     - 'controlnet_ip': ControlNet + IP-Adapter for identity preservation
     - 'img2img': SD1.5 img2img with mask compositing
     - 'tps': Pure geometric TPS warp (no diffusion, instant)
@@ -166,6 +196,9 @@ class LandmarkDiffPipeline:
     IP_ADAPTER_WEIGHT_NAME = "ip-adapter-plus-face_sd15.bin"
     IP_ADAPTER_SCALE_DEFAULT = 0.6
 
+    # LCM-LoRA for fast inference (2-4 steps instead of 30)
+    LCM_LORA_REPO = "latent-consistency/lcm-lora-sdv1-5"
+
     def __init__(
         self,
         mode: str = "img2img",
@@ -191,9 +224,9 @@ class LandmarkDiffPipeline:
                 from landmarkdiff.displacement_model import DisplacementModel
 
                 self._displacement_model = DisplacementModel.load(displacement_model_path)
-
+                logger.info("Displacement model loaded: %s", self._displacement_model.procedures)
             except Exception as e:
-
+                logger.warning("Failed to load displacement model: %s", e)
 
         if self.device.type == "mps":
             self.dtype = torch.float32
@@ -204,22 +237,23 @@ class LandmarkDiffPipeline:
 
         if base_model_id:
             self.base_model_id = base_model_id
-        elif mode in ("controlnet", "controlnet_ip"):
-            self.base_model_id = "runwayml/stable-diffusion-v1-5"
         else:
             self.base_model_id = "runwayml/stable-diffusion-v1-5"
 
         self.controlnet_id = controlnet_id
         self._pipe = None
         self._ip_adapter_loaded = False
+        self._lcm_loaded = False
 
     def load(self) -> None:
         if self.mode == "tps":
-
+            logger.info("TPS mode -- no model to load")
             return
-        if self.mode in ("controlnet", "controlnet_ip"):
+        if self.mode in ("controlnet", "controlnet_ip", "controlnet_fast"):
             self._load_controlnet()
-            if self.mode == "controlnet_ip":
+            if self.mode == "controlnet_fast":
+                self._load_lcm_lora()
+            elif self.mode == "controlnet_ip":
                 self._load_ip_adapter()
         else:
             self._load_img2img()
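Note: with the simplified dispatch above, typical construction is a two-liner -- a minimal sketch using only the mode keyword (other constructor arguments such as controlnet_checkpoint and ip_adapter_scale appear elsewhere in this diff):

    from landmarkdiff.inference import LandmarkDiffPipeline

    pipe = LandmarkDiffPipeline(mode="tps")  # pure geometric warp, nothing to download
    pipe.load()                              # returns immediately for TPS mode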
@@ -231,43 +265,72 @@ class LandmarkDiffPipeline:
             StableDiffusionControlNetPipeline,
         )
 
+        _local_only = os.environ.get("HF_HUB_OFFLINE", "0") == "1"
+        _kw: dict = {"local_files_only": True} if _local_only else {}
+
         if self.controlnet_checkpoint:
             # Load fine-tuned ControlNet from local checkpoint
             ckpt_path = Path(self.controlnet_checkpoint)
             # Support both direct path and training checkpoint structure
             if (ckpt_path / "controlnet_ema").exists():
                 ckpt_path = ckpt_path / "controlnet_ema"
-
+            logger.info("Loading fine-tuned ControlNet from %s", ckpt_path)
             controlnet = ControlNetModel.from_pretrained(
                 str(ckpt_path),
                 torch_dtype=self.dtype,
             )
         else:
-
+            logger.info("Loading ControlNet from %s", self.controlnet_id)
             controlnet = ControlNetModel.from_pretrained(
                 self.controlnet_id,
                 subfolder="diffusion_sd15",
                 torch_dtype=self.dtype,
+                **_kw,
             )
-
+        logger.info("Loading base model from %s", self.base_model_id)
         self._pipe = StableDiffusionControlNetPipeline.from_pretrained(
             self.base_model_id,
             controlnet=controlnet,
             torch_dtype=self.dtype,
             safety_checker=None,
             requires_safety_checker=False,
+            **_kw,
         )
-        # DPM++ 2M Karras
+        # DPM++ 2M Karras -- produces more photorealistic output than UniPC
         self._pipe.scheduler = DPMSolverMultistepScheduler.from_config(
             self._pipe.scheduler.config,
             algorithm_type="dpmsolver++",
             use_karras_sigmas=True,
         )
-        # FP32 VAE decode
+        # FP32 VAE decode -- prevents color banding artifacts on skin tones
         if hasattr(self._pipe, "vae") and self._pipe.vae is not None:
             self._pipe.vae.config.force_upcast = True
         self._apply_device_optimizations()
 
+    def _load_lcm_lora(self) -> None:
+        """Load LCM-LoRA for fast 4-step inference.
+
+        LCM-LoRA (Latent Consistency Model) distills the denoising process
+        into 2-4 steps, making CPU inference viable (~3-8s vs ~60s+).
+        Replaces the scheduler with LCMScheduler for consistency sampling.
+        """
+        if self._pipe is None:
+            raise RuntimeError("Base pipeline must be loaded before LCM-LoRA")
+        try:
+            from diffusers import LCMScheduler
+
+            logger.info("Loading LCM-LoRA from %s", self.LCM_LORA_REPO)
+            _local_only = os.environ.get("HF_HUB_OFFLINE", "0") == "1"
+            _kw: dict = {"local_files_only": True} if _local_only else {}
+            self._pipe.load_lora_weights(self.LCM_LORA_REPO, **_kw)
+            self._pipe.scheduler = LCMScheduler.from_config(self._pipe.scheduler.config)
+            self._lcm_loaded = True
+            logger.info("LCM-LoRA loaded -- 4-step inference enabled")
+        except Exception as e:
+            logger.warning("LCM-LoRA load failed: %s", e)
+            logger.warning("Falling back to standard scheduler (30 steps)")
+            self._lcm_loaded = False
+
     def _load_ip_adapter(self) -> None:
         """Load IP-Adapter for identity-preserving generation.
 
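Note: _load_lcm_lora follows the stock diffusers LCM-LoRA recipe -- attach the LoRA weights, then swap in LCMScheduler. The same two calls against a vanilla SD1.5 pipeline, for reference (upstream API, not this module):

    import torch
    from diffusers import LCMScheduler, StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float32
    )
    pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    # LCM sampling wants very few steps and low classifier-free guidance
    image = pipe("a face", num_inference_steps=4, guidance_scale=1.5).images[0]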
@@ -277,7 +340,7 @@ class LandmarkDiffPipeline:
         if self._pipe is None:
             raise RuntimeError("Base pipeline must be loaded before IP-Adapter")
         try:
-
+            logger.info("Loading IP-Adapter (%s)", self.IP_ADAPTER_WEIGHT_NAME)
             self._pipe.load_ip_adapter(
                 self.IP_ADAPTER_REPO,
                 subfolder=self.IP_ADAPTER_SUBFOLDER,
@@ -285,10 +348,10 @@ class LandmarkDiffPipeline:
             )
             self._pipe.set_ip_adapter_scale(self.ip_adapter_scale)
             self._ip_adapter_loaded = True
-
+            logger.info("IP-Adapter loaded (scale=%s)", self.ip_adapter_scale)
         except Exception as e:
-
-
+            logger.warning("IP-Adapter load failed: %s", e)
+            logger.warning("Falling back to ControlNet-only mode")
             self._ip_adapter_loaded = False
 
     def _load_img2img(self) -> None:
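Note: load_ip_adapter/set_ip_adapter_scale are the standard diffusers entry points; the weight above is the face-specialized SD1.5 variant. Against a plain pipeline the call looks like this (the h94/IP-Adapter repo and "models" subfolder are the usual upstream location for that weight; IP_ADAPTER_REPO/IP_ADAPTER_SUBFOLDER values are not shown in this diff, so treat these as assumptions):

    pipe.load_ip_adapter(
        "h94/IP-Adapter",
        subfolder="models",
        weight_name="ip-adapter-plus-face_sd15.bin",
    )
    pipe.set_ip_adapter_scale(0.6)  # 0 = ignore the reference face, 1 = follow it closely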
@@ -297,12 +360,16 @@ class LandmarkDiffPipeline:
             StableDiffusionImg2ImgPipeline,
         )
 
-
+        _local_only = os.environ.get("HF_HUB_OFFLINE", "0") == "1"
+        _kw: dict = {"local_files_only": True} if _local_only else {}
+
+        logger.info("Loading SD1.5 img2img from %s", self.base_model_id)
         self._pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
             self.base_model_id,
             torch_dtype=self.dtype,
             safety_checker=None,
             requires_safety_checker=False,
+            **_kw,
         )
         self._pipe.scheduler = DPMSolverMultistepScheduler.from_config(self._pipe.scheduler.config)
         self._apply_device_optimizations()
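Note: the HF_HUB_OFFLINE check makes every from_pretrained call resolve strictly from the local Hugging Face cache. Sketch of an air-gapped run:

    import os

    os.environ["HF_HUB_OFFLINE"] = "1"  # set before any from_pretrained call

    from landmarkdiff.inference import LandmarkDiffPipeline

    pipe = LandmarkDiffPipeline(mode="img2img")
    pipe.load()  # loads from the local cache, or fails fast instead of downloading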
@@ -318,7 +385,7 @@ class LandmarkDiffPipeline:
             self._pipe = self._pipe.to(self.device)
         else:
             self._pipe.enable_sequential_cpu_offload()
-
+        logger.info("Pipeline loaded on %s (%s)", self.device, self.dtype)
 
     @property
     def is_loaded(self) -> bool:
@@ -342,7 +409,8 @@ class LandmarkDiffPipeline:
             raise RuntimeError("Pipeline not loaded. Call .load() first.")
 
         flags = clinical_flags or self.clinical_flags
-        image_512 = cv2.resize(image, (512, 512))
+        res = _SD15_RESOLUTION
+        image_512 = cv2.resize(image, (res, res))
 
         face = extract_landmarks(image_512)
         if face is None:
@@ -357,7 +425,7 @@ class LandmarkDiffPipeline:
             try:
                 rng = np.random.default_rng(seed) if seed is not None else np.random.default_rng()
                 # Map UI intensity (0-100) to displacement model intensity (0-2)
-                dm_intensity = intensity / 50.0
+                dm_intensity = intensity / _INTENSITY_UI_TO_MODEL  # 50 -> 1.0x mean displacement
                 displacement = self._displacement_model.get_displacement_field(
                     procedure,
                     intensity=dm_intensity,
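Note: dividing by _INTENSITY_UI_TO_MODEL (50.0) pins the UI slider's midpoint to the model's learned mean displacement:

    #   0 / 50.0 -> 0.0  (no change)
    #  50 / 50.0 -> 1.0  (mean learned displacement)
    # 100 / 50.0 -> 2.0  (double the learned displacement)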
@@ -373,17 +441,18 @@ class LandmarkDiffPipeline:
                 new_lm[:, 1] = np.clip(new_lm[:, 1], 0.01, 0.99)
                 manipulated = FaceLandmarks(
                     landmarks=new_lm,
-                    image_width=512,
-                    image_height=512,
+                    image_width=res,
+                    image_height=res,
                     confidence=face.confidence,
                 )
                 manipulation_mode = "displacement_model"
-            except Exception:
+            except Exception as exc:
+                logger.warning("Displacement model failed, falling back to preset: %s", exc)
                 manipulated = apply_procedure_preset(
                     face,
                     procedure,
                     intensity,
-                    image_size=512,
+                    image_size=res,
                     clinical_flags=flags,
                 )
         else:
@@ -391,15 +460,15 @@ class LandmarkDiffPipeline:
                 face,
                 procedure,
                 intensity,
-                image_size=512,
+                image_size=res,
                 clinical_flags=flags,
             )
-        landmark_img = render_landmark_image(manipulated, 512, 512)
+        landmark_img = render_landmark_image(manipulated, res, res)
         mask = generate_surgical_mask(
             face,
             procedure,
-            512,
-            512,
+            res,
+            res,
             clinical_flags=flags,
         )
 
@@ -409,33 +478,51 @@ class LandmarkDiffPipeline:
 
         prompt = PROCEDURE_PROMPTS.get(procedure, "a photo of a person's face")
 
-        # Step 1: TPS geometric warp (always computed)
+        # Step 1: TPS geometric warp (always computed -- the geometric baseline)
         tps_warped = warp_image_tps(image_512, face.pixel_coords, manipulated.pixel_coords)
 
         if self.mode == "tps":
             raw_output = tps_warped
-        elif self.mode in ("controlnet", "controlnet_ip"):
+        elif self.mode in ("controlnet", "controlnet_ip", "controlnet_fast"):
+            # LCM mode: override to 4 steps, low guidance (LCM works best with cfg=1-2)
+            if self._lcm_loaded:
+                num_inference_steps = min(num_inference_steps, 4)
+                guidance_scale = min(guidance_scale, 1.5)
             ip_image = numpy_to_pil(image_512) if self._ip_adapter_loaded else None
-            raw_output = self._generate_controlnet(
-                image_512,
-                landmark_img,
-                prompt,
-                num_inference_steps,
-                guidance_scale,
-                controlnet_conditioning_scale,
-                generator,
-                ip_adapter_image=ip_image,
-            )
+            try:
+                raw_output = self._generate_controlnet(
+                    image_512,
+                    landmark_img,
+                    prompt,
+                    num_inference_steps,
+                    guidance_scale,
+                    controlnet_conditioning_scale,
+                    generator,
+                    ip_adapter_image=ip_image,
+                )
+            except torch.cuda.OutOfMemoryError as exc:
+                torch.cuda.empty_cache()
+                raise RuntimeError(
+                    "GPU out of memory during inference. Try reducing "
+                    "num_inference_steps or switching to mode='tps' for CPU-only."
+                ) from exc
         else:
-            raw_output = self._generate_img2img(
-                tps_warped,
-                mask,
-                prompt,
-                num_inference_steps,
-                guidance_scale,
-                strength,
-                generator,
-            )
+            try:
+                raw_output = self._generate_img2img(
+                    tps_warped,
+                    mask,
+                    prompt,
+                    num_inference_steps,
+                    guidance_scale,
+                    strength,
+                    generator,
+                )
+            except torch.cuda.OutOfMemoryError as exc:
+                torch.cuda.empty_cache()
+                raise RuntimeError(
+                    "GPU out of memory during inference. Try reducing "
+                    "num_inference_steps or switching to mode='tps' for CPU-only."
+                ) from exc
 
         # Step 2: Post-processing for photorealism (neural + classical pipeline)
         identity_check = None
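Note: end-to-end, the dispatch above reduces to a single generate() call (keyword names as used in run_inference below; the "input" key is the one written out there, and the metadata keys appear in the result dict in the next hunk):

    import cv2
    from landmarkdiff.inference import LandmarkDiffPipeline

    pipe = LandmarkDiffPipeline(mode="tps")
    pipe.load()
    image = cv2.imread("patient.jpg")
    result = pipe.generate(image, procedure="brow_lift", intensity=50, seed=42)
    cv2.imwrite("input.png", result["input"])  # resized input; other keys hold metadata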
@@ -474,6 +561,7 @@ class LandmarkDiffPipeline:
             "mode": self.mode,
             "view_info": view_info,
             "ip_adapter_active": self._ip_adapter_loaded,
+            "lcm_active": self._lcm_loaded,
             "identity_check": identity_check,
             "restore_used": restore_used,
             "manipulation_mode": manipulation_mode,
@@ -535,11 +623,12 @@ def estimate_face_view(face: FaceLandmarks) -> dict:
     Returns dict with yaw, pitch (degrees), and view classification.
     """
     coords = face.pixel_coords
-    nose_tip = coords[1]
-    left_ear = coords[234]
-    right_ear = coords[454]
-    forehead = coords[10]
-    chin = coords[152]
+    # MediaPipe landmark indices for key anatomical points
+    nose_tip = coords[1]      # nose tip
+    left_ear = coords[234]    # left tragion (ear)
+    right_ear = coords[454]   # right tragion (ear)
+    forehead = coords[10]     # forehead center
+    chin = coords[152]        # chin center
 
     # Yaw: ratio of nose-to-ear distances (symmetric = 0 degrees)
     left_dist = np.linalg.norm(nose_tip - left_ear)
@@ -558,13 +647,13 @@ def estimate_face_view(face: FaceLandmarks) -> dict:
         pitch = 0.0
     else:
         pitch_ratio = (lower - upper) / (upper + lower)
-        pitch = float(pitch_ratio * 45)
+        pitch = float(pitch_ratio * _PITCH_SCALE)
 
     # Classify view
     abs_yaw = abs(yaw)
-    if abs_yaw < 15:
+    if abs_yaw < _YAW_FRONTAL_MAX:
         view = "frontal"
-    elif abs_yaw < 45:
+    elif abs_yaw < _YAW_THREE_QUARTER_MAX:
         view = "three_quarter"
     else:
         view = "profile"
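Note: worked numbers for the pitch branch above -- with upper = 100 px and lower = 120 px:

    upper, lower = 100.0, 120.0
    pitch_ratio = (lower - upper) / (upper + lower)  # 20 / 220 = 0.0909...
    pitch = float(pitch_ratio * 45)                  # ~4.1 degrees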
@@ -573,8 +662,10 @@ def estimate_face_view(face: FaceLandmarks) -> dict:
         "yaw": round(yaw, 1),
         "pitch": round(pitch, 1),
         "view": view,
-        "is_frontal": abs_yaw < 15,
-        "warning": "Side-view detected: results may be less accurate" if abs_yaw > 30 else None,
+        "is_frontal": abs_yaw < _YAW_FRONTAL_MAX,
+        "warning": "Side-view detected: results may be less accurate"
+        if abs_yaw > _YAW_WARNING_THRESHOLD
+        else None,
     }
 
 
@@ -594,7 +685,7 @@ def run_inference(
 
     image = cv2.imread(image_path)
     if image is None:
-
+        logger.error("Could not load %s", image_path)
         sys.exit(1)
 
     pipe = LandmarkDiffPipeline(
@@ -605,7 +696,7 @@ def run_inference(
     )
     pipe.load()
 
-
+    logger.info("Generating %s prediction (intensity=%s, mode=%s)", procedure, intensity, mode)
    result = pipe.generate(image, procedure=procedure, intensity=intensity, seed=seed)
 
     cv2.imwrite(str(out / "input.png"), result["input"])
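Note: run_inference mirrors the CLI below; a programmatic call might look like this (only the keyword names visible in this diff are used; the full signature is not shown):

    from landmarkdiff.inference import run_inference

    run_inference("patient.jpg", procedure="brow_lift", intensity=50, seed=42, mode="tps")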
@@ -620,9 +711,9 @@ def run_inference(
 
     view = result.get("view_info", {})
     if view.get("warning"):
-
-
-
+        logger.warning("%s", view["warning"])
+    logger.info("Face view: %s (yaw=%s)", view.get("view", "unknown"), view.get("yaw", 0))
+    logger.info("Results saved to %s/", out)
 
 
 if __name__ == "__main__":
@@ -637,7 +728,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--mode",
         default="img2img",
-        choices=["img2img", "controlnet", "controlnet_ip", "tps"],
+        choices=["img2img", "controlnet", "controlnet_ip", "controlnet_fast", "tps"],
     )
     parser.add_argument("--ip-adapter-scale", type=float, default=0.6)
     parser.add_argument(