Spaces:

dreamlessx
/

LandmarkDiff

Running

dreamlessx committed on Mar 14

Commit

433e26f

1 Parent(s): c951530

Add error handling, sync package from public repo, auto-trigger on upload

- Wrap all processing functions in try/except with informative error messages
- Add show_error=True for visible error reporting
- Auto-trigger processing when image is uploaded (not just on button click)
- Sync bundled landmarkdiff package to match public repo v0.2.0
- Add .gitignore for __pycache__

Files changed (40) hide show

.gitignore +1 -0
app.py +99 -70
landmarkdiff/__init__.py +1 -1
landmarkdiff/__main__.py +111 -40
landmarkdiff/api_client.py +15 -14
landmarkdiff/arcface_torch.py +40 -22
landmarkdiff/audit.py +5 -9
landmarkdiff/augmentation.py +293 -0
landmarkdiff/benchmark.py +17 -12
landmarkdiff/checkpoint_manager.py +10 -14
landmarkdiff/cli.py +19 -13
landmarkdiff/clinical.py +3 -3
landmarkdiff/conditioning.py +115 -9
landmarkdiff/config.py +24 -11
landmarkdiff/curriculum.py +6 -9
landmarkdiff/data.py +11 -13
landmarkdiff/data_version.py +5 -8
landmarkdiff/displacement_model.py +47 -43
landmarkdiff/ensemble.py +35 -23
landmarkdiff/evaluation.py +12 -11
landmarkdiff/experiment_tracker.py +215 -0
landmarkdiff/face_verifier.py +51 -39
landmarkdiff/fid.py +85 -79
landmarkdiff/hyperparam.py +36 -19
landmarkdiff/inference.py +90 -38
landmarkdiff/landmarks.py +131 -23
landmarkdiff/log.py +6 -4
landmarkdiff/losses.py +17 -6
landmarkdiff/manipulation.py +335 -138
landmarkdiff/metrics_agg.py +19 -14
landmarkdiff/metrics_viz.py +62 -37
landmarkdiff/model_registry.py +9 -16
landmarkdiff/postprocess.py +27 -29
landmarkdiff/py.typed +0 -0
landmarkdiff/safety.py +7 -12
landmarkdiff/synthetic/__init__.py +23 -0
landmarkdiff/synthetic/augmentation.py +3 -3
landmarkdiff/synthetic/pair_generator.py +9 -13
landmarkdiff/synthetic/tps_warp.py +4 -5
landmarkdiff/validation.py +14 -8

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.pyc

app.py CHANGED Viewed

@@ -2,6 +2,9 @@
 from __future__ import annotations
 import cv2
 import gradio as gr
 import numpy as np
@@ -11,6 +14,9 @@ from landmarkdiff.landmarks import extract_landmarks
 from landmarkdiff.manipulation import PROCEDURE_LANDMARKS, apply_procedure_preset
 from landmarkdiff.masking import generate_surgical_mask
 VERSION = "v0.2.1"
 GITHUB_URL = "https://github.com/dreamlessx/LandmarkDiff-public"
@@ -44,17 +50,31 @@ def mask_composite(warped, original, mask):
 PROCEDURES = list(PROCEDURE_LANDMARKS.keys())
 def process_image(image_rgb, procedure, intensity):
     """Process a single image through the TPS pipeline."""
     if image_rgb is None:
-        blank = np.zeros((512, 512, 3), dtype=np.uint8)
-        return blank, blank, blank, blank, "Upload a face photo to begin."
-    image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
-    image_bgr = cv2.resize(image_bgr, (512, 512))
-    image_rgb_512 = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
-    face = extract_landmarks(image_bgr)
     if face is None:
         return (
             image_rgb_512,
@@ -64,38 +84,38 @@ def process_image(image_rgb, procedure, intensity):
             "No face detected. Try a clearer photo with good lighting.",
         )
-    # Manipulate landmarks
-    manipulated = apply_procedure_preset(face, procedure, float(intensity), image_size=512)
-    # Generate wireframe (pass width and height as separate keyword args)
-    wireframe = render_wireframe(manipulated, width=512, height=512)
-    wireframe_rgb = cv2.cvtColor(wireframe, cv2.COLOR_GRAY2RGB)
-    # Generate mask
-    mask = generate_surgical_mask(face, procedure, 512, 512)
-    mask_vis = (mask * 255).astype(np.uint8)
-    # TPS warp + composite
-    warped = warp_image_tps(image_bgr, face.pixel_coords, manipulated.pixel_coords)
-    composited = mask_composite(warped, image_bgr, mask)
-    composited_rgb = cv2.cvtColor(composited, cv2.COLOR_BGR2RGB)
-    # Side by side
-    side_by_side = np.hstack([image_rgb_512, composited_rgb])
-    # Displacement stats
-    displacement = np.mean(np.linalg.norm(manipulated.pixel_coords - face.pixel_coords, axis=1))
-    info = (
-        f"Procedure: {procedure}\n"
-        f"Intensity: {intensity:.0f}%\n"
-        f"Landmarks: {len(face.landmarks)}\n"
-        f"Avg displacement: {displacement:.1f} px\n"
-        f"Confidence: {face.confidence:.2f}\n"
-        f"Mode: TPS (CPU)"
-    )
-    return wireframe_rgb, mask_vis, composited_rgb, side_by_side, info
 def compare_procedures(image_rgb, intensity):
@@ -104,23 +124,28 @@ def compare_procedures(image_rgb, intensity):
         blank = np.zeros((512, 512, 3), dtype=np.uint8)
         return [blank] * len(PROCEDURES)
-    image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
-    image_bgr = cv2.resize(image_bgr, (512, 512))
-    face = extract_landmarks(image_bgr)
-    if face is None:
-        rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
-        return [rgb] * len(PROCEDURES)
-    results = []
-    for proc in PROCEDURES:
-        manip = apply_procedure_preset(face, proc, float(intensity), image_size=512)
-        mask = generate_surgical_mask(face, proc, 512, 512)
-        warped = warp_image_tps(image_bgr, face.pixel_coords, manip.pixel_coords)
-        comp = mask_composite(warped, image_bgr, mask)
-        results.append(cv2.cvtColor(comp, cv2.COLOR_BGR2RGB))
-    return results
 def intensity_sweep(image_rgb, procedure):
@@ -128,27 +153,31 @@ def intensity_sweep(image_rgb, procedure):
     if image_rgb is None:
         return []
-    image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
-    image_bgr = cv2.resize(image_bgr, (512, 512))
-    face = extract_landmarks(image_bgr)
-    if face is None:
         return []
-    steps = [0, 20, 40, 60, 80, 100]
-    results = []
-    for val in steps:
-        if val == 0:
-            results.append((cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB), "0%"))
-            continue
-        manip = apply_procedure_preset(face, procedure, float(val), image_size=512)
-        mask = generate_surgical_mask(face, procedure, 512, 512)
-        warped = warp_image_tps(image_bgr, face.pixel_coords, manip.pixel_coords)
-        comp = mask_composite(warped, image_bgr, mask)
-        results.append((cv2.cvtColor(comp, cv2.COLOR_BGR2RGB), f"{val}%"))
-    return results
 # -- Build the procedure table for the description --
 _proc_rows = "\n".join(
@@ -248,7 +277,7 @@ with gr.Blocks(
             inputs=[input_image, procedure, intensity],
             outputs=[out_wireframe, out_mask, out_result, out_sidebyside, info_box],
         )
-        for trigger in [procedure, intensity]:
             trigger.change(
                 fn=process_image,
                 inputs=[input_image, procedure, intensity],
@@ -310,4 +339,4 @@ with gr.Blocks(
     gr.Markdown(FOOTER_MD)
 if __name__ == "__main__":
-    demo.launch()

 from __future__ import annotations
+import logging
+import traceback
 import cv2
 import gradio as gr
 import numpy as np
 from landmarkdiff.manipulation import PROCEDURE_LANDMARKS, apply_procedure_preset
 from landmarkdiff.masking import generate_surgical_mask
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 VERSION = "v0.2.1"
 GITHUB_URL = "https://github.com/dreamlessx/LandmarkDiff-public"
 PROCEDURES = list(PROCEDURE_LANDMARKS.keys())
+def _error_result(msg):
+    """Return a 5-tuple of blanks + error message for the UI."""
+    blank = np.zeros((512, 512, 3), dtype=np.uint8)
+    return blank, blank, blank, blank, msg
 def process_image(image_rgb, procedure, intensity):
     """Process a single image through the TPS pipeline."""
     if image_rgb is None:
+        return _error_result("Upload a face photo to begin.")
+    try:
+        image_bgr = cv2.cvtColor(np.asarray(image_rgb, dtype=np.uint8), cv2.COLOR_RGB2BGR)
+        image_bgr = cv2.resize(image_bgr, (512, 512))
+        image_rgb_512 = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
+    except Exception as exc:
+        logger.error("Image conversion failed: %s", exc)
+        return _error_result(f"Image conversion failed: {exc}")
+    try:
+        face = extract_landmarks(image_bgr)
+    except Exception as exc:
+        logger.error("Landmark extraction failed: %s\n%s", exc, traceback.format_exc())
+        return _error_result(f"Landmark extraction error: {exc}")
     if face is None:
         return (
             image_rgb_512,
             "No face detected. Try a clearer photo with good lighting.",
         )
+    try:
+        manipulated = apply_procedure_preset(face, procedure, float(intensity), image_size=512)
+        wireframe = render_wireframe(manipulated, width=512, height=512)
+        wireframe_rgb = cv2.cvtColor(wireframe, cv2.COLOR_GRAY2RGB)
+        mask = generate_surgical_mask(face, procedure, 512, 512)
+        mask_vis = (mask * 255).astype(np.uint8)
+        warped = warp_image_tps(image_bgr, face.pixel_coords, manipulated.pixel_coords)
+        composited = mask_composite(warped, image_bgr, mask)
+        composited_rgb = cv2.cvtColor(composited, cv2.COLOR_BGR2RGB)
+        side_by_side = np.hstack([image_rgb_512, composited_rgb])
+        displacement = np.mean(
+            np.linalg.norm(manipulated.pixel_coords - face.pixel_coords, axis=1)
+        )
+        info = (
+            f"Procedure: {procedure}\n"
+            f"Intensity: {intensity:.0f}%\n"
+            f"Landmarks: {len(face.landmarks)}\n"
+            f"Avg displacement: {displacement:.1f} px\n"
+            f"Confidence: {face.confidence:.2f}\n"
+            f"Mode: TPS (CPU)"
+        )
+        return wireframe_rgb, mask_vis, composited_rgb, side_by_side, info
+    except Exception as exc:
+        logger.error("Processing failed: %s\n%s", exc, traceback.format_exc())
+        return _error_result(f"Processing error: {exc}")
 def compare_procedures(image_rgb, intensity):
         blank = np.zeros((512, 512, 3), dtype=np.uint8)
         return [blank] * len(PROCEDURES)
+    try:
+        image_bgr = cv2.cvtColor(np.asarray(image_rgb, dtype=np.uint8), cv2.COLOR_RGB2BGR)
+        image_bgr = cv2.resize(image_bgr, (512, 512))
+        face = extract_landmarks(image_bgr)
+        if face is None:
+            rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
+            return [rgb] * len(PROCEDURES)
+        results = []
+        for proc in PROCEDURES:
+            manip = apply_procedure_preset(face, proc, float(intensity), image_size=512)
+            mask = generate_surgical_mask(face, proc, 512, 512)
+            warped = warp_image_tps(image_bgr, face.pixel_coords, manip.pixel_coords)
+            comp = mask_composite(warped, image_bgr, mask)
+            results.append(cv2.cvtColor(comp, cv2.COLOR_BGR2RGB))
+        return results
+    except Exception as exc:
+        logger.error("Compare procedures failed: %s\n%s", exc, traceback.format_exc())
+        blank = np.zeros((512, 512, 3), dtype=np.uint8)
+        return [blank] * len(PROCEDURES)
 def intensity_sweep(image_rgb, procedure):
     if image_rgb is None:
         return []
+    try:
+        image_bgr = cv2.cvtColor(np.asarray(image_rgb, dtype=np.uint8), cv2.COLOR_RGB2BGR)
+        image_bgr = cv2.resize(image_bgr, (512, 512))
+        face = extract_landmarks(image_bgr)
+        if face is None:
+            return []
+        steps = [0, 20, 40, 60, 80, 100]
+        results = []
+        for val in steps:
+            if val == 0:
+                results.append((cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB), "0%"))
+                continue
+            manip = apply_procedure_preset(face, procedure, float(val), image_size=512)
+            mask = generate_surgical_mask(face, procedure, 512, 512)
+            warped = warp_image_tps(image_bgr, face.pixel_coords, manip.pixel_coords)
+            comp = mask_composite(warped, image_bgr, mask)
+            results.append((cv2.cvtColor(comp, cv2.COLOR_BGR2RGB), f"{val}%"))
+        return results
+    except Exception as exc:
+        logger.error("Intensity sweep failed: %s\n%s", exc, traceback.format_exc())
         return []
 # -- Build the procedure table for the description --
 _proc_rows = "\n".join(
             inputs=[input_image, procedure, intensity],
             outputs=[out_wireframe, out_mask, out_result, out_sidebyside, info_box],
         )
+        for trigger in [input_image, procedure, intensity]:
             trigger.change(
                 fn=process_image,
                 inputs=[input_image, procedure, intensity],
     gr.Markdown(FOOTER_MD)
 if __name__ == "__main__":
+    demo.launch(show_error=True)

landmarkdiff/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """LandmarkDiff: Anatomically-conditioned latent diffusion for facial surgery simulation."""
-__version__ = "0.3.0"
 __all__ = [
     "api_client",

 """LandmarkDiff: Anatomically-conditioned latent diffusion for facial surgery simulation."""
+__version__ = "0.2.0"
 __all__ = [
     "api_client",

landmarkdiff/__main__.py CHANGED Viewed

@@ -1,80 +1,148 @@
 """CLI entry point for python -m landmarkdiff."""
 import argparse
 import sys
-def main():
     parser = argparse.ArgumentParser(
         prog="landmarkdiff",
         description="Facial surgery outcome prediction from clinical photography",
     )
-    parser.add_argument("--version", action="store_true", help="Print version and exit")
     subparsers = parser.add_subparsers(dest="command")
     # inference
     infer = subparsers.add_parser("infer", help="Run inference on an image")
     infer.add_argument("image", type=str, help="Path to input face image")
-    infer.add_argument("--procedure", type=str, default="rhinoplasty",
-                       choices=["rhinoplasty", "blepharoplasty", "rhytidectomy", "orthognathic", "brow_lift", "mentoplasty"])
-    infer.add_argument("--intensity", type=float, default=60.0,
-                       help="Deformation intensity (0-100)")
-    infer.add_argument("--mode", type=str, default="tps",
-                       choices=["tps", "controlnet", "img2img", "controlnet_ip"])
-    infer.add_argument("--output", type=str, default="output/")
-    infer.add_argument("--steps", type=int, default=30)
-    infer.add_argument("--seed", type=int, default=None)
     # landmarks
     lm = subparsers.add_parser("landmarks", help="Extract and visualize landmarks")
     lm.add_argument("image", type=str, help="Path to input face image")
-    lm.add_argument("--output", type=str, default="output/landmarks.png")
     # demo
     subparsers.add_parser("demo", help="Launch Gradio web demo")
     args = parser.parse_args()
-    if args.version:
-        from landmarkdiff import __version__
-        print(f"landmarkdiff {__version__}")
-        return
     if args.command is None:
         parser.print_help()
         return
-    if args.command == "infer":
-        _run_inference(args)
-    elif args.command == "landmarks":
-        _run_landmarks(args)
-    elif args.command == "demo":
-        _run_demo()
-def _run_inference(args):
-    from pathlib import Path
     import numpy as np
     from PIL import Image
     from landmarkdiff.landmarks import extract_landmarks
     from landmarkdiff.manipulation import apply_procedure_preset
     output_dir = Path(args.output)
     output_dir.mkdir(parents=True, exist_ok=True)
-    img = Image.open(args.image).convert("RGB").resize((512, 512))
     img_array = np.array(img)
     landmarks = extract_landmarks(img_array)
     if landmarks is None:
-        print("no face detected")
-        sys.exit(1)
     deformed = apply_procedure_preset(landmarks, args.procedure, intensity=args.intensity)
     if args.mode == "tps":
         from landmarkdiff.synthetic.tps_warp import warp_image_tps
         src = landmarks.pixel_coords[:, :2].copy()
         dst = deformed.pixel_coords[:, :2].copy()
         src[:, 0] *= 512 / landmarks.image_width
@@ -85,8 +153,11 @@ def _run_inference(args):
         Image.fromarray(warped).save(str(output_dir / "prediction.png"))
         print(f"saved tps result to {output_dir / 'prediction.png'}")
     else:
         from landmarkdiff.inference import LandmarkDiffPipeline
-        pipeline = LandmarkDiffPipeline(mode=args.mode, device="cuda")
         pipeline.load()
         result = pipeline.generate(
             img_array,
@@ -99,37 +170,37 @@ def _run_inference(args):
         print(f"saved result to {output_dir / 'prediction.png'}")
-def _run_landmarks(args):
-    from pathlib import Path
     import numpy as np
     from PIL import Image
     from landmarkdiff.landmarks import extract_landmarks, render_landmark_image
-    img = np.array(Image.open(args.image).convert("RGB").resize((512, 512)))
     landmarks = extract_landmarks(img)
     if landmarks is None:
-        print("no face detected")
-        sys.exit(1)
     mesh = render_landmark_image(landmarks, 512, 512)
     output_path = Path(args.output)
     output_path.parent.mkdir(parents=True, exist_ok=True)
-    from PIL import Image
     Image.fromarray(mesh).save(str(output_path))
     print(f"saved landmark mesh to {output_path}")
     print(f"detected {len(landmarks.landmarks)} landmarks, confidence {landmarks.confidence:.2f}")
-def _run_demo():
     try:
         from scripts.app import build_app
         demo = build_app()
         demo.launch()
     except ImportError:
-        print("gradio not installed - run: pip install landmarkdiff[app]")
-        sys.exit(1)
 if __name__ == "__main__":

 """CLI entry point for python -m landmarkdiff."""
+from __future__ import annotations
 import argparse
 import sys
+from pathlib import Path
+from typing import NoReturn
+def _error(msg: str) -> NoReturn:
+    """Print error to stderr and exit."""
+    print(f"error: {msg}", file=sys.stderr)
+    sys.exit(1)
+def _validate_image_path(path_str: str) -> Path:
+    """Validate that the image path exists and is a regular file."""
+    p = Path(path_str)
+    if not p.exists():
+        _error(f"file not found: {path_str}")
+    if not p.is_file():
+        _error(f"not a file: {path_str}")
+    return p
+def main() -> None:
+    from landmarkdiff import __version__
     parser = argparse.ArgumentParser(
         prog="landmarkdiff",
         description="Facial surgery outcome prediction from clinical photography",
     )
+    parser.add_argument("--version", action="version", version=f"landmarkdiff {__version__}")
     subparsers = parser.add_subparsers(dest="command")
     # inference
     infer = subparsers.add_parser("infer", help="Run inference on an image")
     infer.add_argument("image", type=str, help="Path to input face image")
+    infer.add_argument(
+        "--procedure",
+        type=str,
+        default="rhinoplasty",
+        choices=[
+            "rhinoplasty",
+            "blepharoplasty",
+            "rhytidectomy",
+            "orthognathic",
+            "brow_lift",
+            "mentoplasty",
+        ],
+        help="Surgical procedure to simulate (default: rhinoplasty)",
+    )
+    infer.add_argument(
+        "--intensity",
+        type=float,
+        default=60.0,
+        help="Deformation intensity, 0-100 (default: 60)",
+    )
+    infer.add_argument(
+        "--mode",
+        type=str,
+        default="tps",
+        choices=["tps", "controlnet", "img2img", "controlnet_ip"],
+        help="Inference mode (default: tps, others require GPU)",
+    )
+    infer.add_argument(
+        "--output",
+        type=str,
+        default="output/",
+        help="Output directory (default: output/)",
+    )
+    infer.add_argument(
+        "--steps",
+        type=int,
+        default=30,
+        help="Number of diffusion steps (default: 30)",
+    )
+    infer.add_argument(
+        "--seed",
+        type=int,
+        default=None,
+        help="Random seed for reproducibility",
+    )
     # landmarks
     lm = subparsers.add_parser("landmarks", help="Extract and visualize landmarks")
     lm.add_argument("image", type=str, help="Path to input face image")
+    lm.add_argument(
+        "--output",
+        type=str,
+        default="output/landmarks.png",
+        help="Output path for landmark visualization (default: output/landmarks.png)",
+    )
     # demo
     subparsers.add_parser("demo", help="Launch Gradio web demo")
     args = parser.parse_args()
     if args.command is None:
         parser.print_help()
         return
+    try:
+        if args.command == "infer":
+            _run_inference(args)
+        elif args.command == "landmarks":
+            _run_landmarks(args)
+        elif args.command == "demo":
+            _run_demo()
+    except KeyboardInterrupt:
+        sys.exit(130)
+    except Exception as exc:
+        _error(str(exc))
+def _run_inference(args: argparse.Namespace) -> None:
     import numpy as np
     from PIL import Image
     from landmarkdiff.landmarks import extract_landmarks
     from landmarkdiff.manipulation import apply_procedure_preset
+    if not (0 <= args.intensity <= 100):
+        _error(f"intensity must be between 0 and 100, got {args.intensity}")
+    image_path = _validate_image_path(args.image)
     output_dir = Path(args.output)
     output_dir.mkdir(parents=True, exist_ok=True)
+    img = Image.open(image_path).convert("RGB").resize((512, 512))
     img_array = np.array(img)
     landmarks = extract_landmarks(img_array)
     if landmarks is None:
+        _error("no face detected in image")
     deformed = apply_procedure_preset(landmarks, args.procedure, intensity=args.intensity)
     if args.mode == "tps":
         from landmarkdiff.synthetic.tps_warp import warp_image_tps
         src = landmarks.pixel_coords[:, :2].copy()
         dst = deformed.pixel_coords[:, :2].copy()
         src[:, 0] *= 512 / landmarks.image_width
         Image.fromarray(warped).save(str(output_dir / "prediction.png"))
         print(f"saved tps result to {output_dir / 'prediction.png'}")
     else:
+        import torch
         from landmarkdiff.inference import LandmarkDiffPipeline
+        pipeline = LandmarkDiffPipeline(mode=args.mode, device=torch.device("cuda"))
         pipeline.load()
         result = pipeline.generate(
             img_array,
         print(f"saved result to {output_dir / 'prediction.png'}")
+def _run_landmarks(args: argparse.Namespace) -> None:
     import numpy as np
     from PIL import Image
     from landmarkdiff.landmarks import extract_landmarks, render_landmark_image
+    image_path = _validate_image_path(args.image)
+    img = np.array(Image.open(image_path).convert("RGB").resize((512, 512)))
     landmarks = extract_landmarks(img)
     if landmarks is None:
+        _error("no face detected in image")
     mesh = render_landmark_image(landmarks, 512, 512)
     output_path = Path(args.output)
     output_path.parent.mkdir(parents=True, exist_ok=True)
     Image.fromarray(mesh).save(str(output_path))
     print(f"saved landmark mesh to {output_path}")
     print(f"detected {len(landmarks.landmarks)} landmarks, confidence {landmarks.confidence:.2f}")
+def _run_demo() -> None:
     try:
         from scripts.app import build_app
         demo = build_app()
         demo.launch()
     except ImportError:
+        _error("gradio not installed - run: pip install landmarkdiff[app]")
 if __name__ == "__main__":

landmarkdiff/api_client.py CHANGED Viewed

@@ -26,7 +26,6 @@ Usage:
 from __future__ import annotations
 import base64
-import io
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
@@ -43,8 +42,8 @@ class PredictionResult:
     procedure: str
     intensity: float
     confidence: float = 0.0
-    landmarks_before: list | None = None
-    landmarks_after: list | None = None
     metrics: dict[str, float] = field(default_factory=dict)
     metadata: dict[str, Any] = field(default_factory=dict)
@@ -70,15 +69,15 @@ class LandmarkDiffClient:
     def __init__(self, base_url: str = "http://localhost:8000", timeout: float = 60.0) -> None:
         self.base_url = base_url.rstrip("/")
         self.timeout = timeout
-        self._session = None
-    def _get_session(self):
         """Lazy-initialize requests session."""
         if self._session is None:
             try:
                 import requests
             except ImportError:
-                raise ImportError("requests required. Install with: pip install requests")
             self._session = requests.Session()
             self._session.timeout = self.timeout
         return self._session
@@ -218,12 +217,14 @@ class LandmarkDiffClient:
                 results.append(result)
             except Exception as e:
                 # Create a failed result
-                results.append(PredictionResult(
-                    output_image=np.zeros((512, 512, 3), dtype=np.uint8),
-                    procedure=procedure,
-                    intensity=intensity,
-                    metadata={"error": str(e), "path": str(path)},
-                ))
         return results
     def close(self) -> None:
@@ -232,10 +233,10 @@ class LandmarkDiffClient:
             self._session.close()
             self._session = None
-    def __enter__(self):
         return self
-    def __exit__(self, *args):
         self.close()
     def __repr__(self) -> str:

 from __future__ import annotations
 import base64
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
     procedure: str
     intensity: float
     confidence: float = 0.0
+    landmarks_before: list[Any] | None = None
+    landmarks_after: list[Any] | None = None
     metrics: dict[str, float] = field(default_factory=dict)
     metadata: dict[str, Any] = field(default_factory=dict)
     def __init__(self, base_url: str = "http://localhost:8000", timeout: float = 60.0) -> None:
         self.base_url = base_url.rstrip("/")
         self.timeout = timeout
+        self._session: Any = None
+    def _get_session(self) -> Any:
         """Lazy-initialize requests session."""
         if self._session is None:
             try:
                 import requests
             except ImportError:
+                raise ImportError("requests required. Install with: pip install requests") from None
             self._session = requests.Session()
             self._session.timeout = self.timeout
         return self._session
                 results.append(result)
             except Exception as e:
                 # Create a failed result
+                results.append(
+                    PredictionResult(
+                        output_image=np.zeros((512, 512, 3), dtype=np.uint8),
+                        procedure=procedure,
+                        intensity=intensity,
+                        metadata={"error": str(e), "path": str(path)},
+                    )
+                )
         return results
     def close(self) -> None:
             self._session.close()
             self._session = None
+    def __enter__(self) -> LandmarkDiffClient:
         return self
+    def __exit__(self, *args: Any) -> None:
         self.close()
     def __repr__(self) -> str:

landmarkdiff/arcface_torch.py CHANGED Viewed

@@ -29,7 +29,6 @@ from __future__ import annotations
 import logging
 import warnings
 from pathlib import Path
-from typing import Optional
 import torch
 import torch.nn as nn
@@ -42,6 +41,7 @@ logger = logging.getLogger(__name__)
 # Building blocks
 # ---------------------------------------------------------------------------
 class SEModule(nn.Module):
     """Squeeze-and-Excitation channel attention (Hu et al., 2018).
@@ -79,18 +79,28 @@ class IBasicBlock(nn.Module):
         inplanes: int,
         planes: int,
         stride: int = 1,
-        downsample: Optional[nn.Module] = None,
         use_se: bool = True,
     ):
         super().__init__()
         self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-5)
         self.conv1 = nn.Conv2d(
-            inplanes, planes, kernel_size=3, stride=1, padding=1, bias=False,
         )
         self.bn2 = nn.BatchNorm2d(planes, eps=1e-5)
         self.prelu = nn.PReLU(planes)
         self.conv2 = nn.Conv2d(
-            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False,
         )
         self.bn3 = nn.BatchNorm2d(planes, eps=1e-5)
@@ -120,6 +130,7 @@ class IBasicBlock(nn.Module):
 # Backbone
 # ---------------------------------------------------------------------------
 class ArcFaceBackbone(nn.Module):
     """IResNet-50 backbone for ArcFace identity embeddings.
@@ -257,7 +268,7 @@ _WEIGHT_URL = (
 )
-def _find_pretrained_weights() -> Optional[Path]:
     """Search known locations for pretrained IResNet-50 weights."""
     for p in _KNOWN_WEIGHT_PATHS:
         if p.exists() and p.suffix == ".pth":
@@ -269,6 +280,7 @@ def _try_download_weights(dest: Path) -> bool:
     """Attempt to download pretrained weights from the InsightFace release."""
     try:
         import urllib.request
         dest.parent.mkdir(parents=True, exist_ok=True)
         logger.info("Downloading ArcFace IResNet-50 weights from %s ...", _WEIGHT_URL)
         urllib.request.urlretrieve(_WEIGHT_URL, str(dest))
@@ -281,7 +293,7 @@ def _try_download_weights(dest: Path) -> bool:
 def load_pretrained_weights(
     model: ArcFaceBackbone,
-    weights_path: Optional[str] = None,
     download: bool = True,
 ) -> bool:
     """Load pretrained InsightFace IResNet-50 weights into the model.
@@ -300,7 +312,7 @@ def load_pretrained_weights(
         ``True`` if weights were loaded successfully, ``False`` otherwise
         (model keeps random initialization).
     """
-    path: Optional[Path] = None
     if weights_path is not None:
         path = Path(weights_path)
@@ -368,8 +380,7 @@ def load_pretrained_weights(
         return True
     except Exception as e:
         warnings.warn(
-            f"Failed to load ArcFace weights from {path}: {e}. "
-            "Using random initialization.",
             UserWarning,
             stacklevel=2,
         )
@@ -380,6 +391,7 @@ def load_pretrained_weights(
 # Differentiable face alignment
 # ---------------------------------------------------------------------------
 def align_face(
     images: torch.Tensor,
     size: int = 112,
@@ -402,7 +414,7 @@ def align_face(
     """
     B, C, H, W = images.shape
-    if H == size and W == size:
         return images
     # Crop fraction: keep central 80% to remove background padding
@@ -414,13 +426,17 @@ def align_face(
     # grid_sample expects coordinates in [-1, 1] where -1 is top-left, +1 is bottom-right
     # Center crop: map [-1, 1] output range to [-crop_frac, +crop_frac] input range
     theta = torch.zeros(B, 2, 3, device=images.device, dtype=images.dtype)
-    theta[:, 0, 0] = half_crop   # x scale
-    theta[:, 1, 1] = half_crop   # y scale
     # translation stays 0 (centered)
     grid = F.affine_grid(theta, [B, C, size, size], align_corners=False)
     aligned = F.grid_sample(
-        images, grid, mode="bilinear", padding_mode="border", align_corners=False,
     )
     return aligned
@@ -444,7 +460,10 @@ def align_face_no_crop(
     if images.shape[-2] == size and images.shape[-1] == size:
         return images
     return F.interpolate(
-        images, size=(size, size), mode="bilinear", align_corners=False,
     )
@@ -452,6 +471,7 @@ def align_face_no_crop(
 # ArcFaceLoss: differentiable identity preservation loss
 # ---------------------------------------------------------------------------
 class ArcFaceLoss(nn.Module):
     """Differentiable identity loss using PyTorch-native ArcFace.
@@ -474,8 +494,8 @@ class ArcFaceLoss(nn.Module):
     def __init__(
         self,
-        device: Optional[torch.device] = None,
-        weights_path: Optional[str] = None,
         crop_face: bool = True,
     ):
         """
@@ -527,10 +547,7 @@ class ArcFaceLoss(nn.Module):
         Returns:
             (B, 3, 112, 112) in [-1, 1].
         """
-        if self.crop_face:
-            x = align_face(images, size=112)
-        else:
-            x = align_face_no_crop(images, size=112)
         # Normalize from [0, 1] to [-1, 1]
         x = x * 2.0 - 1.0
@@ -662,9 +679,10 @@ class ArcFaceLoss(nn.Module):
 # Convenience: create a pre-configured loss instance
 # ---------------------------------------------------------------------------
 def create_arcface_loss(
-    device: Optional[torch.device] = None,
-    weights_path: Optional[str] = None,
 ) -> ArcFaceLoss:
     """Factory function for creating an ArcFaceLoss with sensible defaults.

 import logging
 import warnings
 from pathlib import Path
 import torch
 import torch.nn as nn
 # Building blocks
 # ---------------------------------------------------------------------------
 class SEModule(nn.Module):
     """Squeeze-and-Excitation channel attention (Hu et al., 2018).
         inplanes: int,
         planes: int,
         stride: int = 1,
+        downsample: nn.Module | None = None,
         use_se: bool = True,
     ):
         super().__init__()
         self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-5)
         self.conv1 = nn.Conv2d(
+            inplanes,
+            planes,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            bias=False,
         )
         self.bn2 = nn.BatchNorm2d(planes, eps=1e-5)
         self.prelu = nn.PReLU(planes)
         self.conv2 = nn.Conv2d(
+            planes,
+            planes,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            bias=False,
         )
         self.bn3 = nn.BatchNorm2d(planes, eps=1e-5)
 # Backbone
 # ---------------------------------------------------------------------------
 class ArcFaceBackbone(nn.Module):
     """IResNet-50 backbone for ArcFace identity embeddings.
 )
+def _find_pretrained_weights() -> Path | None:
     """Search known locations for pretrained IResNet-50 weights."""
     for p in _KNOWN_WEIGHT_PATHS:
         if p.exists() and p.suffix == ".pth":
     """Attempt to download pretrained weights from the InsightFace release."""
     try:
         import urllib.request
         dest.parent.mkdir(parents=True, exist_ok=True)
         logger.info("Downloading ArcFace IResNet-50 weights from %s ...", _WEIGHT_URL)
         urllib.request.urlretrieve(_WEIGHT_URL, str(dest))
 def load_pretrained_weights(
     model: ArcFaceBackbone,
+    weights_path: str | None = None,
     download: bool = True,
 ) -> bool:
     """Load pretrained InsightFace IResNet-50 weights into the model.
         ``True`` if weights were loaded successfully, ``False`` otherwise
         (model keeps random initialization).
     """
+    path: Path | None = None
     if weights_path is not None:
         path = Path(weights_path)
         return True
     except Exception as e:
         warnings.warn(
+            f"Failed to load ArcFace weights from {path}: {e}. Using random initialization.",
             UserWarning,
             stacklevel=2,
         )
 # Differentiable face alignment
 # ---------------------------------------------------------------------------
 def align_face(
     images: torch.Tensor,
     size: int = 112,
     """
     B, C, H, W = images.shape
+    if size == H and size == W:
         return images
     # Crop fraction: keep central 80% to remove background padding
     # grid_sample expects coordinates in [-1, 1] where -1 is top-left, +1 is bottom-right
     # Center crop: map [-1, 1] output range to [-crop_frac, +crop_frac] input range
     theta = torch.zeros(B, 2, 3, device=images.device, dtype=images.dtype)
+    theta[:, 0, 0] = half_crop  # x scale
+    theta[:, 1, 1] = half_crop  # y scale
     # translation stays 0 (centered)
     grid = F.affine_grid(theta, [B, C, size, size], align_corners=False)
     aligned = F.grid_sample(
+        images,
+        grid,
+        mode="bilinear",
+        padding_mode="border",
+        align_corners=False,
     )
     return aligned
     if images.shape[-2] == size and images.shape[-1] == size:
         return images
     return F.interpolate(
+        images,
+        size=(size, size),
+        mode="bilinear",
+        align_corners=False,
     )
 # ArcFaceLoss: differentiable identity preservation loss
 # ---------------------------------------------------------------------------
 class ArcFaceLoss(nn.Module):
     """Differentiable identity loss using PyTorch-native ArcFace.
     def __init__(
         self,
+        device: torch.device | None = None,
+        weights_path: str | None = None,
         crop_face: bool = True,
     ):
         """
         Returns:
             (B, 3, 112, 112) in [-1, 1].
         """
+        x = align_face(images, size=112) if self.crop_face else align_face_no_crop(images, size=112)
         # Normalize from [0, 1] to [-1, 1]
         x = x * 2.0 - 1.0
 # Convenience: create a pre-configured loss instance
 # ---------------------------------------------------------------------------
 def create_arcface_loss(
+    device: torch.device | None = None,
+    weights_path: str | None = None,
 ) -> ArcFaceLoss:
     """Factory function for creating an ArcFaceLoss with sensible defaults.

landmarkdiff/audit.py CHANGED Viewed

@@ -116,12 +116,10 @@ class AuditReporter:
             if case.identity_sim > 0:
                 by_proc[proc]["id_sims"].append(case.identity_sim)
-        for proc, stats in by_proc.items():
             stats["pass_rate"] = stats["passed"] / max(stats["total"], 1)
             stats["mean_identity_sim"] = (
-                sum(stats["id_sims"]) / len(stats["id_sims"])
-                if stats["id_sims"]
-                else 0.0
             )
             del stats["id_sims"]
@@ -137,12 +135,10 @@ class AuditReporter:
             if case.identity_sim > 0:
                 by_fitz[ft]["id_sims"].append(case.identity_sim)
-        for ft, stats in by_fitz.items():
             stats["pass_rate"] = stats["passed"] / max(stats["total"], 1)
             stats["mean_identity_sim"] = (
-                sum(stats["id_sims"]) / len(stats["id_sims"])
-                if stats["id_sims"]
-                else 0.0
             )
             del stats["id_sims"]
@@ -268,7 +264,7 @@ class AuditReporter:
                 f"<td>{c.procedure.title()}</td>"
                 f"<td>{c.fitzpatrick_type}</td>"
                 f"<td>{c.identity_sim:.4f}</td>"
-                f'<td>{"WARN" if c.safety_passed else "FAIL"}</td>'
                 f"<td>{issues}</td>"
                 f"</tr>\n"
             )

             if case.identity_sim > 0:
                 by_proc[proc]["id_sims"].append(case.identity_sim)
+        for _proc, stats in by_proc.items():
             stats["pass_rate"] = stats["passed"] / max(stats["total"], 1)
             stats["mean_identity_sim"] = (
+                sum(stats["id_sims"]) / len(stats["id_sims"]) if stats["id_sims"] else 0.0
             )
             del stats["id_sims"]
             if case.identity_sim > 0:
                 by_fitz[ft]["id_sims"].append(case.identity_sim)
+        for _ft, stats in by_fitz.items():
             stats["pass_rate"] = stats["passed"] / max(stats["total"], 1)
             stats["mean_identity_sim"] = (
+                sum(stats["id_sims"]) / len(stats["id_sims"]) if stats["id_sims"] else 0.0
             )
             del stats["id_sims"]
                 f"<td>{c.procedure.title()}</td>"
                 f"<td>{c.fitzpatrick_type}</td>"
                 f"<td>{c.identity_sim:.4f}</td>"
+                f"<td>{'WARN' if c.safety_passed else 'FAIL'}</td>"
                 f"<td>{issues}</td>"
                 f"</tr>\n"
             )

landmarkdiff/augmentation.py ADDED Viewed

	@@ -0,0 +1,293 @@

+"""Training data augmentation pipeline for LandmarkDiff.
+Provides domain-specific augmentations that maintain landmark consistency:
+- Geometric: flip, rotation, affine (landmarks co-transformed)
+- Photometric: color jitter, brightness, contrast (applied to images only)
+- Skin-tone augmentation: ITA-space perturbation for Fitzpatrick balance
+- Conditioning augmentation: noise injection, dropout for robustness
+All augmentations preserve the correspondence between:
+  input_image ↔ conditioning_image ↔ target_image ↔ mask
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+import cv2
+import numpy as np
+@dataclass
+class AugmentationConfig:
+    """Tunable parameters read by ``augment_training_sample``.
+    Every field has a default, so ``AugmentationConfig()`` is a usable baseline.
+    """
+    # Geometric
+    # When True, the sample is mirrored horizontally with probability 0.5.
+    random_flip: bool = True
+    # Rotation angle is drawn uniformly from [-random_rotation_deg, +random_rotation_deg].
+    random_rotation_deg: float = 5.0
+    # (low, high) bounds of the uniformly drawn isotropic scale factor.
+    random_scale: tuple[float, float] = (0.95, 1.05)
+    # Shift is drawn uniformly from +/- this value, then multiplied by width (x) or height (y).
+    random_translate: float = 0.02  # fraction of image size
+    # Photometric (images only, not conditioning)
+    # (low, high) bounds of the multiplicative brightness factor.
+    brightness_range: tuple[float, float] = (0.9, 1.1)
+    # (low, high) bounds of the factor scaling each pixel's deviation from the image mean.
+    contrast_range: tuple[float, float] = (0.9, 1.1)
+    # (low, high) bounds of the factor multiplied into the HSV saturation channel.
+    saturation_range: tuple[float, float] = (0.9, 1.1)
+    # Hue delta is drawn uniformly from +/- this value; 0 disables the hue shift.
+    hue_shift_range: float = 5.0  # degrees
+    # Conditioning augmentation
+    # Probability that the conditioning image is replaced by zeros; 0 disables dropout.
+    conditioning_dropout_prob: float = 0.1
+    # Gaussian noise std as a fraction of 255 (scaled by 255 before use); 0 disables noise.
+    conditioning_noise_std: float = 0.02
+    # Skin-tone augmentation
+    # NOTE(review): not read by augment_training_sample in this module; presumably meant
+    # for callers that sample an ita_delta for augment_skin_tone -- confirm.
+    ita_perturbation_std: float = 3.0  # ITA angle noise
+    # Seeds the default generator; only consulted when the caller passes no ``rng``.
+    seed: int | None = None
+def augment_training_sample(
+    input_image: np.ndarray,
+    target_image: np.ndarray,
+    conditioning: np.ndarray,
+    mask: np.ndarray,
+    landmarks_src: np.ndarray | None = None,
+    landmarks_dst: np.ndarray | None = None,
+    config: AugmentationConfig | None = None,
+    rng: np.random.Generator | None = None,
+) -> dict[str, np.ndarray]:
+    """Apply one consistent random augmentation to a training sample.
+    Spatial transforms (flip, rotation/scale/translation) use the same random
+    parameters for the images, the conditioning image, the mask and the
+    landmarks, so their correspondence is preserved. Photometric transforms
+    touch only ``input_image`` and ``target_image``; dropout and noise touch
+    only ``conditioning``. The inputs are copied and never modified in place.
+    Args:
+        input_image: (H, W, 3) original face image (uint8 BGR).
+        target_image: (H, W, 3) target face image (uint8 BGR).
+        conditioning: (H, W, 3) conditioning image (uint8).
+        mask: (H, W) or (H, W, 1) float32 mask.
+        landmarks_src: (N, 2) normalized [0,1] source landmark coords.
+        landmarks_dst: (N, 2) normalized [0,1] target landmark coords.
+        config: Augmentation parameters; defaults to ``AugmentationConfig()``.
+        rng: Random generator for reproducibility; when omitted, a new one is
+            created from ``config.seed``.
+    Returns:
+        Dict with keys ``input_image``, ``target_image``, ``conditioning`` and
+        ``mask``, plus ``landmarks_src`` / ``landmarks_dst`` for whichever
+        landmark arrays were provided.
+    """
+    if config is None:
+        config = AugmentationConfig()
+    if rng is None:
+        rng = np.random.default_rng(config.seed)
+    h, w = input_image.shape[:2]
+    # Work on copies so the caller's arrays are left untouched.
+    out_input = input_image.copy()
+    out_target = target_image.copy()
+    out_cond = conditioning.copy()
+    out_mask = mask.copy()
+    out_lm_src = landmarks_src.copy() if landmarks_src is not None else None
+    out_lm_dst = landmarks_dst.copy() if landmarks_dst is not None else None
+    mask_is_2d = out_mask.ndim == 2
+    # --- Geometric augmentations (applied to all) ---
+    # Random horizontal flip
+    if config.random_flip and rng.random() < 0.5:
+        # Reversed views have negative strides; make them contiguous for OpenCV.
+        out_input = np.ascontiguousarray(out_input[:, ::-1])
+        out_target = np.ascontiguousarray(out_target[:, ::-1])
+        out_cond = np.ascontiguousarray(out_cond[:, ::-1])
+        out_mask = np.ascontiguousarray(out_mask[:, ::-1])
+        # NOTE(review): only the x coordinate is mirrored; left/right landmark
+        # indices are not swapped -- confirm this is what consumers expect.
+        if out_lm_src is not None:
+            out_lm_src[:, 0] = 1.0 - out_lm_src[:, 0]
+        if out_lm_dst is not None:
+            out_lm_dst[:, 0] = 1.0 - out_lm_dst[:, 0]
+    # Random rotation + scale + translate
+    # Unpack before comparing so a list-valued random_scale (e.g. loaded from
+    # YAML) is treated the same as a tuple.
+    scale_lo, scale_hi = config.random_scale
+    # Translation alone must also trigger the warp; it used to be silently
+    # skipped whenever rotation was 0 and scale was (1.0, 1.0).
+    if (
+        config.random_rotation_deg > 0
+        or (scale_lo, scale_hi) != (1.0, 1.0)
+        or config.random_translate != 0
+    ):
+        angle = rng.uniform(-config.random_rotation_deg, config.random_rotation_deg)
+        scale = rng.uniform(scale_lo, scale_hi)
+        tx = rng.uniform(-config.random_translate, config.random_translate) * w
+        ty = rng.uniform(-config.random_translate, config.random_translate) * h
+        center = (w / 2, h / 2)
+        M = cv2.getRotationMatrix2D(center, angle, scale)
+        # Fold the pixel translation into the 2x3 affine matrix.
+        M[0, 2] += tx
+        M[1, 2] += ty
+        # Photos are reflect-padded; conditioning and mask are zero-padded.
+        out_input = cv2.warpAffine(out_input, M, (w, h), borderMode=cv2.BORDER_REFLECT_101)
+        out_target = cv2.warpAffine(out_target, M, (w, h), borderMode=cv2.BORDER_REFLECT_101)
+        out_cond = cv2.warpAffine(
+            out_cond, M, (w, h), borderMode=cv2.BORDER_CONSTANT, borderValue=0
+        )
+        # Warp the mask as a 2-D plane, then restore a trailing channel axis if it had one.
+        mask_2d = out_mask if mask_is_2d else out_mask[:, :, 0]
+        mask_2d = cv2.warpAffine(mask_2d, M, (w, h), borderMode=cv2.BORDER_CONSTANT, borderValue=0)
+        out_mask = mask_2d if mask_is_2d else mask_2d[:, :, np.newaxis]
+        # Transform landmarks
+        if out_lm_src is not None:
+            out_lm_src = _transform_landmarks(out_lm_src, M, w, h)
+        if out_lm_dst is not None:
+            out_lm_dst = _transform_landmarks(out_lm_dst, M, w, h)
+    # --- Photometric augmentations (images only, not conditioning/mask) ---
+    # Brightness: one factor shared by input and target.
+    b_factor = rng.uniform(config.brightness_range[0], config.brightness_range[1])
+    out_input = np.clip(out_input.astype(np.float32) * b_factor, 0, 255).astype(np.uint8)
+    out_target = np.clip(out_target.astype(np.float32) * b_factor, 0, 255).astype(np.uint8)
+    # Contrast: shared factor, but each image is scaled around its own mean.
+    c_factor = rng.uniform(config.contrast_range[0], config.contrast_range[1])
+    mean_in = out_input.mean()
+    mean_tgt = out_target.mean()
+    out_input = np.clip(
+        (out_input.astype(np.float32) - mean_in) * c_factor + mean_in, 0, 255
+    ).astype(np.uint8)
+    out_target = np.clip(
+        (out_target.astype(np.float32) - mean_tgt) * c_factor + mean_tgt, 0, 255
+    ).astype(np.uint8)
+    # Saturation (in HSV space); factors within 1e-4 of 1.0 are skipped.
+    s_factor = rng.uniform(config.saturation_range[0], config.saturation_range[1])
+    if abs(s_factor - 1.0) > 1e-4:
+        out_input = _adjust_saturation(out_input, s_factor)
+        out_target = _adjust_saturation(out_target, s_factor)
+    # Hue shift; deltas of 0.1 degrees or less are skipped.
+    if config.hue_shift_range > 0:
+        hue_delta = rng.uniform(-config.hue_shift_range, config.hue_shift_range)
+        if abs(hue_delta) > 0.1:
+            out_input = _shift_hue(out_input, hue_delta)
+            out_target = _shift_hue(out_target, hue_delta)
+    # --- Conditioning augmentation ---
+    # Conditioning dropout (replace with zeros to learn unconditional)
+    if config.conditioning_dropout_prob > 0 and rng.random() < config.conditioning_dropout_prob:
+        out_cond = np.zeros_like(out_cond)
+    # Conditioning noise
+    # NOTE(review): noise is also added to a dropped (all-zero) conditioning
+    # image, so it is not exactly zero afterwards -- confirm this is intended.
+    if config.conditioning_noise_std > 0:
+        noise = rng.normal(0, config.conditioning_noise_std * 255, out_cond.shape)
+        out_cond = np.clip(out_cond.astype(np.float32) + noise, 0, 255).astype(np.uint8)
+    result = {
+        "input_image": out_input,
+        "target_image": out_target,
+        "conditioning": out_cond,
+        "mask": out_mask,
+    }
+    if out_lm_src is not None:
+        result["landmarks_src"] = out_lm_src
+    if out_lm_dst is not None:
+        result["landmarks_dst"] = out_lm_dst
+    return result
+def _transform_landmarks(landmarks: np.ndarray, M: np.ndarray, w: int, h: int) -> np.ndarray:
+    """Map normalized landmarks through a 2x3 pixel-space affine matrix.
+    Coordinates are scaled to pixels by ``w`` and ``h``, transformed by ``M``,
+    scaled back, and clipped to [0, 1]. The input array is not modified.
+    """
+    extents = (w, h)
+    # Normalized -> pixel coordinates, on a copy.
+    pixel_pts = landmarks.copy()
+    for axis, extent in enumerate(extents):
+        pixel_pts[:, axis] *= extent
+    # Append a column of ones so the translation column of M is applied.
+    homogeneous = np.column_stack((pixel_pts, np.ones(pixel_pts.shape[0])))  # (N, 3)
+    warped = (M @ homogeneous.T).T  # (N, 2)
+    # Pixel -> normalized coordinates.
+    for axis, extent in enumerate(extents):
+        warped[:, axis] /= extent
+    return np.clip(warped, 0.0, 1.0)
+def _adjust_saturation(img: np.ndarray, factor: float) -> np.ndarray:
+    """Scale the saturation of a BGR image by ``factor``.
+    The image is converted to HSV, the S channel is multiplied by ``factor``
+    and clamped to [0, 255], and the result is converted back to BGR.
+    """
+    # assumes img is uint8, so that S lies in [0, 255] -- TODO confirm
+    hsv_f32 = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float32)
+    scaled_sat = hsv_f32[:, :, 1] * factor
+    hsv_f32[:, :, 1] = np.clip(scaled_sat, 0, 255)
+    hsv_u8 = hsv_f32.astype(np.uint8)
+    return cv2.cvtColor(hsv_u8, cv2.COLOR_HSV2BGR)
+def _shift_hue(img: np.ndarray, delta_deg: float) -> np.ndarray:
+    """Rotate the hue of a BGR image by ``delta_deg`` degrees.
+    The shift is applied to the H channel in HSV space and wrapped, so
+    negative and large deltas stay inside the valid hue range.
+    """
+    hsv_f32 = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float32)
+    # OpenCV hue range is [0, 180], i.e. half-degrees, hence delta / 2.
+    hue_offset = delta_deg / 2
+    hsv_f32[:, :, 0] = np.mod(hsv_f32[:, :, 0] + hue_offset, 180)
+    hsv_u8 = hsv_f32.astype(np.uint8)
+    return cv2.cvtColor(hsv_u8, cv2.COLOR_HSV2BGR)
+def augment_skin_tone(
+    image: np.ndarray,
+    ita_delta: float = 0.0,
+) -> np.ndarray:
+    """Shift the skin tone of an image in L*a*b* space.
+    Intended to help balance Fitzpatrick representation in training by
+    simulating different skin tones from existing samples.
+    Args:
+        image: (H, W, 3) BGR uint8 image.
+        ita_delta: ITA angle shift (positive = lighter, negative = darker).
+    Returns:
+        New BGR image with the L and b channels shifted.
+    """
+    lab_f32 = cv2.cvtColor(image, cv2.COLOR_BGR2LAB).astype(np.float32)
+    # ITA = arctan((L-50)/b), so an ITA change is approximated by moving L,
+    # with a smaller opposite move on b for a more natural tone change.
+    lightness_shift = ita_delta * 0.5  # approximate mapping
+    yellow_shift = -ita_delta * 0.15
+    for channel, shift in ((0, lightness_shift), (2, yellow_shift)):
+        lab_f32[:, :, channel] = np.clip(lab_f32[:, :, channel] + shift, 0, 255)
+    lab_u8 = lab_f32.astype(np.uint8)
+    return cv2.cvtColor(lab_u8, cv2.COLOR_LAB2BGR)
+class FitzpatrickBalancer:
+    """Track Fitzpatrick type counts and derive rebalancing sample weights.
+    Samples are registered one at a time; the accumulated counts are then
+    compared with a target distribution to produce sampling probabilities
+    that favor underrepresented types.
+    """
+    def __init__(self, target_distribution: dict[str, float] | None = None):
+        """Create a balancer.
+        Args:
+            target_distribution: Target fraction per type. When omitted or
+                empty, a uniform 1/6 share for types I-VI is used.
+        """
+        if target_distribution:
+            self.target = target_distribution
+        else:
+            self.target = {ft: 1 / 6 for ft in ("I", "II", "III", "IV", "V", "VI")}
+        self._counts: dict[str, int] = {}
+    def register_sample(self, fitz_type: str) -> None:
+        """Increment the observed count for ``fitz_type``."""
+        seen_so_far = self._counts.get(fitz_type, 0)
+        self._counts[fitz_type] = seen_so_far + 1
+    def get_sampling_weights(self, fitz_types: list[str]) -> np.ndarray:
+        """Return one sampling probability per entry of ``fitz_types``.
+        Each raw weight is target frequency divided by observed frequency,
+        capped at 5.0; the result is normalized to sum to 1. Types that were
+        never registered are treated as having been seen once.
+        """
+        registered_total = sum(self._counts.values()) or 1
+        def raw_weight(ft: str) -> float:
+            observed_freq = self._counts.get(ft, 1) / registered_total
+            desired_freq = self.target.get(ft, 1 / 6)
+            # Weight = target / actual (capped for stability)
+            return min(desired_freq / max(observed_freq, 1e-6), 5.0)
+        raw = np.array([raw_weight(ft) for ft in fitz_types], dtype=np.float64)
+        return raw / raw.sum()  # normalize to probability distribution

landmarkdiff/benchmark.py CHANGED Viewed

@@ -63,17 +63,19 @@ class InferenceBenchmark:
         if throughput_fps == 0.0 and latency_ms > 0:
             throughput_fps = 1000.0 / latency_ms * batch_size
-        self.results.append(BenchmarkResult(
-            config_name=config_name,
-            latency_ms=latency_ms,
-            throughput_fps=throughput_fps,
-            vram_gb=vram_gb,
-            batch_size=batch_size,
-            resolution=resolution,
-            num_inference_steps=num_inference_steps,
-            device=device,
-            metadata=metadata,
-        ))
     def mean_latency(self, config_name: str | None = None) -> float:
         """Mean latency in ms, optionally filtered by config."""
@@ -124,7 +126,10 @@ class InferenceBenchmark:
         if not configs:
             return "No benchmark results."
-        header = f"{'Config':>20s} | {'Mean(ms)':>10s} | {'P99(ms)':>10s} | {'FPS':>8s} | {'VRAM(GB)':>8s} | {'N':>4s}"
         lines = [
             f"Inference Benchmark: {self.model_name}",
             header,

         if throughput_fps == 0.0 and latency_ms > 0:
             throughput_fps = 1000.0 / latency_ms * batch_size
+        self.results.append(
+            BenchmarkResult(
+                config_name=config_name,
+                latency_ms=latency_ms,
+                throughput_fps=throughput_fps,
+                vram_gb=vram_gb,
+                batch_size=batch_size,
+                resolution=resolution,
+                num_inference_steps=num_inference_steps,
+                device=device,
+                metadata=metadata,
+            )
+        )
     def mean_latency(self, config_name: str | None = None) -> float:
         """Mean latency in ms, optionally filtered by config."""
         if not configs:
             return "No benchmark results."
+        header = (
+            f"{'Config':>20s} | {'Mean(ms)':>10s} | {'P99(ms)':>10s}"
+            f" | {'FPS':>8s} | {'VRAM(GB)':>8s} | {'N':>4s}"
+        )
         lines = [
             f"Inference Benchmark: {self.model_name}",
             header,

landmarkdiff/checkpoint_manager.py CHANGED Viewed

@@ -166,9 +166,7 @@ class CheckpointManager:
         torch.save(state, ckpt_dir / "training_state.pt")
         # Compute checkpoint size
-        size_mb = sum(
-            f.stat().st_size for f in ckpt_dir.rglob("*") if f.is_file()
-        ) / (1024 * 1024)
         # Create metadata
         meta = CheckpointMetadata(
@@ -216,7 +214,7 @@ class CheckpointManager:
         entries.sort(key=lambda x: x[1], reverse=not self.lower_is_better)
         # Mark best
-        best_names = {e[0] for e in entries[:self.keep_best]}
         for name, meta in self._index["checkpoints"].items():
             meta["is_best"] = name in best_names
@@ -245,11 +243,11 @@ class CheckpointManager:
             val = meta.get("metrics", {}).get(self.metric)
             if val is None:
                 continue
-            if best_val is None:
-                best, best_val = name, val
-            elif self.lower_is_better and val < best_val:
-                best, best_val = name, val
-            elif not self.lower_is_better and val > best_val:
                 best, best_val = name, val
         return best
@@ -280,7 +278,7 @@ class CheckpointManager:
         keep = set()
         # Keep latest
-        for name in all_names[-self.keep_latest:]:
             keep.add(name)
         # Keep best
@@ -323,10 +321,7 @@ class CheckpointManager:
     def total_size_mb(self) -> float:
         """Return total disk size of all tracked checkpoints."""
-        return sum(
-            meta.get("size_mb", 0.0)
-            for meta in self._index["checkpoints"].values()
-        )
     def summary(self) -> str:
         """Return a human-readable summary of checkpoint state."""
@@ -351,6 +346,7 @@ class CheckpointManager:
 # Helpers
 # ------------------------------------------------------------------
 def _get_state_dict(module: torch.nn.Module) -> dict:
     """Extract state dict, handling DDP wrapper."""
     if hasattr(module, "module"):

         torch.save(state, ckpt_dir / "training_state.pt")
         # Compute checkpoint size
+        size_mb = sum(f.stat().st_size for f in ckpt_dir.rglob("*") if f.is_file()) / (1024 * 1024)
         # Create metadata
         meta = CheckpointMetadata(
         entries.sort(key=lambda x: x[1], reverse=not self.lower_is_better)
         # Mark best
+        best_names = {e[0] for e in entries[: self.keep_best]}
         for name, meta in self._index["checkpoints"].items():
             meta["is_best"] = name in best_names
             val = meta.get("metrics", {}).get(self.metric)
             if val is None:
                 continue
+            if (
+                best_val is None
+                or (self.lower_is_better and val < best_val)
+                or (not self.lower_is_better and val > best_val)
+            ):
                 best, best_val = name, val
         return best
         keep = set()
         # Keep latest
+        for name in all_names[-self.keep_latest :]:
             keep.add(name)
         # Keep best
     def total_size_mb(self) -> float:
         """Return total disk size of all tracked checkpoints."""
+        return sum(meta.get("size_mb", 0.0) for meta in self._index["checkpoints"].values())
     def summary(self) -> str:
         """Return a human-readable summary of checkpoint state."""
 # Helpers
 # ------------------------------------------------------------------
 def _get_state_dict(module: torch.nn.Module) -> dict:
     """Extract state dict, handling DDP wrapper."""
     if hasattr(module, "module"):

landmarkdiff/cli.py CHANGED Viewed

@@ -17,10 +17,10 @@ import sys
 def cmd_infer(args: argparse.Namespace) -> None:
     """Run single-image inference."""
-    import cv2
-    import numpy as np
     from pathlib import Path
     from landmarkdiff.inference import LandmarkDiffPipeline
     image = cv2.imread(args.image)
@@ -51,6 +51,7 @@ def cmd_infer(args: argparse.Namespace) -> None:
     if args.watermark:
         from landmarkdiff.safety import SafetyValidator
         validator = SafetyValidator()
         watermarked = validator.apply_watermark(result["output"])
         wm_path = out_path.with_stem(out_path.stem + "_watermarked")
@@ -87,9 +88,7 @@ def cmd_evaluate(args: argparse.Namespace) -> None:
     run_evaluation(
         test_dir=args.test_dir,
         output_dir=args.output,
-        mode=args.mode,
         checkpoint=args.checkpoint,
-        displacement_model=args.displacement_model,
         max_samples=args.max_samples,
     )
@@ -98,10 +97,7 @@ def cmd_config(args: argparse.Namespace) -> None:
     """Show or validate configuration."""
     from landmarkdiff.config import ExperimentConfig, load_config, validate_config
-    if args.file:
-        config = load_config(args.file)
-    else:
-        config = ExperimentConfig()
     if args.validate:
         warnings = validate_config(config)
@@ -112,14 +108,17 @@ def cmd_config(args: argparse.Namespace) -> None:
         else:
             print("Configuration valid (no warnings).")
     else:
-        import yaml
         from dataclasses import asdict
         print(yaml.dump(asdict(config), default_flow_style=False, sort_keys=False))
 def cmd_validate(args: argparse.Namespace) -> None:
     """Run safety validation on an output image."""
     import cv2
     from landmarkdiff.safety import SafetyValidator
     input_img = cv2.imread(args.input)
@@ -148,6 +147,7 @@ def cmd_validate(args: argparse.Namespace) -> None:
 def cmd_version(args: argparse.Namespace) -> None:
     """Print version info."""
     from landmarkdiff import __version__
     print(f"LandmarkDiff v{__version__}")
@@ -162,8 +162,11 @@ def main(argv: list[str] | None = None) -> None:
     # --- infer ---
     p_infer = subparsers.add_parser("infer", help="Run single-image inference")
     p_infer.add_argument("image", help="Input face image path")
-    p_infer.add_argument("--procedure", default="rhinoplasty",
-                         choices=["rhinoplasty", "blepharoplasty", "rhytidectomy", "orthognathic"])
     p_infer.add_argument("--intensity", type=float, default=65.0)
     p_infer.add_argument("--output", default="output.png")
     p_infer.add_argument("--mode", default="tps", choices=["controlnet", "img2img", "tps"])
@@ -180,8 +183,11 @@ def main(argv: list[str] | None = None) -> None:
     p_ensemble.add_argument("--intensity", type=float, default=65.0)
     p_ensemble.add_argument("--output", default="ensemble_output")
     p_ensemble.add_argument("--n-samples", type=int, default=5)
-    p_ensemble.add_argument("--strategy", default="best_of_n",
-                            choices=["pixel_average", "weighted_average", "best_of_n", "median"])
     p_ensemble.add_argument("--mode", default="tps", choices=["controlnet", "img2img", "tps"])
     p_ensemble.add_argument("--checkpoint", default=None)
     p_ensemble.add_argument("--displacement-model", default=None)

 def cmd_infer(args: argparse.Namespace) -> None:
     """Run single-image inference."""
     from pathlib import Path
+    import cv2
     from landmarkdiff.inference import LandmarkDiffPipeline
     image = cv2.imread(args.image)
     if args.watermark:
         from landmarkdiff.safety import SafetyValidator
         validator = SafetyValidator()
         watermarked = validator.apply_watermark(result["output"])
         wm_path = out_path.with_stem(out_path.stem + "_watermarked")
     run_evaluation(
         test_dir=args.test_dir,
         output_dir=args.output,
         checkpoint=args.checkpoint,
         max_samples=args.max_samples,
     )
     """Show or validate configuration."""
     from landmarkdiff.config import ExperimentConfig, load_config, validate_config
+    config = load_config(args.file) if args.file else ExperimentConfig()
     if args.validate:
         warnings = validate_config(config)
         else:
             print("Configuration valid (no warnings).")
     else:
         from dataclasses import asdict
+        import yaml
         print(yaml.dump(asdict(config), default_flow_style=False, sort_keys=False))
 def cmd_validate(args: argparse.Namespace) -> None:
     """Run safety validation on an output image."""
     import cv2
     from landmarkdiff.safety import SafetyValidator
     input_img = cv2.imread(args.input)
 def cmd_version(args: argparse.Namespace) -> None:
     """Print version info."""
     from landmarkdiff import __version__
     print(f"LandmarkDiff v{__version__}")
     # --- infer ---
     p_infer = subparsers.add_parser("infer", help="Run single-image inference")
     p_infer.add_argument("image", help="Input face image path")
+    p_infer.add_argument(
+        "--procedure",
+        default="rhinoplasty",
+        choices=["rhinoplasty", "blepharoplasty", "rhytidectomy", "orthognathic"],
+    )
     p_infer.add_argument("--intensity", type=float, default=65.0)
     p_infer.add_argument("--output", default="output.png")
     p_infer.add_argument("--mode", default="tps", choices=["controlnet", "img2img", "tps"])
     p_ensemble.add_argument("--intensity", type=float, default=65.0)
     p_ensemble.add_argument("--output", default="ensemble_output")
     p_ensemble.add_argument("--n-samples", type=int, default=5)
+    p_ensemble.add_argument(
+        "--strategy",
+        default="best_of_n",
+        choices=["pixel_average", "weighted_average", "best_of_n", "median"],
+    )
     p_ensemble.add_argument("--mode", default="tps", choices=["controlnet", "img2img", "tps"])
     p_ensemble.add_argument("--checkpoint", default=None)
     p_ensemble.add_argument("--displacement-model", default=None)

landmarkdiff/clinical.py CHANGED Viewed

@@ -80,9 +80,9 @@ def detect_vitiligo_patches(
     # Also check for low saturation (a,b channels close to 128)
     a_channel = lab[:, :, 1]
     b_channel = lab[:, :, 2]
-    low_sat = (
-        (np.abs(a_channel - 128) < 15) & (np.abs(b_channel - 128) < 15)
-    ).astype(np.uint8) * 255
     # Combined: bright AND low-saturation within face
     vitiligo_raw = cv2.bitwise_and(bright_mask, low_sat)

     # Also check for low saturation (a,b channels close to 128)
     a_channel = lab[:, :, 1]
     b_channel = lab[:, :, 2]
+    low_sat = ((np.abs(a_channel - 128) < 15) & (np.abs(b_channel - 128) < 15)).astype(
+        np.uint8
+    ) * 255
     # Combined: bright AND low-saturation within face
     vitiligo_raw = cv2.bitwise_and(bright_mask, low_sat)

landmarkdiff/conditioning.py CHANGED Viewed

@@ -18,17 +18,83 @@ from landmarkdiff.landmarks import FaceLandmarks
 # This is invariant to landmark displacement (unlike Delaunay).
 JAWLINE_CONTOUR = [
-    10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
-    397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
-    172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109, 10,
 ]
 LEFT_EYE_CONTOUR = [
-    33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246, 33,
 ]
 RIGHT_EYE_CONTOUR = [
-    362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398, 362,
 ]
 LEFT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]
@@ -39,13 +105,53 @@ NOSE_TIP = [94, 2, 326, 327, 294, 278, 279, 275, 274, 460, 456, 363, 370]
 NOSE_BOTTOM = [19, 1, 274, 275, 440, 344, 278, 294, 460, 305, 289, 392]
 OUTER_LIPS = [
-    61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291,
-    308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78, 61,
 ]
 INNER_LIPS = [
-    78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308,
-    324, 318, 402, 317, 14, 87, 178, 88, 95, 78,
 ]
 ALL_CONTOURS = [

 # This is invariant to landmark displacement (unlike Delaunay).
 JAWLINE_CONTOUR = [
+    10,
+    338,
+    297,
+    332,
+    284,
+    251,
+    389,
+    356,
+    454,
+    323,
+    361,
+    288,
+    397,
+    365,
+    379,
+    378,
+    400,
+    377,
+    152,
+    148,
+    176,
+    149,
+    150,
+    136,
+    172,
+    58,
+    132,
+    93,
+    234,
+    127,
+    162,
+    21,
+    54,
+    103,
+    67,
+    109,
+    10,
 ]
 LEFT_EYE_CONTOUR = [
+    33,
+    7,
+    163,
+    144,
+    145,
+    153,
+    154,
+    155,
+    133,
+    173,
+    157,
+    158,
+    159,
+    160,
+    161,
+    246,
+    33,
 ]
 RIGHT_EYE_CONTOUR = [
+    362,
+    382,
+    381,
+    380,
+    374,
+    373,
+    390,
+    249,
+    263,
+    466,
+    388,
+    387,
+    386,
+    385,
+    384,
+    398,
+    362,
 ]
 LEFT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]
 NOSE_BOTTOM = [19, 1, 274, 275, 440, 344, 278, 294, 460, 305, 289, 392]
 OUTER_LIPS = [
+    61,
+    146,
+    91,
+    181,
+    84,
+    17,
+    314,
+    405,
+    321,
+    375,
+    291,
+    308,
+    324,
+    318,
+    402,
+    317,
+    14,
+    87,
+    178,
+    88,
+    95,
+    78,
+    61,
 ]
 INNER_LIPS = [
+    78,
+    191,
+    80,
+    81,
+    82,
+    13,
+    312,
+    311,
+    310,
+    415,
+    308,
+    324,
+    318,
+    402,
+    317,
+    14,
+    87,
+    178,
+    88,
+    95,
+    78,
 ]
 ALL_CONTOURS = [

landmarkdiff/config.py CHANGED Viewed

@@ -18,7 +18,8 @@ Usage:
 from __future__ import annotations
-from dataclasses import dataclass, field, asdict
 from pathlib import Path
 from typing import Any
@@ -28,6 +29,7 @@ import yaml
 @dataclass
 class ModelConfig:
     """ControlNet and base model configuration."""
     base_model: str = "runwayml/stable-diffusion-v1-5"
     controlnet_conditioning_channels: int = 3
     controlnet_conditioning_scale: float = 1.0
@@ -39,6 +41,7 @@ class ModelConfig:
 @dataclass
 class TrainingConfig:
     """Training hyperparameters."""
     phase: str = "A"  # "A" or "B"
     learning_rate: float = 1e-5
     batch_size: int = 4
@@ -77,6 +80,7 @@ class TrainingConfig:
 @dataclass
 class DataConfig:
     """Dataset configuration."""
     train_dir: str = "data/training"
     val_dir: str = "data/validation"
     test_dir: str = "data/test"
@@ -90,9 +94,14 @@ class DataConfig:
     color_jitter: float = 0.1
     # Procedure filtering
-    procedures: list[str] = field(default_factory=lambda: [
-        "rhinoplasty", "blepharoplasty", "rhytidectomy", "orthognathic",
-    ])
     intensity_range: tuple[float, float] = (30.0, 100.0)
     # Data-driven displacement
@@ -103,6 +112,7 @@ class DataConfig:
 @dataclass
 class InferenceConfig:
     """Inference / generation configuration."""
     num_inference_steps: int = 30
     guidance_scale: float = 7.5
     scheduler: str = "dpmsolver++"  # "ddpm", "ddim", "dpmsolver++"
@@ -124,6 +134,7 @@ class InferenceConfig:
 @dataclass
 class EvaluationConfig:
     """Evaluation configuration."""
     compute_fid: bool = True
     compute_lpips: bool = True
     compute_nme: bool = True
@@ -137,6 +148,7 @@ class EvaluationConfig:
 @dataclass
 class WandbConfig:
     """Weights & Biases logging configuration."""
     enabled: bool = True
     project: str = "landmarkdiff"
     entity: str | None = None
@@ -147,8 +159,9 @@ class WandbConfig:
 @dataclass
 class SlurmConfig:
     """SLURM job submission parameters."""
     partition: str = "batch_gpu"
-    account: str = "csb_gpu_acc"
     gpu_type: str = "nvidia_rtx_a6000"
     num_gpus: int = 1
     mem: str = "48G"
@@ -160,6 +173,7 @@ class SlurmConfig:
 @dataclass
 class SafetyConfig:
     """Clinical safety and responsible AI parameters."""
     identity_threshold: float = 0.6
     max_displacement_fraction: float = 0.05
     watermark_enabled: bool = True
@@ -173,6 +187,7 @@ class SafetyConfig:
 @dataclass
 class ExperimentConfig:
     """Top-level experiment configuration."""
     experiment_name: str = "default"
     description: str = ""
     version: str = "0.3.0"
@@ -227,9 +242,10 @@ class ExperimentConfig:
         return asdict(self)
-def _from_dict(cls, d: dict):
     """Create a dataclass from a dict, ignoring unknown keys."""
     import dataclasses
     field_map = {f.name: f for f in dataclasses.fields(cls)}
     filtered = {}
     for k, v in d.items():
@@ -243,7 +259,7 @@ def _from_dict(cls, d: dict):
     return cls(**filtered)
-def _convert_tuples(obj):
     """Recursively convert tuples to lists for YAML serialization."""
     if isinstance(obj, dict):
         return {k: _convert_tuples(v) for k, v in obj.items()}
@@ -266,10 +282,7 @@ def load_config(
     Returns:
         ExperimentConfig with overrides applied.
     """
-    if config_path:
-        config = ExperimentConfig.from_yaml(config_path)
-    else:
-        config = ExperimentConfig()
     if overrides:
         for key, value in overrides.items():

 from __future__ import annotations
+import os
+from dataclasses import asdict, dataclass, field
 from pathlib import Path
 from typing import Any
 @dataclass
 class ModelConfig:
     """ControlNet and base model configuration."""
     base_model: str = "runwayml/stable-diffusion-v1-5"
     controlnet_conditioning_channels: int = 3
     controlnet_conditioning_scale: float = 1.0
 @dataclass
 class TrainingConfig:
     """Training hyperparameters."""
     phase: str = "A"  # "A" or "B"
     learning_rate: float = 1e-5
     batch_size: int = 4
 @dataclass
 class DataConfig:
     """Dataset configuration."""
     train_dir: str = "data/training"
     val_dir: str = "data/validation"
     test_dir: str = "data/test"
     color_jitter: float = 0.1
     # Procedure filtering
+    procedures: list[str] = field(
+        default_factory=lambda: [
+            "rhinoplasty",
+            "blepharoplasty",
+            "rhytidectomy",
+            "orthognathic",
+        ]
+    )
     intensity_range: tuple[float, float] = (30.0, 100.0)
     # Data-driven displacement
 @dataclass
 class InferenceConfig:
     """Inference / generation configuration."""
     num_inference_steps: int = 30
     guidance_scale: float = 7.5
     scheduler: str = "dpmsolver++"  # "ddpm", "ddim", "dpmsolver++"
 @dataclass
 class EvaluationConfig:
     """Evaluation configuration."""
     compute_fid: bool = True
     compute_lpips: bool = True
     compute_nme: bool = True
 @dataclass
 class WandbConfig:
     """Weights & Biases logging configuration."""
     enabled: bool = True
     project: str = "landmarkdiff"
     entity: str | None = None
 @dataclass
 class SlurmConfig:
     """SLURM job submission parameters."""
     partition: str = "batch_gpu"
+    account: str = os.environ.get("SLURM_ACCOUNT", "default_gpu")
     gpu_type: str = "nvidia_rtx_a6000"
     num_gpus: int = 1
     mem: str = "48G"
 @dataclass
 class SafetyConfig:
     """Clinical safety and responsible AI parameters."""
     identity_threshold: float = 0.6
     max_displacement_fraction: float = 0.05
     watermark_enabled: bool = True
 @dataclass
 class ExperimentConfig:
     """Top-level experiment configuration."""
     experiment_name: str = "default"
     description: str = ""
     version: str = "0.3.0"
         return asdict(self)
+def _from_dict(cls: type, d: dict) -> Any:
     """Create a dataclass from a dict, ignoring unknown keys."""
     import dataclasses
     field_map = {f.name: f for f in dataclasses.fields(cls)}
     filtered = {}
     for k, v in d.items():
     return cls(**filtered)
+def _convert_tuples(obj: Any) -> Any:
     """Recursively convert tuples to lists for YAML serialization."""
     if isinstance(obj, dict):
         return {k: _convert_tuples(v) for k, v in obj.items()}
     Returns:
         ExperimentConfig with overrides applied.
     """
+    config = ExperimentConfig.from_yaml(config_path) if config_path else ExperimentConfig()
     if overrides:
         for key, value in overrides.items():

landmarkdiff/curriculum.py CHANGED Viewed

@@ -104,10 +104,10 @@ class ProcedureCurriculum:
     # Difficulty ranking (0=easiest, 1=hardest)
     DEFAULT_PROCEDURE_DIFFICULTY = {
-        "blepharoplasty": 0.3,   # small, localized changes
-        "rhinoplasty": 0.5,     # moderate, central face
-        "rhytidectomy": 0.7,    # large, affects face shape
-        "orthognathic": 0.9,    # largest deformations
     }
     def __init__(
@@ -137,10 +137,7 @@ class ProcedureCurriculum:
     def get_procedure_weights(self, step: int) -> dict[str, float]:
         """Get all procedure weights at the given step."""
-        return {
-            proc: self.get_weight(step, proc)
-            for proc in self.proc_difficulty
-        }
 def compute_sample_difficulty(
@@ -174,7 +171,7 @@ def compute_sample_difficulty(
     source_bonus = {
         "synthetic": 0.0,
         "synthetic_v3": 0.1,  # realistic displacements slightly harder
-        "real": 0.2,          # real data hardest
         "augmented": 0.0,
     }

     # Difficulty ranking (0=easiest, 1=hardest)
     DEFAULT_PROCEDURE_DIFFICULTY = {
+        "blepharoplasty": 0.3,  # small, localized changes
+        "rhinoplasty": 0.5,  # moderate, central face
+        "rhytidectomy": 0.7,  # large, affects face shape
+        "orthognathic": 0.9,  # largest deformations
     }
     def __init__(
     def get_procedure_weights(self, step: int) -> dict[str, float]:
         """Get all procedure weights at the given step."""
+        return {proc: self.get_weight(step, proc) for proc in self.proc_difficulty}
 def compute_sample_difficulty(
     source_bonus = {
         "synthetic": 0.0,
         "synthetic_v3": 0.1,  # realistic displacements slightly harder
+        "real": 0.2,  # real data hardest
         "augmented": 0.0,
     }

landmarkdiff/data.py CHANGED Viewed

@@ -23,8 +23,8 @@ from __future__ import annotations
 import csv
 import json
 import logging
 from pathlib import Path
-from typing import Callable
 import cv2
 import numpy as np
@@ -38,6 +38,7 @@ logger = logging.getLogger(__name__)
 # Core dataset
 # ---------------------------------------------------------------------------
 class SurgicalPairDataset(Dataset):
     """Dataset for loading surgical before/after training pairs.
@@ -162,9 +163,7 @@ class SurgicalPairDataset(Dataset):
         img = cv2.imread(str(path))
         if img is None:
             logger.warning("Failed to load %s, using blank", path)
-            return np.zeros(
-                (self.resolution, self.resolution, 3), dtype=np.uint8
-            )
         if img.shape[:2] != (self.resolution, self.resolution):
             img = cv2.resize(img, (self.resolution, self.resolution))
         return img
@@ -173,14 +172,10 @@ class SurgicalPairDataset(Dataset):
         """Load a mask as float32 [0,1], resized to resolution."""
         path = self.data_dir / filename
         if not path.exists():
-            return np.ones(
-                (self.resolution, self.resolution), dtype=np.float32
-            )
         mask = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
         if mask is None:
-            return np.ones(
-                (self.resolution, self.resolution), dtype=np.float32
-            )
         mask = cv2.resize(mask, (self.resolution, self.resolution))
         return mask.astype(np.float32) / 255.0
@@ -189,6 +184,7 @@ class SurgicalPairDataset(Dataset):
 # Evaluation dataset (input + ground truth)
 # ---------------------------------------------------------------------------
 class EvalPairDataset(Dataset):
     """Dataset for evaluation: loads input/target pairs with procedure labels.
@@ -235,9 +231,7 @@ class EvalPairDataset(Dataset):
         path = self.data_dir / filename
         img = cv2.imread(str(path))
         if img is None:
-            return np.zeros(
-                (self.resolution, self.resolution, 3), dtype=np.uint8
-            )
         if img.shape[:2] != (self.resolution, self.resolution):
             img = cv2.resize(img, (self.resolution, self.resolution))
         return img
@@ -247,6 +241,7 @@ class EvalPairDataset(Dataset):
 # Conversion utilities
 # ---------------------------------------------------------------------------
 def bgr_to_tensor(bgr: np.ndarray) -> torch.Tensor:
     """Convert BGR uint8 image to RGB [0,1] tensor (C, H, W)."""
     rgb = bgr[:, :, ::-1].astype(np.float32) / 255.0
@@ -271,6 +266,7 @@ def mask_to_tensor(mask: np.ndarray) -> torch.Tensor:
 # Samplers
 # ---------------------------------------------------------------------------
 def create_procedure_sampler(
     dataset: SurgicalPairDataset,
     balance_procedures: bool = True,
@@ -309,6 +305,7 @@ def create_procedure_sampler(
 # DataLoader factory
 # ---------------------------------------------------------------------------
 def create_dataloader(
     dataset: Dataset,
     batch_size: int = 4,
@@ -353,6 +350,7 @@ def create_dataloader(
 # Multi-directory dataset
 # ---------------------------------------------------------------------------
 class CombinedDataset(Dataset):
     """Combine multiple SurgicalPairDatasets into one.

 import csv
 import json
 import logging
+from collections.abc import Callable
 from pathlib import Path
 import cv2
 import numpy as np
 # Core dataset
 # ---------------------------------------------------------------------------
 class SurgicalPairDataset(Dataset):
     """Dataset for loading surgical before/after training pairs.
         img = cv2.imread(str(path))
         if img is None:
             logger.warning("Failed to load %s, using blank", path)
+            return np.zeros((self.resolution, self.resolution, 3), dtype=np.uint8)
         if img.shape[:2] != (self.resolution, self.resolution):
             img = cv2.resize(img, (self.resolution, self.resolution))
         return img
         """Load a mask as float32 [0,1], resized to resolution."""
         path = self.data_dir / filename
         if not path.exists():
+            return np.ones((self.resolution, self.resolution), dtype=np.float32)
         mask = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
         if mask is None:
+            return np.ones((self.resolution, self.resolution), dtype=np.float32)
         mask = cv2.resize(mask, (self.resolution, self.resolution))
         return mask.astype(np.float32) / 255.0
 # Evaluation dataset (input + ground truth)
 # ---------------------------------------------------------------------------
 class EvalPairDataset(Dataset):
     """Dataset for evaluation: loads input/target pairs with procedure labels.
         path = self.data_dir / filename
         img = cv2.imread(str(path))
         if img is None:
+            return np.zeros((self.resolution, self.resolution, 3), dtype=np.uint8)
         if img.shape[:2] != (self.resolution, self.resolution):
             img = cv2.resize(img, (self.resolution, self.resolution))
         return img
 # Conversion utilities
 # ---------------------------------------------------------------------------
 def bgr_to_tensor(bgr: np.ndarray) -> torch.Tensor:
     """Convert BGR uint8 image to RGB [0,1] tensor (C, H, W)."""
     rgb = bgr[:, :, ::-1].astype(np.float32) / 255.0
 # Samplers
 # ---------------------------------------------------------------------------
 def create_procedure_sampler(
     dataset: SurgicalPairDataset,
     balance_procedures: bool = True,
 # DataLoader factory
 # ---------------------------------------------------------------------------
 def create_dataloader(
     dataset: Dataset,
     batch_size: int = 4,
 # Multi-directory dataset
 # ---------------------------------------------------------------------------
 class CombinedDataset(Dataset):
     """Combine multiple SurgicalPairDatasets into one.

landmarkdiff/data_version.py CHANGED Viewed

@@ -22,7 +22,7 @@ import json
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, Iterator
 @dataclass
@@ -68,9 +68,7 @@ class DataManifest:
     """
     version: str = "1.0"
-    created_at: str = field(
-        default_factory=lambda: datetime.now(timezone.utc).isoformat()
-    )
     root_dir: str = ""
     files: list[FileEntry] = field(default_factory=list)
     metadata: dict[str, Any] = field(default_factory=dict)
@@ -237,8 +235,7 @@ class DataManifest:
             actual_size = fp.stat().st_size
             if actual_size != entry.size_bytes:
                 issues.append(
-                    f"Size mismatch: {entry.path} "
-                    f"(expected {entry.size_bytes}, got {actual_size})"
                 )
             # Check checksum
@@ -265,8 +262,7 @@ class DataManifest:
         added = sorted(other_paths - self_paths)
         removed = sorted(self_paths - other_paths)
         modified = sorted(
-            p for p in self_paths & other_paths
-            if self_files[p].checksum != other_files[p].checksum
         )
         return {"added": added, "removed": removed, "modified": modified}
@@ -292,6 +288,7 @@ def _get_hostname() -> str:
     """Get hostname safely."""
     try:
         import socket
         return socket.gethostname()
     except Exception:
         return "unknown"

 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
+from typing import Any
 @dataclass
     """
     version: str = "1.0"
+    created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
     root_dir: str = ""
     files: list[FileEntry] = field(default_factory=list)
     metadata: dict[str, Any] = field(default_factory=dict)
             actual_size = fp.stat().st_size
             if actual_size != entry.size_bytes:
                 issues.append(
+                    f"Size mismatch: {entry.path} (expected {entry.size_bytes}, got {actual_size})"
                 )
             # Check checksum
         added = sorted(other_paths - self_paths)
         removed = sorted(self_paths - other_paths)
         modified = sorted(
+            p for p in self_paths & other_paths if self_files[p].checksum != other_files[p].checksum
         )
         return {"added": added, "removed": removed, "modified": modified}
     """Get hostname safely."""
     try:
         import socket
         return socket.gethostname()
     except Exception:
         return "unknown"

landmarkdiff/displacement_model.py CHANGED Viewed

@@ -33,12 +33,11 @@ from __future__ import annotations
 import json
 import logging
 from pathlib import Path
-from typing import Optional, Union
 import cv2
 import numpy as np
-from landmarkdiff.landmarks import extract_landmarks, FaceLandmarks
 from landmarkdiff.manipulation import PROCEDURE_LANDMARKS
 logger = logging.getLogger(__name__)
@@ -54,6 +53,7 @@ PROCEDURES = list(PROCEDURE_LANDMARKS.keys())
 # Helpers
 # ---------------------------------------------------------------------------
 def _normalized_coords_2d(face: FaceLandmarks) -> np.ndarray:
     """Extract (478, 2) normalized [0, 1] coordinates from a FaceLandmarks object.
@@ -84,9 +84,24 @@ def _compute_alignment_quality(
     # Stable landmarks: forehead, temple region, outer face oval
     # These should exhibit near-zero displacement after surgery.
     stable_indices = [
-        10, 109, 67, 103, 54, 21, 162, 127,  # left forehead/temple
-        338, 297, 332, 284, 251, 389, 356, 454,  # right forehead/temple
-        234, 93,  # outer cheek anchors
     ]
     stable_indices = [i for i in stable_indices if i < NUM_LANDMARKS]
@@ -95,7 +110,7 @@ def _compute_alignment_quality(
     # RMS displacement on stable points
     diffs = after_stable - before_stable
-    rms = np.sqrt(np.mean(np.sum(diffs ** 2, axis=1)))
     # Map RMS to quality: 0 displacement -> 1.0, rms >= 0.05 (5% of image) -> 0.0
     quality = float(np.clip(1.0 - rms / 0.05, 0.0, 1.0))
@@ -106,6 +121,7 @@ def _compute_alignment_quality(
 # Procedure classification
 # ---------------------------------------------------------------------------
 def classify_procedure(displacements: np.ndarray) -> str:
     """Classify which surgical procedure was performed from displacement vectors.
@@ -143,8 +159,7 @@ def classify_procedure(displacements: np.ndarray) -> str:
     # Threshold: mean displacement < 0.002 (~1 pixel at 512x512)
     if best_score < 0.002:
         logger.debug(
-            "No significant displacement detected (best=%.5f). "
-            "Classified as 'unknown'.",
             best_score,
         )
         return "unknown"
@@ -156,11 +171,12 @@ def classify_procedure(displacements: np.ndarray) -> str:
 # Single-pair extraction
 # ---------------------------------------------------------------------------
 def extract_displacements(
     before_img: np.ndarray,
     after_img: np.ndarray,
     min_detection_confidence: float = 0.5,
-) -> Optional[dict]:
     """Extract landmark displacements from a before/after surgery image pair.
     Runs MediaPipe Face Mesh on both images, computes per-landmark
@@ -185,16 +201,12 @@ def extract_displacements(
         Returns ``None`` if face detection fails on either image.
     """
     # Extract landmarks from both images
-    face_before = extract_landmarks(
-        before_img, min_detection_confidence=min_detection_confidence
-    )
     if face_before is None:
         logger.warning("Face detection failed on before image.")
         return None
-    face_after = extract_landmarks(
-        after_img, min_detection_confidence=min_detection_confidence
-    )
     if face_after is None:
         logger.warning("Face detection failed on after image.")
         return None
@@ -227,8 +239,9 @@ def extract_displacements(
 # Batch extraction from directory
 # ---------------------------------------------------------------------------
 def extract_from_directory(
-    pairs_dir: Union[str, Path],
     min_detection_confidence: float = 0.5,
     min_quality: float = 0.0,
 ) -> list[dict]:
@@ -338,6 +351,7 @@ def extract_from_directory(
 # Displacement model
 # ---------------------------------------------------------------------------
 class DisplacementModel:
     """Statistical model of per-procedure surgical displacements.
@@ -418,12 +432,12 @@ class DisplacementModel:
             n = stacked.shape[0]
             self.stats[proc] = {
-                "mean": np.mean(stacked, axis=0),           # (478, 2)
-                "std": np.std(stacked, axis=0),             # (478, 2)
-                "min": np.min(stacked, axis=0),             # (478, 2)
-                "max": np.max(stacked, axis=0),             # (478, 2)
-                "median": np.median(stacked, axis=0),       # (478, 2)
-                "mean_magnitude": np.mean(                  # (478,)
                     np.linalg.norm(stacked, axis=2), axis=0
                 ),
             }
@@ -442,7 +456,7 @@ class DisplacementModel:
         procedure: str,
         intensity: float = 1.0,
         noise_scale: float = 0.0,
-        rng: Optional[np.random.Generator] = None,
     ) -> np.ndarray:
         """Generate a displacement field for a given procedure and intensity.
@@ -470,10 +484,7 @@ class DisplacementModel:
         if procedure not in self.stats:
             available = ", ".join(self.procedures)
-            raise KeyError(
-                f"Procedure '{procedure}' not in model. "
-                f"Available: {available}"
-            )
         proc_stats = self.stats[procedure]
         field = proc_stats["mean"].copy() * intensity
@@ -489,7 +500,7 @@ class DisplacementModel:
         return field.astype(np.float32)
-    def get_summary(self, procedure: Optional[str] = None) -> dict:
         """Get a human-readable summary of the model statistics.
         Args:
@@ -518,7 +529,7 @@ class DisplacementModel:
         return summary
-    def save(self, path: Union[str, Path]) -> None:
         """Save the fitted model to disk as a ``.npz`` file.
         The file contains:
@@ -550,15 +561,13 @@ class DisplacementModel:
             "n_samples": self.n_samples,
             "num_landmarks": NUM_LANDMARKS,
         }
-        arrays["__metadata__"] = np.frombuffer(
-            json.dumps(metadata).encode("utf-8"), dtype=np.uint8
-        )
         np.savez_compressed(str(path), **arrays)
         logger.info("Saved displacement model to %s", path)
     @classmethod
-    def load(cls, path: Union[str, Path]) -> "DisplacementModel":
         """Load a fitted model from a ``.npz`` file.
         Supports two formats:
@@ -592,7 +601,7 @@ class DisplacementModel:
                 model.stats[proc] = {}
                 for key in data.files:
                     if key.startswith(f"{proc}__"):
-                        stat_name = key[len(f"{proc}__"):]
                         model.stats[proc][stat_name] = data[key]
         # Format 2: extract_displacements.py format with procedures array
@@ -625,10 +634,7 @@ class DisplacementModel:
                     model.n_samples[proc] = 0
         else:
-            raise ValueError(
-                f"Unrecognized displacement model format. "
-                f"Keys: {data.files[:10]}"
-            )
         model._fitted = True
         logger.info(
@@ -644,6 +650,7 @@ class DisplacementModel:
 # Utilities
 # ---------------------------------------------------------------------------
 def _top_k_landmarks(
     magnitudes: np.ndarray,
     k: int = 10,
@@ -659,10 +666,7 @@ def _top_k_landmarks(
         descending by magnitude.
     """
     top_indices = np.argsort(magnitudes)[::-1][:k]
-    return [
-        {"index": int(idx), "magnitude": float(magnitudes[idx])}
-        for idx in top_indices
-    ]
 def visualize_displacements(
@@ -698,7 +702,7 @@ def visualize_displacements(
         dy = int(displacements[i, 1] * h * scale)
         # Only draw if displacement is above noise floor
-        mag = np.sqrt(dx ** 2 + dy ** 2)
         if mag < 1.0:
             continue

 import json
 import logging
 from pathlib import Path
 import cv2
 import numpy as np
+from landmarkdiff.landmarks import FaceLandmarks, extract_landmarks
 from landmarkdiff.manipulation import PROCEDURE_LANDMARKS
 logger = logging.getLogger(__name__)
 # Helpers
 # ---------------------------------------------------------------------------
 def _normalized_coords_2d(face: FaceLandmarks) -> np.ndarray:
     """Extract (478, 2) normalized [0, 1] coordinates from a FaceLandmarks object.
     # Stable landmarks: forehead, temple region, outer face oval
     # These should exhibit near-zero displacement after surgery.
     stable_indices = [
+        10,
+        109,
+        67,
+        103,
+        54,
+        21,
+        162,
+        127,  # left forehead/temple
+        338,
+        297,
+        332,
+        284,
+        251,
+        389,
+        356,
+        454,  # right forehead/temple
+        234,
+        93,  # outer cheek anchors
     ]
     stable_indices = [i for i in stable_indices if i < NUM_LANDMARKS]
     # RMS displacement on stable points
     diffs = after_stable - before_stable
+    rms = np.sqrt(np.mean(np.sum(diffs**2, axis=1)))
     # Map RMS to quality: 0 displacement -> 1.0, rms >= 0.05 (5% of image) -> 0.0
     quality = float(np.clip(1.0 - rms / 0.05, 0.0, 1.0))
 # Procedure classification
 # ---------------------------------------------------------------------------
 def classify_procedure(displacements: np.ndarray) -> str:
     """Classify which surgical procedure was performed from displacement vectors.
     # Threshold: mean displacement < 0.002 (~1 pixel at 512x512)
     if best_score < 0.002:
         logger.debug(
+            "No significant displacement detected (best=%.5f). Classified as 'unknown'.",
             best_score,
         )
         return "unknown"
 # Single-pair extraction
 # ---------------------------------------------------------------------------
 def extract_displacements(
     before_img: np.ndarray,
     after_img: np.ndarray,
     min_detection_confidence: float = 0.5,
+) -> dict | None:
     """Extract landmark displacements from a before/after surgery image pair.
     Runs MediaPipe Face Mesh on both images, computes per-landmark
         Returns ``None`` if face detection fails on either image.
     """
     # Extract landmarks from both images
+    face_before = extract_landmarks(before_img, min_detection_confidence=min_detection_confidence)
     if face_before is None:
         logger.warning("Face detection failed on before image.")
         return None
+    face_after = extract_landmarks(after_img, min_detection_confidence=min_detection_confidence)
     if face_after is None:
         logger.warning("Face detection failed on after image.")
         return None
 # Batch extraction from directory
 # ---------------------------------------------------------------------------
 def extract_from_directory(
+    pairs_dir: str | Path,
     min_detection_confidence: float = 0.5,
     min_quality: float = 0.0,
 ) -> list[dict]:
 # Displacement model
 # ---------------------------------------------------------------------------
 class DisplacementModel:
     """Statistical model of per-procedure surgical displacements.
             n = stacked.shape[0]
             self.stats[proc] = {
+                "mean": np.mean(stacked, axis=0),  # (478, 2)
+                "std": np.std(stacked, axis=0),  # (478, 2)
+                "min": np.min(stacked, axis=0),  # (478, 2)
+                "max": np.max(stacked, axis=0),  # (478, 2)
+                "median": np.median(stacked, axis=0),  # (478, 2)
+                "mean_magnitude": np.mean(  # (478,)
                     np.linalg.norm(stacked, axis=2), axis=0
                 ),
             }
         procedure: str,
         intensity: float = 1.0,
         noise_scale: float = 0.0,
+        rng: np.random.Generator | None = None,
     ) -> np.ndarray:
         """Generate a displacement field for a given procedure and intensity.
         if procedure not in self.stats:
             available = ", ".join(self.procedures)
+            raise KeyError(f"Procedure '{procedure}' not in model. Available: {available}")
         proc_stats = self.stats[procedure]
         field = proc_stats["mean"].copy() * intensity
         return field.astype(np.float32)
+    def get_summary(self, procedure: str | None = None) -> dict:
         """Get a human-readable summary of the model statistics.
         Args:
         return summary
+    def save(self, path: str | Path) -> None:
         """Save the fitted model to disk as a ``.npz`` file.
         The file contains:
             "n_samples": self.n_samples,
             "num_landmarks": NUM_LANDMARKS,
         }
+        arrays["__metadata__"] = np.frombuffer(json.dumps(metadata).encode("utf-8"), dtype=np.uint8)
         np.savez_compressed(str(path), **arrays)
         logger.info("Saved displacement model to %s", path)
     @classmethod
+    def load(cls, path: str | Path) -> DisplacementModel:
         """Load a fitted model from a ``.npz`` file.
         Supports two formats:
                 model.stats[proc] = {}
                 for key in data.files:
                     if key.startswith(f"{proc}__"):
+                        stat_name = key[len(f"{proc}__") :]
                         model.stats[proc][stat_name] = data[key]
         # Format 2: extract_displacements.py format with procedures array
                     model.n_samples[proc] = 0
         else:
+            raise ValueError(f"Unrecognized displacement model format. Keys: {data.files[:10]}")
         model._fitted = True
         logger.info(
 # Utilities
 # ---------------------------------------------------------------------------
 def _top_k_landmarks(
     magnitudes: np.ndarray,
     k: int = 10,
         descending by magnitude.
     """
     top_indices = np.argsort(magnitudes)[::-1][:k]
+    return [{"index": int(idx), "magnitude": float(magnitudes[idx])} for idx in top_indices]
 def visualize_displacements(
         dy = int(displacements[i, 1] * h * scale)
         # Only draw if displacement is above noise floor
+        mag = np.sqrt(dx**2 + dy**2)
         if mag < 1.0:
             continue

landmarkdiff/ensemble.py CHANGED Viewed

@@ -21,8 +21,6 @@ Usage:
 from __future__ import annotations
-from typing import Optional
 import cv2
 import numpy as np
@@ -93,7 +91,7 @@ class EnsembleInference:
         guidance_scale: float = 9.0,
         controlnet_conditioning_scale: float = 0.9,
         strength: float = 0.5,
-        seed: Optional[int] = None,
         **kwargs,
     ) -> dict:
         """Generate ensemble output.
@@ -155,14 +153,16 @@ class EnsembleInference:
         # Copy metadata from best result
         best_idx = selected_idx if selected_idx >= 0 else 0
         ensemble_result = dict(results[best_idx])
-        ensemble_result.update({
-            "output": final,
-            "outputs": outputs,
-            "scores": scores,
-            "selected_idx": selected_idx,
-            "strategy": self.strategy,
-            "n_samples": self.n_samples,
-        })
         return ensemble_result
@@ -196,7 +196,7 @@ class EnsembleInference:
         # Weighted average
         result = np.zeros_like(outputs[0], dtype=np.float32)
-        for output, weight in zip(outputs, weights):
             result += output.astype(np.float32) * weight
         return np.clip(result, 0, 255).astype(np.uint8), scores
@@ -269,8 +269,10 @@ def ensemble_inference(
     for i, output in enumerate(result["outputs"]):
         cv2.imwrite(str(out / f"sample_{i:02d}.png"), output)
         score = result["scores"][i]
-        print(f"  Sample {i}: score={score:.4f}"
-              + (" <-- selected" if i == result.get("selected_idx") else ""))
     # Comparison grid
     panels = [image] + result["outputs"] + [result["output"]]
@@ -281,8 +283,10 @@ def ensemble_inference(
     print(f"\nEnsemble output saved: {out / 'ensemble_output.png'}")
     if result.get("selected_idx", -1) >= 0:
-        print(f"Selected sample: {result['selected_idx']} "
-              f"(score={result['scores'][result['selected_idx']]:.4f})")
 if __name__ == "__main__":
@@ -294,18 +298,26 @@ if __name__ == "__main__":
     parser.add_argument("--intensity", type=float, default=65.0)
     parser.add_argument("--output", default="ensemble_output")
     parser.add_argument("--n_samples", type=int, default=5)
-    parser.add_argument("--strategy", default="best_of_n",
-                        choices=["pixel_average", "weighted_average", "best_of_n", "median"])
-    parser.add_argument("--mode", default="tps",
-                        choices=["controlnet", "img2img", "tps"])
     parser.add_argument("--checkpoint", default=None)
     parser.add_argument("--displacement-model", default=None)
     parser.add_argument("--seed", type=int, default=42)
     args = parser.parse_args()
     ensemble_inference(
-        args.image, args.procedure, args.intensity,
-        args.output, args.n_samples, args.strategy,
-        args.mode, args.checkpoint, args.displacement_model,
         args.seed,
     )

 from __future__ import annotations
 import cv2
 import numpy as np
         guidance_scale: float = 9.0,
         controlnet_conditioning_scale: float = 0.9,
         strength: float = 0.5,
+        seed: int | None = None,
         **kwargs,
     ) -> dict:
         """Generate ensemble output.
         # Copy metadata from best result
         best_idx = selected_idx if selected_idx >= 0 else 0
         ensemble_result = dict(results[best_idx])
+        ensemble_result.update(
+            {
+                "output": final,
+                "outputs": outputs,
+                "scores": scores,
+                "selected_idx": selected_idx,
+                "strategy": self.strategy,
+                "n_samples": self.n_samples,
+            }
+        )
         return ensemble_result
         # Weighted average
         result = np.zeros_like(outputs[0], dtype=np.float32)
+        for output, weight in zip(outputs, weights, strict=False):
             result += output.astype(np.float32) * weight
         return np.clip(result, 0, 255).astype(np.uint8), scores
     for i, output in enumerate(result["outputs"]):
         cv2.imwrite(str(out / f"sample_{i:02d}.png"), output)
         score = result["scores"][i]
+        print(
+            f"  Sample {i}: score={score:.4f}"
+            + (" <-- selected" if i == result.get("selected_idx") else "")
+        )
     # Comparison grid
     panels = [image] + result["outputs"] + [result["output"]]
     print(f"\nEnsemble output saved: {out / 'ensemble_output.png'}")
     if result.get("selected_idx", -1) >= 0:
+        print(
+            f"Selected sample: {result['selected_idx']} "
+            f"(score={result['scores'][result['selected_idx']]:.4f})"
+        )
 if __name__ == "__main__":
     parser.add_argument("--intensity", type=float, default=65.0)
     parser.add_argument("--output", default="ensemble_output")
     parser.add_argument("--n_samples", type=int, default=5)
+    parser.add_argument(
+        "--strategy",
+        default="best_of_n",
+        choices=["pixel_average", "weighted_average", "best_of_n", "median"],
+    )
+    parser.add_argument("--mode", default="tps", choices=["controlnet", "img2img", "tps"])
     parser.add_argument("--checkpoint", default=None)
     parser.add_argument("--displacement-model", default=None)
     parser.add_argument("--seed", type=int, default=42)
     args = parser.parse_args()
     ensemble_inference(
+        args.image,
+        args.procedure,
+        args.intensity,
+        args.output,
+        args.n_samples,
+        args.strategy,
+        args.mode,
+        args.checkpoint,
+        args.displacement_model,
         args.seed,
     )

landmarkdiff/evaluation.py CHANGED Viewed

@@ -8,6 +8,7 @@ Secondary: SSIM (relaxed target >0.80).
 from __future__ import annotations
 from dataclasses import dataclass, field
 import numpy as np
@@ -23,7 +24,7 @@ class EvalMetrics:
     fid: float = 0.0
     lpips: float = 0.0
-    nme: float = 0.0           # Normalized Mean landmark Error
     identity_sim: float = 0.0  # ArcFace cosine similarity
     ssim: float = 0.0
@@ -154,9 +155,7 @@ def compute_nme(
     Returns:
         NME value (lower is better).
     """
-    iod = np.linalg.norm(
-        target_landmarks[left_eye_idx] - target_landmarks[right_eye_idx]
-    )
     if iod < 1.0:
         iod = 1.0
@@ -175,6 +174,7 @@ def compute_ssim(
     """
     try:
         from skimage.metrics import structural_similarity
         # Convert to grayscale if color, or compute per-channel
         if pred.ndim == 3 and pred.shape[2] == 3:
             return float(structural_similarity(pred, target, channel_axis=2, data_range=255))
@@ -194,10 +194,8 @@ def compute_ssim(
         C1 = (0.01 * 255) ** 2
         C2 = (0.03 * 255) ** 2
-        ssim_val = (
-            (2 * mu_p * mu_t + C1) * (2 * sigma_pt + C2)
-        ) / (
-            (mu_p ** 2 + mu_t ** 2 + C1) * (sigma_p ** 2 + sigma_t ** 2 + C2)
         )
         return float(ssim_val)
@@ -206,11 +204,12 @@ _LPIPS_FN = None
 _ARCFACE_APP = None
-def _get_lpips_fn():
     """Get or create singleton LPIPS model."""
     global _LPIPS_FN
     if _LPIPS_FN is None:
         import lpips
         _LPIPS_FN = lpips.LPIPS(net="alex", verbose=False)
         _LPIPS_FN.eval()
     return _LPIPS_FN
@@ -225,7 +224,7 @@ def compute_lpips(
     Returns LPIPS score (lower = more similar).
     """
     try:
-        import lpips
         import torch
     except ImportError:
         return float("nan")
@@ -261,9 +260,10 @@ def compute_fid(
     except ImportError:
         raise ImportError(
             "torch-fidelity is required for FID. Install with: pip install torch-fidelity"
-        )
     import torch
     metrics = calculate_metrics(
         input1=generated_dir,
         input2=real_dir,
@@ -285,6 +285,7 @@ def compute_identity_similarity(
     """
     try:
         from insightface.app import FaceAnalysis
         global _ARCFACE_APP
         if _ARCFACE_APP is None:
             _ARCFACE_APP = FaceAnalysis(

 from __future__ import annotations
 from dataclasses import dataclass, field
+from typing import Any
 import numpy as np
     fid: float = 0.0
     lpips: float = 0.0
+    nme: float = 0.0  # Normalized Mean landmark Error
     identity_sim: float = 0.0  # ArcFace cosine similarity
     ssim: float = 0.0
     Returns:
         NME value (lower is better).
     """
+    iod = np.linalg.norm(target_landmarks[left_eye_idx] - target_landmarks[right_eye_idx])
     if iod < 1.0:
         iod = 1.0
     """
     try:
         from skimage.metrics import structural_similarity
         # Convert to grayscale if color, or compute per-channel
         if pred.ndim == 3 and pred.shape[2] == 3:
             return float(structural_similarity(pred, target, channel_axis=2, data_range=255))
         C1 = (0.01 * 255) ** 2
         C2 = (0.03 * 255) ** 2
+        ssim_val = ((2 * mu_p * mu_t + C1) * (2 * sigma_pt + C2)) / (
+            (mu_p**2 + mu_t**2 + C1) * (sigma_p**2 + sigma_t**2 + C2)
         )
         return float(ssim_val)
 _ARCFACE_APP = None
+def _get_lpips_fn() -> Any:
     """Get or create singleton LPIPS model."""
     global _LPIPS_FN
     if _LPIPS_FN is None:
         import lpips
         _LPIPS_FN = lpips.LPIPS(net="alex", verbose=False)
         _LPIPS_FN.eval()
     return _LPIPS_FN
     Returns LPIPS score (lower = more similar).
     """
     try:
+        import lpips  # noqa: F401
         import torch
     except ImportError:
         return float("nan")
     except ImportError:
         raise ImportError(
             "torch-fidelity is required for FID. Install with: pip install torch-fidelity"
+        ) from None
     import torch
     metrics = calculate_metrics(
         input1=generated_dir,
         input2=real_dir,
     """
     try:
         from insightface.app import FaceAnalysis
         global _ARCFACE_APP
         if _ARCFACE_APP is None:
             _ARCFACE_APP = FaceAnalysis(

landmarkdiff/experiment_tracker.py ADDED Viewed

	@@ -0,0 +1,215 @@

+"""Local experiment tracker for training reproducibility.
+Tracks all training runs with their configs, metrics, and results.
+Each experiment gets a unique ID and timestamp.
+Usage::
+    tracker = ExperimentTracker("experiments/")
+    # Start a new experiment
+    exp_id = tracker.start(
+        name="phaseA_v2",
+        config={
+            "phase": "A", "lr": 1e-5, "batch": 4,
+            "steps": 100000, "data": "training_combined",
+        },
+    )
+    # Log metrics during training
+    tracker.log_metric(exp_id, step=1000, loss=0.045, ssim=0.82)
+    # Record final results
+    tracker.finish(exp_id, results={"fid": 42.3, "ssim": 0.87})
+    # List all experiments
+    tracker.list_experiments()
+    # Compare experiments
+    tracker.compare(["exp_001", "exp_002"])
+"""
+from __future__ import annotations
+import json
+import os
+import socket
+import time
+from datetime import datetime
+from pathlib import Path
+class ExperimentTracker:
+    """Simple file-based experiment tracker."""
+    def __init__(self, experiments_dir: str = "experiments"):
+        self.dir = Path(experiments_dir)
+        self.dir.mkdir(parents=True, exist_ok=True)
+        self._index_path = self.dir / "index.json"
+        self._index = self._load_index()
+    def _load_index(self) -> dict:
+        if self._index_path.exists():
+            with open(self._index_path) as f:
+                return json.load(f)
+        return {"experiments": {}, "counter": 0}
+    def _save_index(self) -> None:
+        with open(self._index_path, "w") as f:
+            json.dump(self._index, f, indent=2)
+    def start(
+        self,
+        name: str,
+        config: dict,
+        tags: list[str] | None = None,
+    ) -> str:
+        """Start a new experiment. Returns experiment ID."""
+        self._index["counter"] += 1
+        exp_id = f"exp_{self._index['counter']:03d}"
+        exp = {
+            "id": exp_id,
+            "name": name,
+            "config": config,
+            "tags": tags or [],
+            "status": "running",
+            "started_at": datetime.now().isoformat(),
+            "finished_at": None,
+            "hostname": socket.gethostname(),
+            "slurm_job_id": os.environ.get("SLURM_JOB_ID"),
+            "gpu": os.environ.get("CUDA_VISIBLE_DEVICES"),
+            "results": {},
+            "metrics_file": f"{exp_id}_metrics.jsonl",
+        }
+        self._index["experiments"][exp_id] = exp
+        self._save_index()
+        # Create metrics log file
+        metrics_path = self.dir / str(exp["metrics_file"])
+        metrics_path.touch()
+        print(f"Experiment started: {exp_id} ({name})")
+        return exp_id
+    def log_metric(self, exp_id: str, step: int | None = None, **metrics) -> None:
+        """Log metrics for a training step."""
+        exp = self._index["experiments"].get(exp_id)
+        if not exp:
+            return
+        entry = {
+            "timestamp": time.time(),
+            "step": step,
+            **metrics,
+        }
+        metrics_path = self.dir / str(exp["metrics_file"])
+        with open(metrics_path, "a") as f:
+            f.write(json.dumps(entry) + "\n")
+    def finish(
+        self,
+        exp_id: str,
+        results: dict | None = None,
+        status: str = "completed",
+    ) -> None:
+        """Mark experiment as finished."""
+        exp = self._index["experiments"].get(exp_id)
+        if not exp:
+            return
+        exp["status"] = status
+        exp["finished_at"] = datetime.now().isoformat()
+        if results:
+            exp["results"] = results
+        self._save_index()
+        print(f"Experiment {exp_id} {status}")
+    def get_metrics(self, exp_id: str) -> list[dict]:
+        """Load all logged metrics for an experiment."""
+        exp = self._index["experiments"].get(exp_id)
+        if not exp:
+            return []
+        metrics_path = self.dir / str(exp["metrics_file"])
+        if not metrics_path.exists():
+            return []
+        entries = []
+        with open(metrics_path) as f:
+            for line in f:
+                line = line.strip()
+                if line:
+                    entries.append(json.loads(line))
+        return entries
+    def list_experiments(self) -> list[dict]:
+        """List all experiments with summary info."""
+        experiments = []
+        for exp_id, exp in sorted(self._index["experiments"].items()):
+            summary = {
+                "id": exp_id,
+                "name": exp["name"],
+                "status": exp["status"],
+                "started": exp["started_at"][:19],
+                "tags": exp.get("tags", []),
+            }
+            if exp["results"]:
+                for key in ["fid", "ssim", "lpips", "nme"]:
+                    if key in exp["results"]:
+                        summary[key] = exp["results"][key]
+            experiments.append(summary)
+        return experiments
+    def compare(self, exp_ids: list[str]) -> dict:
+        """Compare multiple experiments by their results."""
+        comparison = {}
+        for exp_id in exp_ids:
+            exp = self._index["experiments"].get(exp_id)
+            if exp:
+                comparison[exp_id] = {
+                    "name": exp["name"],
+                    "config": exp["config"],
+                    "results": exp["results"],
+                }
+        return comparison
+    def print_summary(self) -> None:
+        """Print a summary table of all experiments."""
+        experiments = self.list_experiments()
+        if not experiments:
+            print("No experiments found.")
+            return
+        # Header
+        print(f"{'ID':<10} {'Name':<20} {'Status':<12} {'FID':>6} {'SSIM':>6} {'LPIPS':>6}")
+        print("-" * 70)
+        for exp in experiments:
+            fid = f"{exp.get('fid', '')}" if "fid" in exp else "--"
+            ssim = f"{exp.get('ssim', ''):.4f}" if "ssim" in exp else "--"
+            lpips = f"{exp.get('lpips', ''):.4f}" if "lpips" in exp else "--"
+            print(
+                f"{exp['id']:<10} {exp['name']:<20}"
+                f" {exp['status']:<12} {fid:>6} {ssim:>6} {lpips:>6}"
+            )
+    def get_best(self, metric: str = "fid", lower_is_better: bool = True) -> str | None:
+        """Get the experiment ID with the best value for a given metric."""
+        best_id = None
+        best_val = float("inf") if lower_is_better else float("-inf")
+        for exp_id, exp in self._index["experiments"].items():
+            if exp["status"] != "completed":
+                continue
+            val = exp["results"].get(metric)
+            if val is None:
+                continue
+            if (lower_is_better and val < best_val) or (not lower_is_better and val > best_val):
+                best_val = val
+                best_id = exp_id
+        return best_id

landmarkdiff/face_verifier.py CHANGED Viewed

@@ -15,19 +15,18 @@ Designed for:
 from __future__ import annotations
-import os
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Optional
 import cv2
 import numpy as np
 # ---------------------------------------------------------------------------
 # Data structures
 # ---------------------------------------------------------------------------
 @dataclass
 class DistortionReport:
     """Analysis of detected distortions in a face image."""
@@ -36,13 +35,13 @@ class DistortionReport:
     quality_score: float = 0.0
     # Individual distortion scores (0-1, higher = more distorted)
-    blur_score: float = 0.0          # Laplacian variance-based
-    noise_score: float = 0.0         # High-freq energy ratio
-    compression_score: float = 0.0   # JPEG block artifact detection
-    oversmooth_score: float = 0.0    # Beauty filter / airbrushed detection
-    color_cast_score: float = 0.0    # Unnatural color shift
-    geometric_distort: float = 0.0   # Face proportion anomalies
-    lighting_score: float = 0.0      # Over/under exposure
     # Classification
     primary_distortion: str = "none"
@@ -74,14 +73,14 @@ class DistortionReport:
 class RestorationResult:
     """Result of neural face restoration pipeline."""
-    restored: np.ndarray                    # Restored BGR image
-    original: np.ndarray                    # Original BGR image
-    distortion_report: DistortionReport     # Pre-restoration analysis
-    post_quality_score: float = 0.0         # Quality after restoration
-    identity_similarity: float = 0.0        # ArcFace cosine sim (original vs restored)
-    identity_preserved: bool = True         # Whether identity check passed
     restoration_stages: list[str] = field(default_factory=list)  # Which nets ran
-    improvement: float = 0.0               # quality_after - quality_before
     def summary(self) -> str:
         lines = [
@@ -100,9 +99,9 @@ class BatchVerificationReport:
     """Summary of batch face verification/restoration."""
     total: int = 0
-    passed: int = 0           # Good quality, no fix needed
-    restored: int = 0         # Fixed and now usable
-    rejected: int = 0         # Too distorted to salvage
     identity_failures: int = 0  # Restoration changed identity
     avg_quality_before: float = 0.0
     avg_quality_after: float = 0.0
@@ -123,7 +122,8 @@ class BatchVerificationReport:
             "Distortion Breakdown:",
         ]
         for dist_type, count in sorted(
-            self.distortion_counts.items(), key=lambda x: -x[1],
         ):
             lines.append(f"  {dist_type}: {count}")
         return "\n".join(lines)
@@ -133,6 +133,7 @@ class BatchVerificationReport:
 # Distortion Detection (classical + neural)
 # ---------------------------------------------------------------------------
 def detect_blur(image: np.ndarray) -> float:
     """Detect blur using Laplacian variance.
@@ -147,7 +148,7 @@ def detect_blur(image: np.ndarray) -> float:
     # Gradient magnitude (secondary)
     gx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
     gy = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
-    grad_mag = np.sqrt(gx ** 2 + gy ** 2).mean()
     # Normalize: typical sharp face has lap_var > 500, grad_mag > 30
     blur_lap = 1.0 - min(lap_var / 800.0, 1.0)
@@ -221,7 +222,7 @@ def detect_oversmoothing(image: np.ndarray) -> float:
     # Focus on face center region (avoid background)
     if h < 8 or w < 8:
         return 0.0  # Too small to analyze
-    roi = gray[h // 4:3 * h // 4, w // 4:3 * w // 4]
     # Texture energy: variance of high-pass filtered image
     blurred = cv2.GaussianBlur(roi.astype(np.float64), (0, 0), 2.0)
@@ -254,7 +255,7 @@ def detect_color_cast(image: np.ndarray) -> float:
     h, w = image.shape[:2]
     # Sample face center region
-    roi = lab[h // 4:3 * h // 4, w // 4:3 * w // 4]
     # A channel: green-red axis (neutral ~128)
     # B channel: blue-yellow axis (neutral ~128)
@@ -337,7 +338,7 @@ def detect_lighting_issues(image: np.ndarray) -> float:
     # Check for clipping
     overexposed = np.mean(l_channel > 245) * 5  # Fraction near white
-    underexposed = np.mean(l_channel < 10) * 5   # Fraction near black
     # Check for bimodal distribution (harsh shadows)
     hist = cv2.calcHist([l_channel], [0], None, [256], [0, 256]).flatten()
@@ -429,7 +430,7 @@ def analyze_distortions(image: np.ndarray) -> DistortionReport:
 _FACE_QUALITY_NET = None
-def _get_face_quality_scorer():
     """Get or create singleton face quality assessment model.
     Uses FaceXLib's quality scorer or falls back to BRISQUE-style features.
@@ -440,6 +441,7 @@ def _get_face_quality_scorer():
     try:
         from facexlib.assessment import init_assessment_model
         _FACE_QUALITY_NET = init_assessment_model("hypernet")
         return _FACE_QUALITY_NET
     except Exception:
@@ -461,6 +463,7 @@ def neural_quality_score(image: np.ndarray) -> float:
         try:
             import torch
             from facexlib.utils import img2tensor
             img_t = img2tensor(image / 255.0, bgr2rgb=True, float32=True)
             img_t = img_t.unsqueeze(0)
             if torch.cuda.is_available():
@@ -481,6 +484,7 @@ def neural_quality_score(image: np.ndarray) -> float:
 # Neural Face Restoration (cascaded)
 # ---------------------------------------------------------------------------
 def restore_face(
     image: np.ndarray,
     distortion: DistortionReport | None = None,
@@ -563,6 +567,7 @@ def restore_face(
     post_blur = detect_blur(result)
     if post_blur > 0.3:
         from landmarkdiff.postprocess import frequency_aware_sharpen
         result = frequency_aware_sharpen(result, strength=0.3)
         stages.append("sharpen")
@@ -573,6 +578,7 @@ def _try_codeformer(image: np.ndarray, fidelity: float = 0.7) -> np.ndarray | No
     """Try CodeFormer restoration. Returns None if unavailable."""
     try:
         from landmarkdiff.postprocess import restore_face_codeformer
         restored = restore_face_codeformer(image, fidelity=fidelity)
         if restored is not image:
             return restored
@@ -585,6 +591,7 @@ def _try_gfpgan(image: np.ndarray) -> np.ndarray | None:
     """Try GFPGAN restoration. Returns None if unavailable."""
     try:
         from landmarkdiff.postprocess import restore_face_gfpgan
         restored = restore_face_gfpgan(image)
         if restored is not image:
             return restored
@@ -599,15 +606,19 @@ _FV_REALESRGAN = None
 def _try_realesrgan(image: np.ndarray) -> np.ndarray | None:
     """Try Real-ESRGAN 2x upscale + downsample. Returns None if unavailable."""
     try:
-        from realesrgan import RealESRGANer
-        from basicsr.archs.rrdbnet_arch import RRDBNet
         import torch
         global _FV_REALESRGAN
         if _FV_REALESRGAN is None:
             model = RRDBNet(
-                num_in_ch=3, num_out_ch=3, num_feat=64,
-                num_block=23, num_grow_ch=32, scale=4,
             )
             _FV_REALESRGAN = RealESRGANer(
                 scale=4,
@@ -661,15 +672,15 @@ def _fix_lighting(image: np.ndarray) -> np.ndarray:
 _ARCFACE_APP = None
-def _get_arcface():
     """Get or create singleton ArcFace model."""
     global _ARCFACE_APP
     if _ARCFACE_APP is not None:
         return _ARCFACE_APP
     try:
-        from insightface.app import FaceAnalysis
         import torch
         app = FaceAnalysis(
             name="buffalo_l",
@@ -717,9 +728,9 @@ def verify_identity(
     if emb_orig is None or emb_rest is None:
         return -1.0, True  # Can't verify — assume OK
-    sim = float(np.dot(emb_orig, emb_rest) / (
-        np.linalg.norm(emb_orig) * np.linalg.norm(emb_rest) + 1e-8
-    ))
     sim = float(np.clip(sim, -1, 1))
     return sim, sim >= threshold
@@ -728,6 +739,7 @@ def verify_identity(
 # Full Verification + Restoration Pipeline
 # ---------------------------------------------------------------------------
 def verify_and_restore(
     image: np.ndarray,
     quality_threshold: float = 60.0,
@@ -813,6 +825,7 @@ def verify_and_restore(
 # Batch Processing
 # ---------------------------------------------------------------------------
 def verify_batch(
     image_dir: str,
     output_dir: str | None = None,
@@ -858,10 +871,9 @@ def verify_batch(
         rejected_dir.mkdir(parents=True, exist_ok=True)
     # Find all images
-    image_files = sorted([
-        f for f in image_path.iterdir()
-        if f.suffix.lower() in extensions and f.is_file()
-    ])
     report = BatchVerificationReport(total=len(image_files))
     quality_before = []

 from __future__ import annotations
 from dataclasses import dataclass, field
 from pathlib import Path
+from typing import Any
 import cv2
 import numpy as np
 # ---------------------------------------------------------------------------
 # Data structures
 # ---------------------------------------------------------------------------
 @dataclass
 class DistortionReport:
     """Analysis of detected distortions in a face image."""
     quality_score: float = 0.0
     # Individual distortion scores (0-1, higher = more distorted)
+    blur_score: float = 0.0  # Laplacian variance-based
+    noise_score: float = 0.0  # High-freq energy ratio
+    compression_score: float = 0.0  # JPEG block artifact detection
+    oversmooth_score: float = 0.0  # Beauty filter / airbrushed detection
+    color_cast_score: float = 0.0  # Unnatural color shift
+    geometric_distort: float = 0.0  # Face proportion anomalies
+    lighting_score: float = 0.0  # Over/under exposure
     # Classification
     primary_distortion: str = "none"
 class RestorationResult:
     """Result of neural face restoration pipeline."""
+    restored: np.ndarray  # Restored BGR image
+    original: np.ndarray  # Original BGR image
+    distortion_report: DistortionReport  # Pre-restoration analysis
+    post_quality_score: float = 0.0  # Quality after restoration
+    identity_similarity: float = 0.0  # ArcFace cosine sim (original vs restored)
+    identity_preserved: bool = True  # Whether identity check passed
     restoration_stages: list[str] = field(default_factory=list)  # Which nets ran
+    improvement: float = 0.0  # quality_after - quality_before
     def summary(self) -> str:
         lines = [
     """Summary of batch face verification/restoration."""
     total: int = 0
+    passed: int = 0  # Good quality, no fix needed
+    restored: int = 0  # Fixed and now usable
+    rejected: int = 0  # Too distorted to salvage
     identity_failures: int = 0  # Restoration changed identity
     avg_quality_before: float = 0.0
     avg_quality_after: float = 0.0
             "Distortion Breakdown:",
         ]
         for dist_type, count in sorted(
+            self.distortion_counts.items(),
+            key=lambda x: -x[1],
         ):
             lines.append(f"  {dist_type}: {count}")
         return "\n".join(lines)
 # Distortion Detection (classical + neural)
 # ---------------------------------------------------------------------------
 def detect_blur(image: np.ndarray) -> float:
     """Detect blur using Laplacian variance.
     # Gradient magnitude (secondary)
     gx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
     gy = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
+    grad_mag = np.sqrt(gx**2 + gy**2).mean()
     # Normalize: typical sharp face has lap_var > 500, grad_mag > 30
     blur_lap = 1.0 - min(lap_var / 800.0, 1.0)
     # Focus on face center region (avoid background)
     if h < 8 or w < 8:
         return 0.0  # Too small to analyze
+    roi = gray[h // 4 : 3 * h // 4, w // 4 : 3 * w // 4]
     # Texture energy: variance of high-pass filtered image
     blurred = cv2.GaussianBlur(roi.astype(np.float64), (0, 0), 2.0)
     h, w = image.shape[:2]
     # Sample face center region
+    roi = lab[h // 4 : 3 * h // 4, w // 4 : 3 * w // 4]
     # A channel: green-red axis (neutral ~128)
     # B channel: blue-yellow axis (neutral ~128)
     # Check for clipping
     overexposed = np.mean(l_channel > 245) * 5  # Fraction near white
+    underexposed = np.mean(l_channel < 10) * 5  # Fraction near black
     # Check for bimodal distribution (harsh shadows)
     hist = cv2.calcHist([l_channel], [0], None, [256], [0, 256]).flatten()
 _FACE_QUALITY_NET = None
+def _get_face_quality_scorer() -> Any:
     """Get or create singleton face quality assessment model.
     Uses FaceXLib's quality scorer or falls back to BRISQUE-style features.
     try:
         from facexlib.assessment import init_assessment_model
         _FACE_QUALITY_NET = init_assessment_model("hypernet")
         return _FACE_QUALITY_NET
     except Exception:
         try:
             import torch
             from facexlib.utils import img2tensor
             img_t = img2tensor(image / 255.0, bgr2rgb=True, float32=True)
             img_t = img_t.unsqueeze(0)
             if torch.cuda.is_available():
 # Neural Face Restoration (cascaded)
 # ---------------------------------------------------------------------------
 def restore_face(
     image: np.ndarray,
     distortion: DistortionReport | None = None,
     post_blur = detect_blur(result)
     if post_blur > 0.3:
         from landmarkdiff.postprocess import frequency_aware_sharpen
         result = frequency_aware_sharpen(result, strength=0.3)
         stages.append("sharpen")
     """Try CodeFormer restoration. Returns None if unavailable."""
     try:
         from landmarkdiff.postprocess import restore_face_codeformer
         restored = restore_face_codeformer(image, fidelity=fidelity)
         if restored is not image:
             return restored
     """Try GFPGAN restoration. Returns None if unavailable."""
     try:
         from landmarkdiff.postprocess import restore_face_gfpgan
         restored = restore_face_gfpgan(image)
         if restored is not image:
             return restored
 def _try_realesrgan(image: np.ndarray) -> np.ndarray | None:
     """Try Real-ESRGAN 2x upscale + downsample. Returns None if unavailable."""
     try:
         import torch
+        from basicsr.archs.rrdbnet_arch import RRDBNet
+        from realesrgan import RealESRGANer
         global _FV_REALESRGAN
         if _FV_REALESRGAN is None:
             model = RRDBNet(
+                num_in_ch=3,
+                num_out_ch=3,
+                num_feat=64,
+                num_block=23,
+                num_grow_ch=32,
+                scale=4,
             )
             _FV_REALESRGAN = RealESRGANer(
                 scale=4,
 _ARCFACE_APP = None
+def _get_arcface() -> Any:
     """Get or create singleton ArcFace model."""
     global _ARCFACE_APP
     if _ARCFACE_APP is not None:
         return _ARCFACE_APP
     try:
         import torch
+        from insightface.app import FaceAnalysis
         app = FaceAnalysis(
             name="buffalo_l",
     if emb_orig is None or emb_rest is None:
         return -1.0, True  # Can't verify — assume OK
+    sim = float(
+        np.dot(emb_orig, emb_rest) / (np.linalg.norm(emb_orig) * np.linalg.norm(emb_rest) + 1e-8)
+    )
     sim = float(np.clip(sim, -1, 1))
     return sim, sim >= threshold
 # Full Verification + Restoration Pipeline
 # ---------------------------------------------------------------------------
 def verify_and_restore(
     image: np.ndarray,
     quality_threshold: float = 60.0,
 # Batch Processing
 # ---------------------------------------------------------------------------
 def verify_batch(
     image_dir: str,
     output_dir: str | None = None,
         rejected_dir.mkdir(parents=True, exist_ok=True)
     # Find all images
+    image_files = sorted(
+        [f for f in image_path.iterdir() if f.suffix.lower() in extensions and f.is_file()]
+    )
     report = BatchVerificationReport(total=len(image_files))
     quality_before = []

landmarkdiff/fid.py CHANGED Viewed

@@ -16,6 +16,7 @@ Usage:
 from __future__ import annotations
 from pathlib import Path
 import numpy as np
@@ -23,14 +24,15 @@ try:
     import torch
     import torch.nn as nn
     from torch.utils.data import DataLoader, Dataset
     HAS_TORCH = True
 except ImportError:
     HAS_TORCH = False
-def _load_inception_v3():
     """Load InceptionV3 with pool3 features (2048-dim)."""
-    from torchvision.models import inception_v3, Inception_V3_Weights
     model = inception_v3(weights=Inception_V3_Weights.IMAGENET1K_V1)
     # We want features from the avg pool layer (2048-dim)
@@ -40,79 +42,81 @@ def _load_inception_v3():
     return model
-class ImageFolderDataset(Dataset):
-    """Simple dataset that loads images from a directory."""
-    def __init__(self, directory: str | Path, image_size: int = 299):
-        self.directory = Path(directory)
-        exts = {".jpg", ".jpeg", ".png", ".webp", ".bmp"}
-        self.files = sorted(
-            f for f in self.directory.iterdir()
-            if f.suffix.lower() in exts and f.is_file()
-        )
-        self.image_size = image_size
-    def __len__(self):
-        return len(self.files)
-    def __getitem__(self, idx):
-        import cv2
-        img = cv2.imread(str(self.files[idx]))
-        if img is None:
-            # Return zeros if image can't be loaded
-            return torch.zeros(3, self.image_size, self.image_size)
-        img = cv2.resize(img, (self.image_size, self.image_size))
-        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-        # Normalize to [0, 1] then ImageNet normalize
-        t = torch.from_numpy(img.astype(np.float32) / 255.0).permute(2, 0, 1)
-        t = _imagenet_normalize(t)
-        return t
-class NumpyArrayDataset(Dataset):
-    """Dataset wrapping a list of numpy arrays."""
-    def __init__(self, images: list[np.ndarray], image_size: int = 299):
-        self.images = images
-        self.image_size = image_size
-    def __len__(self):
-        return len(self.images)
-    def __getitem__(self, idx):
-        import cv2
-        img = self.images[idx]
-        if img.shape[:2] != (self.image_size, self.image_size):
             img = cv2.resize(img, (self.image_size, self.image_size))
-        if img.shape[2] == 3:
             img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-        t = torch.from_numpy(img.astype(np.float32) / 255.0).permute(2, 0, 1)
-        t = _imagenet_normalize(t)
-        return t
-def _imagenet_normalize(t: "torch.Tensor") -> "torch.Tensor":
-    """Apply ImageNet normalization."""
-    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
-    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
-    return (t - mean) / std
-@torch.no_grad()
-def _extract_features(
-    model: nn.Module,
-    dataloader: DataLoader,
-    device: torch.device,
-) -> np.ndarray:
-    """Extract InceptionV3 pool3 features from a dataloader."""
-    features = []
-    for batch in dataloader:
-        batch = batch.to(device)
-        feat = model(batch)
-        if isinstance(feat, tuple):
-            feat = feat[0]
-        features.append(feat.cpu().numpy())
-    return np.concatenate(features, axis=0)
 def _compute_statistics(features: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
@@ -123,8 +127,10 @@ def _compute_statistics(features: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
 def _calculate_fid(
-    mu1: np.ndarray, sigma1: np.ndarray,
-    mu2: np.ndarray, sigma2: np.ndarray,
 ) -> float:
     """Calculate FID given two sets of statistics.
@@ -177,10 +183,10 @@ def compute_fid_from_dirs(
     if len(real_ds) == 0 or len(gen_ds) == 0:
         raise ValueError("Need at least 1 image in each directory")
-    real_loader = DataLoader(real_ds, batch_size=batch_size,
-                              num_workers=num_workers, pin_memory=True)
-    gen_loader = DataLoader(gen_ds, batch_size=batch_size,
-                             num_workers=num_workers, pin_memory=True)
     real_features = _extract_features(model, real_loader, dev)
     gen_features = _extract_features(model, gen_loader, dev)

 from __future__ import annotations
 from pathlib import Path
+from typing import Any
 import numpy as np
     import torch
     import torch.nn as nn
     from torch.utils.data import DataLoader, Dataset
     HAS_TORCH = True
 except ImportError:
     HAS_TORCH = False
+def _load_inception_v3() -> Any:
     """Load InceptionV3 with pool3 features (2048-dim)."""
+    from torchvision.models import Inception_V3_Weights, inception_v3
     model = inception_v3(weights=Inception_V3_Weights.IMAGENET1K_V1)
     # We want features from the avg pool layer (2048-dim)
     return model
+# Guard torch-dependent class and function definitions so the module
+# can be imported safely when torch is not installed.
+if HAS_TORCH:
+    class ImageFolderDataset(Dataset):  # type: ignore[misc]
+        """Simple dataset that loads images from a directory."""
+        def __init__(self, directory: str | Path, image_size: int = 299):
+            self.directory = Path(directory)
+            exts = {".jpg", ".jpeg", ".png", ".webp", ".bmp"}  # accepted suffixes; matched against the lowercased suffix
+            self.files = sorted(  # sorted so a given index always maps to the same file
+                f for f in self.directory.iterdir() if f.suffix.lower() in exts and f.is_file()
+            )
+            self.image_size = image_size  # every image is resized to image_size x image_size
+        def __len__(self) -> int:
+            return len(self.files)
+        def __getitem__(self, idx: int) -> Any:
+            import cv2  # imported inside the method rather than at module level
+            img = cv2.imread(str(self.files[idx]))
+            if img is None:
+                # Unreadable file: fall back to an all-zero (3, size, size) tensor; it is returned without ImageNet normalization
+                return torch.zeros(3, self.image_size, self.image_size)
             img = cv2.resize(img, (self.image_size, self.image_size))  # forced to a square, aspect ratio is not preserved
             img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # cv2.imread yields BGR channel order; swap to RGB
+            # Divide pixel values by 255, reorder HWC -> CHW, then apply ImageNet normalization
+            t = torch.from_numpy(img.astype(np.float32) / 255.0).permute(2, 0, 1)
+            t = _imagenet_normalize(t)
+            return t
+    class NumpyArrayDataset(Dataset):  # type: ignore[misc]
+        """Dataset wrapping a list of numpy arrays."""
+        def __init__(self, images: list[np.ndarray], image_size: int = 299):
+            self.images = images  # the caller's list is kept by reference, not copied
+            self.image_size = image_size
+        def __len__(self) -> int:
+            return len(self.images)
+        def __getitem__(self, idx: int) -> Any:
+            import cv2  # imported inside the method rather than at module level
+            img = self.images[idx]
+            if img.shape[:2] != (self.image_size, self.image_size):  # resize only when not already image_size x image_size
+                img = cv2.resize(img, (self.image_size, self.image_size))
+            if img.shape[2] == 3:  # NOTE(review): a 2-D (grayscale) array has no shape[2] and would raise IndexError; confirm inputs are always HxWxC
+                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # assumes 3-channel inputs are BGR (OpenCV order); TODO confirm with callers
+            t = torch.from_numpy(img.astype(np.float32) / 255.0).permute(2, 0, 1)  # divide by 255, then HWC -> CHW
+            t = _imagenet_normalize(t)
+            return t
+    def _imagenet_normalize(t: Any) -> Any:
+        """Apply ImageNet normalization."""
+        mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)  # one value per channel, shaped to broadcast over (3, H, W)
+        std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)  # rebuilt on every call, like mean
+        return (t - mean) / std  # produces a new tensor; t itself is not modified
+    @torch.no_grad()  # nothing executed inside this function records gradients
+    def _extract_features(
+        model: Any,
+        dataloader: Any,
+        device: Any,
+    ) -> np.ndarray:
+        """Extract InceptionV3 pool3 features from a dataloader."""
+        features = []  # one numpy array per batch, joined at the end
+        for batch in dataloader:
+            batch = batch.to(device)  # move the batch to the device passed in; the model is not moved here
+            feat = model(batch)
+            if isinstance(feat, tuple):  # when the model returns a tuple, only its first element is kept
+                feat = feat[0]
+            features.append(feat.cpu().numpy())  # copy to host memory before converting to numpy
+        return np.concatenate(features, axis=0)  # joins batches along axis 0; raises ValueError if the dataloader yielded no batches
 def _compute_statistics(features: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
 def _calculate_fid(
+    mu1: np.ndarray,
+    sigma1: np.ndarray,
+    mu2: np.ndarray,
+    sigma2: np.ndarray,
 ) -> float:
     """Calculate FID given two sets of statistics.
     if len(real_ds) == 0 or len(gen_ds) == 0:
         raise ValueError("Need at least 1 image in each directory")
+    real_loader = DataLoader(
+        real_ds, batch_size=batch_size, num_workers=num_workers, pin_memory=True
+    )
+    gen_loader = DataLoader(gen_ds, batch_size=batch_size, num_workers=num_workers, pin_memory=True)
     real_features = _extract_features(model, real_loader, dev)
     gen_features = _extract_features(model, gen_loader, dev)

landmarkdiff/hyperparam.py CHANGED Viewed

@@ -24,7 +24,7 @@ import json
 import math
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Iterator
 def _to_native(val: Any) -> Any:
@@ -99,27 +99,45 @@ class SearchSpace:
         self.params: dict[str, ParamSpec] = {}
     def add_float(
-        self, name: str, low: float, high: float, log_scale: bool = False,
     ) -> SearchSpace:
         """Add a continuous float parameter."""
         self.params[name] = ParamSpec(
-            name=name, param_type="float", low=low, high=high, log_scale=log_scale,
         )
         return self
     def add_int(
-        self, name: str, low: int, high: int, step: int = 1,
     ) -> SearchSpace:
         """Add an integer parameter."""
         self.params[name] = ParamSpec(
-            name=name, param_type="int", low=low, high=high, step=step,
         )
         return self
     def add_choice(self, name: str, choices: list[Any]) -> SearchSpace:
         """Add a categorical parameter."""
         self.params[name] = ParamSpec(
-            name=name, param_type="choice", choices=choices,
         )
         return self
@@ -204,10 +222,7 @@ class HyperparamSearch:
         attempts = 0
         while len(trials) < n_trials and attempts < max_attempts:
             attempts += 1
-            config = {
-                name: spec.sample(rng)
-                for name, spec in self.space.params.items()
-            }
             trial = Trial(
                 trial_id=f"trial_{len(trials):04d}",
                 config=config,
@@ -223,14 +238,11 @@ class HyperparamSearch:
         import itertools
         param_names = list(self.space.params.keys())
-        param_values = [
-            self.space.params[name].grid_values(grid_points)
-            for name in param_names
-        ]
         trials = []
         for combo in itertools.product(*param_values):
-            config = dict(zip(param_names, combo))
             trial = Trial(
                 trial_id=f"trial_{len(trials):04d}",
                 config=config,
@@ -240,7 +252,9 @@ class HyperparamSearch:
         return trials
     def record_result(
-        self, trial_id: str, metrics: dict[str, float],
     ) -> None:
         """Record results for a trial."""
         for trial in self.trials:
@@ -251,7 +265,9 @@ class HyperparamSearch:
         raise KeyError(f"Trial {trial_id} not found")
     def best_trial(
-        self, metric: str = "loss", lower_is_better: bool = True,
     ) -> Trial | None:
         """Get the best completed trial by a metric."""
         completed = [t for t in self.trials if t.status == "completed" and metric in t.result]
@@ -275,7 +291,8 @@ class HyperparamSearch:
             with open(cfg_path, "w") as f:
                 yaml.safe_dump(
                     {"trial_id": trial.trial_id, **native_config},
-                    f, default_flow_style=False,
                 )
         # Save summary index
@@ -321,7 +338,7 @@ class HyperparamSearch:
                 if isinstance(val, float):
                     parts.append(f"{val:>12.6f}")
                 else:
-                    parts.append(f"{str(val):>12s}")
             for m in metric_names:
                 val = trial.result.get(m, float("nan"))
                 parts.append(f"{val:>12.4f}")

 import math
 from dataclasses import dataclass, field
 from pathlib import Path
+from typing import Any
 def _to_native(val: Any) -> Any:
         self.params: dict[str, ParamSpec] = {}
     def add_float(
+        self,
+        name: str,
+        low: float,
+        high: float,
+        log_scale: bool = False,
     ) -> SearchSpace:
         """Add a continuous float parameter."""
         self.params[name] = ParamSpec(  # keyed by name, so re-adding a name replaces the earlier spec
+            name=name,
+            param_type="float",
+            low=low,
+            high=high,
+            log_scale=log_scale,  # forwarded unchanged; its effect on sampling is defined by ParamSpec (not shown here)
         )
         return self  # returns the space itself so add_* calls can be chained
     def add_int(
+        self,
+        name: str,
+        low: int,
+        high: int,
+        step: int = 1,
     ) -> SearchSpace:
         """Add an integer parameter."""
         self.params[name] = ParamSpec(  # keyed by name, so re-adding a name replaces the earlier spec
+            name=name,
+            param_type="int",
+            low=low,
+            high=high,
+            step=step,  # forwarded unchanged; how step is applied is defined by ParamSpec (not shown here)
         )
         return self  # returns the space itself so add_* calls can be chained
     def add_choice(self, name: str, choices: list[Any]) -> SearchSpace:
         """Add a categorical parameter."""
         self.params[name] = ParamSpec(  # keyed by name, so re-adding a name replaces the earlier spec
+            name=name,
+            param_type="choice",
+            choices=choices,  # the caller's list is handed over as-is; no copy is made here
         )
         return self  # returns the space itself so add_* calls can be chained
         attempts = 0
         while len(trials) < n_trials and attempts < max_attempts:
             attempts += 1
+            config = {name: spec.sample(rng) for name, spec in self.space.params.items()}
             trial = Trial(
                 trial_id=f"trial_{len(trials):04d}",
                 config=config,
         import itertools
         param_names = list(self.space.params.keys())
+        param_values = [self.space.params[name].grid_values(grid_points) for name in param_names]
         trials = []
         for combo in itertools.product(*param_values):
+            config = dict(zip(param_names, combo, strict=False))
             trial = Trial(
                 trial_id=f"trial_{len(trials):04d}",
                 config=config,
         return trials
     def record_result(
+        self,
+        trial_id: str,
+        metrics: dict[str, float],
     ) -> None:
         """Record results for a trial."""
         for trial in self.trials:
         raise KeyError(f"Trial {trial_id} not found")
     def best_trial(
+        self,
+        metric: str = "loss",
+        lower_is_better: bool = True,
     ) -> Trial | None:
         """Get the best completed trial by a metric."""
         completed = [t for t in self.trials if t.status == "completed" and metric in t.result]
             with open(cfg_path, "w") as f:
                 yaml.safe_dump(
                     {"trial_id": trial.trial_id, **native_config},
+                    f,
+                    default_flow_style=False,
                 )
         # Save summary index
                 if isinstance(val, float):
                     parts.append(f"{val:>12.6f}")
                 else:
+                    parts.append(f"{val!s:>12s}")
             for m in metric_names:
                 val = trial.result.get(m, float("nan"))
                 parts.append(f"{val:>12.4f}")

landmarkdiff/inference.py CHANGED Viewed

@@ -13,7 +13,7 @@ from __future__ import annotations
 import sys
 from pathlib import Path
-from typing import Optional
 import cv2
 import numpy as np
@@ -21,11 +21,13 @@ import torch
 from PIL import Image
 from landmarkdiff.landmarks import FaceLandmarks, extract_landmarks, render_landmark_image
-from landmarkdiff.conditioning import generate_conditioning
 from landmarkdiff.manipulation import apply_procedure_preset
 from landmarkdiff.masking import generate_surgical_mask, mask_to_3channel
 from landmarkdiff.synthetic.tps_warp import warp_image_tps
 def get_device() -> torch.device:
     if torch.backends.mps.is_available():
@@ -102,6 +104,7 @@ def mask_composite(
     if use_laplacian:
         try:
             from landmarkdiff.postprocess import laplacian_pyramid_blend
             return laplacian_pyramid_blend(corrected, original, mask_f)
         except Exception:
             pass
@@ -109,8 +112,7 @@ def mask_composite(
     # Fallback: simple alpha blend
     mask_3ch = mask_to_3channel(mask_f)
     result = (
-        corrected.astype(np.float32) * mask_3ch
-        + original.astype(np.float32) * (1.0 - mask_3ch)
     ).astype(np.uint8)
     return result
@@ -170,10 +172,10 @@ class LandmarkDiffPipeline:
         controlnet_id: str = "CrucibleAI/ControlNetMediaPipeFace",
         controlnet_checkpoint: str | None = None,
         base_model_id: str | None = None,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
         ip_adapter_scale: float = 0.6,
-        clinical_flags: Optional["ClinicalFlags"] = None,
         displacement_model_path: str | None = None,
     ):
         self.mode = mode
@@ -187,6 +189,7 @@ class LandmarkDiffPipeline:
         if displacement_model_path:
             try:
                 from landmarkdiff.displacement_model import DisplacementModel
                 self._displacement_model = DisplacementModel.load(displacement_model_path)
                 print(f"Displacement model loaded: {self._displacement_model.procedures}")
             except Exception as e:
@@ -224,8 +227,8 @@ class LandmarkDiffPipeline:
     def _load_controlnet(self) -> None:
         from diffusers import (
             ControlNetModel,
-            StableDiffusionControlNetPipeline,
             DPMSolverMultistepScheduler,
         )
         if self.controlnet_checkpoint:
@@ -236,12 +239,15 @@ class LandmarkDiffPipeline:
                 ckpt_path = ckpt_path / "controlnet_ema"
             print(f"Loading fine-tuned ControlNet from {ckpt_path}...")
             controlnet = ControlNetModel.from_pretrained(
-                str(ckpt_path), torch_dtype=self.dtype,
             )
         else:
             print(f"Loading ControlNet from {self.controlnet_id}...")
             controlnet = ControlNetModel.from_pretrained(
-                self.controlnet_id, subfolder="diffusion_sd15", torch_dtype=self.dtype,
             )
         print(f"Loading base model from {self.base_model_id}...")
         self._pipe = StableDiffusionControlNetPipeline.from_pretrained(
@@ -287,8 +293,8 @@ class LandmarkDiffPipeline:
     def _load_img2img(self) -> None:
         from diffusers import (
-            StableDiffusionImg2ImgPipeline,
             DPMSolverMultistepScheduler,
         )
         print(f"Loading SD1.5 img2img from {self.base_model_id}...")
@@ -298,9 +304,7 @@ class LandmarkDiffPipeline:
             safety_checker=None,
             requires_safety_checker=False,
         )
-        self._pipe.scheduler = DPMSolverMultistepScheduler.from_config(
-            self._pipe.scheduler.config
-        )
         self._apply_device_optimizations()
     def _apply_device_optimizations(self) -> None:
@@ -329,8 +333,8 @@ class LandmarkDiffPipeline:
         guidance_scale: float = 9.0,
         controlnet_conditioning_scale: float = 0.9,
         strength: float = 0.5,
-        seed: Optional[int] = None,
-        clinical_flags: Optional["ClinicalFlags"] = None,
         postprocess: bool = True,
         use_gfpgan: bool = False,
     ) -> dict:
@@ -351,12 +355,14 @@ class LandmarkDiffPipeline:
         manipulation_mode = "preset"
         if self._displacement_model and procedure in self._displacement_model.procedures:
             try:
-                from landmarkdiff.displacement_model import DisplacementModel
                 rng = np.random.default_rng(seed) if seed is not None else np.random.default_rng()
                 # Map UI intensity (0-100) to displacement model intensity (0-2)
                 dm_intensity = intensity / 50.0  # 50 -> 1.0x mean displacement
                 displacement = self._displacement_model.get_displacement_field(
-                    procedure, intensity=dm_intensity, noise_scale=0.3, rng=rng,
                 )
                 # Apply displacement to landmarks
                 new_lm = face.landmarks.copy()
@@ -367,21 +373,34 @@ class LandmarkDiffPipeline:
                 new_lm[:, 1] = np.clip(new_lm[:, 1], 0.01, 0.99)
                 manipulated = FaceLandmarks(
                     landmarks=new_lm,
-                    image_width=512, image_height=512,
                     confidence=face.confidence,
                 )
                 manipulation_mode = "displacement_model"
             except Exception:
                 manipulated = apply_procedure_preset(
-                    face, procedure, intensity, image_size=512, clinical_flags=flags,
                 )
         else:
             manipulated = apply_procedure_preset(
-                face, procedure, intensity, image_size=512, clinical_flags=flags,
             )
         landmark_img = render_landmark_image(manipulated, 512, 512)
         mask = generate_surgical_mask(
-            face, procedure, 512, 512, clinical_flags=flags,
         )
         generator = None
@@ -398,14 +417,24 @@ class LandmarkDiffPipeline:
         elif self.mode in ("controlnet", "controlnet_ip"):
             ip_image = numpy_to_pil(image_512) if self._ip_adapter_loaded else None
             raw_output = self._generate_controlnet(
-                image_512, landmark_img, prompt, num_inference_steps,
-                guidance_scale, controlnet_conditioning_scale, generator,
                 ip_adapter_image=ip_image,
             )
         else:
             raw_output = self._generate_img2img(
-                tps_warped, mask, prompt, num_inference_steps,
-                guidance_scale, strength, generator,
             )
         # Step 2: Post-processing for photorealism (neural + classical pipeline)
@@ -413,6 +442,7 @@ class LandmarkDiffPipeline:
         restore_used = "none"
         if postprocess and self.mode != "tps":
             from landmarkdiff.postprocess import full_postprocess
             pp_result = full_postprocess(
                 generated=raw_output,
                 original=image_512,
@@ -450,8 +480,13 @@ class LandmarkDiffPipeline:
         }
     def _generate_controlnet(
-        self, image: np.ndarray, conditioning: np.ndarray,
-        prompt: str, steps: int, cfg: float, cn_scale: float,
         generator: torch.Generator | None,
         ip_adapter_image: Image.Image | None = None,
     ) -> np.ndarray:
@@ -470,8 +505,13 @@ class LandmarkDiffPipeline:
         return pil_to_numpy(result.images[0])
     def _generate_img2img(
-        self, image: np.ndarray, mask: np.ndarray,
-        prompt: str, steps: int, cfg: float, strength: float,
         generator: torch.Generator | None,
     ) -> np.ndarray:
         result = self._pipe(
@@ -558,7 +598,8 @@ def run_inference(
         sys.exit(1)
     pipe = LandmarkDiffPipeline(
-        mode=mode, ip_adapter_scale=ip_adapter_scale,
         controlnet_checkpoint=controlnet_checkpoint,
         displacement_model_path=displacement_model_path,
     )
@@ -594,18 +635,29 @@ if __name__ == "__main__":
     parser.add_argument("--output", default="scripts/inference_output")
     parser.add_argument("--seed", type=int, default=42)
     parser.add_argument(
-        "--mode", default="img2img",
         choices=["img2img", "controlnet", "controlnet_ip", "tps"],
     )
     parser.add_argument("--ip-adapter-scale", type=float, default=0.6)
-    parser.add_argument("--checkpoint", default=None,
-                        help="Path to fine-tuned ControlNet checkpoint")
-    parser.add_argument("--displacement-model", default=None,
-                        help="Path to displacement_model.npz for data-driven manipulation")
     args = parser.parse_args()
     run_inference(
-        args.image, args.procedure, args.intensity, args.output,
-        args.seed, args.mode, args.ip_adapter_scale, args.checkpoint,
         args.displacement_model,
     )

 import sys
 from pathlib import Path
+from typing import TYPE_CHECKING
 import cv2
 import numpy as np
 from PIL import Image
 from landmarkdiff.landmarks import FaceLandmarks, extract_landmarks, render_landmark_image
 from landmarkdiff.manipulation import apply_procedure_preset
 from landmarkdiff.masking import generate_surgical_mask, mask_to_3channel
 from landmarkdiff.synthetic.tps_warp import warp_image_tps
+if TYPE_CHECKING:
+    from landmarkdiff.clinical import ClinicalFlags
 def get_device() -> torch.device:
     if torch.backends.mps.is_available():
     if use_laplacian:
         try:
             from landmarkdiff.postprocess import laplacian_pyramid_blend
             return laplacian_pyramid_blend(corrected, original, mask_f)
         except Exception:
             pass
     # Fallback: simple alpha blend
     mask_3ch = mask_to_3channel(mask_f)
     result = (
+        corrected.astype(np.float32) * mask_3ch + original.astype(np.float32) * (1.0 - mask_3ch)
     ).astype(np.uint8)
     return result
         controlnet_id: str = "CrucibleAI/ControlNetMediaPipeFace",
         controlnet_checkpoint: str | None = None,
         base_model_id: str | None = None,
+        device: torch.device | None = None,
+        dtype: torch.dtype | None = None,
         ip_adapter_scale: float = 0.6,
+        clinical_flags: ClinicalFlags | None = None,
         displacement_model_path: str | None = None,
     ):
         self.mode = mode
         if displacement_model_path:
             try:
                 from landmarkdiff.displacement_model import DisplacementModel
                 self._displacement_model = DisplacementModel.load(displacement_model_path)
                 print(f"Displacement model loaded: {self._displacement_model.procedures}")
             except Exception as e:
     def _load_controlnet(self) -> None:
         from diffusers import (
             ControlNetModel,
             DPMSolverMultistepScheduler,
+            StableDiffusionControlNetPipeline,
         )
         if self.controlnet_checkpoint:
                 ckpt_path = ckpt_path / "controlnet_ema"
             print(f"Loading fine-tuned ControlNet from {ckpt_path}...")
             controlnet = ControlNetModel.from_pretrained(
+                str(ckpt_path),
+                torch_dtype=self.dtype,
             )
         else:
             print(f"Loading ControlNet from {self.controlnet_id}...")
             controlnet = ControlNetModel.from_pretrained(
+                self.controlnet_id,
+                subfolder="diffusion_sd15",
+                torch_dtype=self.dtype,
             )
         print(f"Loading base model from {self.base_model_id}...")
         self._pipe = StableDiffusionControlNetPipeline.from_pretrained(
     def _load_img2img(self) -> None:
         from diffusers import (
             DPMSolverMultistepScheduler,
+            StableDiffusionImg2ImgPipeline,
         )
         print(f"Loading SD1.5 img2img from {self.base_model_id}...")
             safety_checker=None,
             requires_safety_checker=False,
         )
+        self._pipe.scheduler = DPMSolverMultistepScheduler.from_config(self._pipe.scheduler.config)
         self._apply_device_optimizations()
     def _apply_device_optimizations(self) -> None:
         guidance_scale: float = 9.0,
         controlnet_conditioning_scale: float = 0.9,
         strength: float = 0.5,
+        seed: int | None = None,
+        clinical_flags: ClinicalFlags | None = None,
         postprocess: bool = True,
         use_gfpgan: bool = False,
     ) -> dict:
         manipulation_mode = "preset"
         if self._displacement_model and procedure in self._displacement_model.procedures:
             try:
                 rng = np.random.default_rng(seed) if seed is not None else np.random.default_rng()
                 # Map UI intensity (0-100) to displacement model intensity (0-2)
                 dm_intensity = intensity / 50.0  # 50 -> 1.0x mean displacement
                 displacement = self._displacement_model.get_displacement_field(
+                    procedure,
+                    intensity=dm_intensity,
+                    noise_scale=0.3,
+                    rng=rng,
                 )
                 # Apply displacement to landmarks
                 new_lm = face.landmarks.copy()
                 new_lm[:, 1] = np.clip(new_lm[:, 1], 0.01, 0.99)
                 manipulated = FaceLandmarks(
                     landmarks=new_lm,
+                    image_width=512,
+                    image_height=512,
                     confidence=face.confidence,
                 )
                 manipulation_mode = "displacement_model"
             except Exception:
                 manipulated = apply_procedure_preset(
+                    face,
+                    procedure,
+                    intensity,
+                    image_size=512,
+                    clinical_flags=flags,
                 )
         else:
             manipulated = apply_procedure_preset(
+                face,
+                procedure,
+                intensity,
+                image_size=512,
+                clinical_flags=flags,
             )
         landmark_img = render_landmark_image(manipulated, 512, 512)
         mask = generate_surgical_mask(
+            face,
+            procedure,
+            512,
+            512,
+            clinical_flags=flags,
         )
         generator = None
         elif self.mode in ("controlnet", "controlnet_ip"):
             ip_image = numpy_to_pil(image_512) if self._ip_adapter_loaded else None
             raw_output = self._generate_controlnet(
+                image_512,
+                landmark_img,
+                prompt,
+                num_inference_steps,
+                guidance_scale,
+                controlnet_conditioning_scale,
+                generator,
                 ip_adapter_image=ip_image,
             )
         else:
             raw_output = self._generate_img2img(
+                tps_warped,
+                mask,
+                prompt,
+                num_inference_steps,
+                guidance_scale,
+                strength,
+                generator,
             )
         # Step 2: Post-processing for photorealism (neural + classical pipeline)
         restore_used = "none"
         if postprocess and self.mode != "tps":
             from landmarkdiff.postprocess import full_postprocess
             pp_result = full_postprocess(
                 generated=raw_output,
                 original=image_512,
         }
     def _generate_controlnet(
+        self,
+        image: np.ndarray,
+        conditioning: np.ndarray,
+        prompt: str,
+        steps: int,
+        cfg: float,
+        cn_scale: float,
         generator: torch.Generator | None,
         ip_adapter_image: Image.Image | None = None,
     ) -> np.ndarray:
         return pil_to_numpy(result.images[0])
     def _generate_img2img(
+        self,
+        image: np.ndarray,
+        mask: np.ndarray,
+        prompt: str,
+        steps: int,
+        cfg: float,
+        strength: float,
         generator: torch.Generator | None,
     ) -> np.ndarray:
         result = self._pipe(
         sys.exit(1)
     pipe = LandmarkDiffPipeline(
+        mode=mode,
+        ip_adapter_scale=ip_adapter_scale,
         controlnet_checkpoint=controlnet_checkpoint,
         displacement_model_path=displacement_model_path,
     )
     parser.add_argument("--output", default="scripts/inference_output")
     parser.add_argument("--seed", type=int, default=42)
     parser.add_argument(
+        "--mode",
+        default="img2img",
         choices=["img2img", "controlnet", "controlnet_ip", "tps"],
     )
     parser.add_argument("--ip-adapter-scale", type=float, default=0.6)
+    parser.add_argument(
+        "--checkpoint", default=None, help="Path to fine-tuned ControlNet checkpoint"
+    )
+    parser.add_argument(
+        "--displacement-model",
+        default=None,
+        help="Path to displacement_model.npz for data-driven manipulation",
+    )
     args = parser.parse_args()
     run_inference(
+        args.image,
+        args.procedure,
+        args.intensity,
+        args.output,
+        args.seed,
+        args.mode,
+        args.ip_adapter_scale,
+        args.checkpoint,
         args.displacement_model,
     )

landmarkdiff/landmarks.py CHANGED Viewed

@@ -4,7 +4,6 @@ from __future__ import annotations
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Optional
 import cv2
 import mediapipe as mp
@@ -12,39 +11,145 @@ import numpy as np
 # Region color map for visualization (BGR)
 REGION_COLORS: dict[str, tuple[int, int, int]] = {
-    "jawline": (255, 255, 255),     # white
-    "eyebrow_left": (0, 255, 0),    # green
     "eyebrow_right": (0, 255, 0),
-    "eye_left": (255, 255, 0),      # cyan
     "eye_right": (255, 255, 0),
-    "nose": (0, 255, 255),          # yellow
-    "lips": (0, 0, 255),            # red
-    "iris_left": (255, 0, 255),     # magenta
     "iris_right": (255, 0, 255),
 }
 # MediaPipe landmark index groups by anatomical region
 LANDMARK_REGIONS: dict[str, list[int]] = {
     "jawline": [
-        10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
-        397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
-        172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109,
     ],
     "eye_left": [
-        33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246,
     ],
     "eye_right": [
-        362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398,
     ],
     "eyebrow_left": [70, 63, 105, 66, 107, 55, 65, 52, 53, 46],
     "eyebrow_right": [300, 293, 334, 296, 336, 285, 295, 282, 283, 276],
     "nose": [
-        1, 2, 4, 5, 6, 19, 94, 141, 168, 195, 197, 236, 240,
-        274, 275, 278, 279, 294, 326, 327, 360, 363, 370, 456, 460,
     ],
     "lips": [
-        61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291,
-        308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78,
     ],
     "iris_left": [468, 469, 470, 471, 472],
     "iris_right": [473, 474, 475, 476, 477],
@@ -78,7 +183,7 @@ def extract_landmarks(
     image: np.ndarray,
     min_detection_confidence: float = 0.5,
     min_tracking_confidence: float = 0.5,
-) -> Optional[FaceLandmarks]:
     """Extract 478 facial landmarks from an image using MediaPipe Face Mesh.
     Args:
@@ -97,7 +202,9 @@ def extract_landmarks(
         landmarks, confidence = _extract_tasks_api(rgb, min_detection_confidence)
     except Exception:
         try:
-            landmarks, confidence = _extract_solutions_api(rgb, min_detection_confidence, min_tracking_confidence)
         except Exception:
             return None
@@ -115,14 +222,14 @@ def extract_landmarks(
 def _extract_tasks_api(
     rgb: np.ndarray,
     min_confidence: float,
-) -> tuple[Optional[np.ndarray], float]:
     """Extract landmarks using MediaPipe Tasks API (>= 0.10.20)."""
     FaceLandmarker = mp.tasks.vision.FaceLandmarker
     FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
     RunningMode = mp.tasks.vision.RunningMode
     BaseOptions = mp.tasks.BaseOptions
-    import urllib.request
     import tempfile
     # Download model if not cached
     model_path = Path(tempfile.gettempdir()) / "face_landmarker_v2_with_blendshapes.task"
@@ -161,7 +268,7 @@ def _extract_solutions_api(
     rgb: np.ndarray,
     min_detection_confidence: float,
     min_tracking_confidence: float,
-) -> tuple[Optional[np.ndarray], float]:
     """Extract landmarks using legacy MediaPipe Solutions API."""
     with mp.solutions.face_mesh.FaceMesh(
         static_image_mode=True,
@@ -224,8 +331,8 @@ def visualize_landmarks(
 def render_landmark_image(
     face: FaceLandmarks,
-    width: Optional[int] = None,
-    height: Optional[int] = None,
     radius: int = 2,
 ) -> np.ndarray:
     """Render MediaPipe face mesh tessellation on black canvas.
@@ -257,6 +364,7 @@ def render_landmark_image(
     # Draw tessellation mesh (what CrucibleAI ControlNet expects)
     try:
         from mediapipe.tasks.python.vision.face_landmarker import FaceLandmarksConnections
         tessellation = FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION
         contours = FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS

 from dataclasses import dataclass
 from pathlib import Path
 import cv2
 import mediapipe as mp
 # Region color map for visualization (BGR)
 REGION_COLORS: dict[str, tuple[int, int, int]] = {
+    "jawline": (255, 255, 255),  # white
+    "eyebrow_left": (0, 255, 0),  # green
     "eyebrow_right": (0, 255, 0),
+    "eye_left": (255, 255, 0),  # cyan
     "eye_right": (255, 255, 0),
+    "nose": (0, 255, 255),  # yellow
+    "lips": (0, 0, 255),  # red
+    "iris_left": (255, 0, 255),  # magenta
     "iris_right": (255, 0, 255),
 }
 # MediaPipe landmark index groups by anatomical region
 LANDMARK_REGIONS: dict[str, list[int]] = {
     "jawline": [
+        10,
+        338,
+        297,
+        332,
+        284,
+        251,
+        389,
+        356,
+        454,
+        323,
+        361,
+        288,
+        397,
+        365,
+        379,
+        378,
+        400,
+        377,
+        152,
+        148,
+        176,
+        149,
+        150,
+        136,
+        172,
+        58,
+        132,
+        93,
+        234,
+        127,
+        162,
+        21,
+        54,
+        103,
+        67,
+        109,
     ],
     "eye_left": [
+        33,
+        7,
+        163,
+        144,
+        145,
+        153,
+        154,
+        155,
+        133,
+        173,
+        157,
+        158,
+        159,
+        160,
+        161,
+        246,
     ],
     "eye_right": [
+        362,
+        382,
+        381,
+        380,
+        374,
+        373,
+        390,
+        249,
+        263,
+        466,
+        388,
+        387,
+        386,
+        385,
+        384,
+        398,
     ],
     "eyebrow_left": [70, 63, 105, 66, 107, 55, 65, 52, 53, 46],
     "eyebrow_right": [300, 293, 334, 296, 336, 285, 295, 282, 283, 276],
     "nose": [
+        1,
+        2,
+        4,
+        5,
+        6,
+        19,
+        94,
+        141,
+        168,
+        195,
+        197,
+        236,
+        240,
+        274,
+        275,
+        278,
+        279,
+        294,
+        326,
+        327,
+        360,
+        363,
+        370,
+        456,
+        460,
     ],
     "lips": [
+        61,
+        146,
+        91,
+        181,
+        84,
+        17,
+        314,
+        405,
+        321,
+        375,
+        291,
+        308,
+        324,
+        318,
+        402,
+        317,
+        14,
+        87,
+        178,
+        88,
+        95,
+        78,
     ],
     "iris_left": [468, 469, 470, 471, 472],
     "iris_right": [473, 474, 475, 476, 477],
     image: np.ndarray,
     min_detection_confidence: float = 0.5,
     min_tracking_confidence: float = 0.5,
+) -> FaceLandmarks | None:
     """Extract 478 facial landmarks from an image using MediaPipe Face Mesh.
     Args:
         landmarks, confidence = _extract_tasks_api(rgb, min_detection_confidence)
     except Exception:
         try:
+            landmarks, confidence = _extract_solutions_api(
+                rgb, min_detection_confidence, min_tracking_confidence
+            )
         except Exception:
             return None
 def _extract_tasks_api(
     rgb: np.ndarray,
     min_confidence: float,
+) -> tuple[np.ndarray | None, float]:
     """Extract landmarks using MediaPipe Tasks API (>= 0.10.20)."""
     FaceLandmarker = mp.tasks.vision.FaceLandmarker
     FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
     RunningMode = mp.tasks.vision.RunningMode
     BaseOptions = mp.tasks.BaseOptions
     import tempfile
+    import urllib.request
     # Download model if not cached
     model_path = Path(tempfile.gettempdir()) / "face_landmarker_v2_with_blendshapes.task"
     rgb: np.ndarray,
     min_detection_confidence: float,
     min_tracking_confidence: float,
+) -> tuple[np.ndarray | None, float]:
     """Extract landmarks using legacy MediaPipe Solutions API."""
     with mp.solutions.face_mesh.FaceMesh(
         static_image_mode=True,
 def render_landmark_image(
     face: FaceLandmarks,
+    width: int | None = None,
+    height: int | None = None,
     radius: int = 2,
 ) -> np.ndarray:
     """Render MediaPipe face mesh tessellation on black canvas.
     # Draw tessellation mesh (what CrucibleAI ControlNet expects)
     try:
         from mediapipe.tasks.python.vision.face_landmarker import FaceLandmarksConnections
         tessellation = FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION
         contours = FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS

landmarkdiff/log.py CHANGED Viewed

@@ -46,10 +46,12 @@ def setup_logging(
     if not _CONFIGURED:
         handler = logging.StreamHandler(stream or sys.stderr)
-        handler.setFormatter(logging.Formatter(
-            fmt or LOG_FORMAT,
-            datefmt=LOG_DATE_FORMAT,
-        ))
         root_logger.addHandler(handler)
         # Prevent propagation to root logger to avoid duplicate messages
         root_logger.propagate = False

     if not _CONFIGURED:
         handler = logging.StreamHandler(stream or sys.stderr)
+        handler.setFormatter(
+            logging.Formatter(
+                fmt or LOG_FORMAT,
+                datefmt=LOG_DATE_FORMAT,
+            )
+        )
         root_logger.addHandler(handler)
         # Prevent propagation to root logger to avoid duplicate messages
         root_logger.propagate = False

landmarkdiff/losses.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """4-term loss function module for ControlNet fine-tuning.
-L_total = L_diffusion + w_landmark * L_landmark + w_identity * L_identity + w_perceptual * L_perceptual
 Phase A (synthetic TPS data): L_diffusion ONLY. No perceptual loss against
 rubbery TPS warps — it would penalize realism.
@@ -92,11 +93,16 @@ class IdentityLoss:
             return
         try:
             from insightface.app import FaceAnalysis
             self._app = FaceAnalysis(
                 name="buffalo_l",
                 providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
             )
-            ctx_id = device.index if device.type == "cuda" and device.index is not None else (0 if device.type == "cuda" else -1)
             self._app.prepare(ctx_id=ctx_id, det_size=(320, 320))
             self._has_arcface = True
         except Exception:
@@ -114,6 +120,7 @@ class IdentityLoss:
         """
         if self._has_arcface:
             import numpy as np
             embeddings = []
             valid_mask = []
             for i in range(image_tensor.shape[0]):
@@ -152,7 +159,9 @@ class IdentityLoss:
         # Resize to 112x112 for ArcFace
         pred_112 = F.interpolate(pred_crop, size=(112, 112), mode="bilinear", align_corners=False)
-        target_112 = F.interpolate(target_crop, size=(112, 112), mode="bilinear", align_corners=False)
         # Normalize to [-1, 1]
         pred_norm = pred_112 * 2 - 1
@@ -163,7 +172,7 @@ class IdentityLoss:
         target_emb, target_valid = self._extract_embedding(target_norm)
         # Only compute loss for samples where both faces were detected
-        valid = [p and t for p, t in zip(pred_valid, target_valid)]
         if not any(valid):
             return torch.tensor(0.0, device=pred_image.device)
@@ -216,6 +225,7 @@ class PerceptualLoss:
         if self._lpips is None:
             try:
                 import lpips
                 self._lpips = lpips.LPIPS(net="alex").to(device)
                 self._lpips.eval()
                 for p in self._lpips.parameters():
@@ -225,9 +235,9 @@ class PerceptualLoss:
     def __call__(
         self,
-        pred: torch.Tensor,    # (B, 3, H, W) in [0, 1]
         target: torch.Tensor,
-        mask: torch.Tensor,    # (B, 1, H, W) surgical mask [0, 1]
     ) -> torch.Tensor:
         self._ensure_loaded(pred.device)
@@ -289,6 +299,7 @@ class CombinedLoss:
         # or ONNX-based fallback
         if use_differentiable_arcface:
             from landmarkdiff.arcface_torch import ArcFaceLoss
             self.identity_loss = ArcFaceLoss(weights_path=arcface_weights_path)
         else:
             self.identity_loss = IdentityLoss()

 """4-term loss function module for ControlNet fine-tuning.
+L_total = L_diffusion + w_landmark * L_landmark
+        + w_identity * L_identity + w_perceptual * L_perceptual
 Phase A (synthetic TPS data): L_diffusion ONLY. No perceptual loss against
 rubbery TPS warps — it would penalize realism.
             return
         try:
             from insightface.app import FaceAnalysis
             self._app = FaceAnalysis(
                 name="buffalo_l",
                 providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
             )
+            ctx_id = (
+                device.index
+                if device.type == "cuda" and device.index is not None
+                else (0 if device.type == "cuda" else -1)
+            )
             self._app.prepare(ctx_id=ctx_id, det_size=(320, 320))
             self._has_arcface = True
         except Exception:
         """
         if self._has_arcface:
             import numpy as np
             embeddings = []
             valid_mask = []
             for i in range(image_tensor.shape[0]):
         # Resize to 112x112 for ArcFace
         pred_112 = F.interpolate(pred_crop, size=(112, 112), mode="bilinear", align_corners=False)
+        target_112 = F.interpolate(
+            target_crop, size=(112, 112), mode="bilinear", align_corners=False
+        )
         # Normalize to [-1, 1]
         pred_norm = pred_112 * 2 - 1
         target_emb, target_valid = self._extract_embedding(target_norm)
         # Only compute loss for samples where both faces were detected
+        valid = [p and t for p, t in zip(pred_valid, target_valid, strict=False)]
         if not any(valid):
             return torch.tensor(0.0, device=pred_image.device)
         if self._lpips is None:
             try:
                 import lpips
                 self._lpips = lpips.LPIPS(net="alex").to(device)
                 self._lpips.eval()
                 for p in self._lpips.parameters():
     def __call__(
         self,
+        pred: torch.Tensor,  # (B, 3, H, W) in [0, 1]
         target: torch.Tensor,
+        mask: torch.Tensor,  # (B, 1, H, W) surgical mask [0, 1]
     ) -> torch.Tensor:
         self._ensure_loaded(pred.device)
         # or ONNX-based fallback
         if use_differentiable_arcface:
             from landmarkdiff.arcface_torch import ArcFaceLoss
             self.identity_loss = ArcFaceLoss(weights_path=arcface_weights_path)
         else:
             self.identity_loss = IdentityLoss()

landmarkdiff/manipulation.py CHANGED Viewed

@@ -7,11 +7,11 @@ mm inputs only in v3+ with FLAME calibrated metric space.
 from __future__ import annotations
 from dataclasses import dataclass
-from typing import Optional, TYPE_CHECKING
 import numpy as np
-from landmarkdiff.landmarks import FaceLandmarks, LANDMARK_REGIONS
 if TYPE_CHECKING:
     from landmarkdiff.clinical import ClinicalFlags
@@ -23,38 +23,184 @@ class DeformationHandle:
     landmark_index: int
     displacement: np.ndarray  # (2,) or (3,) pixel displacement
-    influence_radius: float   # Gaussian RBF radius in pixels
 # Procedure-specific landmark indices from the technical specification
 PROCEDURE_LANDMARKS: dict[str, list[int]] = {
     "rhinoplasty": [
-        1, 2, 4, 5, 6, 19, 94, 141, 168, 195, 197, 236, 240,
-        274, 275, 278, 279, 294, 326, 327, 360, 363, 370, 456, 460,
     ],
     "blepharoplasty": [
-        33, 7, 163, 144, 145, 153, 154, 155, 157, 158, 159, 160, 161, 246,
-        362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386,
-        385, 384, 398,
     ],
     "rhytidectomy": [
-        10, 21, 54, 58, 67, 93, 103, 109, 127, 132, 136, 150, 162, 172,
-        176, 187, 207, 213, 234, 284, 297, 323, 332, 338, 356, 361, 365,
-        379, 389, 397, 400, 427, 454,
     ],
     "orthognathic": [
-        0, 17, 18, 36, 37, 39, 40, 57, 61, 78, 80, 81, 82, 84, 87, 88,
-        91, 95, 146, 167, 169, 170, 175, 181, 191, 200, 201, 202, 204,
-        208, 211, 212, 214, 269, 270, 291, 311, 312, 317, 321, 324, 325,
-        375, 396, 405, 407, 415,
     ],
     "brow_lift": [
-        70, 63, 105, 66, 107,  # left brow
-        300, 293, 334, 296, 336,  # right brow
-        9, 8, 10, 109, 67, 103, 338, 297, 332, # forehead/upper face
     ],
     "mentoplasty": [
-        148, 149, 150, 152, 171, 175, 176, 377,
     ],
 }
 # Default influence radii per procedure (in pixels at 512x512)
@@ -78,7 +224,7 @@ def gaussian_rbf_deform(
     displacement = handle.displacement[:2]
     distances_sq = np.sum((landmarks[:, :2] - center) ** 2, axis=1)
-    weights = np.exp(-distances_sq / (2.0 * handle.influence_radius ** 2))
     result[:, 0] += displacement[0] * weights
     result[:, 1] += displacement[1] * weights
@@ -94,8 +240,8 @@ def apply_procedure_preset(
     procedure: str,
     intensity: float = 50.0,
     image_size: int = 512,
-    clinical_flags: Optional["ClinicalFlags"] = None,
-    displacement_model_path: Optional[str] = None,
     noise_scale: float = 0.0,
 ) -> FaceLandmarks:
     """Apply a surgical procedure preset to landmarks.
@@ -123,7 +269,11 @@ def apply_procedure_preset(
     # Data-driven displacement mode
     if displacement_model_path is not None:
         return _apply_data_driven(
-            face, procedure, scale, displacement_model_path, noise_scale,
         )
     indices = PROCEDURE_LANDMARKS[procedure]
@@ -140,6 +290,7 @@ def apply_procedure_preset(
     # Bell's palsy: remove handles on the affected (paralyzed) side
     if clinical_flags and clinical_flags.bells_palsy:
         from landmarkdiff.clinical import get_bells_palsy_side_indices
         affected = get_bells_palsy_side_indices(clinical_flags.bells_palsy_side)
         affected_indices = set()
         for region_indices in affected.values():
@@ -219,48 +370,58 @@ def _get_procedure_handles(
         left_alar = [240, 236, 141, 363, 370]
         for idx in left_alar:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([2.5 * scale, 0.0]),
-                    influence_radius=radius * 0.6,
-                ))
         # right nostril -> move LEFT (-X)
         right_alar = [460, 456, 274, 275, 278, 279]
         for idx in right_alar:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([-2.5 * scale, 0.0]),
-                    influence_radius=radius * 0.6,
-                ))
         # --- Tip refinement: subtle upward rotation + narrowing ---
         tip_indices = [1, 2, 94, 19]
         for idx in tip_indices:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.0, -2.0 * scale]),
-                    influence_radius=radius * 0.5,
-                ))
         # --- Dorsum narrowing: bilateral squeeze of nasal bridge ---
         dorsum_left = [195, 197, 236]
         for idx in dorsum_left:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([1.5 * scale, 0.0]),
-                    influence_radius=radius * 0.5,
-                ))
         dorsum_right = [326, 327, 456]
         for idx in dorsum_right:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([-1.5 * scale, 0.0]),
-                    influence_radius=radius * 0.5,
-                ))
     elif procedure == "blepharoplasty":
         # --- Upper lid elevation (primary effect) ---
@@ -268,31 +429,37 @@ def _get_procedure_handles(
         upper_lid_right = [386, 385, 384]
         for idx in upper_lid_left + upper_lid_right:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.0, -2.0 * scale]),
-                    influence_radius=radius,
-                ))
         # --- Medial/lateral lid corners: less displacement (tapered) ---
         corner_left = [158, 157, 133, 33]
         corner_right = [387, 388, 362, 263]
         for idx in corner_left + corner_right:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.0, -0.8 * scale]),
-                    influence_radius=radius * 0.7,
-                ))
         # --- Subtle lower lid tightening ---
         lower_lid_left = [145, 153, 154]
         lower_lid_right = [374, 380, 381]
         for idx in lower_lid_left + lower_lid_right:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.0, 0.5 * scale]),
-                    influence_radius=radius * 0.5,
-                ))
     elif procedure == "rhytidectomy":
         # Different displacement vectors by anatomical sub-region.
@@ -300,82 +467,100 @@ def _get_procedure_handles(
         jowl_left = [132, 136, 172, 58, 150, 176]
         for idx in jowl_left:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([-2.5 * scale, -3.0 * scale]),
-                    influence_radius=radius,
-                ))
         jowl_right = [361, 365, 397, 288, 379, 400]
         for idx in jowl_right:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([2.5 * scale, -3.0 * scale]),
-                    influence_radius=radius,
-                ))
         # Chin/submental: upward only (no lateral)
         chin = [152, 148, 377, 378]
         for idx in chin:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.0, -2.0 * scale]),
-                    influence_radius=radius * 0.8,
-                ))
         # Temple/upper face: very mild lift
         temple_left = [10, 21, 54, 67, 103, 109, 162, 127]
         temple_right = [284, 297, 332, 338, 323, 356, 389, 454]
         for idx in temple_left:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([-0.5 * scale, -1.0 * scale]),
-                    influence_radius=radius * 0.6,
-                ))
         for idx in temple_right:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.5 * scale, -1.0 * scale]),
-                    influence_radius=radius * 0.6,
-                ))
     elif procedure == "orthognathic":
         # --- Mandible repositioning: move jaw up and forward (visible as upward in 2D) ---
         lower_jaw = [17, 18, 200, 201, 202, 204, 208, 211, 212, 214]
         for idx in lower_jaw:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.0, -3.0 * scale]),
-                    influence_radius=radius,
-                ))
         # --- Chin projection: move chin point forward/upward ---
         chin_pts = [175, 170, 169, 167, 396]
         for idx in chin_pts:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.0, -2.0 * scale]),
-                    influence_radius=radius * 0.7,
-                ))
         # --- Lateral jaw: bilateral symmetric inward pull for narrowing ---
         jaw_left = [57, 61, 78, 91, 95, 146, 181]
         for idx in jaw_left:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([1.5 * scale, -1.0 * scale]),
-                    influence_radius=radius * 0.8,
-                ))
         jaw_right = [291, 311, 312, 321, 324, 325, 375, 405]
         for idx in jaw_right:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([-1.5 * scale, -1.0 * scale]),
-                    influence_radius=radius * 0.8,
-                ))
     elif procedure == "brow_lift":
         # --- Brow elevation ---
@@ -386,56 +571,68 @@ def _get_procedure_handles(
         left_weights = [0.7, 0.8, 0.9, 1.0, 1.1]
         for i, idx in enumerate(brow_left):
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.0, -4.0 * left_weights[i] * scale]),
-                    influence_radius=radius,
-                ))
         right_weights = [0.7, 0.8, 0.9, 1.0, 1.1]
         for i, idx in enumerate(brow_right):
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.0, -4.0 * right_weights[i] * scale]),
-                    influence_radius=radius,
-                ))
         # --- Forehead smoothing / subtle lift ---
         forehead = [9, 8, 10, 109, 67, 103, 338, 297, 332]
         for idx in forehead:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.0, -1.5 * scale]),
-                    influence_radius=radius * 1.2,
-                ))
     elif procedure == "mentoplasty":
         # --- Chin tip advancement: move chin forward (upward in 2D) ---
         chin_tip = [152, 175]
         for idx in chin_tip:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.0, -4.0 * scale]),
-                    influence_radius=radius,
-                ))
         # --- Lower chin contour: follow tip with softer displacement ---
         lower_contour = [148, 149, 150, 176, 377]
         for idx in lower_contour:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.0, -2.5 * scale]),
-                    influence_radius=radius * 0.8,
-                ))
         # --- Jaw angles: minimal upward pull for natural transition ---
         jaw_angles = [171, 396]
         for idx in jaw_angles:
             if idx in indices:
-                handles.append(DeformationHandle(
-                    landmark_index=idx,
-                    displacement=np.array([0.0, -1.0 * scale]),
-                    influence_radius=radius * 0.6,
-                ))
     return handles

 from __future__ import annotations
 from dataclasses import dataclass
+from typing import TYPE_CHECKING
 import numpy as np
+from landmarkdiff.landmarks import FaceLandmarks
 if TYPE_CHECKING:
     from landmarkdiff.clinical import ClinicalFlags
     landmark_index: int
     displacement: np.ndarray  # (2,) or (3,) pixel displacement
+    influence_radius: float  # Gaussian RBF radius in pixels
 # Procedure-specific landmark indices from the technical specification
 PROCEDURE_LANDMARKS: dict[str, list[int]] = {
     "rhinoplasty": [
+        1,
+        2,
+        4,
+        5,
+        6,
+        19,
+        94,
+        141,
+        168,
+        195,
+        197,
+        236,
+        240,
+        274,
+        275,
+        278,
+        279,
+        294,
+        326,
+        327,
+        360,
+        363,
+        370,
+        456,
+        460,
     ],
     "blepharoplasty": [
+        33,
+        7,
+        163,
+        144,
+        145,
+        153,
+        154,
+        155,
+        157,
+        158,
+        159,
+        160,
+        161,
+        246,
+        362,
+        382,
+        381,
+        380,
+        374,
+        373,
+        390,
+        249,
+        263,
+        466,
+        388,
+        387,
+        386,
+        385,
+        384,
+        398,
     ],
     "rhytidectomy": [
+        10,
+        21,
+        54,
+        58,
+        67,
+        93,
+        103,
+        109,
+        127,
+        132,
+        136,
+        150,
+        162,
+        172,
+        176,
+        187,
+        207,
+        213,
+        234,
+        284,
+        297,
+        323,
+        332,
+        338,
+        356,
+        361,
+        365,
+        379,
+        389,
+        397,
+        400,
+        427,
+        454,
     ],
     "orthognathic": [
+        0,
+        17,
+        18,
+        36,
+        37,
+        39,
+        40,
+        57,
+        61,
+        78,
+        80,
+        81,
+        82,
+        84,
+        87,
+        88,
+        91,
+        95,
+        146,
+        167,
+        169,
+        170,
+        175,
+        181,
+        191,
+        200,
+        201,
+        202,
+        204,
+        208,
+        211,
+        212,
+        214,
+        269,
+        270,
+        291,
+        311,
+        312,
+        317,
+        321,
+        324,
+        325,
+        375,
+        396,
+        405,
+        407,
+        415,
     ],
     "brow_lift": [
+        70,
+        63,
+        105,
+        66,
+        107,  # left brow
+        300,
+        293,
+        334,
+        296,
+        336,  # right brow
+        9,
+        8,
+        10,
+        109,
+        67,
+        103,
+        338,
+        297,
+        332,  # forehead/upper face
     ],
     "mentoplasty": [
+        148,
+        149,
+        150,
+        152,
+        171,
+        175,
+        176,
+        377,
     ],
 }
 # Default influence radii per procedure (in pixels at 512x512)
     displacement = handle.displacement[:2]
     distances_sq = np.sum((landmarks[:, :2] - center) ** 2, axis=1)
+    weights = np.exp(-distances_sq / (2.0 * handle.influence_radius**2))
     result[:, 0] += displacement[0] * weights
     result[:, 1] += displacement[1] * weights
     procedure: str,
     intensity: float = 50.0,
     image_size: int = 512,
+    clinical_flags: ClinicalFlags | None = None,
+    displacement_model_path: str | None = None,
     noise_scale: float = 0.0,
 ) -> FaceLandmarks:
     """Apply a surgical procedure preset to landmarks.
     # Data-driven displacement mode
     if displacement_model_path is not None:
         return _apply_data_driven(
+            face,
+            procedure,
+            scale,
+            displacement_model_path,
+            noise_scale,
         )
     indices = PROCEDURE_LANDMARKS[procedure]
     # Bell's palsy: remove handles on the affected (paralyzed) side
     if clinical_flags and clinical_flags.bells_palsy:
         from landmarkdiff.clinical import get_bells_palsy_side_indices
         affected = get_bells_palsy_side_indices(clinical_flags.bells_palsy_side)
         affected_indices = set()
         for region_indices in affected.values():
         left_alar = [240, 236, 141, 363, 370]
         for idx in left_alar:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([2.5 * scale, 0.0]),
+                        influence_radius=radius * 0.6,
+                    )
+                )
         # right nostril -> move LEFT (-X)
         right_alar = [460, 456, 274, 275, 278, 279]
         for idx in right_alar:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([-2.5 * scale, 0.0]),
+                        influence_radius=radius * 0.6,
+                    )
+                )
         # --- Tip refinement: subtle upward rotation + narrowing ---
         tip_indices = [1, 2, 94, 19]
         for idx in tip_indices:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.0, -2.0 * scale]),
+                        influence_radius=radius * 0.5,
+                    )
+                )
         # --- Dorsum narrowing: bilateral squeeze of nasal bridge ---
         dorsum_left = [195, 197, 236]
         for idx in dorsum_left:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([1.5 * scale, 0.0]),
+                        influence_radius=radius * 0.5,
+                    )
+                )
         dorsum_right = [326, 327, 456]
         for idx in dorsum_right:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([-1.5 * scale, 0.0]),
+                        influence_radius=radius * 0.5,
+                    )
+                )
     elif procedure == "blepharoplasty":
         # --- Upper lid elevation (primary effect) ---
         upper_lid_right = [386, 385, 384]
         for idx in upper_lid_left + upper_lid_right:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.0, -2.0 * scale]),
+                        influence_radius=radius,
+                    )
+                )
         # --- Medial/lateral lid corners: less displacement (tapered) ---
         corner_left = [158, 157, 133, 33]
         corner_right = [387, 388, 362, 263]
         for idx in corner_left + corner_right:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.0, -0.8 * scale]),
+                        influence_radius=radius * 0.7,
+                    )
+                )
         # --- Subtle lower lid tightening ---
         lower_lid_left = [145, 153, 154]
         lower_lid_right = [374, 380, 381]
         for idx in lower_lid_left + lower_lid_right:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.0, 0.5 * scale]),
+                        influence_radius=radius * 0.5,
+                    )
+                )
     elif procedure == "rhytidectomy":
         # Different displacement vectors by anatomical sub-region.
         jowl_left = [132, 136, 172, 58, 150, 176]
         for idx in jowl_left:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([-2.5 * scale, -3.0 * scale]),
+                        influence_radius=radius,
+                    )
+                )
         jowl_right = [361, 365, 397, 288, 379, 400]
         for idx in jowl_right:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([2.5 * scale, -3.0 * scale]),
+                        influence_radius=radius,
+                    )
+                )
         # Chin/submental: upward only (no lateral)
         chin = [152, 148, 377, 378]
         for idx in chin:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.0, -2.0 * scale]),
+                        influence_radius=radius * 0.8,
+                    )
+                )
         # Temple/upper face: very mild lift
         temple_left = [10, 21, 54, 67, 103, 109, 162, 127]
         temple_right = [284, 297, 332, 338, 323, 356, 389, 454]
         for idx in temple_left:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([-0.5 * scale, -1.0 * scale]),
+                        influence_radius=radius * 0.6,
+                    )
+                )
         for idx in temple_right:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.5 * scale, -1.0 * scale]),
+                        influence_radius=radius * 0.6,
+                    )
+                )
     elif procedure == "orthognathic":
         # --- Mandible repositioning: move jaw up and forward (visible as upward in 2D) ---
         lower_jaw = [17, 18, 200, 201, 202, 204, 208, 211, 212, 214]
         for idx in lower_jaw:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.0, -3.0 * scale]),
+                        influence_radius=radius,
+                    )
+                )
         # --- Chin projection: move chin point forward/upward ---
         chin_pts = [175, 170, 169, 167, 396]
         for idx in chin_pts:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.0, -2.0 * scale]),
+                        influence_radius=radius * 0.7,
+                    )
+                )
         # --- Lateral jaw: bilateral symmetric inward pull for narrowing ---
         jaw_left = [57, 61, 78, 91, 95, 146, 181]
         for idx in jaw_left:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([1.5 * scale, -1.0 * scale]),
+                        influence_radius=radius * 0.8,
+                    )
+                )
         jaw_right = [291, 311, 312, 321, 324, 325, 375, 405]
         for idx in jaw_right:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([-1.5 * scale, -1.0 * scale]),
+                        influence_radius=radius * 0.8,
+                    )
+                )
     elif procedure == "brow_lift":
         # --- Brow elevation ---
         left_weights = [0.7, 0.8, 0.9, 1.0, 1.1]
         for i, idx in enumerate(brow_left):
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.0, -4.0 * left_weights[i] * scale]),
+                        influence_radius=radius,
+                    )
+                )
         right_weights = [0.7, 0.8, 0.9, 1.0, 1.1]
         for i, idx in enumerate(brow_right):
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.0, -4.0 * right_weights[i] * scale]),
+                        influence_radius=radius,
+                    )
+                )
         # --- Forehead smoothing / subtle lift ---
         forehead = [9, 8, 10, 109, 67, 103, 338, 297, 332]
         for idx in forehead:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.0, -1.5 * scale]),
+                        influence_radius=radius * 1.2,
+                    )
+                )
     elif procedure == "mentoplasty":
         # --- Chin tip advancement: move chin forward (upward in 2D) ---
         chin_tip = [152, 175]
         for idx in chin_tip:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.0, -4.0 * scale]),
+                        influence_radius=radius,
+                    )
+                )
         # --- Lower chin contour: follow tip with softer displacement ---
         lower_contour = [148, 149, 150, 176, 377]
         for idx in lower_contour:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.0, -2.5 * scale]),
+                        influence_radius=radius * 0.8,
+                    )
+                )
         # --- Jaw angles: minimal upward pull for natural transition ---
         jaw_angles = [171, 396]
         for idx in jaw_angles:
             if idx in indices:
+                handles.append(
+                    DeformationHandle(
+                        landmark_index=idx,
+                        displacement=np.array([0.0, -1.0 * scale]),
+                        influence_radius=radius * 0.6,
+                    )
+                )
     return handles

landmarkdiff/metrics_agg.py CHANGED Viewed

@@ -41,8 +41,12 @@ class MetricsAggregator:
     """
     HIGHER_BETTER = {
-        "ssim": True, "psnr": True, "identity_sim": True,
-        "lpips": False, "fid": False, "nme": False,
     }
     def __init__(self) -> None:
@@ -57,13 +61,15 @@ class MetricsAggregator:
         **metadata: Any,
     ) -> None:
         """Add a single evaluation record."""
-        self.records.append(MetricRecord(
-            experiment=experiment,
-            procedure=procedure,
-            metrics=metrics,
-            checkpoint_step=checkpoint_step,
-            metadata=metadata,
-        ))
     def add_batch(
         self,
@@ -76,7 +82,9 @@ class MetricsAggregator:
         """
         for rec in records:
             proc = rec.get("procedure", "all")
-            metrics = {k: v for k, v in rec.items() if k != "procedure" and isinstance(v, (int, float))}
             self.add(experiment, proc, metrics)
     @property
@@ -211,10 +219,7 @@ class MetricsAggregator:
             val = self.mean(exp, metric, procedure)
             if math.isnan(val):
                 continue
-            if higher_better and val > best_val:
-                best_val = val
-                best_exp = exp
-            elif not higher_better and val < best_val:
                 best_val = val
                 best_exp = exp

     """
     HIGHER_BETTER = {
+        "ssim": True,
+        "psnr": True,
+        "identity_sim": True,
+        "lpips": False,
+        "fid": False,
+        "nme": False,
     }
     def __init__(self) -> None:
         **metadata: Any,
     ) -> None:
         """Add a single evaluation record."""
+        self.records.append(
+            MetricRecord(
+                experiment=experiment,
+                procedure=procedure,
+                metrics=metrics,
+                checkpoint_step=checkpoint_step,
+                metadata=metadata,
+            )
+        )
     def add_batch(
         self,
         """
         for rec in records:
             proc = rec.get("procedure", "all")
+            metrics = {
+                k: v for k, v in rec.items() if k != "procedure" and isinstance(v, (int, float))
+            }
             self.add(experiment, proc, metrics)
     @property
             val = self.mean(exp, metric, procedure)
             if math.isnan(val):
                 continue
+            if (higher_better and val > best_val) or (not higher_better and val < best_val):
                 best_val = val
                 best_exp = exp

landmarkdiff/metrics_viz.py CHANGED Viewed

@@ -24,7 +24,6 @@ Usage:
 from __future__ import annotations
-import json
 from pathlib import Path
 from typing import Any
@@ -79,25 +78,29 @@ class MetricsVisualizer:
         self.dpi = dpi
         self.style = style
-    def _get_plt(self):
         """Import matplotlib with configuration."""
         import matplotlib
         matplotlib.use("Agg")
         import matplotlib.pyplot as plt
         try:
             plt.style.use(self.style)
         except OSError:
             plt.style.use("seaborn-v0_8")
         # Publication font sizes
-        plt.rcParams.update({
-            "font.size": 10,
-            "axes.titlesize": 12,
-            "axes.labelsize": 11,
-            "xtick.labelsize": 9,
-            "ytick.labelsize": 9,
-            "legend.fontsize": 9,
-            "figure.titlesize": 13,
-        })
         return plt
     # ------------------------------------------------------------------
@@ -138,7 +141,7 @@ class MetricsVisualizer:
         if n_metrics == 1:
             axes = [axes]
-        for ax, metric in zip(axes, metrics):
             values = [metrics_by_procedure[p].get(metric, 0) for p in procedures]
             colors = [self.COLORS.get(p, "#999999") for p in procedures]
@@ -146,16 +149,21 @@ class MetricsVisualizer:
             ax.set_xticks(range(n_procs))
             ax.set_xticklabels(
                 [p[:5].title() for p in procedures],
-                rotation=30, ha="right",
             )
             ax.set_ylabel(self.METRIC_LABELS.get(metric, metric))
             ax.set_title(self.METRIC_LABELS.get(metric, metric))
             # Add value labels on bars
-            for bar, val in zip(bars, values):
                 ax.text(
-                    bar.get_x() + bar.get_width() / 2, bar.get_height(),
-                    f"{val:.3f}", ha="center", va="bottom", fontsize=8,
                 )
         fig.suptitle(title, fontweight="bold")
@@ -192,9 +200,8 @@ class MetricsVisualizer:
         if metrics is None:
             metrics = sorted(
-                set.intersection(
-                    *(set(v.keys()) for v in experiments.values())
-                ) & set(self.METRIC_LABELS.keys())
             )
         n_metrics = len(metrics)
@@ -258,9 +265,7 @@ class MetricsVisualizer:
         plt = self._get_plt()
         fitz_types = sorted(metrics_by_type.keys())
-        procedures = sorted(
-            set.union(*(set(v.keys()) for v in metrics_by_type.values()))
-        )
         # Build matrix
         matrix = np.zeros((len(fitz_types), len(procedures)))
@@ -268,7 +273,9 @@ class MetricsVisualizer:
             for j, proc in enumerate(procedures):
                 matrix[i, j] = metrics_by_type[ft].get(proc, 0)
-        fig, ax = plt.subplots(figsize=(max(6, len(procedures) * 1.5), max(4, len(fitz_types) * 0.8)))
         cmap = "RdYlGn" if self.METRIC_HIGHER_BETTER.get(metric, True) else "RdYlGn_r"
         im = ax.imshow(matrix, cmap=cmap, aspect="auto")
@@ -282,9 +289,15 @@ class MetricsVisualizer:
         # Annotate cells
         for i in range(len(fitz_types)):
             for j in range(len(procedures)):
-                ax.text(j, i, f"{matrix[i, j]:.3f}",
-                        ha="center", va="center", fontsize=9,
-                        color="white" if matrix[i, j] < np.median(matrix) else "black")
         fig.colorbar(im, ax=ax, label=self.METRIC_LABELS.get(metric, metric))
@@ -328,18 +341,21 @@ class MetricsVisualizer:
         fig, ax = plt.subplots(figsize=(max(6, len(groups) * 1.2), 5))
         bp = ax.boxplot(
-            data, patch_artist=True, widths=0.6,
             medianprops={"color": "black", "linewidth": 1.5},
         )
         colors = [self.COLORS.get(g, "#4C72B0") for g in groups]
-        for patch, color in zip(bp["boxes"], colors):
             patch.set_facecolor(color)
             patch.set_alpha(0.7)
         ax.set_xticklabels(
             [g.title() for g in groups],
-            rotation=30, ha="right",
         )
         ax.set_ylabel(self.METRIC_LABELS.get(metric, metric))
@@ -348,9 +364,16 @@ class MetricsVisualizer:
         ax.set_title(title, fontweight="bold")
         # Add sample count annotations
-        for i, (g, vals) in enumerate(zip(groups, data)):
-            ax.text(i + 1, ax.get_ylim()[0], f"n={len(vals)}",
-                    ha="center", va="bottom", fontsize=8, color="gray")
         fig.tight_layout()
         out_path = self.output_dir / filename
@@ -430,10 +453,12 @@ class MetricsVisualizer:
                     parts.append(val_str)
             lines.append(" & ".join(parts) + " \\\\")
-        lines.extend([
-            "\\bottomrule",
-            "\\end{tabular}",
-            "\\end{table}",
-        ])
         return "\n".join(lines)

 from __future__ import annotations
 from pathlib import Path
 from typing import Any
         self.dpi = dpi
         self.style = style
+    def _get_plt(self) -> Any:
         """Import matplotlib with configuration."""
         import matplotlib
         matplotlib.use("Agg")
         import matplotlib.pyplot as plt
         try:
             plt.style.use(self.style)
         except OSError:
             plt.style.use("seaborn-v0_8")
         # Publication font sizes
+        plt.rcParams.update(
+            {
+                "font.size": 10,
+                "axes.titlesize": 12,
+                "axes.labelsize": 11,
+                "xtick.labelsize": 9,
+                "ytick.labelsize": 9,
+                "legend.fontsize": 9,
+                "figure.titlesize": 13,
+            }
+        )
         return plt
     # ------------------------------------------------------------------
         if n_metrics == 1:
             axes = [axes]
+        for ax, metric in zip(axes, metrics, strict=False):
             values = [metrics_by_procedure[p].get(metric, 0) for p in procedures]
             colors = [self.COLORS.get(p, "#999999") for p in procedures]
             ax.set_xticks(range(n_procs))
             ax.set_xticklabels(
                 [p[:5].title() for p in procedures],
+                rotation=30,
+                ha="right",
             )
             ax.set_ylabel(self.METRIC_LABELS.get(metric, metric))
             ax.set_title(self.METRIC_LABELS.get(metric, metric))
             # Add value labels on bars
+            for bar, val in zip(bars, values, strict=False):
                 ax.text(
+                    bar.get_x() + bar.get_width() / 2,
+                    bar.get_height(),
+                    f"{val:.3f}",
+                    ha="center",
+                    va="bottom",
+                    fontsize=8,
                 )
         fig.suptitle(title, fontweight="bold")
         if metrics is None:
             metrics = sorted(
+                set.intersection(*(set(v.keys()) for v in experiments.values()))
+                & set(self.METRIC_LABELS.keys())
             )
         n_metrics = len(metrics)
         plt = self._get_plt()
         fitz_types = sorted(metrics_by_type.keys())
+        procedures = sorted(set.union(*(set(v.keys()) for v in metrics_by_type.values())))
         # Build matrix
         matrix = np.zeros((len(fitz_types), len(procedures)))
             for j, proc in enumerate(procedures):
                 matrix[i, j] = metrics_by_type[ft].get(proc, 0)
+        fig, ax = plt.subplots(
+            figsize=(max(6, len(procedures) * 1.5), max(4, len(fitz_types) * 0.8))
+        )
         cmap = "RdYlGn" if self.METRIC_HIGHER_BETTER.get(metric, True) else "RdYlGn_r"
         im = ax.imshow(matrix, cmap=cmap, aspect="auto")
         # Annotate cells
         for i in range(len(fitz_types)):
             for j in range(len(procedures)):
+                ax.text(
+                    j,
+                    i,
+                    f"{matrix[i, j]:.3f}",
+                    ha="center",
+                    va="center",
+                    fontsize=9,
+                    color="white" if matrix[i, j] < np.median(matrix) else "black",
+                )
         fig.colorbar(im, ax=ax, label=self.METRIC_LABELS.get(metric, metric))
         fig, ax = plt.subplots(figsize=(max(6, len(groups) * 1.2), 5))
         bp = ax.boxplot(
+            data,
+            patch_artist=True,
+            widths=0.6,
             medianprops={"color": "black", "linewidth": 1.5},
         )
         colors = [self.COLORS.get(g, "#4C72B0") for g in groups]
+        for patch, color in zip(bp["boxes"], colors, strict=False):
             patch.set_facecolor(color)
             patch.set_alpha(0.7)
         ax.set_xticklabels(
             [g.title() for g in groups],
+            rotation=30,
+            ha="right",
         )
         ax.set_ylabel(self.METRIC_LABELS.get(metric, metric))
         ax.set_title(title, fontweight="bold")
         # Add sample count annotations
+        for i, (_g, vals) in enumerate(zip(groups, data, strict=False)):
+            ax.text(
+                i + 1,
+                ax.get_ylim()[0],
+                f"n={len(vals)}",
+                ha="center",
+                va="bottom",
+                fontsize=8,
+                color="gray",
+            )
         fig.tight_layout()
         out_path = self.output_dir / filename
                     parts.append(val_str)
             lines.append(" & ".join(parts) + " \\\\")
+        lines.extend(
+            [
+                "\\bottomrule",
+                "\\end{tabular}",
+                "\\end{table}",
+            ]
+        )
         return "\n".join(lines)

landmarkdiff/model_registry.py CHANGED Viewed

@@ -139,9 +139,7 @@ class ModelRegistry:
             step = int(parts[-1])
         # Compute size
-        size_mb = sum(
-            f.stat().st_size for f in ckpt_dir.rglob("*") if f.is_file()
-        ) / (1024 * 1024)
         return ModelEntry(
             name=ckpt_dir.name,
@@ -195,16 +193,15 @@ class ModelRegistry:
         Returns:
             Best ModelEntry, or None if no models have the metric.
         """
-        candidates = [
-            m for m in self._models.values()
-            if metric in m.metrics
-        ]
         if not candidates:
             return None
-        return min(candidates, key=lambda m: m.metrics[metric])  \
-            if lower_is_better else \
-            max(candidates, key=lambda m: m.metrics[metric])
     def get_by_step(self, step: int) -> ModelEntry | None:
         """Get a model by its training step."""
@@ -266,9 +263,7 @@ class ModelRegistry:
             raise KeyError(f"Checkpoint '{name}' not found in registry")
         if use_ema and entry.has_ema:
-            return ControlNetModel.from_pretrained(
-                str(entry.path / "controlnet_ema")
-            )
         # Fallback: load from training state
         state = self.load(name)
@@ -356,9 +351,7 @@ class ModelRegistry:
         for metric in sorted(all_metrics):
             values = [m.metrics[metric] for m in models if metric in m.metrics]
             if values:
-                lines.append(
-                    f"  {metric}: {min(values):.4f} — {max(values):.4f}"
-                )
         return "\n".join(lines)

             step = int(parts[-1])
         # Compute size
+        size_mb = sum(f.stat().st_size for f in ckpt_dir.rglob("*") if f.is_file()) / (1024 * 1024)
         return ModelEntry(
             name=ckpt_dir.name,
         Returns:
             Best ModelEntry, or None if no models have the metric.
         """
+        candidates = [m for m in self._models.values() if metric in m.metrics]
         if not candidates:
             return None
+        return (
+            min(candidates, key=lambda m: m.metrics[metric])
+            if lower_is_better
+            else max(candidates, key=lambda m: m.metrics[metric])
+        )
     def get_by_step(self, step: int) -> ModelEntry | None:
         """Get a model by its training step."""
             raise KeyError(f"Checkpoint '{name}' not found in registry")
         if use_ema and entry.has_ema:
+            return ControlNetModel.from_pretrained(str(entry.path / "controlnet_ema"))
         # Fallback: load from training state
         state = self.load(name)
         for metric in sorted(all_metrics):
             values = [m.metrics[metric] for m in models if metric in m.metrics]
             if values:
+                lines.append(f"  {metric}: {min(values):.4f} — {max(values):.4f}")
         return "\n".join(lines)

landmarkdiff/postprocess.py CHANGED Viewed

@@ -54,13 +54,10 @@ def laplacian_pyramid_blend(
     mask_f = mask.astype(np.float32)
     if mask_f.max() > 1.0:
         mask_f = mask_f / 255.0
-    if mask_f.ndim == 2:
-        mask_3ch = np.stack([mask_f] * 3, axis=-1)
-    else:
-        mask_3ch = mask_f
     # Make dimensions divisible by 2^levels
-    factor = 2 ** levels
     new_h = (h + factor - 1) // factor * factor
     new_w = (w + factor - 1) // factor * factor
@@ -232,24 +229,27 @@ def restore_face_codeformer(
         Restored BGR image, or original if CodeFormer unavailable.
     """
     try:
         from codeformer.basicsr.utils import img2tensor, tensor2img
-        from codeformer.facelib.utils.face_restoration_helper import FaceRestoreHelper
         from codeformer.basicsr.utils.download_util import load_file_from_url
-        import torch
         from torchvision.transforms.functional import normalize as tv_normalize
     except ImportError:
         return image
     try:
         global _CODEFORMER_MODEL, _CODEFORMER_HELPER
-        from codeformer.inference_codeformer import set_realesrgan as _unused  # noqa: F401
         from codeformer.basicsr.archs.codeformer_arch import CodeFormer as CodeFormerArch
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         if _CODEFORMER_MODEL is None:
             model = CodeFormerArch(
-                dim_embd=512, codebook_size=1024, n_head=8, n_layers=9,
                 connect_list=["32", "64", "128", "256"],
             ).to(device)
@@ -316,16 +316,18 @@ def enhance_background_realesrgan(
         Enhanced BGR image at original resolution.
     """
     try:
-        from realesrgan import RealESRGANer
-        from basicsr.archs.rrdbnet_arch import RRDBNet
         import torch
     except ImportError:
         return image
     try:
         global _REALESRGAN_UPSAMPLER
         if _REALESRGAN_UPSAMPLER is None:
-            model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
             _REALESRGAN_UPSAMPLER = RealESRGANer(
                 scale=4,
                 model_path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
@@ -345,15 +347,11 @@ def enhance_background_realesrgan(
         mask_f = mask.astype(np.float32)
         if mask_f.max() > 1.0:
             mask_f /= 255.0
-        if mask_f.ndim == 2:
-            mask_3ch = np.stack([mask_f] * 3, axis=-1)
-        else:
-            mask_3ch = mask_f
         # Keep face region from original, use enhanced for background
         result = (
-            image.astype(np.float32) * mask_3ch
-            + enhanced.astype(np.float32) * (1.0 - mask_3ch)
         ).astype(np.uint8)
         return result
     except Exception:
@@ -414,9 +412,10 @@ def verify_identity_arcface(
         orig_emb = orig_faces[0].embedding
         result_emb = result_faces[0].embedding
-        sim = float(np.dot(orig_emb, result_emb) / (
-            np.linalg.norm(orig_emb) * np.linalg.norm(result_emb) + 1e-8
-        ))
         sim = float(np.clip(sim, 0, 1))
         passed = sim >= threshold
@@ -437,6 +436,7 @@ def verify_identity_arcface(
 def _has_cuda() -> bool:
     """Return torch's CUDA availability flag, or False if torch cannot be imported."""
     cuda_ok = False
     try:
         import torch

         cuda_ok = torch.cuda.is_available()
     except ImportError:
         # torch could not be imported: keep the default of "no CUDA".
         pass
     return cuda_ok
@@ -465,7 +465,7 @@ def histogram_match_skin(
     if not np.any(mask_bool):
         return source
-    result = source.copy()
     src_lab = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype(np.float32)
     ref_lab = cv2.cvtColor(reference, cv2.COLOR_BGR2LAB).astype(np.float32)
@@ -574,20 +574,18 @@ def full_postprocess(
         mask_f = mask.astype(np.float32)
         if mask_f.max() > 1.0:
             mask_f /= 255.0
-        if mask_f.ndim == 2:
-            mask_3ch = np.stack([mask_f] * 3, axis=-1)
-        else:
-            mask_3ch = mask_f
         composited = (
-            result.astype(np.float32) * mask_3ch
-            + original.astype(np.float32) * (1.0 - mask_3ch)
         ).astype(np.uint8)
     # Step 6: Neural identity verification
     identity_check = {"similarity": -1.0, "passed": True, "message": "skipped"}
     if verify_identity:
         identity_check = verify_identity_arcface(
-            original, composited, threshold=identity_threshold,
         )
     return {

     mask_f = mask.astype(np.float32)
     if mask_f.max() > 1.0:
         mask_f = mask_f / 255.0
+    mask_3ch = np.stack([mask_f] * 3, axis=-1) if mask_f.ndim == 2 else mask_f
     # Make dimensions divisible by 2^levels
+    factor = 2**levels
     new_h = (h + factor - 1) // factor * factor
     new_w = (w + factor - 1) // factor * factor
         Restored BGR image, or original if CodeFormer unavailable.
     """
     try:
+        import torch
         from codeformer.basicsr.utils import img2tensor, tensor2img
         from codeformer.basicsr.utils.download_util import load_file_from_url
+        from codeformer.facelib.utils.face_restoration_helper import FaceRestoreHelper
         from torchvision.transforms.functional import normalize as tv_normalize
     except ImportError:
         return image
     try:
         global _CODEFORMER_MODEL, _CODEFORMER_HELPER
         from codeformer.basicsr.archs.codeformer_arch import CodeFormer as CodeFormerArch
+        from codeformer.inference_codeformer import set_realesrgan as _unused  # noqa: F401
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         if _CODEFORMER_MODEL is None:
             model = CodeFormerArch(
+                dim_embd=512,
+                codebook_size=1024,
+                n_head=8,
+                n_layers=9,
                 connect_list=["32", "64", "128", "256"],
             ).to(device)
         Enhanced BGR image at original resolution.
     """
     try:
         import torch
+        from basicsr.archs.rrdbnet_arch import RRDBNet
+        from realesrgan import RealESRGANer
     except ImportError:
         return image
     try:
         global _REALESRGAN_UPSAMPLER
         if _REALESRGAN_UPSAMPLER is None:
+            model = RRDBNet(
+                num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4
+            )
             _REALESRGAN_UPSAMPLER = RealESRGANer(
                 scale=4,
                 model_path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
         mask_f = mask.astype(np.float32)
         if mask_f.max() > 1.0:
             mask_f /= 255.0
+        mask_3ch = np.stack([mask_f] * 3, axis=-1) if mask_f.ndim == 2 else mask_f
         # Keep face region from original, use enhanced for background
         result = (
+            image.astype(np.float32) * mask_3ch + enhanced.astype(np.float32) * (1.0 - mask_3ch)
         ).astype(np.uint8)
         return result
     except Exception:
         orig_emb = orig_faces[0].embedding
         result_emb = result_faces[0].embedding
+        sim = float(
+            np.dot(orig_emb, result_emb)
+            / (np.linalg.norm(orig_emb) * np.linalg.norm(result_emb) + 1e-8)
+        )
         sim = float(np.clip(sim, 0, 1))
         passed = sim >= threshold
 def _has_cuda() -> bool:
     """Return torch's CUDA availability flag, or False if torch cannot be imported."""
     cuda_ok = False
     try:
         import torch

         cuda_ok = torch.cuda.is_available()
     except ImportError:
         # torch could not be imported: keep the default of "no CUDA".
         pass
     return cuda_ok
     if not np.any(mask_bool):
         return source
+    source.copy()
     src_lab = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype(np.float32)
     ref_lab = cv2.cvtColor(reference, cv2.COLOR_BGR2LAB).astype(np.float32)
         mask_f = mask.astype(np.float32)
         if mask_f.max() > 1.0:
             mask_f /= 255.0
+        mask_3ch = np.stack([mask_f] * 3, axis=-1) if mask_f.ndim == 2 else mask_f
         composited = (
+            result.astype(np.float32) * mask_3ch + original.astype(np.float32) * (1.0 - mask_3ch)
         ).astype(np.uint8)
     # Step 6: Neural identity verification
     identity_check = {"similarity": -1.0, "passed": True, "message": "skipped"}
     if verify_identity:
         identity_check = verify_identity_arcface(
+            original,
+            composited,
+            threshold=identity_threshold,
         )
     return {

landmarkdiff/py.typed ADDED Viewed

File without changes

landmarkdiff/safety.py CHANGED Viewed

@@ -26,7 +26,6 @@ Usage:
 from __future__ import annotations
 from dataclasses import dataclass, field
-from typing import Optional
 import cv2
 import numpy as np
@@ -35,6 +34,7 @@ import numpy as np
 @dataclass
 class SafetyResult:
     """Result of safety validation checks."""
     passed: bool = True
     failures: list[str] = field(default_factory=list)
     warnings: list[str] = field(default_factory=list)
@@ -124,9 +124,7 @@ class SafetyValidator:
         return result
-    def _check_face_confidence(
-        self, result: SafetyResult, confidence: float
-    ) -> None:
         """Check face detection confidence."""
         if confidence < self.min_face_confidence:
             result.add_failure(
@@ -147,14 +145,14 @@ class SafetyValidator:
         """Check identity preservation using ArcFace similarity."""
         try:
             from landmarkdiff.evaluation import compute_identity_similarity
             sim = compute_identity_similarity(output_image, input_image)
             result.details["identity_similarity"] = float(sim)
             if sim < self.identity_threshold:
                 result.add_failure(
                     "identity",
-                    f"Identity similarity {sim:.3f} below threshold "
-                    f"{self.identity_threshold}",
                 )
             else:
                 result.add_pass("identity")
@@ -257,9 +255,7 @@ class SafetyValidator:
         else:
             result.add_pass("procedure_region")
-    def _check_output_quality(
-        self, result: SafetyResult, output: np.ndarray
-    ) -> None:
         """Check output image quality (not blank, not corrupted)."""
         if output is None or output.size == 0:
             result.add_failure("output_quality", "Output image is empty")
@@ -346,8 +342,7 @@ class SafetyValidator:
         cv2.addWeighted(overlay, opacity, result, 1 - opacity, 0, result)
         # White text
-        cv2.putText(result, text, (x, y), font, font_scale,
-                    (255, 255, 255), thickness, cv2.LINE_AA)
         return result
@@ -371,7 +366,7 @@ class SafetyValidator:
             "procedure": procedure,
             "intensity": intensity,
             "disclaimer": "AI-generated surgical prediction for visualization only. "
-                          "Not a guarantee of surgical outcome.",
         }
         # Save as sidecar JSON (PNG doesn't have easy EXIF support)

 from __future__ import annotations
 from dataclasses import dataclass, field
 import cv2
 import numpy as np
 @dataclass
 class SafetyResult:
     """Result of safety validation checks."""
     passed: bool = True
     failures: list[str] = field(default_factory=list)
     warnings: list[str] = field(default_factory=list)
         return result
+    def _check_face_confidence(self, result: SafetyResult, confidence: float) -> None:
         """Check face detection confidence."""
         if confidence < self.min_face_confidence:
             result.add_failure(
         """Check identity preservation using ArcFace similarity."""
         try:
             from landmarkdiff.evaluation import compute_identity_similarity
             sim = compute_identity_similarity(output_image, input_image)
             result.details["identity_similarity"] = float(sim)
             if sim < self.identity_threshold:
                 result.add_failure(
                     "identity",
+                    f"Identity similarity {sim:.3f} below threshold {self.identity_threshold}",
                 )
             else:
                 result.add_pass("identity")
         else:
             result.add_pass("procedure_region")
+    def _check_output_quality(self, result: SafetyResult, output: np.ndarray) -> None:
         """Check output image quality (not blank, not corrupted)."""
         if output is None or output.size == 0:
             result.add_failure("output_quality", "Output image is empty")
         cv2.addWeighted(overlay, opacity, result, 1 - opacity, 0, result)
         # White text
+        cv2.putText(result, text, (x, y), font, font_scale, (255, 255, 255), thickness, cv2.LINE_AA)
         return result
             "procedure": procedure,
             "intensity": intensity,
             "disclaimer": "AI-generated surgical prediction for visualization only. "
+            "Not a guarantee of surgical outcome.",
         }
         # Save as sidecar JSON (PNG doesn't have easy EXIF support)

landmarkdiff/synthetic/__init__.py CHANGED Viewed

	@@ -0,0 +1,23 @@

+"""Synthetic data generation for ControlNet fine-tuning.
+Modules:
+  - pair_generator: Generate training pairs from face images
+  - augmentation: Clinical degradation augmentations
+  - tps_warp: TPS warping with rigid region preservation
+"""
+from landmarkdiff.synthetic.augmentation import apply_clinical_augmentation
+from landmarkdiff.synthetic.pair_generator import (
+    TrainingPair,
+    generate_pair,
+    generate_pairs_from_directory,
+)
+from landmarkdiff.synthetic.tps_warp import warp_image_tps
+__all__ = [
+    "TrainingPair",
+    "apply_clinical_augmentation",
+    "generate_pair",
+    "generate_pairs_from_directory",
+    "warp_image_tps",
+]

landmarkdiff/synthetic/augmentation.py CHANGED Viewed

@@ -7,8 +7,8 @@ Applied from day 1 - domain gap prevention, not afterthought.
 from __future__ import annotations
 from dataclasses import dataclass
-from typing import Callable
 import cv2
 import numpy as np
@@ -35,7 +35,7 @@ def point_source_lighting(image: np.ndarray, rng: np.random.Generator) -> np.nda
     # Distance-based falloff
     y_grid, x_grid = np.mgrid[0:h, 0:w].astype(np.float32)
     dist = np.sqrt((x_grid - lx) ** 2 + (y_grid - ly) ** 2)
-    max_dist = np.sqrt(w ** 2 + h ** 2)
     light_map = 1.0 - (dist / max_dist) * intensity
     light_map = np.clip(light_map, 0.3, 1.0)
@@ -132,7 +132,7 @@ def vignette(image: np.ndarray, rng: np.random.Generator) -> np.ndarray:
     y, x = np.mgrid[0:h, 0:w].astype(np.float32)
     cx, cy = w / 2, h / 2
     dist = np.sqrt((x - cx) ** 2 + (y - cy) ** 2)
-    max_dist = np.sqrt(cx ** 2 + cy ** 2)
     mask = 1 - strength * (dist / max_dist) ** 2
     mask = np.clip(mask, 0.3, 1.0)

 from __future__ import annotations
+from collections.abc import Callable
 from dataclasses import dataclass
 import cv2
 import numpy as np
     # Distance-based falloff
     y_grid, x_grid = np.mgrid[0:h, 0:w].astype(np.float32)
     dist = np.sqrt((x_grid - lx) ** 2 + (y_grid - ly) ** 2)
+    max_dist = np.sqrt(w**2 + h**2)
     light_map = 1.0 - (dist / max_dist) * intensity
     light_map = np.clip(light_map, 0.3, 1.0)
     y, x = np.mgrid[0:h, 0:w].astype(np.float32)
     cx, cy = w / 2, h / 2
     dist = np.sqrt((x - cx) ** 2 + (y - cy) ** 2)
+    max_dist = np.sqrt(cx**2 + cy**2)
     mask = 1 - strength * (dist / max_dist) ** 2
     mask = np.clip(mask, 0.3, 1.0)

landmarkdiff/synthetic/pair_generator.py CHANGED Viewed

@@ -6,33 +6,32 @@ Augmentations on INPUT only, never target.
 from __future__ import annotations
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Iterator
 import cv2
 import numpy as np
-from landmarkdiff.landmarks import FaceLandmarks, extract_landmarks, render_landmark_image
 from landmarkdiff.conditioning import generate_conditioning
 from landmarkdiff.manipulation import (
-    PROCEDURE_LANDMARKS,
     apply_procedure_preset,
 )
 from landmarkdiff.masking import generate_surgical_mask
 from landmarkdiff.synthetic.augmentation import apply_clinical_augmentation
-from landmarkdiff.synthetic.tps_warp import warp_image_tps, generate_random_warp
 @dataclass(frozen=True)
 class TrainingPair:
     """A single training sample for ControlNet fine-tuning."""
-    input_image: np.ndarray       # augmented input (512x512 BGR)
-    target_image: np.ndarray      # clean target (512x512 BGR) - TPS-warped original
-    conditioning: np.ndarray      # landmark rendering (512x512 BGR)
-    canny: np.ndarray             # canny edge map (512x512 grayscale)
-    mask: np.ndarray              # feathered surgical mask (512x512 float32)
     procedure: str
     intensity: float
@@ -104,10 +103,7 @@ def generate_pairs_from_directory(
     image_dir = Path(image_dir)
     extensions = {".jpg", ".jpeg", ".png", ".webp"}
-    image_files = sorted(
-        f for f in image_dir.iterdir()
-        if f.suffix.lower() in extensions
-    )
     if not image_files:
         raise FileNotFoundError(f"No images found in {image_dir}")

 from __future__ import annotations
+from collections.abc import Iterator
 from dataclasses import dataclass
 from pathlib import Path
 import cv2
 import numpy as np
 from landmarkdiff.conditioning import generate_conditioning
+from landmarkdiff.landmarks import extract_landmarks, render_landmark_image
 from landmarkdiff.manipulation import (
     apply_procedure_preset,
 )
 from landmarkdiff.masking import generate_surgical_mask
 from landmarkdiff.synthetic.augmentation import apply_clinical_augmentation
+from landmarkdiff.synthetic.tps_warp import warp_image_tps
 @dataclass(frozen=True)
 class TrainingPair:
     """A single training sample for ControlNet fine-tuning."""
+    input_image: np.ndarray  # augmented input (512x512 BGR)
+    target_image: np.ndarray  # clean target (512x512 BGR) - TPS-warped original
+    conditioning: np.ndarray  # landmark rendering (512x512 BGR)
+    canny: np.ndarray  # canny edge map (512x512 grayscale)
+    mask: np.ndarray  # feathered surgical mask (512x512 float32)
     procedure: str
     intensity: float
     image_dir = Path(image_dir)
     extensions = {".jpg", ".jpeg", ".png", ".webp"}
+    image_files = sorted(f for f in image_dir.iterdir() if f.suffix.lower() in extensions)
     if not image_files:
         raise FileNotFoundError(f"No images found in {image_dir}")

landmarkdiff/synthetic/tps_warp.py CHANGED Viewed

@@ -156,7 +156,7 @@ def _solve_tps_weights(
     # Build kernel matrix K (vectorized)
     diff = control_pts[:, np.newaxis, :] - control_pts[np.newaxis, :, :]  # (n, n, 2)
-    r_mat = np.sqrt((diff ** 2).sum(axis=2))  # (n, n)
     K = np.zeros((n, n))
     nz = r_mat > 0
     K[nz] = r_mat[nz] ** 2 * np.log(r_mat[nz])
@@ -205,7 +205,7 @@ def _evaluate_tps(
         # Compute all distances at once: (M, n)
         dx = batch[:, 0:1] - control_pts[:, 0]  # (M, n) via broadcasting
         dy = batch[:, 1:2] - control_pts[:, 1]  # (M, n)
-        r = np.sqrt(dx ** 2 + dy ** 2)
         # TPS kernel: r^2 * log(r), with r=0 -> 0
         kernel = np.zeros_like(r)
@@ -230,9 +230,8 @@ def _compute_rigid_translation(
     inside = []
     for i, (x, y) in enumerate(src):
         ix, iy = int(x), int(y)
-        if 0 <= ix < width and 0 <= iy < height:
-            if mask[iy, ix] > 0:
-                inside.append(i)
     if not inside:
         return np.array([0.0, 0.0])

     # Build kernel matrix K (vectorized)
     diff = control_pts[:, np.newaxis, :] - control_pts[np.newaxis, :, :]  # (n, n, 2)
+    r_mat = np.sqrt((diff**2).sum(axis=2))  # (n, n)
     K = np.zeros((n, n))
     nz = r_mat > 0
     K[nz] = r_mat[nz] ** 2 * np.log(r_mat[nz])
         # Compute all distances at once: (M, n)
         dx = batch[:, 0:1] - control_pts[:, 0]  # (M, n) via broadcasting
         dy = batch[:, 1:2] - control_pts[:, 1]  # (M, n)
+        r = np.sqrt(dx**2 + dy**2)
         # TPS kernel: r^2 * log(r), with r=0 -> 0
         kernel = np.zeros_like(r)
     inside = []
     for i, (x, y) in enumerate(src):
         ix, iy = int(x), int(y)
+        if 0 <= ix < width and 0 <= iy < height and mask[iy, ix] > 0:
+            inside.append(i)
     if not inside:
         return np.array([0.0, 0.0])

landmarkdiff/validation.py CHANGED Viewed

@@ -14,13 +14,11 @@ import json
 import time
 from pathlib import Path
-import cv2
 import numpy as np
 import torch
-import torch.nn.functional as F
 from PIL import Image
-from landmarkdiff.evaluation import compute_ssim, compute_lpips, compute_nme
 class ValidationCallback:
@@ -116,13 +114,18 @@ class ValidationCallback:
                 # ControlNet
                 down_samples, mid_sample = controlnet(
-                    scaled, t, encoder_hidden_states=encoder_hidden_states,
-                    controlnet_cond=conditioning, return_dict=False,
                 )
                 # UNet with ControlNet residuals
                 noise_pred = unet(
-                    scaled, t, encoder_hidden_states=encoder_hidden_states,
                     down_block_additional_residuals=down_samples,
                     mid_block_additional_residual=mid_sample,
                 ).sample
@@ -136,7 +139,9 @@ class ValidationCallback:
             # Convert to numpy for metrics
             gen_np = (decoded[0].float().permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
             tgt_np = (target[0].float().permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
-            cond_np = (conditioning[0].float().permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
             # BGR for metrics (our metrics expect BGR)
             gen_bgr = gen_np[:, :, ::-1].copy()
@@ -177,7 +182,7 @@ class ValidationCallback:
         if generated_images:
             grid_rows = []
             for i in range(0, len(generated_images), 4):
-                row_imgs = generated_images[i:i+4]
                 while len(row_imgs) < 4:
                     row_imgs.append(np.zeros_like(generated_images[0]))
                 grid_rows.append(np.hstack(row_imgs))
@@ -202,6 +207,7 @@ class ValidationCallback:
         try:
             import matplotlib
             matplotlib.use("Agg")
             import matplotlib.pyplot as plt
         except ImportError:

 import time
 from pathlib import Path
 import numpy as np
 import torch
 from PIL import Image
+from landmarkdiff.evaluation import compute_lpips, compute_ssim
 class ValidationCallback:
                 # ControlNet
                 down_samples, mid_sample = controlnet(
+                    scaled,
+                    t,
+                    encoder_hidden_states=encoder_hidden_states,
+                    controlnet_cond=conditioning,
+                    return_dict=False,
                 )
                 # UNet with ControlNet residuals
                 noise_pred = unet(
+                    scaled,
+                    t,
+                    encoder_hidden_states=encoder_hidden_states,
                     down_block_additional_residuals=down_samples,
                     mid_block_additional_residual=mid_sample,
                 ).sample
             # Convert to numpy for metrics
             gen_np = (decoded[0].float().permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
             tgt_np = (target[0].float().permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
+            cond_np = (conditioning[0].float().permute(1, 2, 0).cpu().numpy() * 255).astype(
+                np.uint8
+            )
             # BGR for metrics (our metrics expect BGR)
             gen_bgr = gen_np[:, :, ::-1].copy()
         if generated_images:
             grid_rows = []
             for i in range(0, len(generated_images), 4):
+                row_imgs = generated_images[i : i + 4]
                 while len(row_imgs) < 4:
                     row_imgs.append(np.zeros_like(generated_images[0]))
                 grid_rows.append(np.hstack(row_imgs))
         try:
             import matplotlib
             matplotlib.use("Agg")
             import matplotlib.pyplot as plt
         except ImportError: