saliacoel committed
Commit 5988d40 (verified)
Parent(s): e636647

Upload FD_Standalone_2.py

Files changed (1)
  1. FD_Standalone_2.py +1087 -0
FD_Standalone_2.py ADDED
@@ -0,0 +1,1087 @@
import os
import sys
import importlib.util

import torch
import numpy as np
from ultralytics import YOLO

from comfy_extras import nodes_differential_diffusion

# NEW: import comfy core nodes (for CLIPTextEncode)
try:
    import nodes  # comfy-core nodes.py
except Exception:
    nodes = None


# -----------------------------
# helper loader
# -----------------------------
def _load_helpers():
    here = os.path.dirname(os.path.abspath(__file__))

    candidate_filenames = (
        "Salia_Facedetailer_Helpers.py",
        "Salia_Facedetailer_helpers.py",
        "Facedetailer_helpers.py",
    )

    try:
        from . import Salia_Facedetailer_Helpers as helpers  # type: ignore
        return helpers
    except (ImportError, ModuleNotFoundError):
        pass

    for fname in candidate_filenames:
        path = os.path.join(here, fname)
        if os.path.isfile(path):
            mod_name = os.path.splitext(fname)[0]
            spec = importlib.util.spec_from_file_location(mod_name, path)
            if spec is None or spec.loader is None:
                continue
            module = importlib.util.module_from_spec(spec)
            sys.modules[mod_name] = module
            spec.loader.exec_module(module)
            return module

    if here not in sys.path:
        sys.path.insert(0, here)

    import Salia_Facedetailer_Helpers as helpers  # type: ignore
    return helpers


helpers = _load_helpers()

# Make sure the helpers module is always importable under this canonical name
# (needed because we inlined TRT code that imports SEG from Salia_Facedetailer_Helpers)
try:
    if "Salia_Facedetailer_Helpers" not in sys.modules:
        sys.modules["Salia_Facedetailer_Helpers"] = helpers
except Exception:
    pass


# -----------------------------
# Lazy import for TRT_D_HYPA (TRT VAE decoder)
# -----------------------------
_TRTHYPA_MODULE = None
_TRTHYPA_DECODER_1344x768 = None

def _load_trt_d_hypa_module():
    """
    Locate and import TRT_D_HYPA.py from the comfyui-TRT_VAE custom node.
    We intentionally resolve it via filesystem paths so we do not depend on
    how ComfyUI chooses to package/import custom nodes.
    """
    here = os.path.dirname(os.path.abspath(__file__))

    # FD_Standalone_2.py: .../custom_nodes/comfyui-salia_facedetailer/nodes/FD_Standalone_2.py
    # -> custom_nodes
    custom_nodes_dir = os.path.dirname(os.path.dirname(here))
    trt_nodes_dir = os.path.join(custom_nodes_dir, "comfyui-TRT_VAE", "nodes")
    trt_file = os.path.join(trt_nodes_dir, "TRT_D_HYPA.py")

    if not os.path.isfile(trt_file):
        return None

    mod_name = "TRT_D_HYPA"

    # Reuse already-loaded module if present
    existing = sys.modules.get(mod_name)
    if existing is not None:
        return existing

    spec = importlib.util.spec_from_file_location(mod_name, trt_file)
    if spec is None or spec.loader is None:
        return None

    module = importlib.util.module_from_spec(spec)
    sys.modules[mod_name] = module
    try:
        spec.loader.exec_module(module)
    except Exception:
        # If import fails, remove the partially-loaded module to avoid poisoning sys.modules
        sys.modules.pop(mod_name, None)
        raise

    return module


def _get_trt_decoder_1344x768():
    """
    Return a singleton instance of TRT_D_HYPA_1344x768 (lazy-created).
    This keeps TensorRT engine initialization and memory allocations
    outside of the ComfyUI graph definition path and only runs them
    when the node is actually executed.
    """
    global _TRTHYPA_MODULE, _TRTHYPA_DECODER_1344x768

    if _TRTHYPA_DECODER_1344x768 is not None:
        return _TRTHYPA_DECODER_1344x768

    if _TRTHYPA_MODULE is None:
        _TRTHYPA_MODULE = _load_trt_d_hypa_module()

    if _TRTHYPA_MODULE is None:
        raise ImportError(
            "[FD_Standalone] Could not locate TRT_D_HYPA.py under comfyui-TRT_VAE/nodes. "
            "Make sure the comfyui-TRT_VAE custom node is installed."
        )

    try:
        DecoderCls = getattr(_TRTHYPA_MODULE, "TRT_D_HYPA_1344x768")
    except AttributeError as exc:
        raise ImportError(
            "[FD_Standalone] TRT_D_HYPA_1344x768 class not found inside TRT_D_HYPA.py."
        ) from exc

    _TRTHYPA_DECODER_1344x768 = DecoderCls()
    return _TRTHYPA_DECODER_1344x768
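
# Illustrative usage (as in doit() below): the accessor keeps a process-wide
# singleton, so TensorRT engine initialization runs at most once:
#   decoder = _get_trt_decoder_1344x768()
#   image = decoder.decode(latent)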


# -----------------------------
# Lazy import for Salia_FD_Parsed.py (NEXT TO THIS FILE)
# -----------------------------
_SALIA_FD_PARSED_MODULE = None
_SALIA_PARSED_NODE = None


def _load_salia_fd_parsed_module():
    """
    Load Salia_FD_Parsed.py from the same directory as this file (import-by-path).
    This remains valid if you move both files together to another folder.
    """
    global _SALIA_FD_PARSED_MODULE

    here = os.path.dirname(os.path.abspath(__file__))
    parsed_file = os.path.join(here, "Salia_FD_Parsed.py")

    if not os.path.isfile(parsed_file):
        raise FileNotFoundError(
            f"[FD_Standalone] Missing Salia_FD_Parsed.py next to FD_Standalone_2.py.\n"
            f"Expected: {parsed_file}"
        )

    mod_name = "Salia_FD_Parsed"

    existing = sys.modules.get(mod_name)
    if existing is not None:
        try:
            existing_file = os.path.abspath(getattr(existing, "__file__", "") or "")
            if existing_file == os.path.abspath(parsed_file) and hasattr(existing, "Salia_Parsed"):
                _SALIA_FD_PARSED_MODULE = existing
                return existing
        except Exception:
            pass

    spec = importlib.util.spec_from_file_location(mod_name, parsed_file)
    if spec is None or spec.loader is None:
        raise ImportError(f"[FD_Standalone] Failed to create import spec for: {parsed_file}")

    module = importlib.util.module_from_spec(spec)
    sys.modules[mod_name] = module
    try:
        spec.loader.exec_module(module)
    except Exception:
        sys.modules.pop(mod_name, None)
        raise

    if not hasattr(module, "Salia_Parsed"):
        raise ImportError(
            f"[FD_Standalone] Loaded {parsed_file}, but it does not define Salia_Parsed."
        )

    _SALIA_FD_PARSED_MODULE = module
    return module


def _get_salia_parsed_node():
    """Return a singleton instance of Salia_Parsed (lazy-created)."""
    global _SALIA_PARSED_NODE

    if _SALIA_PARSED_NODE is not None:
        return _SALIA_PARSED_NODE

    module = _load_salia_fd_parsed_module()
    ParserCls = getattr(module, "Salia_Parsed", None)
    if ParserCls is None:
        raise ImportError("[FD_Standalone] Salia_Parsed class not found in Salia_FD_Parsed.py.")

    _SALIA_PARSED_NODE = ParserCls()
    return _SALIA_PARSED_NODE
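
# Illustrative usage (as in doit() below): the parser turns the single prompt
# string into a (positive, negative) pair for the given POV:
#   parser = _get_salia_parsed_node()
#   pos, neg = parser.run(pov_id, prompt)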


# =====================================================================================
# INLINED: Salia_TRT_face.py (everything except the node wrapper)
# =====================================================================================

# Shared SEG definition (same fields as in Facedetailer_helpers)
try:
    from .Salia_Facedetailer_Helpers import SEG
except ImportError:
    # Fallback if used outside of a package
    from Salia_Facedetailer_Helpers import SEG


# -------------------------------------------------------------------------
# Constants
# -------------------------------------------------------------------------

NODE_DIR = os.path.dirname(os.path.abspath(__file__))

# Engine is always this exact filename, located next to this .py file
ENGINE_FILENAME = "salia_face.engine"

# Optional: cache to avoid re-loading the engine every execution
_YOLO_ENGINE_CACHE = {}


def load_yolo_detect(model_path: str) -> YOLO:
    """
    Load a YOLO model with task explicitly set to 'detect' to suppress:
        WARNING ⚠️ Unable to automatically guess model task...
    Works across Ultralytics versions by falling back if 'task=' isn't supported.
    """
    try:
        m = YOLO(model_path, task="detect")
    except TypeError:
        # Older Ultralytics versions may not accept 'task=' in the constructor
        m = YOLO(model_path)

    # Reinforce task in case the backend/model doesn't carry task metadata (e.g. TRT engine)
    try:
        m.task = "detect"
    except Exception:
        pass

    try:
        if hasattr(m, "overrides") and isinstance(m.overrides, dict):
            m.overrides["task"] = "detect"
    except Exception:
        pass

    return m


def load_engine_model(engine_path: str) -> YOLO:
    """Load (and cache) the TensorRT engine as a YOLO detect model."""
    m = _YOLO_ENGINE_CACHE.get(engine_path)
    if m is None:
        m = load_yolo_detect(engine_path)
        _YOLO_ENGINE_CACHE[engine_path] = m
    return m
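
# Illustrative caching behavior (hypothetical path): repeated calls return the
# same cached model, so the TensorRT engine is deserialized only once:
#   m1 = load_engine_model("/path/to/salia_face.engine")
#   m2 = load_engine_model("/path/to/salia_face.engine")
#   assert m1 is m2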


# -------------------------------------------------------------------------
# Helpers (mirrors Salia_BBOX.py behavior)
# -------------------------------------------------------------------------


def tensor_to_pil(image: torch.Tensor):
    """Convert a ComfyUI IMAGE tensor [B,H,W,C] (0..1) to a PIL RGB image (first item in batch)."""
    from PIL import Image

    if not isinstance(image, torch.Tensor):
        raise TypeError(f"Expected torch.Tensor, got {type(image)}")

    if image.dim() == 4:
        img = image[0]
    else:
        img = image

    img = img.detach()
    if img.is_cuda:
        img = img.cpu()

    img = img.clamp(0, 1).numpy()
    if img.shape[-1] == 1:
        img = np.repeat(img, 3, axis=-1)

    img_u8 = (img * 255.0).round().astype(np.uint8)
    return Image.fromarray(img_u8)
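
# Shape note (illustrative values): a [1, 768, 1344, 3] float tensor in 0..1
# becomes a 1344x768 PIL RGB image; a single-channel [..., 1] tensor is
# repeated to 3 channels before conversion.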


def make_crop_region(w: int, h: int, bbox_xyxy, crop_factor: float, crop_min_size=None):
    """Expanded bbox crop-region logic, clamped to image."""
    try:
        x1f = float(bbox_xyxy[0])
        y1f = float(bbox_xyxy[1])
        x2f = float(bbox_xyxy[2])
        y2f = float(bbox_xyxy[3])
    except Exception:
        x1f = y1f = x2f = y2f = 0.0

    bbox_w = max(1.0, x2f - x1f)
    bbox_h = max(1.0, y2f - y1f)

    crop_w = bbox_w * float(crop_factor)
    crop_h = bbox_h * float(crop_factor)

    if crop_min_size is not None:
        crop_w = max(crop_w, float(crop_min_size))
        crop_h = max(crop_h, float(crop_min_size))

    cx = (x1f + x2f) / 2.0
    cy = (y1f + y2f) / 2.0

    rx1 = int(round(cx - crop_w / 2.0))
    ry1 = int(round(cy - crop_h / 2.0))
    rx2 = int(round(cx + crop_w / 2.0))
    ry2 = int(round(cy + crop_h / 2.0))

    # clamp
    rx1 = max(0, min(w - 1, rx1))
    ry1 = max(0, min(h - 1, ry1))
    rx2 = max(rx1 + 1, min(w, rx2))
    ry2 = max(ry1 + 1, min(h, ry2))

    return (rx1, ry1, rx2, ry2)
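
# Worked example (illustrative numbers): in a 1024x768 image, bbox
# (100, 100, 200, 220) with crop_factor=1.5 gives a 150x180 window centered
# on the bbox:
#   make_crop_region(1024, 768, (100, 100, 200, 220), 1.5) -> (75, 70, 225, 250)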


def crop_image(image: torch.Tensor, crop_region):
    """Crop a ComfyUI IMAGE tensor [B,H,W,C] using (x1,y1,x2,y2)."""
    x1, y1, x2, y2 = crop_region
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

    if image.dim() == 4:
        return image[:, y1:y2, x1:x2, :]
    if image.dim() == 3:
        return image[y1:y2, x1:x2, :]
    raise ValueError(f"Unexpected image tensor shape: {tuple(image.shape)}")


def crop_ndarray2(arr: np.ndarray, crop_region):
    """Crop a 2D numpy array using (x1,y1,x2,y2)."""
    x1, y1, x2, y2 = crop_region
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    return arr[y1:y2, x1:x2]


try:
    import cv2  # opencv-python or opencv-python-headless
except Exception:
    cv2 = None


def dilate_masks(segmasks, dilation: int):
    """Dilate masks only if dilation > 0 and cv2 is available."""
    if dilation <= 0:
        return segmasks
    if cv2 is None:
        return segmasks

    k = int(dilation)
    ksize = k * 2 + 1
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (ksize, ksize))

    out = []
    for bbox, mask, conf in segmasks:
        try:
            m = (mask > 0.5).astype(np.uint8) * 255
            m = cv2.dilate(m, kernel, iterations=1)
            out_mask = (m > 0).astype(np.float32)
            out.append((bbox, out_mask, conf))
        except Exception:
            out.append((bbox, mask, conf))
    return out
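
# Kernel note (illustrative): dilation=k uses a (2k+1)x(2k+1) elliptical
# structuring element, e.g. dilation=4 grows each mask with a 9x9 ellipse;
# masks pass through unchanged when cv2 is unavailable.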


def combine_masks(segmasks, out_shape_hw=None) -> torch.Tensor:
    """Combine multiple masks using max()."""
    if not segmasks:
        if out_shape_hw is None:
            return torch.zeros((1, 1, 1), dtype=torch.float32)
        h, w = out_shape_hw
        return torch.zeros((1, h, w), dtype=torch.float32)

    base = segmasks[0][1]
    combined = np.zeros_like(base, dtype=np.float32)
    for _, m, _ in segmasks:
        try:
            combined = np.maximum(combined, m.astype(np.float32))
        except Exception:
            pass

    return torch.from_numpy(combined).unsqueeze(0)
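
# Illustrative: two overlapping face masks combine via element-wise max, so
# combine_masks([(b1, m1, c1), (b2, m2, c2)]) yields one [1, H, W] float
# tensor that is 1.0 wherever either input mask was set.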


def _create_segmasks(results):
    """Create a list of (bbox, mask_float32, conf) tuples."""
    bboxes = results[1]
    segms = results[2]
    confs = results[3]

    out = []
    try:
        n = int(len(segms))
    except Exception:
        n = 0

    for i in range(n):
        try:
            out.append((bboxes[i], segms[i].astype(np.float32), confs[i]))
        except Exception:
            pass

    return out


def _inference_bbox(model, image_pil, confidence: float = 0.3, device: str = "0"):
    """
    Run bbox inference and return:
        [labels, bboxes_xyxy_list, segm_masks_list, confs_list]
    where segm_masks are full-image boolean masks (rectangle fill per bbox).
    """
    pred = model(image_pil, conf=float(confidence), device=str(device), verbose=False)

    bboxes = pred[0].boxes.xyxy.cpu().numpy()  # xyxy
    if bboxes is None or (hasattr(bboxes, "shape") and bboxes.shape[0] == 0):
        return [[], [], [], []]

    # Original image size; PIL .size is (W, H)
    w_orig, h_orig = image_pil.size
    ih = int(h_orig)
    iw = int(w_orig)

    segms = []
    for (x0, y0, x1, y1) in bboxes:
        m = np.zeros((ih, iw), dtype=np.uint8)

        # Clamp coords
        try:
            x0i = int(x0)
        except Exception:
            x0i = 0
        try:
            y0i = int(y0)
        except Exception:
            y0i = 0
        try:
            x1i = int(x1)
        except Exception:
            x1i = 0
        try:
            y1i = int(y1)
        except Exception:
            y1i = 0

        x0c = max(0, min(iw - 1, x0i))
        x1c = max(x0c + 1, min(iw, x1i))
        y0c = max(0, min(ih - 1, y0i))
        y1c = max(y0c + 1, min(ih, y1i))

        if cv2 is not None:
            try:
                cv2.rectangle(m, (x0c, y0c), (x1c, y1c), 255, -1)
            except Exception:
                m[y0c:y1c, x0c:x1c] = 255
        else:
            m[y0c:y1c, x0c:x1c] = 255

        segms.append((m > 0))

    labels = []
    confs = []

    names = getattr(pred[0], "names", None)
    names_is_seq = isinstance(names, (list, tuple))

    for i in range(len(bboxes)):
        # label
        label = "unknown"
        try:
            cls_idx = int(pred[0].boxes[i].cls.item())
            if names_is_seq:
                label = names[cls_idx] if 0 <= cls_idx < len(names) else str(cls_idx)
            elif isinstance(names, dict):
                label = names.get(cls_idx, str(cls_idx))
            else:
                label = str(cls_idx)
        except Exception:
            label = "unknown"

        # conf (force to float)
        try:
            conf_val = float(pred[0].boxes[i].conf.item())
        except Exception:
            conf_val = 0.0

        labels.append(conf_val)  # NOTE: deliberately stores the confidence, kept as-is from the original code
        confs.append(conf_val)

    return [labels, list(bboxes), segms, confs]
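
# Illustrative return value (hypothetical numbers) for one detected face on a
# 1344x768 input:
#   [[0.91], [array([320., 140., 520., 380.])], [bool mask of shape (768, 1344)], [0.91]]
# Note the labels list mirrors the confidences (see NOTE above), so SEG.label
# below carries a confidence value rather than a class name.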


# -------------------------------------------------------------------------
# YOLO TensorRT-based BBOX_DETECTOR implementation
# -------------------------------------------------------------------------


class TRTYOLOBBoxDetector:
    """BBOX_DETECTOR interface compatible with FaceDetailer."""

    def __init__(self, yolo_model: YOLO, device: str = "0"):
        self.bbox_model = yolo_model
        self.device = device or "0"

    def setAux(self, x: str):
        # Kept for interface compatibility
        pass

    def detect(
        self,
        image: torch.Tensor,
        threshold: float,
        dilation: int,
        crop_factor: float,
        drop_size: int = 1,
        detailer_hook=None,
    ):
        """Return FaceDetailer-style SEGS: ( (H, W), [SEG, ...] )."""
        if not isinstance(image, torch.Tensor):
            raise TypeError(f"[TRTYOLOBBoxDetector] Expected torch.Tensor for image, got {type(image)}")
        if image.dim() != 4:
            raise ValueError("[TRTYOLOBBoxDetector] Expected IMAGE tensor with 4 dims [B, H, W, C].")

        h, w = int(image.shape[1]), int(image.shape[2])
        shape = (h, w)

        detected = _inference_bbox(
            self.bbox_model,
            tensor_to_pil(image),
            confidence=float(threshold),
            device=str(self.device),
        )

        segmasks = _create_segmasks(detected)

        if int(dilation) > 0:
            segmasks = dilate_masks(segmasks, int(dilation))

        drop_size_int = int(drop_size) if int(drop_size) > 0 else 1

        items = []
        for (bbox, mask, conf), label in zip(segmasks, detected[0]):
            try:
                x1f = float(bbox[0])
                y1f = float(bbox[1])
                x2f = float(bbox[2])
                y2f = float(bbox[3])
            except Exception:
                continue

            bwf = x2f - x1f
            bhf = y2f - y1f

            if bwf > drop_size_int and bhf > drop_size_int:
                crop_region = make_crop_region(w, h, bbox, float(crop_factor))

                if detailer_hook is not None and hasattr(detailer_hook, "post_crop_region"):
                    try:
                        crop_region = detailer_hook.post_crop_region(w, h, bbox, crop_region)
                    except Exception:
                        pass

                cropped_image = crop_image(image, crop_region)
                cropped_mask = crop_ndarray2(mask, crop_region)

                items.append(SEG(cropped_image, cropped_mask, conf, crop_region, bbox, label, None))

        segs = (shape, items)

        if detailer_hook is not None and hasattr(detailer_hook, "post_detection"):
            try:
                segs = detailer_hook.post_detection(segs)
            except Exception:
                pass

        return segs

    def detect_combined(self, image: torch.Tensor, threshold: float, dilation: int) -> torch.Tensor:
        """Return a single combined MASK tensor covering all detections."""
        if not isinstance(image, torch.Tensor):
            raise TypeError(f"[TRTYOLOBBoxDetector] Expected torch.Tensor for image, got {type(image)}")
        if image.dim() != 4:
            raise ValueError("[TRTYOLOBBoxDetector] Expected IMAGE tensor with 4 dims [B, H, W, C].")

        detected = _inference_bbox(
            self.bbox_model,
            tensor_to_pil(image),
            confidence=float(threshold),
            device=str(self.device),
        )

        segmasks = _create_segmasks(detected)
        if int(dilation) > 0:
            segmasks = dilate_masks(segmasks, int(dilation))

        return combine_masks(segmasks, out_shape_hw=(int(image.shape[1]), int(image.shape[2])))
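
# Illustrative usage (mirrors what FD_Standalone_2 does internally):
#   detector = TRTYOLOBBoxDetector(load_engine_model(engine_path), device="0")
#   segs = detector.detect(image, 0.55, 0, 1.0, 10)   # -> ((H, W), [SEG, ...])
#   mask = detector.detect_combined(image, 0.55, 0)   # -> [1, H, W] MASK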


# =====================================================================================
# END INLINED: Salia_TRT_face.py
# =====================================================================================


# -----------------------------
# CLIP Text Encode (core) wrapper
# -----------------------------
_CLIP_TEXT_ENCODE_NODE = None


def _encode_conditioning(clip, text: str):
    """
    Use the comfy-core CLIPTextEncode node (preferred), with a robust fallback
    for older/newer core APIs.
    """
    global _CLIP_TEXT_ENCODE_NODE

    if text is None:
        text = ""

    # Preferred: call comfy-core node CLIPTextEncode
    if nodes is not None:
        if _CLIP_TEXT_ENCODE_NODE is None:
            _CLIP_TEXT_ENCODE_NODE = nodes.CLIPTextEncode()

        # Core node returns a tuple: (conditioning,)
        return _CLIP_TEXT_ENCODE_NODE.encode(clip=clip, text=text)[0]

    # Fallback if `import nodes` failed in this environment:
    if clip is None:
        raise RuntimeError("CLIP input is None (cannot encode).")

    tokens = clip.tokenize(text)

    # Newer API (2024/2025+)
    if hasattr(clip, "encode_from_tokens_scheduled"):
        return clip.encode_from_tokens_scheduled(tokens)

    # Older API fallback
    output = clip.encode_from_tokens(tokens, return_pooled=True, return_dict=True)
    cond = output.pop("cond")
    return [[cond, output]]
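
# Illustrative (as used in doit() below): each prompt is encoded exactly once
# per node execution, not per face or per segment:
#   positive = _encode_conditioning(clip, pos)
#   negative = _encode_conditioning(clip, neg)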


def _manual_bbox_from_ltrb(left, top, right, bottom):
    """
    Manual bbox override from 4 ints: (left, top, right, bottom).

    These 4 ints imply the 4 corners:
      - Top-left     = (left, top)
      - Top-right    = (right, top)
      - Bottom-left  = (left, bottom)
      - Bottom-right = (right, bottom)

    Convention:
      - If ANY value is None or < 0 -> return None (use YOLO detection).
      - Otherwise returns (x1, y1, x2, y2) with correct ordering.
    """
    if left is None or top is None or right is None or bottom is None:
        return None

    try:
        x1 = int(left)
        y1 = int(top)
        x2 = int(right)
        y2 = int(bottom)
    except Exception:
        return None

    # Sentinel: any negative => auto detect
    if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0:
        return None

    # Ensure proper ordering
    if x2 < x1:
        x1, x2 = x2, x1
    if y2 < y1:
        y1, y2 = y2, y1

    return (x1, y1, x2, y2)
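
# Worked examples (illustrative):
#   _manual_bbox_from_ltrb(-1, 0, 512, 512)    -> None (negative sentinel => YOLO)
#   _manual_bbox_from_ltrb(300, 100, 100, 300) -> (100, 100, 300, 300) (reordered)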


class FD_Standalone_2:
    _BBOX_DETECTOR = None

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                # CHANGED: take latent instead of image, and internally decode via TRT_D_HYPA_1344x768
                "latent": (
                    "LATENT",
                    {
                        "tooltip": "Latent to be decoded with TRT_D_HYPA_1344x768 before face detailing."
                    },
                ),
                "model": ("MODEL", {"tooltip": "If ImpactDummyInput connected, inference may be skipped."}),
                # single CLIP input (from Load Checkpoint)
                "clip": ("CLIP", {"tooltip": "CLIP from Load Checkpoint (SDXL CLIP is fine)."}),

                # NEW: manual bbox override via 4 ints (left/top/right/bottom)
                # Leave any value at -1 to use YOLO auto-detection.
                "bbox_left": (
                    "INT",
                    {
                        "default": -1,
                        "min": -1,
                        "max": 1000000,
                        "step": 1,
                        "tooltip": "Manual bbox LEFT (x1). Top-left=(LEFT,TOP), Bottom-left=(LEFT,BOTTOM). -1 => YOLO auto-detect.",
                    },
                ),
                "bbox_top": (
                    "INT",
                    {
                        "default": -1,
                        "min": -1,
                        "max": 1000000,
                        "step": 1,
                        "tooltip": "Manual bbox TOP (y1). Top-left=(LEFT,TOP), Top-right=(RIGHT,TOP). -1 => YOLO auto-detect.",
                    },
                ),
                "bbox_right": (
                    "INT",
                    {
                        "default": -1,
                        "min": -1,
                        "max": 1000000,
                        "step": 1,
                        "tooltip": "Manual bbox RIGHT (x2). Top-right=(RIGHT,TOP), Bottom-right=(RIGHT,BOTTOM). -1 => YOLO auto-detect.",
                    },
                ),
                "bbox_bottom": (
                    "INT",
                    {
                        "default": -1,
                        "min": -1,
                        "max": 1000000,
                        "step": 1,
                        "tooltip": "Manual bbox BOTTOM (y2). Bottom-left=(LEFT,BOTTOM), Bottom-right=(RIGHT,BOTTOM). -1 => YOLO auto-detect.",
                    },
                ),

                # POV integer
                "pov_id": (
                    "INT",
                    {
                        "default": 1,
                        "min": 1,
                        "max": 4,
                        "step": 1,
                        "tooltip": "POV: 1=front, 2=three-quarter, 3=side, 4=rear. If 4, node bypasses and outputs decoded image unchanged.",
                    },
                ),

                # single input string, internally parsed by Salia_Parsed into (pos, neg)
                "prompt": (
                    "STRING",
                    {
                        "multiline": True,
                        "default": "",
                        "dynamicPrompts": True,
                        "tooltip": "Single prompt string. Internally parsed by Salia_Parsed into (pos, neg) for face detailing.",
                    },
                ),
            },
            "optional": {},
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    OUTPUT_IS_LIST = (False,)
    FUNCTION = "doit"
    CATEGORY = "ImpactPack/Simple"

    @classmethod
    def _get_bbox_detector(cls):
        if cls._BBOX_DETECTOR is not None:
            return cls._BBOX_DETECTOR

        engine_path = os.path.join(NODE_DIR, ENGINE_FILENAME)

        if not os.path.isfile(engine_path):
            raise FileNotFoundError(
                f"[TRTYOLOBBoxDetectorProvider] Engine file not found: {engine_path}\n"
                f"Expected the file '{ENGINE_FILENAME}' next to this node .py file."
            )

        yolo_model = load_engine_model(engine_path)
        detector = TRTYOLOBBoxDetector(yolo_model, device="0")
        cls._BBOX_DETECTOR = detector
        return cls._BBOX_DETECTOR

    @staticmethod
    def enhance_face(image, model, positive, negative, bbox_detector=None, manual_bbox=None):
        """
        If manual_bbox is provided (x1,y1,x2,y2), skip the detector and detail only that region.
        Otherwise use bbox_detector.detect(...) (original behavior).
        """
        # Manual override path
        if manual_bbox is not None:
            try:
                return DetailerForEach.do_detail_bbox(image, manual_bbox, model, positive, negative)
            except Exception:
                return image

        # Original detection path
        if bbox_detector is None:
            return image

        try:
            bbox_detector.setAux("face")
        except Exception:
            pass

        try:
            segs = bbox_detector.detect(image, 0.55, 0, 1.0, 10)
        except Exception:
            try:
                bbox_detector.setAux(None)
            except Exception:
                pass
            return image

        try:
            bbox_detector.setAux(None)
        except Exception:
            pass

        try:
            num_segs = int(len(segs[1]))
        except Exception:
            num_segs = 0

        if num_segs == 0:
            return image

        try:
            out = DetailerForEach.do_detail(image, segs, model, positive, negative)
            return out
        except Exception:
            return image

    def doit(self, latent, model, clip, bbox_left, bbox_top, bbox_right, bbox_bottom, pov_id, prompt):
        # Step 1: decode latent -> image using the TRT VAE decoder
        decoder = _get_trt_decoder_1344x768()
        decoded = decoder.decode(latent)
        if isinstance(decoded, (list, tuple)):
            image = decoded[0]
        else:
            image = decoded

        # Normalize POV (1..4)
        try:
            pov_id_int = int(pov_id)
        except Exception:
            pov_id_int = 1
        if pov_id_int < 1:
            pov_id_int = 1
        if pov_id_int > 4:
            pov_id_int = 4

        # POV=4 (rear view): skip the entire task and output the decoded image unchanged
        if pov_id_int == 4:
            return (image,)

        # Parse the single prompt string -> (pos, neg)
        if prompt is None:
            prompt = ""
        parser = _get_salia_parsed_node()
        try:
            pos, neg = parser.run(pov_id_int, prompt)
        except Exception as exc:
            raise RuntimeError(f"[FD_Standalone] Salia_Parsed failed: {exc}") from exc

        # Encode ONCE per node execution (not per face / not per segment)
        skip_inference = isinstance(model, str) and model == "DUMMY"

        if skip_inference:
            positive = []
            negative = []
        else:
            positive = _encode_conditioning(clip, pos)
            negative = _encode_conditioning(clip, neg)

        # Decide manual bbox vs detector:
        #   If bbox_left/top/right/bottom are all >= 0 -> manual override.
        #   Otherwise -> YOLO detection.
        manual_bbox = _manual_bbox_from_ltrb(bbox_left, bbox_top, bbox_right, bbox_bottom)

        # Only load detector if needed
        bbox_detector = None
        if manual_bbox is None:
            bbox_detector = FD_Standalone_2._get_bbox_detector()

        outs = []
        # Image from TRT VAE is [B,H,W,C]; iterate over batch dimension
        for img in image:
            try:
                out = self.enhance_face(
                    img.unsqueeze(0),
                    model,
                    positive,
                    negative,
                    bbox_detector=bbox_detector,
                    manual_bbox=manual_bbox,
                )
            except Exception:
                out = img.unsqueeze(0)
            outs.append(out)

        try:
            result = torch.cat(outs, dim=0)
        except Exception:
            result = image

        return (result,)
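
# End-to-end flow (summary): decode the latent via the TRT VAE, parse the
# prompt into (pos, neg), encode both once, then per batch item detail either
# the manual bbox or every detected face, and re-batch with torch.cat.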


class DetailerForEach:
    @staticmethod
    def do_detail_bbox(image, bbox, model, positive, negative):
        """
        NEW: Detail exactly one bbox (x1,y1,x2,y2) without needing SEGS/detection.
        Uses the same square-crop/detail/paste logic as do_detail().
        """
        try:
            image = image.clone().cpu()
        except Exception:
            pass

        # Clamp bbox to image bounds (best-effort safety)
        try:
            h = int(image.shape[1])
            w = int(image.shape[2])
        except Exception:
            h, w = 0, 0

        try:
            x1, y1, x2, y2 = bbox
            x1 = int(x1)
            y1 = int(y1)
            x2 = int(x2)
            y2 = int(y2)
        except Exception:
            return image

        if w > 0 and h > 0:
            x1 = max(0, min(w - 1, x1))
            y1 = max(0, min(h - 1, y1))
            x2 = max(x1 + 1, min(w, x2))
            y2 = max(y1 + 1, min(h, y2))

        bbox_clamped = (x1, y1, x2, y2)

        try:
            model = nodes_differential_diffusion.DifferentialDiffusion().apply(model)[0]
        except Exception:
            pass

        try:
            rx1, ry1, side, _, _, _, _ = helpers.bbox_to_square_region(bbox_clamped, max_side=1024)
        except Exception:
            return image

        square_patch = helpers.crop_with_pad_nhwc(image, rx1, ry1, side, fill=0.0)
        if square_patch is None:
            return image

        try:
            if square_patch is not None and not (isinstance(model, str) and model == "DUMMY"):
                premult_side, alpha_side = helpers.enhance_detail_bbox_square(
                    square_patch,
                    model,
                    positive,
                    negative,
                    side=side,
                )
            else:
                premult_side = square_patch
                alpha_side = torch.ones(
                    (1, side, side, 1),
                    dtype=square_patch.dtype,
                    device=square_patch.device,
                )
        except Exception:
            return image

        try:
            helpers.tensor_paste_premult_oob(image, premult_side, alpha_side, (rx1, ry1))
        except Exception:
            pass

        try:
            out = helpers.tensor_convert_rgb(image)
        except Exception:
            out = image

        return out

    @staticmethod
    def do_detail(image, segs, model, positive, negative):
        try:
            image = image.clone().cpu()
        except Exception:
            pass

        try:
            _, ordered_segs = helpers.segs_scale_match(segs, image.shape)
        except Exception:
            ordered_segs = segs[1] if (segs and len(segs) > 1) else []

        try:
            model = nodes_differential_diffusion.DifferentialDiffusion().apply(model)[0]
        except Exception:
            pass

        for seg in ordered_segs:
            try:
                rx1, ry1, side, _, _, _, _ = helpers.bbox_to_square_region(seg.bbox, max_side=1024)
            except Exception:
                continue

            square_patch = helpers.crop_with_pad_nhwc(image, rx1, ry1, side, fill=0.0)

            try:
                if square_patch is not None and not (isinstance(model, str) and model == "DUMMY"):
                    premult_side, alpha_side = helpers.enhance_detail_bbox_square(
                        square_patch,
                        model,
                        positive,
                        negative,
                        side=side,
                    )
                else:
                    premult_side = square_patch
                    alpha_side = torch.ones(
                        (1, side, side, 1),
                        dtype=square_patch.dtype,
                        device=square_patch.device,
                    )
            except Exception:
                continue

            try:
                helpers.tensor_paste_premult_oob(image, premult_side, alpha_side, (rx1, ry1))
            except Exception:
                pass

        try:
            out = helpers.tensor_convert_rgb(image)
        except Exception:
            out = image

        return out


NODE_CLASS_MAPPINGS = {
    "FD_Standalone_2": FD_Standalone_2,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "FD_Standalone_2": "FD_Standalone_2",
}
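
# ComfyUI discovers this node via the two mappings above: the key
# "FD_Standalone_2" is the internal class name workflows reference, and the
# display-name mapping controls the label shown in the node picker.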