"""FD_Standalone: standalone face-detailer node for ComfyUI.

Pipeline implemented by the ``FD_Standalone_2`` node:

1. Decode the incoming LATENT with the TensorRT VAE decoder
   ``TRT_D_HYPA_1344x768`` (lazily imported from the comfyui-TRT_VAE
   custom node).
2. Parse the single prompt string into (positive, negative) via the
   sibling ``Salia_FD_Parsed`` module and encode both once with CLIP.
3. Detect faces with a TensorRT YOLO engine (``salia_face.engine`` next
   to this file) — or use a manual bbox override — and detail each face
   region using the helpers from ``Salia_Facedetailer_Helpers``.
"""

import os
import sys
import importlib.util

import torch
import numpy as np
from ultralytics import YOLO

from comfy_extras import nodes_differential_diffusion

# Import comfy core nodes (for CLIPTextEncode). Optional so this module can
# still be imported when the comfy-core `nodes` module is unavailable.
try:
    import nodes  # comfy-core nodes.py
except Exception:
    nodes = None

# -----------------------------
# helper loader
# -----------------------------
def _load_helpers():
    """Locate and import the Salia facedetailer helpers module.

    Resolution order:
    1. Package-relative import (normal ComfyUI custom-node packaging).
    2. Filesystem probe of known filename variants next to this file.
    3. Plain `sys.path` import as a last resort.

    Returns:
        The imported helpers module.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    candidate_filenames = (
        "Salia_Facedetailer_Helpers.py",
        "Salia_Facedetailer_helpers.py",
        "Facedetailer_helpers.py",
    )
    try:
        from . import Salia_Facedetailer_Helpers as helpers  # type: ignore
        return helpers
    except (ImportError, ModuleNotFoundError):
        pass
    for fname in candidate_filenames:
        path = os.path.join(here, fname)
        if os.path.isfile(path):
            mod_name = os.path.splitext(fname)[0]
            spec = importlib.util.spec_from_file_location(mod_name, path)
            if spec is None or spec.loader is None:
                continue
            module = importlib.util.module_from_spec(spec)
            sys.modules[mod_name] = module
            spec.loader.exec_module(module)
            return module
    # Last resort: make this directory importable and try the canonical name.
    if here not in sys.path:
        sys.path.insert(0, here)
    import Salia_Facedetailer_Helpers as helpers  # type: ignore
    return helpers


helpers = _load_helpers()

# Make sure the helpers module is always importable under this canonical name
# (needed because we inlined TRT code that imports SEG from
# Salia_Facedetailer_Helpers).
try:
    if "Salia_Facedetailer_Helpers" not in sys.modules:
        sys.modules["Salia_Facedetailer_Helpers"] = helpers
except Exception:
    pass

# -----------------------------
# Lazy import for TRT_D_HYPA (TRT VAE decoder)
# -----------------------------
_TRTHYPA_MODULE = None
_TRTHYPA_DECODER_1344x768 = None


def _load_trt_d_hypa_module():
    """Locate and import TRT_D_HYPA.py from the comfyui-TRT_VAE custom node.

    We intentionally resolve it via filesystem paths so we do not depend on
    how ComfyUI chooses to package/import custom nodes.

    Returns:
        The imported module, or ``None`` if the file cannot be found or no
        import spec could be created.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    # FD_Standalone.py lives at:
    #   .../custom_nodes/comfyui-salia_facedetailer/nodes/FD_Standalone.py
    # -> two dirnames up is custom_nodes
    custom_nodes_dir = os.path.dirname(os.path.dirname(here))
    trt_nodes_dir = os.path.join(custom_nodes_dir, "comfyui-TRT_VAE", "nodes")
    trt_file = os.path.join(trt_nodes_dir, "TRT_D_HYPA.py")
    if not os.path.isfile(trt_file):
        return None
    mod_name = "TRT_D_HYPA"
    # Reuse already-loaded module if present
    existing = sys.modules.get(mod_name)
    if existing is not None:
        return existing
    spec = importlib.util.spec_from_file_location(mod_name, trt_file)
    if spec is None or spec.loader is None:
        return None
    module = importlib.util.module_from_spec(spec)
    sys.modules[mod_name] = module
    try:
        spec.loader.exec_module(module)
    except Exception:
        # If import fails, remove the partially-loaded module to avoid
        # poisoning sys.modules.
        sys.modules.pop(mod_name, None)
        raise
    return module


def _get_trt_decoder_1344x768():
    """Return a singleton instance of TRT_D_HYPA_1344x768 (lazy-created).

    This keeps TensorRT engine initialization and memory allocations outside
    of the ComfyUI graph definition path and only runs them when the node is
    actually executed.

    Raises:
        ImportError: if the TRT_D_HYPA module or its decoder class cannot
            be located.
    """
    global _TRTHYPA_MODULE, _TRTHYPA_DECODER_1344x768
    if _TRTHYPA_DECODER_1344x768 is not None:
        return _TRTHYPA_DECODER_1344x768
    if _TRTHYPA_MODULE is None:
        _TRTHYPA_MODULE = _load_trt_d_hypa_module()
    if _TRTHYPA_MODULE is None:
        raise ImportError(
            "[FD_Standalone] Could not locate TRT_D_HYPA.py under comfyui-TRT_VAE/nodes. "
            "Make sure the comfyui-TRT_VAE custom node is installed."
        )
    try:
        DecoderCls = getattr(_TRTHYPA_MODULE, "TRT_D_HYPA_1344x768")
    except AttributeError as exc:
        raise ImportError(
            "[FD_Standalone] TRT_D_HYPA_1344x768 class not found inside TRT_D_HYPA.py."
        ) from exc
    _TRTHYPA_DECODER_1344x768 = DecoderCls()
    return _TRTHYPA_DECODER_1344x768


# -----------------------------
# Lazy import for Salia_FD_Parsed.py (NEXT TO THIS FILE)
# -----------------------------
_SALIA_FD_PARSED_MODULE = None
_SALIA_PARSED_NODE = None


def _load_salia_fd_parsed_module():
    """Load Salia_FD_Parsed.py from the same directory as this file
    (relative import-by-path).

    This remains valid if you move both files together to another folder.

    Returns:
        The imported module (guaranteed to define ``Salia_Parsed``).

    Raises:
        FileNotFoundError: if Salia_FD_Parsed.py is missing.
        ImportError: if the module cannot be imported or does not define
            ``Salia_Parsed``.
    """
    global _SALIA_FD_PARSED_MODULE
    here = os.path.dirname(os.path.abspath(__file__))
    parsed_file = os.path.join(here, "Salia_FD_Parsed.py")
    if not os.path.isfile(parsed_file):
        raise FileNotFoundError(
            f"[FD_Standalone] Missing Salia_FD_Parsed.py next to FD_Standalone.py.\n"
            f"Expected: {parsed_file}"
        )
    mod_name = "Salia_FD_Parsed"
    existing = sys.modules.get(mod_name)
    if existing is not None:
        # Reuse the cached module only if it is the same file and looks sane.
        try:
            existing_file = os.path.abspath(getattr(existing, "__file__", "") or "")
            if existing_file == os.path.abspath(parsed_file) and hasattr(existing, "Salia_Parsed"):
                _SALIA_FD_PARSED_MODULE = existing
                return existing
        except Exception:
            pass
    spec = importlib.util.spec_from_file_location(mod_name, parsed_file)
    if spec is None or spec.loader is None:
        raise ImportError(f"[FD_Standalone] Failed to create import spec for: {parsed_file}")
    module = importlib.util.module_from_spec(spec)
    sys.modules[mod_name] = module
    try:
        spec.loader.exec_module(module)
    except Exception:
        sys.modules.pop(mod_name, None)
        raise
    if not hasattr(module, "Salia_Parsed"):
        raise ImportError(
            f"[FD_Standalone] Loaded {parsed_file}, but it does not define Salia_Parsed."
        )
    _SALIA_FD_PARSED_MODULE = module
    return module


def _get_salia_parsed_node():
    """Return a singleton instance of Salia_Parsed (lazy-created)."""
    global _SALIA_PARSED_NODE
    if _SALIA_PARSED_NODE is not None:
        return _SALIA_PARSED_NODE
    module = _load_salia_fd_parsed_module()
    ParserCls = getattr(module, "Salia_Parsed", None)
    if ParserCls is None:
        raise ImportError("[FD_Standalone] Salia_Parsed class not found in Salia_FD_Parsed.py.")
    _SALIA_PARSED_NODE = ParserCls()
    return _SALIA_PARSED_NODE


# =====================================================================================
# INLINED: Salia_TRT_face.py (everything except the node wrapper)
# =====================================================================================

# Shared SEG definition (same fields as in Facedetailer_helpers)
try:
    from .Salia_Facedetailer_Helpers import SEG
except ImportError:
    # Fallback if used outside of a package
    from Salia_Facedetailer_Helpers import SEG

# -------------------------------------------------------------------------
# Constants
# -------------------------------------------------------------------------
NODE_DIR = os.path.dirname(os.path.abspath(__file__))

# Engine is always this exact filename, located next to this .py file
ENGINE_FILENAME = "salia_face.engine"

# Optional: cache to avoid re-loading the engine every execution
_YOLO_ENGINE_CACHE = {}


def load_yolo_detect(model_path: str) -> YOLO:
    """Load a YOLO model with task explicitly set to 'detect' to suppress:
        WARNING ⚠️ Unable to automatically guess model task...

    Works across Ultralytics versions by falling back if 'task=' isn't
    supported.
    """
    try:
        m = YOLO(model_path, task="detect")
    except TypeError:
        # Older Ultralytics versions may not accept 'task=' in the constructor
        m = YOLO(model_path)
    # Reinforce task in case the backend/model doesn't carry task metadata
    # (e.g. TRT engine)
    try:
        m.task = "detect"
    except Exception:
        pass
    try:
        if hasattr(m, "overrides") and isinstance(m.overrides, dict):
            m.overrides["task"] = "detect"
    except Exception:
        pass
    return m


def load_engine_model(engine_path: str) -> YOLO:
    """Load (and cache) the TensorRT engine as a YOLO detect model."""
    m = _YOLO_ENGINE_CACHE.get(engine_path)
    if m is None:
        m = load_yolo_detect(engine_path)
        _YOLO_ENGINE_CACHE[engine_path] = m
    return m


# -------------------------------------------------------------------------
# Helpers (mirrors Salia_BBOX.py behavior)
# -------------------------------------------------------------------------
def tensor_to_pil(image: torch.Tensor):
    """Convert a ComfyUI IMAGE tensor [B,H,W,C] (0..1) to a PIL RGB image
    (first item in batch)."""
    from PIL import Image

    if not isinstance(image, torch.Tensor):
        raise TypeError(f"Expected torch.Tensor, got {type(image)}")
    if image.dim() == 4:
        img = image[0]
    else:
        img = image
    img = img.detach()
    if img.is_cuda:
        img = img.cpu()
    img = img.clamp(0, 1).numpy()
    # Grayscale -> RGB by channel repetition
    if img.shape[-1] == 1:
        img = np.repeat(img, 3, axis=-1)
    img_u8 = (img * 255.0).round().astype(np.uint8)
    return Image.fromarray(img_u8)


def make_crop_region(w: int, h: int, bbox_xyxy, crop_factor: float, crop_min_size=None):
    """Expanded bbox crop-region logic, clamped to image.

    Args:
        w, h: full image width/height.
        bbox_xyxy: (x1, y1, x2, y2) detection box.
        crop_factor: multiplier applied to the bbox size.
        crop_min_size: optional lower bound for crop width/height.

    Returns:
        (x1, y1, x2, y2) integer crop region inside the image.
    """
    try:
        x1f = float(bbox_xyxy[0])
        y1f = float(bbox_xyxy[1])
        x2f = float(bbox_xyxy[2])
        y2f = float(bbox_xyxy[3])
    except Exception:
        x1f = y1f = x2f = y2f = 0.0
    bbox_w = max(1.0, x2f - x1f)
    bbox_h = max(1.0, y2f - y1f)
    crop_w = bbox_w * float(crop_factor)
    crop_h = bbox_h * float(crop_factor)
    if crop_min_size is not None:
        crop_w = max(crop_w, float(crop_min_size))
        crop_h = max(crop_h, float(crop_min_size))
    # Expand symmetrically around the bbox center
    cx = (x1f + x2f) / 2.0
    cy = (y1f + y2f) / 2.0
    rx1 = int(round(cx - crop_w / 2.0))
    ry1 = int(round(cy - crop_h / 2.0))
    rx2 = int(round(cx + crop_w / 2.0))
    ry2 = int(round(cy + crop_h / 2.0))
    # clamp (region always has at least 1px extent)
    rx1 = max(0, min(w - 1, rx1))
    ry1 = max(0, min(h - 1, ry1))
    rx2 = max(rx1 + 1, min(w, rx2))
    ry2 = max(ry1 + 1, min(h, ry2))
    return (rx1, ry1, rx2, ry2)


def crop_image(image: torch.Tensor, crop_region):
    """Crop a ComfyUI IMAGE tensor [B,H,W,C] using (x1,y1,x2,y2)."""
    x1, y1, x2, y2 = crop_region
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    if image.dim() == 4:
        return image[:, y1:y2, x1:x2, :]
    if image.dim() == 3:
        return image[y1:y2, x1:x2, :]
    raise ValueError(f"Unexpected image tensor shape: {tuple(image.shape)}")


def crop_ndarray2(arr: np.ndarray, crop_region):
    """Crop a 2D numpy array using (x1,y1,x2,y2)."""
    x1, y1, x2, y2 = crop_region
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    return arr[y1:y2, x1:x2]


try:
    import cv2  # opencv-python or opencv-python-headless
except Exception:
    cv2 = None


def dilate_masks(segmasks, dilation: int):
    """Dilate masks only if dilation > 0 and cv2 is available.

    Args:
        segmasks: list of (bbox, mask, conf) tuples.
        dilation: structuring-element radius; <= 0 is a no-op.
    """
    if dilation <= 0:
        return segmasks
    if cv2 is None:
        return segmasks
    k = int(dilation)
    ksize = k * 2 + 1
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (ksize, ksize))
    out = []
    for bbox, mask, conf in segmasks:
        try:
            m = (mask > 0.5).astype(np.uint8) * 255
            m = cv2.dilate(m, kernel, iterations=1)
            out_mask = (m > 0).astype(np.float32)
            out.append((bbox, out_mask, conf))
        except Exception:
            # Best effort: keep the undilated mask on failure
            out.append((bbox, mask, conf))
    return out


def combine_masks(segmasks, out_shape_hw=None) -> torch.Tensor:
    """Combine multiple masks using max().

    Returns a [1,H,W] float32 tensor; when segmasks is empty, returns zeros
    of out_shape_hw (or a 1x1 placeholder when no shape is given).
    """
    if not segmasks:
        if out_shape_hw is None:
            return torch.zeros((1, 1, 1), dtype=torch.float32)
        h, w = out_shape_hw
        return torch.zeros((1, h, w), dtype=torch.float32)
    base = segmasks[0][1]
    combined = np.zeros_like(base, dtype=np.float32)
    for _, m, _ in segmasks:
        try:
            combined = np.maximum(combined, m.astype(np.float32))
        except Exception:
            pass
    return torch.from_numpy(combined).unsqueeze(0)


def _create_segmasks(results):
    """Create list of (bbox, mask_float32, conf) from _inference_bbox output."""
    bboxes = results[1]
    segms = results[2]
    confs = results[3]
    out = []
    try:
        n = int(len(segms))
    except Exception:
        n = 0
    for i in range(n):
        try:
            out.append((bboxes[i], segms[i].astype(np.float32), confs[i]))
        except Exception:
            pass
    return out


def _inference_bbox(model, image_pil, confidence: float = 0.3, device: str = "0"):
    """Run bbox inference and return:
        [labels, bboxes_xyxy_list, segm_masks_list, confs_list]

    Where segm_masks are full-image boolean masks (rectangle fill per bbox).
    """
    pred = model(image_pil, conf=float(confidence), device=str(device), verbose=False)
    bboxes = pred[0].boxes.xyxy.cpu().numpy()  # xyxy
    if bboxes is None or (hasattr(bboxes, "shape") and bboxes.shape[0] == 0):
        return [[], [], [], []]
    # Original image size (H, W)
    w_orig, h_orig = image_pil.size
    ih = int(h_orig)
    iw = int(w_orig)
    segms = []
    for (x0, y0, x1, y1) in bboxes:
        m = np.zeros((ih, iw), dtype=np.uint8)
        # Clamp coords (each coordinate independently best-effort)
        try:
            x0i = int(x0)
        except Exception:
            x0i = 0
        try:
            y0i = int(y0)
        except Exception:
            y0i = 0
        try:
            x1i = int(x1)
        except Exception:
            x1i = 0
        try:
            y1i = int(y1)
        except Exception:
            y1i = 0
        x0c = max(0, min(iw - 1, x0i))
        x1c = max(x0c + 1, min(iw, x1i))
        y0c = max(0, min(ih - 1, y0i))
        y1c = max(y0c + 1, min(ih, y1i))
        if cv2 is not None:
            try:
                cv2.rectangle(m, (x0c, y0c), (x1c, y1c), 255, -1)
            except Exception:
                m[y0c:y1c, x0c:x1c] = 255
        else:
            m[y0c:y1c, x0c:x1c] = 255
        segms.append((m > 0))
    labels = []
    confs = []
    names = getattr(pred[0], "names", None)
    names_is_seq = isinstance(names, (list, tuple))
    for i in range(len(bboxes)):
        # label (resolved but currently unused — see NOTE below)
        label = "unknown"
        try:
            cls_idx = int(pred[0].boxes[i].cls.item())
            if names_is_seq:
                label = names[cls_idx] if 0 <= cls_idx < len(names) else str(cls_idx)
            elif isinstance(names, dict):
                label = names.get(cls_idx, str(cls_idx))
            else:
                label = str(cls_idx)
        except Exception:
            label = "unknown"
        # conf (force to float)
        try:
            conf_val = float(pred[0].boxes[i].conf.item())
        except Exception:
            conf_val = 0.0
        labels.append(conf_val)  # NOTE: kept as-is from your original code
        confs.append(conf_val)
    return [labels, list(bboxes), segms, confs]


# -------------------------------------------------------------------------
# YOLO TensorRT-based BBOX_DETECTOR implementation
# -------------------------------------------------------------------------
class TRTYOLOBBoxDetector:
    """BBOX_DETECTOR interface compatible with FaceDetailer."""

    def __init__(self, yolo_model: YOLO, device: str = "0"):
        self.bbox_model = yolo_model
        self.device = device or "0"

    def setAux(self, x: str):
        # Kept for interface compatibility
        pass

    def detect(
        self,
        image: torch.Tensor,
        threshold: float,
        dilation: int,
        crop_factor: float,
        drop_size: int = 1,
        detailer_hook=None,
    ):
        """Return FaceDetailer-style SEGS: ( (H, W), [SEG, ...] )."""
        if not isinstance(image, torch.Tensor):
            raise TypeError(f"[TRTYOLOBBoxDetector] Expected torch.Tensor for image, got {type(image)}")
        if image.dim() != 4:
            raise ValueError("[TRTYOLOBBoxDetector] Expected IMAGE tensor with 4 dims [B, H, W, C].")
        h, w = int(image.shape[1]), int(image.shape[2])
        shape = (h, w)
        detected = _inference_bbox(
            self.bbox_model,
            tensor_to_pil(image),
            confidence=float(threshold),
            device=str(self.device),
        )
        segmasks = _create_segmasks(detected)
        if int(dilation) > 0:
            segmasks = dilate_masks(segmasks, int(dilation))
        drop_size_int = int(drop_size) if int(drop_size) > 0 else 1
        items = []
        # detected[0] holds the per-detection "labels" (confidence floats —
        # see NOTE in _inference_bbox); zipped pairwise with the masks.
        for (bbox, mask, conf), label in zip(segmasks, detected[0]):
            try:
                x1f = float(bbox[0])
                y1f = float(bbox[1])
                x2f = float(bbox[2])
                y2f = float(bbox[3])
            except Exception:
                continue
            bwf = x2f - x1f
            bhf = y2f - y1f
            # Skip detections smaller than drop_size in either dimension
            if bwf > drop_size_int and bhf > drop_size_int:
                crop_region = make_crop_region(w, h, bbox, float(crop_factor))
                if detailer_hook is not None and hasattr(detailer_hook, "post_crop_region"):
                    try:
                        crop_region = detailer_hook.post_crop_region(w, h, bbox, crop_region)
                    except Exception:
                        pass
                cropped_image = crop_image(image, crop_region)
                cropped_mask = crop_ndarray2(mask, crop_region)
                items.append(SEG(cropped_image, cropped_mask, conf, crop_region, bbox, label, None))
        segs = (shape, items)
        if detailer_hook is not None and hasattr(detailer_hook, "post_detection"):
            try:
                segs = detailer_hook.post_detection(segs)
            except Exception:
                pass
        return segs

    def detect_combined(self, image: torch.Tensor, threshold: float, dilation: int) -> torch.Tensor:
        """Return a single combined MASK tensor covering all detections."""
        if not isinstance(image, torch.Tensor):
            raise TypeError(f"[TRTYOLOBBoxDetector] Expected torch.Tensor for image, got {type(image)}")
        if image.dim() != 4:
            raise ValueError("[TRTYOLOBBoxDetector] Expected IMAGE tensor with 4 dims [B, H, W, C].")
        detected = _inference_bbox(
            self.bbox_model,
            tensor_to_pil(image),
            confidence=float(threshold),
            device=str(self.device),
        )
        segmasks = _create_segmasks(detected)
        if int(dilation) > 0:
            segmasks = dilate_masks(segmasks, int(dilation))
        return combine_masks(segmasks, out_shape_hw=(int(image.shape[1]), int(image.shape[2])))


# =====================================================================================
# END INLINED: Salia_TRT_face.py
# =====================================================================================

# -----------------------------
# CLIP Text Encode (core) wrapper
# -----------------------------
_CLIP_TEXT_ENCODE_NODE = None


def _encode_conditioning(clip, text: str):
    """Encode text into CONDITIONING.

    Uses comfy-core CLIPTextEncode node (preferred), with a robust fallback
    for older/newer core APIs.
    """
    global _CLIP_TEXT_ENCODE_NODE
    if text is None:
        text = ""
    # Preferred: call comfy-core node CLIPTextEncode
    if nodes is not None:
        if _CLIP_TEXT_ENCODE_NODE is None:
            _CLIP_TEXT_ENCODE_NODE = nodes.CLIPTextEncode()
        # Core node returns a tuple: (conditioning,)
        return _CLIP_TEXT_ENCODE_NODE.encode(clip=clip, text=text)[0]
    # Fallback if for some reason `import nodes` failed in your environment:
    if clip is None:
        raise RuntimeError("CLIP input is None (cannot encode).")
    tokens = clip.tokenize(text)
    # Newer-ish API (2024/2025+)
    if hasattr(clip, "encode_from_tokens_scheduled"):
        return clip.encode_from_tokens_scheduled(tokens)
    # Older API fallback
    output = clip.encode_from_tokens(tokens, return_pooled=True, return_dict=True)
    cond = output.pop("cond")
    return [[cond, output]]


def _manual_bbox_from_ltrb(left, top, right, bottom):
    """Manual bbox override from 4 ints: (left, top, right, bottom).

    These 4 ints imply the 4 corners:
      - Top-left     = (left, top)
      - Top-right    = (right, top)
      - Bottom-left  = (left, bottom)
      - Bottom-right = (right, bottom)

    Convention:
      - If ANY value is None or < 0 -> return None (use YOLO detection).
      - Otherwise returns (x1, y1, x2, y2) with correct ordering.
    """
    if left is None or top is None or right is None or bottom is None:
        return None
    try:
        x1 = int(left)
        y1 = int(top)
        x2 = int(right)
        y2 = int(bottom)
    except Exception:
        return None
    # Sentinel: any negative => auto detect
    if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0:
        return None
    # Ensure proper ordering
    if x2 < x1:
        x1, x2 = x2, x1
    if y2 < y1:
        y1, y2 = y2, y1
    return (x1, y1, x2, y2)


class FD_Standalone_2:
    """ComfyUI node: decode latent via TRT VAE, then face-detail the result."""

    # Lazily-created singleton TRTYOLOBBoxDetector (shared across executions)
    _BBOX_DETECTOR = None

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                # CHANGED: take latent instead of image, and internally decode
                # via TRT_D_HYPA_1344x768
                "latent": (
                    "LATENT",
                    {
                        "tooltip": "Latent to be decoded with TRT_D_HYPA_1344x768 before face detailing."
                    },
                ),
                "model": ("MODEL", {"tooltip": "If ImpactDummyInput connected, inference may be skipped."}),
                # single CLIP input (from Load Checkpoint)
                "clip": ("CLIP", {"tooltip": "CLIP from Load Checkpoint (SDXL CLIP is fine)."}),
                # NEW: manual bbox override via 4 ints (left/top/right/bottom)
                # Leave any value at -1 to use YOLO auto-detection.
                "bbox_left": (
                    "INT",
                    {
                        "default": -1,
                        "min": -1,
                        "max": 1000000,
                        "step": 1,
                        "tooltip": "Manual bbox LEFT (x1). Top-left=(LEFT,TOP), Bottom-left=(LEFT,BOTTOM). -1 => YOLO auto-detect.",
                    },
                ),
                "bbox_top": (
                    "INT",
                    {
                        "default": -1,
                        "min": -1,
                        "max": 1000000,
                        "step": 1,
                        "tooltip": "Manual bbox TOP (y1). Top-left=(LEFT,TOP), Top-right=(RIGHT,TOP). -1 => YOLO auto-detect.",
                    },
                ),
                "bbox_right": (
                    "INT",
                    {
                        "default": -1,
                        "min": -1,
                        "max": 1000000,
                        "step": 1,
                        "tooltip": "Manual bbox RIGHT (x2). Top-right=(RIGHT,TOP), Bottom-right=(RIGHT,BOTTOM). -1 => YOLO auto-detect.",
                    },
                ),
                "bbox_bottom": (
                    "INT",
                    {
                        "default": -1,
                        "min": -1,
                        "max": 1000000,
                        "step": 1,
                        "tooltip": "Manual bbox BOTTOM (y2). Bottom-left=(LEFT,BOTTOM), Bottom-right=(RIGHT,BOTTOM). -1 => YOLO auto-detect.",
                    },
                ),
                # POV integer
                "pov_id": (
                    "INT",
                    {
                        "default": 1,
                        "min": 1,
                        "max": 4,
                        "step": 1,
                        "tooltip": "POV: 1=front, 2=three-quarter, 3=side, 4=rear. If 4, node bypasses and outputs decoded image unchanged.",
                    },
                ),
                # single input string, internally parsed by Salia_Parsed into (pos, neg)
                "prompt": (
                    "STRING",
                    {
                        "multiline": True,
                        "default": "",
                        "dynamicPrompts": True,
                        "tooltip": "Single prompt string. Internally parsed by Salia_Parsed into (pos, neg) for face detailing.",
                    },
                ),
            },
            "optional": {},
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    OUTPUT_IS_LIST = (False,)
    FUNCTION = "doit"
    CATEGORY = "ImpactPack/Simple"

    @classmethod
    def _get_bbox_detector(cls):
        """Create (once) and return the TRT YOLO bbox detector singleton.

        Raises:
            FileNotFoundError: if the engine file is missing next to this
                .py file.
        """
        if cls._BBOX_DETECTOR is not None:
            return cls._BBOX_DETECTOR
        engine_path = os.path.join(NODE_DIR, ENGINE_FILENAME)
        if not os.path.isfile(engine_path):
            raise FileNotFoundError(
                f"[TRTYOLOBBoxDetectorProvider] Engine file not found: {engine_path}\n"
                f"Expected the file '{ENGINE_FILENAME}' next to this node .py file."
            )
        yolo_model = load_engine_model(engine_path)
        detector = TRTYOLOBBoxDetector(yolo_model, device="0")
        cls._BBOX_DETECTOR = detector
        return cls._BBOX_DETECTOR

    @staticmethod
    def enhance_face(image, model, positive, negative, bbox_detector=None, manual_bbox=None):
        """Detail face region(s) in a single-image batch.

        If manual_bbox is provided (x1,y1,x2,y2), skip detector and detail
        only that region. Otherwise use bbox_detector.detect(...) (original
        behavior). Returns the (possibly unchanged) image on any failure.
        """
        # Manual override path
        if manual_bbox is not None:
            try:
                return DetailerForEach.do_detail_bbox(image, manual_bbox, model, positive, negative)
            except Exception:
                return image
        # Original detection path
        if bbox_detector is None:
            return image
        try:
            bbox_detector.setAux("face")
        except Exception:
            pass
        try:
            segs = bbox_detector.detect(image, 0.55, 0, 1.0, 10)
        except Exception:
            try:
                bbox_detector.setAux(None)
            except Exception:
                pass
            return image
        try:
            bbox_detector.setAux(None)
        except Exception:
            pass
        try:
            num_segs = int(len(segs[1]))
        except Exception:
            num_segs = 0
        if num_segs == 0:
            return image
        try:
            out = DetailerForEach.do_detail(image, segs, model, positive, negative)
            return out
        except Exception:
            return image

    def doit(self, latent, model, clip, bbox_left, bbox_top, bbox_right, bbox_bottom, pov_id, prompt):
        """Node entry point: decode -> (optionally) detail -> return IMAGE."""
        # Step 1: decode latent -> image using the TRT VAE decoder
        decoder = _get_trt_decoder_1344x768()
        decoded = decoder.decode(latent)
        if isinstance(decoded, (list, tuple)):
            image = decoded[0]
        else:
            image = decoded

        # Normalize POV (clamped into 1..4)
        try:
            pov_id_int = int(pov_id)
        except Exception:
            pov_id_int = 1
        if pov_id_int < 1:
            pov_id_int = 1
        if pov_id_int > 4:
            pov_id_int = 4

        # POV=4 (rear view): skip entire task and output decoded image unchanged
        if pov_id_int == 4:
            return (image,)

        # Parse the single prompt string -> (pos, neg)
        if prompt is None:
            prompt = ""
        parser = _get_salia_parsed_node()
        try:
            pos, neg = parser.run(pov_id_int, prompt)
        except Exception as exc:
            raise RuntimeError(f"[FD_Standalone] Salia_Parsed failed: {exc}") from exc

        # Encode ONCE per node execution (not per face / not per segment)
        skip_inference = isinstance(model, str) and model == "DUMMY"
        if skip_inference:
            positive = []
            negative = []
        else:
            positive = _encode_conditioning(clip, pos)
            negative = _encode_conditioning(clip, neg)

        # Decide manual bbox vs detector:
        # If bbox_left/top/right/bottom are all >= 0 -> manual override.
        # Otherwise -> YOLO detection.
        manual_bbox = _manual_bbox_from_ltrb(bbox_left, bbox_top, bbox_right, bbox_bottom)

        # Only load detector if needed
        # BUGFIX: this previously referenced the non-existent name
        # `FD_Standalone`, which raised NameError on the auto-detect path.
        bbox_detector = None
        if manual_bbox is None:
            bbox_detector = self._get_bbox_detector()

        outs = []
        # Image from TRT VAE is [B,H,W,C]; iterate over batch dimension
        for img in image:
            try:
                out = self.enhance_face(
                    img.unsqueeze(0),
                    model,
                    positive,
                    negative,
                    bbox_detector=bbox_detector,
                    manual_bbox=manual_bbox,
                )
            except Exception:
                out = img.unsqueeze(0)
            outs.append(out)
        try:
            result = torch.cat(outs, dim=0)
        except Exception:
            result = image
        return (result,)


class DetailerForEach:
    """Square-crop / detail / paste logic shared by manual-bbox and SEGS paths."""

    @staticmethod
    def do_detail_bbox(image, bbox, model, positive, negative):
        """Detail exactly one bbox (x1,y1,x2,y2) without needing SEGS/detection.

        Uses the same square-crop/detail/paste logic as do_detail(). Returns
        the (possibly unchanged) image on any failure.
        """
        try:
            image = image.clone().cpu()
        except Exception:
            pass
        # Clamp bbox to image bounds (best-effort safety)
        try:
            h = int(image.shape[1])
            w = int(image.shape[2])
        except Exception:
            h, w = 0, 0
        try:
            x1, y1, x2, y2 = bbox
            x1 = int(x1)
            y1 = int(y1)
            x2 = int(x2)
            y2 = int(y2)
        except Exception:
            return image
        if w > 0 and h > 0:
            x1 = max(0, min(w - 1, x1))
            y1 = max(0, min(h - 1, y1))
            x2 = max(x1 + 1, min(w, x2))
            y2 = max(y1 + 1, min(h, y2))
        bbox_clamped = (x1, y1, x2, y2)
        # Best-effort: wrap the model with DifferentialDiffusion for masked detailing
        try:
            model = nodes_differential_diffusion.DifferentialDiffusion().apply(model)[0]
        except Exception:
            pass
        try:
            rx1, ry1, side, _, _, _, _ = helpers.bbox_to_square_region(bbox_clamped, max_side=1024)
        except Exception:
            return image
        square_patch = helpers.crop_with_pad_nhwc(image, rx1, ry1, side, fill=0.0)
        if square_patch is None:
            return image
        try:
            if not (isinstance(model, str) and model == "DUMMY"):
                premult_side, alpha_side = helpers.enhance_detail_bbox_square(
                    square_patch,
                    model,
                    positive,
                    negative,
                    side=side,
                )
            else:
                # DUMMY model: paste the unmodified patch back with full alpha
                premult_side = square_patch
                alpha_side = torch.ones(
                    (1, side, side, 1),
                    dtype=square_patch.dtype,
                    device=square_patch.device,
                )
        except Exception:
            return image
        try:
            helpers.tensor_paste_premult_oob(image, premult_side, alpha_side, (rx1, ry1))
        except Exception:
            pass
        try:
            out = helpers.tensor_convert_rgb(image)
        except Exception:
            out = image
        return out

    @staticmethod
    def do_detail(image, segs, model, positive, negative):
        """Detail every segment in FaceDetailer-style SEGS ((H,W), [SEG,...])."""
        try:
            image = image.clone().cpu()
        except Exception:
            pass
        try:
            _, ordered_segs = helpers.segs_scale_match(segs, image.shape)
        except Exception:
            ordered_segs = segs[1] if (segs and len(segs) > 1) else []
        # Best-effort: wrap the model with DifferentialDiffusion for masked detailing
        try:
            model = nodes_differential_diffusion.DifferentialDiffusion().apply(model)[0]
        except Exception:
            pass
        for seg in ordered_segs:
            try:
                rx1, ry1, side, _, _, _, _ = helpers.bbox_to_square_region(seg.bbox, max_side=1024)
            except Exception:
                continue
            square_patch = helpers.crop_with_pad_nhwc(image, rx1, ry1, side, fill=0.0)
            if square_patch is None:
                # Nothing to detail for this segment
                continue
            try:
                if not (isinstance(model, str) and model == "DUMMY"):
                    premult_side, alpha_side = helpers.enhance_detail_bbox_square(
                        square_patch,
                        model,
                        positive,
                        negative,
                        side=side,
                    )
                else:
                    # DUMMY model: paste the unmodified patch back with full alpha
                    premult_side = square_patch
                    alpha_side = torch.ones(
                        (1, side, side, 1),
                        dtype=square_patch.dtype,
                        device=square_patch.device,
                    )
            except Exception:
                continue
            try:
                helpers.tensor_paste_premult_oob(image, premult_side, alpha_side, (rx1, ry1))
            except Exception:
                pass
        try:
            out = helpers.tensor_convert_rgb(image)
        except Exception:
            out = image
        return out


NODE_CLASS_MAPPINGS = {
    "FD_Standalone_2": FD_Standalone_2,
}
NODE_DISPLAY_NAME_MAPPINGS = {
    "FD_Standalone_2": "FD_Standalone_2",
}