"""FD_Standalone: standalone face-detailer node for ComfyUI.

Pipeline implemented by the ``FD_Standalone_2`` node:

1. Decode the incoming LATENT with the TensorRT VAE decoder
   ``TRT_D_HYPA_1344x768`` (lazily imported from the comfyui-TRT_VAE
   custom node).
2. Parse the single prompt string into (positive, negative) via the
   sibling ``Salia_FD_Parsed`` module and encode both once with CLIP.
3. Detect faces with a TensorRT YOLO engine (``salia_face.engine`` next
   to this file) — or use a manual bbox override — and detail each face
   region using the helpers from ``Salia_Facedetailer_Helpers``.
"""

import os
import sys
import importlib.util

import torch
import numpy as np
from ultralytics import YOLO

from comfy_extras import nodes_differential_diffusion

# Import comfy core nodes (for CLIPTextEncode). Optional so this module can
# still be imported when the comfy-core `nodes` module is unavailable.
try:
    import nodes  # comfy-core nodes.py
except Exception:
    nodes = None

# -----------------------------
# helper loader
# -----------------------------
def _load_helpers():
    """Locate and import the Salia facedetailer helpers module.

    Resolution order:
    1. Package-relative import (normal ComfyUI custom-node packaging).
    2. Filesystem probe of known filename variants next to this file.
    3. Plain `sys.path` import as a last resort.

    Returns:
        The imported helpers module.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    candidate_filenames = (
        "Salia_Facedetailer_Helpers.py",
        "Salia_Facedetailer_helpers.py",
        "Facedetailer_helpers.py",
    )
    try:
        from . import Salia_Facedetailer_Helpers as helpers  # type: ignore
        return helpers
    except (ImportError, ModuleNotFoundError):
        pass
    for fname in candidate_filenames:
        path = os.path.join(here, fname)
        if os.path.isfile(path):
            mod_name = os.path.splitext(fname)[0]
            spec = importlib.util.spec_from_file_location(mod_name, path)
            if spec is None or spec.loader is None:
                continue
            module = importlib.util.module_from_spec(spec)
            sys.modules[mod_name] = module
            spec.loader.exec_module(module)
            return module
    # Last resort: make this directory importable and try the canonical name.
    if here not in sys.path:
        sys.path.insert(0, here)
    import Salia_Facedetailer_Helpers as helpers  # type: ignore
    return helpers


helpers = _load_helpers()

# Make sure the helpers module is always importable under this canonical name
# (needed because we inlined TRT code that imports SEG from
# Salia_Facedetailer_Helpers).
try:
    if "Salia_Facedetailer_Helpers" not in sys.modules:
        sys.modules["Salia_Facedetailer_Helpers"] = helpers
except Exception:
    pass

# -----------------------------
# Lazy import for TRT_D_HYPA (TRT VAE decoder)
# -----------------------------
_TRTHYPA_MODULE = None
_TRTHYPA_DECODER_1344x768 = None


def _load_trt_d_hypa_module():
    """Locate and import TRT_D_HYPA.py from the comfyui-TRT_VAE custom node.

    We intentionally resolve it via filesystem paths so we do not depend on
    how ComfyUI chooses to package/import custom nodes.

    Returns:
        The imported module, or ``None`` if the file cannot be found or no
        import spec could be created.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    # FD_Standalone.py lives at:
    #   .../custom_nodes/comfyui-salia_facedetailer/nodes/FD_Standalone.py
    # -> two dirnames up is custom_nodes
    custom_nodes_dir = os.path.dirname(os.path.dirname(here))
    trt_nodes_dir = os.path.join(custom_nodes_dir, "comfyui-TRT_VAE", "nodes")
    trt_file = os.path.join(trt_nodes_dir, "TRT_D_HYPA.py")
    if not os.path.isfile(trt_file):
        return None
    mod_name = "TRT_D_HYPA"
    # Reuse already-loaded module if present
    existing = sys.modules.get(mod_name)
    if existing is not None:
        return existing
    spec = importlib.util.spec_from_file_location(mod_name, trt_file)
    if spec is None or spec.loader is None:
        return None
    module = importlib.util.module_from_spec(spec)
    sys.modules[mod_name] = module
    try:
        spec.loader.exec_module(module)
    except Exception:
        # If import fails, remove the partially-loaded module to avoid
        # poisoning sys.modules.
        sys.modules.pop(mod_name, None)
        raise
    return module


def _get_trt_decoder_1344x768():
    """Return a singleton instance of TRT_D_HYPA_1344x768 (lazy-created).

    This keeps TensorRT engine initialization and memory allocations outside
    of the ComfyUI graph definition path and only runs them when the node is
    actually executed.

    Raises:
        ImportError: if the TRT_D_HYPA module or its decoder class cannot
            be located.
    """
    global _TRTHYPA_MODULE, _TRTHYPA_DECODER_1344x768
    if _TRTHYPA_DECODER_1344x768 is not None:
        return _TRTHYPA_DECODER_1344x768
    if _TRTHYPA_MODULE is None:
        _TRTHYPA_MODULE = _load_trt_d_hypa_module()
    if _TRTHYPA_MODULE is None:
        raise ImportError(
            "[FD_Standalone] Could not locate TRT_D_HYPA.py under comfyui-TRT_VAE/nodes. "
            "Make sure the comfyui-TRT_VAE custom node is installed."
        )
    try:
        DecoderCls = getattr(_TRTHYPA_MODULE, "TRT_D_HYPA_1344x768")
    except AttributeError as exc:
        raise ImportError(
            "[FD_Standalone] TRT_D_HYPA_1344x768 class not found inside TRT_D_HYPA.py."
        ) from exc
    _TRTHYPA_DECODER_1344x768 = DecoderCls()
    return _TRTHYPA_DECODER_1344x768


# -----------------------------
# Lazy import for Salia_FD_Parsed.py (NEXT TO THIS FILE)
# -----------------------------
_SALIA_FD_PARSED_MODULE = None
_SALIA_PARSED_NODE = None


def _load_salia_fd_parsed_module():
    """Load Salia_FD_Parsed.py from the same directory as this file
    (relative import-by-path).

    This remains valid if you move both files together to another folder.

    Returns:
        The imported module (guaranteed to define ``Salia_Parsed``).

    Raises:
        FileNotFoundError: if Salia_FD_Parsed.py is missing.
        ImportError: if the module cannot be imported or does not define
            ``Salia_Parsed``.
    """
    global _SALIA_FD_PARSED_MODULE
    here = os.path.dirname(os.path.abspath(__file__))
    parsed_file = os.path.join(here, "Salia_FD_Parsed.py")
    if not os.path.isfile(parsed_file):
        raise FileNotFoundError(
            f"[FD_Standalone] Missing Salia_FD_Parsed.py next to FD_Standalone.py.\n"
            f"Expected: {parsed_file}"
        )
    mod_name = "Salia_FD_Parsed"
    existing = sys.modules.get(mod_name)
    if existing is not None:
        # Reuse the cached module only if it is the same file and looks sane.
        try:
            existing_file = os.path.abspath(getattr(existing, "__file__", "") or "")
            if existing_file == os.path.abspath(parsed_file) and hasattr(existing, "Salia_Parsed"):
                _SALIA_FD_PARSED_MODULE = existing
                return existing
        except Exception:
            pass
    spec = importlib.util.spec_from_file_location(mod_name, parsed_file)
    if spec is None or spec.loader is None:
        raise ImportError(f"[FD_Standalone] Failed to create import spec for: {parsed_file}")
    module = importlib.util.module_from_spec(spec)
    sys.modules[mod_name] = module
    try:
        spec.loader.exec_module(module)
    except Exception:
        sys.modules.pop(mod_name, None)
        raise
    if not hasattr(module, "Salia_Parsed"):
        raise ImportError(
            f"[FD_Standalone] Loaded {parsed_file}, but it does not define Salia_Parsed."
        )
    _SALIA_FD_PARSED_MODULE = module
    return module


def _get_salia_parsed_node():
    """Return a singleton instance of Salia_Parsed (lazy-created)."""
    global _SALIA_PARSED_NODE
    if _SALIA_PARSED_NODE is not None:
        return _SALIA_PARSED_NODE
    module = _load_salia_fd_parsed_module()
    ParserCls = getattr(module, "Salia_Parsed", None)
    if ParserCls is None:
        raise ImportError("[FD_Standalone] Salia_Parsed class not found in Salia_FD_Parsed.py.")
    _SALIA_PARSED_NODE = ParserCls()
    return _SALIA_PARSED_NODE


# =====================================================================================
# INLINED: Salia_TRT_face.py (everything except the node wrapper)
# =====================================================================================

# Shared SEG definition (same fields as in Facedetailer_helpers)
try:
    from .Salia_Facedetailer_Helpers import SEG
except ImportError:
    # Fallback if used outside of a package
    from Salia_Facedetailer_Helpers import SEG

# -------------------------------------------------------------------------
# Constants
# -------------------------------------------------------------------------
NODE_DIR = os.path.dirname(os.path.abspath(__file__))

# Engine is always this exact filename, located next to this .py file
ENGINE_FILENAME = "salia_face.engine"

# Optional: cache to avoid re-loading the engine every execution
_YOLO_ENGINE_CACHE = {}


def load_yolo_detect(model_path: str) -> YOLO:
    """Load a YOLO model with task explicitly set to 'detect' to suppress:
        WARNING ⚠️ Unable to automatically guess model task...

    Works across Ultralytics versions by falling back if 'task=' isn't
    supported.
    """
    try:
        m = YOLO(model_path, task="detect")
    except TypeError:
        # Older Ultralytics versions may not accept 'task=' in the constructor
        m = YOLO(model_path)
    # Reinforce task in case the backend/model doesn't carry task metadata
    # (e.g. TRT engine)
    try:
        m.task = "detect"
    except Exception:
        pass
    try:
        if hasattr(m, "overrides") and isinstance(m.overrides, dict):
            m.overrides["task"] = "detect"
    except Exception:
        pass
    return m


def load_engine_model(engine_path: str) -> YOLO:
    """Load (and cache) the TensorRT engine as a YOLO detect model."""
    m = _YOLO_ENGINE_CACHE.get(engine_path)
    if m is None:
        m = load_yolo_detect(engine_path)
        _YOLO_ENGINE_CACHE[engine_path] = m
    return m


# -------------------------------------------------------------------------
# Helpers (mirrors Salia_BBOX.py behavior)
# -------------------------------------------------------------------------
def tensor_to_pil(image: torch.Tensor):
    """Convert a ComfyUI IMAGE tensor [B,H,W,C] (0..1) to a PIL RGB image
    (first item in batch)."""
    from PIL import Image

    if not isinstance(image, torch.Tensor):
        raise TypeError(f"Expected torch.Tensor, got {type(image)}")
    if image.dim() == 4:
        img = image[0]
    else:
        img = image
    img = img.detach()
    if img.is_cuda:
        img = img.cpu()
    img = img.clamp(0, 1).numpy()
    # Grayscale -> RGB by channel repetition
    if img.shape[-1] == 1:
        img = np.repeat(img, 3, axis=-1)
    img_u8 = (img * 255.0).round().astype(np.uint8)
    return Image.fromarray(img_u8)


def make_crop_region(w: int, h: int, bbox_xyxy, crop_factor: float, crop_min_size=None):
    """Expanded bbox crop-region logic, clamped to image.

    Args:
        w, h: full image width/height.
        bbox_xyxy: (x1, y1, x2, y2) detection box.
        crop_factor: multiplier applied to the bbox size.
        crop_min_size: optional lower bound for crop width/height.

    Returns:
        (x1, y1, x2, y2) integer crop region inside the image.
    """
    try:
        x1f = float(bbox_xyxy[0])
        y1f = float(bbox_xyxy[1])
        x2f = float(bbox_xyxy[2])
        y2f = float(bbox_xyxy[3])
    except Exception:
        x1f = y1f = x2f = y2f = 0.0
    bbox_w = max(1.0, x2f - x1f)
    bbox_h = max(1.0, y2f - y1f)
    crop_w = bbox_w * float(crop_factor)
    crop_h = bbox_h * float(crop_factor)
    if crop_min_size is not None:
        crop_w = max(crop_w, float(crop_min_size))
        crop_h = max(crop_h, float(crop_min_size))
    # Expand symmetrically around the bbox center
    cx = (x1f + x2f) / 2.0
    cy = (y1f + y2f) / 2.0
    rx1 = int(round(cx - crop_w / 2.0))
    ry1 = int(round(cy - crop_h / 2.0))
    rx2 = int(round(cx + crop_w / 2.0))
    ry2 = int(round(cy + crop_h / 2.0))
    # clamp (region always has at least 1px extent)
    rx1 = max(0, min(w - 1, rx1))
    ry1 = max(0, min(h - 1, ry1))
    rx2 = max(rx1 + 1, min(w, rx2))
    ry2 = max(ry1 + 1, min(h, ry2))
    return (rx1, ry1, rx2, ry2)


def crop_image(image: torch.Tensor, crop_region):
    """Crop a ComfyUI IMAGE tensor [B,H,W,C] using (x1,y1,x2,y2)."""
    x1, y1, x2, y2 = crop_region
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    if image.dim() == 4:
        return image[:, y1:y2, x1:x2, :]
    if image.dim() == 3:
        return image[y1:y2, x1:x2, :]
    raise ValueError(f"Unexpected image tensor shape: {tuple(image.shape)}")


def crop_ndarray2(arr: np.ndarray, crop_region):
    """Crop a 2D numpy array using (x1,y1,x2,y2)."""
    x1, y1, x2, y2 = crop_region
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    return arr[y1:y2, x1:x2]


try:
    import cv2  # opencv-python or opencv-python-headless
except Exception:
    cv2 = None


def dilate_masks(segmasks, dilation: int):
    """Dilate masks only if dilation > 0 and cv2 is available.

    Args:
        segmasks: list of (bbox, mask, conf) tuples.
        dilation: structuring-element radius; <= 0 is a no-op.
    """
    if dilation <= 0:
        return segmasks
    if cv2 is None:
        return segmasks
    k = int(dilation)
    ksize = k * 2 + 1
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (ksize, ksize))
    out = []
    for bbox, mask, conf in segmasks:
        try:
            m = (mask > 0.5).astype(np.uint8) * 255
            m = cv2.dilate(m, kernel, iterations=1)
            out_mask = (m > 0).astype(np.float32)
            out.append((bbox, out_mask, conf))
        except Exception:
            # Best effort: keep the undilated mask on failure
            out.append((bbox, mask, conf))
    return out


def combine_masks(segmasks, out_shape_hw=None) -> torch.Tensor:
    """Combine multiple masks using max().

    Returns a [1,H,W] float32 tensor; when segmasks is empty, returns zeros
    of out_shape_hw (or a 1x1 placeholder when no shape is given).
    """
    if not segmasks:
        if out_shape_hw is None:
            return torch.zeros((1, 1, 1), dtype=torch.float32)
        h, w = out_shape_hw
        return torch.zeros((1, h, w), dtype=torch.float32)
    base = segmasks[0][1]
    combined = np.zeros_like(base, dtype=np.float32)
    for _, m, _ in segmasks:
        try:
            combined = np.maximum(combined, m.astype(np.float32))
        except Exception:
            pass
    return torch.from_numpy(combined).unsqueeze(0)


def _create_segmasks(results):
    """Create list of (bbox, mask_float32, conf) from _inference_bbox output."""
    bboxes = results[1]
    segms = results[2]
    confs = results[3]
    out = []
    try:
        n = int(len(segms))
    except Exception:
        n = 0
    for i in range(n):
        try:
            out.append((bboxes[i], segms[i].astype(np.float32), confs[i]))
        except Exception:
            pass
    return out


def _inference_bbox(model, image_pil, confidence: float = 0.3, device: str = "0"):
    """Run bbox inference and return:
        [labels, bboxes_xyxy_list, segm_masks_list, confs_list]

    Where segm_masks are full-image boolean masks (rectangle fill per bbox).
    """
    pred = model(image_pil, conf=float(confidence), device=str(device), verbose=False)
    bboxes = pred[0].boxes.xyxy.cpu().numpy()  # xyxy
    if bboxes is None or (hasattr(bboxes, "shape") and bboxes.shape[0] == 0):
        return [[], [], [], []]
    # Original image size (H, W)
    w_orig, h_orig = image_pil.size
    ih = int(h_orig)
    iw = int(w_orig)
    segms = []
    for (x0, y0, x1, y1) in bboxes:
        m = np.zeros((ih, iw), dtype=np.uint8)
        # Clamp coords (each coordinate independently best-effort)
        try:
            x0i = int(x0)
        except Exception:
            x0i = 0
        try:
            y0i = int(y0)
        except Exception:
            y0i = 0
        try:
            x1i = int(x1)
        except Exception:
            x1i = 0
        try:
            y1i = int(y1)
        except Exception:
            y1i = 0
        x0c = max(0, min(iw - 1, x0i))
        x1c = max(x0c + 1, min(iw, x1i))
        y0c = max(0, min(ih - 1, y0i))
        y1c = max(y0c + 1, min(ih, y1i))
        if cv2 is not None:
            try:
                cv2.rectangle(m, (x0c, y0c), (x1c, y1c), 255, -1)
            except Exception:
                m[y0c:y1c, x0c:x1c] = 255
        else:
            m[y0c:y1c, x0c:x1c] = 255
        segms.append((m > 0))
    labels = []
    confs = []
    names = getattr(pred[0], "names", None)
    names_is_seq = isinstance(names, (list, tuple))
    for i in range(len(bboxes)):
        # label (resolved but currently unused — see NOTE below)
        label = "unknown"
        try:
            cls_idx = int(pred[0].boxes[i].cls.item())
            if names_is_seq:
                label = names[cls_idx] if 0 <= cls_idx < len(names) else str(cls_idx)
            elif isinstance(names, dict):
                label = names.get(cls_idx, str(cls_idx))
            else:
                label = str(cls_idx)
        except Exception:
            label = "unknown"
        # conf (force to float)
        try:
            conf_val = float(pred[0].boxes[i].conf.item())
        except Exception:
            conf_val = 0.0
        labels.append(conf_val)  # NOTE: kept as-is from your original code
        confs.append(conf_val)
    return [labels, list(bboxes), segms, confs]


# -------------------------------------------------------------------------
# YOLO TensorRT-based BBOX_DETECTOR implementation
# -------------------------------------------------------------------------
class TRTYOLOBBoxDetector:
    """BBOX_DETECTOR interface compatible with FaceDetailer."""

    def __init__(self, yolo_model: YOLO, device: str = "0"):
        self.bbox_model = yolo_model
        self.device = device or "0"

    def setAux(self, x: str):
        # Kept for interface compatibility
        pass

    def detect(
        self,
        image: torch.Tensor,
        threshold: float,
        dilation: int,
        crop_factor: float,
        drop_size: int = 1,
        detailer_hook=None,
    ):
        """Return FaceDetailer-style SEGS: ( (H, W), [SEG, ...] )."""
        if not isinstance(image, torch.Tensor):
            raise TypeError(f"[TRTYOLOBBoxDetector] Expected torch.Tensor for image, got {type(image)}")
        if image.dim() != 4:
            raise ValueError("[TRTYOLOBBoxDetector] Expected IMAGE tensor with 4 dims [B, H, W, C].")
        h, w = int(image.shape[1]), int(image.shape[2])
        shape = (h, w)
        detected = _inference_bbox(
            self.bbox_model,
            tensor_to_pil(image),
            confidence=float(threshold),
            device=str(self.device),
        )
        segmasks = _create_segmasks(detected)
        if int(dilation) > 0:
            segmasks = dilate_masks(segmasks, int(dilation))
        drop_size_int = int(drop_size) if int(drop_size) > 0 else 1
        items = []
        # detected[0] holds the per-detection "labels" (confidence floats —
        # see NOTE in _inference_bbox); zipped pairwise with the masks.
        for (bbox, mask, conf), label in zip(segmasks, detected[0]):
            try:
                x1f = float(bbox[0])
                y1f = float(bbox[1])
                x2f = float(bbox[2])
                y2f = float(bbox[3])
            except Exception:
                continue
            bwf = x2f - x1f
            bhf = y2f - y1f
            # Skip detections smaller than drop_size in either dimension
            if bwf > drop_size_int and bhf > drop_size_int:
                crop_region = make_crop_region(w, h, bbox, float(crop_factor))
                if detailer_hook is not None and hasattr(detailer_hook, "post_crop_region"):
                    try:
                        crop_region = detailer_hook.post_crop_region(w, h, bbox, crop_region)
                    except Exception:
                        pass
                cropped_image = crop_image(image, crop_region)
                cropped_mask = crop_ndarray2(mask, crop_region)
                items.append(SEG(cropped_image, cropped_mask, conf, crop_region, bbox, label, None))
        segs = (shape, items)
        if detailer_hook is not None and hasattr(detailer_hook, "post_detection"):
            try:
                segs = detailer_hook.post_detection(segs)
            except Exception:
                pass
        return segs

    def detect_combined(self, image: torch.Tensor, threshold: float, dilation: int) -> torch.Tensor:
        """Return a single combined MASK tensor covering all detections."""
        if not isinstance(image, torch.Tensor):
            raise TypeError(f"[TRTYOLOBBoxDetector] Expected torch.Tensor for image, got {type(image)}")
        if image.dim() != 4:
            raise ValueError("[TRTYOLOBBoxDetector] Expected IMAGE tensor with 4 dims [B, H, W, C].")
        detected = _inference_bbox(
            self.bbox_model,
            tensor_to_pil(image),
            confidence=float(threshold),
            device=str(self.device),
        )
        segmasks = _create_segmasks(detected)
        if int(dilation) > 0:
            segmasks = dilate_masks(segmasks, int(dilation))
        return combine_masks(segmasks, out_shape_hw=(int(image.shape[1]), int(image.shape[2])))


# =====================================================================================
# END INLINED: Salia_TRT_face.py
# =====================================================================================

# -----------------------------
# CLIP Text Encode (core) wrapper
# -----------------------------
_CLIP_TEXT_ENCODE_NODE = None


def _encode_conditioning(clip, text: str):
    """Encode text into CONDITIONING.

    Uses comfy-core CLIPTextEncode node (preferred), with a robust fallback
    for older/newer core APIs.
    """
    global _CLIP_TEXT_ENCODE_NODE
    if text is None:
        text = ""
    # Preferred: call comfy-core node CLIPTextEncode
    if nodes is not None:
        if _CLIP_TEXT_ENCODE_NODE is None:
            _CLIP_TEXT_ENCODE_NODE = nodes.CLIPTextEncode()
        # Core node returns a tuple: (conditioning,)
        return _CLIP_TEXT_ENCODE_NODE.encode(clip=clip, text=text)[0]
    # Fallback if for some reason `import nodes` failed in your environment:
    if clip is None:
        raise RuntimeError("CLIP input is None (cannot encode).")
    tokens = clip.tokenize(text)
    # Newer-ish API (2024/2025+)
    if hasattr(clip, "encode_from_tokens_scheduled"):
        return clip.encode_from_tokens_scheduled(tokens)
    # Older API fallback
    output = clip.encode_from_tokens(tokens, return_pooled=True, return_dict=True)
    cond = output.pop("cond")
    return [[cond, output]]


def _manual_bbox_from_ltrb(left, top, right, bottom):
    """Manual bbox override from 4 ints: (left, top, right, bottom).

    These 4 ints imply the 4 corners:
      - Top-left     = (left, top)
      - Top-right    = (right, top)
      - Bottom-left  = (left, bottom)
      - Bottom-right = (right, bottom)

    Convention:
      - If ANY value is None or < 0 -> return None (use YOLO detection).
      - Otherwise returns (x1, y1, x2, y2) with correct ordering.
    """
    if left is None or top is None or right is None or bottom is None:
        return None
    try:
        x1 = int(left)
        y1 = int(top)
        x2 = int(right)
        y2 = int(bottom)
    except Exception:
        return None
    # Sentinel: any negative => auto detect
    if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0:
        return None
    # Ensure proper ordering
    if x2 < x1:
        x1, x2 = x2, x1
    if y2 < y1:
        y1, y2 = y2, y1
    return (x1, y1, x2, y2)


class FD_Standalone_2:
    """ComfyUI node: decode latent via TRT VAE, then face-detail the result."""

    # Lazily-created singleton TRTYOLOBBoxDetector (shared across executions)
    _BBOX_DETECTOR = None

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                # CHANGED: take latent instead of image, and internally decode
                # via TRT_D_HYPA_1344x768
                "latent": (
                    "LATENT",
                    {
                        "tooltip": "Latent to be decoded with TRT_D_HYPA_1344x768 before face detailing."
                    },
                ),
                "model": ("MODEL", {"tooltip": "If ImpactDummyInput connected, inference may be skipped."}),
                # single CLIP input (from Load Checkpoint)
                "clip": ("CLIP", {"tooltip": "CLIP from Load Checkpoint (SDXL CLIP is fine)."}),
                # NEW: manual bbox override via 4 ints (left/top/right/bottom)
                # Leave any value at -1 to use YOLO auto-detection.
                "bbox_left": (
                    "INT",
                    {
                        "default": -1,
                        "min": -1,
                        "max": 1000000,
                        "step": 1,
                        "tooltip": "Manual bbox LEFT (x1). Top-left=(LEFT,TOP), Bottom-left=(LEFT,BOTTOM). -1 => YOLO auto-detect.",
                    },
                ),
                "bbox_top": (
                    "INT",
                    {
                        "default": -1,
                        "min": -1,
                        "max": 1000000,
                        "step": 1,
                        "tooltip": "Manual bbox TOP (y1). Top-left=(LEFT,TOP), Top-right=(RIGHT,TOP). -1 => YOLO auto-detect.",
                    },
                ),
                "bbox_right": (
                    "INT",
                    {
                        "default": -1,
                        "min": -1,
                        "max": 1000000,
                        "step": 1,
                        "tooltip": "Manual bbox RIGHT (x2). Top-right=(RIGHT,TOP), Bottom-right=(RIGHT,BOTTOM). -1 => YOLO auto-detect.",
                    },
                ),
                "bbox_bottom": (
                    "INT",
                    {
                        "default": -1,
                        "min": -1,
                        "max": 1000000,
                        "step": 1,
                        "tooltip": "Manual bbox BOTTOM (y2). Bottom-left=(LEFT,BOTTOM), Bottom-right=(RIGHT,BOTTOM). -1 => YOLO auto-detect.",
                    },
                ),
                # POV integer
                "pov_id": (
                    "INT",
                    {
                        "default": 1,
                        "min": 1,
                        "max": 4,
                        "step": 1,
                        "tooltip": "POV: 1=front, 2=three-quarter, 3=side, 4=rear. If 4, node bypasses and outputs decoded image unchanged.",
                    },
                ),
                # single input string, internally parsed by Salia_Parsed into (pos, neg)
                "prompt": (
                    "STRING",
                    {
                        "multiline": True,
                        "default": "",
                        "dynamicPrompts": True,
                        "tooltip": "Single prompt string. Internally parsed by Salia_Parsed into (pos, neg) for face detailing.",
                    },
                ),
            },
            "optional": {},
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    OUTPUT_IS_LIST = (False,)
    FUNCTION = "doit"
    CATEGORY = "ImpactPack/Simple"

    @classmethod
    def _get_bbox_detector(cls):
        """Create (once) and return the TRT YOLO bbox detector singleton.

        Raises:
            FileNotFoundError: if the engine file is missing next to this
                .py file.
        """
        if cls._BBOX_DETECTOR is not None:
            return cls._BBOX_DETECTOR
        engine_path = os.path.join(NODE_DIR, ENGINE_FILENAME)
        if not os.path.isfile(engine_path):
            raise FileNotFoundError(
                f"[TRTYOLOBBoxDetectorProvider] Engine file not found: {engine_path}\n"
                f"Expected the file '{ENGINE_FILENAME}' next to this node .py file."
            )
        yolo_model = load_engine_model(engine_path)
        detector = TRTYOLOBBoxDetector(yolo_model, device="0")
        cls._BBOX_DETECTOR = detector
        return cls._BBOX_DETECTOR

    @staticmethod
    def enhance_face(image, model, positive, negative, bbox_detector=None, manual_bbox=None):
        """Detail face region(s) in a single-image batch.

        If manual_bbox is provided (x1,y1,x2,y2), skip detector and detail
        only that region. Otherwise use bbox_detector.detect(...) (original
        behavior). Returns the (possibly unchanged) image on any failure.
        """
        # Manual override path
        if manual_bbox is not None:
            try:
                return DetailerForEach.do_detail_bbox(image, manual_bbox, model, positive, negative)
            except Exception:
                return image
        # Original detection path
        if bbox_detector is None:
            return image
        try:
            bbox_detector.setAux("face")
        except Exception:
            pass
        try:
            segs = bbox_detector.detect(image, 0.55, 0, 1.0, 10)
        except Exception:
            try:
                bbox_detector.setAux(None)
            except Exception:
                pass
            return image
        try:
            bbox_detector.setAux(None)
        except Exception:
            pass
        try:
            num_segs = int(len(segs[1]))
        except Exception:
            num_segs = 0
        if num_segs == 0:
            return image
        try:
            out = DetailerForEach.do_detail(image, segs, model, positive, negative)
            return out
        except Exception:
            return image

    def doit(self, latent, model, clip, bbox_left, bbox_top, bbox_right, bbox_bottom, pov_id, prompt):
        """Node entry point: decode -> (optionally) detail -> return IMAGE."""
        # Step 1: decode latent -> image using the TRT VAE decoder
        decoder = _get_trt_decoder_1344x768()
        decoded = decoder.decode(latent)
        if isinstance(decoded, (list, tuple)):
            image = decoded[0]
        else:
            image = decoded

        # Normalize POV (clamped into 1..4)
        try:
            pov_id_int = int(pov_id)
        except Exception:
            pov_id_int = 1
        if pov_id_int < 1:
            pov_id_int = 1
        if pov_id_int > 4:
            pov_id_int = 4

        # POV=4 (rear view): skip entire task and output decoded image unchanged
        if pov_id_int == 4:
            return (image,)

        # Parse the single prompt string -> (pos, neg)
        if prompt is None:
            prompt = ""
        parser = _get_salia_parsed_node()
        try:
            pos, neg = parser.run(pov_id_int, prompt)
        except Exception as exc:
            raise RuntimeError(f"[FD_Standalone] Salia_Parsed failed: {exc}") from exc

        # Encode ONCE per node execution (not per face / not per segment)
        skip_inference = isinstance(model, str) and model == "DUMMY"
        if skip_inference:
            positive = []
            negative = []
        else:
            positive = _encode_conditioning(clip, pos)
            negative = _encode_conditioning(clip, neg)

        # Decide manual bbox vs detector:
        # If bbox_left/top/right/bottom are all >= 0 -> manual override.
        # Otherwise -> YOLO detection.
        manual_bbox = _manual_bbox_from_ltrb(bbox_left, bbox_top, bbox_right, bbox_bottom)

        # Only load detector if needed
        # BUGFIX: this previously referenced the non-existent name
        # `FD_Standalone`, which raised NameError on the auto-detect path.
        bbox_detector = None
        if manual_bbox is None:
            bbox_detector = self._get_bbox_detector()

        outs = []
        # Image from TRT VAE is [B,H,W,C]; iterate over batch dimension
        for img in image:
            try:
                out = self.enhance_face(
                    img.unsqueeze(0),
                    model,
                    positive,
                    negative,
                    bbox_detector=bbox_detector,
                    manual_bbox=manual_bbox,
                )
            except Exception:
                out = img.unsqueeze(0)
            outs.append(out)
        try:
            result = torch.cat(outs, dim=0)
        except Exception:
            result = image
        return (result,)


class DetailerForEach:
    """Square-crop / detail / paste logic shared by manual-bbox and SEGS paths."""

    @staticmethod
    def do_detail_bbox(image, bbox, model, positive, negative):
        """Detail exactly one bbox (x1,y1,x2,y2) without needing SEGS/detection.

        Uses the same square-crop/detail/paste logic as do_detail(). Returns
        the (possibly unchanged) image on any failure.
        """
        try:
            image = image.clone().cpu()
        except Exception:
            pass
        # Clamp bbox to image bounds (best-effort safety)
        try:
            h = int(image.shape[1])
            w = int(image.shape[2])
        except Exception:
            h, w = 0, 0
        try:
            x1, y1, x2, y2 = bbox
            x1 = int(x1)
            y1 = int(y1)
            x2 = int(x2)
            y2 = int(y2)
        except Exception:
            return image
        if w > 0 and h > 0:
            x1 = max(0, min(w - 1, x1))
            y1 = max(0, min(h - 1, y1))
            x2 = max(x1 + 1, min(w, x2))
            y2 = max(y1 + 1, min(h, y2))
        bbox_clamped = (x1, y1, x2, y2)
        # Best-effort: wrap the model with DifferentialDiffusion for masked detailing
        try:
            model = nodes_differential_diffusion.DifferentialDiffusion().apply(model)[0]
        except Exception:
            pass
        try:
            rx1, ry1, side, _, _, _, _ = helpers.bbox_to_square_region(bbox_clamped, max_side=1024)
        except Exception:
            return image
        square_patch = helpers.crop_with_pad_nhwc(image, rx1, ry1, side, fill=0.0)
        if square_patch is None:
            return image
        try:
            if not (isinstance(model, str) and model == "DUMMY"):
                premult_side, alpha_side = helpers.enhance_detail_bbox_square(
                    square_patch,
                    model,
                    positive,
                    negative,
                    side=side,
                )
            else:
                # DUMMY model: paste the unmodified patch back with full alpha
                premult_side = square_patch
                alpha_side = torch.ones(
                    (1, side, side, 1),
                    dtype=square_patch.dtype,
                    device=square_patch.device,
                )
        except Exception:
            return image
        try:
            helpers.tensor_paste_premult_oob(image, premult_side, alpha_side, (rx1, ry1))
        except Exception:
            pass
        try:
            out = helpers.tensor_convert_rgb(image)
        except Exception:
            out = image
        return out

    @staticmethod
    def do_detail(image, segs, model, positive, negative):
        """Detail every segment in FaceDetailer-style SEGS ((H,W), [SEG,...])."""
        try:
            image = image.clone().cpu()
        except Exception:
            pass
        try:
            _, ordered_segs = helpers.segs_scale_match(segs, image.shape)
        except Exception:
            ordered_segs = segs[1] if (segs and len(segs) > 1) else []
        # Best-effort: wrap the model with DifferentialDiffusion for masked detailing
        try:
            model = nodes_differential_diffusion.DifferentialDiffusion().apply(model)[0]
        except Exception:
            pass
        for seg in ordered_segs:
            try:
                rx1, ry1, side, _, _, _, _ = helpers.bbox_to_square_region(seg.bbox, max_side=1024)
            except Exception:
                continue
            square_patch = helpers.crop_with_pad_nhwc(image, rx1, ry1, side, fill=0.0)
            if square_patch is None:
                # Nothing to detail for this segment
                continue
            try:
                if not (isinstance(model, str) and model == "DUMMY"):
                    premult_side, alpha_side = helpers.enhance_detail_bbox_square(
                        square_patch,
                        model,
                        positive,
                        negative,
                        side=side,
                    )
                else:
                    # DUMMY model: paste the unmodified patch back with full alpha
                    premult_side = square_patch
                    alpha_side = torch.ones(
                        (1, side, side, 1),
                        dtype=square_patch.dtype,
                        device=square_patch.device,
                    )
            except Exception:
                continue
            try:
                helpers.tensor_paste_premult_oob(image, premult_side, alpha_side, (rx1, ry1))
            except Exception:
                pass
        try:
            out = helpers.tensor_convert_rgb(image)
        except Exception:
            out = image
        return out


NODE_CLASS_MAPPINGS = {
    "FD_Standalone_2": FD_Standalone_2,
}
NODE_DISPLAY_NAME_MAPPINGS = {
    "FD_Standalone_2": "FD_Standalone_2",
}