saliacoel commited on
Commit
71c89c1
·
verified ·
1 Parent(s): d828181

Update Inspyrenet_Rembg2.py

Browse files
Files changed (1) hide show
  1. Inspyrenet_Rembg2.py +285 -508
Inspyrenet_Rembg2.py CHANGED
@@ -1,20 +1,3 @@
1
- # Inspyrenet_Rembg2.py
2
- # - Keeps InspyrenetRembg2 + InspyrenetRembg3
3
- # - REMOVES InspyrenetRembg4 (do not include)
4
- # - ADDS:
5
- # Load_Inspyrenet_Global
6
- # Remove_Inspyrenet_Gobal
7
- # Run_InspyrenetRembg_Global
8
- #
9
- # Design:
10
- # - One process-wide Remover singleton per (mode, jit, ckpt)
11
- # - Construct Remover on CPU (safe). You explicitly move it to GPU via Load node.
12
- # - Run node:
13
- # * ensures it is on desired device (auto/cuda/cpu)
14
- # * if OOM: evicts VRAM (smallest-first or comfy_default, etc.) and retries
15
- # * if still OOM: optionally falls back to CPU
16
- # * NEVER crashes on OOM: returns original image (pass-through) as last resort
17
-
18
  from __future__ import annotations
19
 
20
  from PIL import Image
@@ -22,15 +5,15 @@ import os
22
  import urllib.request
23
  import gc
24
  import threading
25
- from typing import Dict, Tuple, Optional, Any
26
- from contextlib import nullcontext
27
 
28
  import torch
29
  import numpy as np
30
  from transparent_background import Remover
31
  from tqdm import tqdm
32
 
33
- # Optional: ComfyUI memory manager (present when running inside ComfyUI)
 
34
  try:
35
  import comfy.model_management as comfy_mm
36
  except Exception:
@@ -42,11 +25,6 @@ CKPT_URL = "https://huggingface.co/saliacoel/x/resolve/main/ckpt_base.pth"
42
 
43
 
44
  def _ensure_ckpt_base():
45
- """
46
- 1) Check /root/.transparent-background/ckpt_base.pth
47
- - if exists: do nothing
48
- - else: download from CKPT_URL
49
- """
50
  try:
51
  if os.path.isfile(CKPT_PATH) and os.path.getsize(CKPT_PATH) > 0:
52
  return
@@ -83,7 +61,6 @@ def _ensure_ckpt_base():
83
  f.write(chunk)
84
 
85
  os.replace(tmp_path, CKPT_PATH)
86
-
87
  finally:
88
  if os.path.isfile(tmp_path):
89
  try:
@@ -92,10 +69,7 @@ def _ensure_ckpt_base():
92
  pass
93
 
94
 
95
- # -----------------------------------------------------------------------------
96
- # Conversions
97
- # -----------------------------------------------------------------------------
98
-
99
  def tensor2pil(image: torch.Tensor) -> Image.Image:
100
  arr = image.detach().cpu().numpy()
101
  if arr.ndim == 4 and arr.shape[0] == 1:
@@ -104,18 +78,12 @@ def tensor2pil(image: torch.Tensor) -> Image.Image:
104
  return Image.fromarray(arr)
105
 
106
 
 
107
  def pil2tensor(image: Image.Image) -> torch.Tensor:
108
  return torch.from_numpy(np.array(image).astype(np.float32) / 255.0).unsqueeze(0)
109
 
110
 
111
  def _rgba_to_rgb_on_white(pil_img: Image.Image) -> Image.Image:
112
- """
113
- If input is RGBA:
114
- - alpha composite over WHITE background
115
- - convert to RGB (drop alpha)
116
- If input is RGB:
117
- - carry on
118
- """
119
  if pil_img.mode == "RGBA":
120
  bg = Image.new("RGBA", pil_img.size, (255, 255, 255, 255))
121
  composited = Image.alpha_composite(bg, pil_img)
@@ -127,19 +95,28 @@ def _rgba_to_rgb_on_white(pil_img: Image.Image) -> Image.Image:
127
  return pil_img
128
 
129
 
130
- def _force_rgba_passthrough(pil_img: Image.Image) -> Image.Image:
131
  """
132
- Make sure we can return a sane IMAGE if everything fails.
133
- This is a PASS-THROUGH fallback (NOT white).
134
  """
135
- if pil_img.mode == "RGBA":
136
- return pil_img
137
- return pil_img.convert("RGBA")
 
138
 
139
 
140
- # -----------------------------------------------------------------------------
141
- # OOM + CUDA cleanup
142
- # -----------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
143
 
144
  def _is_oom_error(e: BaseException) -> bool:
145
  oom_cuda_cls = getattr(getattr(torch, "cuda", None), "OutOfMemoryError", None)
@@ -194,88 +171,72 @@ def _comfy_soft_empty_cache() -> None:
194
  pass
195
 
196
 
197
- def _bytes_from_mb(mb: int) -> int:
198
- return int(mb) * 1024 * 1024
199
-
200
-
201
- def _get_free_vram_bytes_best_effort() -> Optional[int]:
202
  """
203
- Prefer ComfyUI's free memory estimate (it includes torch-reserved-but-free blocks),
204
- otherwise fall back to torch.cuda.mem_get_info.
205
  """
206
- if comfy_mm is not None and hasattr(comfy_mm, "get_free_memory"):
207
  try:
208
- return int(comfy_mm.get_free_memory())
 
 
 
209
  except Exception:
210
  pass
211
 
212
  if torch.cuda.is_available():
213
- try:
214
- free_b, _total_b = torch.cuda.mem_get_info()
215
- return int(free_b)
216
- except Exception:
217
- pass
218
-
219
- return None
220
 
221
 
222
- # -----------------------------------------------------------------------------
223
- # ComfyUI model eviction (smallest-first, comfy_default, unload_all)
224
- # -----------------------------------------------------------------------------
225
-
226
- _GLOBAL_VRAM_EVICT_LOCK = threading.Lock()
227
-
228
-
229
- def _comfy_unload_all_models() -> None:
230
- if comfy_mm is None:
231
- return
232
- if hasattr(comfy_mm, "unload_all_models"):
233
  try:
234
- comfy_mm.unload_all_models()
 
235
  except Exception:
236
  pass
237
- _comfy_soft_empty_cache()
238
- _cuda_soft_cleanup()
239
 
240
 
241
- def _comfy_free_memory_to_target(target_free_bytes: int) -> None:
242
- if comfy_mm is None:
243
- return
244
- if not hasattr(comfy_mm, "free_memory") or not hasattr(comfy_mm, "get_torch_device"):
245
- return
246
  try:
247
- comfy_mm.free_memory(int(target_free_bytes), comfy_mm.get_torch_device())
 
 
248
  except Exception:
249
- pass
250
- _comfy_soft_empty_cache()
251
- _cuda_soft_cleanup()
252
 
253
 
254
- def _comfy_unload_smallest_model_once() -> bool:
255
  """
256
- Unload exactly ONE smallest model tracked by ComfyUI (best-effort).
257
- Returns True if we unloaded something.
 
258
  """
259
  if comfy_mm is None:
260
  return False
261
- if not hasattr(comfy_mm, "current_loaded_models") or not hasattr(comfy_mm, "get_torch_device"):
262
  return False
263
 
264
  try:
265
- dev = comfy_mm.get_torch_device()
266
  except Exception:
267
- dev = None
268
 
269
- loaded = []
270
  try:
271
- for lm in list(getattr(comfy_mm, "current_loaded_models", [])):
272
  try:
273
- if dev is not None and getattr(lm, "device", None) != dev:
274
- continue
275
- if hasattr(lm, "is_dead") and callable(lm.is_dead) and lm.is_dead():
276
  continue
277
 
278
- mem = 0
279
  mem_fn = getattr(lm, "model_loaded_memory", None)
280
  if callable(mem_fn):
281
  mem = int(mem_fn())
@@ -283,21 +244,20 @@ def _comfy_unload_smallest_model_once() -> bool:
283
  mem = int(getattr(lm, "loaded_memory", 0) or 0)
284
 
285
  if mem > 0:
286
- loaded.append((mem, lm))
287
  except Exception:
288
  continue
289
  except Exception:
290
  return False
291
 
292
- if not loaded:
293
  return False
294
 
295
- loaded.sort(key=lambda x: x[0]) # smallest first
296
- _mem, smallest = loaded[0]
297
 
298
- # Unload it
299
  try:
300
- unload_fn = getattr(smallest, "model_unload", None)
301
  if callable(unload_fn):
302
  try:
303
  unload_fn(unpatch_weights=True)
@@ -306,7 +266,7 @@ def _comfy_unload_smallest_model_once() -> bool:
306
  except Exception:
307
  pass
308
 
309
- # Ask ComfyUI to clean up bookkeeping (if available)
310
  try:
311
  cleanup = getattr(comfy_mm, "cleanup_models", None)
312
  if callable(cleanup):
@@ -319,94 +279,35 @@ def _comfy_unload_smallest_model_once() -> bool:
319
  return True
320
 
321
 
322
- def _evict_until_free(target_free_bytes: int, policy: str) -> None:
323
- """
324
- policy:
325
- - "smallest_first": unload smallest ComfyUI model repeatedly until free>=target or nothing left
326
- - "comfy_default": comfy_mm.free_memory(target, device)
327
- - "unload_all": unload_all_models()
328
- - "none": do nothing
329
- """
330
- if policy == "none":
331
  return
332
-
333
- with _GLOBAL_VRAM_EVICT_LOCK:
334
- if policy == "unload_all":
335
- _comfy_unload_all_models()
336
- return
337
-
338
- if policy == "comfy_default":
339
- _comfy_free_memory_to_target(target_free_bytes)
340
- return
341
-
342
- if policy == "smallest_first":
343
- # Loop until enough memory or no more models to unload
344
- for _ in range(256):
345
- free_b = _get_free_vram_bytes_best_effort()
346
- if free_b is None or free_b >= target_free_bytes:
347
- break
348
- if not _comfy_unload_smallest_model_once():
349
- break
350
- return
351
 
352
 
353
  # -----------------------------------------------------------------------------
354
- # Remover singleton cache (shared by all nodes)
355
  # -----------------------------------------------------------------------------
356
 
357
- # key = (mode, jit, ckpt)
358
- _RemKey = Tuple[str, bool, Optional[str]]
359
-
360
- _REMOVER_CACHE: Dict[_RemKey, Remover] = {}
361
- _REMOVER_RUN_LOCKS: Dict[_RemKey, threading.Lock] = {}
362
- _REMOVER_VRAM_BYTES: Dict[_RemKey, int] = {} # approximate model VRAM residency cost (bytes)
363
  _CACHE_LOCK = threading.Lock()
364
 
365
 
366
- def _construct_remover(mode: str, jit: bool, device: str, ckpt: Optional[str]) -> Remover:
367
- """
368
- Construct Remover with best-effort compatibility across transparent_background versions.
369
- """
370
- # Try current signature: Remover(mode=..., jit=..., device=..., ckpt=...)
371
- kwargs: Dict[str, Any] = {"jit": jit, "device": device}
372
- if mode:
373
- kwargs["mode"] = mode
374
- if ckpt:
375
- kwargs["ckpt"] = ckpt
376
-
377
- try:
378
- return Remover(**kwargs)
379
- except TypeError:
380
- pass
381
-
382
- # Try without "mode" (some variants)
383
- kwargs2: Dict[str, Any] = {"jit": jit, "device": device}
384
- if ckpt:
385
- kwargs2["ckpt"] = ckpt
386
- try:
387
- return Remover(**kwargs2)
388
- except TypeError:
389
- pass
390
-
391
- # Try legacy "fast=" API
392
- kwargs3: Dict[str, Any] = {"jit": jit, "device": device, "fast": (mode == "fast")}
393
- if ckpt:
394
- kwargs3["ckpt"] = ckpt
395
- return Remover(**kwargs3)
396
-
397
-
398
- def _get_remover(mode: str = "base", jit: bool = False, ckpt: Optional[str] = None) -> tuple[Remover, threading.Lock, _RemKey]:
399
- """
400
- Cached Remover per (mode, jit, ckpt). Constructed on CPU by default to avoid VRAM OOM.
401
- """
402
- key: _RemKey = (mode, jit, ckpt)
403
-
404
  with _CACHE_LOCK:
405
  inst = _REMOVER_CACHE.get(key)
406
  if inst is None:
407
  _ensure_ckpt_base()
408
  try:
409
- inst = _construct_remover(mode=mode, jit=jit, device="cpu", ckpt=ckpt)
410
  except BaseException as e:
411
  if _is_oom_error(e):
412
  _cuda_soft_cleanup()
@@ -418,205 +319,204 @@ def _get_remover(mode: str = "base", jit: bool = False, ckpt: Optional[str] = No
418
  run_lock = threading.Lock()
419
  _REMOVER_RUN_LOCKS[key] = run_lock
420
 
421
- return inst, run_lock, key
 
422
 
 
 
 
423
 
424
- def _get_target_device_str(device_choice: str) -> str:
 
 
 
 
 
 
 
425
  """
426
- device_choice:
427
- - "auto": ComfyUI device if available, else "cuda:0" if cuda else "cpu"
428
- - "cuda": "cuda:0"
429
- - "cpu": "cpu"
430
  """
431
- if device_choice == "cpu":
432
- return "cpu"
433
- if device_choice == "cuda":
434
- return "cuda:0"
435
 
436
- # auto:
437
- if comfy_mm is not None and hasattr(comfy_mm, "get_torch_device"):
 
438
  try:
439
- return str(comfy_mm.get_torch_device())
440
  except Exception:
441
  pass
 
 
 
442
 
443
- if torch.cuda.is_available():
444
- return "cuda:0"
445
- return "cpu"
446
-
447
-
448
- def _move_remover_to_device(
449
- remover: Remover,
450
- *,
451
- key: _RemKey,
452
- device_str: str,
453
- min_free_vram_mb: int,
454
- extra_vram_mb: int,
455
- unload_policy: str,
456
- measure_model_vram: bool,
457
- ) -> tuple[bool, Optional[int]]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  """
459
- Move remover.model to device_str (best effort).
460
- For CUDA: evict models first if free VRAM < required.
461
- Returns (ok, measured_model_vram_bytes_or_None)
462
  """
463
- # Already on device?
464
- if str(getattr(remover, "device", "")) == device_str:
465
- return True, _REMOVER_VRAM_BYTES.get(key)
466
 
467
- # CPU path: always try
468
- if device_str.startswith("cpu") or device_str.startswith("mps") or device_str.startswith("xpu") or device_str.startswith("npu") or device_str.startswith("mlu"):
469
- try:
470
- remover.model = remover.model.to(device_str)
471
- remover.device = device_str
472
- _cuda_soft_cleanup()
473
- return True, _REMOVER_VRAM_BYTES.get(key)
474
- except BaseException as e:
475
- if _is_oom_error(e):
476
- _cuda_soft_cleanup()
477
- return False, _REMOVER_VRAM_BYTES.get(key)
478
 
479
- # CUDA path
480
- required_bytes = _bytes_from_mb(min_free_vram_mb)
 
481
 
482
- # If we have a measured model size, require that + extra_vram_mb instead
483
- measured = _REMOVER_VRAM_BYTES.get(key)
484
- if measured is not None and measured > 0:
485
- required_bytes = max(required_bytes, int(measured) + _bytes_from_mb(extra_vram_mb))
 
486
 
487
- free_b = _get_free_vram_bytes_best_effort()
488
- if free_b is not None and free_b < required_bytes:
489
- _evict_until_free(required_bytes, unload_policy)
490
 
491
- before = _get_free_vram_bytes_best_effort() if measure_model_vram else None
492
 
493
- try:
494
- remover.model = remover.model.to(device_str)
495
- remover.device = device_str
 
 
 
 
496
 
497
- _comfy_soft_empty_cache()
498
- _cuda_soft_cleanup()
499
 
500
- if measure_model_vram:
501
- after = _get_free_vram_bytes_best_effort()
502
- if before is not None and after is not None:
503
- used = max(0, int(before) - int(after))
504
- if used > 0:
505
- _REMOVER_VRAM_BYTES[key] = used
506
 
507
- return True, _REMOVER_VRAM_BYTES.get(key)
508
 
509
- except BaseException as e:
510
- if _is_oom_error(e):
 
 
511
  _cuda_soft_cleanup()
512
- return False, _REMOVER_VRAM_BYTES.get(key)
513
 
 
 
 
514
 
515
- def _remover_process_basic(remover: Remover, run_lock: threading.Lock, pil_img: Image.Image, out_type: str) -> Image.Image:
516
- """
517
- Basic safe call. May still OOM if on GPU and VRAM is tight.
518
- """
519
- with run_lock:
520
- with torch.inference_mode():
521
- return remover.process(pil_img, type=out_type)
522
-
523
-
524
- def _remover_process_no_crash(
525
- remover: Remover,
526
- run_lock: threading.Lock,
527
- *,
528
- key: _RemKey,
529
- pil_img_rgb: Image.Image,
530
- out_type: str,
531
- device_choice: str,
532
- min_free_vram_mb: int,
533
- extra_vram_mb: int,
534
- unload_policy: str,
535
- allow_cpu_fallback: bool,
536
- use_fp16_autocast: bool,
537
- ) -> Optional[Image.Image]:
538
  """
539
- Try inference:
540
- 1) run once
541
- 2) on OOM: evict VRAM (policy) and retry once
542
- 3) on OOM: optional CPU fallback and retry once
543
- Returns PIL on success, None if still OOM.
544
  """
545
- def _amp_ctx():
546
- dev = str(getattr(remover, "device", "") or "")
547
- if use_fp16_autocast and torch.cuda.is_available() and dev.startswith("cuda"):
548
- try:
549
- return torch.autocast("cuda", dtype=torch.float16)
550
- except Exception:
551
- return nullcontext()
552
- return nullcontext()
553
 
554
- # First try
 
 
 
555
  try:
556
- with run_lock:
557
  with torch.inference_mode():
558
- with _amp_ctx():
559
- return remover.process(pil_img_rgb, type=out_type)
 
 
 
560
  except BaseException as e:
561
  if not _is_oom_error(e):
562
  raise
563
 
564
- # OOM: cleanup + evict + retry
 
565
  _cuda_soft_cleanup()
566
- required_bytes = _bytes_from_mb(min_free_vram_mb)
567
- measured = _REMOVER_VRAM_BYTES.get(key)
568
- if measured is not None and measured > 0:
569
- required_bytes = max(required_bytes, int(measured) + _bytes_from_mb(extra_vram_mb))
570
-
571
- _evict_until_free(required_bytes, unload_policy)
572
 
573
  try:
574
- with run_lock:
575
  with torch.inference_mode():
576
- with _amp_ctx():
577
- return remover.process(pil_img_rgb, type=out_type)
 
 
578
  except BaseException as e:
579
  if not _is_oom_error(e):
580
  raise
581
 
582
- if not allow_cpu_fallback:
583
- return None
584
 
585
- # CPU fallback
586
  try:
587
- ok, _ = _move_remover_to_device(
588
- remover,
589
- key=key,
590
- device_str="cpu",
591
- min_free_vram_mb=min_free_vram_mb,
592
- extra_vram_mb=extra_vram_mb,
593
- unload_policy="none",
594
- measure_model_vram=False,
595
- )
596
- if not ok:
597
- return None
598
-
599
- _cuda_soft_cleanup()
600
-
601
- with run_lock:
602
  with torch.inference_mode():
603
- return remover.process(pil_img_rgb, type=out_type)
 
 
 
604
  except BaseException as e:
605
  if not _is_oom_error(e):
606
  raise
607
- return None
 
 
 
 
 
 
 
 
 
 
 
 
608
 
609
 
610
  # -----------------------------------------------------------------------------
611
- # Existing Nodes: InspyrenetRembg2 / InspyrenetRembg3 (kept)
612
  # -----------------------------------------------------------------------------
613
 
614
  class InspyrenetRembg2:
615
- """
616
- Kept behavior/output.
617
- Uses cached Remover (constructed on CPU by default in this file).
618
- If you want it on GPU: call Load_Inspyrenet_Global first with matching (mode/jit/ckpt).
619
- """
620
  def __init__(self):
621
  pass
622
 
@@ -625,7 +525,7 @@ class InspyrenetRembg2:
625
  return {
626
  "required": {
627
  "image": ("IMAGE",),
628
- "torchscript_jit": (["default", "on"],),
629
  },
630
  }
631
 
@@ -635,18 +535,21 @@ class InspyrenetRembg2:
635
 
636
  def remove_background(self, image, torchscript_jit):
637
  jit = (torchscript_jit != "default")
638
- remover, run_lock, _key = _get_remover(mode="base", jit=jit, ckpt=None)
639
 
640
  img_list = []
641
  for img in tqdm(image, "Inspyrenet Rembg2"):
642
  pil_in = tensor2pil(img)
643
  try:
644
- mid = _remover_process_basic(remover, run_lock, pil_in, out_type="rgba")
 
 
645
  except BaseException as e:
646
  if _is_oom_error(e):
647
  _cuda_soft_cleanup()
648
  raise RuntimeError("InspyrenetRembg2: CUDA out of memory.") from e
649
  raise
 
650
  out = pil2tensor(mid)
651
  img_list.append(out)
652
  del pil_in, mid, out
@@ -657,11 +560,6 @@ class InspyrenetRembg2:
657
 
658
 
659
  class InspyrenetRembg3:
660
- """
661
- Kept behavior/output.
662
- Uses cached Remover (constructed on CPU by default in this file).
663
- If you want it on GPU: call Load_Inspyrenet_Global first with matching (mode/jit/ckpt).
664
- """
665
  def __init__(self):
666
  pass
667
 
@@ -678,7 +576,7 @@ class InspyrenetRembg3:
678
  CATEGORY = "image"
679
 
680
  def remove_background(self, image):
681
- remover, run_lock, _key = _get_remover(mode="base", jit=False, ckpt=None)
682
 
683
  img_list = []
684
  for img in tqdm(image, "Inspyrenet Rembg3"):
@@ -686,7 +584,9 @@ class InspyrenetRembg3:
686
  pil_rgb = _rgba_to_rgb_on_white(pil_in)
687
 
688
  try:
689
- mid = _remover_process_basic(remover, run_lock, pil_rgb, out_type="rgba")
 
 
690
  except BaseException as e:
691
  if _is_oom_error(e):
692
  _cuda_soft_cleanup()
@@ -695,7 +595,6 @@ class InspyrenetRembg3:
695
 
696
  out = pil2tensor(mid)
697
  img_list.append(out)
698
-
699
  del pil_in, pil_rgb, mid, out
700
 
701
  img_stack = torch.cat(img_list, dim=0)
@@ -703,77 +602,36 @@ class InspyrenetRembg3:
703
 
704
 
705
  # -----------------------------------------------------------------------------
706
- # New Nodes: Load / Remove / Run (Global)
707
  # -----------------------------------------------------------------------------
708
 
709
  class Load_Inspyrenet_Global:
710
  """
711
- Loads the global singleton Remover instance (per mode/jit/ckpt) and moves it to target device.
712
- Measures approximate VRAM delta (best-effort) for the model residency.
 
 
713
  """
714
  def __init__(self):
715
  pass
716
 
717
  @classmethod
718
  def INPUT_TYPES(s):
719
- return {
720
- "required": {
721
- "mode": (["base", "fast", "base-nightly"], {"default": "base"}),
722
- "torchscript_jit": (["off", "on"], {"default": "off"}),
723
- "device": (["auto", "cuda", "cpu"], {"default": "auto"}),
724
 
725
- # target VRAM policy
726
- "min_free_vram_mb": ("INT", {"default": 4096, "min": 0, "max": 65536, "step": 256}),
727
- "extra_vram_mb": ("INT", {"default": 1024, "min": 0, "max": 65536, "step": 256}),
728
- "unload_policy": (["smallest_first", "comfy_default", "unload_all", "none"], {"default": "smallest_first"}),
729
-
730
- "measure_model_vram": (["yes", "no"], {"default": "yes"}),
731
- },
732
- "optional": {
733
- "ckpt_override": ("STRING", {"default": ""}),
734
- },
735
- }
736
-
737
- RETURN_TYPES = ("BOOLEAN", "INT", "STRING")
738
  FUNCTION = "load"
739
  CATEGORY = "image"
740
 
741
- def load(
742
- self,
743
- mode: str,
744
- torchscript_jit: str,
745
- device: str,
746
- min_free_vram_mb: int,
747
- extra_vram_mb: int,
748
- unload_policy: str,
749
- measure_model_vram: str,
750
- ckpt_override: str = "",
751
- ):
752
- jit = (torchscript_jit == "on")
753
- ckpt = ckpt_override.strip() or None
754
-
755
- remover, _run_lock, key = _get_remover(mode=mode, jit=jit, ckpt=ckpt)
756
-
757
- device_str = _get_target_device_str(device)
758
- ok, measured_bytes = _move_remover_to_device(
759
- remover,
760
- key=key,
761
- device_str=device_str,
762
- min_free_vram_mb=int(min_free_vram_mb),
763
- extra_vram_mb=int(extra_vram_mb),
764
- unload_policy=unload_policy,
765
- measure_model_vram=(measure_model_vram == "yes"),
766
- )
767
-
768
- measured_mb = int((measured_bytes or 0) / (1024 * 1024))
769
- status = f"Load_Inspyrenet_Global: ok={ok}, mode={mode}, jit={jit}, device={getattr(remover,'device',None)}, measured_model_vram_mb={measured_mb}"
770
- return (bool(ok), measured_mb, status)
771
-
772
-
773
- class Remove_Inspyrenet_Gobal:
774
  """
775
- Offloads the global singleton Remover to CPU (keeps instance), or deletes it (forces re-create later).
776
- Optionally unloads all ComfyUI models too.
777
  """
778
  def __init__(self):
779
  pass
@@ -782,60 +640,44 @@ class Remove_Inspyrenet_Gobal:
782
  def INPUT_TYPES(s):
783
  return {
784
  "required": {
785
- "mode": (["base", "fast", "base-nightly"], {"default": "base"}),
786
- "torchscript_jit": (["off", "on"], {"default": "off"}),
787
- "action": (["offload_to_cpu", "delete_instance"], {"default": "offload_to_cpu"}),
788
- "also_unload_all_models": (["no", "yes"], {"default": "no"}),
789
- },
790
- "optional": {
791
- "ckpt_override": ("STRING", {"default": ""}),
792
- },
793
  }
794
 
795
- RETURN_TYPES = ("BOOLEAN", "STRING")
796
  FUNCTION = "remove"
797
  CATEGORY = "image"
798
 
799
- def remove(self, mode, torchscript_jit, action, also_unload_all_models, ckpt_override=""):
800
- jit = (torchscript_jit == "on")
801
- ckpt = ckpt_override.strip() or None
802
-
803
- remover, _run_lock, key = _get_remover(mode=mode, jit=jit, ckpt=ckpt)
804
-
805
- # Offload remover itself to CPU
806
- try:
807
- remover.model = remover.model.to("cpu")
808
- remover.device = "cpu"
809
- except Exception:
810
- pass
811
 
812
- # Optionally delete instance from cache
813
- if action == "delete_instance":
814
- with _CACHE_LOCK:
815
- try:
816
- _REMOVER_CACHE.pop(key, None)
817
- _REMOVER_RUN_LOCKS.pop(key, None)
818
- _REMOVER_VRAM_BYTES.pop(key, None)
819
- except Exception:
820
- pass
821
-
822
- if also_unload_all_models == "yes":
823
- _comfy_unload_all_models()
 
 
 
824
 
825
- _comfy_soft_empty_cache()
826
  _cuda_soft_cleanup()
827
-
828
- status = f"Remove_Inspyrenet_Gobal: action={action}, offloaded_device={getattr(remover,'device',None)}, also_unload_all_models={also_unload_all_models}"
829
- return (True, status)
830
 
831
 
832
  class Run_InspyrenetRembg_Global:
833
  """
834
- Runs global Remover with OOM avoidance:
835
- - tries on requested device (auto/cuda/cpu)
836
- - on OOM: evicts models (policy) and retries
837
- - optional CPU fallback
838
- - NEVER crashes on OOM: last resort returns input image (pass-through RGBA)
839
  """
840
  def __init__(self):
841
  pass
@@ -845,107 +687,42 @@ class Run_InspyrenetRembg_Global:
845
  return {
846
  "required": {
847
  "image": ("IMAGE",),
848
- },
849
- "optional": {
850
- "mode": (["base", "fast", "base-nightly"], {"default": "base"}),
851
- "torchscript_jit": (["off", "on"], {"default": "off"}),
852
- "device": (["auto", "cuda", "cpu"], {"default": "auto"}),
853
-
854
- "min_free_vram_mb": ("INT", {"default": 4096, "min": 0, "max": 65536, "step": 256}),
855
- "extra_vram_mb": ("INT", {"default": 1024, "min": 0, "max": 65536, "step": 256}),
856
- "unload_policy": (["smallest_first", "comfy_default", "unload_all", "none"], {"default": "smallest_first"}),
857
-
858
- "allow_cpu_fallback": (["yes", "no"], {"default": "yes"}),
859
- "use_fp16_autocast": (["yes", "no"], {"default": "yes"}),
860
-
861
- "ckpt_override": ("STRING", {"default": ""}),
862
- },
863
  }
864
 
865
  RETURN_TYPES = ("IMAGE",)
866
  FUNCTION = "remove_background"
867
  CATEGORY = "image"
868
 
869
- def remove_background(
870
- self,
871
- image,
872
- mode="base",
873
- torchscript_jit="off",
874
- device="auto",
875
- min_free_vram_mb=4096,
876
- extra_vram_mb=1024,
877
- unload_policy="smallest_first",
878
- allow_cpu_fallback="yes",
879
- use_fp16_autocast="yes",
880
- ckpt_override="",
881
- ):
882
- jit = (torchscript_jit == "on")
883
- ckpt = ckpt_override.strip() or None
884
-
885
- remover, run_lock, key = _get_remover(mode=mode, jit=jit, ckpt=ckpt)
886
-
887
- # Ensure desired device (best-effort)
888
- device_str = _get_target_device_str(device)
889
- _move_remover_to_device(
890
- remover,
891
- key=key,
892
- device_str=device_str,
893
- min_free_vram_mb=int(min_free_vram_mb),
894
- extra_vram_mb=int(extra_vram_mb),
895
- unload_policy=unload_policy,
896
- measure_model_vram=False,
897
- )
898
-
899
- allow_cpu = (allow_cpu_fallback == "yes")
900
- fp16_amp = (use_fp16_autocast == "yes")
901
 
902
  img_list = []
903
  for img in tqdm(image, "Run InspyrenetRembg Global"):
904
  pil_in = tensor2pil(img)
905
 
906
- # Always keep a pass-through fallback (NOT white)
907
- fallback = _force_rgba_passthrough(pil_in)
908
 
909
- # Model input: RGB (your prior behavior uses white composite if RGBA)
910
  pil_rgb = _rgba_to_rgb_on_white(pil_in)
911
 
912
- out_pil = _remover_process_no_crash(
913
- remover,
914
- run_lock,
915
- key=key,
916
- pil_img_rgb=pil_rgb,
917
- out_type="rgba",
918
- device_choice=device,
919
- min_free_vram_mb=int(min_free_vram_mb),
920
- extra_vram_mb=int(extra_vram_mb),
921
- unload_policy=unload_policy,
922
- allow_cpu_fallback=allow_cpu,
923
- use_fp16_autocast=fp16_amp,
924
- )
925
-
926
- if out_pil is None:
927
- # Absolute last resort: return input pixels (pass-through), do not crash.
928
- out_pil = fallback
929
-
930
  out = pil2tensor(out_pil)
931
  img_list.append(out)
932
 
933
- del pil_in, pil_rgb, fallback, out_pil, out
934
 
935
  img_stack = torch.cat(img_list, dim=0)
936
  return (img_stack,)
937
 
938
 
939
- # -----------------------------------------------------------------------------
940
- # Node mappings
941
- # -----------------------------------------------------------------------------
942
-
943
  NODE_CLASS_MAPPINGS = {
944
  "InspyrenetRembg2": InspyrenetRembg2,
945
  "InspyrenetRembg3": InspyrenetRembg3,
946
 
947
  "Load_Inspyrenet_Global": Load_Inspyrenet_Global,
948
- "Remove_Inspyrenet_Gobal": Remove_Inspyrenet_Gobal,
949
  "Run_InspyrenetRembg_Global": Run_InspyrenetRembg_Global,
950
  }
951
 
@@ -954,6 +731,6 @@ NODE_DISPLAY_NAME_MAPPINGS = {
954
  "InspyrenetRembg3": "Inspyrenet Rembg3",
955
 
956
  "Load_Inspyrenet_Global": "Load Inspyrenet Global",
957
- "Remove_Inspyrenet_Gobal": "Remove Inspyrenet Gobal",
958
  "Run_InspyrenetRembg_Global": "Run InspyrenetRembg Global",
959
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
  from PIL import Image
 
5
  import urllib.request
6
  import gc
7
  import threading
8
+ from typing import Dict, Tuple, Optional
 
9
 
10
  import torch
11
  import numpy as np
12
  from transparent_background import Remover
13
  from tqdm import tqdm
14
 
15
+
16
+ # Optional: ComfyUI memory manager (present inside ComfyUI)
17
  try:
18
  import comfy.model_management as comfy_mm
19
  except Exception:
 
25
 
26
 
27
  def _ensure_ckpt_base():
 
 
 
 
 
28
  try:
29
  if os.path.isfile(CKPT_PATH) and os.path.getsize(CKPT_PATH) > 0:
30
  return
 
61
  f.write(chunk)
62
 
63
  os.replace(tmp_path, CKPT_PATH)
 
64
  finally:
65
  if os.path.isfile(tmp_path):
66
  try:
 
69
  pass
70
 
71
 
72
+ # Tensor to PIL
 
 
 
73
  def tensor2pil(image: torch.Tensor) -> Image.Image:
74
  arr = image.detach().cpu().numpy()
75
  if arr.ndim == 4 and arr.shape[0] == 1:
 
78
  return Image.fromarray(arr)
79
 
80
 
81
# Convert PIL to Tensor
def pil2tensor(image: Image.Image) -> torch.Tensor:
    """Convert a PIL image to a (1, H, W, C) float32 tensor scaled to [0, 1]."""
    pixels = np.array(image).astype(np.float32) / 255.0
    return torch.from_numpy(pixels).unsqueeze(0)
84
 
85
 
86
  def _rgba_to_rgb_on_white(pil_img: Image.Image) -> Image.Image:
 
 
 
 
 
 
 
87
  if pil_img.mode == "RGBA":
88
  bg = Image.new("RGBA", pil_img.size, (255, 255, 255, 255))
89
  composited = Image.alpha_composite(bg, pil_img)
 
95
  return pil_img
96
 
97
 
98
def _force_rgba_opaque(pil_img: Image.Image) -> Image.Image:
    """
    Return *pil_img* as RGBA with the alpha channel forced to fully opaque
    (255 everywhere), so a failed cutout can never produce an invisible frame.
    """
    converted = pil_img.convert("RGBA")
    channels = list(converted.split())
    # Replace whatever alpha was there with a solid, fully-opaque band.
    channels[3] = Image.new("L", converted.size, 255)
    return Image.merge("RGBA", tuple(channels))
106
 
107
 
108
+ def _alpha_is_all_zero(pil_img: Image.Image) -> bool:
109
+ """
110
+ True if RGBA image alpha channel is entirely 0.
111
+ """
112
+ if pil_img.mode != "RGBA":
113
+ return False
114
+ try:
115
+ extrema = pil_img.getextrema() # ((min,max),(min,max),(min,max),(min,max))
116
+ return extrema[3][1] == 0
117
+ except Exception:
118
+ return False
119
+
120
 
121
  def _is_oom_error(e: BaseException) -> bool:
122
  oom_cuda_cls = getattr(getattr(torch, "cuda", None), "OutOfMemoryError", None)
 
171
  pass
172
 
173
 
174
def _get_comfy_torch_device() -> torch.device:
    """
    Return the torch device this extension should use.

    Preference order: ComfyUI's own get_torch_device(), then cuda:0 when CUDA
    is available, then CPU. Any failure in the ComfyUI path falls through.
    """
    getter = getattr(comfy_mm, "get_torch_device", None) if comfy_mm is not None else None
    if getter is not None:
        try:
            chosen = getter()
            if isinstance(chosen, torch.device):
                return chosen
            # Some ComfyUI versions return a string-like value.
            return torch.device(str(chosen))
        except Exception:
            pass

    return torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
 
 
 
 
 
190
 
191
 
192
+ def _set_current_cuda_device(dev: torch.device) -> None:
193
+ """
194
+ Make sure mem_get_info() measurements are on the same device ComfyUI uses.
195
+ """
196
+ if dev.type == "cuda":
 
 
 
 
 
 
197
  try:
198
+ if dev.index is not None:
199
+ torch.cuda.set_device(dev.index)
200
  except Exception:
201
  pass
 
 
202
 
203
 
204
+ def _cuda_free_bytes_on(dev: torch.device) -> Optional[int]:
205
+ if dev.type != "cuda" or not torch.cuda.is_available():
206
+ return None
 
 
207
  try:
208
+ _set_current_cuda_device(dev)
209
+ free_b, _total_b = torch.cuda.mem_get_info()
210
+ return int(free_b)
211
  except Exception:
212
+ return None
 
 
213
 
214
 
215
+ def _comfy_unload_one_smallest_model() -> bool:
216
  """
217
+ Best-effort "smallest-first" eviction of one ComfyUI-tracked loaded model.
218
+
219
+ If ComfyUI internals differ, this may do nothing (and we fall back to unload_all_models()).
220
  """
221
  if comfy_mm is None:
222
  return False
223
+ if not hasattr(comfy_mm, "current_loaded_models"):
224
  return False
225
 
226
  try:
227
+ cur_dev = _get_comfy_torch_device()
228
  except Exception:
229
+ cur_dev = None
230
 
231
+ models = []
232
  try:
233
+ for lm in list(comfy_mm.current_loaded_models):
234
  try:
235
+ # Prefer same device
236
+ lm_dev = getattr(lm, "device", None)
237
+ if cur_dev is not None and lm_dev is not None and str(lm_dev) != str(cur_dev):
238
  continue
239
 
 
240
  mem_fn = getattr(lm, "model_loaded_memory", None)
241
  if callable(mem_fn):
242
  mem = int(mem_fn())
 
244
  mem = int(getattr(lm, "loaded_memory", 0) or 0)
245
 
246
  if mem > 0:
247
+ models.append((mem, lm))
248
  except Exception:
249
  continue
250
  except Exception:
251
  return False
252
 
253
+ if not models:
254
  return False
255
 
256
+ models.sort(key=lambda x: x[0]) # smallest first
257
+ _mem, lm = models[0]
258
 
 
259
  try:
260
+ unload_fn = getattr(lm, "model_unload", None)
261
  if callable(unload_fn):
262
  try:
263
  unload_fn(unpatch_weights=True)
 
266
  except Exception:
267
  pass
268
 
269
+ # Cleanup hook if present
270
  try:
271
  cleanup = getattr(comfy_mm, "cleanup_models", None)
272
  if callable(cleanup):
 
279
  return True
280
 
281
 
282
def _comfy_unload_all_models() -> None:
    """
    Ask ComfyUI to unload every tracked model, then empty its cache and run
    our own CUDA cleanup. Silently does nothing outside ComfyUI.
    """
    if comfy_mm is None:
        return
    unloader = getattr(comfy_mm, "unload_all_models", None)
    if unloader is not None:
        try:
            unloader()
        except Exception:
            pass
    _comfy_soft_empty_cache()
    _cuda_soft_cleanup()
 
 
 
 
 
 
 
 
 
 
 
 
292
 
293
 
294
  # -----------------------------------------------------------------------------
295
+ # Existing singleton cache for Rembg2/Rembg3 (your original)
296
  # -----------------------------------------------------------------------------
297
 
298
+ _REMOVER_CACHE: Dict[Tuple[bool], Remover] = {}
299
+ _REMOVER_RUN_LOCKS: Dict[Tuple[bool], threading.Lock] = {}
 
 
 
 
300
  _CACHE_LOCK = threading.Lock()
301
 
302
 
303
+ def _get_remover(jit: bool = False) -> tuple[Remover, threading.Lock]:
304
+ key = (jit,)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  with _CACHE_LOCK:
306
  inst = _REMOVER_CACHE.get(key)
307
  if inst is None:
308
  _ensure_ckpt_base()
309
  try:
310
+ inst = Remover(jit=jit) if jit else Remover()
311
  except BaseException as e:
312
  if _is_oom_error(e):
313
  _cuda_soft_cleanup()
 
319
  run_lock = threading.Lock()
320
  _REMOVER_RUN_LOCKS[key] = run_lock
321
 
322
+ return inst, run_lock
323
+
324
 
325
+ # -----------------------------------------------------------------------------
326
+ # GLOBAL remover (for Load/Remove/Run Global nodes)
327
+ # -----------------------------------------------------------------------------
328
 
329
# Process-wide state for the "Global" nodes: exactly one Remover per process.
_GLOBAL_LOCK = threading.Lock()        # guards creation/deletion of the singleton
_GLOBAL_RUN_LOCK = threading.Lock()    # serializes remover.process() inference calls
_GLOBAL_REMOVER: Optional[Remover] = None   # lazily created singleton (None until first use)
_GLOBAL_ON_DEVICE: str = "cpu"              # device string the remover currently lives on
_GLOBAL_VRAM_DELTA_BYTES: int = 0           # best-effort VRAM cost measured at load time
334
+
335
+
336
def _create_global_remover_cpu() -> Remover:
    """
    Build the global Remover (defaults, jit disabled — like InspyrenetRembg3),
    trying to keep the initial weights on the CPU so creation cannot OOM the GPU.
    """
    _ensure_ckpt_base()

    # Newer transparent_background versions accept a device argument; older
    # ones raise TypeError, which we use to detect the fallback path.
    remover = None
    try:
        remover = Remover(device="cpu")  # type: ignore[arg-type]
    except TypeError:
        remover = None
    if remover is not None:
        try:
            remover.device = "cpu"
        except Exception:
            pass
        return remover

    # Fallback for older versions: construct with defaults, then immediately
    # push the weights onto the CPU and release any CUDA cache they touched.
    remover = Remover()
    try:
        if hasattr(remover, "model"):
            remover.model = remover.model.to("cpu")
        remover.device = "cpu"
    except Exception:
        pass
    _cuda_soft_cleanup()
    return remover
364
+
365
+
366
def _get_global_remover() -> Remover:
    """Return the process-wide Remover, creating it (on CPU) on first use."""
    global _GLOBAL_REMOVER, _GLOBAL_ON_DEVICE
    with _GLOBAL_LOCK:
        if _GLOBAL_REMOVER is None:
            created = _create_global_remover_cpu()
            _GLOBAL_REMOVER = created
            _GLOBAL_ON_DEVICE = str(getattr(created, "device", "cpu"))
        return _GLOBAL_REMOVER
373
+
374
+
375
def _move_global_to_cpu() -> None:
    """Offload the global remover's weights to CPU and free CUDA caches."""
    global _GLOBAL_ON_DEVICE
    remover = _get_global_remover()
    try:
        if hasattr(remover, "model"):
            remover.model = remover.model.to("cpu")
        remover.device = "cpu"
        _GLOBAL_ON_DEVICE = "cpu"
    except Exception:
        # Best-effort: leave bookkeeping untouched if the move failed.
        pass
    _cuda_soft_cleanup()
386
+
387
+
388
def _load_global_to_comfy_cuda_no_crash(max_evictions: int = 32) -> bool:
    """
    Move the global remover's weights onto ComfyUI's chosen CUDA device.

    Never raises on CUDA OOM: each failed attempt evicts ComfyUI-tracked
    models (smallest first, then all of them) and retries, up to
    ``max_evictions`` extra attempts. Also records a best-effort VRAM delta
    (weights residency only, not peak inference memory).

    Args:
        max_evictions: number of eviction+retry rounds after the first attempt.

    Returns:
        True if the remover ended up on CUDA, False if it stays on CPU.
    """
    global _GLOBAL_ON_DEVICE, _GLOBAL_VRAM_DELTA_BYTES

    r = _get_global_remover()
    dev = _get_comfy_torch_device()

    # No usable CUDA device: keep (or put) the remover on CPU and report False.
    if dev.type != "cuda" or not torch.cuda.is_available():
        _move_global_to_cpu()
        return False

    # Already on CUDA? Just sync the bookkeeping and succeed.
    cur_dev = str(getattr(r, "device", "") or "")
    if cur_dev.startswith("cuda"):
        _GLOBAL_ON_DEVICE = cur_dev
        return True

    # Measure free VRAM on the SAME device ComfyUI uses (set it current first).
    _set_current_cuda_device(dev)

    free_before = _cuda_free_bytes_on(dev)

    for _ in range(max_evictions + 1):
        try:
            # Move model to the SAME device ComfyUI uses
            if hasattr(r, "model"):
                r.model = r.model.to(dev)
            r.device = str(dev)
            _GLOBAL_ON_DEVICE = str(dev)

            _comfy_soft_empty_cache()
            _cuda_soft_cleanup()

            # Best-effort VRAM cost: drop in free memory across the move.
            free_after = _cuda_free_bytes_on(dev)
            if free_before is not None and free_after is not None:
                delta = max(0, int(free_before) - int(free_after))
                if delta > 0:
                    _GLOBAL_VRAM_DELTA_BYTES = delta

            return True

        except BaseException as e:
            # Only OOM triggers the eviction/retry path; anything else is a bug.
            if not _is_oom_error(e):
                raise
            _comfy_soft_empty_cache()
            _cuda_soft_cleanup()

            # Evict ONE smallest model; if that fails, unload all.
            if not _comfy_unload_one_smallest_model():
                _comfy_unload_all_models()

    # Exhausted retries: fall back to CPU rather than crashing.
    _move_global_to_cpu()
    return False
445
+
446
+
447
def _run_global_rgba_no_crash(pil_rgb: Image.Image, fallback_rgba: Image.Image) -> Image.Image:
    """
    Run the global remover (rgba output), matching InspyrenetRembg3 behavior,
    without ever crashing on CUDA OOM.

    Recovery ladder on OOM: retry after evicting the smallest ComfyUI model,
    then after unloading all models, then once more on CPU. Non-OOM errors on
    the GPU attempts propagate; the final CPU attempt swallows everything and
    returns *fallback_rgba*. A fully transparent result is also treated as a
    failure (prevents "invisible" output).

    Args:
        pil_rgb: model input (RGB; RGBA already composited onto white upstream).
        fallback_rgba: opaque passthrough returned on total failure.

    Returns:
        RGBA PIL image — the cutout, or *fallback_rgba* as last resort.
    """
    remover = _get_global_remover()

    # Try to keep it on CUDA (Comfy device) if possible; do not crash if not.
    _load_global_to_comfy_cuda_no_crash()

    def _attempt() -> Image.Image:
        # One serialized inference pass; transparent-everywhere counts as failure.
        with _GLOBAL_RUN_LOCK:
            with torch.inference_mode():
                out = remover.process(pil_rgb, type="rgba")
        if _alpha_is_all_zero(out):
            return fallback_rgba
        return out

    def _evict_one() -> None:
        # Free caches, then evict the single smallest ComfyUI-tracked model.
        _comfy_soft_empty_cache()
        _cuda_soft_cleanup()
        _comfy_unload_one_smallest_model()

    # GPU attempts: first as-is, then after each recovery step. Non-OOM raises.
    for recover in (None, _evict_one, _comfy_unload_all_models):
        if recover is not None:
            recover()
        try:
            return _attempt()
        except BaseException as e:
            if not _is_oom_error(e):
                raise

    # Final: CPU fallback. Any failure here yields the passthrough image.
    _move_global_to_cpu()
    try:
        return _attempt()
    except BaseException:
        return fallback_rgba
513
 
514
 
515
  # -----------------------------------------------------------------------------
516
+ # Nodes
517
  # -----------------------------------------------------------------------------
518
 
519
  class InspyrenetRembg2:
 
 
 
 
 
520
  def __init__(self):
521
  pass
522
 
 
525
  return {
526
  "required": {
527
  "image": ("IMAGE",),
528
+ "torchscript_jit": (["default", "on"],)
529
  },
530
  }
531
 
 
535
 
536
  def remove_background(self, image, torchscript_jit):
537
  jit = (torchscript_jit != "default")
538
+ remover, run_lock = _get_remover(jit=jit)
539
 
540
  img_list = []
541
  for img in tqdm(image, "Inspyrenet Rembg2"):
542
  pil_in = tensor2pil(img)
543
  try:
544
+ with run_lock:
545
+ with torch.inference_mode():
546
+ mid = remover.process(pil_in, type="rgba")
547
  except BaseException as e:
548
  if _is_oom_error(e):
549
  _cuda_soft_cleanup()
550
  raise RuntimeError("InspyrenetRembg2: CUDA out of memory.") from e
551
  raise
552
+
553
  out = pil2tensor(mid)
554
  img_list.append(out)
555
  del pil_in, mid, out
 
560
 
561
 
562
  class InspyrenetRembg3:
 
 
 
 
 
563
  def __init__(self):
564
  pass
565
 
 
576
  CATEGORY = "image"
577
 
578
  def remove_background(self, image):
579
+ remover, run_lock = _get_remover(jit=False)
580
 
581
  img_list = []
582
  for img in tqdm(image, "Inspyrenet Rembg3"):
 
584
  pil_rgb = _rgba_to_rgb_on_white(pil_in)
585
 
586
  try:
587
+ with run_lock:
588
+ with torch.inference_mode():
589
+ mid = remover.process(pil_rgb, type="rgba")
590
  except BaseException as e:
591
  if _is_oom_error(e):
592
  _cuda_soft_cleanup()
 
595
 
596
  out = pil2tensor(mid)
597
  img_list.append(out)
 
598
  del pil_in, pil_rgb, mid, out
599
 
600
  img_stack = torch.cat(img_list, dim=0)
 
602
 
603
 
604
  # -----------------------------------------------------------------------------
605
+ # NEW: Global nodes (simple, no user settings on Load/Run)
606
  # -----------------------------------------------------------------------------
607
 
608
class Load_Inspyrenet_Global:
    """
    Parameterless node: ensures the global remover exists and tries to move it
    onto ComfyUI's CUDA device.

    Outputs:
        loaded_ok (BOOLEAN): True when the remover ended up on CUDA.
        vram_delta_bytes (INT): best-effort VRAM cost of the resident weights
            (weights residency only, not peak inference memory).
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {}}

    RETURN_TYPES = ("BOOLEAN", "INT")
    FUNCTION = "load"
    CATEGORY = "image"

    def load(self):
        _get_global_remover()
        on_cuda = _load_global_to_comfy_cuda_no_crash()
        return (bool(on_cuda), int(_GLOBAL_VRAM_DELTA_BYTES))
630
+
631
+
632
class Remove_Inspyrenet_Global:
    """
    Node that either offloads the global remover to CPU or deletes the
    singleton entirely (the next use recreates it from scratch).
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "action": (["offload_to_cpu", "delete_instance"],),
            }
        }

    RETURN_TYPES = ("BOOLEAN",)
    FUNCTION = "remove"
    CATEGORY = "image"

    def remove(self, action):
        global _GLOBAL_REMOVER, _GLOBAL_ON_DEVICE, _GLOBAL_VRAM_DELTA_BYTES

        if action == "offload_to_cpu":
            _move_global_to_cpu()
            return (True,)

        # action == "delete_instance": drop the singleton and reset bookkeeping.
        with _GLOBAL_LOCK:
            try:
                instance = _GLOBAL_REMOVER
                if instance is not None:
                    # Push weights to CPU first so dropping the reference
                    # actually frees VRAM.
                    try:
                        if hasattr(instance, "model"):
                            instance.model = instance.model.to("cpu")
                        instance.device = "cpu"
                    except Exception:
                        pass
                _GLOBAL_REMOVER = None
                _GLOBAL_ON_DEVICE = "cpu"
                _GLOBAL_VRAM_DELTA_BYTES = 0
            except Exception:
                pass

        _cuda_soft_cleanup()
        return (True,)
 
 
675
 
676
 
677
class Run_InspyrenetRembg_Global:
    """
    Parameterless background-removal node using the global remover.

    Same behavior as InspyrenetRembg3, but it never crashes on CUDA OOM:
    the worker falls back through eviction/CPU, and on total failure each
    frame is returned as an opaque RGBA passthrough (never an invisible image).
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "image": ("IMAGE",),
            }
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "remove_background"
    CATEGORY = "image"

    def remove_background(self, image):
        """
        Process a batch tensor frame-by-frame and return (IMAGE,) of RGBA frames.

        Robustness fix: an empty batch is passed through unchanged instead of
        crashing in torch.cat([]).
        """
        _get_global_remover()

        img_list = []
        for img in tqdm(image, "Run InspyrenetRembg Global"):
            pil_in = tensor2pil(img)

            # Visible fallback (never invisible)
            fallback = _force_rgba_opaque(pil_in)

            # Exactly like Rembg3's input path: composite RGBA onto white.
            pil_rgb = _rgba_to_rgb_on_white(pil_in)

            out_pil = _run_global_rgba_no_crash(pil_rgb, fallback)
            img_list.append(pil2tensor(out_pil))

            del pil_in, fallback, pil_rgb, out_pil

        if not img_list:
            # Nothing to process; return the input batch unchanged.
            return (image,)

        img_stack = torch.cat(img_list, dim=0)
        return (img_stack,)
718
 
719
 
 
 
 
 
720
# Registration table consumed by ComfyUI: node key -> implementing class.
# NOTE: key renamed from the old misspelled "Remove_Inspyrenet_Gobal";
# workflows saved with the old key must be updated to the new name.
NODE_CLASS_MAPPINGS = {
    "InspyrenetRembg2": InspyrenetRembg2,
    "InspyrenetRembg3": InspyrenetRembg3,

    "Load_Inspyrenet_Global": Load_Inspyrenet_Global,
    "Remove_Inspyrenet_Global": Remove_Inspyrenet_Global,
    "Run_InspyrenetRembg_Global": Run_InspyrenetRembg_Global,
}
728
 
 
731
  "InspyrenetRembg3": "Inspyrenet Rembg3",
732
 
733
  "Load_Inspyrenet_Global": "Load Inspyrenet Global",
734
+ "Remove_Inspyrenet_Global": "Remove Inspyrenet Global",
735
  "Run_InspyrenetRembg_Global": "Run InspyrenetRembg Global",
736
  }