| from __future__ import annotations
|
|
|
| from PIL import Image
|
| import os
|
| import urllib.request
|
| import gc
|
| import threading
|
| from typing import Dict, Tuple, Optional
|
|
|
| import torch
|
| import numpy as np
|
| from transparent_background import Remover
|
| from tqdm import tqdm
|
|
|
|
|
|
|
# ComfyUI's model-management module; left as None when this file is imported
# outside ComfyUI so every comfy_mm-dependent helper degrades to a no-op.
try:
    import comfy.model_management as comfy_mm
except Exception:
    comfy_mm = None
|
|
|
|
|
# Location where transparent-background looks for its base checkpoint.
CKPT_PATH = "/root/.transparent-background/ckpt_base.pth"
# Mirror URL used to fetch the checkpoint when it is absent or empty.
CKPT_URL = "https://huggingface.co/saliacoel/x/resolve/main/ckpt_base.pth"
|
|
|
|
|
def _ensure_ckpt_base():
    """Download the InSPyReNet base checkpoint to CKPT_PATH if missing or empty.

    The file is streamed to a temporary sibling path and atomically moved into
    place with os.replace, so a partial download never ends up at CKPT_PATH.
    A leftover temp file is removed on failure.
    """
    try:
        # Already present and non-empty: nothing to do.
        if os.path.isfile(CKPT_PATH) and os.path.getsize(CKPT_PATH) > 0:
            return
    except Exception:
        pass

    os.makedirs(os.path.dirname(CKPT_PATH), exist_ok=True)
    tmp_path = CKPT_PATH + ".tmp"

    def _copy_chunks(resp, f, pbar=None):
        # Stream the response in 1 MiB chunks; update progress when a bar is given.
        while True:
            chunk = resp.read(1024 * 1024)
            if not chunk:
                break
            f.write(chunk)
            if pbar is not None:
                pbar.update(len(chunk))

    try:
        with urllib.request.urlopen(CKPT_URL) as resp:
            total = resp.headers.get("Content-Length")
            total = int(total) if total is not None else None

            with open(tmp_path, "wb") as f:
                if total:
                    with tqdm(
                        total=total,
                        unit="B",
                        unit_scale=True,
                        desc="Downloading ckpt_base.pth",
                    ) as pbar:
                        _copy_chunks(resp, f, pbar)
                else:
                    # Unknown size: download without a progress bar.
                    _copy_chunks(resp, f)

        os.replace(tmp_path, CKPT_PATH)
    finally:
        # Remove a partial download; after a successful os.replace the temp
        # file no longer exists, so this is a no-op on success.
        if os.path.isfile(tmp_path):
            try:
                os.remove(tmp_path)
            except Exception:
                pass
|
|
|
|
|
|
|
def tensor2pil(image: torch.Tensor) -> Image.Image:
    """Convert a ComfyUI image tensor (float values in [0, 1]) to a PIL image."""
    data = image.detach().cpu().numpy()
    # Drop a leading batch dimension of size 1 (ComfyUI batches images NHWC).
    if data.ndim == 4 and data.shape[0] == 1:
        data = data[0]
    scaled = np.clip(data * 255.0, 0, 255).astype(np.uint8)
    return Image.fromarray(scaled)
|
|
|
|
|
|
|
| def pil2tensor(image: Image.Image) -> torch.Tensor:
|
| return torch.from_numpy(np.array(image).astype(np.float32) / 255.0).unsqueeze(0)
|
|
|
|
|
| def _rgba_to_rgb_on_white(pil_img: Image.Image) -> Image.Image:
|
| if pil_img.mode == "RGBA":
|
| bg = Image.new("RGBA", pil_img.size, (255, 255, 255, 255))
|
| composited = Image.alpha_composite(bg, pil_img)
|
| return composited.convert("RGB")
|
|
|
| if pil_img.mode != "RGB":
|
| return pil_img.convert("RGB")
|
|
|
| return pil_img
|
|
|
|
|
def _force_rgba_opaque(pil_img: Image.Image) -> Image.Image:
    """
    Opaque RGBA fallback (alpha=255), so you never get an "invisible" output.
    """
    rgba = pil_img.convert("RGBA")
    red, green, blue, _ = rgba.split()
    # Replace whatever alpha was there with a fully opaque channel.
    opaque = Image.new("L", rgba.size, 255)
    return Image.merge("RGBA", (red, green, blue, opaque))
|
|
|
|
|
| def _alpha_is_all_zero(pil_img: Image.Image) -> bool:
|
| """
|
| True if RGBA image alpha channel is entirely 0.
|
| """
|
| if pil_img.mode != "RGBA":
|
| return False
|
| try:
|
| extrema = pil_img.getextrema()
|
| return extrema[3][1] == 0
|
| except Exception:
|
| return False
|
|
|
|
|
| def _is_oom_error(e: BaseException) -> bool:
|
| oom_cuda_cls = getattr(getattr(torch, "cuda", None), "OutOfMemoryError", None)
|
| if oom_cuda_cls is not None and isinstance(e, oom_cuda_cls):
|
| return True
|
|
|
| oom_torch_cls = getattr(torch, "OutOfMemoryError", None)
|
| if oom_torch_cls is not None and isinstance(e, oom_torch_cls):
|
| return True
|
|
|
| msg = str(e).lower()
|
| if "out of memory" in msg:
|
| return True
|
| if "allocation on device" in msg:
|
| return True
|
| return ("cuda" in msg or "cublas" in msg or "hip" in msg) and ("memory" in msg)
|
|
|
|
|
| def _cuda_soft_cleanup() -> None:
|
| try:
|
| gc.collect()
|
| except Exception:
|
| pass
|
|
|
| if torch.cuda.is_available():
|
| try:
|
| torch.cuda.synchronize()
|
| except Exception:
|
| pass
|
| try:
|
| torch.cuda.empty_cache()
|
| except Exception:
|
| pass
|
| try:
|
| torch.cuda.ipc_collect()
|
| except Exception:
|
| pass
|
|
|
|
|
def _comfy_soft_empty_cache() -> None:
    """Ask ComfyUI to release its cached VRAM, tolerating older signatures.

    Newer ComfyUI accepts soft_empty_cache(force=True); older builds take no
    arguments. A missing module/attribute or any runtime error is ignored.
    """
    if comfy_mm is None:
        return
    fn = getattr(comfy_mm, "soft_empty_cache", None)
    if fn is None:
        return
    try:
        fn(force=True)
    except TypeError:
        # Older signature without the `force` keyword.
        try:
            fn()
        except Exception:
            pass
    except Exception:
        pass
|
|
|
|
|
def _get_comfy_torch_device() -> torch.device:
    """
    Always prefer ComfyUI's chosen device.

    Falls back to cuda:0 when CUDA is available, else CPU.
    """
    getter = getattr(comfy_mm, "get_torch_device", None) if comfy_mm is not None else None
    if getter is not None:
        try:
            dev = getter()
            # Normalize anything device-like into a real torch.device.
            return dev if isinstance(dev, torch.device) else torch.device(str(dev))
        except Exception:
            pass

    return torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
|
|
|
|
|
| def _set_current_cuda_device(dev: torch.device) -> None:
|
| """
|
| Make sure mem_get_info() measurements are on the same device ComfyUI uses.
|
| """
|
| if dev.type == "cuda":
|
| try:
|
| if dev.index is not None:
|
| torch.cuda.set_device(dev.index)
|
| except Exception:
|
| pass
|
|
|
|
|
| def _cuda_free_bytes_on(dev: torch.device) -> Optional[int]:
|
| if dev.type != "cuda" or not torch.cuda.is_available():
|
| return None
|
| try:
|
| _set_current_cuda_device(dev)
|
| free_b, _total_b = torch.cuda.mem_get_info()
|
| return int(free_b)
|
| except Exception:
|
| return None
|
|
|
|
|
def _comfy_unload_one_smallest_model() -> bool:
    """
    Best-effort "smallest-first" eviction of one ComfyUI-tracked loaded model.

    If ComfyUI internals differ, this may do nothing (and we fall back to unload_all_models()).

    Returns True when an eviction was attempted, False when there was nothing
    to evict (no ComfyUI, no tracked models, or internals not as expected).
    """
    # No ComfyUI at all, or an incompatible version: nothing we can do.
    if comfy_mm is None:
        return False
    if not hasattr(comfy_mm, "current_loaded_models"):
        return False

    try:
        cur_dev = _get_comfy_torch_device()
    except Exception:
        cur_dev = None

    # Collect (memory, model) pairs for models resident on the current device.
    models = []
    try:
        for lm in list(comfy_mm.current_loaded_models):
            try:

                # Skip models living on a different device than the one we want to free.
                lm_dev = getattr(lm, "device", None)
                if cur_dev is not None and lm_dev is not None and str(lm_dev) != str(cur_dev):
                    continue

                # Prefer the callable accessor; fall back to the plain attribute.
                mem_fn = getattr(lm, "model_loaded_memory", None)
                if callable(mem_fn):
                    mem = int(mem_fn())
                else:
                    mem = int(getattr(lm, "loaded_memory", 0) or 0)

                # Only consider models that actually occupy memory.
                if mem > 0:
                    models.append((mem, lm))
            except Exception:
                continue
    except Exception:
        return False

    if not models:
        return False

    # Evict the smallest model first to minimize disruption to the workflow.
    models.sort(key=lambda x: x[0])
    _mem, lm = models[0]

    try:
        unload_fn = getattr(lm, "model_unload", None)
        if callable(unload_fn):
            try:
                unload_fn(unpatch_weights=True)
            except TypeError:
                # Older ComfyUI: model_unload() takes no keyword arguments.
                unload_fn()
    except Exception:
        pass

    # Let ComfyUI drop references to unloaded models, then flush caches.
    try:
        cleanup = getattr(comfy_mm, "cleanup_models", None)
        if callable(cleanup):
            cleanup()
    except Exception:
        pass

    _comfy_soft_empty_cache()
    _cuda_soft_cleanup()
    return True
|
|
|
|
|
def _comfy_unload_all_models() -> None:
    """Evict every ComfyUI-tracked model, then flush caches (best effort)."""
    if comfy_mm is None:
        return
    unload = getattr(comfy_mm, "unload_all_models", None)
    if unload is not None:
        try:
            unload()
        except Exception:
            pass
    _comfy_soft_empty_cache()
    _cuda_soft_cleanup()
|
|
|
|
|
|
|
|
|
|
|
|
|
# Cache of Remover instances keyed by (jit,), plus one run lock per instance so
# concurrent node executions never share a single forward pass.
_REMOVER_CACHE: Dict[Tuple[bool], Remover] = {}
_REMOVER_RUN_LOCKS: Dict[Tuple[bool], threading.Lock] = {}
# Guards creation/lookup in the two dicts above.
_CACHE_LOCK = threading.Lock()
|
|
|
|
|
def _get_remover(jit: bool = False) -> tuple[Remover, threading.Lock]:
    """Return a cached Remover (and its run lock) for the given jit setting.

    The instance is created lazily under _CACHE_LOCK, downloading the
    checkpoint first when needed. On OOM during construction the CUDA caches
    are flushed before the exception is re-raised.
    """
    key = (jit,)
    with _CACHE_LOCK:
        inst = _REMOVER_CACHE.get(key)
        if inst is None:
            _ensure_ckpt_base()
            try:
                # jit=True requests the TorchScript model; default args otherwise.
                inst = Remover(jit=jit) if jit else Remover()
            except BaseException as e:
                if _is_oom_error(e):
                    _cuda_soft_cleanup()
                raise
            _REMOVER_CACHE[key] = inst

        # One lock per cached instance: serializes concurrent inference on it.
        run_lock = _REMOVER_RUN_LOCKS.get(key)
        if run_lock is None:
            run_lock = threading.Lock()
            _REMOVER_RUN_LOCKS[key] = run_lock

    return inst, run_lock
|
|
|
|
|
|
|
|
|
|
|
|
|
# State for the single shared ("global") remover used by the *_Global nodes.
_GLOBAL_LOCK = threading.Lock()      # guards creation/deletion of the instance
_GLOBAL_RUN_LOCK = threading.Lock()  # serializes inference on the shared instance
_GLOBAL_REMOVER: Optional[Remover] = None
_GLOBAL_ON_DEVICE: str = "cpu"       # last known device of the shared instance
_GLOBAL_VRAM_DELTA_BYTES: int = 0    # best-effort VRAM cost of loading the weights
|
|
|
|
|
def _create_global_remover_cpu() -> Remover:
    """
    Create the Remover configured like InspyrenetRembg3 (jit=False),
    but *try* to force CPU init to avoid VRAM OOM during creation.
    """
    _ensure_ckpt_base()

    # Preferred path: newer transparent-background accepts a device argument.
    try:
        r = Remover(device="cpu")
        try:
            r.device = "cpu"
        except Exception:
            pass
        return r
    except TypeError:
        # Older library version: no `device` keyword; fall through below.
        pass

    # Fallback: construct normally, then move the weights to CPU ourselves.
    r = Remover()
    try:
        if hasattr(r, "model"):
            r.model = r.model.to("cpu")
        r.device = "cpu"
    except Exception:
        pass
    _cuda_soft_cleanup()
    return r
|
|
|
|
|
def _get_global_remover() -> Remover:
    """Return the process-wide shared Remover, creating it lazily on first use."""
    global _GLOBAL_REMOVER, _GLOBAL_ON_DEVICE
    with _GLOBAL_LOCK:
        inst = _GLOBAL_REMOVER
        if inst is None:
            # First use: build on CPU to avoid VRAM pressure during init.
            inst = _create_global_remover_cpu()
            _GLOBAL_REMOVER = inst
            _GLOBAL_ON_DEVICE = str(getattr(inst, "device", "cpu"))
        return inst
|
|
|
|
|
def _move_global_to_cpu() -> None:
    """Offload the shared remover's weights to CPU and flush CUDA caches."""
    global _GLOBAL_ON_DEVICE
    remover = _get_global_remover()
    try:
        if hasattr(remover, "model"):
            remover.model = remover.model.to("cpu")
        remover.device = "cpu"
        _GLOBAL_ON_DEVICE = "cpu"
    except Exception:
        # Best effort: a failed move leaves the previous state untouched.
        pass
    _cuda_soft_cleanup()
|
|
|
|
|
def _load_global_to_comfy_cuda_no_crash(max_evictions: int = 32) -> bool:
    """
    Load the global remover into VRAM on ComfyUI's chosen CUDA device.
    Never crashes on OOM: evicts smallest model first, then unload_all as last resort.
    Also records a best-effort VRAM delta.

    Returns True when the remover ends up on CUDA, False when it stays on CPU.
    """
    global _GLOBAL_ON_DEVICE, _GLOBAL_VRAM_DELTA_BYTES

    r = _get_global_remover()
    dev = _get_comfy_torch_device()

    # No CUDA target available: keep/put the remover on CPU and report failure.
    if dev.type != "cuda" or not torch.cuda.is_available():
        _move_global_to_cpu()
        return False

    # Already resident on some CUDA device: nothing to move.
    cur_dev = str(getattr(r, "device", "") or "")
    if cur_dev.startswith("cuda"):
        _GLOBAL_ON_DEVICE = cur_dev
        return True

    _set_current_cuda_device(dev)

    # Snapshot free VRAM before the move to estimate the weights' footprint.
    free_before = _cuda_free_bytes_on(dev)

    # Try the move; on OOM, evict one model per iteration and retry.
    for _ in range(max_evictions + 1):
        try:

            if hasattr(r, "model"):
                r.model = r.model.to(dev)
            r.device = str(dev)
            _GLOBAL_ON_DEVICE = str(dev)

            _comfy_soft_empty_cache()
            _cuda_soft_cleanup()

            # Best-effort VRAM delta (weights residency only; not peak inference).
            free_after = _cuda_free_bytes_on(dev)
            if free_before is not None and free_after is not None:
                delta = max(0, int(free_before) - int(free_after))
                if delta > 0:
                    _GLOBAL_VRAM_DELTA_BYTES = delta

            return True

        except BaseException as e:
            # Anything other than OOM is a real error: propagate it.
            if not _is_oom_error(e):
                raise
            _comfy_soft_empty_cache()
            _cuda_soft_cleanup()

            # Evict the smallest tracked model; if none remain, drop everything.
            if not _comfy_unload_one_smallest_model():
                _comfy_unload_all_models()

    # Ran out of eviction attempts: fall back to CPU without crashing.
    _move_global_to_cpu()
    return False
|
|
|
|
|
def _run_global_rgba_no_crash(pil_rgb: Image.Image, fallback_rgba: Image.Image) -> Image.Image:
    """
    Run remover.process() (rgba output), matching InspyrenetRembg3 behavior.
    On OOM: evict models and retry, then CPU fallback.
    If output alpha is fully transparent, return fallback (prevents "invisible" output).

    The original implementation repeated the same try/process block four
    times with escalating recovery; this version factors the attempt and the
    recovery steps out while preserving the exact escalation order.
    """
    r = _get_global_remover()

    # Best effort: get the weights onto ComfyUI's CUDA device first.
    _load_global_to_comfy_cuda_no_crash()

    def _attempt() -> Image.Image:
        # One serialized inference pass; substitute the visible fallback if
        # the result came back fully transparent.
        with _GLOBAL_RUN_LOCK:
            with torch.inference_mode():
                out = r.process(pil_rgb, type="rgba")
                if _alpha_is_all_zero(out):
                    return fallback_rgba
                return out

    def _recover_evict_one() -> None:
        # Flush caches, then evict the smallest ComfyUI-tracked model.
        _comfy_soft_empty_cache()
        _cuda_soft_cleanup()
        _comfy_unload_one_smallest_model()

    # Escalating OOM recovery between attempts: evict one model, evict all
    # models, finally move the remover itself to CPU.
    for recover in (_recover_evict_one, _comfy_unload_all_models, _move_global_to_cpu):
        try:
            return _attempt()
        except BaseException as e:
            # Non-OOM errors are real bugs: propagate them.
            if not _is_oom_error(e):
                raise
            recover()

    # Final attempt (now on CPU); never crash — return the visible fallback.
    try:
        return _attempt()
    except BaseException:
        return fallback_rgba
|
|
|
|
|
|
|
|
|
|
|
|
|
class InspyrenetRembg2:
    """Background-removal node: returns RGBA images plus the alpha as a MASK."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "image": ("IMAGE",),
                "torchscript_jit": (["default", "on"],)
            },
        }

    RETURN_TYPES = ("IMAGE", "MASK")
    FUNCTION = "remove_background"
    CATEGORY = "image"

    def remove_background(self, image, torchscript_jit):
        # "on" selects the TorchScript-compiled model path.
        use_jit = torchscript_jit != "default"
        remover, run_lock = _get_remover(jit=use_jit)

        outputs = []
        for frame in tqdm(image, "Inspyrenet Rembg2"):
            pil_frame = tensor2pil(frame)
            try:
                with run_lock:
                    with torch.inference_mode():
                        rgba = remover.process(pil_frame, type="rgba")
            except BaseException as e:
                if _is_oom_error(e):
                    _cuda_soft_cleanup()
                    raise RuntimeError("InspyrenetRembg2: CUDA out of memory.") from e
                raise

            tensor_out = pil2tensor(rgba)
            outputs.append(tensor_out)
            del pil_frame, rgba, tensor_out

        batch = torch.cat(outputs, dim=0)
        # The alpha channel doubles as the mask output.
        return (batch, batch[:, :, :, 3])
|
|
|
|
|
class InspyrenetRembg3:
    """Background-removal node without options; input alpha is flattened on white."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "image": ("IMAGE",),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "remove_background"
    CATEGORY = "image"

    def remove_background(self, image):
        remover, run_lock = _get_remover(jit=False)

        results = []
        for frame in tqdm(image, "Inspyrenet Rembg3"):
            pil_frame = tensor2pil(frame)
            # Flatten any incoming transparency onto white before matting.
            rgb_frame = _rgba_to_rgb_on_white(pil_frame)

            try:
                with run_lock:
                    with torch.inference_mode():
                        rgba = remover.process(rgb_frame, type="rgba")
            except BaseException as e:
                if _is_oom_error(e):
                    _cuda_soft_cleanup()
                    raise RuntimeError("InspyrenetRembg3: CUDA out of memory.") from e
                raise

            tensor_out = pil2tensor(rgba)
            results.append(tensor_out)
            del pil_frame, rgb_frame, rgba, tensor_out

        return (torch.cat(results, dim=0),)
|
|
|
|
|
|
|
|
|
|
|
|
|
class Load_Inspyrenet_Global:
    """
    No inputs. Creates the global remover (once) and moves it to ComfyUI's CUDA device (if possible).
    Returns:
      - loaded_ok (BOOLEAN)
      - vram_delta_bytes (INT) best-effort (weights residency only; not peak inference)
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {}}

    RETURN_TYPES = ("BOOLEAN", "INT")
    FUNCTION = "load"
    CATEGORY = "image"

    def load(self):
        # Make sure the singleton exists before attempting the VRAM move.
        _get_global_remover()
        loaded = _load_global_to_comfy_cuda_no_crash()
        return (bool(loaded), int(_GLOBAL_VRAM_DELTA_BYTES))
|
|
|
|
|
class Remove_Inspyrenet_Global:
    """
    Offload global remover to CPU or delete it.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "action": (["offload_to_cpu", "delete_instance"],),
            }
        }

    RETURN_TYPES = ("BOOLEAN",)
    FUNCTION = "remove"
    CATEGORY = "image"

    def remove(self, action):
        global _GLOBAL_REMOVER, _GLOBAL_ON_DEVICE, _GLOBAL_VRAM_DELTA_BYTES

        if action == "offload_to_cpu":
            _move_global_to_cpu()
            return (True,)

        # delete_instance: move weights off the GPU, then drop the singleton.
        with _GLOBAL_LOCK:
            try:
                inst = _GLOBAL_REMOVER
                if inst is not None:
                    try:
                        if hasattr(inst, "model"):
                            inst.model = inst.model.to("cpu")
                        inst.device = "cpu"
                    except Exception:
                        pass
                _GLOBAL_REMOVER = None
                _GLOBAL_ON_DEVICE = "cpu"
                _GLOBAL_VRAM_DELTA_BYTES = 0
            except Exception:
                pass

        _cuda_soft_cleanup()
        return (True,)
|
|
|
|
|
class Run_InspyrenetRembg_Global:
    """
    No settings. Same behavior as InspyrenetRembg3, but uses the global remover and won't crash on OOM.
    On failure/OOM, returns a visible passthrough (opaque RGBA), NOT an invisible image.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "image": ("IMAGE",),
            }
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "remove_background"
    CATEGORY = "image"

    def remove_background(self, image):
        # Make sure the shared remover exists before the per-frame loop.
        _get_global_remover()

        frames = []
        for frame in tqdm(image, "Run InspyrenetRembg Global"):
            pil_frame = tensor2pil(frame)

            # Opaque passthrough used when matting fails entirely.
            visible_fallback = _force_rgba_opaque(pil_frame)

            # Flatten any incoming transparency onto white before matting.
            rgb_frame = _rgba_to_rgb_on_white(pil_frame)

            result_pil = _run_global_rgba_no_crash(rgb_frame, visible_fallback)
            result = pil2tensor(result_pil)
            frames.append(result)

            del pil_frame, visible_fallback, rgb_frame, result_pil, result

        return (torch.cat(frames, dim=0),)
|
|
|
|
|
# Registration table consumed by ComfyUI at import time.
NODE_CLASS_MAPPINGS = {
    "InspyrenetRembg2": InspyrenetRembg2,
    "InspyrenetRembg3": InspyrenetRembg3,

    "Load_Inspyrenet_Global": Load_Inspyrenet_Global,
    "Remove_Inspyrenet_Global": Remove_Inspyrenet_Global,
    "Run_InspyrenetRembg_Global": Run_InspyrenetRembg_Global,
}

# Human-readable node titles shown in the ComfyUI node picker.
NODE_DISPLAY_NAME_MAPPINGS = {
    "InspyrenetRembg2": "Inspyrenet Rembg2",
    "InspyrenetRembg3": "Inspyrenet Rembg3",

    "Load_Inspyrenet_Global": "Load Inspyrenet Global",
    "Remove_Inspyrenet_Global": "Remove Inspyrenet Global",
    "Run_InspyrenetRembg_Global": "Run InspyrenetRembg Global",
}