Spaces:

Daankular
/

Image2Model

Running on Zero

App Files Files Community

Image2Model / pipeline /enhance_surface.py

Daankular

Initial local files

14c3d13 13 days ago

raw

history blame contribute delete

8.56 kB

	"""
	Surface enhancement for TripoSG GLB outputs.

	StableNormal — high-quality normal map from portrait reference
	Depth-Anything V2 — metric depth map → displacement intensity

	Both run on the reference portrait, produce calibrated maps that
	are baked as PBR textures (normalTexture + occlusion/displacement)
	into the output GLB.
	"""

	import os
	import numpy as np
	import torch
	from PIL import Image


	# Location of the StableNormal weights handed to from_pretrained() in
	# load_normal_model().
	STABLE_NORMAL_PATH = "/root/models/stable-normal"
	# Location of the Depth-Anything V2 processor/model weights read by
	# load_depth_model().
	DEPTH_ANYTHING_PATH = "/root/models/depth-anything-v2"

	# Module-level caches so repeated calls reuse an already-loaded model.
	# They are filled lazily by the load_* functions and reset to None by
	# unload_models().
	_normal_pipe = None
	_depth_pipe = None


	# ── model loading ──────────────────────────────────────────────────────────────

	def load_normal_model():
	    """Return the StableNormal pipeline, constructing and caching it on first use.

	    Two ``YOSONormalsPipeline`` instances are created from
	    ``STABLE_NORMAL_PATH`` in fp16 on CUDA: a helper pipeline started at
	    ``t_start=300`` and the main pipeline, whose ``HEURI_DDIMScheduler``
	    receives the helper as ``x_start_pipeline``. Only the main pipeline is
	    stored in the module cache (the helper stays reachable through the
	    scheduler it was passed to).

	    Returns:
	        The cached pipeline object with its progress bar disabled.
	    """
	    global _normal_pipe
	    if _normal_pipe is None:
	        # Imported lazily so the module can be imported without stablenormal.
	        from stablenormal.pipeline_yoso_normal import YOSONormalsPipeline
	        from stablenormal.scheduler.heuristics_ddimsampler import HEURI_DDIMScheduler

	        # Keyword arguments common to both pipeline instances.
	        half_precision = {"torch_dtype": torch.float16, "variant": "fp16"}

	        x_start_pipeline = YOSONormalsPipeline.from_pretrained(
	            STABLE_NORMAL_PATH,
	            t_start=int(0.3 * 1000),
	            **half_precision,
	        ).to("cuda")

	        heuristic_scheduler = HEURI_DDIMScheduler.from_pretrained(
	            STABLE_NORMAL_PATH,
	            subfolder="scheduler",
	            ddim_timestep_respacing="ddim10",
	            x_start_pipeline=x_start_pipeline,
	        )

	        _normal_pipe = YOSONormalsPipeline.from_pretrained(
	            STABLE_NORMAL_PATH,
	            scheduler=heuristic_scheduler,
	            **half_precision,
	        ).to("cuda")
	        _normal_pipe.set_progress_bar_config(disable=True)
	    return _normal_pipe


	def load_depth_model():
	    """Return the cached ``(processor, model)`` pair for depth estimation.

	    On the first call the image processor and the fp16 depth-estimation
	    model are loaded from ``DEPTH_ANYTHING_PATH`` via the transformers
	    Auto* classes and the model is moved to CUDA. Later calls return the
	    same tuple without reloading.
	    """
	    global _depth_pipe
	    if _depth_pipe is None:
	        # Imported lazily so the module can be imported without transformers.
	        from transformers import AutoImageProcessor, AutoModelForDepthEstimation

	        image_processor = AutoImageProcessor.from_pretrained(DEPTH_ANYTHING_PATH)
	        depth_net = AutoModelForDepthEstimation.from_pretrained(
	            DEPTH_ANYTHING_PATH, torch_dtype=torch.float16
	        )
	        depth_net = depth_net.to("cuda")
	        _depth_pipe = (image_processor, depth_net)
	    return _depth_pipe


	def unload_models():
	    """Drop the cached models and hand their GPU memory back to the driver.

	    Both module-level caches are reset to ``None`` so the next
	    ``load_*`` call rebuilds the model. A garbage-collection pass runs
	    before ``torch.cuda.empty_cache()``: objects that are part of a
	    reference cycle are not freed by merely dropping the last name, and
	    the caching allocator can only release blocks whose tensors are
	    actually gone.
	    """
	    global _normal_pipe, _depth_pipe
	    import gc

	    # Rebinding is enough to drop the references; no `del` needed.
	    _normal_pipe = None
	    _depth_pipe = None

	    # Break reference cycles first, otherwise empty_cache() has nothing
	    # to release for objects that are still waiting on the collector.
	    gc.collect()
	    torch.cuda.empty_cache()


	# ── inference ──────────────────────────────────────────────────────────────────

	def run_stable_normal(image: Image.Image, resolution: int = 768) -> Image.Image:
	    """Estimate a normal map for ``image`` and return it as an RGB PIL image.

	    The input is converted to RGB and resized to a ``resolution`` x
	    ``resolution`` square before being passed to the cached StableNormal
	    pipeline. The prediction, treated as values in [-1, 1], is remapped
	    to [0, 255] and returned as 8-bit RGB.

	    Args:
	        image: Reference image; any PIL mode that converts to RGB.
	        resolution: Side length of the square fed to the pipeline.

	    Returns:
	        The encoded normal map as an 8-bit RGB image.
	    """
	    pipe = load_normal_model()
	    img = image.convert("RGB").resize((resolution, resolution), Image.LANCZOS)
	    with torch.inference_mode(), torch.autocast("cuda"):
	        result = pipe(img)

	    normals = np.asarray(result.prediction)
	    # NOTE(review): StableNormal's own example code indexes
	    # `prediction[0]`, i.e. the output appears to carry a leading batch
	    # axis. Image.fromarray cannot build an image from a 4-D array, so
	    # take the single image out of the batch when that axis is present.
	    if normals.ndim == 4:
	        normals = normals[0]

	    normal_rgb = ((normals + 1) / 2 * 255).clip(0, 255).astype(np.uint8)
	    return Image.fromarray(normal_rgb)


	def run_depth_anything(image: Image.Image, resolution: int = 768) -> Image.Image:
	    """Predict depth for ``image`` and return it as a 16-bit grayscale image.

	    The input is converted to RGB, resized to a ``resolution`` x
	    ``resolution`` square, preprocessed, and run through the cached depth
	    model in fp16 on CUDA. The first prediction in the batch is min-max
	    normalised per image and scaled to the 0-65535 range.

	    Args:
	        image: Reference image; any PIL mode that converts to RGB.
	        resolution: Side length of the square given to the processor.

	    Returns:
	        A mode ``"I;16"`` PIL image holding the normalised depth.
	    """
	    processor, model = load_depth_model()

	    square = (resolution, resolution)
	    rgb = image.convert("RGB").resize(square, Image.LANCZOS)

	    batch = processor(images=rgb, return_tensors="pt")
	    # The model was loaded in fp16 on the GPU, so its inputs must match.
	    batch = {
	        name: tensor.to("cuda", dtype=torch.float16)
	        for name, tensor in batch.items()
	    }

	    with torch.inference_mode():
	        output = model(**batch)
	        depth = output.predicted_depth[0].float().cpu().numpy()

	    # Min-max normalise to [0, 1]; the epsilon guards a constant map.
	    lowest = depth.min()
	    depth = (depth - lowest) / (depth.max() - lowest + 1e-8)

	    depth_16 = (depth * 65535).astype(np.uint16)
	    return Image.fromarray(depth_16, mode="I;16")


	# ── GLB baking ─────────────────────────────────────────────────────────────────

	def _base_color_texture_size(gltf, default: int = 1024):
	    """Return ``(width, height)`` of material 0's embedded base color texture.

	    Falls back to ``(default, default)`` when there is no material, no
	    base color texture, no image source on that texture, or the image is
	    not stored in the binary chunk (e.g. it is referenced by URI), so
	    there are no bytes here to measure.
	    """
	    import io

	    fallback = (default, default)
	    if not gltf.materials:
	        return fallback
	    pbr = gltf.materials[0].pbrMetallicRoughness
	    if pbr is None or pbr.baseColorTexture is None:
	        return fallback

	    source = gltf.textures[pbr.baseColorTexture.index].source
	    if source is None:
	        return fallback
	    view_index = gltf.images[source].bufferView
	    blob = gltf.binary_blob()
	    if view_index is None or blob is None:
	        return fallback

	    view = gltf.bufferViews[view_index]
	    start = view.byteOffset or 0
	    with Image.open(io.BytesIO(blob[start:start + view.byteLength])) as existing:
	        return existing.size


	def _embed_png_texture(gltf, image: Image.Image) -> int:
	    """Append ``image`` as a PNG to buffer 0 and return the new texture index.

	    Adds the bufferView, image and texture entries, replaces the binary
	    blob and updates ``buffers[0].byteLength``. The blob is padded to a
	    4-byte boundary both before the new data (so it starts aligned) and
	    after it (so the binary chunk length stays a multiple of four).
	    """
	    import io
	    import pygltflib

	    encoded = io.BytesIO()
	    image.save(encoded, format="PNG")
	    png_bytes = encoded.getvalue()

	    blob = bytearray(gltf.binary_blob() or b"")
	    blob.extend(b"\x00" * (-len(blob) % 4))
	    byte_offset = len(blob)
	    blob.extend(png_bytes)
	    blob.extend(b"\x00" * (-len(blob) % 4))

	    gltf.bufferViews.append(pygltflib.BufferView(
	        buffer=0, byteOffset=byte_offset, byteLength=len(png_bytes),
	    ))
	    gltf.images.append(pygltflib.Image(
	        bufferView=len(gltf.bufferViews) - 1, mimeType="image/png",
	    ))
	    gltf.textures.append(pygltflib.Texture(source=len(gltf.images) - 1))

	    gltf.buffers[0].byteLength = len(blob)
	    gltf.set_binary_blob(bytes(blob))
	    return len(gltf.textures) - 1


	def bake_normal_into_glb(
	    glb_path: str,
	    normal_img: Image.Image,
	    out_path: str,
	    normal_strength: float = 1.0,
	) -> str:
	    """Attach ``normal_img`` as the normalTexture of the GLB's first material.

	    The normal map is resized to the width and height of the existing
	    embedded base color texture (1024 x 1024 when none can be read),
	    embedded as a PNG, and referenced from material 0 with
	    ``scale=normal_strength``. If the file has no materials there is
	    nothing to attach the texture to, so the model is written to
	    ``out_path`` unchanged instead of carrying an unreferenced image.

	    Args:
	        glb_path: Path of the GLB to read.
	        normal_img: Normal map to embed.
	        out_path: Path the modified GLB is written to.
	        normal_strength: Value stored as the normal texture ``scale``.

	    Returns:
	        ``out_path``.
	    """
	    import pygltflib

	    gltf = pygltflib.GLTF2().load(glb_path)

	    if gltf.materials:
	        target_size = _base_color_texture_size(gltf)
	        normal_resized = normal_img.resize(target_size, Image.LANCZOS)
	        tex_idx = _embed_png_texture(gltf, normal_resized)
	        gltf.materials[0].normalTexture = pygltflib.NormalMaterialTexture(
	            index=tex_idx, scale=normal_strength,
	        )

	    gltf.save(out_path)
	    return out_path


	def bake_depth_as_occlusion(
	    glb_path: str,
	    depth_img: Image.Image,
	    out_path: str,
	    displacement_scale: float = 1.0,
	) -> str:
	    """Attach an inverted depth map as the occlusionTexture of material 0.

	    The depth image is read as 16-bit values, scaled to [0, 1], inverted,
	    multiplied by ``displacement_scale`` and clipped back to [0, 1]. The
	    result is written as an 8-bit RGB image (same value in every channel)
	    resized to the existing embedded base color texture's width and
	    height (1024 x 1024 when none can be read). If the file has no
	    materials the model is written to ``out_path`` unchanged.

	    Args:
	        glb_path: Path of the GLB to read.
	        depth_img: 16-bit grayscale depth image (0-65535).
	        out_path: Path the modified GLB is written to.
	        displacement_scale: Multiplier applied to the inverted depth;
	            also stored as the occlusion ``strength`` after clamping.

	    Returns:
	        ``out_path``.
	    """
	    import pygltflib

	    gltf = pygltflib.GLTF2().load(glb_path)

	    if gltf.materials:
	        target_size = _base_color_texture_size(gltf)

	        # 16-bit depth -> inverted, scaled 8-bit value replicated to RGB.
	        depth_arr = np.array(depth_img).astype(np.float32) / 65535.0
	        depth_arr = np.clip((1.0 - depth_arr) * displacement_scale, 0, 1)
	        occ_8 = (depth_arr * 255).astype(np.uint8)
	        occ_rgb = Image.fromarray(np.stack([occ_8, occ_8, occ_8], axis=-1))
	        occ_rgb = occ_rgb.resize(target_size, Image.LANCZOS)

	        tex_idx = _embed_png_texture(gltf, occ_rgb)

	        # glTF restricts occlusion strength to [0, 1]; a larger
	        # displacement_scale would otherwise produce an invalid file.
	        # NOTE(review): the scale is applied to the texel values above and
	        # again here as strength - confirm the double use is intended.
	        strength = min(max(displacement_scale, 0.0), 1.0)
	        gltf.materials[0].occlusionTexture = pygltflib.OcclusionTextureInfo(
	            index=tex_idx, strength=strength,
	        )

	    gltf.save(out_path)
	    return out_path