Image2Model / pipeline /enhance_surface.py
Daankular's picture
Initial local files
14c3d13
"""
Surface enhancement for TripoSG GLB outputs.
StableNormal — high-quality normal map from portrait reference
Depth-Anything V2 — metric depth map → displacement intensity
Both run on the reference portrait, produce calibrated maps that
are baked as PBR textures (normalTexture + occlusion/displacement)
into the output GLB.
"""
import os
import numpy as np
import torch
from PIL import Image
STABLE_NORMAL_PATH = "/root/models/stable-normal"
DEPTH_ANYTHING_PATH = "/root/models/depth-anything-v2"
_normal_pipe = None
_depth_pipe = None
# ── model loading ──────────────────────────────────────────────────────────────
def load_normal_model():
    """Return the shared StableNormal pipeline, building it on first call.

    Two pipelines are created from ``STABLE_NORMAL_PATH`` in fp16 on CUDA:
    an "x-start" pipeline (``t_start=300``) that is handed to the
    ``HEURI_DDIMScheduler``, and the main pipeline that uses that scheduler.
    The main pipeline is cached in the module-level ``_normal_pipe`` so later
    calls return it immediately.
    """
    global _normal_pipe

    if _normal_pipe is None:
        # Imported lazily so the module can be imported without stablenormal.
        from stablenormal.pipeline_yoso_normal import YOSONormalsPipeline
        from stablenormal.scheduler.heuristics_ddimsampler import (
            HEURI_DDIMScheduler,
        )

        fp16_kwargs = {"torch_dtype": torch.float16, "variant": "fp16"}

        x_start_pipeline = YOSONormalsPipeline.from_pretrained(
            STABLE_NORMAL_PATH,
            **fp16_kwargs,
            t_start=int(0.3 * 1000),
        ).to("cuda")

        heuristic_scheduler = HEURI_DDIMScheduler.from_pretrained(
            STABLE_NORMAL_PATH,
            subfolder="scheduler",
            ddim_timestep_respacing="ddim10",
            x_start_pipeline=x_start_pipeline,
        )

        _normal_pipe = YOSONormalsPipeline.from_pretrained(
            STABLE_NORMAL_PATH,
            **fp16_kwargs,
            scheduler=heuristic_scheduler,
        ).to("cuda")
        _normal_pipe.set_progress_bar_config(disable=True)

    return _normal_pipe
def load_depth_model():
    """Return the shared ``(processor, model)`` pair for depth estimation.

    On first call the image processor and the fp16 depth model are loaded
    from ``DEPTH_ANYTHING_PATH``, the model is moved to CUDA, and the pair is
    cached in the module-level ``_depth_pipe``. Later calls return the cache.
    """
    global _depth_pipe

    if _depth_pipe is None:
        # Imported lazily so the module can be imported without transformers.
        from transformers import AutoImageProcessor, AutoModelForDepthEstimation

        image_processor = AutoImageProcessor.from_pretrained(DEPTH_ANYTHING_PATH)
        depth_model = AutoModelForDepthEstimation.from_pretrained(
            DEPTH_ANYTHING_PATH,
            torch_dtype=torch.float16,
        ).to("cuda")
        _depth_pipe = (image_processor, depth_model)

    return _depth_pipe
def unload_models():
    """Drop the cached models and hand their GPU memory back to the driver.

    Both module-level caches (``_normal_pipe`` and ``_depth_pipe``) are reset
    to ``None`` so the next ``load_*`` call rebuilds them. Safe to call when
    nothing is loaded.
    """
    global _normal_pipe, _depth_pipe
    import gc

    # Rebinding to None is enough to release our reference; a preceding
    # `del` of the global adds nothing.
    _normal_pipe = None
    _depth_pipe = None

    # Collect reference cycles first: tensors that are still reachable from
    # uncollected garbage stay allocated and empty_cache() cannot free them.
    gc.collect()
    torch.cuda.empty_cache()
# ── inference ──────────────────────────────────────────────────────────────────
def run_stable_normal(image: Image.Image, resolution: int = 768) -> Image.Image:
    """Estimate a normal map for *image* with the StableNormal pipeline.

    The input is converted to RGB and resized to ``resolution x resolution``
    before inference, which runs under ``inference_mode`` and CUDA autocast.

    Returns:
        An RGB PIL image in which normal components in [-1, 1] are encoded
        linearly as [0, 255].
    """
    pipe = load_normal_model()
    img = image.convert("RGB").resize((resolution, resolution), Image.LANCZOS)

    with torch.inference_mode(), torch.autocast("cuda"):
        result = pipe(img)

    normal = np.asarray(result.prediction)
    # NOTE(review): the pipeline output may carry a leading batch axis
    # ([N,H,W,3]) rather than [H,W,3] — confirm against the installed
    # stablenormal version. Image.fromarray cannot build an RGB image from a
    # 4-D array, so keep only the first (and only) sample when it is present.
    if normal.ndim == 4:
        normal = normal[0]

    normal_rgb = ((normal + 1) / 2 * 255).clip(0, 255).astype(np.uint8)
    return Image.fromarray(normal_rgb)
def run_depth_anything(image: Image.Image, resolution: int = 768) -> Image.Image:
    """Estimate a depth map for *image* with the cached depth model.

    The input is converted to RGB and resized to ``resolution x resolution``
    before it is handed to the image processor. The predicted depth is
    min-max normalised per image.

    Returns:
        A 16-bit grayscale ("I;16") PIL image with values spanning 0–65535.
    """
    processor, model = load_depth_model()

    rgb = image.convert("RGB")
    rgb = rgb.resize((resolution, resolution), Image.LANCZOS)

    # The model was loaded in fp16 on CUDA, so its inputs must match.
    encoded = processor(images=rgb, return_tensors="pt")
    cuda_inputs = {}
    for name, tensor in encoded.items():
        cuda_inputs[name] = tensor.to("cuda", dtype=torch.float16)

    with torch.inference_mode():
        prediction = model(**cuda_inputs).predicted_depth[0]
        depth = prediction.float().cpu().numpy()

    # Normalize to 0–1; the epsilon avoids division by zero on a flat map.
    depth = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)

    depth_16 = (depth * 65535).astype(np.uint16)
    return Image.fromarray(depth_16, mode="I;16")
# ── GLB baking ─────────────────────────────────────────────────────────────────
def bake_normal_into_glb(
    glb_path: str,
    normal_img: Image.Image,
    out_path: str,
    normal_strength: float = 1.0,
) -> str:
    """Embed *normal_img* as the ``normalTexture`` of the GLB's first material.

    The normal map is resized to the pixel dimensions of the first material's
    embedded base color texture (width and height), or to 1024x1024 when no
    such embedded texture is found. It is PNG-encoded, appended to the GLB
    binary buffer, and registered as a new bufferView / image / texture.

    Args:
        glb_path: Path of the GLB file to read.
        normal_img: Normal map to embed.
        out_path: Path the modified GLB is written to.
        normal_strength: Value stored as the normal texture's ``scale``.

    Returns:
        ``out_path``.
    """
    import io

    import pygltflib

    gltf = pygltflib.GLTF2().load(glb_path)
    blob = bytearray(gltf.binary_blob() or b"")

    # Fallback size when the base color texture cannot be inspected.
    target_size = (1024, 1024)
    if gltf.materials and gltf.materials[0].pbrMetallicRoughness:
        pbr = gltf.materials[0].pbrMetallicRoughness
        if pbr.baseColorTexture is not None:
            base_img_idx = gltf.textures[pbr.baseColorTexture.index].source
            base_view_idx = None
            if base_img_idx is not None:
                base_view_idx = gltf.images[base_img_idx].bufferView
            # Images referenced by URI have no bufferView; keep the fallback
            # size for those instead of indexing bufferViews with None.
            if base_view_idx is not None:
                view = gltf.bufferViews[base_view_idx]
                start = view.byteOffset or 0
                base_bytes = bytes(blob[start:start + view.byteLength])
                with Image.open(io.BytesIO(base_bytes)) as existing:
                    # Use width AND height so a non-square base texture does
                    # not get a distorted square normal map.
                    target_size = existing.size

    normal_resized = normal_img.resize(target_size, Image.LANCZOS)

    # Encode the normal map as PNG and append it to the binary blob.
    buf = io.BytesIO()
    normal_resized.save(buf, format="PNG")
    png_bytes = buf.getvalue()

    byte_offset = len(blob)
    blob.extend(png_bytes)
    # Pad to 4-byte alignment.
    while len(blob) % 4:
        blob.append(0)

    # Register bufferView -> image -> texture, each pointing at the previous.
    bv_idx = len(gltf.bufferViews)
    gltf.bufferViews.append(pygltflib.BufferView(
        buffer=0, byteOffset=byte_offset, byteLength=len(png_bytes),
    ))
    img_idx = len(gltf.images)
    gltf.images.append(pygltflib.Image(
        bufferView=bv_idx, mimeType="image/png",
    ))
    tex_idx = len(gltf.textures)
    gltf.textures.append(pygltflib.Texture(source=img_idx))

    # Only the first material is updated.
    if gltf.materials:
        gltf.materials[0].normalTexture = pygltflib.NormalMaterialTexture(
            index=tex_idx, scale=normal_strength,
        )

    # The buffer grew, so its declared length must be updated.
    gltf.buffers[0].byteLength = len(blob)
    gltf.set_binary_blob(bytes(blob))
    gltf.save(out_path)
    return out_path
def bake_depth_as_occlusion(
    glb_path: str,
    depth_img: Image.Image,
    out_path: str,
    displacement_scale: float = 1.0,
) -> str:
    """Embed *depth_img* as the ``occlusionTexture`` of the first material.

    The depth values are divided by 65535, inverted, multiplied by
    ``displacement_scale``, clipped to [0, 1] and written to all three
    channels of an 8-bit RGB PNG. The image is resized to the pixel
    dimensions of the first material's embedded base color texture, or to
    1024x1024 when no such embedded texture is found.

    Args:
        glb_path: Path of the GLB file to read.
        depth_img: Depth map; values are assumed to span 0-65535 (16-bit).
        out_path: Path the modified GLB is written to.
        displacement_scale: Multiplier applied to the inverted depth. It is
            also stored, clamped to [0, 1], as the occlusion ``strength``.

    Returns:
        ``out_path``.
    """
    import io

    import pygltflib

    gltf = pygltflib.GLTF2().load(glb_path)
    blob = bytearray(gltf.binary_blob() or b"")

    # Fallback size when the base color texture cannot be inspected.
    target_size = (1024, 1024)
    if gltf.materials and gltf.materials[0].pbrMetallicRoughness:
        pbr = gltf.materials[0].pbrMetallicRoughness
        if pbr.baseColorTexture is not None:
            base_img_idx = gltf.textures[pbr.baseColorTexture.index].source
            base_view_idx = None
            if base_img_idx is not None:
                base_view_idx = gltf.images[base_img_idx].bufferView
            # Images referenced by URI have no bufferView; keep the fallback
            # size for those instead of indexing bufferViews with None.
            if base_view_idx is not None:
                view = gltf.bufferViews[base_view_idx]
                start = view.byteOffset or 0
                base_bytes = bytes(blob[start:start + view.byteLength])
                with Image.open(io.BytesIO(base_bytes)) as existing:
                    # Use width AND height so a non-square base texture does
                    # not get a distorted square occlusion map.
                    target_size = existing.size

    # Convert 16-bit depth to 8-bit RGB occlusion (inverted, scaled).
    depth_arr = np.array(depth_img).astype(np.float32) / 65535.0
    depth_arr = 1.0 - depth_arr
    depth_arr = np.clip(depth_arr * displacement_scale, 0, 1)
    occ_8 = (depth_arr * 255).astype(np.uint8)
    occ_rgb = Image.fromarray(np.stack([occ_8, occ_8, occ_8], axis=-1))
    occ_rgb = occ_rgb.resize(target_size, Image.LANCZOS)

    buf = io.BytesIO()
    occ_rgb.save(buf, format="PNG")
    png_bytes = buf.getvalue()

    byte_offset = len(blob)
    blob.extend(png_bytes)
    # Pad to 4-byte alignment.
    while len(blob) % 4:
        blob.append(0)

    # Register bufferView -> image -> texture, each pointing at the previous.
    bv_idx = len(gltf.bufferViews)
    gltf.bufferViews.append(pygltflib.BufferView(
        buffer=0, byteOffset=byte_offset, byteLength=len(png_bytes),
    ))
    img_idx = len(gltf.images)
    gltf.images.append(pygltflib.Image(
        bufferView=bv_idx, mimeType="image/png",
    ))
    tex_idx = len(gltf.textures)
    gltf.textures.append(pygltflib.Texture(source=img_idx))

    # Only the first material is updated.
    if gltf.materials:
        # glTF 2.0 restricts occlusion strength to [0, 1]; an unclamped
        # displacement_scale > 1 would produce an invalid asset.
        strength = float(min(max(displacement_scale, 0.0), 1.0))
        gltf.materials[0].occlusionTexture = pygltflib.OcclusionTextureInfo(
            index=tex_idx, strength=strength,
        )

    # The buffer grew, so its declared length must be updated.
    gltf.buffers[0].byteLength = len(blob)
    gltf.set_binary_blob(bytes(blob))
    gltf.save(out_path)
    return out_path