Spaces:
Running on Zero
Running on Zero
| """ | |
| Surface enhancement for TripoSG GLB outputs. | |
| StableNormal -> high-quality normal map from portrait reference | |
| Depth-Anything V2 -> metric depth map -> displacement intensity | |
| Both run on the reference portrait, produce calibrated maps that | |
| are baked as PBR textures (normalTexture + occlusion/displacement) | |
| into the output GLB. | |
| """ | |
| import os | |
| import numpy as np | |
| import torch | |
| from PIL import Image | |
# Local checkpoint directories for the two models used by this module.
_MODELS_ROOT = "/root/models"
STABLE_NORMAL_PATH = f"{_MODELS_ROOT}/stable-normal"
DEPTH_ANYTHING_PATH = f"{_MODELS_ROOT}/depth-anything-v2"

# Lazily populated caches: filled by load_normal_model() / load_depth_model()
# and reset to None by unload_models().
_normal_pipe = None
_depth_pipe = None
| # -- model loading ------------------------------------------------------------ | |
def load_normal_model():
    """Return the cached StableNormal pipeline, building it on first use.

    On the first call two ``YOSONormalsPipeline`` instances are loaded from
    ``STABLE_NORMAL_PATH`` in fp16 and moved to CUDA: a helper pipeline
    created with ``t_start=int(0.3 * 1000)``, and the main pipeline whose
    ``HEURI_DDIMScheduler`` receives that helper as ``x_start_pipeline``.
    The main pipeline is stored in the module-level ``_normal_pipe`` and
    handed back unchanged by every later call.
    """
    global _normal_pipe
    if _normal_pipe is None:
        # Imported lazily so the module can be imported without stablenormal.
        from stablenormal.pipeline_yoso_normal import YOSONormalsPipeline
        from stablenormal.scheduler.heuristics_ddimsampler import HEURI_DDIMScheduler

        fp16_kwargs = {"torch_dtype": torch.float16, "variant": "fp16"}

        # Helper pipeline handed to the scheduler as its x_start source.
        x_start_pipeline = YOSONormalsPipeline.from_pretrained(
            STABLE_NORMAL_PATH,
            t_start=int(0.3 * 1000),
            **fp16_kwargs,
        ).to("cuda")

        scheduler = HEURI_DDIMScheduler.from_pretrained(
            STABLE_NORMAL_PATH,
            subfolder="scheduler",
            ddim_timestep_respacing="ddim10",
            x_start_pipeline=x_start_pipeline,
        )

        _normal_pipe = YOSONormalsPipeline.from_pretrained(
            STABLE_NORMAL_PATH,
            scheduler=scheduler,
            **fp16_kwargs,
        ).to("cuda")
        _normal_pipe.set_progress_bar_config(disable=True)
    return _normal_pipe
def load_depth_model():
    """Return the cached ``(processor, model)`` pair for Depth-Anything.

    On the first call the image processor and the depth-estimation model are
    loaded from ``DEPTH_ANYTHING_PATH``; the model is loaded in fp16 and moved
    to CUDA. The pair is stored in the module-level ``_depth_pipe`` and reused
    by later calls.
    """
    global _depth_pipe
    if _depth_pipe is None:
        # Imported lazily so the module can be imported without transformers.
        from transformers import AutoImageProcessor, AutoModelForDepthEstimation

        image_processor = AutoImageProcessor.from_pretrained(DEPTH_ANYTHING_PATH)
        depth_model = AutoModelForDepthEstimation.from_pretrained(
            DEPTH_ANYTHING_PATH,
            torch_dtype=torch.float16,
        )
        _depth_pipe = (image_processor, depth_model.to("cuda"))
    return _depth_pipe
def unload_models():
    """Drop both cached models and ask PyTorch to release cached CUDA memory.

    After this call ``_normal_pipe`` and ``_depth_pipe`` are ``None``, so the
    next ``load_*`` call reloads from disk.
    """
    global _normal_pipe, _depth_pipe
    # Rebinding to None releases this module's reference to each model.
    _normal_pipe = None
    _depth_pipe = None
    torch.cuda.empty_cache()
| # -- inference ---------------------------------------------------------------- | |
def run_stable_normal(image: Image.Image, resolution: int = 768) -> Image.Image:
    """Estimate a normal map for *image* with the StableNormal pipeline.

    Args:
        image: Reference image; converted to RGB and resized to a
            ``resolution`` x ``resolution`` square before inference.
        resolution: Side length, in pixels, of the square fed to the pipeline.

    Returns:
        An RGB PIL image in which normal components in [-1, 1] are encoded
        linearly as [0, 255].
    """
    pipe = load_normal_model()
    img = image.convert("RGB").resize((resolution, resolution), Image.LANCZOS)
    with torch.inference_mode(), torch.autocast("cuda"):
        result = pipe(img)

    prediction = result.prediction
    # Accept a torch tensor as well as a numpy array.
    if isinstance(prediction, torch.Tensor):
        prediction = prediction.detach().float().cpu().numpy()
    normals = np.asarray(prediction)
    # NOTE(review): the upstream predictor appears to return a batched
    # [N, H, W, 3] array -- confirm. Take the first (only) sample when a
    # batch axis is present; a plain [H, W, 3] array passes through unchanged.
    if normals.ndim == 4:
        normals = normals[0]
    # Avoid half-precision rounding when scaling to the 8-bit range.
    if normals.dtype == np.float16:
        normals = normals.astype(np.float32)

    normal_rgb = ((normals + 1) / 2 * 255).clip(0, 255).astype(np.uint8)
    return Image.fromarray(normal_rgb)
def run_depth_anything(image: Image.Image, resolution: int = 768) -> Image.Image:
    """Estimate a depth map for *image* with the Depth-Anything model.

    Args:
        image: Reference image; converted to RGB and resized to a
            ``resolution`` x ``resolution`` square before preprocessing.
        resolution: Side length, in pixels, of the square handed to the
            image processor.

    Returns:
        A 16-bit grayscale PIL image whose values are the predicted depth
        min-max normalized to 0-65535. Its size is that of the model's
        ``predicted_depth`` output, which is not resized here.
    """
    processor, model = load_depth_model()
    img = image.convert("RGB").resize((resolution, resolution), Image.LANCZOS)
    inputs = processor(images=img, return_tensors="pt")
    # The model was loaded in fp16 on CUDA, so its inputs must match.
    inputs = {k: v.to("cuda", dtype=torch.float16) for k, v in inputs.items()}
    with torch.inference_mode():
        depth = model(**inputs).predicted_depth[0].float().cpu().numpy()

    # Normalize to 0-1; the epsilon keeps a constant map from dividing by zero.
    lowest = depth.min()
    depth = (depth - lowest) / (depth.max() - lowest + 1e-8)
    depth_16 = (depth * 65535).astype(np.uint16)
    # The image mode is inferred from the uint16 dtype; the explicit ``mode``
    # argument is deprecated in recent Pillow releases.
    return Image.fromarray(depth_16)
| # -- GLB baking --------------------------------------------------------------- | |
def bake_normal_into_glb(
    glb_path: str,
    normal_img: Image.Image,
    out_path: str,
    normal_strength: float = 1.0,
) -> str:
    """Embed *normal_img* in a GLB as the first material's ``normalTexture``.

    The normal map is resized to a square whose side equals the width of the
    first material's base color texture when that texture is embedded in the
    GLB binary chunk; otherwise a 1024 px square is used. The PNG-encoded map
    is appended to the binary blob and referenced through a new bufferView,
    image and texture.

    Args:
        glb_path: Path of the GLB file to read.
        normal_img: Normal map to embed.
        out_path: Path the modified GLB is written to.
        normal_strength: Value stored as the normal texture ``scale``.

    Returns:
        ``out_path``.
    """
    import io

    import pygltflib

    gltf = pygltflib.GLTF2().load(glb_path)
    blob = bytearray(gltf.binary_blob() or b"")

    # Match the existing base color texture resolution when it can be read
    # from the binary chunk; otherwise keep the 1024 px default.
    target_size = 1024
    material = gltf.materials[0] if gltf.materials else None
    pbr = material.pbrMetallicRoughness if material is not None else None
    if pbr is not None and pbr.baseColorTexture is not None:
        source_idx = gltf.textures[pbr.baseColorTexture.index].source
        view_idx = gltf.images[source_idx].bufferView if source_idx is not None else None
        # URI-backed images have no bufferView and cannot be probed here.
        if view_idx is not None:
            view = gltf.bufferViews[view_idx]
            if view.buffer == 0 and blob:
                start = view.byteOffset or 0
                encoded = bytes(blob[start:start + view.byteLength])
                with Image.open(io.BytesIO(encoded)) as existing:
                    target_size = existing.width

    normal_resized = normal_img.resize((target_size, target_size), Image.LANCZOS)

    # Encode the normal map as PNG and append it to the binary blob.
    buf = io.BytesIO()
    normal_resized.save(buf, format="PNG")
    png_bytes = buf.getvalue()

    byte_offset = len(blob)
    blob.extend(png_bytes)
    # Pad the blob to 4-byte alignment.
    blob.extend(b"\x00" * (-len(blob) % 4))

    # Register bufferView -> image -> texture for the new PNG.
    bv_idx = len(gltf.bufferViews)
    gltf.bufferViews.append(pygltflib.BufferView(
        buffer=0, byteOffset=byte_offset, byteLength=len(png_bytes),
    ))
    img_idx = len(gltf.images)
    gltf.images.append(pygltflib.Image(
        bufferView=bv_idx, mimeType="image/png",
    ))
    tex_idx = len(gltf.textures)
    gltf.textures.append(pygltflib.Texture(source=img_idx))

    # Point the first material at the new texture.
    if material is not None:
        material.normalTexture = pygltflib.NormalMaterialTexture(
            index=tex_idx, scale=normal_strength,
        )

    # A file without any buffer needs one to describe the binary chunk.
    if not gltf.buffers:
        gltf.buffers.append(pygltflib.Buffer())
    gltf.buffers[0].byteLength = len(blob)
    gltf.set_binary_blob(bytes(blob))
    gltf.save(out_path)
    return out_path
def bake_depth_as_occlusion(
    glb_path: str,
    depth_img: Image.Image,
    out_path: str,
    displacement_scale: float = 1.0,
) -> str:
    """Embed *depth_img* in a GLB as the first material's ``occlusionTexture``.

    The depth values are divided by 65535, inverted, multiplied by
    ``displacement_scale``, clipped to [0, 1] and written to all three
    channels of an 8-bit RGB PNG. The PNG is resized to a square whose side
    equals the width of the first material's base color texture when that
    texture is embedded in the GLB binary chunk; otherwise 1024 px is used.

    Args:
        glb_path: Path of the GLB file to read.
        depth_img: Depth map; pixel values are treated as 16-bit (0-65535).
        out_path: Path the modified GLB is written to.
        displacement_scale: Multiplier applied to the inverted depth. It is
            also stored as the occlusion ``strength``, clamped to [0, 1]
            because glTF does not allow values outside that range.

    Returns:
        ``out_path``.
    """
    import io

    import pygltflib

    gltf = pygltflib.GLTF2().load(glb_path)
    blob = bytearray(gltf.binary_blob() or b"")

    # Match the existing base color texture resolution when it can be read
    # from the binary chunk; otherwise keep the 1024 px default.
    target_size = 1024
    material = gltf.materials[0] if gltf.materials else None
    pbr = material.pbrMetallicRoughness if material is not None else None
    if pbr is not None and pbr.baseColorTexture is not None:
        source_idx = gltf.textures[pbr.baseColorTexture.index].source
        view_idx = gltf.images[source_idx].bufferView if source_idx is not None else None
        # URI-backed images have no bufferView and cannot be probed here.
        if view_idx is not None:
            view = gltf.bufferViews[view_idx]
            if view.buffer == 0 and blob:
                start = view.byteOffset or 0
                encoded = bytes(blob[start:start + view.byteLength])
                with Image.open(io.BytesIO(encoded)) as existing:
                    target_size = existing.width

    # Convert 16-bit depth to 8-bit RGB occlusion (inverted, scaled).
    depth_arr = np.array(depth_img).astype(np.float32) / 65535.0
    depth_arr = 1.0 - depth_arr
    depth_arr = np.clip(depth_arr * displacement_scale, 0, 1)
    occ_8 = (depth_arr * 255).astype(np.uint8)
    occ_rgb = Image.fromarray(np.stack([occ_8, occ_8, occ_8], axis=-1))
    occ_rgb = occ_rgb.resize((target_size, target_size), Image.LANCZOS)

    buf = io.BytesIO()
    occ_rgb.save(buf, format="PNG")
    png_bytes = buf.getvalue()

    byte_offset = len(blob)
    blob.extend(png_bytes)
    # Pad the blob to 4-byte alignment.
    blob.extend(b"\x00" * (-len(blob) % 4))

    # Register bufferView -> image -> texture for the new PNG.
    bv_idx = len(gltf.bufferViews)
    gltf.bufferViews.append(pygltflib.BufferView(
        buffer=0, byteOffset=byte_offset, byteLength=len(png_bytes),
    ))
    img_idx = len(gltf.images)
    gltf.images.append(pygltflib.Image(
        bufferView=bv_idx, mimeType="image/png",
    ))
    tex_idx = len(gltf.textures)
    gltf.textures.append(pygltflib.Texture(source=img_idx))

    if material is not None:
        # glTF restricts occlusion strength to [0, 1]; clamp so the file
        # stays valid for any displacement_scale.
        strength = min(max(displacement_scale, 0.0), 1.0)
        material.occlusionTexture = pygltflib.OcclusionTextureInfo(
            index=tex_idx, strength=strength,
        )

    # A file without any buffer needs one to describe the binary chunk.
    if not gltf.buffers:
        gltf.buffers.append(pygltflib.Buffer())
    gltf.buffers[0].byteLength = len(blob)
    gltf.set_binary_blob(bytes(blob))
    gltf.save(out_path)
    return out_path