"""
Surface enhancement for TripoSG GLB outputs.

StableNormal  — high-quality normal map from portrait reference
Depth-Anything V2 — metric depth map → displacement intensity

Both models run on the reference portrait and produce calibrated maps that
are baked into the output GLB as PBR textures (normalTexture plus an
occlusionTexture that stands in for displacement).
"""

import os
import numpy as np
import torch
from PIL import Image


# Local checkpoint directories passed to the `from_pretrained` loaders below.
STABLE_NORMAL_PATH  = "/root/models/stable-normal"
DEPTH_ANYTHING_PATH = "/root/models/depth-anything-v2"

# Module-level caches: filled on first use by load_normal_model() /
# load_depth_model() and reset by unload_models(). None means "not loaded".
_normal_pipe  = None
_depth_pipe   = None


# ── model loading ──────────────────────────────────────────────────────────────

def load_normal_model():
    """
    Return the cached StableNormal pipeline, constructing it on first call.

    Two YOSONormalsPipeline instances are loaded from STABLE_NORMAL_PATH in
    float16 (the "fp16" weight variant) and moved to CUDA: a first one created
    with ``t_start=300`` that is handed to the HEURI_DDIMScheduler as its
    ``x_start_pipeline``, and the pipeline that is cached and returned, which
    uses that scheduler. The returned pipeline has its progress bar disabled.
    """
    global _normal_pipe
    if _normal_pipe is None:
        # Heavy third-party imports are deferred until a model is really needed.
        from stablenormal.pipeline_yoso_normal import YOSONormalsPipeline
        from stablenormal.scheduler.heuristics_ddimsampler import HEURI_DDIMScheduler
        import torch

        half_precision = {"torch_dtype": torch.float16, "variant": "fp16"}

        # Only referenced through the scheduler built below.
        seed_pipeline = YOSONormalsPipeline.from_pretrained(
            STABLE_NORMAL_PATH,
            t_start=int(0.3 * 1000),
            **half_precision,
        )
        seed_pipeline = seed_pipeline.to("cuda")

        heuristic_scheduler = HEURI_DDIMScheduler.from_pretrained(
            STABLE_NORMAL_PATH,
            subfolder="scheduler",
            ddim_timestep_respacing="ddim10",
            x_start_pipeline=seed_pipeline,
        )
        _normal_pipe = YOSONormalsPipeline.from_pretrained(
            STABLE_NORMAL_PATH,
            scheduler=heuristic_scheduler,
            **half_precision,
        ).to("cuda")
        _normal_pipe.set_progress_bar_config(disable=True)
    return _normal_pipe


def load_depth_model():
    """
    Return the cached ``(processor, model)`` pair for depth estimation.

    On first call the image processor and the depth-estimation model are both
    loaded from DEPTH_ANYTHING_PATH; the model is loaded in float16 and moved
    to CUDA. Later calls return the same tuple.
    """
    global _depth_pipe
    if _depth_pipe is None:
        # Deferred so importing this module does not pull in transformers.
        from transformers import AutoImageProcessor, AutoModelForDepthEstimation

        image_processor = AutoImageProcessor.from_pretrained(DEPTH_ANYTHING_PATH)
        depth_net = AutoModelForDepthEstimation.from_pretrained(
            DEPTH_ANYTHING_PATH,
            torch_dtype=torch.float16,
        )
        _depth_pipe = (image_processor, depth_net.to("cuda"))
    return _depth_pipe


def unload_models():
    """
    Forget both cached models and ask PyTorch to empty its CUDA cache.

    Only this module's references are dropped; objects still referenced
    elsewhere stay alive.
    """
    global _normal_pipe, _depth_pipe
    # Rebinding to None releases whatever the caches were holding.
    _normal_pipe = None
    _depth_pipe = None
    torch.cuda.empty_cache()


# ── inference ──────────────────────────────────────────────────────────────────

def run_stable_normal(image: Image.Image, resolution: int = 768) -> Image.Image:
    """
    Predict a normal map for ``image`` and return it as an 8-bit RGB image.

    The input is converted to RGB and resized to ``resolution`` x ``resolution``
    before it is passed to the pipeline from load_normal_model(). Predicted
    components are mapped linearly from [-1, 1] to [0, 255]; values outside
    that range are clipped.

    Args:
        image: Reference image.
        resolution: Side length, in pixels, of the square fed to the pipeline.

    Returns:
        The encoded normal map as a PIL image.
    """
    pipe = load_normal_model()
    img = image.convert("RGB").resize((resolution, resolution), Image.LANCZOS)
    with torch.inference_mode(), torch.autocast("cuda"):
        result = pipe(img)
    normal = result.prediction
    # NOTE(review): Marigold-derived pipelines document `prediction` as a
    # batched [N,H,W,3] array for numpy output — confirm against the installed
    # stablenormal version. Image.fromarray needs [H,W,3], so take the single
    # image out of the batch; an already-unbatched array is left untouched.
    if normal.ndim == 4:
        normal = normal[0]
    normal_rgb = ((normal + 1) / 2 * 255).clip(0, 255).astype(np.uint8)
    return Image.fromarray(normal_rgb)


def run_depth_anything(image: Image.Image, resolution: int = 768) -> Image.Image:
    """
    Estimate depth for ``image`` and return it as a 16-bit grayscale image.

    The input is converted to RGB and resized to ``resolution`` x ``resolution``
    before preprocessing. The predicted depth is min-max normalized, so the
    smallest predicted value maps to 0 and the largest to (about) 65535.
    """
    processor, model = load_depth_model()
    square_rgb = image.convert("RGB").resize((resolution, resolution), Image.LANCZOS)

    batch = processor(images=square_rgb, return_tensors="pt")
    cuda_batch = {}
    for name, tensor in batch.items():
        # The model was loaded in float16, so inputs are cast to match.
        cuda_batch[name] = tensor.to("cuda", dtype=torch.float16)

    with torch.inference_mode():
        prediction = model(**cuda_batch).predicted_depth[0]
        depth = prediction.float().cpu().numpy()

    # Min-max normalize to 0–1; the epsilon keeps a constant map from dividing by zero.
    lowest = depth.min()
    span = depth.max() - lowest + 1e-8
    depth = (depth - lowest) / span

    depth_16 = (depth * 65535).astype(np.uint16)
    return Image.fromarray(depth_16, mode="I;16")


# ── GLB baking ─────────────────────────────────────────────────────────────────

def bake_normal_into_glb(
    glb_path: str,
    normal_img: Image.Image,
    out_path: str,
    normal_strength: float = 1.0,
) -> str:
    """
    Adds normalTexture to the first material of the GLB.

    The normal map is resized to a square whose side equals the width of the
    existing base color texture, or 1024 when that texture is absent or is not
    stored in the GLB binary chunk (e.g. referenced by URI). It is then encoded
    as PNG, appended to buffer 0 and exposed through a new bufferView, image
    and texture. If the file has no materials the texture is still appended,
    but nothing references it.

    Args:
        glb_path: GLB file to read.
        normal_img: Normal map to embed.
        out_path: Where the modified GLB is written.
        normal_strength: Stored as the ``scale`` of the normalTexture.

    Returns:
        ``out_path``.
    """
    import io

    import pygltflib

    gltf = pygltflib.GLTF2().load(glb_path)

    # Find existing base color texture size for matching resolution
    target_size = 1024
    if gltf.materials and gltf.materials[0].pbrMetallicRoughness:
        pbr = gltf.materials[0].pbrMetallicRoughness
        if pbr.baseColorTexture is not None:
            source_idx = gltf.textures[pbr.baseColorTexture.index].source
            view_idx = None
            if source_idx is not None:
                view_idx = gltf.images[source_idx].bufferView
            glb_bytes = gltf.binary_blob()
            # URI-backed images have no bufferView; indexing bufferViews with
            # None would raise, so keep the default size in that case.
            if view_idx is not None and glb_bytes is not None:
                view = gltf.bufferViews[view_idx]
                start = view.byteOffset or 0
                encoded = glb_bytes[start:start + view.byteLength]
                with Image.open(io.BytesIO(encoded)) as existing:
                    target_size = existing.width

    normal_resized = normal_img.resize((target_size, target_size), Image.LANCZOS)

    # Encode normal map as PNG and append to binary blob
    buf = io.BytesIO()
    normal_resized.save(buf, format="PNG")
    png_bytes = buf.getvalue()

    blob = bytearray(gltf.binary_blob() or b"")
    byte_offset = len(blob)
    blob.extend(png_bytes)

    # Pad to 4-byte alignment
    while len(blob) % 4:
        blob.append(0)

    # Add bufferView, image, texture (byteLength excludes the padding)
    bv_idx = len(gltf.bufferViews)
    gltf.bufferViews.append(pygltflib.BufferView(
        buffer=0, byteOffset=byte_offset, byteLength=len(png_bytes),
    ))
    img_idx = len(gltf.images)
    gltf.images.append(pygltflib.Image(
        bufferView=bv_idx, mimeType="image/png",
    ))
    tex_idx = len(gltf.textures)
    gltf.textures.append(pygltflib.Texture(source=img_idx))

    # Update material
    if gltf.materials:
        gltf.materials[0].normalTexture = pygltflib.NormalMaterialTexture(
            index=tex_idx, scale=normal_strength,
        )

    # Update buffer length
    gltf.buffers[0].byteLength = len(blob)
    gltf.set_binary_blob(bytes(blob))
    gltf.save(out_path)
    return out_path


def bake_depth_as_occlusion(
    glb_path: str,
    depth_img: Image.Image,
    out_path: str,
    displacement_scale: float = 1.0,
) -> str:
    """
    Bakes depth map as occlusionTexture (R channel) — approximates displacement
    in PBR renderers.

    Depth values are divided by 65535, inverted, multiplied by
    ``displacement_scale`` and clipped to [0, 1] before being written as an
    8-bit gray RGB PNG. The image is resized to a square whose side equals the
    width of the existing base color texture, or 1024 when that texture is
    absent or is not stored in the GLB binary chunk. If the file has no
    materials the texture is still appended, but nothing references it.

    Args:
        glb_path: GLB file to read.
        depth_img: Depth map; pixel values are treated as 16-bit (0–65535).
        out_path: Where the modified GLB is written.
        displacement_scale: Multiplier applied to the inverted depth. It is
            also stored as the occlusion ``strength``, clamped to [0, 1]
            because glTF 2.0 does not allow values outside that range.

    Returns:
        ``out_path``.
    """
    import io

    import pygltflib

    gltf = pygltflib.GLTF2().load(glb_path)

    target_size = 1024
    if gltf.materials and gltf.materials[0].pbrMetallicRoughness:
        pbr = gltf.materials[0].pbrMetallicRoughness
        if pbr.baseColorTexture is not None:
            source_idx = gltf.textures[pbr.baseColorTexture.index].source
            view_idx = None
            if source_idx is not None:
                view_idx = gltf.images[source_idx].bufferView
            glb_bytes = gltf.binary_blob()
            # URI-backed images have no bufferView; indexing bufferViews with
            # None would raise, so keep the default size in that case.
            if view_idx is not None and glb_bytes is not None:
                view = gltf.bufferViews[view_idx]
                start = view.byteOffset or 0
                encoded = glb_bytes[start:start + view.byteLength]
                with Image.open(io.BytesIO(encoded)) as existing:
                    target_size = existing.width

    # Convert 16-bit depth to 8-bit RGB occlusion (inverted, scaled)
    depth_arr = np.array(depth_img).astype(np.float32) / 65535.0
    depth_arr = 1.0 - depth_arr  # invert: close = bright
    depth_arr = np.clip(depth_arr * displacement_scale, 0, 1)
    occ_8 = (depth_arr * 255).astype(np.uint8)
    occ_rgb = Image.fromarray(np.stack([occ_8, occ_8, occ_8], axis=-1))
    occ_rgb = occ_rgb.resize((target_size, target_size), Image.LANCZOS)

    buf = io.BytesIO()
    occ_rgb.save(buf, format="PNG")
    png_bytes = buf.getvalue()

    blob = bytearray(gltf.binary_blob() or b"")
    byte_offset = len(blob)
    blob.extend(png_bytes)
    # Pad to 4-byte alignment
    while len(blob) % 4:
        blob.append(0)

    # Add bufferView, image, texture (byteLength excludes the padding)
    bv_idx = len(gltf.bufferViews)
    gltf.bufferViews.append(pygltflib.BufferView(
        buffer=0, byteOffset=byte_offset, byteLength=len(png_bytes),
    ))
    img_idx = len(gltf.images)
    gltf.images.append(pygltflib.Image(
        bufferView=bv_idx, mimeType="image/png",
    ))
    tex_idx = len(gltf.textures)
    gltf.textures.append(pygltflib.Texture(source=img_idx))

    if gltf.materials:
        # glTF 2.0 restricts occlusionTexture.strength to [0, 1]; an unclamped
        # scale above 1 would produce a file that fails validation.
        strength = float(min(max(displacement_scale, 0.0), 1.0))
        gltf.materials[0].occlusionTexture = pygltflib.OcclusionTextureInfo(
            index=tex_idx, strength=strength,
        )

    gltf.buffers[0].byteLength = len(blob)
    gltf.set_binary_blob(bytes(blob))
    gltf.save(out_path)
    return out_path