Yang2001, akhaliq (HF Staff) committed
Commit 680cbca · 1 Parent(s): 25145b8

gradio server (#3)


- feat: add frontend UI for Pixal3D image-to-3D generation interface (0d71da7dc709a3e1bd5af6990304fe9d19eb169a)
- refactor: overhaul UI layout with sidebar shell, updated color palette, and component-based navigation structure (9f7d349c499da40b71f55b9ab8e39f6170ad73d8)
- feat: add thread-safe model initialization, nest_asyncio support, and pre-loading on startup (3b7c6289670a11950a24634380508d942e680286)
- fix: access file path using dictionary key instead of attribute in image processing functions (d8b4140ea7360ffde94af4b8ffd3ca219591d264)
- feat: mount /tmp directory and add client-side fallback logic for image previews (c80fdaeef52eb14bf00a5cf6dae057f4b6128f98)


Co-authored-by: AK <akhaliq@users.noreply.huggingface.co>
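The third bullet's thread-safe start-up is the main structural change to app.py: a module-level lock makes model loading idempotent across request threads, and nest_asyncio is applied defensively in case the process already runs an event loop. Below is a minimal, self-contained sketch of that pattern only; the loader is a placeholder, not the Space's actual pipeline construction.

```python
import threading

try:
    import nest_asyncio  # optional; lets an existing event loop be re-entered
    nest_asyncio.apply()
except ImportError:
    pass

_init_lock = threading.Lock()
_pipeline = None  # lazily created singleton


def _load_pipeline():
    # Placeholder loader; the real app builds Pixal3DImageTo3DPipeline here.
    return object()


def get_pipeline():
    """Return the shared pipeline, loading it exactly once under a lock."""
    global _pipeline
    if _pipeline is None:          # fast path, no lock taken
        with _init_lock:
            if _pipeline is None:  # double-check inside the lock
                _pipeline = _load_pipeline()
    return _pipeline
```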

Files changed (2)
  1. app.py +171 -412
  2. index.html +936 -0
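The new index.html drives the server's endpoints from the browser with @gradio/client; the same flow can be exercised from Python with gradio_client. This is a hedged sketch, not the Space's own test code: the endpoint names are assumed to follow the function names registered in app.py (preprocess, generate_3d, extract_glb_api), the URL is a placeholder, and the keys read from the result follow generate_3d's return statement in the diff below.

```python
from gradio_client import Client, handle_file

client = Client("http://127.0.0.1:7860")  # placeholder URL for the running server

# Preprocess the uploaded image (server-side cropping / background handling).
# api_name values assume routes are registered under the function names.
processed = client.predict(handle_file("example.png"), api_name="/preprocess")

# Generate the 3D asset; generate_3d returns render paths plus a saved state file.
result = client.predict(
    handle_file("example.png"),  # image
    42,                          # seed
    1536,                        # resolution
    api_name="/generate_3d",
)

# Extract a GLB from the saved state, using the UI's default decimation/texture size.
glb_file = client.predict(result["state_path"], 1_000_000, 4096, api_name="/extract_glb_api")
print(glb_file)
```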
app.py CHANGED
@@ -1,51 +1,55 @@
1
- """
2
- Pixal3D (TRELLIS.2 Backbone) - Gradio App
3
-
4
- Image-to-3D generation using Proj-mode Cascade inference (512->1024/1536).
5
-
6
- """
7
-
8
- import spaces
9
- import gradio as gr
10
-
11
  import os
12
  import subprocess
13
- subprocess.run([
14
- "pip", "install", "--force-reinstall", "--no-deps",
15
- "https://github.com/LDYang694/Storages/releases/download/20260430/utils3d-0.0.2-py3-none-any.whl"
16
- ], check=True)
17
-
18
- os.environ['OPENCV_IO_ENABLE_OPENEXR'] = '1'
19
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
20
- os.environ["ATTN_BACKEND"] = "flash_attn_3"
21
- os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autotune_cache.json')
22
- os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
23
-
24
  import argparse
25
  import math
26
  import time
27
- from datetime import datetime
28
  import shutil
29
  import cv2
30
- from typing import *
31
  import torch
32
  import numpy as np
33
- from PIL import Image
34
  import base64
35
  import io
36
  from trellis2.modules.sparse import SparseTensor
37
  from trellis2.pipelines import Pixal3DImageTo3DPipeline
38
  from trellis2.renderers import EnvMap
39
  from trellis2.utils import render_utils
40
  import o_voxel
41
 
42
-
43
  # ============================================================================
44
  # Constants & Defaults
45
  # ============================================================================
46
 
47
  MAX_SEED = np.iinfo(np.int32).max
48
  TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
 
 
49
  MODES = [
50
  {"name": "Normal", "icon": "assets/app/normal.png", "render_key": "normal"},
51
  {"name": "Clay render", "icon": "assets/app/clay.png", "render_key": "clay"},
@@ -55,8 +59,6 @@ MODES = [
55
  {"name": "HDRI courtyard", "icon": "assets/app/hdri_courtyard.png", "render_key": "shaded_courtyard"},
56
  ]
57
  STEPS = 8
58
- DEFAULT_MODE = 3
59
- DEFAULT_STEP = 0
60
 
61
  # Cascade parameters
62
  CASCADE_LR_RESOLUTION = 512
@@ -68,7 +70,7 @@ WILD_MESH_SCALE = 1.0
68
  WILD_EXTEND_PIXEL = 0
69
  WILD_IMAGE_RESOLUTION = 512
70
 
71
- # Image Cond Model configs (extracted from training configs, hardcoded)
72
  IMAGE_COND_CONFIGS = {
73
  "ss": {
74
  "model_name": "camenduru/dinov3-vitl16-pretrain-lvd1689m",
@@ -98,126 +100,63 @@ IMAGE_COND_CONFIGS = {
98
  },
99
  }
100
 
101
-
102
  # ============================================================================
103
- # CSS & JS
104
- # ============================================================================
105
-
106
- css = """
107
- .stepper-wrapper { padding: 0; }
108
- .stepper-container { padding: 0; align-items: center; }
109
- .step-button { flex-direction: row; }
110
- .step-connector { transform: none; }
111
- .step-number { width: 16px; height: 16px; }
112
- .step-label { position: relative; bottom: 0; }
113
- .wrap.center.full { inset: 0; height: 100%; }
114
- .wrap.center.full.translucent { background: var(--block-background-fill); }
115
- .meta-text-center {
116
- display: block !important; position: absolute !important;
117
- top: unset !important; bottom: 0 !important; right: 0 !important; transform: unset !important;
118
- }
119
- .previewer-container {
120
- position: relative;
121
- font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
122
- width: 100%; height: 722px; margin: 0 auto; padding: 20px;
123
- display: flex; flex-direction: column; align-items: center; justify-content: center;
124
- }
125
- .previewer-container .tips-icon {
126
- position: absolute; right: 10px; top: 10px; z-index: 10;
127
- border-radius: 10px; color: #fff; background-color: var(--color-accent); padding: 3px 6px; user-select: none;
128
- }
129
- .previewer-container .tips-text {
130
- position: absolute; right: 10px; top: 50px; color: #fff; background-color: var(--color-accent);
131
- border-radius: 10px; padding: 6px; text-align: left; max-width: 300px; z-index: 10;
132
- transition: all 0.3s; opacity: 0%; user-select: none;
133
- }
134
- .previewer-container .tips-text p { font-size: 14px; line-height: 1.2; }
135
- .tips-icon:hover + .tips-text { display: block; opacity: 100%; }
136
- .previewer-container .mode-row {
137
- width: 100%; display: flex; gap: 8px; justify-content: center; margin-bottom: 20px; flex-wrap: wrap;
138
- }
139
- .previewer-container .mode-btn {
140
- width: 24px; height: 24px; border-radius: 50%; cursor: pointer; opacity: 0.5;
141
- transition: all 0.2s; border: 2px solid #ddd; object-fit: cover;
142
- }
143
- .previewer-container .mode-btn:hover { opacity: 0.9; transform: scale(1.1); }
144
- .previewer-container .mode-btn.active { opacity: 1; border-color: var(--color-accent); transform: scale(1.1); }
145
- .previewer-container .display-row {
146
- margin-bottom: 20px; min-height: 400px; width: 100%; flex-grow: 1;
147
- display: flex; justify-content: center; align-items: center;
148
- }
149
- .previewer-container .previewer-main-image {
150
- max-width: 100%; max-height: 100%; flex-grow: 1; object-fit: contain; display: none;
151
- }
152
- .previewer-container .previewer-main-image.visible { display: block; }
153
- .previewer-container .slider-row {
154
- width: 100%; display: flex; flex-direction: column; align-items: center; gap: 10px; padding: 0 10px;
155
- }
156
- .previewer-container input[type=range] { -webkit-appearance: none; width: 100%; max-width: 400px; background: transparent; }
157
- .previewer-container input[type=range]::-webkit-slider-runnable-track {
158
- width: 100%; height: 8px; cursor: pointer; background: #ddd; border-radius: 5px;
159
- }
160
- .previewer-container input[type=range]::-webkit-slider-thumb {
161
- height: 20px; width: 20px; border-radius: 50%; background: var(--color-accent);
162
- cursor: pointer; -webkit-appearance: none; margin-top: -6px;
163
- box-shadow: 0 2px 5px rgba(0,0,0,0.2); transition: transform 0.1s;
164
- }
165
- .previewer-container input[type=range]::-webkit-slider-thumb:hover { transform: scale(1.2); }
166
- .gradio-container .padded:has(.previewer-container) { padding: 0 !important; }
167
- .gradio-container:has(.previewer-container) [data-testid="block-label"] { position: absolute; top: 0; left: 0; }
168
- """
169
-
170
- head = """
171
- <script>
172
- function refreshView(mode, step) {
173
- const allImgs = document.querySelectorAll('.previewer-main-image');
174
- for (let i = 0; i < allImgs.length; i++) {
175
- const img = allImgs[i];
176
- if (img.classList.contains('visible')) {
177
- const id = img.id;
178
- const [_, m, s] = id.split('-');
179
- if (mode === -1) mode = parseInt(m.slice(1));
180
- if (step === -1) step = parseInt(s.slice(1));
181
- break;
182
- }
183
- }
184
- allImgs.forEach(img => img.classList.remove('visible'));
185
- const targetId = 'view-m' + mode + '-s' + step;
186
- const targetImg = document.getElementById(targetId);
187
- if (targetImg) targetImg.classList.add('visible');
188
- const allBtns = document.querySelectorAll('.mode-btn');
189
- allBtns.forEach((btn, idx) => {
190
- if (idx === mode) btn.classList.add('active');
191
- else btn.classList.remove('active');
192
- });
193
- }
194
- function selectMode(mode) { refreshView(mode, -1); }
195
- function onSliderChange(val) { refreshView(-1, parseInt(val)); }
196
- </script>
197
- """
198
-
199
- empty_html = f"""
200
- <div class="previewer-container">
201
- <svg style=" opacity: .5; height: var(--size-5); color: var(--body-text-color);"
202
- xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather feather-image"><rect x="3" y="3" width="18" height="18" rx="2" ry="2"></rect><circle cx="8.5" cy="8.5" r="1.5"></circle><polyline points="21 15 16 10 5 21"></polyline></svg>
203
- </div>
204
- """
205
-
206
-
207
- # ============================================================================
208
- # Model Loading Utilities
209
  # ============================================================================
210
 
211
  def build_image_cond_model(config: dict):
212
- """Build DinoV3ProjFeatureExtractor."""
213
  from trellis2.trainers.flow_matching.mixins.image_conditioned_proj import DinoV3ProjFeatureExtractor
214
  model = DinoV3ProjFeatureExtractor(**config)
215
  model.eval()
216
  return model
217
 
218
 
219
  # ============================================================================
220
- # Camera Parameter Utilities
221
  # ============================================================================
222
 
223
  def compute_f_pixels(camera_angle_x: float, resolution: int) -> float:
@@ -225,7 +164,6 @@ def compute_f_pixels(camera_angle_x: float, resolution: int) -> float:
225
  f_pixels = focal_length * resolution / 32.0
226
  return float(f_pixels.item())
227
 
228
-
229
  def distance_from_fov(camera_angle_x, grid_point, target_point, mesh_scale, image_resolution):
230
  rotation_matrix = torch.tensor([[1.0, 0.0, 0.0], [0.0, 0.0, -1.0], [0.0, 1.0, 0.0]])
231
  gp = grid_point.to(torch.float32) @ rotation_matrix.T
@@ -238,25 +176,8 @@ def distance_from_fov(camera_angle_x, grid_point, target_point, mesh_scale, imag
238
  distance_x = f_pixels * xw / x_ndc - yw
239
  return {"distance_from_x": float(distance_x), "f_pixels": float(f_pixels)}
240
 
241
-
242
- def load_moge_model(device="cuda", model_name=MOGE_MODEL_NAME):
243
- print(f"[MoGe-2] Loading model {model_name}...")
244
- from moge.model.v2 import MoGeModel
245
- moge_model = MoGeModel.from_pretrained(model_name).to(device)
246
- moge_model.eval()
247
- print("[MoGe-2] Model loaded!")
248
- return moge_model
249
-
250
-
251
- def get_camera_params_wild_moge(image, moge_model, device="cuda",
252
- mesh_scale=1.0, extend_pixel=0, image_resolution=512):
253
- """Estimate camera parameters via MoGe-2."""
254
- if isinstance(image, str):
255
- pil_image = Image.open(image).convert("RGB")
256
- elif isinstance(image, Image.Image):
257
- pil_image = image.convert("RGB")
258
- else:
259
- raise ValueError(f"Unsupported image type: {type(image)}")
260
  width, height = pil_image.size
261
  image_np = np.array(pil_image).astype(np.float32) / 255.0
262
  image_tensor = torch.from_numpy(image_np).permute(2, 0, 1).to(device)
@@ -275,88 +196,81 @@ def get_camera_params_wild_moge(image, moge_model, device="cuda",
275
  )["distance_from_x"]
276
  return {'camera_angle_x': camera_angle_x, 'distance': distance, 'mesh_scale': mesh_scale}
277
 
278
-
279
- # ============================================================================
280
- # UI Utilities
281
- # ============================================================================
282
-
283
- def image_to_base64(image):
284
- buffered = io.BytesIO()
285
- image = image.convert("RGB")
286
- image.save(buffered, format="jpeg", quality=85)
287
- img_str = base64.b64encode(buffered.getvalue()).decode()
288
- return f"data:image/jpeg;base64,{img_str}"
289
-
290
-
291
- def start_session(req: gr.Request):
292
- user_dir = os.path.join(TMP_DIR, str(req.session_hash))
293
- os.makedirs(user_dir, exist_ok=True)
294
-
295
-
296
- def end_session(req: gr.Request):
297
- user_dir = os.path.join(TMP_DIR, str(req.session_hash))
298
- if os.path.exists(user_dir):
299
- shutil.rmtree(user_dir)
300
-
301
-
302
- def preprocess_image(image: Image.Image) -> Image.Image:
303
- return pipeline.preprocess_image(image)
304
-
305
-
306
  def pack_state(shape_slat, tex_slat, res):
307
- return {
308
  'shape_slat_feats': shape_slat.feats.cpu().numpy(),
309
  'tex_slat_feats': tex_slat.feats.cpu().numpy(),
310
  'coords': shape_slat.coords.cpu().numpy(),
311
  'res': res,
312
  }
 
 
 
313
 
314
-
315
- def unpack_state(state):
316
  shape_slat = SparseTensor(
317
- feats=torch.from_numpy(state['shape_slat_feats']).cuda(),
318
- coords=torch.from_numpy(state['coords']).cuda(),
319
  )
320
- tex_slat = shape_slat.replace(torch.from_numpy(state['tex_slat_feats']).cuda())
321
- return shape_slat, tex_slat, state['res']
322
 
 
 
 
323
 
324
- def get_seed(randomize_seed, seed):
325
- return np.random.randint(0, MAX_SEED) if randomize_seed else seed
326
 
 
 
 
 
 
327
 
328
- # ============================================================================
329
- # Core Inference
330
- # ============================================================================
 
 
 
 
 
331
 
 
332
  @spaces.GPU(duration=120)
333
- def image_to_3d(
334
- image, seed, resolution,
335
- ss_guidance_strength, ss_guidance_rescale, ss_sampling_steps, ss_rescale_t,
336
- shape_slat_guidance_strength, shape_slat_guidance_rescale, shape_slat_sampling_steps, shape_slat_rescale_t,
337
- tex_slat_guidance_strength, tex_slat_guidance_rescale, tex_slat_sampling_steps, tex_slat_rescale_t,
338
- req: gr.Request,
339
- progress=gr.Progress(track_tqdm=True),
340
- ):
341
- device = pipeline.device
 
 
 
 
 
 
 
 
 
342
  torch.manual_seed(seed)
343
  hr_resolution = int(resolution)
344
-
345
- total_t0 = time.time()
346
- print(f"\n{'='*60}")
347
- print(f" [Generate] Start | seed={seed}, resolution={hr_resolution}")
348
- print(f"{'='*60}")
349
-
350
- # Preprocessing
351
- image_preprocessed = pipeline.preprocess_image(image)
352
-
353
- # Camera estimation via MoGe-2
354
  camera_params = get_camera_params_wild_moge(
355
- image_preprocessed, moge_model, device=str(device),
356
  mesh_scale=WILD_MESH_SCALE, extend_pixel=WILD_EXTEND_PIXEL,
357
  image_resolution=WILD_IMAGE_RESOLUTION,
358
  )
359
-
360
  ss_sampler_override = {"steps": ss_sampling_steps, "guidance_strength": ss_guidance_strength,
361
  "guidance_rescale": ss_guidance_rescale, "rescale_t": ss_rescale_t}
362
  shape_sampler_override = {"steps": shape_slat_sampling_steps, "guidance_strength": shape_slat_guidance_strength,
@@ -364,7 +278,6 @@ def image_to_3d(
364
  tex_sampler_override = {"steps": tex_slat_sampling_steps, "guidance_strength": tex_slat_guidance_strength,
365
  "guidance_rescale": tex_slat_guidance_rescale, "rescale_t": tex_slat_rescale_t}
366
 
367
- # Run pipeline
368
  pipeline_type = f"{hr_resolution}_cascade"
369
  mesh_list, (shape_slat, tex_slat, res) = pipeline.run(
370
  image_preprocessed,
@@ -378,60 +291,37 @@ def image_to_3d(
378
  pipeline_type=pipeline_type,
379
  max_num_tokens=CASCADE_MAX_NUM_TOKENS,
380
  )
 
381
  mesh = mesh_list[0]
382
- state = pack_state(shape_slat, tex_slat, res)
383
- del shape_slat, tex_slat, mesh_list
384
- torch.cuda.empty_cache()
385
-
386
- # Render
387
  mesh.simplify(16777216)
388
- images = render_utils.render_proj_aligned_video(
389
  mesh, camera_angle_x=camera_params['camera_angle_x'],
390
  distance=camera_params['distance'], resolution=1024,
391
  num_frames=STEPS, envmap=envmap,
392
  )
393
- del mesh
394
- torch.cuda.empty_cache()
395
- print(f"\n [Generate] Total time: {time.time()-total_t0:.2f}s")
396
-
397
- # Build HTML
398
- images_html = ""
399
- for m_idx, mode in enumerate(MODES):
400
- for s_idx in range(STEPS):
401
- unique_id = f"view-m{m_idx}-s{s_idx}"
402
- is_visible = (m_idx == DEFAULT_MODE and s_idx == DEFAULT_STEP)
403
- vis_class = "visible" if is_visible else ""
404
- img_base64 = image_to_base64(Image.fromarray(images[mode['render_key']][s_idx]))
405
- images_html += f'<img id="{unique_id}" class="previewer-main-image {vis_class}" src="{img_base64}" loading="eager">'
406
-
407
- btns_html = ""
408
- for idx, mode in enumerate(MODES):
409
- active_class = "active" if idx == DEFAULT_MODE else ""
410
- btns_html += f'<img src="{mode["icon_base64"]}" class="mode-btn {active_class}" onclick="selectMode({idx})" title="{mode["name"]}">'
411
-
412
- full_html = f"""
413
- <div class="previewer-container">
414
- <div class="tips-wrapper">
415
- <div class="tips-icon">Tips</div>
416
- <div class="tips-text">
417
- <p>Render Mode - Click circular buttons to switch render modes.</p>
418
- <p>View Angle - Drag the slider to change the view angle.</p>
419
- </div>
420
- </div>
421
- <div class="display-row">{images_html}</div>
422
- <div class="mode-row" id="btn-group">{btns_html}</div>
423
- <div class="slider-row">
424
- <input type="range" id="custom-slider" min="0" max="{STEPS - 1}" value="{DEFAULT_STEP}" step="1" oninput="onSliderChange(this.value)">
425
- </div>
426
- </div>
427
- """
428
- return state, full_html
429
 
 
 
 
 
430
 
 
431
  @spaces.GPU(duration=120)
432
- def extract_glb(state, decimation_target, texture_size, req: gr.Request, progress=gr.Progress(track_tqdm=True)):
433
- user_dir = os.path.join(TMP_DIR, str(req.session_hash))
434
- shape_slat, tex_slat, res = unpack_state(state)
435
  mesh = pipeline.decode_latent(shape_slat, tex_slat, res)[0]
436
  glb = o_voxel.postprocess.to_glb(
437
  vertices=mesh.vertices, faces=mesh.faces, attr_volume=mesh.attrs,
@@ -440,7 +330,6 @@ def extract_glb(state, decimation_target, texture_size, req: gr.Request, progres
440
  decimation_target=decimation_target, texture_size=texture_size,
441
  remesh=True, remesh_band=1, remesh_project=0, use_tqdm=True,
442
  )
443
- # Ry(180°) @ Rx(90°): (x,y,z) → (-x, -z, -y)
444
  rot = np.array([
445
  [-1, 0, 0, 0],
446
  [ 0, 0, -1, 0],
@@ -448,153 +337,23 @@ def extract_glb(state, decimation_target, texture_size, req: gr.Request, progres
448
  [ 0, 0, 0, 1],
449
  ], dtype=np.float64)
450
  glb.apply_transform(rot)
451
- now = datetime.now()
452
- timestamp = now.strftime("%Y-%m-%dT%H%M%S") + f".{now.microsecond // 1000:03d}"
453
- os.makedirs(user_dir, exist_ok=True)
454
- glb_path = os.path.join(user_dir, f'sample_{timestamp}.glb')
455
- glb.export(glb_path, extension_webp=True)
456
- torch.cuda.empty_cache()
457
- return glb_path, glb_path
458
-
459
-
460
- # ============================================================================
461
- # Gradio UI
462
- # ============================================================================
463
-
464
- with gr.Blocks(delete_cache=(600, 600)) as demo:
465
- gr.Markdown("""
466
- ## Pixal3D: Pixel-Aligned 3D Generation from Images
467
- [[Project Page](https://ldyang694.github.io/projects/pixal3d/)]
468
- * Upload an image and click **Generate** to create a 3D asset using Pixal3D with TRELLIS.2 backbone.
469
- * Click **Extract GLB** to export and download the generated GLB file.
470
- * Camera parameters are estimated automatically via MoGe-2.
471
- """)
472
-
473
- with gr.Row():
474
- with gr.Column(scale=1, min_width=360):
475
- image_prompt = gr.Image(label="Image Prompt", format="png", image_mode="RGBA", type="pil", height=400)
476
- resolution = gr.Radio(["1024", "1536"], label="Resolution", value="1536")
477
- seed = gr.Slider(0, MAX_SEED, label="Seed", value=42, step=1)
478
- randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
479
- decimation_target = gr.Slider(100000, 1000000, label="Decimation Target", value=1000000, step=10000)
480
- texture_size = gr.Slider(1024, 4096, label="Texture Size", value=4096, step=1024)
481
- generate_btn = gr.Button("Generate")
482
-
483
- with gr.Accordion(label="Advanced Settings", open=False):
484
- gr.Markdown("Stage 1: Sparse Structure Generation")
485
- with gr.Row():
486
- ss_guidance_strength = gr.Slider(1.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
487
- ss_guidance_rescale = gr.Slider(0.0, 1.0, label="Guidance Rescale", value=0.7, step=0.01)
488
- ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
489
- ss_rescale_t = gr.Slider(1.0, 6.0, label="Rescale T", value=5.0, step=0.1)
490
- gr.Markdown("Stage 2: Shape Generation")
491
- with gr.Row():
492
- shape_slat_guidance_strength = gr.Slider(1.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
493
- shape_slat_guidance_rescale = gr.Slider(0.0, 1.0, label="Guidance Rescale", value=0.5, step=0.01)
494
- shape_slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
495
- shape_slat_rescale_t = gr.Slider(1.0, 6.0, label="Rescale T", value=3.0, step=0.1)
496
- gr.Markdown("Stage 3: Material Generation")
497
- with gr.Row():
498
- tex_slat_guidance_strength = gr.Slider(1.0, 10.0, label="Guidance Strength", value=1.0, step=0.1)
499
- tex_slat_guidance_rescale = gr.Slider(0.0, 1.0, label="Guidance Rescale", value=0.0, step=0.01)
500
- tex_slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
501
- tex_slat_rescale_t = gr.Slider(1.0, 6.0, label="Rescale T", value=3.0, step=0.1)
502
-
503
- with gr.Column(scale=10):
504
- with gr.Walkthrough(selected=0) as walkthrough:
505
- with gr.Step("Preview", id=0):
506
- preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
507
- extract_btn = gr.Button("Extract GLB")
508
- with gr.Step("Extract", id=1):
509
- glb_output = gr.Model3D(label="Extracted GLB", height=724, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0), camera_position=(-90, 90, None))
510
- download_btn = gr.DownloadButton(label="Download GLB")
511
-
512
- with gr.Column(scale=1, min_width=172):
513
- examples = gr.Examples(
514
- examples=[f'assets/example_image/{image}' for image in os.listdir("assets/example_image")],
515
- inputs=[image_prompt], fn=preprocess_image, outputs=[image_prompt],
516
- run_on_click=True, examples_per_page=18,
517
- )
518
-
519
- output_buf = gr.State()
520
-
521
- demo.load(start_session)
522
- demo.unload(end_session)
523
- image_prompt.upload(preprocess_image, inputs=[image_prompt], outputs=[image_prompt])
524
-
525
- generate_btn.click(get_seed, inputs=[randomize_seed, seed], outputs=[seed]).then(
526
- lambda: gr.Walkthrough(selected=0), outputs=walkthrough
527
- ).then(
528
- image_to_3d,
529
- inputs=[image_prompt, seed, resolution,
530
- ss_guidance_strength, ss_guidance_rescale, ss_sampling_steps, ss_rescale_t,
531
- shape_slat_guidance_strength, shape_slat_guidance_rescale, shape_slat_sampling_steps, shape_slat_rescale_t,
532
- tex_slat_guidance_strength, tex_slat_guidance_rescale, tex_slat_sampling_steps, tex_slat_rescale_t],
533
- outputs=[output_buf, preview_output],
534
- )
535
-
536
- extract_btn.click(lambda: gr.Walkthrough(selected=1), outputs=walkthrough).then(
537
- extract_glb, inputs=[output_buf, decimation_target, texture_size], outputs=[glb_output, download_btn],
538
- )
539
-
540
-
541
- # ============================================================================
542
- # Launch
543
- # ============================================================================
544
-
545
- def parse_args():
546
- parser = argparse.ArgumentParser(description="Pixal3D Gradio App")
547
- parser.add_argument("--model_path", type=str, default="TencentARC/Pixal3D-T",
548
- help="HuggingFace repo ID or local path (default: TencentARC/Pixal3D-T)")
549
- parser.add_argument("--port", type=int, default=7860)
550
- parser.add_argument("--share", action="store_true", default=True)
551
- return parser.parse_args()
552
 
 
 
 
553
 
554
  if __name__ == "__main__":
555
- args = parse_args()
556
- os.makedirs(TMP_DIR, exist_ok=True)
557
-
558
- # Construct UI icon base64
559
- for i in range(len(MODES)):
560
- icon = Image.open(MODES[i]['icon'])
561
- MODES[i]['icon_base64'] = image_to_base64(icon)
562
-
563
- # Load pipeline from HuggingFace or local path
564
- print(f"[Pipeline] Loading from {args.model_path}...")
565
- pipeline = Pixal3DImageTo3DPipeline.from_pretrained(args.model_path)
566
-
567
- # Load environment maps
568
- envmap = {
569
- 'forest': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread('assets/hdri/forest.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
570
- 'sunset': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread('assets/hdri/sunset.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
571
- 'courtyard': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread('assets/hdri/courtyard.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
572
- }
573
-
574
- # Build image cond models and set on pipeline
575
- print("[ImageCond] Building DinoV3ProjFeatureExtractor models...")
576
- pipeline.image_cond_model_ss = build_image_cond_model(IMAGE_COND_CONFIGS["ss"])
577
- pipeline.image_cond_model_shape_512 = build_image_cond_model(IMAGE_COND_CONFIGS["shape_512"])
578
- pipeline.image_cond_model_shape_1024 = build_image_cond_model(IMAGE_COND_CONFIGS["shape_1024"])
579
- pipeline.image_cond_model_tex_1024 = build_image_cond_model(IMAGE_COND_CONFIGS["tex_1024"])
580
-
581
- pipeline.cuda()
582
-
583
- # Pre-download NAF model (avoid lazy-loading during inference)
584
- print("[NAF] Pre-loading NAF upsampler model...")
585
- for attr in ['image_cond_model_ss', 'image_cond_model_shape_512', 'image_cond_model_shape_1024', 'image_cond_model_tex_1024']:
586
- model = getattr(pipeline, attr, None)
587
- if model is not None and getattr(model, 'use_naf_upsample', False):
588
- model._load_naf()
589
- print("[NAF] NAF model loaded.")
590
-
591
- # Load MoGe-2
592
- print("\n[MoGe-2] Loading model for camera estimation...")
593
- moge_model = load_moge_model(device="cuda")
594
-
595
- print(f"\n{'=' * 60}")
596
- print(f" Pixal3D ready! Model loaded from: {args.model_path}")
597
- print(f" Cascade: {CASCADE_LR_RESOLUTION} -> 1024/1536")
598
- print(f"{'=' * 60}\n")
599
-
600
- demo.launch(css=css, head=head, server_port=args.port, share=args.share)
 
1
  import os
2
  import subprocess
 
3
  import argparse
4
  import math
5
  import time
 
6
  import shutil
7
  import cv2
 
8
  import torch
9
  import numpy as np
 
10
  import base64
11
  import io
12
+ import json
13
+ from datetime import datetime
14
+ from typing import *
15
+ from PIL import Image
16
+
17
+ import threading
18
+ try:
19
+ import nest_asyncio
20
+ nest_asyncio.apply()
21
+ except ImportError:
22
+ pass
23
+
24
+ # Lock for model initialization
25
+ init_lock = threading.Lock()
26
+
27
+ os.environ['OPENCV_IO_ENABLE_OPENEXR'] = '1'
28
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
29
+ os.environ["ATTN_BACKEND"] = "flash_attn_3"
30
+ os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autotune_cache.json')
31
+ os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
32
+
33
+ import spaces
34
+ from gradio import Server
35
+ from gradio.data_classes import FileData
36
+ from fastapi.responses import HTMLResponse
37
+ from fastapi.staticfiles import StaticFiles
38
+
39
  from trellis2.modules.sparse import SparseTensor
40
  from trellis2.pipelines import Pixal3DImageTo3DPipeline
41
  from trellis2.renderers import EnvMap
42
  from trellis2.utils import render_utils
43
  import o_voxel
44
 
 
45
  # ============================================================================
46
  # Constants & Defaults
47
  # ============================================================================
48
 
49
  MAX_SEED = np.iinfo(np.int32).max
50
  TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
51
+ os.makedirs(TMP_DIR, exist_ok=True)
52
+
53
  MODES = [
54
  {"name": "Normal", "icon": "assets/app/normal.png", "render_key": "normal"},
55
  {"name": "Clay render", "icon": "assets/app/clay.png", "render_key": "clay"},
 
59
  {"name": "HDRI courtyard", "icon": "assets/app/hdri_courtyard.png", "render_key": "shaded_courtyard"},
60
  ]
61
  STEPS = 8
 
 
62
 
63
  # Cascade parameters
64
  CASCADE_LR_RESOLUTION = 512
 
70
  WILD_EXTEND_PIXEL = 0
71
  WILD_IMAGE_RESOLUTION = 512
72
 
73
+ # Image Cond Model configs
74
  IMAGE_COND_CONFIGS = {
75
  "ss": {
76
  "model_name": "camenduru/dinov3-vitl16-pretrain-lvd1689m",
 
100
  },
101
  }
102
 
 
103
  # ============================================================================
104
+ # Model Loading
105
  # ============================================================================
106
 
107
  def build_image_cond_model(config: dict):
 
108
  from trellis2.trainers.flow_matching.mixins.image_conditioned_proj import DinoV3ProjFeatureExtractor
109
  model = DinoV3ProjFeatureExtractor(**config)
110
  model.eval()
111
  return model
112
 
113
+ def load_moge_model(device="cuda", model_name=MOGE_MODEL_NAME):
114
+ from moge.model.v2 import MoGeModel
115
+ moge_model = MoGeModel.from_pretrained(model_name).to(device)
116
+ moge_model.eval()
117
+ return moge_model
118
+
119
+ # Global instances (lazy loaded or loaded at start)
120
+ pipeline = None
121
+ moge_model = None
122
+ envmap = None
123
+
124
+ def init_models():
125
+ global pipeline, moge_model, envmap
126
+ with init_lock:
127
+ if pipeline is not None:
128
+ return
129
+
130
+ model_path = "TencentARC/Pixal3D-T"
131
+ print(f"[Pipeline] Loading from {model_path}...")
132
+ pipeline = Pixal3DImageTo3DPipeline.from_pretrained(model_path)
133
+
134
+ print("[ImageCond] Building DinoV3ProjFeatureExtractor models...")
135
+ pipeline.image_cond_model_ss = build_image_cond_model(IMAGE_COND_CONFIGS["ss"])
136
+ pipeline.image_cond_model_shape_512 = build_image_cond_model(IMAGE_COND_CONFIGS["shape_512"])
137
+ pipeline.image_cond_model_shape_1024 = build_image_cond_model(IMAGE_COND_CONFIGS["shape_1024"])
138
+ pipeline.image_cond_model_tex_1024 = build_image_cond_model(IMAGE_COND_CONFIGS["tex_1024"])
139
+
140
+ pipeline.cuda()
141
+
142
+ print("[NAF] Pre-loading NAF upsampler model...")
143
+ for attr in ['image_cond_model_ss', 'image_cond_model_shape_512', 'image_cond_model_shape_1024', 'image_cond_model_tex_1024']:
144
+ model = getattr(pipeline, attr, None)
145
+ if model is not None and getattr(model, 'use_naf_upsample', False):
146
+ model._load_naf()
147
+
148
+ print("[MoGe-2] Loading model for camera estimation...")
149
+ moge_model = load_moge_model(device="cuda")
150
+
151
+ print("[EnvMap] Loading environment maps...")
152
+ envmap = {
153
+ 'forest': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread('assets/hdri/forest.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
154
+ 'sunset': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread('assets/hdri/sunset.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
155
+ 'courtyard': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread('assets/hdri/courtyard.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
156
+ }
157
 
158
  # ============================================================================
159
+ # Utilities
160
  # ============================================================================
161
 
162
  def compute_f_pixels(camera_angle_x: float, resolution: int) -> float:
 
164
  f_pixels = focal_length * resolution / 32.0
165
  return float(f_pixels.item())
166
 
 
167
  def distance_from_fov(camera_angle_x, grid_point, target_point, mesh_scale, image_resolution):
168
  rotation_matrix = torch.tensor([[1.0, 0.0, 0.0], [0.0, 0.0, -1.0], [0.0, 1.0, 0.0]])
169
  gp = grid_point.to(torch.float32) @ rotation_matrix.T
 
176
  distance_x = f_pixels * xw / x_ndc - yw
177
  return {"distance_from_x": float(distance_x), "f_pixels": float(f_pixels)}
178
 
179
+ def get_camera_params_wild_moge(image_path, device="cuda", mesh_scale=1.0, extend_pixel=0, image_resolution=512):
180
+ pil_image = Image.open(image_path).convert("RGB")
181
  width, height = pil_image.size
182
  image_np = np.array(pil_image).astype(np.float32) / 255.0
183
  image_tensor = torch.from_numpy(image_np).permute(2, 0, 1).to(device)
 
196
  )["distance_from_x"]
197
  return {'camera_angle_x': camera_angle_x, 'distance': distance, 'mesh_scale': mesh_scale}
198
 
199
  def pack_state(shape_slat, tex_slat, res):
200
+ state_data = {
201
  'shape_slat_feats': shape_slat.feats.cpu().numpy(),
202
  'tex_slat_feats': tex_slat.feats.cpu().numpy(),
203
  'coords': shape_slat.coords.cpu().numpy(),
204
  'res': res,
205
  }
206
+ state_path = os.path.join(TMP_DIR, f"state_{int(time.time()*1000)}.npz")
207
+ np.savez_compressed(state_path, **state_data)
208
+ return state_path
209
 
210
+ def unpack_state(state_path):
211
+ data = np.load(state_path)
212
  shape_slat = SparseTensor(
213
+ feats=torch.from_numpy(data['shape_slat_feats']).cuda(),
214
+ coords=torch.from_numpy(data['coords']).cuda(),
215
  )
216
+ tex_slat = shape_slat.replace(torch.from_numpy(data['tex_slat_feats']).cuda())
217
+ return shape_slat, tex_slat, int(data['res'])
218
 
219
+ # ============================================================================
220
+ # API Implementation
221
+ # ============================================================================
222
 
223
+ app = Server()
 
224
 
225
+ @app.get("/")
226
+ async def homepage():
227
+ html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "index.html")
228
+ with open(html_path, "r", encoding="utf-8") as f:
229
+ return HTMLResponse(content=f.read())
230
 
231
+ @app.api()
232
+ def preprocess(image: FileData) -> FileData:
233
+ init_models()
234
+ img = Image.open(image["path"])
235
+ processed = pipeline.preprocess_image(img)
236
+ out_path = os.path.join(TMP_DIR, f"preprocessed_{int(time.time()*1000)}.png")
237
+ processed.save(out_path)
238
+ return FileData(path=out_path)
239
 
240
+ @app.api()
241
  @spaces.GPU(duration=120)
242
+ def generate_3d(
243
+ image: FileData,
244
+ seed: int,
245
+ resolution: int,
246
+ ss_guidance_strength: float = 7.5,
247
+ ss_guidance_rescale: float = 0.7,
248
+ ss_sampling_steps: int = 12,
249
+ ss_rescale_t: float = 5.0,
250
+ shape_slat_guidance_strength: float = 7.5,
251
+ shape_slat_guidance_rescale: float = 0.5,
252
+ shape_slat_sampling_steps: int = 12,
253
+ shape_slat_rescale_t: float = 3.0,
254
+ tex_slat_guidance_strength: float = 1.0,
255
+ tex_slat_guidance_rescale: float = 0.0,
256
+ tex_slat_sampling_steps: int = 12,
257
+ tex_slat_rescale_t: float = 3.0,
258
+ ) -> Dict:
259
+ init_models()
260
  torch.manual_seed(seed)
261
  hr_resolution = int(resolution)
262
+
263
+ img = Image.open(image["path"])
264
+ image_preprocessed = pipeline.preprocess_image(img)
265
+ temp_processed_path = os.path.join(TMP_DIR, "temp_proc.png")
266
+ image_preprocessed.save(temp_processed_path)
267
+
 
 
 
 
268
  camera_params = get_camera_params_wild_moge(
269
+ temp_processed_path, device="cuda",
270
  mesh_scale=WILD_MESH_SCALE, extend_pixel=WILD_EXTEND_PIXEL,
271
  image_resolution=WILD_IMAGE_RESOLUTION,
272
  )
273
+
274
  ss_sampler_override = {"steps": ss_sampling_steps, "guidance_strength": ss_guidance_strength,
275
  "guidance_rescale": ss_guidance_rescale, "rescale_t": ss_rescale_t}
276
  shape_sampler_override = {"steps": shape_slat_sampling_steps, "guidance_strength": shape_slat_guidance_strength,
 
278
  tex_sampler_override = {"steps": tex_slat_sampling_steps, "guidance_strength": tex_slat_guidance_strength,
279
  "guidance_rescale": tex_slat_guidance_rescale, "rescale_t": tex_slat_rescale_t}
280
 
 
281
  pipeline_type = f"{hr_resolution}_cascade"
282
  mesh_list, (shape_slat, tex_slat, res) = pipeline.run(
283
  image_preprocessed,
 
291
  pipeline_type=pipeline_type,
292
  max_num_tokens=CASCADE_MAX_NUM_TOKENS,
293
  )
294
+
295
  mesh = mesh_list[0]
296
+ state_path = pack_state(shape_slat, tex_slat, res)
297
+
 
 
 
298
  mesh.simplify(16777216)
299
+ renders = render_utils.render_proj_aligned_video(
300
  mesh, camera_angle_x=camera_params['camera_angle_x'],
301
  distance=camera_params['distance'], resolution=1024,
302
  num_frames=STEPS, envmap=envmap,
303
  )
304
+
305
+ # Save renders and return paths
306
+ render_files = {}
307
+ for mode_key, frames in renders.items():
308
+ mode_files = []
309
+ for i, frame in enumerate(frames):
310
+ p = os.path.abspath(os.path.join(TMP_DIR, f"render_{mode_key}_{i}_{int(time.time()*1000)}.jpg"))
311
+ Image.fromarray(frame).save(p, quality=85)
312
+ mode_files.append(FileData(path=p))
313
+ render_files[mode_key] = mode_files
 
314
 
315
+ return {
316
+ "render_paths": render_files,
317
+ "state_path": os.path.abspath(state_path)
318
+ }
319
 
320
+ @app.api()
321
  @spaces.GPU(duration=120)
322
+ def extract_glb_api(state_path: str, decimation_target: int, texture_size: int) -> FileData:
323
+ init_models()
324
+ shape_slat, tex_slat, res = unpack_state(state_path)
325
  mesh = pipeline.decode_latent(shape_slat, tex_slat, res)[0]
326
  glb = o_voxel.postprocess.to_glb(
327
  vertices=mesh.vertices, faces=mesh.faces, attr_volume=mesh.attrs,
 
330
  decimation_target=decimation_target, texture_size=texture_size,
331
  remesh=True, remesh_band=1, remesh_project=0, use_tqdm=True,
332
  )
 
333
  rot = np.array([
334
  [-1, 0, 0, 0],
335
  [ 0, 0, -1, 0],
 
337
  [ 0, 0, 0, 1],
338
  ], dtype=np.float64)
339
  glb.apply_transform(rot)
340
+
341
+ out_glb = os.path.join(TMP_DIR, f"result_{int(time.time()*1000)}.glb")
342
+ glb.export(out_glb, extension_webp=True)
343
+ return FileData(path=out_glb)
 
344
 
345
+ # Mount assets and tmp for direct access
346
+ app.mount("/assets", StaticFiles(directory="assets"), name="assets")
347
+ app.mount("/tmp", StaticFiles(directory=TMP_DIR), name="tmp")
348
 
349
  if __name__ == "__main__":
350
+ # Re-install utils3d as in original app.py
351
+ subprocess.run([
352
+ "pip", "install", "--force-reinstall", "--no-deps",
353
+ "https://github.com/LDYang694/Storages/releases/download/20260430/utils3d-0.0.2-py3-none-any.whl"
354
+ ], check=True)
355
+
356
+ # Pre-initialize models before launching the server
357
+ init_models()
358
+
359
+ app.launch(show_error=True, share=True)
 
index.html ADDED
@@ -0,0 +1,936 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Pixal3D | AI Image-to-3D</title>
7
+
8
+ <!-- Fonts & Icons -->
9
+ <link rel="preconnect" href="https://fonts.googleapis.com">
10
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
11
+ <link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@300;400;500;600;700;800&family=Outfit:wght@400;500;600;700;800&display=swap" rel="stylesheet">
12
+ <script src="https://unpkg.com/lucide@latest"></script>
13
+ <script type="module" src="https://ajax.googleapis.com/ajax/libs/model-viewer/4.0.0/model-viewer.min.js"></script>
14
+
15
+ <style>
16
+ :root {
17
+ --primary: #818cf8;
18
+ --primary-dark: #6366f1;
19
+ --accent: #10b981;
20
+ --bg: #0b0f1a;
21
+ --surface: #161c2d;
22
+ --surface-light: #222b3e;
23
+ --border: rgba(255, 255, 255, 0.08);
24
+ --text: #f1f5f9;
25
+ --text-dim: #94a3b8;
26
+ --glass: rgba(255, 255, 255, 0.03);
27
+ --radius-lg: 24px;
28
+ --radius-md: 16px;
29
+ --radius-sm: 8px;
30
+ }
31
+
32
+ * {
33
+ margin: 0;
34
+ padding: 0;
35
+ box-sizing: border-box;
36
+ }
37
+
38
+ body {
39
+ font-family: 'Plus Jakarta Sans', sans-serif;
40
+ background: var(--bg);
41
+ color: var(--text);
42
+ min-height: 100vh;
43
+ display: flex;
44
+ flex-direction: column;
45
+ overflow-x: hidden;
46
+ background:
47
+ radial-gradient(circle at 0% 0%, rgba(99, 102, 241, 0.15) 0%, transparent 40%),
48
+ radial-gradient(circle at 100% 100%, rgba(16, 185, 129, 0.1) 0%, transparent 40%);
49
+ }
50
+
51
+ /* Top Navigation / Steps */
52
+ .app-shell {
53
+ display: flex;
54
+ height: 100vh;
55
+ width: 100vw;
56
+ }
57
+
58
+ .sidebar {
59
+ width: 380px;
60
+ background: var(--surface);
61
+ border-right: 1px solid var(--border);
62
+ display: flex;
63
+ flex-direction: column;
64
+ padding: 1.5rem;
65
+ overflow-y: auto;
66
+ z-index: 10;
67
+ }
68
+
69
+ .main-content {
70
+ flex: 1;
71
+ display: flex;
72
+ flex-direction: column;
73
+ position: relative;
74
+ background: rgba(0,0,0,0.2);
75
+ }
76
+
77
+ header {
78
+ padding: 1rem 2rem;
79
+ display: flex;
80
+ align-items: center;
81
+ justify-content: space-between;
82
+ border-bottom: 1px solid var(--border);
83
+ background: rgba(11, 15, 26, 0.8);
84
+ backdrop-filter: blur(10px);
85
+ }
86
+
87
+ .logo {
88
+ display: flex;
89
+ align-items: center;
90
+ gap: 0.75rem;
91
+ font-family: 'Outfit', sans-serif;
92
+ font-weight: 800;
93
+ font-size: 1.5rem;
94
+ background: linear-gradient(135deg, #fff 0%, #94a3b8 100%);
95
+ -webkit-background-clip: text;
96
+ -webkit-text-fill-color: transparent;
97
+ }
98
+
99
+ .logo i {
100
+ color: var(--primary);
101
+ -webkit-text-fill-color: initial;
102
+ }
103
+
104
+ .steps-nav {
105
+ display: flex;
106
+ gap: 2rem;
107
+ }
108
+
109
+ .step-item {
110
+ display: flex;
111
+ align-items: center;
112
+ gap: 0.5rem;
113
+ font-size: 0.9rem;
114
+ font-weight: 600;
115
+ color: var(--text-dim);
116
+ transition: all 0.3s;
117
+ cursor: pointer;
118
+ padding: 0.5rem 0;
119
+ border-bottom: 2px solid transparent;
120
+ }
121
+
122
+ .step-item.active {
123
+ color: var(--primary);
124
+ border-bottom-color: var(--primary);
125
+ }
126
+
127
+ .step-item.completed {
128
+ color: var(--accent);
129
+ }
130
+
131
+ /* Workspace Panels */
132
+ .workspace {
133
+ flex: 1;
134
+ padding: 2rem;
135
+ display: flex;
136
+ align-items: center;
137
+ justify-content: center;
138
+ position: relative;
139
+ }
140
+
141
+ .panel {
142
+ width: 100%;
143
+ height: 100%;
144
+ display: none;
145
+ flex-direction: column;
146
+ align-items: center;
147
+ justify-content: center;
148
+ animation: fadeIn 0.4s ease-out;
149
+ }
150
+
151
+ .panel.active {
152
+ display: flex;
153
+ }
154
+
155
+ @keyframes fadeIn {
156
+ from { opacity: 0; transform: translateY(10px); }
157
+ to { opacity: 1; transform: translateY(0); }
158
+ }
159
+
160
+ /* Upload Zone */
161
+ .upload-card {
162
+ width: 100%;
163
+ max-width: 600px;
164
+ aspect-ratio: 4/3;
165
+ background: var(--surface-light);
166
+ border: 2px dashed var(--border);
167
+ border-radius: var(--radius-lg);
168
+ display: flex;
169
+ flex-direction: column;
170
+ align-items: center;
171
+ justify-content: center;
172
+ cursor: pointer;
173
+ transition: all 0.3s;
174
+ position: relative;
175
+ overflow: hidden;
176
+ }
177
+
178
+ .upload-card:hover {
179
+ border-color: var(--primary);
180
+ background: rgba(99, 102, 241, 0.05);
181
+ }
182
+
183
+ .upload-card img {
184
+ width: 100%;
185
+ height: 100%;
186
+ object-fit: contain;
187
+ display: none;
188
+ }
189
+
190
+ .upload-hint {
191
+ display: flex;
192
+ flex-direction: column;
193
+ align-items: center;
194
+ gap: 1rem;
195
+ color: var(--text-dim);
196
+ text-align: center;
197
+ padding: 2rem;
198
+ }
199
+
200
+ .upload-hint i {
201
+ width: 48px;
202
+ height: 48px;
203
+ color: var(--primary);
204
+ }
205
+
206
+ /* Result Viewers */
207
+ .viewer-wrapper {
208
+ width: 100%;
209
+ height: 100%;
210
+ border-radius: var(--radius-lg);
211
+ overflow: hidden;
212
+ background: #000;
213
+ position: relative;
214
+ box-shadow: 0 40px 100px rgba(0,0,0,0.6);
215
+ }
216
+
217
+ #frame-container {
218
+ width: 100%;
219
+ height: 100%;
220
+ position: relative;
221
+ }
222
+
223
+ .preview-frame {
224
+ position: absolute;
225
+ inset: 0;
226
+ width: 100%;
227
+ height: 100%;
228
+ object-fit: contain;
229
+ display: none;
230
+ }
231
+
232
+ .preview-frame.active {
233
+ display: block;
234
+ }
235
+
236
+ .viewer-overlay {
237
+ position: absolute;
238
+ bottom: 2rem;
239
+ left: 50%;
240
+ transform: translateX(-50%);
241
+ background: rgba(11, 15, 26, 0.6);
242
+ backdrop-filter: blur(12px);
243
+ padding: 1rem 2rem;
244
+ border-radius: 100px;
245
+ border: 1px solid var(--border);
246
+ display: flex;
247
+ align-items: center;
248
+ gap: 1.5rem;
249
+ width: 80%;
250
+ max-width: 600px;
251
+ }
252
+
253
+ /* Model Viewer Customization */
254
+ model-viewer {
255
+ width: 100%;
256
+ height: 100%;
257
+ background: radial-gradient(circle at 50% 50%, #1a2235 0%, #0b0f1a 100%);
258
+ }
259
+
260
+ /* Sidebar Controls */
261
+ .sidebar-section {
262
+ margin-bottom: 2rem;
263
+ }
264
+
265
+ .sidebar-section h3 {
266
+ font-size: 0.75rem;
267
+ text-transform: uppercase;
268
+ letter-spacing: 0.1em;
269
+ color: var(--text-dim);
270
+ margin-bottom: 1.25rem;
271
+ display: flex;
272
+ align-items: center;
273
+ gap: 0.5rem;
274
+ }
275
+
276
+ .control-group {
277
+ display: flex;
278
+ flex-direction: column;
279
+ gap: 1rem;
280
+ }
281
+
282
+ .input-wrapper {
283
+ display: flex;
284
+ flex-direction: column;
285
+ gap: 0.5rem;
286
+ }
287
+
288
+ .input-wrapper label {
289
+ font-size: 0.85rem;
290
+ font-weight: 600;
291
+ color: #cbd5e1;
292
+ display: flex;
293
+ justify-content: space-between;
294
+ }
295
+
296
+ .input-wrapper label span {
297
+ color: var(--primary);
298
+ font-family: monospace;
299
+ }
300
+
301
+ select, input[type="number"] {
302
+ background: var(--surface-light);
303
+ border: 1px solid var(--border);
304
+ color: white;
305
+ padding: 0.75rem;
306
+ border-radius: var(--radius-sm);
307
+ width: 100%;
308
+ outline: none;
309
+ transition: border-color 0.2s;
310
+ }
311
+
312
+ select:focus {
313
+ border-color: var(--primary);
314
+ }
315
+
316
+ input[type="range"] {
317
+ -webkit-appearance: none;
318
+ height: 4px;
319
+ background: var(--border);
320
+ border-radius: 2px;
321
+ margin: 10px 0;
322
+ }
323
+
324
+ input[type="range"]::-webkit-slider-thumb {
325
+ -webkit-appearance: none;
326
+ width: 16px;
327
+ height: 16px;
328
+ background: var(--primary);
329
+ border-radius: 50%;
330
+ cursor: pointer;
331
+ border: 3px solid var(--surface);
332
+ box-shadow: 0 0 10px rgba(129, 140, 248, 0.4);
333
+ }
334
+
335
+ /* Action Buttons */
336
+ .btn-stack {
337
+ margin-top: auto;
338
+ display: flex;
339
+ flex-direction: column;
340
+ gap: 0.75rem;
341
+ }
342
+
343
+ .btn {
344
+ width: 100%;
345
+ padding: 1rem;
346
+ border-radius: var(--radius-md);
347
+ font-weight: 700;
348
+ font-size: 0.95rem;
349
+ cursor: pointer;
350
+ transition: all 0.3s;
351
+ display: flex;
352
+ align-items: center;
353
+ justify-content: center;
354
+ gap: 0.75rem;
355
+ border: none;
356
+ }
357
+
358
+ .btn-primary {
359
+ background: var(--primary);
360
+ color: white;
361
+ box-shadow: 0 10px 20px rgba(99, 102, 241, 0.2);
362
+ }
363
+
364
+ .btn-primary:hover {
365
+ background: var(--primary-dark);
366
+ transform: translateY(-2px);
367
+ }
368
+
369
+ .btn-primary:disabled {
370
+ background: #334155;
371
+ color: #64748b;
372
+ cursor: not-allowed;
373
+ transform: none;
374
+ }
375
+
376
+ .btn-outline {
377
+ background: transparent;
378
+ border: 1px solid var(--border);
379
+ color: var(--text);
380
+ }
381
+
382
+ .btn-outline:hover {
383
+ background: var(--border);
384
+ }
385
+
386
+ /* Mode Buttons */
387
+ .mode-grid {
388
+ display: grid;
389
+ grid-template-columns: repeat(3, 1fr);
390
+ gap: 0.5rem;
391
+ }
392
+
393
+ .mode-tab {
394
+ background: var(--surface-light);
395
+ border: 1px solid var(--border);
396
+ padding: 0.5rem;
397
+ border-radius: var(--radius-sm);
398
+ font-size: 0.75rem;
399
+ font-weight: 600;
400
+ text-align: center;
401
+ cursor: pointer;
402
+ transition: all 0.2s;
403
+ color: var(--text-dim);
404
+ }
405
+
406
+ .mode-tab.active {
407
+ background: var(--primary);
408
+ color: white;
409
+ border-color: var(--primary);
410
+ }
411
+
412
+ /* Examples Footer */
413
+ .examples-drawer {
414
+ padding: 1.5rem 2rem;
415
+ border-top: 1px solid var(--border);
416
+ background: var(--surface);
417
+ }
418
+
419
+ .examples-grid {
420
+ display: flex;
421
+ gap: 1rem;
422
+ overflow-x: auto;
423
+ padding-bottom: 0.5rem;
424
+ }
425
+
426
+ .example-item {
427
+ flex: 0 0 100px;
428
+ aspect-ratio: 1/1;
429
+ border-radius: var(--radius-md);
430
+ overflow: hidden;
431
+ cursor: pointer;
432
+ border: 2px solid transparent;
433
+ transition: all 0.2s;
434
+ }
435
+
436
+ .example-item:hover {
437
+ transform: translateY(-4px);
438
+ border-color: var(--primary);
439
+ }
440
+
441
+ .example-item img {
442
+ width: 100%;
443
+ height: 100%;
444
+ object-fit: cover;
445
+ }
446
+
447
+ /* Loading & Status */
448
+ .loading-overlay {
449
+ position: fixed;
450
+ inset: 0;
451
+ background: rgba(11, 15, 26, 0.9);
452
+ z-index: 1000;
453
+ display: none;
454
+ flex-direction: column;
455
+ align-items: center;
456
+ justify-content: center;
457
+ gap: 2rem;
458
+ backdrop-filter: blur(8px);
459
+ }
460
+
461
+ .loader-ring {
462
+ width: 80px;
463
+ height: 80px;
464
+ border-radius: 50%;
465
+ border: 4px solid var(--border);
466
+ border-top-color: var(--primary);
467
+ animation: spin 1s linear infinite;
468
+ }
469
+
470
+ @keyframes spin { 100% { transform: rotate(360deg); } }
471
+
472
+ .status-toast {
473
+ position: fixed;
474
+ bottom: 2rem;
475
+ right: 2rem;
476
+ background: var(--surface-light);
477
+ padding: 1rem 1.5rem;
478
+ border-radius: var(--radius-md);
479
+ border: 1px solid var(--border);
480
+ border-left: 4px solid var(--primary);
481
+ box-shadow: 0 20px 40px rgba(0,0,0,0.4);
482
+ display: none;
483
+ z-index: 2000;
484
+ animation: slideIn 0.3s cubic-bezier(0.16, 1, 0.3, 1);
485
+ }
486
+
487
+ @keyframes slideIn { from { transform: translateX(100%); } to { transform: translateX(0); } }
488
+
489
+ /* Scrollbar */
490
+ ::-webkit-scrollbar { width: 6px; height: 6px; }
491
+ ::-webkit-scrollbar-track { background: transparent; }
492
+ ::-webkit-scrollbar-thumb { background: var(--border); border-radius: 10px; }
493
+ ::-webkit-scrollbar-thumb:hover { background: var(--text-dim); }
494
+
495
+ </style>
496
+ </head>
497
+ <body>
498
+
499
+ <div class="app-shell">
500
+ <!-- Left Sidebar: Controls -->
501
+ <div class="sidebar">
502
+ <div class="logo" style="margin-bottom: 2.5rem;">
503
+ <i data-lucide="sparkles"></i>
504
+ <span>Pixal3D</span>
505
+ </div>
506
+
507
+ <div class="sidebar-section">
508
+ <h3><i data-lucide="sliders-horizontal" style="width: 14px;"></i> Base Settings</h3>
509
+ <div class="control-group">
510
+ <div class="input-wrapper">
511
+ <label>Target Resolution</label>
512
+ <select id="resolution">
513
+ <option value="1024">1024 (Balanced)</option>
514
+ <option value="1536" selected>1536 (High Quality)</option>
515
+ </select>
516
+ </div>
517
+ <div class="input-wrapper">
518
+ <label>Generation Seed <span>#<span id="seed-display">42</span></span></label>
519
+ <div style="display: flex; gap: 0.5rem;">
520
+ <input type="number" id="seed" value="42" style="flex: 1;">
521
+ <button class="btn btn-outline" style="width: 50px; padding: 0;" onclick="randomizeSeed()">
522
+ <i data-lucide="rotate-cw" style="width: 16px;"></i>
523
+ </button>
524
+ </div>
525
+ </div>
526
+ </div>
527
+ </div>
528
+
529
+ <div class="sidebar-section" id="render-controls" style="display: none;">
530
+ <h3><i data-lucide="palette" style="width: 14px;"></i> Render Mode</h3>
531
+ <div class="mode-grid" id="mode-grid">
532
+ <!-- Tabs injected via JS -->
533
+ </div>
534
+ </div>
535
+
536
+ <div class="sidebar-section">
537
+ <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem; cursor: pointer;" onclick="toggleAdvanced()">
538
+ <h3 style="margin-bottom: 0;"><i data-lucide="shield-alert" style="width: 14px;"></i> Advanced Engine</h3>
539
+ <i data-lucide="chevron-down" id="adv-chevron" style="width: 16px; transition: transform 0.3s;"></i>
540
+ </div>
541
+ <div id="advanced-settings" style="display: none; padding-top: 1rem; border-top: 1px solid var(--border);">
542
+ <div class="control-group">
543
+ <div class="input-wrapper">
544
+ <label>SS Guidance <span><span id="ss_gs_val">7.5</span></span></label>
545
+ <input type="range" id="ss_gs" min="1" max="10" step="0.1" value="7.5" oninput="updateVal('ss_gs')">
546
+ </div>
547
+ <div class="input-wrapper">
548
+ <label>SS Sampling <span><span id="ss_steps_val">12</span></span></label>
549
+ <input type="range" id="ss_steps" min="1" max="50" step="1" value="12" oninput="updateVal('ss_steps')">
550
+ </div>
551
+ <div class="input-wrapper">
552
+ <label>Shape Guidance <span><span id="shape_gs_val">7.5</span></span></label>
553
+ <input type="range" id="shape_gs" min="1" max="10" step="0.1" value="7.5" oninput="updateVal('shape_gs')">
554
+ </div>
555
+ <hr style="border: 0; border-top: 1px solid var(--border); margin: 0.5rem 0;">
556
+ <div class="input-wrapper">
557
+ <label>Decimation <span><span id="decim_val">1M</span></span></label>
558
+ <input type="range" id="decimation" min="100000" max="1000000" step="10000" value="1000000" oninput="updateVal('decimation')">
559
+ </div>
560
+ </div>
561
+ </div>
562
+ </div>
563
+
564
+ <div class="btn-stack">
565
+ <button class="btn btn-primary" id="generate-btn" disabled>
566
+ <i data-lucide="zap"></i>
567
+ Start Generation
568
+ </button>
569
+ <button class="btn btn-outline" id="extract-btn" style="display: none;">
570
+ <i data-lucide="box"></i>
571
+ Extract Mesh (GLB)
572
+ </button>
573
+ <button class="btn btn-outline" id="download-btn" style="display: none; background: rgba(16, 185, 129, 0.1); border-color: var(--accent); color: var(--accent);">
574
+ <i data-lucide="download"></i>
575
+ Download Asset
576
+ </button>
577
+ </div>
578
+ </div>
579
+
+ <!-- Right: Main Area -->
+ <div class="main-content">
+ <header>
+ <div class="steps-nav">
+ <div class="step-item active" id="step-1">
+ <i data-lucide="image"></i>
+ <span>1. SOURCE</span>
+ </div>
+ <div class="step-item" id="step-2">
+ <i data-lucide="view"></i>
+ <span>2. PREVIEW</span>
+ </div>
+ <div class="step-item" id="step-3">
+ <i data-lucide="box"></i>
+ <span>3. RESULT</span>
+ </div>
+ </div>
+ <div style="color: var(--text-dim); font-size: 0.8rem; font-weight: 500;">
+ TRELLIS.2 Engine • V2.6
+ </div>
+ </header>
+
+ <div class="workspace">
+ <!-- Panel 1: Upload -->
+ <div class="panel active" id="panel-1">
+ <div class="upload-card" id="drop-zone" onclick="document.getElementById('file-input').click()">
+ <input type="file" id="file-input" hidden accept="image/*">
+ <div class="upload-hint" id="upload-hint">
+ <i data-lucide="cloud-upload"></i>
+ <h2 style="font-family: 'Outfit'; margin-top: 1rem;">Upload Reference</h2>
+ <p>Drag and drop any image, or click to browse</p>
+ </div>
+ <img id="source-preview" src="" alt="Source">
+ </div>
+ </div>
+
+ <!-- Panel 2: Multi-frame Preview -->
+ <div class="panel" id="panel-2">
+ <div class="viewer-wrapper">
+ <div id="frame-container">
+ <!-- Injected via JS -->
+ </div>
+ <div class="viewer-overlay">
+ <i data-lucide="move-horizontal" style="color: var(--primary); width: 20px;"></i>
+ <input type="range" id="angle-slider" min="0" max="7" value="0" step="1" style="flex: 1;">
+ <div style="font-family: monospace; font-weight: 700; color: var(--primary); font-size: 0.8rem;">
+ VIEW_ANGLE: <span id="angle-display">00</span>°
+ </div>
+ </div>
+ </div>
+ </div>
+
+ <!-- Panel 3: 3D Result -->
+ <div class="panel" id="panel-3">
+ <div class="viewer-wrapper">
+ <model-viewer id="main-3d-viewer"
+ camera-controls
+ auto-rotate
+ shadow-intensity="1.5"
+ environment-image="neutral"
+ exposure="1.2">
+ <div slot="progress-bar" style="background: var(--primary); height: 4px;"></div>
+ </model-viewer>
+ </div>
+ </div>
+ </div>
+
+ <!-- Footer: Examples -->
+ <div class="examples-drawer">
+ <h4 style="font-size: 0.75rem; color: var(--text-dim); text-transform: uppercase; letter-spacing: 0.1em; margin-bottom: 1rem;">Sample Gallery</h4>
+ <div class="examples-grid" id="examples-grid">
+ <!-- Injected via JS -->
+ </div>
+ </div>
+ </div>
+ </div>
+
+ <div class="loading-overlay" id="loading-overlay">
+ <div class="loader-ring"></div>
+ <div style="text-align: center;">
+ <h2 id="loading-title" style="font-family: 'Outfit'; margin-bottom: 0.5rem;">Synthesizing Geometry</h2>
+ <p id="loading-subtitle" style="color: var(--text-dim);">The neural engine is crafting your 3D model...</p>
+ </div>
+ </div>
+
+ <div class="status-toast" id="toast">Generation started!</div>
+
+ <script type="module">
+ import { Client, handle_file } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
+
+ let client;
+ let currentFile = null;
+ let generationResult = null;
+ let currentMode = "shaded_forest";
+ let currentFrame = 0;
+ let currentStep = 1;
+
+ const MODES = [
+ { name: "Normal", key: "normal" },
+ { name: "Clay", key: "clay" },
+ { name: "Color", key: "base_color" },
+ { name: "Forest", key: "shaded_forest" },
+ { name: "Sunset", key: "shaded_sunset" },
+ { name: "Blue", key: "shaded_courtyard" }
+ ];
+
+ async function init() {
+ lucide.createIcons();
+ try {
+ client = await Client.connect(window.location.origin);
+ setupUI();
+ loadSamples();
+ } catch (err) {
+ console.error("Connection error:", err);
+ showToast("Connection failed. Try refreshing.");
+ }
+ }
+
+ function setupUI() {
+ // File Handling
+ const dropZone = document.getElementById('drop-zone');
+ const fileInput = document.getElementById('file-input');
+
+ dropZone.ondragover = (e) => { e.preventDefault(); dropZone.style.borderColor = 'var(--primary)'; };
+ dropZone.ondragleave = () => dropZone.style.borderColor = 'var(--border)';
+ dropZone.ondrop = (e) => {
+ e.preventDefault();
+ if (e.dataTransfer.files.length) handleImageUpload(e.dataTransfer.files[0]);
+ };
+ fileInput.onchange = (e) => { if (e.target.files.length) handleImageUpload(e.target.files[0]); };
+
+ // Buttons
+ document.getElementById('generate-btn').onclick = startGeneration;
+ document.getElementById('extract-btn').onclick = startExtraction;
+ document.getElementById('download-btn').onclick = () => {
+ const link = document.createElement('a');
+ link.href = document.getElementById('main-3d-viewer').src;
+ link.download = "pixal3d_export.glb";
+ link.click();
+ };
+
+ // Slider
+ document.getElementById('angle-slider').oninput = (e) => {
+ currentFrame = parseInt(e.target.value);
+ document.getElementById('angle-display').textContent = (currentFrame * 22.5).toFixed(0).padStart(2, '0');
+ updateFrame();
+ };
+
+ // Mode Grid
+ const grid = document.getElementById('mode-grid');
+ MODES.forEach(m => {
+ const tab = document.createElement('div');
+ tab.className = `mode-tab ${m.key === currentMode ? 'active' : ''}`;
+ tab.textContent = m.name;
+ tab.onclick = () => {
+ currentMode = m.key;
+ document.querySelectorAll('.mode-tab').forEach(t => t.classList.remove('active'));
+ tab.classList.add('active');
+ updateFrame();
+ };
+ grid.appendChild(tab);
+ });
+ }
+
+ async function handleImageUpload(file) {
+ currentFile = file;
+ const reader = new FileReader();
+ reader.onload = (e) => {
+ const img = document.getElementById('source-preview');
+ const hint = document.getElementById('upload-hint');
+ img.src = e.target.result;
+ img.style.display = 'block';
+ hint.style.display = 'none';
+ document.getElementById('generate-btn').disabled = false;
+ setStep(1);
+ };
+ reader.readAsDataURL(file);
+
+ // Background pre-warm
+ client.predict("/preprocess", { image: handle_file(file) }).catch(console.error);
+ }
+
+ function setStep(num) {
+ currentStep = num;
+ document.querySelectorAll('.step-item').forEach((item, i) => {
+ item.className = 'step-item';
+ if (i + 1 < num) item.classList.add('completed');
+ if (i + 1 === num) item.classList.add('active');
+ });
+ document.querySelectorAll('.panel').forEach((p, i) => {
+ p.classList.toggle('active', i + 1 === num);
+ });
+
+ // Toggle side controls based on step
+ document.getElementById('render-controls').style.display = (num >= 2) ? 'block' : 'none';
+ document.getElementById('extract-btn').style.display = (num === 2) ? 'flex' : 'none';
+ document.getElementById('download-btn').style.display = (num === 3) ? 'flex' : 'none';
+ }
+
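+ // Calls the /generate_3d endpoint. The response carries per-mode lists of rendered
+ // preview frames (render_paths) and a server-side state_path that startExtraction reuses.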
+ async function startGeneration() {
+ if (!currentFile) return;
+
+ showLoading("Neural Synthesis", "Optimizing geometry for " + (document.getElementById('resolution').value) + "px output...");
+ try {
+ const params = {
+ image: handle_file(currentFile),
+ seed: parseInt(document.getElementById('seed').value),
+ resolution: parseInt(document.getElementById('resolution').value),
+ ss_guidance_strength: parseFloat(document.getElementById('ss_gs').value),
+ ss_sampling_steps: parseInt(document.getElementById('ss_steps').value),
+ shape_slat_guidance_strength: parseFloat(document.getElementById('shape_gs').value)
+ };
+
+ const result = await client.predict("/generate_3d", params);
+ generationResult = result.data[0];
+
+ populateFrames(generationResult.render_paths);
+ setStep(2);
+ hideLoading();
+ showToast("Generation complete!");
+ } catch (err) {
+ console.error(err);
+ hideLoading();
+ showToast("An error occurred during synthesis.");
+ }
+ }
+
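+ // Builds one <img> per (render mode, view angle); updateFrame() reveals only the frame
+ // matching the currently selected mode and slider position.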
+ function populateFrames(renderPaths) {
+ const container = document.getElementById('frame-container');
+ container.innerHTML = '';
+ Object.entries(renderPaths).forEach(([mode, files]) => {
+ files.forEach((file, i) => {
+ const img = document.createElement('img');
+ // Try the URL from Gradio; fall back to our mounted /tmp route if it's an absolute local path
+ let url = file.url;
+ if (!url && file.path) {
+ const filename = file.path.split(/[\\/]/).pop();
+ url = `/tmp/${filename}`;
+ }
+ img.src = url;
+ img.className = 'preview-frame';
+ img.id = `frame-${mode}-${i}`;
+ img.onerror = () => {
+ // Fallback attempt if the first URL fails
+ const filename = file.path ? file.path.split(/[\\/]/).pop() : null;
+ if (filename && !img.src.includes('/tmp/')) {
+ img.src = `/tmp/${filename}`;
+ }
+ };
+ container.appendChild(img);
+ });
+ });
+ updateFrame();
+ }
+
+ function updateFrame() {
+ document.querySelectorAll('.preview-frame').forEach(f => f.classList.remove('active'));
+ const active = document.getElementById(`frame-${currentMode}-${currentFrame}`);
+ if (active) active.classList.add('active');
+ }
+
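+ // Sends the cached state_path to /extract_glb_api for decimation and texture baking,
+ // then loads the returned GLB URL into the <model-viewer> element.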
+ async function startExtraction() {
+ if (!generationResult) return;
+
+ showLoading("Finalizing Mesh", "Performing PBR texture baking and decimation...");
+ try {
+ const params = {
+ state_path: generationResult.state_path,
+ decimation_target: parseInt(document.getElementById('decimation').value),
+ texture_size: 4096 // Constant for highest quality
+ };
+
+ const result = await client.predict("/extract_glb_api", params);
+ const glbUrl = result.data[0].url;
+
+ const viewer = document.getElementById('main-3d-viewer');
+ viewer.src = glbUrl;
+ setStep(3);
+ hideLoading();
+ showToast("3D Asset ready!");
+ } catch (err) {
+ console.error(err);
+ hideLoading();
+ showToast("Extraction failed.");
+ }
+ }
+
+ function loadSamples() {
+ const grid = document.getElementById('examples-grid');
+ const samples = [
+ 'assets/example_image/0a34fae7ba57cb8870df5325b9c30ea474def1b0913c19c596655b85a79fdee4.webp',
+ 'assets/example_image/0e4984a9b3765ce80e9853443f9319ecedf90885c74b56cccfebc09402740f8a.webp',
+ 'assets/example_image/130c2b18f1651a70f8aa15b2c99f8dba29bb943044d92871f9223bd3e989e8b1.webp',
+ 'assets/example_image/22a868bac8e62511fccd2bc82ed31ae77ed31ae2a8a149be7150957f11b30c9b.webp',
+ 'assets/example_image/3903b87907a6b4947006e6fc7c0c64f40cd98932a02bf0ecf7d6dfae776f3a38.webp',
+ 'assets/example_image/4bc7abe209c8673dd3766ee4fad14d40acbed02d118e7629f645c60fd77313f1.webp'
+ ];
+
+ samples.forEach(path => {
+ const div = document.createElement('div');
+ div.className = 'example-item';
+ div.innerHTML = `<img src="${path}">`;
+ div.onclick = async () => {
+ showLoading("Fetching Sample", "Loading high-resolution asset from gallery...");
+ const res = await fetch(path);
+ const blob = await res.blob();
+ const file = new File([blob], "sample.webp", { type: "image/webp" });
+ await handleImageUpload(file);
+ hideLoading();
+ };
+ grid.appendChild(div);
+ });
+ }
+
+ // Helpers
+ window.toggleAdvanced = () => {
+ const el = document.getElementById('advanced-settings');
+ const chev = document.getElementById('adv-chevron');
+ const isOpen = el.style.display === 'block';
+ el.style.display = isOpen ? 'none' : 'block';
+ chev.style.transform = isOpen ? 'rotate(0deg)' : 'rotate(180deg)';
+ };
+
+ window.updateVal = (id) => {
+ const val = document.getElementById(id).value;
+ let label = val;
+ if (id === 'decimation') label = (val/1000000).toFixed(1) + 'M';
+ document.getElementById(id + '_val').textContent = label;
+ };
+
+ window.randomizeSeed = () => {
+ const s = Math.floor(Math.random() * 999999);
+ document.getElementById('seed').value = s;
+ document.getElementById('seed-display').textContent = s;
+ };
+
+ function showLoading(title, sub) {
+ document.getElementById('loading-title').textContent = title;
+ document.getElementById('loading-subtitle').textContent = sub;
+ document.getElementById('loading-overlay').style.display = 'flex';
+ }
+
+ function hideLoading() {
+ document.getElementById('loading-overlay').style.display = 'none';
+ }
+
+ function showToast(msg) {
+ const t = document.getElementById('toast');
+ t.textContent = msg;
+ t.style.display = 'block';
+ setTimeout(() => t.style.display = 'none', 3000);
+ }
+
+ init();
+ </script>
+ </body>
+ </html>
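
For reference, the two endpoints this page calls can also be driven from Python. The sketch below is an assumption-laden illustration: it presumes the gradio_client package, a placeholder server URL, and that /generate_3d and /extract_glb_api accept the same parameter names used in the JavaScript above and return, respectively, a dict containing render_paths and state_path and a GLB file path; the exact return shapes may differ in practice.

from gradio_client import Client, handle_file

client = Client("http://localhost:7860")  # assumed local URL; replace with the Space URL

# Step 1: image -> multi-view previews plus a server-side state handle
result = client.predict(
    image=handle_file("my_image.png"),  # hypothetical input image path
    seed=42,
    resolution=1536,
    ss_guidance_strength=7.5,
    ss_sampling_steps=12,
    shape_slat_guidance_strength=7.5,
    api_name="/generate_3d",
)
state_path = result["state_path"]  # assumed return shape, mirroring what the JS reads

# Step 2: decimate, bake textures, and export a GLB mesh
glb_path = client.predict(
    state_path=state_path,
    decimation_target=1_000_000,
    texture_size=4096,
    api_name="/extract_glb_api",
)
print("GLB saved at:", glb_path)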