akhaliq HF Staff commited on
Commit
0d71da7
·
1 Parent(s): 25145b8

feat: add frontend UI for Pixal3D image-to-3D generation interface

Browse files
Files changed (2) hide show
  1. app.py +154 -410
  2. index.html +855 -0
app.py CHANGED
@@ -1,15 +1,20 @@
1
- """
2
- Pixal3D (TRELLIS.2 Backbone) - Gradio App
3
-
4
- Image-to-3D generation using Proj-mode Cascade inference (512->1024/1536).
5
-
6
- """
7
-
8
- import spaces
9
- import gradio as gr
10
-
11
  import os
12
  import subprocess
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  subprocess.run([
14
  "pip", "install", "--force-reinstall", "--no-deps",
15
  "https://github.com/LDYang694/Storages/releases/download/20260430/utils3d-0.0.2-py3-none-any.whl"
@@ -21,31 +26,26 @@ os.environ["ATTN_BACKEND"] = "flash_attn_3"
21
  os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autotune_cache.json')
22
  os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
23
 
24
- import argparse
25
- import math
26
- import time
27
- from datetime import datetime
28
- import shutil
29
- import cv2
30
- from typing import *
31
- import torch
32
- import numpy as np
33
- from PIL import Image
34
- import base64
35
- import io
36
  from trellis2.modules.sparse import SparseTensor
37
  from trellis2.pipelines import Pixal3DImageTo3DPipeline
38
  from trellis2.renderers import EnvMap
39
  from trellis2.utils import render_utils
40
  import o_voxel
41
 
42
-
43
  # ============================================================================
44
  # Constants & Defaults
45
  # ============================================================================
46
 
47
  MAX_SEED = np.iinfo(np.int32).max
48
  TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
 
 
49
  MODES = [
50
  {"name": "Normal", "icon": "assets/app/normal.png", "render_key": "normal"},
51
  {"name": "Clay render", "icon": "assets/app/clay.png", "render_key": "clay"},
@@ -55,8 +55,6 @@ MODES = [
55
  {"name": "HDRI courtyard", "icon": "assets/app/hdri_courtyard.png", "render_key": "shaded_courtyard"},
56
  ]
57
  STEPS = 8
58
- DEFAULT_MODE = 3
59
- DEFAULT_STEP = 0
60
 
61
  # Cascade parameters
62
  CASCADE_LR_RESOLUTION = 512
@@ -68,7 +66,7 @@ WILD_MESH_SCALE = 1.0
68
  WILD_EXTEND_PIXEL = 0
69
  WILD_IMAGE_RESOLUTION = 512
70
 
71
- # Image Cond Model configs (extracted from training configs, hardcoded)
72
  IMAGE_COND_CONFIGS = {
73
  "ss": {
74
  "model_name": "camenduru/dinov3-vitl16-pretrain-lvd1689m",
@@ -98,126 +96,62 @@ IMAGE_COND_CONFIGS = {
98
  },
99
  }
100
 
101
-
102
- # ============================================================================
103
- # CSS & JS
104
- # ============================================================================
105
-
106
- css = """
107
- .stepper-wrapper { padding: 0; }
108
- .stepper-container { padding: 0; align-items: center; }
109
- .step-button { flex-direction: row; }
110
- .step-connector { transform: none; }
111
- .step-number { width: 16px; height: 16px; }
112
- .step-label { position: relative; bottom: 0; }
113
- .wrap.center.full { inset: 0; height: 100%; }
114
- .wrap.center.full.translucent { background: var(--block-background-fill); }
115
- .meta-text-center {
116
- display: block !important; position: absolute !important;
117
- top: unset !important; bottom: 0 !important; right: 0 !important; transform: unset !important;
118
- }
119
- .previewer-container {
120
- position: relative;
121
- font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
122
- width: 100%; height: 722px; margin: 0 auto; padding: 20px;
123
- display: flex; flex-direction: column; align-items: center; justify-content: center;
124
- }
125
- .previewer-container .tips-icon {
126
- position: absolute; right: 10px; top: 10px; z-index: 10;
127
- border-radius: 10px; color: #fff; background-color: var(--color-accent); padding: 3px 6px; user-select: none;
128
- }
129
- .previewer-container .tips-text {
130
- position: absolute; right: 10px; top: 50px; color: #fff; background-color: var(--color-accent);
131
- border-radius: 10px; padding: 6px; text-align: left; max-width: 300px; z-index: 10;
132
- transition: all 0.3s; opacity: 0%; user-select: none;
133
- }
134
- .previewer-container .tips-text p { font-size: 14px; line-height: 1.2; }
135
- .tips-icon:hover + .tips-text { display: block; opacity: 100%; }
136
- .previewer-container .mode-row {
137
- width: 100%; display: flex; gap: 8px; justify-content: center; margin-bottom: 20px; flex-wrap: wrap;
138
- }
139
- .previewer-container .mode-btn {
140
- width: 24px; height: 24px; border-radius: 50%; cursor: pointer; opacity: 0.5;
141
- transition: all 0.2s; border: 2px solid #ddd; object-fit: cover;
142
- }
143
- .previewer-container .mode-btn:hover { opacity: 0.9; transform: scale(1.1); }
144
- .previewer-container .mode-btn.active { opacity: 1; border-color: var(--color-accent); transform: scale(1.1); }
145
- .previewer-container .display-row {
146
- margin-bottom: 20px; min-height: 400px; width: 100%; flex-grow: 1;
147
- display: flex; justify-content: center; align-items: center;
148
- }
149
- .previewer-container .previewer-main-image {
150
- max-width: 100%; max-height: 100%; flex-grow: 1; object-fit: contain; display: none;
151
- }
152
- .previewer-container .previewer-main-image.visible { display: block; }
153
- .previewer-container .slider-row {
154
- width: 100%; display: flex; flex-direction: column; align-items: center; gap: 10px; padding: 0 10px;
155
- }
156
- .previewer-container input[type=range] { -webkit-appearance: none; width: 100%; max-width: 400px; background: transparent; }
157
- .previewer-container input[type=range]::-webkit-slider-runnable-track {
158
- width: 100%; height: 8px; cursor: pointer; background: #ddd; border-radius: 5px;
159
- }
160
- .previewer-container input[type=range]::-webkit-slider-thumb {
161
- height: 20px; width: 20px; border-radius: 50%; background: var(--color-accent);
162
- cursor: pointer; -webkit-appearance: none; margin-top: -6px;
163
- box-shadow: 0 2px 5px rgba(0,0,0,0.2); transition: transform 0.1s;
164
- }
165
- .previewer-container input[type=range]::-webkit-slider-thumb:hover { transform: scale(1.2); }
166
- .gradio-container .padded:has(.previewer-container) { padding: 0 !important; }
167
- .gradio-container:has(.previewer-container) [data-testid="block-label"] { position: absolute; top: 0; left: 0; }
168
- """
169
-
170
- head = """
171
- <script>
172
- function refreshView(mode, step) {
173
- const allImgs = document.querySelectorAll('.previewer-main-image');
174
- for (let i = 0; i < allImgs.length; i++) {
175
- const img = allImgs[i];
176
- if (img.classList.contains('visible')) {
177
- const id = img.id;
178
- const [_, m, s] = id.split('-');
179
- if (mode === -1) mode = parseInt(m.slice(1));
180
- if (step === -1) step = parseInt(s.slice(1));
181
- break;
182
- }
183
- }
184
- allImgs.forEach(img => img.classList.remove('visible'));
185
- const targetId = 'view-m' + mode + '-s' + step;
186
- const targetImg = document.getElementById(targetId);
187
- if (targetImg) targetImg.classList.add('visible');
188
- const allBtns = document.querySelectorAll('.mode-btn');
189
- allBtns.forEach((btn, idx) => {
190
- if (idx === mode) btn.classList.add('active');
191
- else btn.classList.remove('active');
192
- });
193
- }
194
- function selectMode(mode) { refreshView(mode, -1); }
195
- function onSliderChange(val) { refreshView(-1, parseInt(val)); }
196
- </script>
197
- """
198
-
199
- empty_html = f"""
200
- <div class="previewer-container">
201
- <svg style=" opacity: .5; height: var(--size-5); color: var(--body-text-color);"
202
- xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather feather-image"><rect x="3" y="3" width="18" height="18" rx="2" ry="2"></rect><circle cx="8.5" cy="8.5" r="1.5"></circle><polyline points="21 15 16 10 5 21"></polyline></svg>
203
- </div>
204
- """
205
-
206
-
207
  # ============================================================================
208
- # Model Loading Utilities
209
  # ============================================================================
210
 
211
  def build_image_cond_model(config: dict):
212
- """Build DinoV3ProjFeatureExtractor."""
213
  from trellis2.trainers.flow_matching.mixins.image_conditioned_proj import DinoV3ProjFeatureExtractor
214
  model = DinoV3ProjFeatureExtractor(**config)
215
  model.eval()
216
  return model
217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
  # ============================================================================
220
- # Camera Parameter Utilities
221
  # ============================================================================
222
 
223
  def compute_f_pixels(camera_angle_x: float, resolution: int) -> float:
@@ -225,7 +159,6 @@ def compute_f_pixels(camera_angle_x: float, resolution: int) -> float:
225
  f_pixels = focal_length * resolution / 32.0
226
  return float(f_pixels.item())
227
 
228
-
229
  def distance_from_fov(camera_angle_x, grid_point, target_point, mesh_scale, image_resolution):
230
  rotation_matrix = torch.tensor([[1.0, 0.0, 0.0], [0.0, 0.0, -1.0], [0.0, 1.0, 0.0]])
231
  gp = grid_point.to(torch.float32) @ rotation_matrix.T
@@ -238,25 +171,8 @@ def distance_from_fov(camera_angle_x, grid_point, target_point, mesh_scale, imag
238
  distance_x = f_pixels * xw / x_ndc - yw
239
  return {"distance_from_x": float(distance_x), "f_pixels": float(f_pixels)}
240
 
241
-
242
- def load_moge_model(device="cuda", model_name=MOGE_MODEL_NAME):
243
- print(f"[MoGe-2] Loading model {model_name}...")
244
- from moge.model.v2 import MoGeModel
245
- moge_model = MoGeModel.from_pretrained(model_name).to(device)
246
- moge_model.eval()
247
- print("[MoGe-2] Model loaded!")
248
- return moge_model
249
-
250
-
251
- def get_camera_params_wild_moge(image, moge_model, device="cuda",
252
- mesh_scale=1.0, extend_pixel=0, image_resolution=512):
253
- """Estimate camera parameters via MoGe-2."""
254
- if isinstance(image, str):
255
- pil_image = Image.open(image).convert("RGB")
256
- elif isinstance(image, Image.Image):
257
- pil_image = image.convert("RGB")
258
- else:
259
- raise ValueError(f"Unsupported image type: {type(image)}")
260
  width, height = pil_image.size
261
  image_np = np.array(pil_image).astype(np.float32) / 255.0
262
  image_tensor = torch.from_numpy(image_np).permute(2, 0, 1).to(device)
@@ -275,88 +191,81 @@ def get_camera_params_wild_moge(image, moge_model, device="cuda",
275
  )["distance_from_x"]
276
  return {'camera_angle_x': camera_angle_x, 'distance': distance, 'mesh_scale': mesh_scale}
277
 
278
-
279
- # ============================================================================
280
- # UI Utilities
281
- # ============================================================================
282
-
283
- def image_to_base64(image):
284
- buffered = io.BytesIO()
285
- image = image.convert("RGB")
286
- image.save(buffered, format="jpeg", quality=85)
287
- img_str = base64.b64encode(buffered.getvalue()).decode()
288
- return f"data:image/jpeg;base64,{img_str}"
289
-
290
-
291
- def start_session(req: gr.Request):
292
- user_dir = os.path.join(TMP_DIR, str(req.session_hash))
293
- os.makedirs(user_dir, exist_ok=True)
294
-
295
-
296
- def end_session(req: gr.Request):
297
- user_dir = os.path.join(TMP_DIR, str(req.session_hash))
298
- if os.path.exists(user_dir):
299
- shutil.rmtree(user_dir)
300
-
301
-
302
- def preprocess_image(image: Image.Image) -> Image.Image:
303
- return pipeline.preprocess_image(image)
304
-
305
-
306
  def pack_state(shape_slat, tex_slat, res):
307
- return {
308
  'shape_slat_feats': shape_slat.feats.cpu().numpy(),
309
  'tex_slat_feats': tex_slat.feats.cpu().numpy(),
310
  'coords': shape_slat.coords.cpu().numpy(),
311
  'res': res,
312
  }
 
 
 
313
 
314
-
315
- def unpack_state(state):
316
  shape_slat = SparseTensor(
317
- feats=torch.from_numpy(state['shape_slat_feats']).cuda(),
318
- coords=torch.from_numpy(state['coords']).cuda(),
319
  )
320
- tex_slat = shape_slat.replace(torch.from_numpy(state['tex_slat_feats']).cuda())
321
- return shape_slat, tex_slat, state['res']
322
 
 
 
 
323
 
324
- def get_seed(randomize_seed, seed):
325
- return np.random.randint(0, MAX_SEED) if randomize_seed else seed
326
 
 
 
 
 
 
327
 
328
- # ============================================================================
329
- # Core Inference
330
- # ============================================================================
 
 
 
 
 
331
 
 
332
  @spaces.GPU(duration=120)
333
- def image_to_3d(
334
- image, seed, resolution,
335
- ss_guidance_strength, ss_guidance_rescale, ss_sampling_steps, ss_rescale_t,
336
- shape_slat_guidance_strength, shape_slat_guidance_rescale, shape_slat_sampling_steps, shape_slat_rescale_t,
337
- tex_slat_guidance_strength, tex_slat_guidance_rescale, tex_slat_sampling_steps, tex_slat_rescale_t,
338
- req: gr.Request,
339
- progress=gr.Progress(track_tqdm=True),
340
- ):
341
- device = pipeline.device
 
 
 
 
 
 
 
 
 
342
  torch.manual_seed(seed)
343
  hr_resolution = int(resolution)
344
-
345
- total_t0 = time.time()
346
- print(f"\n{'='*60}")
347
- print(f" [Generate] Start | seed={seed}, resolution={hr_resolution}")
348
- print(f"{'='*60}")
349
-
350
- # Preprocessing
351
- image_preprocessed = pipeline.preprocess_image(image)
352
-
353
- # Camera estimation via MoGe-2
354
  camera_params = get_camera_params_wild_moge(
355
- image_preprocessed, moge_model, device=str(device),
356
  mesh_scale=WILD_MESH_SCALE, extend_pixel=WILD_EXTEND_PIXEL,
357
  image_resolution=WILD_IMAGE_RESOLUTION,
358
  )
359
-
360
  ss_sampler_override = {"steps": ss_sampling_steps, "guidance_strength": ss_guidance_strength,
361
  "guidance_rescale": ss_guidance_rescale, "rescale_t": ss_rescale_t}
362
  shape_sampler_override = {"steps": shape_slat_sampling_steps, "guidance_strength": shape_slat_guidance_strength,
@@ -364,7 +273,6 @@ def image_to_3d(
364
  tex_sampler_override = {"steps": tex_slat_sampling_steps, "guidance_strength": tex_slat_guidance_strength,
365
  "guidance_rescale": tex_slat_guidance_rescale, "rescale_t": tex_slat_rescale_t}
366
 
367
- # Run pipeline
368
  pipeline_type = f"{hr_resolution}_cascade"
369
  mesh_list, (shape_slat, tex_slat, res) = pipeline.run(
370
  image_preprocessed,
@@ -378,60 +286,37 @@ def image_to_3d(
378
  pipeline_type=pipeline_type,
379
  max_num_tokens=CASCADE_MAX_NUM_TOKENS,
380
  )
 
381
  mesh = mesh_list[0]
382
- state = pack_state(shape_slat, tex_slat, res)
383
- del shape_slat, tex_slat, mesh_list
384
- torch.cuda.empty_cache()
385
-
386
- # Render
387
  mesh.simplify(16777216)
388
- images = render_utils.render_proj_aligned_video(
389
  mesh, camera_angle_x=camera_params['camera_angle_x'],
390
  distance=camera_params['distance'], resolution=1024,
391
  num_frames=STEPS, envmap=envmap,
392
  )
393
- del mesh
394
- torch.cuda.empty_cache()
395
- print(f"\n [Generate] Total time: {time.time()-total_t0:.2f}s")
396
-
397
- # Build HTML
398
- images_html = ""
399
- for m_idx, mode in enumerate(MODES):
400
- for s_idx in range(STEPS):
401
- unique_id = f"view-m{m_idx}-s{s_idx}"
402
- is_visible = (m_idx == DEFAULT_MODE and s_idx == DEFAULT_STEP)
403
- vis_class = "visible" if is_visible else ""
404
- img_base64 = image_to_base64(Image.fromarray(images[mode['render_key']][s_idx]))
405
- images_html += f'<img id="{unique_id}" class="previewer-main-image {vis_class}" src="{img_base64}" loading="eager">'
406
-
407
- btns_html = ""
408
- for idx, mode in enumerate(MODES):
409
- active_class = "active" if idx == DEFAULT_MODE else ""
410
- btns_html += f'<img src="{mode["icon_base64"]}" class="mode-btn {active_class}" onclick="selectMode({idx})" title="{mode["name"]}">'
411
-
412
- full_html = f"""
413
- <div class="previewer-container">
414
- <div class="tips-wrapper">
415
- <div class="tips-icon">Tips</div>
416
- <div class="tips-text">
417
- <p>Render Mode - Click circular buttons to switch render modes.</p>
418
- <p>View Angle - Drag the slider to change the view angle.</p>
419
- </div>
420
- </div>
421
- <div class="display-row">{images_html}</div>
422
- <div class="mode-row" id="btn-group">{btns_html}</div>
423
- <div class="slider-row">
424
- <input type="range" id="custom-slider" min="0" max="{STEPS - 1}" value="{DEFAULT_STEP}" step="1" oninput="onSliderChange(this.value)">
425
- </div>
426
- </div>
427
- """
428
- return state, full_html
429
 
 
 
 
 
430
 
 
431
  @spaces.GPU(duration=120)
432
- def extract_glb(state, decimation_target, texture_size, req: gr.Request, progress=gr.Progress(track_tqdm=True)):
433
- user_dir = os.path.join(TMP_DIR, str(req.session_hash))
434
- shape_slat, tex_slat, res = unpack_state(state)
435
  mesh = pipeline.decode_latent(shape_slat, tex_slat, res)[0]
436
  glb = o_voxel.postprocess.to_glb(
437
  vertices=mesh.vertices, faces=mesh.faces, attr_volume=mesh.attrs,
@@ -440,7 +325,6 @@ def extract_glb(state, decimation_target, texture_size, req: gr.Request, progres
440
  decimation_target=decimation_target, texture_size=texture_size,
441
  remesh=True, remesh_band=1, remesh_project=0, use_tqdm=True,
442
  )
443
- # Ry(180°) @ Rx(90°): (x,y,z) → (-x, -z, -y)
444
  rot = np.array([
445
  [-1, 0, 0, 0],
446
  [ 0, 0, -1, 0],
@@ -448,153 +332,13 @@ def extract_glb(state, decimation_target, texture_size, req: gr.Request, progres
448
  [ 0, 0, 0, 1],
449
  ], dtype=np.float64)
450
  glb.apply_transform(rot)
451
- now = datetime.now()
452
- timestamp = now.strftime("%Y-%m-%dT%H%M%S") + f".{now.microsecond // 1000:03d}"
453
- os.makedirs(user_dir, exist_ok=True)
454
- glb_path = os.path.join(user_dir, f'sample_{timestamp}.glb')
455
- glb.export(glb_path, extension_webp=True)
456
- torch.cuda.empty_cache()
457
- return glb_path, glb_path
458
-
459
-
460
- # ============================================================================
461
- # Gradio UI
462
- # ============================================================================
463
-
464
- with gr.Blocks(delete_cache=(600, 600)) as demo:
465
- gr.Markdown("""
466
- ## Pixal3D: Pixel-Aligned 3D Generation from Images
467
- [[Project Page](https://ldyang694.github.io/projects/pixal3d/)]
468
- * Upload an image and click **Generate** to create a 3D asset using Pixal3D with TRELLIS.2 backbone.
469
- * Click **Extract GLB** to export and download the generated GLB file.
470
- * Camera parameters are estimated automatically via MoGe-2.
471
- """)
472
-
473
- with gr.Row():
474
- with gr.Column(scale=1, min_width=360):
475
- image_prompt = gr.Image(label="Image Prompt", format="png", image_mode="RGBA", type="pil", height=400)
476
- resolution = gr.Radio(["1024", "1536"], label="Resolution", value="1536")
477
- seed = gr.Slider(0, MAX_SEED, label="Seed", value=42, step=1)
478
- randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
479
- decimation_target = gr.Slider(100000, 1000000, label="Decimation Target", value=1000000, step=10000)
480
- texture_size = gr.Slider(1024, 4096, label="Texture Size", value=4096, step=1024)
481
- generate_btn = gr.Button("Generate")
482
-
483
- with gr.Accordion(label="Advanced Settings", open=False):
484
- gr.Markdown("Stage 1: Sparse Structure Generation")
485
- with gr.Row():
486
- ss_guidance_strength = gr.Slider(1.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
487
- ss_guidance_rescale = gr.Slider(0.0, 1.0, label="Guidance Rescale", value=0.7, step=0.01)
488
- ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
489
- ss_rescale_t = gr.Slider(1.0, 6.0, label="Rescale T", value=5.0, step=0.1)
490
- gr.Markdown("Stage 2: Shape Generation")
491
- with gr.Row():
492
- shape_slat_guidance_strength = gr.Slider(1.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
493
- shape_slat_guidance_rescale = gr.Slider(0.0, 1.0, label="Guidance Rescale", value=0.5, step=0.01)
494
- shape_slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
495
- shape_slat_rescale_t = gr.Slider(1.0, 6.0, label="Rescale T", value=3.0, step=0.1)
496
- gr.Markdown("Stage 3: Material Generation")
497
- with gr.Row():
498
- tex_slat_guidance_strength = gr.Slider(1.0, 10.0, label="Guidance Strength", value=1.0, step=0.1)
499
- tex_slat_guidance_rescale = gr.Slider(0.0, 1.0, label="Guidance Rescale", value=0.0, step=0.01)
500
- tex_slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
501
- tex_slat_rescale_t = gr.Slider(1.0, 6.0, label="Rescale T", value=3.0, step=0.1)
502
-
503
- with gr.Column(scale=10):
504
- with gr.Walkthrough(selected=0) as walkthrough:
505
- with gr.Step("Preview", id=0):
506
- preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
507
- extract_btn = gr.Button("Extract GLB")
508
- with gr.Step("Extract", id=1):
509
- glb_output = gr.Model3D(label="Extracted GLB", height=724, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0), camera_position=(-90, 90, None))
510
- download_btn = gr.DownloadButton(label="Download GLB")
511
-
512
- with gr.Column(scale=1, min_width=172):
513
- examples = gr.Examples(
514
- examples=[f'assets/example_image/{image}' for image in os.listdir("assets/example_image")],
515
- inputs=[image_prompt], fn=preprocess_image, outputs=[image_prompt],
516
- run_on_click=True, examples_per_page=18,
517
- )
518
-
519
- output_buf = gr.State()
520
-
521
- demo.load(start_session)
522
- demo.unload(end_session)
523
- image_prompt.upload(preprocess_image, inputs=[image_prompt], outputs=[image_prompt])
524
-
525
- generate_btn.click(get_seed, inputs=[randomize_seed, seed], outputs=[seed]).then(
526
- lambda: gr.Walkthrough(selected=0), outputs=walkthrough
527
- ).then(
528
- image_to_3d,
529
- inputs=[image_prompt, seed, resolution,
530
- ss_guidance_strength, ss_guidance_rescale, ss_sampling_steps, ss_rescale_t,
531
- shape_slat_guidance_strength, shape_slat_guidance_rescale, shape_slat_sampling_steps, shape_slat_rescale_t,
532
- tex_slat_guidance_strength, tex_slat_guidance_rescale, tex_slat_sampling_steps, tex_slat_rescale_t],
533
- outputs=[output_buf, preview_output],
534
- )
535
-
536
- extract_btn.click(lambda: gr.Walkthrough(selected=1), outputs=walkthrough).then(
537
- extract_glb, inputs=[output_buf, decimation_target, texture_size], outputs=[glb_output, download_btn],
538
- )
539
-
540
-
541
- # ============================================================================
542
- # Launch
543
- # ============================================================================
544
-
545
- def parse_args():
546
- parser = argparse.ArgumentParser(description="Pixal3D Gradio App")
547
- parser.add_argument("--model_path", type=str, default="TencentARC/Pixal3D-T",
548
- help="HuggingFace repo ID or local path (default: TencentARC/Pixal3D-T)")
549
- parser.add_argument("--port", type=int, default=7860)
550
- parser.add_argument("--share", action="store_true", default=True)
551
- return parser.parse_args()
552
 
 
 
553
 
554
  if __name__ == "__main__":
555
- args = parse_args()
556
- os.makedirs(TMP_DIR, exist_ok=True)
557
-
558
- # Construct UI icon base64
559
- for i in range(len(MODES)):
560
- icon = Image.open(MODES[i]['icon'])
561
- MODES[i]['icon_base64'] = image_to_base64(icon)
562
-
563
- # Load pipeline from HuggingFace or local path
564
- print(f"[Pipeline] Loading from {args.model_path}...")
565
- pipeline = Pixal3DImageTo3DPipeline.from_pretrained(args.model_path)
566
-
567
- # Load environment maps
568
- envmap = {
569
- 'forest': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread('assets/hdri/forest.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
570
- 'sunset': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread('assets/hdri/sunset.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
571
- 'courtyard': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread('assets/hdri/courtyard.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
572
- }
573
-
574
- # Build image cond models and set on pipeline
575
- print("[ImageCond] Building DinoV3ProjFeatureExtractor models...")
576
- pipeline.image_cond_model_ss = build_image_cond_model(IMAGE_COND_CONFIGS["ss"])
577
- pipeline.image_cond_model_shape_512 = build_image_cond_model(IMAGE_COND_CONFIGS["shape_512"])
578
- pipeline.image_cond_model_shape_1024 = build_image_cond_model(IMAGE_COND_CONFIGS["shape_1024"])
579
- pipeline.image_cond_model_tex_1024 = build_image_cond_model(IMAGE_COND_CONFIGS["tex_1024"])
580
-
581
- pipeline.cuda()
582
-
583
- # Pre-download NAF model (avoid lazy-loading during inference)
584
- print("[NAF] Pre-loading NAF upsampler model...")
585
- for attr in ['image_cond_model_ss', 'image_cond_model_shape_512', 'image_cond_model_shape_1024', 'image_cond_model_tex_1024']:
586
- model = getattr(pipeline, attr, None)
587
- if model is not None and getattr(model, 'use_naf_upsample', False):
588
- model._load_naf()
589
- print("[NAF] NAF model loaded.")
590
-
591
- # Load MoGe-2
592
- print("\n[MoGe-2] Loading model for camera estimation...")
593
- moge_model = load_moge_model(device="cuda")
594
-
595
- print(f"\n{'=' * 60}")
596
- print(f" Pixal3D ready! Model loaded from: {args.model_path}")
597
- print(f" Cascade: {CASCADE_LR_RESOLUTION} -> 1024/1536")
598
- print(f"{'=' * 60}\n")
599
-
600
- demo.launch(css=css, head=head, server_port=args.port, share=args.share)
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import subprocess
3
+ import argparse
4
+ import math
5
+ import time
6
+ import shutil
7
+ import cv2
8
+ import torch
9
+ import numpy as np
10
+ import base64
11
+ import io
12
+ import json
13
+ from datetime import datetime
14
+ from typing import *
15
+ from PIL import Image
16
+
17
+ # Re-install utils3d as in original app.py
18
  subprocess.run([
19
  "pip", "install", "--force-reinstall", "--no-deps",
20
  "https://github.com/LDYang694/Storages/releases/download/20260430/utils3d-0.0.2-py3-none-any.whl"
 
26
  os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autotune_cache.json')
27
  os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
28
 
29
+ import spaces
30
+ from gradio import Server
31
+ from gradio.data_classes import FileData
32
+ from fastapi.responses import HTMLResponse
33
+ from fastapi.staticfiles import StaticFiles
34
+
 
 
 
 
 
 
35
  from trellis2.modules.sparse import SparseTensor
36
  from trellis2.pipelines import Pixal3DImageTo3DPipeline
37
  from trellis2.renderers import EnvMap
38
  from trellis2.utils import render_utils
39
  import o_voxel
40
 
 
41
  # ============================================================================
42
  # Constants & Defaults
43
  # ============================================================================
44
 
45
  MAX_SEED = np.iinfo(np.int32).max
46
  TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
47
+ os.makedirs(TMP_DIR, exist_ok=True)
48
+
49
  MODES = [
50
  {"name": "Normal", "icon": "assets/app/normal.png", "render_key": "normal"},
51
  {"name": "Clay render", "icon": "assets/app/clay.png", "render_key": "clay"},
 
55
  {"name": "HDRI courtyard", "icon": "assets/app/hdri_courtyard.png", "render_key": "shaded_courtyard"},
56
  ]
57
  STEPS = 8
 
 
58
 
59
  # Cascade parameters
60
  CASCADE_LR_RESOLUTION = 512
 
66
  WILD_EXTEND_PIXEL = 0
67
  WILD_IMAGE_RESOLUTION = 512
68
 
69
+ # Image Cond Model configs
70
  IMAGE_COND_CONFIGS = {
71
  "ss": {
72
  "model_name": "camenduru/dinov3-vitl16-pretrain-lvd1689m",
 
96
  },
97
  }
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  # ============================================================================
100
+ # Model Loading
101
  # ============================================================================
102
 
103
  def build_image_cond_model(config: dict):
 
104
  from trellis2.trainers.flow_matching.mixins.image_conditioned_proj import DinoV3ProjFeatureExtractor
105
  model = DinoV3ProjFeatureExtractor(**config)
106
  model.eval()
107
  return model
108
 
109
+ def load_moge_model(device="cuda", model_name=MOGE_MODEL_NAME):
110
+ from moge.model.v2 import MoGeModel
111
+ moge_model = MoGeModel.from_pretrained(model_name).to(device)
112
+ moge_model.eval()
113
+ return moge_model
114
+
115
+ # Global instances (lazy loaded or loaded at start)
116
+ pipeline = None
117
+ moge_model = None
118
+ envmap = None
119
+
120
+ def init_models():
121
+ global pipeline, moge_model, envmap
122
+ if pipeline is not None:
123
+ return
124
+
125
+ model_path = "TencentARC/Pixal3D-T"
126
+ print(f"[Pipeline] Loading from {model_path}...")
127
+ pipeline = Pixal3DImageTo3DPipeline.from_pretrained(model_path)
128
+
129
+ print("[ImageCond] Building DinoV3ProjFeatureExtractor models...")
130
+ pipeline.image_cond_model_ss = build_image_cond_model(IMAGE_COND_CONFIGS["ss"])
131
+ pipeline.image_cond_model_shape_512 = build_image_cond_model(IMAGE_COND_CONFIGS["shape_512"])
132
+ pipeline.image_cond_model_shape_1024 = build_image_cond_model(IMAGE_COND_CONFIGS["shape_1024"])
133
+ pipeline.image_cond_model_tex_1024 = build_image_cond_model(IMAGE_COND_CONFIGS["tex_1024"])
134
+
135
+ pipeline.cuda()
136
+
137
+ print("[NAF] Pre-loading NAF upsampler model...")
138
+ for attr in ['image_cond_model_ss', 'image_cond_model_shape_512', 'image_cond_model_shape_1024', 'image_cond_model_tex_1024']:
139
+ model = getattr(pipeline, attr, None)
140
+ if model is not None and getattr(model, 'use_naf_upsample', False):
141
+ model._load_naf()
142
+
143
+ print("[MoGe-2] Loading model for camera estimation...")
144
+ moge_model = load_moge_model(device="cuda")
145
+
146
+ print("[EnvMap] Loading environment maps...")
147
+ envmap = {
148
+ 'forest': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread('assets/hdri/forest.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
149
+ 'sunset': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread('assets/hdri/sunset.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
150
+ 'courtyard': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread('assets/hdri/courtyard.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
151
+ }
152
 
153
  # ============================================================================
154
+ # Utilities
155
  # ============================================================================
156
 
157
  def compute_f_pixels(camera_angle_x: float, resolution: int) -> float:
 
159
  f_pixels = focal_length * resolution / 32.0
160
  return float(f_pixels.item())
161
 
 
162
  def distance_from_fov(camera_angle_x, grid_point, target_point, mesh_scale, image_resolution):
163
  rotation_matrix = torch.tensor([[1.0, 0.0, 0.0], [0.0, 0.0, -1.0], [0.0, 1.0, 0.0]])
164
  gp = grid_point.to(torch.float32) @ rotation_matrix.T
 
171
  distance_x = f_pixels * xw / x_ndc - yw
172
  return {"distance_from_x": float(distance_x), "f_pixels": float(f_pixels)}
173
 
174
+ def get_camera_params_wild_moge(image_path, device="cuda", mesh_scale=1.0, extend_pixel=0, image_resolution=512):
175
+ pil_image = Image.open(image_path).convert("RGB")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  width, height = pil_image.size
177
  image_np = np.array(pil_image).astype(np.float32) / 255.0
178
  image_tensor = torch.from_numpy(image_np).permute(2, 0, 1).to(device)
 
191
  )["distance_from_x"]
192
  return {'camera_angle_x': camera_angle_x, 'distance': distance, 'mesh_scale': mesh_scale}
193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
def pack_state(shape_slat, tex_slat, res):
    """Serialize the generation latents to a compressed ``.npz`` file.

    Args:
        shape_slat: Sparse latent tensor holding shape features.
        tex_slat: Sparse latent tensor holding texture features; shares its
            sparse coordinates with ``shape_slat``.
        res: Voxel grid resolution associated with the latents.

    Returns:
        str: Path of the written ``.npz`` state file inside ``TMP_DIR``.
    """
    # Make sure the scratch directory exists before writing (robust against
    # a fresh deployment where TMP_DIR has not been created yet).
    os.makedirs(TMP_DIR, exist_ok=True)
    state_data = {
        'shape_slat_feats': shape_slat.feats.cpu().numpy(),
        'tex_slat_feats': tex_slat.feats.cpu().numpy(),
        # Shape and texture latents share one set of sparse coordinates.
        'coords': shape_slat.coords.cpu().numpy(),
        'res': res,
    }
    # Millisecond timestamp keeps concurrent sessions from clobbering files.
    state_path = os.path.join(TMP_DIR, f"state_{int(time.time()*1000)}.npz")
    np.savez_compressed(state_path, **state_data)
    return state_path
204
 
205
def unpack_state(state_path):
    """Reload latents previously saved by :func:`pack_state`.

    Args:
        state_path: Path to the ``.npz`` state file.

    Returns:
        tuple: ``(shape_slat, tex_slat, res)`` with tensors moved to CUDA.
    """
    # np.load on an .npz returns an NpzFile that keeps the archive open;
    # use it as a context manager so the file handle is always released.
    with np.load(state_path) as data:
        shape_slat = SparseTensor(
            feats=torch.from_numpy(data['shape_slat_feats']).cuda(),
            coords=torch.from_numpy(data['coords']).cuda(),
        )
        tex_slat = shape_slat.replace(torch.from_numpy(data['tex_slat_feats']).cuda())
        return shape_slat, tex_slat, int(data['res'])
213
 
214
+ # ============================================================================
215
+ # API Implementation
216
+ # ============================================================================
217
 
218
+ app = Server()
 
219
 
220
@app.get("/")
async def homepage():
    """Serve the static frontend (index.html) located next to this script."""
    app_dir = os.path.dirname(os.path.abspath(__file__))
    page_path = os.path.join(app_dir, "index.html")
    with open(page_path, encoding="utf-8") as fh:
        markup = fh.read()
    return HTMLResponse(content=markup)
225
 
226
@app.api()
def preprocess(image: FileData) -> FileData:
    """Run the pipeline's image preprocessing and save the result as a PNG.

    Returns a FileData pointing at the preprocessed image in ``TMP_DIR``.
    """
    init_models()
    source = Image.open(image.path)
    result = pipeline.preprocess_image(source)
    timestamp_ms = int(time.time() * 1000)
    destination = os.path.join(TMP_DIR, f"preprocessed_{timestamp_ms}.png")
    result.save(destination)
    return FileData(path=destination)
234
 
235
+ @app.api()
236
  @spaces.GPU(duration=120)
237
+ def generate_3d(
238
+ image: FileData,
239
+ seed: int,
240
+ resolution: int,
241
+ ss_guidance_strength: float = 7.5,
242
+ ss_guidance_rescale: float = 0.7,
243
+ ss_sampling_steps: int = 12,
244
+ ss_rescale_t: float = 5.0,
245
+ shape_slat_guidance_strength: float = 7.5,
246
+ shape_slat_guidance_rescale: float = 0.5,
247
+ shape_slat_sampling_steps: int = 12,
248
+ shape_slat_rescale_t: float = 3.0,
249
+ tex_slat_guidance_strength: float = 1.0,
250
+ tex_slat_guidance_rescale: float = 0.0,
251
+ tex_slat_sampling_steps: int = 12,
252
+ tex_slat_rescale_t: float = 3.0,
253
+ ) -> Dict:
254
+ init_models()
255
  torch.manual_seed(seed)
256
  hr_resolution = int(resolution)
257
+
258
+ img = Image.open(image.path)
259
+ image_preprocessed = pipeline.preprocess_image(img)
260
+ temp_processed_path = os.path.join(TMP_DIR, "temp_proc.png")
261
+ image_preprocessed.save(temp_processed_path)
262
+
 
 
 
 
263
  camera_params = get_camera_params_wild_moge(
264
+ temp_processed_path, device="cuda",
265
  mesh_scale=WILD_MESH_SCALE, extend_pixel=WILD_EXTEND_PIXEL,
266
  image_resolution=WILD_IMAGE_RESOLUTION,
267
  )
268
+
269
  ss_sampler_override = {"steps": ss_sampling_steps, "guidance_strength": ss_guidance_strength,
270
  "guidance_rescale": ss_guidance_rescale, "rescale_t": ss_rescale_t}
271
  shape_sampler_override = {"steps": shape_slat_sampling_steps, "guidance_strength": shape_slat_guidance_strength,
 
273
  tex_sampler_override = {"steps": tex_slat_sampling_steps, "guidance_strength": tex_slat_guidance_strength,
274
  "guidance_rescale": tex_slat_guidance_rescale, "rescale_t": tex_slat_rescale_t}
275
 
 
276
  pipeline_type = f"{hr_resolution}_cascade"
277
  mesh_list, (shape_slat, tex_slat, res) = pipeline.run(
278
  image_preprocessed,
 
286
  pipeline_type=pipeline_type,
287
  max_num_tokens=CASCADE_MAX_NUM_TOKENS,
288
  )
289
+
290
  mesh = mesh_list[0]
291
+ state_path = pack_state(shape_slat, tex_slat, res)
292
+
 
 
 
293
  mesh.simplify(16777216)
294
+ renders = render_utils.render_proj_aligned_video(
295
  mesh, camera_angle_x=camera_params['camera_angle_x'],
296
  distance=camera_params['distance'], resolution=1024,
297
  num_frames=STEPS, envmap=envmap,
298
  )
299
+
300
+ # Save renders and return paths
301
+ render_files = {}
302
+ for mode_key, frames in renders.items():
303
+ mode_files = []
304
+ for i, frame in enumerate(frames):
305
+ p = os.path.abspath(os.path.join(TMP_DIR, f"render_{mode_key}_{i}_{int(time.time()*1000)}.jpg"))
306
+ Image.fromarray(frame).save(p, quality=85)
307
+ mode_files.append(FileData(path=p))
308
+ render_files[mode_key] = mode_files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
 
310
+ return {
311
+ "render_paths": render_files,
312
+ "state_path": os.path.abspath(state_path)
313
+ }
314
 
315
+ @app.api()
316
  @spaces.GPU(duration=120)
317
+ def extract_glb_api(state_path: str, decimation_target: int, texture_size: int) -> FileData:
318
+ init_models()
319
+ shape_slat, tex_slat, res = unpack_state(state_path)
320
  mesh = pipeline.decode_latent(shape_slat, tex_slat, res)[0]
321
  glb = o_voxel.postprocess.to_glb(
322
  vertices=mesh.vertices, faces=mesh.faces, attr_volume=mesh.attrs,
 
325
  decimation_target=decimation_target, texture_size=texture_size,
326
  remesh=True, remesh_band=1, remesh_project=0, use_tqdm=True,
327
  )
 
328
  rot = np.array([
329
  [-1, 0, 0, 0],
330
  [ 0, 0, -1, 0],
 
332
  [ 0, 0, 0, 1],
333
  ], dtype=np.float64)
334
  glb.apply_transform(rot)
335
+
336
+ out_glb = os.path.join(TMP_DIR, f"result_{int(time.time()*1000)}.glb")
337
+ glb.export(out_glb, extension_webp=True)
338
+ return FileData(path=out_glb)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
 
340
+ # Mount assets and tmp for direct access if needed (though FileData handles it)
341
+ app.mount("/assets", StaticFiles(directory="assets"), name="assets")
342
 
343
  if __name__ == "__main__":
344
+ app.launch(show_error=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
index.html ADDED
@@ -0,0 +1,855 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Pixal3D | Premium 3D Generation</title>
7
+ <meta name="description" content="State-of-the-art pixel-aligned 3D generation from a single image.">
8
+
9
+ <!-- Fonts -->
10
+ <link rel="preconnect" href="https://fonts.googleapis.com">
11
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
12
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Outfit:wght@400;500;600;700;800&display=swap" rel="stylesheet">
13
+
14
+ <!-- Icons & Components -->
15
+ <script src="https://unpkg.com/lucide@latest"></script>
16
+ <script type="module" src="https://ajax.googleapis.com/ajax/libs/model-viewer/4.0.0/model-viewer.min.js"></script>
17
+
18
+ <style>
19
+ :root {
20
+ --primary: #6366f1;
21
+ --primary-hover: #4f46e5;
22
+ --bg: #0f172a;
23
+ --card-bg: rgba(30, 41, 59, 0.7);
24
+ --border: rgba(255, 255, 255, 0.1);
25
+ --text: #f8fafc;
26
+ --text-muted: #94a3b8;
27
+ --accent: #10b981;
28
+ --glass: rgba(255, 255, 255, 0.03);
29
+ --glass-border: rgba(255, 255, 255, 0.08);
30
+ }
31
+
32
+ * {
33
+ margin: 0;
34
+ padding: 0;
35
+ box-sizing: border-box;
36
+ }
37
+
38
+ body {
39
+ font-family: 'Inter', sans-serif;
40
+ background: var(--bg);
41
+ color: var(--text);
42
+ line-height: 1.6;
43
+ overflow-x: hidden;
44
+ background: radial-gradient(circle at top right, #1e1b4b, transparent),
45
+ radial-gradient(circle at bottom left, #0f172a, transparent);
46
+ min-height: 100vh;
47
+ }
48
+
49
+ h1, h2, h3 {
50
+ font-family: 'Outfit', sans-serif;
51
+ font-weight: 700;
52
+ }
53
+
54
+ .container {
55
+ max-width: 1200px;
56
+ margin: 0 auto;
57
+ padding: 2rem;
58
+ }
59
+
60
+ /* Header */
61
+ header {
62
+ text-align: center;
63
+ margin-bottom: 3rem;
64
+ animation: fadeInDown 0.8s ease-out;
65
+ }
66
+
67
+ .logo-container {
68
+ display: inline-flex;
69
+ align-items: center;
70
+ gap: 1rem;
71
+ margin-bottom: 1rem;
72
+ }
73
+
74
+ .logo-icon {
75
+ color: var(--primary);
76
+ width: 40px;
77
+ height: 40px;
78
+ }
79
+
80
+ h1 {
81
+ font-size: 3rem;
82
+ background: linear-gradient(135deg, #fff 0%, #94a3b8 100%);
83
+ -webkit-background-clip: text;
84
+ -webkit-text-fill-color: transparent;
85
+ letter-spacing: -0.02em;
86
+ }
87
+
88
+ header p {
89
+ color: var(--text-muted);
90
+ font-size: 1.1rem;
91
+ max-width: 600px;
92
+ margin: 0 auto;
93
+ }
94
+
95
+ /* Main Grid */
96
+ .main-grid {
97
+ display: grid;
98
+ grid-template-columns: 1fr 1fr;
99
+ gap: 2rem;
100
+ align-items: start;
101
+ }
102
+
103
+ @media (max-width: 968px) {
104
+ .main-grid {
105
+ grid-template-columns: 1fr;
106
+ }
107
+ }
108
+
109
+ /* Card Style */
110
+ .card {
111
+ background: var(--card-bg);
112
+ backdrop-filter: blur(12px);
113
+ border: 1px solid var(--border);
114
+ border-radius: 24px;
115
+ padding: 2rem;
116
+ box-shadow: 0 20px 50px rgba(0, 0, 0, 0.3);
117
+ transition: transform 0.3s ease, box-shadow 0.3s ease;
118
+ }
119
+
120
+ .card:hover {
121
+ box-shadow: 0 30px 60px rgba(0, 0, 0, 0.4);
122
+ }
123
+
124
+ /* Upload Area */
125
+ .upload-area {
126
+ border: 2px dashed var(--border);
127
+ border-radius: 16px;
128
+ padding: 3rem;
129
+ text-align: center;
130
+ cursor: pointer;
131
+ transition: all 0.3s ease;
132
+ position: relative;
133
+ overflow: hidden;
134
+ background: var(--glass);
135
+ }
136
+
137
+ .upload-area:hover {
138
+ border-color: var(--primary);
139
+ background: rgba(99, 102, 241, 0.05);
140
+ }
141
+
142
+ .upload-area.dragging {
143
+ border-color: var(--accent);
144
+ background: rgba(16, 185, 129, 0.05);
145
+ transform: scale(1.02);
146
+ }
147
+
148
+ .upload-placeholder {
149
+ display: flex;
150
+ flex-direction: column;
151
+ align-items: center;
152
+ gap: 1rem;
153
+ }
154
+
155
+ .upload-icon {
156
+ width: 64px;
157
+ height: 64px;
158
+ color: var(--primary);
159
+ opacity: 0.8;
160
+ }
161
+
162
+ #preview-img {
163
+ max-width: 100%;
164
+ max-height: 400px;
165
+ border-radius: 12px;
166
+ display: none;
167
+ box-shadow: 0 10px 30px rgba(0, 0, 0, 0.5);
168
+ }
169
+
170
+ /* Controls */
171
+ .controls {
172
+ margin-top: 2rem;
173
+ display: flex;
174
+ flex-direction: column;
175
+ gap: 1.5rem;
176
+ }
177
+
178
+ .input-group {
179
+ display: flex;
180
+ flex-direction: column;
181
+ gap: 0.5rem;
182
+ }
183
+
184
+ .input-group label {
185
+ font-weight: 500;
186
+ color: var(--text-muted);
187
+ font-size: 0.9rem;
188
+ }
189
+
190
+ select, input[type="range"] {
191
+ width: 100%;
192
+ }
193
+
194
+ select {
195
+ background: #1e293b;
196
+ border: 1px solid var(--border);
197
+ color: white;
198
+ padding: 0.75rem;
199
+ border-radius: 8px;
200
+ outline: none;
201
+ cursor: pointer;
202
+ }
203
+
204
+ .btn {
205
+ background: var(--primary);
206
+ color: white;
207
+ border: none;
208
+ padding: 1rem 2rem;
209
+ border-radius: 12px;
210
+ font-weight: 600;
211
+ font-size: 1rem;
212
+ cursor: pointer;
213
+ transition: all 0.2s ease;
214
+ display: flex;
215
+ align-items: center;
216
+ justify-content: center;
217
+ gap: 0.5rem;
218
+ width: 100%;
219
+ }
220
+
221
+ .btn:hover {
222
+ background: var(--primary-hover);
223
+ transform: translateY(-2px);
224
+ }
225
+
226
+ .btn:active {
227
+ transform: translateY(0);
228
+ }
229
+
230
+ .btn:disabled {
231
+ background: #475569;
232
+ cursor: not-allowed;
233
+ transform: none;
234
+ }
235
+
236
+ .btn-secondary {
237
+ background: var(--glass);
238
+ border: 1px solid var(--border);
239
+ }
240
+
241
+ .btn-secondary:hover {
242
+ background: var(--border);
243
+ }
244
+
245
+ /* Result Area */
246
+ .result-container {
247
+ display: flex;
248
+ flex-direction: column;
249
+ gap: 2rem;
250
+ min-height: 500px;
251
+ justify-content: center;
252
+ }
253
+
254
+ .empty-state {
255
+ text-align: center;
256
+ color: var(--text-muted);
257
+ opacity: 0.5;
258
+ }
259
+
260
+ /* 3D Viewer / Frame Slider */
261
+ .viewer-container {
262
+ position: relative;
263
+ width: 100%;
264
+ aspect-ratio: 1/1;
265
+ background: #000;
266
+ border-radius: 16px;
267
+ overflow: hidden;
268
+ display: none;
269
+ box-shadow: inset 0 0 50px rgba(0,0,0,1);
270
+ }
271
+
272
+ .viewer-frame {
273
+ position: absolute;
274
+ inset: 0;
275
+ width: 100%;
276
+ height: 100%;
277
+ object-fit: contain;
278
+ display: none;
279
+ }
280
+
281
+ .viewer-frame.active {
282
+ display: block;
283
+ }
284
+
285
+ .viewer-controls {
286
+ position: absolute;
287
+ bottom: 20px;
288
+ left: 20px;
289
+ right: 20px;
290
+ display: flex;
291
+ flex-direction: column;
292
+ gap: 10px;
293
+ background: rgba(0,0,0,0.5);
294
+ backdrop-filter: blur(8px);
295
+ padding: 15px;
296
+ border-radius: 12px;
297
+ border: 1px solid rgba(255,255,255,0.1);
298
+ }
299
+
300
+ .mode-selector {
301
+ display: flex;
302
+ justify-content: center;
303
+ gap: 8px;
304
+ flex-wrap: wrap;
305
+ }
306
+
307
+ .mode-btn {
308
+ width: 32px;
309
+ height: 32px;
310
+ border-radius: 50%;
311
+ border: 2px solid transparent;
312
+ cursor: pointer;
313
+ transition: all 0.2s;
314
+ background-size: cover;
315
+ opacity: 0.6;
316
+ }
317
+
318
+ .mode-btn.active {
319
+ border-color: var(--primary);
320
+ transform: scale(1.1);
321
+ opacity: 1;
322
+ }
323
+
324
+ /* Slider Styling */
325
+ input[type="range"] {
326
+ -webkit-appearance: none;
327
+ height: 6px;
328
+ background: rgba(255,255,255,0.1);
329
+ border-radius: 5px;
330
+ }
331
+
332
+ input[type="range"]::-webkit-slider-thumb {
333
+ -webkit-appearance: none;
334
+ width: 18px;
335
+ height: 18px;
336
+ background: var(--primary);
337
+ border-radius: 50%;
338
+ cursor: pointer;
339
+ box-shadow: 0 0 10px rgba(99, 102, 241, 0.5);
340
+ }
341
+
342
+ /* Advanced Settings */
343
+ .advanced-toggle {
344
+ display: flex;
345
+ align-items: center;
346
+ justify-content: center;
347
+ gap: 0.5rem;
348
+ font-size: 0.85rem;
349
+ color: var(--text-muted);
350
+ cursor: pointer;
351
+ margin-top: 1rem;
352
+ transition: color 0.2s;
353
+ }
354
+
355
+ .advanced-toggle:hover {
356
+ color: var(--text);
357
+ }
358
+
359
+ .advanced-panel {
360
+ display: none;
361
+ flex-direction: column;
362
+ gap: 1.5rem;
363
+ margin-top: 1.5rem;
364
+ padding-top: 1.5rem;
365
+ border-top: 1px solid var(--border);
366
+ }
367
+
368
+ .advanced-panel.visible {
369
+ display: flex;
370
+ }
371
+
372
+ .advanced-section h4 {
373
+ font-size: 0.8rem;
374
+ text-transform: uppercase;
375
+ letter-spacing: 0.05em;
376
+ margin-bottom: 1rem;
377
+ color: var(--primary);
378
+ }
379
+
380
+ /* Examples */
381
+ .examples-section {
382
+ margin-top: 4rem;
383
+ }
384
+
385
+ .examples-grid {
386
+ display: grid;
387
+ grid-template-columns: repeat(auto-fill, minmax(120px, 1fr));
388
+ gap: 1rem;
389
+ margin-top: 1.5rem;
390
+ }
391
+
392
+ .example-item {
393
+ aspect-ratio: 1/1;
394
+ border-radius: 12px;
395
+ overflow: hidden;
396
+ cursor: pointer;
397
+ border: 2px solid transparent;
398
+ transition: all 0.2s;
399
+ }
400
+
401
+ .example-item:hover {
402
+ transform: scale(1.05);
403
+ border-color: var(--primary);
404
+ }
405
+
406
+ .example-item img {
407
+ width: 100%;
408
+ height: 100%;
409
+ object-fit: cover;
410
+ }
411
+
412
+ /* Loading Overlay */
413
+ #loading-overlay {
414
+ position: fixed;
415
+ inset: 0;
416
+ background: rgba(15, 23, 42, 0.8);
417
+ backdrop-filter: blur(8px);
418
+ z-index: 1000;
419
+ display: none;
420
+ flex-direction: column;
421
+ align-items: center;
422
+ justify-content: center;
423
+ gap: 1.5rem;
424
+ }
425
+
426
+ .loader {
427
+ width: 48px;
428
+ height: 48px;
429
+ border: 4px solid var(--primary);
430
+ border-bottom-color: transparent;
431
+ border-radius: 50%;
432
+ animation: rotation 1s linear infinite;
433
+ }
434
+
435
+ @keyframes rotation {
436
+ 0% { transform: rotate(0deg); }
437
+ 100% { transform: rotate(360deg); }
438
+ }
439
+
440
+ @keyframes fadeInDown {
441
+ from { opacity: 0; transform: translateY(-20px); }
442
+ to { opacity: 1; transform: translateY(0); }
443
+ }
444
+
445
+ /* Model Viewer */
446
+ #final-model-viewer {
447
+ width: 100%;
448
+ height: 500px;
449
+ background: #111;
450
+ border-radius: 16px;
451
+ display: none;
452
+ }
453
+
454
+ /* Toast */
455
+ #toast {
456
+ position: fixed;
457
+ bottom: 2rem;
458
+ right: 2rem;
459
+ background: #1e293b;
460
+ color: white;
461
+ padding: 1rem 1.5rem;
462
+ border-radius: 12px;
463
+ border-left: 4px solid var(--primary);
464
+ box-shadow: 0 10px 30px rgba(0,0,0,0.5);
465
+ display: none;
466
+ z-index: 2000;
467
+ animation: slideIn 0.3s ease-out;
468
+ }
469
+
470
+ @keyframes slideIn {
471
+ from { transform: translateX(100%); opacity: 0; }
472
+ to { transform: translateX(0); opacity: 1; }
473
+ }
474
+ </style>
475
+ </head>
476
+ <body>
477
+
478
+ <div class="container">
479
+ <header>
480
+ <div class="logo-container">
481
+ <i data-lucide="box" class="logo-icon"></i>
482
+ <h1>Pixal3D</h1>
483
+ </div>
484
+ <p>High-fidelity, pixel-aligned 3D asset generation from a single reference image. Powered by TRELLIS.2</p>
485
+ </header>
486
+
487
+ <div class="main-grid">
488
+ <!-- Left: Input -->
489
+ <div class="card">
490
+ <div class="upload-area" id="drop-zone" onclick="document.getElementById('file-input').click()">
491
+ <input type="file" id="file-input" hidden accept="image/*">
492
+ <div class="upload-placeholder" id="upload-placeholder">
493
+ <i data-lucide="image-up" class="upload-icon"></i>
494
+ <h3>Drop your image here</h3>
495
+ <p>or click to browse from files</p>
496
+ </div>
497
+ <img id="preview-img" src="" alt="Preview">
498
+ </div>
499
+
500
+ <div class="controls">
501
+ <div class="input-group">
502
+ <label>Resolution</label>
503
+ <select id="resolution">
504
+ <option value="1024">1024 (Balanced)</option>
505
+ <option value="1536" selected>1536 (High Quality)</option>
506
+ </select>
507
+ </div>
508
+
509
+ <div class="input-group">
510
+ <label>Seed</label>
511
+ <div style="display: flex; gap: 1rem;">
512
+ <input type="number" id="seed" value="42" style="flex: 1; background: #1e293b; border: 1px solid var(--border); color: white; padding: 0.5rem; border-radius: 8px;">
513
+ <button class="btn-secondary" style="padding: 0.5rem 1rem; border-radius: 8px;" onclick="randomizeSeed()">
514
+ <i data-lucide="shuffle" style="width: 16px;"></i>
515
+ </button>
516
+ </div>
517
+ </div>
518
+
519
+ <button class="btn" id="generate-btn" disabled>
520
+ <i data-lucide="zap"></i>
521
+ Generate 3D Asset
522
+ </button>
523
+
524
+ <div class="advanced-toggle" onclick="toggleAdvanced()">
525
+ <i data-lucide="settings-2" style="width: 14px;"></i>
526
+ Advanced Generation Parameters
527
+ </div>
528
+
529
+ <div class="advanced-panel" id="advanced-panel">
530
+ <div class="advanced-section">
531
+ <h4>Stage 1: Sparse Structure</h4>
532
+ <div class="input-group">
533
+ <label>Guidance Strength: <span id="ss_gs_val">7.5</span></label>
534
+ <input type="range" id="ss_gs" min="1" max="10" step="0.1" value="7.5" oninput="updateVal('ss_gs')">
535
+ </div>
536
+ <div class="input-group">
537
+ <label>Sampling Steps: <span id="ss_steps_val">12</span></label>
538
+ <input type="range" id="ss_steps" min="1" max="50" step="1" value="12" oninput="updateVal('ss_steps')">
539
+ </div>
540
+ </div>
541
+ <div class="advanced-section">
542
+ <h4>Stage 2: Shape</h4>
543
+ <div class="input-group">
544
+ <label>Guidance Strength: <span id="shape_gs_val">7.5</span></label>
545
+ <input type="range" id="shape_gs" min="1" max="10" step="0.1" value="7.5" oninput="updateVal('shape_gs')">
546
+ </div>
547
+ </div>
548
+ <div class="advanced-section">
549
+ <h4>Export Settings</h4>
550
+ <div class="input-group">
551
+ <label>Decimation Target: <span id="decim_val">1,000,000</span></label>
552
+ <input type="range" id="decimation" min="100000" max="1000000" step="10000" value="1000000" oninput="updateVal('decimation')">
553
+ </div>
554
+ <div class="input-group">
555
+ <label>Texture Size: <span id="tex_val">4096</span></label>
556
+ <input type="range" id="tex_size" min="1024" max="4096" step="1024" value="4096" oninput="updateVal('tex_size')">
557
+ </div>
558
+ </div>
559
+ </div>
560
+ </div>
561
+ </div>
562
+
563
+ <!-- Right: Results -->
564
+ <div class="card">
565
+ <div class="result-container" id="result-container">
566
+ <div class="empty-state" id="empty-state">
567
+ <i data-lucide="rocket" style="width: 48px; height: 48px; margin-bottom: 1rem; opacity: 0.3;"></i>
568
+ <p>Upload an image to start generating</p>
569
+ </div>
570
+
571
+ <div class="viewer-container" id="preview-viewer">
572
+ <div id="frame-stack"></div>
573
+ <div class="viewer-controls">
574
+ <div class="mode-selector" id="mode-selector"></div>
575
+ <input type="range" id="angle-slider" min="0" max="7" value="0" step="1">
576
+ <div style="display: flex; justify-content: space-between; font-size: 0.7rem; color: var(--text-muted); padding: 0 5px;">
577
+ <span>-90°</span>
578
+ <span>Rotation Scrub</span>
579
+ <span>+90°</span>
580
+ </div>
581
+ </div>
582
+ </div>
583
+
584
+ <model-viewer id="final-model-viewer"
585
+ camera-controls
586
+ auto-rotate
587
+ shadow-intensity="1"
588
+ environment-image="neutral"
589
+ exposure="1">
590
+ </model-viewer>
591
+
592
+ <div id="result-actions" style="display: none; flex-direction: column; gap: 1rem;">
593
+ <button class="btn" id="extract-btn">
594
+ <i data-lucide="box"></i>
595
+ Extract & Optimize GLB
596
+ </button>
597
+ <a id="download-link" style="text-decoration: none;">
598
+ <button class="btn btn-secondary" id="download-btn" style="display: none;">
599
+ <i data-lucide="download"></i>
600
+ Download GLB
601
+ </button>
602
+ </a>
603
+ </div>
604
+ </div>
605
+ </div>
606
+ </div>
607
+
608
+ <!-- Examples -->
609
+ <div class="examples-section">
610
+ <h3>Try an Example</h3>
611
+ <div class="examples-grid" id="examples-grid">
612
+ <!-- Will be populated by JS -->
613
+ </div>
614
+ </div>
615
+ </div>
616
+
617
+ <div id="loading-overlay">
618
+ <div class="loader"></div>
619
+ <h3 id="loading-text">Generating Magic...</h3>
620
+ <p style="color: var(--text-muted); font-size: 0.9rem;">This usually takes about 60-90 seconds</p>
621
+ </div>
622
+
623
+ <div id="toast"></div>
624
+
625
+ <script type="module">
626
+ import { Client, handle_file } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
627
+
628
+ let client;
629
+ let currentFile = null;
630
+ let generationResult = null;
631
+ let currentMode = "shaded_forest";
632
+ let currentFrame = 0;
633
+
634
+ const MODES = [
635
+ { name: "Normal", key: "normal", color: "#888" },
636
+ { name: "Clay", key: "clay", color: "#d2b48c" },
637
+ { name: "Base Color", key: "base_color", color: "#fff" },
638
+ { name: "Forest", key: "shaded_forest", color: "#228b22" },
639
+ { name: "Sunset", key: "shaded_sunset", color: "#ff4500" },
640
+ { name: "Courtyard", key: "shaded_courtyard", color: "#4682b4" }
641
+ ];
642
+
643
+ async function init() {
644
+ lucide.createIcons();
645
+ try {
646
+ client = await Client.connect(window.location.origin);
647
+ setupEventListeners();
648
+ loadExamples();
649
+ } catch (err) {
650
+ console.error("Failed to connect to Gradio Server:", err);
651
+ showToast("Connection failed. Please refresh.");
652
+ }
653
+ }
654
+
655
+ function setupEventListeners() {
656
+ const dropZone = document.getElementById('drop-zone');
657
+ const fileInput = document.getElementById('file-input');
658
+
659
+ dropZone.ondragover = (e) => { e.preventDefault(); dropZone.classList.add('dragging'); };
660
+ dropZone.ondragleave = () => dropZone.classList.remove('dragging');
661
+ dropZone.ondrop = (e) => {
662
+ e.preventDefault();
663
+ dropZone.classList.remove('dragging');
664
+ if (e.dataTransfer.files.length) handleFile(e.dataTransfer.files[0]);
665
+ };
666
+
667
+ fileInput.onchange = (e) => {
668
+ if (e.target.files.length) handleFile(e.target.files[0]);
669
+ };
670
+
671
+ document.getElementById('generate-btn').onclick = generate;
672
+ document.getElementById('extract-btn').onclick = extract;
673
+ document.getElementById('angle-slider').oninput = (e) => {
674
+ currentFrame = parseInt(e.target.value);
675
+ updateFrameVisibility();
676
+ };
677
+
678
+ // Mode selector
679
+ const selector = document.getElementById('mode-selector');
680
+ MODES.forEach(m => {
681
+ const btn = document.createElement('div');
682
+ btn.className = 'mode-btn' + (m.key === currentMode ? ' active' : '');
683
+ btn.style.backgroundColor = m.color;
684
+ btn.title = m.name;
685
+ btn.onclick = () => {
686
+ currentMode = m.key;
687
+ document.querySelectorAll('.mode-btn').forEach(b => b.classList.remove('active'));
688
+ btn.classList.add('active');
689
+ updateFrameVisibility();
690
+ };
691
+ selector.appendChild(btn);
692
+ });
693
+ }
694
+
695
+ async function handleFile(file) {
696
+ currentFile = file;
697
+ const reader = new FileReader();
698
+ reader.onload = (e) => {
699
+ const preview = document.getElementById('preview-img');
700
+ const placeholder = document.getElementById('upload-placeholder');
701
+ preview.src = e.target.result;
702
+ preview.style.display = 'block';
703
+ placeholder.style.display = 'none';
704
+ document.getElementById('generate-btn').disabled = false;
705
+ };
706
+ reader.readAsDataURL(file);
707
+
708
+ // Auto-preprocess
709
+ try {
710
+ const result = await client.predict("/preprocess", { image: handle_file(file) });
711
+ // We don't necessarily need to do anything with the result yet,
712
+ // but it warms up the server.
713
+ } catch (err) {
714
+ console.error("Preprocess error:", err);
715
+ }
716
+ }
717
+
718
+ async function generate() {
719
+ if (!currentFile) return;
720
+
721
+ showLoading("Generating 3D Asset...");
722
+ try {
723
+ const params = {
724
+ image: handle_file(currentFile),
725
+ seed: parseInt(document.getElementById('seed').value),
726
+ resolution: parseInt(document.getElementById('resolution').value),
727
+ ss_guidance_strength: parseFloat(document.getElementById('ss_gs').value),
728
+ ss_sampling_steps: parseInt(document.getElementById('ss_steps').value),
729
+ shape_slat_guidance_strength: parseFloat(document.getElementById('shape_gs').value)
730
+ // Others use defaults
731
+ };
732
+
733
+ const result = await client.predict("/generate_3d", params);
734
+ generationResult = result.data[0];
735
+ renderPreview(generationResult.render_paths);
736
+
737
+ document.getElementById('empty-state').style.display = 'none';
738
+ document.getElementById('preview-viewer').style.display = 'block';
739
+ document.getElementById('result-actions').style.display = 'flex';
740
+ document.getElementById('final-model-viewer').style.display = 'none';
741
+ document.getElementById('download-btn').style.display = 'none';
742
+
743
+ hideLoading();
744
+ showToast("Generation complete!");
745
+ } catch (err) {
746
+ console.error("Generation error:", err);
747
+ hideLoading();
748
+ showToast("Generation failed. Check console.");
749
+ }
750
+ }
751
+
752
+ function renderPreview(renderPaths) {
753
+ const stack = document.getElementById('frame-stack');
754
+ stack.innerHTML = '';
755
+
756
+ Object.entries(renderPaths).forEach(([mode, files]) => {
757
+ files.forEach((file, i) => {
758
+ const img = document.getElementById(`f-${mode}-${i}`) || document.createElement('img');
759
+ img.src = file.url;
760
+ img.className = 'viewer-frame';
761
+ img.dataset.mode = mode;
762
+ img.dataset.frame = i;
763
+ img.id = `f-${mode}-${i}`;
764
+ if (!img.parentElement) stack.appendChild(img);
765
+ });
766
+ });
767
+ updateFrameVisibility();
768
+ }
769
+
770
+ function updateFrameVisibility() {
771
+ document.querySelectorAll('.viewer-frame').forEach(f => f.classList.remove('active'));
772
+ const active = document.getElementById(`f-${currentMode}-${currentFrame}`);
773
+ if (active) active.classList.add('active');
774
+ }
775
+
776
+ async function extract() {
777
+ if (!generationResult) return;
778
+
779
+ showLoading("Extracting & Optimizing GLB...");
780
+ try {
781
+ const params = {
782
+ state_path: generationResult.state_path,
783
+ decimation_target: parseInt(document.getElementById('decimation').value),
784
+ texture_size: parseInt(document.getElementById('tex_size').value)
785
+ };
786
+
787
+ const result = await client.predict("/extract_glb_api", params);
788
+ const glbUrl = result.data[0].url;
789
+
790
+ const viewer = document.getElementById('final-model-viewer');
791
+ viewer.src = glbUrl;
792
+ viewer.style.display = 'block';
793
+ document.getElementById('preview-viewer').style.display = 'none';
794
+
795
+ const dlBtn = document.getElementById('download-btn');
796
+ dlBtn.style.display = 'flex';
797
+ document.getElementById('download-link').href = glbUrl;
798
+ document.getElementById('download-link').download = "pixal3d_asset.glb";
799
+
800
+ hideLoading();
801
+ showToast("GLB Extracted successfully!");
802
+ } catch (err) {
803
+ console.error("Extraction error:", err);
804
+ hideLoading();
805
+ showToast("Extraction failed.");
806
+ }
807
+ }
808
+
809
+ function loadExamples() {
810
+ const grid = document.getElementById('examples-grid');
811
+ const examples = [
812
+ 'assets/example_image/0a34fae7ba57cb8870df5325b9c30ea474def1b0913c19c596655b85a79fdee4.webp',
813
+ 'assets/example_image/0e4984a9b3765ce80e9853443f9319ecedf90885c74b56cccfebc09402740f8a.webp',
814
+ 'assets/example_image/130c2b18f1651a70f8aa15b2c99f8dba29bb943044d92871f9223bd3e989e8b1.webp',
815
+ 'assets/example_image/22a868bac8e62511fccd2bc82ed31ae77ed31ae2a8a149be7150957f11b30c9b.webp',
816
+ 'assets/example_image/3903b87907a6b4947006e6fc7c0c64f40cd98932a02bf0ecf7d6dfae776f3a38.webp',
817
+ 'assets/example_image/4bc7abe209c8673dd3766ee4fad14d40acbed02d118e7629f645c60fd77313f1.webp'
818
+ ];
819
+
820
+ // Global UI helpers
821
+ window.toggleAdvanced = () => {
822
+ document.getElementById('advanced-panel').classList.toggle('visible');
823
+ };
824
+
825
+ window.updateVal = (id) => {
826
+ const val = document.getElementById(id).value;
827
+ let displayVal = val;
828
+ if (id === 'decimation') displayVal = parseInt(val).toLocaleString();
829
+ document.getElementById(id + '_val').textContent = displayVal;
830
+ };
831
+
832
+ window.randomizeSeed = () => {
833
+ document.getElementById('seed').value = Math.floor(Math.random() * 1000000);
834
+ };
835
+
836
+ function showLoading(text) {
837
+ document.getElementById('loading-text').textContent = text;
838
+ document.getElementById('loading-overlay').style.display = 'flex';
839
+ }
840
+
841
+ function hideLoading() {
842
+ document.getElementById('loading-overlay').style.display = 'none';
843
+ }
844
+
845
+ function showToast(msg) {
846
+ const toast = document.getElementById('toast');
847
+ toast.textContent = msg;
848
+ toast.style.display = 'block';
849
+ setTimeout(() => { toast.style.display = 'none'; }, 3000);
850
+ }
851
+
852
+ init();
853
+ </script>
854
+ </body>
855
+ </html>