Yang2001 committed on
Commit
f7a2756
·
1 Parent(s): 6e58504

chore: update app.py, add app_bak.py, update requirements and autotune cache

Browse files
Files changed (4) hide show
  1. app.py +207 -485
  2. app_bak.py +493 -0
  3. autotune_cache.json +0 -0
  4. requirements.txt +0 -27
app.py CHANGED
@@ -1,493 +1,215 @@
1
- import os
2
- import subprocess
3
- import argparse
4
- import math
5
- import time
6
- import shutil
7
- import cv2
8
- import torch
9
- import numpy as np
10
- import base64
11
- import io
12
- import json
13
- from datetime import datetime
14
- from typing import *
15
- from PIL import Image
16
-
17
- import threading
18
- try:
19
- import nest_asyncio
20
- nest_asyncio.apply()
21
- except ImportError:
22
- pass
23
-
24
- # Lock for model initialization
25
- init_lock = threading.Lock()
26
-
27
- os.environ['OPENCV_IO_ENABLE_OPENEXR'] = '1'
28
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
29
- os.environ["ATTN_BACKEND"] = "flash_attn_3"
30
- os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autotune_cache.json')
31
- os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
32
-
33
- import spaces
34
- from gradio import Server
35
- from gradio.data_classes import FileData
36
- from fastapi.responses import HTMLResponse
37
- from fastapi.staticfiles import StaticFiles
38
-
39
- from trellis2.modules.sparse import SparseTensor
40
- from trellis2.pipelines import Pixal3DImageTo3DPipeline
41
- from trellis2.renderers import EnvMap
42
- from trellis2.utils import render_utils
43
- import o_voxel
44
-
45
- # ============================================================================
46
- # Constants & Defaults
47
- # ============================================================================
48
-
49
- MAX_SEED = np.iinfo(np.int32).max
50
- TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
51
- os.makedirs(TMP_DIR, exist_ok=True)
52
-
53
- MODES = [
54
- {"name": "Normal", "icon": "assets/app/normal.png", "render_key": "normal"},
55
- {"name": "Clay render", "icon": "assets/app/clay.png", "render_key": "clay"},
56
- {"name": "Base color", "icon": "assets/app/basecolor.png", "render_key": "base_color"},
57
- {"name": "HDRI forest", "icon": "assets/app/hdri_forest.png", "render_key": "shaded_forest"},
58
- {"name": "HDRI sunset", "icon": "assets/app/hdri_sunset.png", "render_key": "shaded_sunset"},
59
- {"name": "HDRI courtyard", "icon": "assets/app/hdri_courtyard.png", "render_key": "shaded_courtyard"},
60
- ]
61
- STEPS = 8
62
-
63
- # Cascade parameters
64
- CASCADE_LR_RESOLUTION = 512
65
- CASCADE_MAX_NUM_TOKENS = 49152
66
-
67
- # MoGe defaults
68
- MOGE_MODEL_NAME = "Ruicheng/moge-2-vitl"
69
- WILD_MESH_SCALE = 1.0
70
- WILD_EXTEND_PIXEL = 0
71
- WILD_IMAGE_RESOLUTION = 512
72
-
73
- # Image Cond Model configs
74
- IMAGE_COND_CONFIGS = {
75
- "ss": {
76
- "model_name": "camenduru/dinov3-vitl16-pretrain-lvd1689m",
77
- "image_size": 512,
78
- "grid_resolution": 16,
79
- },
80
- "shape_512": {
81
- "model_name": "camenduru/dinov3-vitl16-pretrain-lvd1689m",
82
- "image_size": 512,
83
- "grid_resolution": 32,
84
- "use_naf_upsample": True,
85
- "naf_target_size": 512,
86
- },
87
- "shape_1024": {
88
- "model_name": "camenduru/dinov3-vitl16-pretrain-lvd1689m",
89
- "image_size": 1024,
90
- "grid_resolution": 64,
91
- "use_naf_upsample": True,
92
- "naf_target_size": 512,
93
- },
94
- "tex_1024": {
95
- "model_name": "camenduru/dinov3-vitl16-pretrain-lvd1689m",
96
- "image_size": 1024,
97
- "grid_resolution": 64,
98
- "use_naf_upsample": True,
99
- "naf_target_size": 1024,
100
- },
101
- }
102
-
103
- # ============================================================================
104
- # Model Loading
105
- # ============================================================================
106
 
107
- def build_image_cond_model(config: dict):
108
- from trellis2.trainers.flow_matching.mixins.image_conditioned_proj import DinoV3ProjFeatureExtractor
109
- model = DinoV3ProjFeatureExtractor(**config)
110
- model.eval()
111
- return model
112
 
113
- def load_moge_model(device="cuda", model_name=MOGE_MODEL_NAME):
114
- from moge.model.v2 import MoGeModel
115
- moge_model = MoGeModel.from_pretrained(model_name).to(device)
116
- moge_model.eval()
117
- return moge_model
118
-
119
- # Global instances (lazy loaded or loaded at start)
120
- pipeline = None
121
- moge_model = None
122
- envmap = None
123
-
124
- def init_models():
125
- global pipeline, moge_model, envmap
126
- with init_lock:
127
- if pipeline is not None:
128
- return
129
-
130
- # GPU / CUDA Diagnostics (runs when GPU is allocated)
131
- import subprocess as _sp
132
- print("=" * 60)
133
- print("[Diagnostics] PyTorch version:", torch.__version__)
134
- print("[Diagnostics] CUDA available:", torch.cuda.is_available())
135
- if torch.cuda.is_available():
136
- print("[Diagnostics] CUDA version:", torch.version.cuda)
137
- print("[Diagnostics] cuDNN version:", torch.backends.cudnn.version())
138
- for i in range(torch.cuda.device_count()):
139
- name = torch.cuda.get_device_name(i)
140
- cap = torch.cuda.get_device_capability(i)
141
- mem = torch.cuda.get_device_properties(i).total_memory / 1024**3
142
- print(f"[Diagnostics] GPU {i}: {name}, sm_{cap[0]}{cap[1]}, {mem:.1f} GB")
143
- try:
144
- res = _sp.run(["nvidia-smi", "--query-gpu=name,compute_cap,memory.total", "--format=csv,noheader"], capture_output=True, text=True, timeout=10)
145
- print("[Diagnostics] nvidia-smi:", res.stdout.strip())
146
- except Exception as e:
147
- print(f"[Diagnostics] nvidia-smi failed: {e}")
148
- print("=" * 60)
149
-
150
- model_path = "TencentARC/Pixal3D-T"
151
- print(f"[Pipeline] Loading from {model_path}...")
152
- pipeline = Pixal3DImageTo3DPipeline.from_pretrained(model_path)
153
-
154
- print("[ImageCond] Building DinoV3ProjFeatureExtractor models...")
155
- pipeline.image_cond_model_ss = build_image_cond_model(IMAGE_COND_CONFIGS["ss"])
156
- pipeline.image_cond_model_shape_512 = build_image_cond_model(IMAGE_COND_CONFIGS["shape_512"])
157
- pipeline.image_cond_model_shape_1024 = build_image_cond_model(IMAGE_COND_CONFIGS["shape_1024"])
158
- pipeline.image_cond_model_tex_1024 = build_image_cond_model(IMAGE_COND_CONFIGS["tex_1024"])
159
-
160
- pipeline.low_vram = False
161
- pipeline.cuda()
162
-
163
- # Ensure image_cond_models are on GPU
164
- pipeline.image_cond_model_ss.cuda()
165
- pipeline.image_cond_model_shape_512.cuda()
166
- pipeline.image_cond_model_shape_1024.cuda()
167
- pipeline.image_cond_model_tex_1024.cuda()
168
-
169
- print("[NAF] Pre-loading NAF upsampler model...")
170
- for attr in ['image_cond_model_ss', 'image_cond_model_shape_512', 'image_cond_model_shape_1024', 'image_cond_model_tex_1024']:
171
- model = getattr(pipeline, attr, None)
172
- if model is not None and getattr(model, 'use_naf_upsample', False):
173
- model._load_naf()
174
-
175
- print("[MoGe-2] Loading model for camera estimation...")
176
- moge_model = load_moge_model(device="cuda")
177
-
178
- print("[EnvMap] Loading environment maps...")
179
- _base = os.path.dirname(os.path.abspath(__file__))
180
- envmap = {
181
- 'forest': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread(os.path.join(_base, 'assets/hdri/forest.exr'), cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
182
- 'sunset': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread(os.path.join(_base, 'assets/hdri/sunset.exr'), cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
183
- 'courtyard': EnvMap(torch.tensor(cv2.cvtColor(cv2.imread(os.path.join(_base, 'assets/hdri/courtyard.exr'), cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), dtype=torch.float32, device='cuda')),
184
- }
185
-
186
- # ============================================================================
187
- # Utilities
188
- # ============================================================================
189
-
190
- def compute_f_pixels(camera_angle_x: float, resolution: int) -> float:
191
- focal_length = 16.0 / torch.tan(torch.tensor(camera_angle_x / 2.0))
192
- f_pixels = focal_length * resolution / 32.0
193
- return float(f_pixels.item())
194
-
195
- def distance_from_fov(camera_angle_x, grid_point, target_point, mesh_scale, image_resolution):
196
- rotation_matrix = torch.tensor([[1.0, 0.0, 0.0], [0.0, 0.0, -1.0], [0.0, 1.0, 0.0]])
197
- gp = grid_point.to(torch.float32) @ rotation_matrix.T
198
- gp = gp / mesh_scale / 2
199
- xw, yw, zw = gp[0].item(), gp[1].item(), gp[2].item()
200
- xt, yt = float(target_point[0].item()), float(target_point[1].item())
201
- f_pixels = compute_f_pixels(camera_angle_x, image_resolution)
202
- x_ndc = xt - image_resolution / 2.0
203
- y_ndc = -(yt - image_resolution / 2.0)
204
- distance_x = f_pixels * xw / x_ndc - yw
205
- return {"distance_from_x": float(distance_x), "f_pixels": float(f_pixels)}
206
-
207
- def get_camera_params_wild_moge(image_path, device="cuda", mesh_scale=1.0, extend_pixel=0, image_resolution=512):
208
- pil_image = Image.open(image_path).convert("RGB")
209
- width, height = pil_image.size
210
- image_np = np.array(pil_image).astype(np.float32) / 255.0
211
- image_tensor = torch.from_numpy(image_np).permute(2, 0, 1).to(device)
212
- with torch.no_grad():
213
- output = moge_model.infer(image_tensor)
214
- intrinsics = output["intrinsics"].squeeze().cpu().numpy()
215
- fx_normalized = intrinsics[0, 0]
216
- fx = fx_normalized * width
217
- camera_angle_x = 2 * math.atan(width / (2 * fx))
218
-
219
- grid_point = torch.tensor([-1.0, 0.0, 0.0])
220
- distance = distance_from_fov(
221
- camera_angle_x, grid_point,
222
- torch.tensor([0 - extend_pixel, image_resolution - 1 + extend_pixel]),
223
- mesh_scale, image_resolution
224
- )["distance_from_x"]
225
- return {'camera_angle_x': camera_angle_x, 'distance': distance, 'mesh_scale': mesh_scale}
226
-
227
- def pack_state(shape_slat, tex_slat, res):
228
- state_data = {
229
- 'shape_slat_feats': shape_slat.feats.cpu().numpy(),
230
- 'tex_slat_feats': tex_slat.feats.cpu().numpy(),
231
- 'coords': shape_slat.coords.cpu().numpy(),
232
- 'res': res,
233
- }
234
- import random
235
- state_path = os.path.join(TMP_DIR, f"state_{int(time.time()*1000)}_{random.randint(0,9999):04d}.npz")
236
- np.savez_compressed(state_path, **state_data)
237
- return state_path
238
-
239
- def unpack_state(state_path):
240
- data = np.load(state_path)
241
- shape_slat = SparseTensor(
242
- feats=torch.from_numpy(data['shape_slat_feats']).cuda(),
243
- coords=torch.from_numpy(data['coords']).cuda(),
244
- )
245
- tex_slat = shape_slat.replace(torch.from_numpy(data['tex_slat_feats']).cuda())
246
- return shape_slat, tex_slat, int(data['res'])
247
 
248
- # ============================================================================
249
- # Progress Tracking (file-based, cross-process safe for @spaces.GPU)
250
- # ============================================================================
251
-
252
- import asyncio
253
- from fastapi.responses import JSONResponse
254
- from fastapi import Request
255
-
256
- PROGRESS_DIR = os.path.join(TMP_DIR, '_progress')
257
- os.makedirs(PROGRESS_DIR, exist_ok=True)
258
-
259
- _thread_local = threading.local()
260
-
261
- def _progress_file(session_id: str) -> str:
262
- """Return path to a session's progress JSON file."""
263
- return os.path.join(PROGRESS_DIR, f"{session_id}.json")
264
-
265
- def _reset_progress(session_id: str):
266
- _thread_local.active_session = session_id
267
- _write_progress_file(session_id, {"stage": "Initializing...", "step": 0, "total": 0, "done": False})
268
-
269
- def _update_progress(stage: str, step: int, total: int):
270
- session_id = getattr(_thread_local, 'active_session', '')
271
- if session_id:
272
- _write_progress_file(session_id, {"stage": stage, "step": step, "total": total, "done": False})
273
-
274
- def _finish_progress():
275
- session_id = getattr(_thread_local, 'active_session', '')
276
- if session_id:
277
- _write_progress_file(session_id, {"done": True})
278
-
279
- def _write_progress_file(session_id: str, data: dict):
280
- """Atomically write progress JSON to a file (cross-process safe)."""
281
- path = _progress_file(session_id)
282
- tmp_path = path + ".tmp"
283
- try:
284
- with open(tmp_path, 'w') as f:
285
- json.dump(data, f)
286
- os.replace(tmp_path, path) # atomic on POSIX
287
- except Exception:
288
- pass
289
-
290
- # Monkey-patch tqdm to intercept progress
291
- import tqdm as _tqdm_module
292
-
293
- _original_tqdm = _tqdm_module.tqdm
294
-
295
- class _TqdmProgressInterceptor(_original_tqdm):
296
- """Wraps tqdm to push progress updates to SSE."""
297
- def __init__(self, *args, **kwargs):
298
- self._stage_desc = kwargs.get('desc', 'Processing')
299
- super().__init__(*args, **kwargs)
300
-
301
- def set_description(self, desc=None, refresh=True):
302
- self._stage_desc = desc or 'Processing'
303
- super().set_description(desc, refresh)
304
-
305
- def update(self, n=1):
306
- super().update(n)
307
- _update_progress(self._stage_desc, self.n, self.total or 0)
308
-
309
- # Patch tqdm globally
310
- _tqdm_module.tqdm = _TqdmProgressInterceptor
311
- # Also patch the direct import in the sampler module and render_utils
312
- import trellis2.pipelines.samplers.flow_euler as _fe_module
313
- _fe_module.tqdm = _TqdmProgressInterceptor
314
- import trellis2.utils.render_utils as _ru_module
315
- _ru_module.tqdm = _TqdmProgressInterceptor
316
- import o_voxel.postprocess as _ovp_module
317
- _ovp_module.tqdm = _TqdmProgressInterceptor
318
-
319
- # ============================================================================
320
- # API Implementation
321
- # ============================================================================
322
-
323
- app = Server()
324
-
325
- @app.get("/")
326
- async def homepage():
327
- html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "index.html")
328
- with open(html_path, "r", encoding="utf-8") as f:
329
- return HTMLResponse(content=f.read())
330
-
331
- @app.get("/progress")
332
- async def progress_poll(request: Request):
333
- """Polling endpoint for real-time progress updates during generation."""
334
- session_id = request.query_params.get("session_id", "")
335
- path = _progress_file(session_id)
336
- try:
337
- with open(path, 'r') as f:
338
- data = json.load(f)
339
- return JSONResponse(data)
340
- except (FileNotFoundError, json.JSONDecodeError):
341
- return JSONResponse({"stage": "Waiting...", "step": 0, "total": 0, "done": False})
342
-
343
- @app.api()
344
- @spaces.GPU(duration=30)
345
- def preprocess(image: FileData) -> FileData:
346
- init_models()
347
- img = Image.open(image["path"])
348
- processed = pipeline.preprocess_image(img)
349
- out_path = os.path.join(TMP_DIR, f"preprocessed_{int(time.time()*1000)}.png")
350
- processed.save(out_path)
351
- return FileData(path=out_path)
352
-
353
- @app.api()
354
- @spaces.GPU(duration=120)
355
- def generate_3d(
356
- image: FileData,
357
- seed: int,
358
- resolution: int,
359
- ss_guidance_strength: float = 7.5,
360
- ss_guidance_rescale: float = 0.7,
361
- ss_sampling_steps: int = 12,
362
- ss_rescale_t: float = 5.0,
363
- shape_slat_guidance_strength: float = 7.5,
364
- shape_slat_guidance_rescale: float = 0.5,
365
- shape_slat_sampling_steps: int = 12,
366
- shape_slat_rescale_t: float = 3.0,
367
- tex_slat_guidance_strength: float = 1.0,
368
- tex_slat_guidance_rescale: float = 0.0,
369
- tex_slat_sampling_steps: int = 12,
370
- tex_slat_rescale_t: float = 3.0,
371
- session_id: str = "",
372
- ) -> Dict:
373
- init_models()
374
- _reset_progress(session_id)
375
- _update_progress("Preprocessing & Camera Estimation", 0, 1)
376
-
377
- torch.manual_seed(seed)
378
- hr_resolution = int(resolution)
379
-
380
- img = Image.open(image["path"])
381
- # Image is already preprocessed by /preprocess endpoint, use directly
382
- image_preprocessed = img
383
- temp_processed_path = os.path.join(TMP_DIR, f"temp_proc_{session_id[:8]}_{int(time.time()*1000)}.png")
384
- image_preprocessed.save(temp_processed_path)
385
-
386
- camera_params = get_camera_params_wild_moge(
387
- temp_processed_path, device="cuda",
388
- mesh_scale=WILD_MESH_SCALE, extend_pixel=WILD_EXTEND_PIXEL,
389
- image_resolution=WILD_IMAGE_RESOLUTION,
390
- )
391
- _update_progress("Preprocessing & Camera Estimation", 1, 1)
392
-
393
- ss_sampler_override = {"steps": ss_sampling_steps, "guidance_strength": ss_guidance_strength,
394
- "guidance_rescale": ss_guidance_rescale, "rescale_t": ss_rescale_t}
395
- shape_sampler_override = {"steps": shape_slat_sampling_steps, "guidance_strength": shape_slat_guidance_strength,
396
- "guidance_rescale": shape_slat_guidance_rescale, "rescale_t": shape_slat_rescale_t}
397
- tex_sampler_override = {"steps": tex_slat_sampling_steps, "guidance_strength": tex_slat_guidance_strength,
398
- "guidance_rescale": tex_slat_guidance_rescale, "rescale_t": tex_slat_rescale_t}
399
-
400
- pipeline_type = f"{hr_resolution}_cascade"
401
- mesh_list, (shape_slat, tex_slat, res) = pipeline.run(
402
- image_preprocessed,
403
- camera_params=camera_params,
404
- seed=seed,
405
- sparse_structure_sampler_params=ss_sampler_override,
406
- shape_slat_sampler_params=shape_sampler_override,
407
- tex_slat_sampler_params=tex_sampler_override,
408
- preprocess_image=False,
409
- return_latent=True,
410
- pipeline_type=pipeline_type,
411
- max_num_tokens=CASCADE_MAX_NUM_TOKENS,
412
- )
413
-
414
- mesh = mesh_list[0]
415
- state_path = pack_state(shape_slat, tex_slat, res)
416
-
417
- _update_progress("Rendering views", 0, 1)
418
- mesh.simplify(16777216)
419
- cam_dist = camera_params['distance']
420
- near = max(0.01, cam_dist - 2.0)
421
- far = cam_dist + 10.0
422
- renders = render_utils.render_proj_aligned_video(
423
- mesh, camera_angle_x=camera_params['camera_angle_x'],
424
- distance=cam_dist, resolution=1024,
425
- num_frames=STEPS, envmap=envmap,
426
- near=near, far=far,
 
 
 
 
 
 
 
 
427
  )
428
- _update_progress("Rendering views", 1, 1)
429
-
430
- # Save renders and return paths
431
- render_files = {}
432
- for mode_key, frames in renders.items():
433
- mode_files = []
434
- for i, frame in enumerate(frames):
435
- p = os.path.abspath(os.path.join(TMP_DIR, f"render_{mode_key}_{i}_{int(time.time()*1000)}.jpg"))
436
- Image.fromarray(frame).save(p, quality=85)
437
- mode_files.append(FileData(path=p))
438
- render_files[mode_key] = mode_files
439
-
440
- _finish_progress()
441
- return {
442
- "render_paths": render_files,
443
- "state_path": os.path.abspath(state_path),
444
- "camera_angle_x": camera_params['camera_angle_x'],
445
- "distance": camera_params['distance'],
446
- }
447
 
448
- @app.api()
449
- @spaces.GPU(duration=240)
450
- def extract_glb_api(state_path: str, decimation_target: int, texture_size: int, session_id: str = "") -> FileData:
451
- init_models()
452
- _reset_progress(session_id)
453
- _update_progress("Decoding latent", 0, 1)
454
-
455
- shape_slat, tex_slat, res = unpack_state(state_path)
456
- mesh = pipeline.decode_latent(shape_slat, tex_slat, res)[0]
457
- _update_progress("Decoding latent", 1, 1)
458
-
459
- glb = o_voxel.postprocess.to_glb(
460
- vertices=mesh.vertices, faces=mesh.faces, attr_volume=mesh.attrs,
461
- coords=mesh.coords, attr_layout=pipeline.pbr_attr_layout,
462
- grid_size=res, aabb=[[-0.5, -0.5, -0.5], [0.5, 0.5, 0.5]],
463
- decimation_target=decimation_target, texture_size=texture_size,
464
- remesh=True, remesh_band=1, remesh_project=0, use_tqdm=True,
465
- )
466
- rot = np.array([
467
- [-1, 0, 0, 0],
468
- [ 0, 0, -1, 0],
469
- [ 0, -1, 0, 0],
470
- [ 0, 0, 0, 1],
471
- ], dtype=np.float64)
472
- glb.apply_transform(rot)
473
-
474
- out_glb = os.path.join(TMP_DIR, f"result_{int(time.time()*1000)}.glb")
475
- glb.export(out_glb, extension_webp=True)
476
- _finish_progress()
477
- return FileData(path=out_glb)
478
 
479
- # Mount assets and tmp for direct access
480
- app.mount("/assets", StaticFiles(directory="assets"), name="assets")
481
- app.mount("/tmp", StaticFiles(directory=TMP_DIR), name="tmp")
 
 
 
 
482
 
483
  if __name__ == "__main__":
484
- # Re-install utils3d as in original app.py
485
- subprocess.run([
486
- "pip", "install", "--force-reinstall", "--no-deps",
487
- "https://github.com/LDYang694/Storages/releases/download/20260430/utils3d-0.0.2-py3-none-any.whl"
488
- ], check=True)
489
-
490
- # Pre-initialize models before launching the server
491
- init_models()
492
-
493
- app.launch(show_error=True, share=True)
 
1
+ """
2
+ Pixal3D HF Space Proxy
3
+ ======================
4
+ This is a lightweight proxy app for HF Space that redirects users to a
5
+ locally deployed Gradio share link.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ Setup:
8
+ 1. Deploy this as your HF Space app.py
9
+ 2. Set HF Space Secret: REMOTE_URL = your local share link (e.g. https://xxxxx.gradio.live)
10
+ 3. Users visiting the HF Space will be seamlessly redirected to your local instance.
 
11
 
12
+ To update the share link:
13
+ - Go to HF Space Settings -> Variables and secrets -> Update REMOTE_URL
14
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ import os
17
+ import gradio as gr
18
+
19
+ REMOTE_URL = os.environ.get("REMOTE_URL", "")
20
+ GPU_NAME = os.environ.get("GPU_NAME", "")
21
+
22
+ PROXY_HTML = """
23
+ <!DOCTYPE html>
24
+ <html lang="en">
25
+ <head>
26
+ <meta charset="UTF-8">
27
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
28
+ <title>Pixal3D | AI Image-to-3D</title>
29
+ <style>
30
+ * {{ margin: 0; padding: 0; box-sizing: border-box; }}
31
+ body {{
32
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
33
+ background: #0b0f1a;
34
+ color: #f1f5f9;
35
+ min-height: 100vh;
36
+ display: flex;
37
+ flex-direction: column;
38
+ }}
39
+ .header {{
40
+ padding: 8px 24px;
41
+ background: rgba(22, 28, 45, 0.9);
42
+ border-bottom: 1px solid rgba(255,255,255,0.08);
43
+ display: flex;
44
+ align-items: center;
45
+ gap: 16px;
46
+ backdrop-filter: blur(12px);
47
+ }}
48
+ .header h1 {{
49
+ font-size: 16px;
50
+ font-weight: 700;
51
+ background: linear-gradient(135deg, #818cf8, #10b981);
52
+ -webkit-background-clip: text;
53
+ -webkit-text-fill-color: transparent;
54
+ white-space: nowrap;
55
+ }}
56
+ .header .notice {{
57
+ flex: 1;
58
+ font-size: 12px;
59
+ color: #fbbf24;
60
+ text-align: center;
61
+ }}
62
+ .status {{
63
+ display: flex;
64
+ align-items: center;
65
+ gap: 6px;
66
+ font-size: 12px;
67
+ color: #94a3b8;
68
+ white-space: nowrap;
69
+ }}
70
+ .status-dot {{
71
+ width: 7px;
72
+ height: 7px;
73
+ border-radius: 50%;
74
+ background: {status_color};
75
+ animation: {status_anim};
76
+ }}
77
+ @keyframes pulse {{
78
+ 0%, 100% {{ opacity: 1; }}
79
+ 50% {{ opacity: 0.4; }}
80
+ }}
81
+ .iframe-container {{
82
+ flex: 1;
83
+ position: relative;
84
+ }}
85
+ .iframe-container iframe {{
86
+ width: 100%;
87
+ height: 100%;
88
+ border: none;
89
+ position: absolute;
90
+ top: 0;
91
+ left: 0;
92
+ }}
93
+ .no-url {{
94
+ flex: 1;
95
+ display: flex;
96
+ align-items: center;
97
+ justify-content: center;
98
+ padding: 40px;
99
+ }}
100
+ .no-url-card {{
101
+ max-width: 560px;
102
+ background: rgba(22, 28, 45, 0.8);
103
+ border: 1px solid rgba(255,255,255,0.08);
104
+ border-radius: 16px;
105
+ padding: 48px;
106
+ text-align: center;
107
+ }}
108
+ .no-url-card h2 {{
109
+ font-size: 24px;
110
+ margin-bottom: 16px;
111
+ }}
112
+ .no-url-card p {{
113
+ color: #94a3b8;
114
+ line-height: 1.7;
115
+ margin-bottom: 12px;
116
+ }}
117
+ .no-url-card code {{
118
+ background: rgba(129, 140, 248, 0.15);
119
+ color: #818cf8;
120
+ padding: 2px 8px;
121
+ border-radius: 4px;
122
+ font-size: 13px;
123
+ }}
124
+ .link-bar {{
125
+ padding: 8px 24px;
126
+ background: rgba(16, 185, 129, 0.08);
127
+ border-top: 1px solid rgba(16, 185, 129, 0.2);
128
+ font-size: 12px;
129
+ color: #94a3b8;
130
+ text-align: center;
131
+ }}
132
+ .link-bar a {{
133
+ color: #10b981;
134
+ text-decoration: none;
135
+ }}
136
+ .link-bar a:hover {{ text-decoration: underline; }}
137
+ </style>
138
+ </head>
139
+ <body>
140
+ <div class="header">
141
+ <h1>Pixal3D</h1>
142
+ <span class="notice"></span>
143
+ <div class="status">
144
+ <div class="status-dot"></div>
145
+ <span>{status_text}</span>
146
+ </div>
147
+ </div>
148
+ {content}
149
+ </body>
150
+ </html>
151
+ """
152
+
153
+
154
+ def build_page():
155
+ if REMOTE_URL:
156
+ status_color = "#10b981"
157
+ status_anim = "pulse 2s infinite"
158
+ status_text = "Connected to remote GPU instance"
159
+ content = f"""
160
+ <div class="no-url">
161
+ <div class="no-url-card">
162
+ <h2>🚀 Redirecting to Pixal3D...</h2>
163
+ <p style="color:#fbbf24; margin-bottom:12px;">⚠️ Due to a temporary HuggingFace error, this Space is currently unavailable. We have prepared a temporary locally-deployed instance for you.</p>
164
+ <p style="color:#f59e0b; margin-bottom:12px;">⚡ All users share a single GPU — requests are queued. Please be patient.</p>
165
+ <p>You will be redirected automatically.</p>
166
+ <p style="margin-top:16px;">
167
+ <a href="{REMOTE_URL}" style="display:inline-block; padding:12px 32px; background:linear-gradient(135deg,#818cf8,#10b981); color:#fff; border-radius:8px; text-decoration:none; font-weight:600; font-size:15px;">
168
+ Click here if not redirected
169
+ </a>
170
+ </p>
171
+ <p style="margin-top:16px; font-size:12px; color:#64748b;">Target: <code>{REMOTE_URL}</code></p>
172
+ </div>
173
+ </div>
174
+ <script>
175
+ // Auto redirect after a short delay
176
+ setTimeout(function() {{
177
+ window.location.href = "{REMOTE_URL}";
178
+ }}, 1500);
179
+ </script>
180
+ """
181
+ else:
182
+ status_color = "#ef4444"
183
+ status_anim = "pulse 1.5s infinite"
184
+ status_text = "Remote instance not configured"
185
+ content = """
186
+ <div class="no-url">
187
+ <div class="no-url-card">
188
+ <h2>⚡ Remote GPU Instance Not Connected</h2>
189
+ <p>This Space acts as a proxy to a locally-deployed Pixal3D instance running on a dedicated GPU.</p>
190
+ <p>To connect, set the <code>REMOTE_URL</code> secret in this Space's settings to your Gradio share link.</p>
191
+ <p style="margin-top:24px; font-size:13px;">
192
+ </p>
193
+ </div>
194
+ </div>
195
+ """
196
+
197
+ return PROXY_HTML.format(
198
+ status_color=status_color,
199
+ status_anim=status_anim,
200
+ status_text=status_text,
201
+ gpu_name=GPU_NAME,
202
+ content=content,
203
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
+ # Use a simple Gradio Blocks app with HTML component
207
+ with gr.Blocks(
208
+ title="Pixal3D | AI Image-to-3D",
209
+ css="footer {display:none !important;} .gradio-container {padding:0 !important; max-width:100% !important;} #proxy-frame {height:100vh; padding:0;}",
210
+ theme=gr.themes.Base(),
211
+ ) as demo:
212
+ gr.HTML(build_page(), elem_id="proxy-frame")
213
 
214
  if __name__ == "__main__":
215
+ demo.launch(share=True)
 
 
 
 
 
 
 
 
 
app_bak.py ADDED
@@ -0,0 +1,493 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import argparse
4
+ import math
5
+ import time
6
+ import shutil
7
+ import cv2
8
+ import torch
9
+ import numpy as np
10
+ import base64
11
+ import io
12
+ import json
13
+ from datetime import datetime
14
+ from typing import *
15
+ from PIL import Image
16
+
17
+ import threading
18
+ try:
19
+ import nest_asyncio
20
+ nest_asyncio.apply()
21
+ except ImportError:
22
+ pass
23
+
24
+ # Lock for model initialization
25
+ init_lock = threading.Lock()
26
+
27
+ os.environ['OPENCV_IO_ENABLE_OPENEXR'] = '1'
28
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
29
+ os.environ["ATTN_BACKEND"] = "flash_attn_3"
30
+ os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autotune_cache.json')
31
+ os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
32
+
33
+ import spaces
34
+ from gradio import Server
35
+ from gradio.data_classes import FileData
36
+ from fastapi.responses import HTMLResponse
37
+ from fastapi.staticfiles import StaticFiles
38
+
39
+ from trellis2.modules.sparse import SparseTensor
40
+ from trellis2.pipelines import Pixal3DImageTo3DPipeline
41
+ from trellis2.renderers import EnvMap
42
+ from trellis2.utils import render_utils
43
+ import o_voxel
44
+
45
+ # ============================================================================
46
+ # Constants & Defaults
47
+ # ============================================================================
48
+
49
+ MAX_SEED = np.iinfo(np.int32).max
50
+ TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
51
+ os.makedirs(TMP_DIR, exist_ok=True)
52
+
53
+ MODES = [
54
+ {"name": "Normal", "icon": "assets/app/normal.png", "render_key": "normal"},
55
+ {"name": "Clay render", "icon": "assets/app/clay.png", "render_key": "clay"},
56
+ {"name": "Base color", "icon": "assets/app/basecolor.png", "render_key": "base_color"},
57
+ {"name": "HDRI forest", "icon": "assets/app/hdri_forest.png", "render_key": "shaded_forest"},
58
+ {"name": "HDRI sunset", "icon": "assets/app/hdri_sunset.png", "render_key": "shaded_sunset"},
59
+ {"name": "HDRI courtyard", "icon": "assets/app/hdri_courtyard.png", "render_key": "shaded_courtyard"},
60
+ ]
61
+ STEPS = 8
62
+
63
+ # Cascade parameters
64
+ CASCADE_LR_RESOLUTION = 512
65
+ CASCADE_MAX_NUM_TOKENS = 49152
66
+
67
+ # MoGe defaults
68
+ MOGE_MODEL_NAME = "Ruicheng/moge-2-vitl"
69
+ WILD_MESH_SCALE = 1.0
70
+ WILD_EXTEND_PIXEL = 0
71
+ WILD_IMAGE_RESOLUTION = 512
72
+
73
+ # Image Cond Model configs
74
+ IMAGE_COND_CONFIGS = {
75
+ "ss": {
76
+ "model_name": "camenduru/dinov3-vitl16-pretrain-lvd1689m",
77
+ "image_size": 512,
78
+ "grid_resolution": 16,
79
+ },
80
+ "shape_512": {
81
+ "model_name": "camenduru/dinov3-vitl16-pretrain-lvd1689m",
82
+ "image_size": 512,
83
+ "grid_resolution": 32,
84
+ "use_naf_upsample": True,
85
+ "naf_target_size": 512,
86
+ },
87
+ "shape_1024": {
88
+ "model_name": "camenduru/dinov3-vitl16-pretrain-lvd1689m",
89
+ "image_size": 1024,
90
+ "grid_resolution": 64,
91
+ "use_naf_upsample": True,
92
+ "naf_target_size": 512,
93
+ },
94
+ "tex_1024": {
95
+ "model_name": "camenduru/dinov3-vitl16-pretrain-lvd1689m",
96
+ "image_size": 1024,
97
+ "grid_resolution": 64,
98
+ "use_naf_upsample": True,
99
+ "naf_target_size": 1024,
100
+ },
101
+ }
102
+
103
+ # ============================================================================
104
+ # Model Loading
105
+ # ============================================================================
106
+
107
def build_image_cond_model(config: dict):
    """Create a ``DinoV3ProjFeatureExtractor`` from ``config`` in eval mode.

    Args:
        config: Keyword arguments forwarded verbatim to the extractor's
            constructor (see ``IMAGE_COND_CONFIGS`` for the entries used here).

    Returns:
        The constructed extractor after ``eval()`` has been called on it.
    """
    # Imported at call time rather than at module import.
    from trellis2.trainers.flow_matching.mixins.image_conditioned_proj import DinoV3ProjFeatureExtractor

    extractor = DinoV3ProjFeatureExtractor(**config)
    extractor.eval()
    return extractor
112
+
113
def load_moge_model(device="cuda", model_name=MOGE_MODEL_NAME):
    """Load the pretrained MoGe v2 model, move it to ``device`` and set eval mode.

    Args:
        device: Target device passed to ``.to()``.
        model_name: Identifier handed to ``MoGeModel.from_pretrained``.

    Returns:
        The loaded model.
    """
    # Imported at call time rather than at module import.
    from moge.model.v2 import MoGeModel

    loaded = MoGeModel.from_pretrained(model_name)
    loaded = loaded.to(device)
    loaded.eval()
    return loaded
118
+
119
+ # Global instances (lazy loaded or loaded at start)
120
# Placeholders populated by init_models(); all start out unset.
pipeline = moge_model = envmap = None
123
+
124
def _log_gpu_diagnostics():
    """Print PyTorch / CUDA / GPU details to stdout for troubleshooting."""
    import subprocess as _sp
    print("=" * 60)
    print("[Diagnostics] PyTorch version:", torch.__version__)
    print("[Diagnostics] CUDA available:", torch.cuda.is_available())
    if torch.cuda.is_available():
        print("[Diagnostics] CUDA version:", torch.version.cuda)
        print("[Diagnostics] cuDNN version:", torch.backends.cudnn.version())
        for i in range(torch.cuda.device_count()):
            name = torch.cuda.get_device_name(i)
            cap = torch.cuda.get_device_capability(i)
            mem = torch.cuda.get_device_properties(i).total_memory / 1024**3
            print(f"[Diagnostics] GPU {i}: {name}, sm_{cap[0]}{cap[1]}, {mem:.1f} GB")
    # Best-effort probe: any failure (missing binary, timeout, ...) is only reported.
    try:
        res = _sp.run(["nvidia-smi", "--query-gpu=name,compute_cap,memory.total", "--format=csv,noheader"], capture_output=True, text=True, timeout=10)
        print("[Diagnostics] nvidia-smi:", res.stdout.strip())
    except Exception as e:
        print(f"[Diagnostics] nvidia-smi failed: {e}")
    print("=" * 60)


def _load_envmap(path):
    """Read an EXR image from ``path`` and wrap it as a CUDA float32 ``EnvMap``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file (``cv2.imread``
            returns ``None`` instead of raising).
    """
    bgr = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if bgr is None:
        raise FileNotFoundError(f"Environment map could not be read: {path}")
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    return EnvMap(torch.tensor(rgb, dtype=torch.float32, device='cuda'))


def init_models():
    """Load the pipeline, image-conditioning models, MoGe and env maps once.

    The work is serialized by ``init_lock``; calls made after a successful
    initialization return immediately. Everything is built in local variables
    and only published to the module globals at the very end, so a failure
    part-way through leaves ``pipeline`` unset and the next call retries
    instead of returning early with ``moge_model`` / ``envmap`` still ``None``.
    """
    global pipeline, moge_model, envmap
    with init_lock:
        if pipeline is not None:
            return

        # GPU / CUDA Diagnostics (runs when GPU is allocated)
        _log_gpu_diagnostics()

        model_path = "TencentARC/Pixal3D-T"
        print(f"[Pipeline] Loading from {model_path}...")
        pipe = Pixal3DImageTo3DPipeline.from_pretrained(model_path)

        # Pipeline attribute name -> key into IMAGE_COND_CONFIGS.
        cond_models = (
            ('image_cond_model_ss', "ss"),
            ('image_cond_model_shape_512', "shape_512"),
            ('image_cond_model_shape_1024', "shape_1024"),
            ('image_cond_model_tex_1024', "tex_1024"),
        )

        print("[ImageCond] Building DinoV3ProjFeatureExtractor models...")
        for attr, config_key in cond_models:
            setattr(pipe, attr, build_image_cond_model(IMAGE_COND_CONFIGS[config_key]))

        pipe.low_vram = False
        pipe.cuda()

        # Ensure image_cond_models are on GPU
        for attr, _ in cond_models:
            getattr(pipe, attr).cuda()

        print("[NAF] Pre-loading NAF upsampler model...")
        for attr, _ in cond_models:
            model = getattr(pipe, attr, None)
            if model is not None and getattr(model, 'use_naf_upsample', False):
                model._load_naf()

        print("[MoGe-2] Loading model for camera estimation...")
        moge = load_moge_model(device="cuda")

        print("[EnvMap] Loading environment maps...")
        _base = os.path.dirname(os.path.abspath(__file__))
        envmaps = {
            key: _load_envmap(os.path.join(_base, f'assets/hdri/{key}.exr'))
            for key in ('forest', 'sunset', 'courtyard')
        }

        # Publish only after everything loaded; ``pipeline`` goes last because
        # it is the "already initialized" sentinel checked above.
        moge_model = moge
        envmap = envmaps
        pipeline = pipe
185
+
186
+ # ============================================================================
187
+ # Utilities
188
+ # ============================================================================
189
+
190
def compute_f_pixels(camera_angle_x: float, resolution: int) -> float:
    """Return the focal length in pixels for a horizontal field of view.

    Computed as ``16 / tan(camera_angle_x / 2) * resolution / 32``, which is
    algebraically ``(resolution / 2) / tan(camera_angle_x / 2)``.

    Args:
        camera_angle_x: Horizontal field of view in radians.
        resolution: Image width in pixels.
    """
    focal_length = 16.0 / torch.tan(torch.tensor(camera_angle_x / 2.0))
    f_pixels = focal_length * resolution / 32.0
    return float(f_pixels.item())


def distance_from_fov(camera_angle_x, grid_point, target_point, mesh_scale, image_resolution):
    """Solve for the distance that projects ``grid_point`` onto ``target_point``'s column.

    ``grid_point`` is remapped from ``(x, y, z)`` to ``(x, -z, y)``, divided by
    ``mesh_scale`` and by 2, and the distance is then
    ``f_pixels * x / x_ndc - y`` where ``x_ndc`` is the target's horizontal
    pixel offset from the image centre.

    Args:
        camera_angle_x: Horizontal field of view in radians.
        grid_point: 3-element tensor.
        target_point: Tensor whose first element is the target pixel x
            coordinate (the second element is not used by this computation).
        mesh_scale: Scale divisor applied to the remapped point.
        image_resolution: Image width in pixels.

    Returns:
        ``{"distance_from_x": float, "f_pixels": float}``.

    Raises:
        ZeroDivisionError: If the target x coordinate lies exactly on the
            vertical centre line (``x_ndc == 0``), where the distance is
            undefined.
    """
    # Build the matrix on the point's device so non-CPU inputs work as well.
    rotation_matrix = torch.tensor(
        [[1.0, 0.0, 0.0], [0.0, 0.0, -1.0], [0.0, 1.0, 0.0]],
        dtype=torch.float32, device=grid_point.device,
    )
    gp = grid_point.to(torch.float32) @ rotation_matrix.T
    gp = gp / mesh_scale / 2
    xw, yw = gp[0].item(), gp[1].item()
    xt = float(target_point[0].item())
    f_pixels = compute_f_pixels(camera_angle_x, image_resolution)
    x_ndc = xt - image_resolution / 2.0
    if x_ndc == 0:
        raise ZeroDivisionError(
            "distance_from_fov: target x coordinate is at the image centre; distance is undefined"
        )
    distance_x = f_pixels * xw / x_ndc - yw
    return {"distance_from_x": float(distance_x), "f_pixels": float(f_pixels)}
206
+
207
def get_camera_params_wild_moge(image_path, device="cuda", mesh_scale=1.0, extend_pixel=0, image_resolution=512):
    """Estimate camera field of view and distance for an image using MoGe.

    The image is read as RGB, scaled to ``[0, 1]``, rearranged to
    channel-first layout and passed to the module-level ``moge_model``
    (``init_models()`` must have run). The horizontal field of view is derived
    from the first intrinsics entry, and the distance from
    ``distance_from_fov`` for the fixed grid point ``(-1, 0, 0)``.

    Args:
        image_path: Path of the image file to analyse.
        device: Device the input tensor is moved to.
        mesh_scale: Forwarded to ``distance_from_fov`` and echoed in the result.
        extend_pixel: Pixel margin applied to the target point.
        image_resolution: Resolution used for the distance computation.

    Returns:
        Dict with ``camera_angle_x``, ``distance`` and ``mesh_scale``.
    """
    # Close the file handle promptly; convert() yields an independent image.
    with Image.open(image_path) as src:
        pil_image = src.convert("RGB")
    width = pil_image.size[0]
    image_np = np.array(pil_image).astype(np.float32) / 255.0
    image_tensor = torch.from_numpy(image_np).permute(2, 0, 1).to(device)
    with torch.no_grad():
        output = moge_model.infer(image_tensor)
    intrinsics = output["intrinsics"].squeeze().cpu().numpy()
    fx_normalized = intrinsics[0, 0]
    fx = fx_normalized * width
    camera_angle_x = 2 * math.atan(width / (2 * fx))

    grid_point = torch.tensor([-1.0, 0.0, 0.0])
    distance = distance_from_fov(
        camera_angle_x, grid_point,
        torch.tensor([0 - extend_pixel, image_resolution - 1 + extend_pixel]),
        mesh_scale, image_resolution
    )["distance_from_x"]
    return {'camera_angle_x': camera_angle_x, 'distance': distance, 'mesh_scale': mesh_scale}
226
+
227
def pack_state(shape_slat, tex_slat, res):
    """Persist the generation latents to a compressed ``.npz`` file in ``TMP_DIR``.

    Stores the features of both latents, the coordinates of ``shape_slat``
    and ``res``; ``unpack_state`` reads the same keys back.

    Returns:
        Path of the written file.
    """
    import uuid

    state_data = {
        'shape_slat_feats': shape_slat.feats.cpu().numpy(),
        'tex_slat_feats': tex_slat.feats.cpu().numpy(),
        'coords': shape_slat.coords.cpu().numpy(),
        'res': res,
    }
    # A UUID suffix keeps names unique across concurrent requests; the
    # previous 4-digit random suffix could collide within one millisecond.
    state_path = os.path.join(TMP_DIR, f"state_{int(time.time()*1000)}_{uuid.uuid4().hex}.npz")
    np.savez_compressed(state_path, **state_data)
    return state_path
238
+
239
def unpack_state(state_path):
    """Load latents written by ``pack_state`` and rebuild them on the GPU.

    Args:
        state_path: Path to the ``.npz`` file.

    Returns:
        ``(shape_slat, tex_slat, res)`` where ``tex_slat`` shares the
        coordinates of ``shape_slat`` and ``res`` is an ``int``.
    """
    # NOTE(review): callers that accept this path from a client should make
    # sure it points inside TMP_DIR before calling.
    # Read everything inside the context manager so the archive's file
    # handle is closed instead of lingering until garbage collection.
    with np.load(state_path) as data:
        shape_feats = torch.from_numpy(data['shape_slat_feats'])
        tex_feats = torch.from_numpy(data['tex_slat_feats'])
        coords = torch.from_numpy(data['coords'])
        res = int(data['res'])
    shape_slat = SparseTensor(
        feats=shape_feats.cuda(),
        coords=coords.cuda(),
    )
    tex_slat = shape_slat.replace(tex_feats.cuda())
    return shape_slat, tex_slat, res
247
+
248
+ # ============================================================================
249
+ # Progress Tracking (file-based, cross-process safe for @spaces.GPU)
250
+ # ============================================================================
251
+
252
+ import asyncio
253
+ from fastapi.responses import JSONResponse
254
+ from fastapi import Request
255
+
256
+ PROGRESS_DIR = os.path.join(TMP_DIR, '_progress')
257
+ os.makedirs(PROGRESS_DIR, exist_ok=True)
258
+
259
+ _thread_local = threading.local()
260
+
261
def _progress_file(session_id: str) -> str:
    """Return path to a session's progress JSON file.

    ``session_id`` arrives from clients (query string / API argument), so any
    character other than letters, digits, ``-`` and ``_`` is replaced with
    ``_``. This prevents path separators or ``..`` from steering reads and
    writes outside ``PROGRESS_DIR``. Ids made only of the allowed characters
    map to the same file name as before.
    """
    safe_id = "".join(
        ch if (ch.isalnum() or ch in "-_") else "_"
        for ch in str(session_id)
    )
    return os.path.join(PROGRESS_DIR, f"{safe_id}.json")
264
+
265
def _reset_progress(session_id: str):
    """Bind ``session_id`` to the current thread and write the initial progress record."""
    initial_record = {"stage": "Initializing...", "step": 0, "total": 0, "done": False}
    # Later _update_progress/_finish_progress calls on this thread read this attribute.
    setattr(_thread_local, 'active_session', session_id)
    _write_progress_file(session_id, initial_record)
268
+
269
def _update_progress(stage: str, step: int, total: int):
    """Write a progress record for the session bound to the current thread.

    Does nothing when no session (or an empty one) is bound.
    """
    active = getattr(_thread_local, 'active_session', '')
    if not active:
        return
    record = {"stage": stage, "step": step, "total": total, "done": False}
    _write_progress_file(active, record)
273
+
274
def _finish_progress():
    """Mark the current thread's session as done; no-op when none is bound."""
    active = getattr(_thread_local, 'active_session', '')
    if not active:
        return
    # The final record carries only the completion flag.
    _write_progress_file(active, {"done": True})
278
+
279
def _write_progress_file(session_id: str, data: dict):
    """Atomically write progress JSON to a file (cross-process safe).

    The record is written to a sibling ``.tmp`` file and then moved over the
    real path with ``os.replace``, so readers never see a half-written file.
    Progress reporting is best-effort: I/O and serialization failures are
    swallowed so they cannot abort a generation, but only those error types
    are caught and a partially written temp file is cleaned up.
    """
    path = _progress_file(session_id)
    tmp_path = path + ".tmp"
    try:
        with open(tmp_path, 'w') as f:
            json.dump(data, f)
        os.replace(tmp_path, path)  # atomic on POSIX
    except (OSError, TypeError, ValueError):
        # OSError: filesystem problems; TypeError/ValueError: unserializable data.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
289
+
290
+ # Monkey-patch tqdm to intercept progress
291
+ import tqdm as _tqdm_module
292
+
293
+ _original_tqdm = _tqdm_module.tqdm
294
+
295
class _TqdmProgressInterceptor(_original_tqdm):
    """tqdm subclass that mirrors each update into the session progress file.

    Every ``update()`` forwards the current description, count and total to
    ``_update_progress``, which writes them for the polling endpoint.
    """

    def __init__(self, *args, **kwargs):
        # Fall back to a generic label when desc is missing, None or empty,
        # matching what set_description() does.
        self._stage_desc = kwargs.get('desc') or 'Processing'
        super().__init__(*args, **kwargs)

    def set_description(self, desc=None, refresh=True):
        """Track the new description as the stage label, then delegate to tqdm."""
        self._stage_desc = desc or 'Processing'
        super().set_description(desc, refresh)

    def update(self, n=1):
        """Advance the bar, publish the progress and return tqdm's own result."""
        displayed = super().update(n)
        _update_progress(self._stage_desc, self.n, self.total or 0)
        return displayed
308
+
309
+ # Patch tqdm globally
310
+ _tqdm_module.tqdm = _TqdmProgressInterceptor
311
+ # Also patch the direct import in the sampler module and render_utils
312
+ import trellis2.pipelines.samplers.flow_euler as _fe_module
313
+ _fe_module.tqdm = _TqdmProgressInterceptor
314
+ import trellis2.utils.render_utils as _ru_module
315
+ _ru_module.tqdm = _TqdmProgressInterceptor
316
+ import o_voxel.postprocess as _ovp_module
317
+ _ovp_module.tqdm = _TqdmProgressInterceptor
318
+
319
+ # ============================================================================
320
+ # API Implementation
321
+ # ============================================================================
322
+
323
+ app = Server()
324
+
325
@app.get("/")
async def homepage():
    """Serve the ``index.html`` file located next to this module."""
    module_dir = os.path.dirname(os.path.abspath(__file__))
    index_file = os.path.join(module_dir, "index.html")
    with open(index_file, "r", encoding="utf-8") as page:
        markup = page.read()
    return HTMLResponse(content=markup)
330
+
331
@app.get("/progress")
async def progress_poll(request: Request):
    """Polling endpoint for real-time progress updates during generation.

    Reads the progress file of the session named by the ``session_id`` query
    parameter. When the file is missing, unreadable or not valid JSON, a
    neutral "Waiting..." record is returned so that a polling client never
    receives a server error.
    """
    session_id = request.query_params.get("session_id", "")
    path = _progress_file(session_id)
    try:
        with open(path, 'r') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers missing/unreadable files; ValueError covers
        # json.JSONDecodeError and invalid path strings.
        return JSONResponse({"stage": "Waiting...", "step": 0, "total": 0, "done": False})
    return JSONResponse(data)
342
+
343
@app.api()
@spaces.GPU(duration=30)
def preprocess(image: FileData) -> FileData:
    """Run the pipeline's image preprocessing and return the result as a PNG.

    Args:
        image: Uploaded file; its ``"path"`` entry is opened as an image.

    Returns:
        ``FileData`` pointing at the preprocessed PNG written to ``TMP_DIR``.
    """
    import uuid

    init_models()
    # Load the pixels, then let the context manager release the file handle;
    # the image stays usable afterwards.
    with Image.open(image["path"]) as img:
        img.load()
    processed = pipeline.preprocess_image(img)
    # The UUID suffix avoids collisions between requests in the same millisecond.
    out_path = os.path.join(TMP_DIR, f"preprocessed_{int(time.time()*1000)}_{uuid.uuid4().hex[:8]}.png")
    processed.save(out_path)
    return FileData(path=out_path)
352
+
353
@app.api()
@spaces.GPU(duration=120)
def generate_3d(
    image: FileData,
    seed: int,
    resolution: int,
    ss_guidance_strength: float = 7.5,
    ss_guidance_rescale: float = 0.7,
    ss_sampling_steps: int = 12,
    ss_rescale_t: float = 5.0,
    shape_slat_guidance_strength: float = 7.5,
    shape_slat_guidance_rescale: float = 0.5,
    shape_slat_sampling_steps: int = 12,
    shape_slat_rescale_t: float = 3.0,
    tex_slat_guidance_strength: float = 1.0,
    tex_slat_guidance_rescale: float = 0.0,
    tex_slat_sampling_steps: int = 12,
    tex_slat_rescale_t: float = 3.0,
    session_id: str = "",
) -> Dict:
    """Generate a 3D asset from a preprocessed image and render preview views.

    Steps: estimate camera parameters with MoGe, run the cascade pipeline
    with the given sampler settings, save the latents via ``pack_state``,
    render ``STEPS`` views and write every frame as a JPEG into ``TMP_DIR``.
    Progress is reported through the session's progress file.

    Args:
        image: Preprocessed input image (``"path"`` entry is opened).
        seed: Random seed for torch and the pipeline.
        resolution: Target resolution; selects the ``"<resolution>_cascade"``
            pipeline type.
        ss_* / shape_slat_* / tex_slat_*: Sampler overrides (steps, guidance
            strength, guidance rescale, rescale_t) for the three stages.
        session_id: Identifier used for progress reporting.

    Returns:
        Dict with ``render_paths`` (render key -> list of ``FileData``),
        ``state_path``, ``camera_angle_x`` and ``distance``.
    """
    init_models()
    _reset_progress(session_id)
    _update_progress("Preprocessing & Camera Estimation", 0, 1)

    torch.manual_seed(seed)
    hr_resolution = int(resolution)

    # Image is already preprocessed by /preprocess endpoint, use directly.
    # load() pulls the pixels in so the file handle can be closed right away.
    with Image.open(image["path"]) as image_preprocessed:
        image_preprocessed.load()

    # session_id is client-supplied: keep only filename-safe characters.
    safe_prefix = "".join(ch for ch in session_id[:8] if ch.isalnum() or ch in "-_")
    temp_processed_path = os.path.join(TMP_DIR, f"temp_proc_{safe_prefix}_{int(time.time()*1000)}.png")
    try:
        image_preprocessed.save(temp_processed_path)
        camera_params = get_camera_params_wild_moge(
            temp_processed_path, device="cuda",
            mesh_scale=WILD_MESH_SCALE, extend_pixel=WILD_EXTEND_PIXEL,
            image_resolution=WILD_IMAGE_RESOLUTION,
        )
    finally:
        # The temp copy is only an input to camera estimation; remove it so
        # TMP_DIR does not accumulate one file per request.
        try:
            os.remove(temp_processed_path)
        except OSError:
            pass
    _update_progress("Preprocessing & Camera Estimation", 1, 1)

    ss_sampler_override = {"steps": ss_sampling_steps, "guidance_strength": ss_guidance_strength,
                           "guidance_rescale": ss_guidance_rescale, "rescale_t": ss_rescale_t}
    shape_sampler_override = {"steps": shape_slat_sampling_steps, "guidance_strength": shape_slat_guidance_strength,
                              "guidance_rescale": shape_slat_guidance_rescale, "rescale_t": shape_slat_rescale_t}
    tex_sampler_override = {"steps": tex_slat_sampling_steps, "guidance_strength": tex_slat_guidance_strength,
                            "guidance_rescale": tex_slat_guidance_rescale, "rescale_t": tex_slat_rescale_t}

    pipeline_type = f"{hr_resolution}_cascade"
    mesh_list, (shape_slat, tex_slat, res) = pipeline.run(
        image_preprocessed,
        camera_params=camera_params,
        seed=seed,
        sparse_structure_sampler_params=ss_sampler_override,
        shape_slat_sampler_params=shape_sampler_override,
        tex_slat_sampler_params=tex_sampler_override,
        preprocess_image=False,
        return_latent=True,
        pipeline_type=pipeline_type,
        max_num_tokens=CASCADE_MAX_NUM_TOKENS,
    )

    mesh = mesh_list[0]
    state_path = pack_state(shape_slat, tex_slat, res)

    _update_progress("Rendering views", 0, 1)
    mesh.simplify(16777216)
    cam_dist = camera_params['distance']
    # Clip planes placed around the estimated camera distance.
    near = max(0.01, cam_dist - 2.0)
    far = cam_dist + 10.0
    renders = render_utils.render_proj_aligned_video(
        mesh, camera_angle_x=camera_params['camera_angle_x'],
        distance=cam_dist, resolution=1024,
        num_frames=STEPS, envmap=envmap,
        near=near, far=far,
    )
    _update_progress("Rendering views", 1, 1)

    # Save renders and return paths
    render_files = {}
    for mode_key, frames in renders.items():
        mode_files = []
        for i, frame in enumerate(frames):
            p = os.path.abspath(os.path.join(TMP_DIR, f"render_{mode_key}_{i}_{int(time.time()*1000)}.jpg"))
            Image.fromarray(frame).save(p, quality=85)
            mode_files.append(FileData(path=p))
        render_files[mode_key] = mode_files

    _finish_progress()
    return {
        "render_paths": render_files,
        "state_path": os.path.abspath(state_path),
        "camera_angle_x": camera_params['camera_angle_x'],
        "distance": camera_params['distance'],
    }
447
+
448
@app.api()
@spaces.GPU(duration=240)
def extract_glb_api(state_path: str, decimation_target: int, texture_size: int, session_id: str = "") -> FileData:
    """Decode saved latents into a mesh and export it as a GLB file.

    Args:
        state_path: Path of a state file produced by ``generate_3d``; it must
            resolve to a location inside ``TMP_DIR``.
        decimation_target: Forwarded to ``o_voxel.postprocess.to_glb``.
        texture_size: Forwarded to ``o_voxel.postprocess.to_glb``.
        session_id: Identifier used for progress reporting.

    Returns:
        ``FileData`` pointing at the exported ``.glb`` in ``TMP_DIR``.

    Raises:
        ValueError: If ``state_path`` resolves outside ``TMP_DIR``.
    """
    import uuid

    # state_path comes from the client; refuse anything outside TMP_DIR so
    # the endpoint cannot be used to open arbitrary files on the server.
    tmp_root = os.path.realpath(TMP_DIR)
    resolved_state = os.path.realpath(state_path)
    if os.path.commonpath([tmp_root, resolved_state]) != tmp_root:
        raise ValueError("state_path must point to a file inside the application's tmp directory")

    init_models()
    _reset_progress(session_id)
    _update_progress("Decoding latent", 0, 1)

    shape_slat, tex_slat, res = unpack_state(resolved_state)
    mesh = pipeline.decode_latent(shape_slat, tex_slat, res)[0]
    _update_progress("Decoding latent", 1, 1)

    glb = o_voxel.postprocess.to_glb(
        vertices=mesh.vertices, faces=mesh.faces, attr_volume=mesh.attrs,
        coords=mesh.coords, attr_layout=pipeline.pbr_attr_layout,
        grid_size=res, aabb=[[-0.5, -0.5, -0.5], [0.5, 0.5, 0.5]],
        decimation_target=decimation_target, texture_size=texture_size,
        remesh=True, remesh_band=1, remesh_project=0, use_tqdm=True,
    )
    # Fixed axis remap applied to the exported scene:
    # (x, y, z) -> (-x, -z, -y).
    rot = np.array([
        [-1,  0,  0, 0],
        [ 0,  0, -1, 0],
        [ 0, -1,  0, 0],
        [ 0,  0,  0, 1],
    ], dtype=np.float64)
    glb.apply_transform(rot)

    # The UUID suffix avoids collisions between requests in the same millisecond.
    out_glb = os.path.join(TMP_DIR, f"result_{int(time.time()*1000)}_{uuid.uuid4().hex[:8]}.glb")
    glb.export(out_glb, extension_webp=True)
    _finish_progress()
    return FileData(path=out_glb)
478
+
479
+ # Mount assets and tmp for direct access
480
# Resolve the assets directory relative to this file (like index.html and the
# HDRI maps) so the mount does not depend on the current working directory.
app.mount(
    "/assets",
    StaticFiles(directory=os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")),
    name="assets",
)
# NOTE(review): this exposes everything in TMP_DIR over HTTP, including
# saved state files and the progress directory - confirm that is intended.
app.mount("/tmp", StaticFiles(directory=TMP_DIR), name="tmp")
482
+
483
if __name__ == "__main__":
    import sys

    # Re-install utils3d as in original app.py.
    # Invoke pip through the running interpreter so the wheel lands in this
    # environment even if a different "pip" comes first on PATH.
    # NOTE(review): this runs after the module-level imports above; confirm
    # nothing has already imported an older utils3d by this point.
    subprocess.run([
        sys.executable, "-m", "pip", "install", "--force-reinstall", "--no-deps",
        "https://github.com/LDYang694/Storages/releases/download/20260430/utils3d-0.0.2-py3-none-any.whl"
    ], check=True)

    # Pre-initialize models before launching the server
    init_models()

    app.launch(show_error=True, share=True)
autotune_cache.json CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,28 +1 @@
1
- --extra-index-url https://download.pytorch.org/whl/cu124
2
-
3
- torch==2.6.0
4
- torchvision==0.21.0
5
- triton==3.2.0
6
- pillow==12.0.0
7
- imageio==2.37.2
8
- imageio-ffmpeg==0.6.0
9
- tqdm==4.67.1
10
- easydict==1.13
11
- opencv-python-headless==4.12.0.88
12
- trimesh==4.10.1
13
- transformers==4.57.3
14
- zstandard==0.25.0
15
- kornia==0.8.2
16
- timm==1.0.22
17
- diffusers==0.37.1
18
- accelerate==1.13.0
19
  gradio
20
- plyfile==1.1.3
21
- git+https://github.com/microsoft/MoGe.git
22
- https://github.com/LDYang694/Storages/releases/download/20260430/natten-0.21.0+torch2.6cu124-cp310-cp310-linux_x86_64.whl
23
- https://github.com/JeffreyXiang/Storages/releases/download/Space_Wheels_251210/flash_attn_3-3.0.0b1-cp39-abi3-linux_x86_64.whl
24
- https://github.com/JeffreyXiang/Storages/releases/download/Space_Wheels_251210/cumesh-0.0.1-cp310-cp310-linux_x86_64.whl
25
- https://github.com/JeffreyXiang/Storages/releases/download/Space_Wheels_251210/flex_gemm-0.0.1-cp310-cp310-linux_x86_64.whl
26
- https://github.com/JeffreyXiang/Storages/releases/download/Space_Wheels_251210/o_voxel-0.0.1-cp310-cp310-linux_x86_64.whl
27
- https://github.com/JeffreyXiang/Storages/releases/download/Space_Wheels_251210/nvdiffrast-0.4.0-cp310-cp310-linux_x86_64.whl
28
- https://github.com/JeffreyXiang/Storages/releases/download/Space_Wheels_251210/nvdiffrec_render-0.0.0-cp310-cp310-linux_x86_64.whl
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  gradio