Spaces:
Running on Zero
Fix: move GPU diagnostics into init_models() so that CUDA is not initialized before the `spaces` package is imported
Browse files
app.py
CHANGED
|
@@ -4,41 +4,8 @@ import argparse
|
|
| 4 |
import math
|
| 5 |
import time
|
| 6 |
import shutil
|
| 7 |
-
|
| 8 |
-
# ============================================================================
|
| 9 |
-
# GPU / CUDA Environment Diagnostics
|
| 10 |
-
# ============================================================================
|
| 11 |
-
import torch
|
| 12 |
-
print("=" * 60)
|
| 13 |
-
print("[Diagnostics] PyTorch version:", torch.__version__)
|
| 14 |
-
print("[Diagnostics] CUDA available:", torch.cuda.is_available())
|
| 15 |
-
if torch.cuda.is_available():
|
| 16 |
-
print("[Diagnostics] CUDA version:", torch.version.cuda)
|
| 17 |
-
print("[Diagnostics] cuDNN version:", torch.backends.cudnn.version())
|
| 18 |
-
for i in range(torch.cuda.device_count()):
|
| 19 |
-
name = torch.cuda.get_device_name(i)
|
| 20 |
-
cap = torch.cuda.get_device_capability(i)
|
| 21 |
-
mem = torch.cuda.get_device_properties(i).total_mem / 1024**3
|
| 22 |
-
print(f"[Diagnostics] GPU {i}: {name}, compute capability: sm_{cap[0]}{cap[1]}, memory: {mem:.1f} GB")
|
| 23 |
-
else:
|
| 24 |
-
print("[Diagnostics] WARNING: No CUDA GPU detected!")
|
| 25 |
-
|
| 26 |
-
try:
|
| 27 |
-
import flash_attn_3
|
| 28 |
-
print("[Diagnostics] flash_attn_3 imported OK")
|
| 29 |
-
from flash_attn_interface import flash_attn_func
|
| 30 |
-
print("[Diagnostics] flash_attn_func imported OK")
|
| 31 |
-
except Exception as e:
|
| 32 |
-
print(f"[Diagnostics] flash_attn_3 import FAILED: {e}")
|
| 33 |
-
|
| 34 |
-
try:
|
| 35 |
-
result = subprocess.run(["nvidia-smi"], capture_output=True, text=True, timeout=10)
|
| 36 |
-
print("[Diagnostics] nvidia-smi:\n" + result.stdout[:500])
|
| 37 |
-
except Exception as e:
|
| 38 |
-
print(f"[Diagnostics] nvidia-smi failed: {e}")
|
| 39 |
-
print("=" * 60)
|
| 40 |
-
|
| 41 |
import cv2
|
|
|
|
| 42 |
import numpy as np
|
| 43 |
import base64
|
| 44 |
import io
|
|
@@ -160,6 +127,26 @@ def init_models():
|
|
| 160 |
if pipeline is not None:
|
| 161 |
return
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
model_path = "TencentARC/Pixal3D-T"
|
| 164 |
print(f"[Pipeline] Loading from {model_path}...")
|
| 165 |
pipeline = Pixal3DImageTo3DPipeline.from_pretrained(model_path)
|
|
|
|
| 4 |
import math
|
| 5 |
import time
|
| 6 |
import shutil
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
import cv2
|
| 8 |
+
import torch
|
| 9 |
import numpy as np
|
| 10 |
import base64
|
| 11 |
import io
|
|
|
|
| 127 |
if pipeline is not None:
|
| 128 |
return
|
| 129 |
|
| 130 |
+
# GPU / CUDA Diagnostics (runs when GPU is allocated)
|
| 131 |
+
import subprocess as _sp
|
| 132 |
+
print("=" * 60)
|
| 133 |
+
print("[Diagnostics] PyTorch version:", torch.__version__)
|
| 134 |
+
print("[Diagnostics] CUDA available:", torch.cuda.is_available())
|
| 135 |
+
if torch.cuda.is_available():
|
| 136 |
+
print("[Diagnostics] CUDA version:", torch.version.cuda)
|
| 137 |
+
print("[Diagnostics] cuDNN version:", torch.backends.cudnn.version())
|
| 138 |
+
for i in range(torch.cuda.device_count()):
|
| 139 |
+
name = torch.cuda.get_device_name(i)
|
| 140 |
+
cap = torch.cuda.get_device_capability(i)
|
| 141 |
+
mem = torch.cuda.get_device_properties(i).total_memory / 1024**3
|
| 142 |
+
print(f"[Diagnostics] GPU {i}: {name}, sm_{cap[0]}{cap[1]}, {mem:.1f} GB")
|
| 143 |
+
try:
|
| 144 |
+
res = _sp.run(["nvidia-smi", "--query-gpu=name,compute_cap,memory.total", "--format=csv,noheader"], capture_output=True, text=True, timeout=10)
|
| 145 |
+
print("[Diagnostics] nvidia-smi:", res.stdout.strip())
|
| 146 |
+
except Exception as e:
|
| 147 |
+
print(f"[Diagnostics] nvidia-smi failed: {e}")
|
| 148 |
+
print("=" * 60)
|
| 149 |
+
|
| 150 |
model_path = "TencentARC/Pixal3D-T"
|
| 151 |
print(f"[Pipeline] Loading from {model_path}...")
|
| 152 |
pipeline = Pixal3DImageTo3DPipeline.from_pretrained(model_path)
|