Yang2001 committed on
Commit
06fd531
·
1 Parent(s): bcbe6d1

Fix: move GPU diagnostics into init_models() to avoid early CUDA init before spaces import

Browse files
Files changed (1) hide show
  1. app.py +21 -34
app.py CHANGED
@@ -4,41 +4,8 @@ import argparse
4
  import math
5
  import time
6
  import shutil
7
-
8
- # ============================================================================
9
- # GPU / CUDA Environment Diagnostics
10
- # ============================================================================
11
- import torch
12
- print("=" * 60)
13
- print("[Diagnostics] PyTorch version:", torch.__version__)
14
- print("[Diagnostics] CUDA available:", torch.cuda.is_available())
15
- if torch.cuda.is_available():
16
- print("[Diagnostics] CUDA version:", torch.version.cuda)
17
- print("[Diagnostics] cuDNN version:", torch.backends.cudnn.version())
18
- for i in range(torch.cuda.device_count()):
19
- name = torch.cuda.get_device_name(i)
20
- cap = torch.cuda.get_device_capability(i)
21
- mem = torch.cuda.get_device_properties(i).total_mem / 1024**3
22
- print(f"[Diagnostics] GPU {i}: {name}, compute capability: sm_{cap[0]}{cap[1]}, memory: {mem:.1f} GB")
23
- else:
24
- print("[Diagnostics] WARNING: No CUDA GPU detected!")
25
-
26
- try:
27
- import flash_attn_3
28
- print("[Diagnostics] flash_attn_3 imported OK")
29
- from flash_attn_interface import flash_attn_func
30
- print("[Diagnostics] flash_attn_func imported OK")
31
- except Exception as e:
32
- print(f"[Diagnostics] flash_attn_3 import FAILED: {e}")
33
-
34
- try:
35
- result = subprocess.run(["nvidia-smi"], capture_output=True, text=True, timeout=10)
36
- print("[Diagnostics] nvidia-smi:\n" + result.stdout[:500])
37
- except Exception as e:
38
- print(f"[Diagnostics] nvidia-smi failed: {e}")
39
- print("=" * 60)
40
-
41
  import cv2
 
42
  import numpy as np
43
  import base64
44
  import io
@@ -160,6 +127,26 @@ def init_models():
160
  if pipeline is not None:
161
  return
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  model_path = "TencentARC/Pixal3D-T"
164
  print(f"[Pipeline] Loading from {model_path}...")
165
  pipeline = Pixal3DImageTo3DPipeline.from_pretrained(model_path)
 
4
  import math
5
  import time
6
  import shutil
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import cv2
8
+ import torch
9
  import numpy as np
10
  import base64
11
  import io
 
127
  if pipeline is not None:
128
  return
129
 
130
+ # GPU / CUDA Diagnostics (runs when GPU is allocated)
131
+ import subprocess as _sp
132
+ print("=" * 60)
133
+ print("[Diagnostics] PyTorch version:", torch.__version__)
134
+ print("[Diagnostics] CUDA available:", torch.cuda.is_available())
135
+ if torch.cuda.is_available():
136
+ print("[Diagnostics] CUDA version:", torch.version.cuda)
137
+ print("[Diagnostics] cuDNN version:", torch.backends.cudnn.version())
138
+ for i in range(torch.cuda.device_count()):
139
+ name = torch.cuda.get_device_name(i)
140
+ cap = torch.cuda.get_device_capability(i)
141
+ mem = torch.cuda.get_device_properties(i).total_mem / 1024**3
142
+ print(f"[Diagnostics] GPU {i}: {name}, sm_{cap[0]}{cap[1]}, {mem:.1f} GB")
143
+ try:
144
+ res = _sp.run(["nvidia-smi", "--query-gpu=name,compute_cap,memory.total", "--format=csv,noheader"], capture_output=True, text=True, timeout=10)
145
+ print("[Diagnostics] nvidia-smi:", res.stdout.strip())
146
+ except Exception as e:
147
+ print(f"[Diagnostics] nvidia-smi failed: {e}")
148
+ print("=" * 60)
149
+
150
  model_path = "TencentARC/Pixal3D-T"
151
  print(f"[Pipeline] Loading from {model_path}...")
152
  pipeline = Pixal3DImageTo3DPipeline.from_pretrained(model_path)