Yang2001 committed
Commit bcbe6d1 · 1 Parent(s): f4d81f2

Add GPU/CUDA diagnostics at startup

Files changed (2)
  1. app.py +34 -1
  2. autotune_cache.json +12 -0
app.py CHANGED
@@ -4,8 +4,41 @@ import argparse
 import math
 import time
 import shutil
-import cv2
+
+# ============================================================================
+# GPU / CUDA Environment Diagnostics
+# ============================================================================
 import torch
+print("=" * 60)
+print("[Diagnostics] PyTorch version:", torch.__version__)
+print("[Diagnostics] CUDA available:", torch.cuda.is_available())
+if torch.cuda.is_available():
+    print("[Diagnostics] CUDA version:", torch.version.cuda)
+    print("[Diagnostics] cuDNN version:", torch.backends.cudnn.version())
+    for i in range(torch.cuda.device_count()):
+        name = torch.cuda.get_device_name(i)
+        cap = torch.cuda.get_device_capability(i)
+        mem = torch.cuda.get_device_properties(i).total_memory / 1024**3
+        print(f"[Diagnostics] GPU {i}: {name}, compute capability: sm_{cap[0]}{cap[1]}, memory: {mem:.1f} GB")
+else:
+    print("[Diagnostics] WARNING: No CUDA GPU detected!")
+
+try:
+    import flash_attn_3
+    print("[Diagnostics] flash_attn_3 imported OK")
+    from flash_attn_interface import flash_attn_func
+    print("[Diagnostics] flash_attn_func imported OK")
+except Exception as e:
+    print(f"[Diagnostics] flash_attn_3 import FAILED: {e}")
+
+try:
+    result = subprocess.run(["nvidia-smi"], capture_output=True, text=True, timeout=10)
+    print("[Diagnostics] nvidia-smi:\n" + result.stdout[:500])
+except Exception as e:
+    print(f"[Diagnostics] nvidia-smi failed: {e}")
+print("=" * 60)
+
+import cv2
 import numpy as np
 import base64
 import io
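
For reference, the same startup checks can be packaged as a small standalone function that returns a dict instead of printing. This is only a sketch, not part of the commit: the name describe_gpu_environment is hypothetical, importlib.util.find_spec and shutil.which stand in for the import and nvidia-smi probes above, and subprocess is imported explicitly because the hunk does not show whether app.py already imports it earlier in the file.

import importlib.util
import shutil
import subprocess

import torch


def describe_gpu_environment() -> dict:
    """Collect the facts the startup diagnostics print, as a dict (sketch only)."""
    info = {
        "torch": torch.__version__,
        "cuda_available": torch.cuda.is_available(),
        "flash_attn_3_installed": importlib.util.find_spec("flash_attn_3") is not None,
        "nvidia_smi_on_path": shutil.which("nvidia-smi") is not None,
    }
    if info["cuda_available"]:
        info["cuda"] = torch.version.cuda
        info["cudnn"] = torch.backends.cudnn.version()
        info["devices"] = []
        for i in range(torch.cuda.device_count()):
            props = torch.cuda.get_device_properties(i)
            cap = torch.cuda.get_device_capability(i)
            info["devices"].append({
                "name": torch.cuda.get_device_name(i),
                "arch": f"sm_{cap[0]}{cap[1]}",
                "memory_gb": round(props.total_memory / 1024**3, 1),
            })
    if info["nvidia_smi_on_path"]:
        # Same probe as in the diff above, but the import is explicit and failures stay non-fatal.
        try:
            out = subprocess.run(["nvidia-smi"], capture_output=True, text=True, timeout=10)
            info["nvidia_smi"] = out.stdout[:500]
        except Exception as e:
            info["nvidia_smi"] = f"failed: {e}"
    return info

Printing describe_gpu_environment() once at import time would surface the same information as the block above while keeping the probe logic in one place.
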
autotune_cache.json CHANGED
@@ -25039,6 +25039,18 @@
       "maxnreg": null,
       "pre_hook": null,
       "ir_override": null
+    },
+    "(23, 7739451, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": {
+      "kwargs": {
+        "BM": 16,
+        "BK": 8
+      },
+      "num_warps": 2,
+      "num_ctas": 1,
+      "num_stages": 3,
+      "maxnreg": null,
+      "pre_hook": null,
+      "ir_override": null
     }
   },
   "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm.sparse_conv_fwd_implicit_gemm_kernel": {},