Fix GPU detection and OUTPUT_DIR attribute errors
Browse files - app/routers/system.py +64 -29
app/routers/system.py
CHANGED
|
@@ -33,19 +33,24 @@ class SystemInfo(BaseModel):
|
|
| 33 |
total_memory_gb: float
|
| 34 |
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
class ResourceUsage(BaseModel):
|
| 37 |
"""Current resource usage."""
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
gpu_utilization: Optional[float]
|
| 45 |
-
disk_used_gb: float
|
| 46 |
-
disk_total_gb: float
|
| 47 |
-
disk_percent: float
|
| 48 |
-
|
| 49 |
|
| 50 |
class StorageInfo(BaseModel):
|
| 51 |
"""Storage information."""
|
|
@@ -103,29 +108,59 @@ async def get_resource_usage():
|
|
| 103 |
# Disk
|
| 104 |
disk = shutil.disk_usage('/')
|
| 105 |
|
| 106 |
-
# GPU
|
| 107 |
-
|
| 108 |
-
gpu_memory_total = None
|
| 109 |
-
gpu_utilization = None
|
| 110 |
|
| 111 |
if torch.cuda.is_available():
|
| 112 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
gpu_memory_used = round(torch.cuda.memory_allocated() / (1024**3), 2)
|
| 114 |
gpu_memory_total = round(torch.cuda.get_device_properties(0).total_memory / (1024**3), 2)
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
return ResourceUsage(
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
)
|
| 130 |
|
| 131 |
|
|
@@ -136,8 +171,8 @@ async def get_storage_info():
|
|
| 136 |
("Models", settings.MODELS_DIR),
|
| 137 |
("Cache", settings.CACHE_DIR),
|
| 138 |
("Logs", settings.LOGS_DIR),
|
| 139 |
-
("Uploads", settings.
|
| 140 |
-
("Outputs", settings.
|
| 141 |
]
|
| 142 |
|
| 143 |
result = []
|
|
|
|
| 33 |
total_memory_gb: float
|
| 34 |
|
| 35 |
|
| 36 |
+
class GPUInfo(BaseModel):
    """GPU information.

    Describes whether CUDA devices are usable and, when they are, how many
    there are, what they are called and how much memory is in use.
    """

    # True when a CUDA device was detected and queried successfully.
    available: bool
    # Number of CUDA devices; stays 0 when no GPU is available.
    count: int = 0
    # Device names, one entry per device index.
    # NOTE(review): pydantic copies mutable field defaults per instance, so
    # this literal list is not shared between models — confirm for the
    # pydantic version pinned by the project.
    names: List[str] = []
    # Memory figures are expressed in GiB (bytes / 1024**3); None when unknown.
    memory_used_gb: Optional[float] = None
    memory_total_gb: Optional[float] = None
    # GPU utilization; None when it has not been measured.
    utilization: Optional[float] = None
|
| 44 |
+
|
| 45 |
+
|
| 46 |
class ResourceUsage(BaseModel):
    """Current resource usage.

    Groups the CPU, memory, disk, GPU and cache figures into one response
    payload, with each resource reported as its own nested section.
    """

    # CPU usage; populated with a "percent" key.
    cpu: Dict[str, float]
    # System memory; populated with "percent", "used_gb" and "total_gb" keys.
    memory: Dict[str, float]
    # Disk usage; populated with "percent", "used_gb" and "total_gb" keys.
    disk: Dict[str, float]
    # GPU availability and memory details.
    gpu: GPUInfo
    # Cache statistics; populated with a "total_bytes" key.
    cache: Dict[str, Any]
|
| 53 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
class StorageInfo(BaseModel):
|
| 56 |
"""Storage information."""
|
|
|
|
| 108 |
# Disk
|
| 109 |
disk = shutil.disk_usage('/')
|
| 110 |
|
| 111 |
+
# GPU info
|
| 112 |
+
gpu_info = GPUInfo(available=False, count=0, names=[])
|
|
|
|
|
|
|
| 113 |
|
| 114 |
if torch.cuda.is_available():
|
| 115 |
try:
|
| 116 |
+
gpu_names = []
|
| 117 |
+
for i in range(torch.cuda.device_count()):
|
| 118 |
+
gpu_names.append(torch.cuda.get_device_name(i))
|
| 119 |
+
|
| 120 |
gpu_memory_used = round(torch.cuda.memory_allocated() / (1024**3), 2)
|
| 121 |
gpu_memory_total = round(torch.cuda.get_device_properties(0).total_memory / (1024**3), 2)
|
| 122 |
+
|
| 123 |
+
gpu_info = GPUInfo(
|
| 124 |
+
available=True,
|
| 125 |
+
count=torch.cuda.device_count(),
|
| 126 |
+
names=gpu_names,
|
| 127 |
+
memory_used_gb=gpu_memory_used,
|
| 128 |
+
memory_total_gb=gpu_memory_total,
|
| 129 |
+
utilization=None
|
| 130 |
+
)
|
| 131 |
+
except Exception as e:
|
| 132 |
+
logger.error(f"Error getting GPU info: {e}")
|
| 133 |
+
|
| 134 |
+
# Cache info
|
| 135 |
+
cache_total_bytes = 0
|
| 136 |
+
cache_dirs = [settings.CACHE_DIR, settings.HF_CACHE_DIR]
|
| 137 |
+
for cache_path in cache_dirs:
|
| 138 |
+
if os.path.exists(cache_path):
|
| 139 |
+
for root, dirs, files in os.walk(cache_path):
|
| 140 |
+
for f in files:
|
| 141 |
+
try:
|
| 142 |
+
cache_total_bytes += os.path.getsize(os.path.join(root, f))
|
| 143 |
+
except:
|
| 144 |
+
pass
|
| 145 |
|
| 146 |
return ResourceUsage(
|
| 147 |
+
cpu={
|
| 148 |
+
"percent": round(cpu_percent, 1)
|
| 149 |
+
},
|
| 150 |
+
memory={
|
| 151 |
+
"percent": round(memory.percent, 1),
|
| 152 |
+
"used_gb": round(memory.used / (1024**3), 2),
|
| 153 |
+
"total_gb": round(memory.total / (1024**3), 2)
|
| 154 |
+
},
|
| 155 |
+
disk={
|
| 156 |
+
"percent": round((disk.used / disk.total) * 100, 1),
|
| 157 |
+
"used_gb": round(disk.used / (1024**3), 2),
|
| 158 |
+
"total_gb": round(disk.total / (1024**3), 2)
|
| 159 |
+
},
|
| 160 |
+
gpu=gpu_info,
|
| 161 |
+
cache={
|
| 162 |
+
"total_bytes": cache_total_bytes
|
| 163 |
+
}
|
| 164 |
)
|
| 165 |
|
| 166 |
|
|
|
|
| 171 |
("Models", settings.MODELS_DIR),
|
| 172 |
("Cache", settings.CACHE_DIR),
|
| 173 |
("Logs", settings.LOGS_DIR),
|
| 174 |
+
("Uploads", settings.UPLOAD_DIR),
|
| 175 |
+
("Outputs", settings.OUTPUT_DIR)
|
| 176 |
]
|
| 177 |
|
| 178 |
result = []
|