import sys
from collections import defaultdict
from functools import wraps
if "/www/server/panel/class" not in sys.path:
sys.path.append('/www/server/panel/class')
import public
try:
import pynvml
except:
public.ExecShell("btpip install nvidia-ml-py")
import pynvml
try:
from mod.project.docker.app.gpu.base import GPUBase
except:
class GPUBase:
pass
# Task registries populated by @register_task at import time.
# Plain dicts: the original used defaultdict() with no factory, which
# behaves exactly like dict and only obscures the intent.
device_tasks = {}  # per-device tasks: func(self, handle) -> value
system_tasks = {}  # system-wide tasks: func(self) -> value


def register_task(name: str):
    """Decorator factory that registers a metric-collection task.

    *name* has the form ``'device:<task>'`` or ``'system:<task>'``; the
    undecorated function is stored in the matching registry under
    ``<task>`` so the collector can iterate all tasks by name. Any other
    prefix is silently ignored (the function is returned unregistered).
    """
    def task_decorator(task_func):
        # maxsplit=1 keeps a task name containing ':' intact instead of
        # raising ValueError on unpacking.
        task_type, task_name = name.split(':', 1)
        if task_type == 'device':
            device_tasks[task_name] = task_func
        elif task_type == 'system':
            system_tasks[task_name] = task_func

        @wraps(task_func)
        def func_wrapper(*args, **kwargs):
            return task_func(*args, **kwargs)
        return func_wrapper
    return task_decorator
class NVIDIA(GPUBase):
    """Collects NVIDIA GPU metrics through the NVML bindings (pynvml).

    Per-device tasks registered via ``@register_task('device:...')`` take
    ``(self, handle)``; system tasks via ``@register_task('system:...')``
    take ``(self)``. ``get_all_device_info`` runs them all.
    """

    name = 'nvidia'
    support = None  # cached result of the last is_support() probe

    def __init__(self):
        # Probe support — is_support() also initializes the NVML library.
        self.device_count = 0
        if self.is_support():
            self.device_count = pynvml.nvmlDeviceGetCount()

    def __del__(self):
        # Use the cached flag: the original called is_support() here,
        # which pointlessly re-initialized NVML right before shutdown.
        if self.support:
            try:
                pynvml.nvmlShutdown()
            except pynvml.NVMLError:
                pass  # best-effort cleanup during interpreter teardown

    def get_all_device_info(self):
        """Return ``{'system': {...}, 0: {...}, 1: {...}, ...}`` for all GPUs."""
        all_info = {}
        all_info['system'] = self.get_system_info()
        for index in range(self.device_count):
            all_info[index] = self.get_info_by_index(index)
        return all_info

    def get_info_by_index(self, index=0):
        """Run every registered per-device task against GPU *index*.

        A failing task maps its name to None instead of aborting the scan.
        """
        info = {}
        handle = pynvml.nvmlDeviceGetHandleByIndex(index)
        for t_name, t_func in device_tasks.items():
            try:
                info[t_name] = t_func(self, handle)
            except Exception:
                # Broad on purpose: one broken metric must not kill the rest,
                # but a bare except: would also swallow SystemExit/KeyboardInterrupt.
                info[t_name] = None
        return info

    def get_system_info(self):
        """Run every registered system-wide task; failures map to None."""
        info = {}
        for t_name, t_func in system_tasks.items():
            try:
                info[t_name] = t_func(self)
            except Exception:
                info[t_name] = None
        return info

    @classmethod
    def is_support(cls):
        """Initialize NVML; cache and return whether an NVIDIA stack exists."""
        try:
            pynvml.nvmlInit()
            cls.support = True
            return True
        except pynvml.NVMLError:
            cls.support = False
            return False

    @register_task('device:memory')
    def _get_mem_info(self, handle):
        # One NVML query instead of three; values converted to GiB.
        mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
        return {
            'size': int(mem.total) / 1024 ** 3,
            'free': int(mem.free) / 1024 ** 3,
            'used': int(mem.used) / 1024 ** 3,
        }

    @register_task('device:clock')
    def _get_clock_info(self, handle):
        # Current clock frequency per NVML clock domain.
        return {
            'graphics': pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_GRAPHICS),
            'sm': pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_SM),
            'memory': pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_MEM),
            'video': pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_VIDEO),
        }

    @register_task('device:temperature')
    def _get_temp_info(self, handle):
        # GPU core temperature; fall back to the V1 API on older bindings.
        # BUG FIX: the original `except NVMLError or AttributeError:` only
        # caught NVMLError — `or` returns its first truthy operand.
        try:
            return pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
        except (pynvml.NVMLError, AttributeError):
            return pynvml.nvmlDeviceGetTemperatureV1(handle, pynvml.NVML_TEMPERATURE_GPU)

    @register_task('device:utilization')
    def _get_uti_info(self, handle):
        # One NVML call instead of two.
        rates = pynvml.nvmlDeviceGetUtilizationRates(handle)
        return {'gpu': rates.gpu, 'memory': rates.memory}

    @register_task('device:processes')
    def _get_proc_uti(self, handle):
        """List compute/graphics/MPS processes currently using this GPU."""
        info = []
        sources = (
            ('Compute', pynvml.nvmlDeviceGetComputeRunningProcesses),
            ('Graphics', pynvml.nvmlDeviceGetGraphicsRunningProcesses),
            ('MPS', pynvml.nvmlDeviceGetMPSComputeRunningProcesses),
        )
        for proc_type, getter in sources:
            for p in getter(handle):
                # Annotate the NVML process struct in place with the
                # resolved process name and its source type.
                p.__dict__['name'] = pynvml.nvmlSystemGetProcessName(p.pid)
                p.__dict__['type'] = proc_type
                info.append(p.__dict__)
        return info

    @register_task('device:fan')
    def _get_fan_info(self, handle):
        # Try progressively older fan-speed APIs across pynvml versions;
        # clauses are checked in order, so the final Exception arm is the
        # catch-all fallback (e.g. passively cooled boards).
        info = {}
        try:
            info['speed'] = pynvml.nvmlDeviceGetFanSpeedRPM(handle).speed
        except AttributeError:
            info['speed'] = pynvml.nvmlDeviceGetFanSpeed(handle)
        except pynvml.NVMLError:
            info['speed'] = pynvml.nvmlDeviceGetFanSpeed_v2(handle, 0)
        except Exception:
            info['speed'] = 0
        return info

    @register_task('device:name')
    def _get_device_name(self, handle):
        # Marketing name of the device as reported by NVML.
        return pynvml.nvmlDeviceGetName(handle)

    @register_task('device:power')
    def _get_device_power(self, handle):
        # Current draw and management limit (milliwatts per the NVML API).
        return {
            'current': pynvml.nvmlDeviceGetPowerUsage(handle),
            'max': pynvml.nvmlDeviceGetPowerManagementLimit(handle),
        }

    @register_task('system:version')
    def _get_device_version(self):
        info = {}
        info['driver'] = pynvml.nvmlSystemGetDriverVersion()
        # BUG FIX: `except A or B:` only caught A; use an exception tuple.
        try:
            info['cuda'] = pynvml.nvmlSystemGetCudaDriverVersion()
        except (pynvml.NVMLError, AttributeError):
            info['cuda'] = pynvml.nvmlSystemGetCudaDriverVersion_v2()
        return info

    @register_task('system:count')
    def _get_device_count(self):
        # Number of NVML-visible GPUs.
        return pynvml.nvmlDeviceGetCount()
if __name__ == '__main__':
    # Manual smoke test: dump every registered metric for each detected GPU.
    gpu = NVIDIA()
    print(gpu.get_all_device_info())