# InfoLens/backend/device.py
"""设备管理:CPU/CUDA/MPS 检测与内存统计"""
import os

import torch


class DeviceManager:
"""设备管理工具类,统一处理设备相关的操作"""
@staticmethod
def clear_cache(device: torch.device) -> None:
"""清理设备缓存"""
if device.type == "cuda":
torch.cuda.empty_cache()
elif device.type == "mps":
torch.mps.empty_cache()

    @staticmethod
    def synchronize(device: torch.device) -> None:
        """Block until all pending work on the given device has finished."""
        if device.type == "cuda":
            torch.cuda.synchronize()
        elif device.type == "mps":
            torch.mps.synchronize()

    @staticmethod
    def get_device() -> torch.device:
        """
        Select the compute device.

        Priority: 1. FORCE_CPU=1 forces CPU  2. cuda > mps > cpu
        """
        if os.environ.get('FORCE_CPU') == '1':
            return torch.device("cpu")
        if torch.cuda.is_available():
            return torch.device("cuda")
        # hasattr guards older PyTorch builds that predate the MPS backend
        if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
            return torch.device("mps")
        return torch.device("cpu")

    @staticmethod
    def get_device_name(device: torch.device) -> str:
        """Return a human-readable display name for the device."""
        if device.type == "cuda":
            return "GPU"
        elif device.type == "mps":
            return "Apple Silicon"
        else:
            return "CPU"

    @staticmethod
    def print_model_load_stats(model: torch.nn.Module, load_time: float) -> None:
        """Print model load statistics (size, time, speed)."""
        # Model size (parameters only; buffers are not counted)
        model_size_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
        model_size_mb = model_size_bytes / (1024 * 1024)
        # Load speed, guarding against division by zero
        load_speed_mb_per_sec = model_size_mb / load_time if load_time > 0 else 0
        # Format size
        size_str = f"{model_size_mb:.1f}MB" if model_size_mb < 1024 else f"{model_size_mb / 1024:.2f}GB"
        # Format time
        if load_time < 1:
            time_str = f"{load_time * 1000:.1f}ms"
        elif load_time < 60:
            time_str = f"{load_time:.2f}s"
        else:
            time_str = f"{int(load_time // 60)}m{load_time % 60:.1f}s"
        print(f"✅ Model loaded [size: {size_str}, time: {time_str}, speed: {load_speed_mb_per_sec:.1f}MB/s]")

    @staticmethod
    def print_cuda_memory_summary(title: str = "GPU memory summary", device: int = 0) -> None:
        """Print a detailed CUDA memory summary."""
        if not torch.cuda.is_available():
            return
        print(f"\n{'='*60}")
        print(f"🔍 {title}")
        print(f"{'='*60}")
        # Basic statistics, converted from bytes to GB
        allocated = torch.cuda.memory_allocated(device) / 1024**3
        reserved = torch.cuda.memory_reserved(device) / 1024**3
        max_allocated = torch.cuda.max_memory_allocated(device) / 1024**3
        total = torch.cuda.get_device_properties(device).total_memory / 1024**3
        print(f"📊 Total VRAM: {total:.2f} GB")
        print(f"✅ Allocated: {allocated:.2f} GB ({allocated/total*100:.1f}%)")
        print(f"📦 Reserved: {reserved:.2f} GB ({reserved/total*100:.1f}%)")
        print(f"📈 Peak allocated: {max_allocated:.2f} GB")
        print(f"💚 Free (unreserved): {total - reserved:.2f} GB ({(total-reserved)/total*100:.1f}%)")
        print(f"🔸 Reserved but unallocated (fragmentation): {reserved - allocated:.2f} GB")
        # Detailed statistics (abridged); memory_stats may not be available everywhere
        try:
            stats = torch.cuda.memory_stats(device)
            num_retries = stats.get("num_alloc_retries", 0)
            num_ooms = stats.get("num_ooms", 0)
            if num_retries > 0 or num_ooms > 0:
                print(f"⚠️ Allocation retries: {num_retries}, OOMs: {num_ooms}")
        except Exception:
            pass
        print(f"{'='*60}\n")