"""Device management: CPU/CUDA/MPS detection and memory statistics."""

import os

import torch


class DeviceManager:
    """Static helpers that centralise device-related operations."""

    @staticmethod
    def clear_cache(device: torch.device) -> None:
        """Release cached allocator memory for ``device``.

        Calls ``torch.cuda.empty_cache()`` for CUDA devices and
        ``torch.mps.empty_cache()`` for MPS devices. Any other device type
        (e.g. CPU) is a no-op.
        """
        if device.type == "cuda":
            torch.cuda.empty_cache()
        elif device.type == "mps":
            torch.mps.empty_cache()

    @staticmethod
    def synchronize(device: torch.device) -> None:
        """Block until pending work on ``device`` has finished.

        For CUDA the given device is forwarded to ``torch.cuda.synchronize``
        so the device named by the caller is the one synchronised, rather
        than whichever device happens to be current. Any device type other
        than CUDA/MPS is a no-op.
        """
        if device.type == "cuda":
            torch.cuda.synchronize(device)
        elif device.type == "mps":
            torch.mps.synchronize()

    @staticmethod
    def get_device() -> torch.device:
        """Pick the compute device.

        Priority:
            1. ``FORCE_CPU=1`` in the environment forces the CPU.
            2. Otherwise cuda > mps > cpu, based on availability.
        """
        if os.environ.get('FORCE_CPU') == '1':
            return torch.device("cpu")

        if torch.cuda.is_available():
            return torch.device("cuda")

        # ``torch.backends.mps`` is looked up defensively because the
        # attribute may be missing on some torch builds.
        if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
            return torch.device("mps")

        return torch.device("cpu")

    @staticmethod
    def get_device_name(device: torch.device) -> str:
        """Return a short display name for ``device``.

        ``"GPU"`` for CUDA, ``"Apple Silicon"`` for MPS and ``"CPU"`` for
        every other device type.
        """
        if device.type == "cuda":
            return "GPU"
        if device.type == "mps":
            return "Apple Silicon"
        return "CPU"

    @staticmethod
    def _format_duration(seconds: float) -> str:
        """Format a duration as milliseconds, seconds, or minutes+seconds."""
        if seconds < 1:
            return f"{seconds * 1000:.1f}ms"
        if seconds < 60:
            return f"{seconds:.2f}s"
        # Round to the displayed precision *before* splitting into minutes
        # and seconds; otherwise e.g. 119.97 s would render as "1m60.0s".
        minutes, remainder = divmod(round(seconds, 1), 60)
        return f"{int(minutes)}m{remainder:.1f}s"

    @staticmethod
    def print_model_load_stats(model: torch.nn.Module, load_time: float) -> None:
        """Print a one-line summary of a model load (size, time, speed).

        Args:
            model: The loaded module. Its size is the sum of
                ``numel() * element_size()`` over ``model.parameters()``.
            load_time: Elapsed load time in seconds. When it is not
                positive the reported speed is 0.
        """
        # Model size, from parameters only.
        model_size_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
        model_size_mb = model_size_bytes / (1024 * 1024)

        # Throughput; guarded against division by zero.
        load_speed_mb_per_sec = model_size_mb / load_time if load_time > 0 else 0

        # Switch the size unit to GB once it reaches 1024 MB.
        if model_size_mb < 1024:
            size_str = f"{model_size_mb:.1f}MB"
        else:
            size_str = f"{model_size_mb / 1024:.2f}GB"

        time_str = DeviceManager._format_duration(load_time)

        print(f"✅ 模型加载完成 [大小: {size_str}, 耗时: {time_str}, 速度: {load_speed_mb_per_sec:.1f}MB/s]")

    @staticmethod
    def print_cuda_memory_summary(title: str = "GPU 内存统计", device=0) -> None:
        """Print a CUDA memory report for ``device``.

        Does nothing when CUDA is unavailable.

        Args:
            title: Heading printed at the top of the report.
            device: Device forwarded unchanged to the ``torch.cuda`` memory
                query functions (defaults to index 0).
        """
        if not torch.cuda.is_available():
            return

        gib = 1024**3
        rule = f"{'='*60}"

        print(f"\n{rule}")
        print(f"🔍 {title}")
        print(rule)

        # Basic figures, all converted from bytes to GiB.
        allocated = torch.cuda.memory_allocated(device) / gib
        reserved = torch.cuda.memory_reserved(device) / gib
        max_allocated = torch.cuda.max_memory_allocated(device) / gib
        total = torch.cuda.get_device_properties(device).total_memory / gib

        print(f"📊 总显存: {total:.2f} GB")
        print(f"✅ 已分配 (allocated): {allocated:.2f} GB ({allocated/total*100:.1f}%)")
        print(f"📦 已预留 (reserved): {reserved:.2f} GB ({reserved/total*100:.1f}%)")
        print(f"📈 峰值分配: {max_allocated:.2f} GB")
        print(f"💚 可用空间: {total - reserved:.2f} GB ({(total-reserved)/total*100:.1f}%)")
        # Reserved-but-unallocated memory is what the report labels as
        # fragmentation.
        print(f"🔸 碎片化: {reserved - allocated:.2f} GB")

        # Detailed counters (simplified): only shown when something
        # noteworthy happened.
        try:
            stats = torch.cuda.memory_stats(device)
            alloc_retries = stats.get("num_alloc_retries", 0)
            num_ooms = stats.get("num_ooms", 0)
            if alloc_retries > 0 or num_ooms > 0:
                print(f"⚠️ 分配重试: {alloc_retries} 次, OOM: {num_ooms} 次")
        except Exception:
            # Deliberately best-effort: these counters are optional extras
            # and a failure to read them must not break the report.
            pass

        print(f"{rule}\n")