Spaces:
Running
Running
| """Hardware database loader + lookup.""" | |
| from __future__ import annotations | |
| from functools import lru_cache | |
| from importlib.resources import files | |
| from pathlib import Path | |
| from typing import Literal | |
| from pydantic import BaseModel, Field | |
| from llm_cal.common.yaml_loader import load_yaml | |
| class GPUSpec(BaseModel): | |
| """One GPU entry in the hardware database.""" | |
| id: str | |
| aliases: list[str] = Field(default_factory=list) | |
| memory_gb: int | |
| nvlink_bandwidth_gbps: int | |
| # HBM/GDDR memory bandwidth (NOT NVLink). This is the critical number for | |
| # decode throughput: decode is memory-bandwidth-bound, and per-token | |
| # latency = active_weight_bytes / (memory_bandwidth × utilization). | |
| # 0 or None means unknown (performance module will skip bandwidth checks). | |
| memory_bandwidth_gbps: int | None = None | |
| fp16_tflops: float | |
| fp8_support: bool | |
| fp4_support: bool | |
| notes_en: str | None = None | |
| notes_zh: str | None = None | |
| # Where the numeric specs came from. A URL to a vendor datasheet / trusted | |
| # benchmark, or a short note like "NVIDIA H100 datasheet 2024-Q3". Lets | |
| # users audit the source; honesty-over-convenience principle. | |
| spec_source: str | None = None | |
| def localized_notes(self, locale: Literal["en", "zh"]) -> str | None: | |
| if locale == "zh": | |
| return self.notes_zh or self.notes_en | |
| return self.notes_en or self.notes_zh | |
| class GPUDatabase(BaseModel): | |
| schema_version: int | |
| gpus: list[GPUSpec] | |
| class UnknownGPUError(Exception): | |
| """User asked for a GPU id we don't know.""" | |
| def _default_path() -> Path: | |
| """Locate the bundled gpu_database.yaml inside the installed package.""" | |
| return Path(str(files("llm_cal.hardware").joinpath("gpu_database.yaml"))) | |
| def load_database(path: Path | None = None) -> GPUDatabase: | |
| return load_yaml(path or _default_path(), GPUDatabase) | |
| def lookup(gpu: str, db: GPUDatabase | None = None) -> GPUSpec: | |
| """Look up a GPU by id or alias. Case-insensitive.""" | |
| database = db or load_database() | |
| target = gpu.strip().upper() | |
| for spec in database.gpus: | |
| if spec.id.upper() == target: | |
| return spec | |
| if any(alias.upper() == target for alias in spec.aliases): | |
| return spec | |
| # Helpful rejection | |
| if "X" in target and target.split("X")[-1].isdigit(): | |
| raise UnknownGPUError( | |
| f"'{gpu}' looks like old 'H800x8' format. " | |
| f"Use `--gpu {target.split('X')[0]} --gpu-count {target.split('X')[-1]}` instead." | |
| ) | |
| raise UnknownGPUError(f"Unknown GPU '{gpu}'. Known: {', '.join(s.id for s in database.gpus)}") | |