llm-cal / src /llm_cal /hardware /loader.py
GitHub Actions
Auto-deploy from GitHub Actions
cc6274a
"""Hardware database loader + lookup."""
from __future__ import annotations
from functools import lru_cache
from importlib.resources import files
from pathlib import Path
from typing import Literal
from pydantic import BaseModel, Field
from llm_cal.common.yaml_loader import load_yaml
class GPUSpec(BaseModel):
"""One GPU entry in the hardware database."""
id: str
aliases: list[str] = Field(default_factory=list)
memory_gb: int
nvlink_bandwidth_gbps: int
# HBM/GDDR memory bandwidth (NOT NVLink). This is the critical number for
# decode throughput: decode is memory-bandwidth-bound, and per-token
# latency = active_weight_bytes / (memory_bandwidth × utilization).
# 0 or None means unknown (performance module will skip bandwidth checks).
memory_bandwidth_gbps: int | None = None
fp16_tflops: float
fp8_support: bool
fp4_support: bool
notes_en: str | None = None
notes_zh: str | None = None
# Where the numeric specs came from. A URL to a vendor datasheet / trusted
# benchmark, or a short note like "NVIDIA H100 datasheet 2024-Q3". Lets
# users audit the source; honesty-over-convenience principle.
spec_source: str | None = None
def localized_notes(self, locale: Literal["en", "zh"]) -> str | None:
if locale == "zh":
return self.notes_zh or self.notes_en
return self.notes_en or self.notes_zh
class GPUDatabase(BaseModel):
schema_version: int
gpus: list[GPUSpec]
class UnknownGPUError(Exception):
"""User asked for a GPU id we don't know."""
def _default_path() -> Path:
"""Locate the bundled gpu_database.yaml inside the installed package."""
return Path(str(files("llm_cal.hardware").joinpath("gpu_database.yaml")))
@lru_cache(maxsize=1)
def load_database(path: Path | None = None) -> GPUDatabase:
return load_yaml(path or _default_path(), GPUDatabase)
def lookup(gpu: str, db: GPUDatabase | None = None) -> GPUSpec:
"""Look up a GPU by id or alias. Case-insensitive."""
database = db or load_database()
target = gpu.strip().upper()
for spec in database.gpus:
if spec.id.upper() == target:
return spec
if any(alias.upper() == target for alias in spec.aliases):
return spec
# Helpful rejection
if "X" in target and target.split("X")[-1].isdigit():
raise UnknownGPUError(
f"'{gpu}' looks like old 'H800x8' format. "
f"Use `--gpu {target.split('X')[0]} --gpu-count {target.split('X')[-1]}` instead."
)
raise UnknownGPUError(f"Unknown GPU '{gpu}'. Known: {', '.join(s.id for s in database.gpus)}")