llm-cal / src /llm_cal /model_source /huggingface.py
GitHub Actions
Auto-deploy from GitHub Actions
cc6274a
"""HuggingFace source. Uses `huggingface_hub` for metadata + `httpx` for config fetch.
Anti-pattern warning: do NOT call `list_repo_files()` then head-request each file.
Always use `model_info(files_metadata=True)` which returns all sibling sizes in
ONE request. Verified in `tests/test_hf.py` by asserting HTTP call count.
"""
from __future__ import annotations
import json
from typing import Any
import httpx
from huggingface_hub import HfApi
from huggingface_hub.utils import (
GatedRepoError,
HfHubHTTPError,
RepositoryNotFoundError,
)
from llm_cal.model_source.auth import get_hf_token, hf_auth_error_message
from llm_cal.model_source.base import (
AuthRequiredError,
ModelArtifact,
ModelNotFoundError,
ModelSource,
SiblingFile,
SourceUnavailableError,
)
_DEFAULT_ENDPOINT = "https://huggingface.co"
_CONFIG_PATH = "/{model_id}/resolve/{revision}/config.json"
# Same value as before; kept for anything that still formats it directly.
# HuggingFaceSource builds its URL from the endpoint it resolved instead.
_CONFIG_URL = _DEFAULT_ENDPOINT + _CONFIG_PATH


class HuggingFaceSource(ModelSource):
    """Model source backed by the HuggingFace Hub.

    ``fetch`` makes two HTTP requests: one ``model_info`` call that yields the
    sibling file list (with sizes) and the commit sha, then one GET for
    ``config.json`` pinned to that sha.
    """

    name = "huggingface"

    def __init__(self, endpoint: str | None = None, timeout_s: float = 30.0) -> None:
        """Create the source.

        Args:
            endpoint: Hub base URL. ``None`` lets ``huggingface_hub`` choose
                (it honours the ``HF_ENDPOINT`` environment variable).
            timeout_s: Timeout in seconds applied to both HTTP requests.
        """
        # huggingface_hub picks up HF_ENDPOINT env; we pass through for explicitness
        self._api = HfApi(endpoint=endpoint, token=get_hf_token())
        self._timeout_s = timeout_s
        # Reuse the endpoint HfApi resolved so the config.json request goes to the
        # same host as model_info (custom endpoint, mirror, or the public hub).
        resolved = endpoint or getattr(self._api, "endpoint", None) or _DEFAULT_ENDPOINT
        self._endpoint = resolved.rstrip("/")

    def fetch(self, model_id: str) -> ModelArtifact:
        """Fetch sibling metadata and ``config.json`` for ``model_id``.

        Args:
            model_id: Repository id, e.g. ``"org/model"``.

        Returns:
            A ``ModelArtifact`` carrying the commit sha, parsed config and the
            sibling files with their sizes.

        Raises:
            ModelNotFoundError: The repo does not exist or has no ``config.json``.
            AuthRequiredError: The repo is gated or the hub answered 401/403.
            SourceUnavailableError: Rate limit, other HTTP error, timeout,
                transport failure, or an unparsable ``config.json``.
        """
        token = get_hf_token()
        # Step 1: siblings + commit sha in ONE request.
        # CRITICAL: files_metadata=True — see module docstring.
        try:
            info = self._api.model_info(
                repo_id=model_id,
                files_metadata=True,
                # Without this the configured timeout never reached the call,
                # although the error message below reports it.
                timeout=self._timeout_s,
                token=token,
            )
        except GatedRepoError as e:
            # Must precede RepositoryNotFoundError: GatedRepoError subclasses it,
            # so the reverse order reports gated repos as "not found".
            raise AuthRequiredError(hf_auth_error_message(model_id)) from e
        except RepositoryNotFoundError as e:
            raise ModelNotFoundError(f"Model '{model_id}' not found on HuggingFace.") from e
        except HfHubHTTPError as e:
            response = getattr(e, "response", None)
            status = getattr(response, "status_code", None)
            if status in (401, 403):
                raise AuthRequiredError(hf_auth_error_message(model_id)) from e
            if status == 429:
                # status == 429 implies a response object is present.
                retry = response.headers.get("Retry-After", "unknown")
                raise SourceUnavailableError(
                    f"HuggingFace rate limit (429). Retry-After: {retry}s. "
                    "Setting HF_TOKEN increases your quota."
                ) from e
            raise SourceUnavailableError(f"HuggingFace error ({status}): {e}") from e
        except (httpx.TimeoutException, TimeoutError) as e:
            # NOTE(review): huggingface_hub releases built on `requests` raise
            # requests' own timeout types, which are not caught here — confirm
            # against the pinned huggingface_hub version.
            raise SourceUnavailableError(
                f"HuggingFace request timed out after {self._timeout_s}s."
            ) from e

        siblings = tuple(
            SiblingFile(filename=s.rfilename, size=s.size) for s in (info.siblings or [])
        )
        commit_sha = info.sha

        # Step 2: fetch config.json. If commit sha is available, pin to it so we don't
        # race with repo updates between the two calls.
        config = self._fetch_config(model_id, commit_sha or "main", token)
        return ModelArtifact(
            source=self.name,
            model_id=model_id,
            commit_sha=commit_sha,
            config=config,
            siblings=siblings,
        )

    def _fetch_config(self, model_id: str, revision: str, token: str | None) -> dict[str, Any]:
        """Download and parse ``config.json`` at ``revision``.

        Args:
            model_id: Repository id.
            revision: Commit sha or branch name to resolve the file against.
            token: Optional bearer token, sent as an ``Authorization`` header.

        Returns:
            The parsed top-level JSON object.

        Raises:
            ModelNotFoundError: The file is missing (HTTP 404).
            AuthRequiredError: HTTP 401/403.
            SourceUnavailableError: Rate limit, any other HTTP status >= 400,
                transport failure, or a body that is not a JSON object.
        """
        url = self._endpoint + _CONFIG_PATH.format(model_id=model_id, revision=revision)
        headers = {"Authorization": f"Bearer {token}"} if token else {}
        try:
            resp = httpx.get(url, headers=headers, timeout=self._timeout_s, follow_redirects=True)
        except httpx.RequestError as e:
            # RequestError covers timeouts and connect errors as well as read,
            # protocol and redirect failures that previously escaped unwrapped.
            raise SourceUnavailableError(f"config.json fetch failed: {e}") from e

        if resp.status_code == 404:
            raise ModelNotFoundError(
                f"Model '{model_id}' exists but has no config.json. "
                "May be a GGUF-only or dataset repo (not supported in v0.1)."
            )
        if resp.status_code in (401, 403):
            raise AuthRequiredError(hf_auth_error_message(model_id))
        if resp.status_code == 429:
            retry = resp.headers.get("Retry-After", "unknown")
            raise SourceUnavailableError(f"HuggingFace rate limit (429). Retry-After: {retry}s.")
        if resp.status_code >= 400:
            raise SourceUnavailableError(f"config.json fetch returned HTTP {resp.status_code}")

        try:
            parsed = json.loads(resp.text)
        except json.JSONDecodeError as e:
            raise SourceUnavailableError(
                f"config.json is not valid JSON (line {e.lineno} col {e.colno}): {e.msg}"
            ) from e
        # Valid JSON can still be a list or scalar; callers are promised a dict.
        if not isinstance(parsed, dict):
            raise SourceUnavailableError(
                f"config.json is not a JSON object (got {type(parsed).__name__})."
            )
        return parsed