Spaces:
Running
Running
| """HuggingFace source. Uses `huggingface_hub` for metadata + `httpx` for config fetch. | |
| Anti-pattern warning: do NOT call `list_repo_files()` then head-request each file. | |
| Always use `model_info(files_metadata=True)` which returns all sibling sizes in | |
| ONE request. Verified in `tests/test_hf.py` by asserting HTTP call count. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| from typing import Any | |
| import httpx | |
| from huggingface_hub import HfApi | |
| from huggingface_hub.utils import ( | |
| GatedRepoError, | |
| HfHubHTTPError, | |
| RepositoryNotFoundError, | |
| ) | |
| from llm_cal.model_source.auth import get_hf_token, hf_auth_error_message | |
| from llm_cal.model_source.base import ( | |
| AuthRequiredError, | |
| ModelArtifact, | |
| ModelNotFoundError, | |
| ModelSource, | |
| SiblingFile, | |
| SourceUnavailableError, | |
| ) | |
| _CONFIG_URL = "https://huggingface.co/{model_id}/resolve/{revision}/config.json" | |
| class HuggingFaceSource(ModelSource): | |
| name = "huggingface" | |
| def __init__(self, endpoint: str | None = None, timeout_s: float = 30.0) -> None: | |
| # huggingface_hub picks up HF_ENDPOINT env; we pass through for explicitness | |
| self._api = HfApi(endpoint=endpoint, token=get_hf_token()) | |
| self._timeout_s = timeout_s | |
| self._endpoint = endpoint or "https://huggingface.co" | |
| def fetch(self, model_id: str) -> ModelArtifact: | |
| token = get_hf_token() | |
| # Step 1: siblings + commit sha in ONE request. | |
| # CRITICAL: files_metadata=True — see module docstring. | |
| try: | |
| info = self._api.model_info( | |
| repo_id=model_id, | |
| files_metadata=True, | |
| token=token, | |
| ) | |
| except RepositoryNotFoundError as e: | |
| raise ModelNotFoundError(f"Model '{model_id}' not found on HuggingFace.") from e | |
| except GatedRepoError as e: | |
| raise AuthRequiredError(hf_auth_error_message(model_id)) from e | |
| except HfHubHTTPError as e: | |
| status = getattr(e.response, "status_code", None) | |
| if status in (401, 403): | |
| raise AuthRequiredError(hf_auth_error_message(model_id)) from e | |
| if status == 429: | |
| retry = e.response.headers.get("Retry-After", "unknown") | |
| raise SourceUnavailableError( | |
| f"HuggingFace rate limit (429). Retry-After: {retry}s. " | |
| "Setting HF_TOKEN increases your quota." | |
| ) from e | |
| raise SourceUnavailableError(f"HuggingFace error ({status}): {e}") from e | |
| except (httpx.TimeoutException, TimeoutError) as e: | |
| raise SourceUnavailableError( | |
| f"HuggingFace request timed out after {self._timeout_s}s." | |
| ) from e | |
| siblings = tuple( | |
| SiblingFile(filename=s.rfilename, size=s.size) for s in (info.siblings or []) | |
| ) | |
| commit_sha = info.sha | |
| # Step 2: fetch config.json. If commit sha is available, pin to it so we don't | |
| # race with repo updates between the two calls. | |
| config = self._fetch_config(model_id, commit_sha or "main", token) | |
| return ModelArtifact( | |
| source=self.name, | |
| model_id=model_id, | |
| commit_sha=commit_sha, | |
| config=config, | |
| siblings=siblings, | |
| ) | |
| def _fetch_config(self, model_id: str, revision: str, token: str | None) -> dict[str, Any]: | |
| url = _CONFIG_URL.format(model_id=model_id, revision=revision) | |
| headers = {"Authorization": f"Bearer {token}"} if token else {} | |
| try: | |
| resp = httpx.get(url, headers=headers, timeout=self._timeout_s, follow_redirects=True) | |
| except (httpx.TimeoutException, httpx.ConnectError) as e: | |
| raise SourceUnavailableError(f"config.json fetch failed: {e}") from e | |
| if resp.status_code == 404: | |
| raise ModelNotFoundError( | |
| f"Model '{model_id}' exists but has no config.json. " | |
| "May be a GGUF-only or dataset repo (not supported in v0.1)." | |
| ) | |
| if resp.status_code in (401, 403): | |
| raise AuthRequiredError(hf_auth_error_message(model_id)) | |
| if resp.status_code == 429: | |
| retry = resp.headers.get("Retry-After", "unknown") | |
| raise SourceUnavailableError(f"HuggingFace rate limit (429). Retry-After: {retry}s.") | |
| if resp.status_code >= 400: | |
| raise SourceUnavailableError(f"config.json fetch returned HTTP {resp.status_code}") | |
| try: | |
| parsed: dict[str, Any] = json.loads(resp.text) | |
| except json.JSONDecodeError as e: | |
| raise SourceUnavailableError( | |
| f"config.json is not valid JSON (line {e.lineno} col {e.colno}): {e.msg}" | |
| ) from e | |
| return parsed | |