Spaces:

bluemoonsoldout
/

llm-cal

Running

llm-cal / src /llm_cal /model_source /huggingface.py

GitHub Actions

Auto-deploy from GitHub Actions

cc6274a 15 days ago

4.76 kB

	"""HuggingFace source. Uses `huggingface_hub` for metadata + `httpx` for config fetch.

	Anti-pattern warning: do NOT call `list_repo_files()` then head-request each file.
	Always use `model_info(files_metadata=True)` which returns all sibling sizes in
	ONE request. Verified in `tests/test_hf.py` by asserting HTTP call count.
	"""

	from __future__ import annotations

	import json
	from typing import Any

	import httpx
	from huggingface_hub import HfApi
	from huggingface_hub.utils import (
	GatedRepoError,
	HfHubHTTPError,
	RepositoryNotFoundError,
	)

	from llm_cal.model_source.auth import get_hf_token, hf_auth_error_message
	from llm_cal.model_source.base import (
	AuthRequiredError,
	ModelArtifact,
	ModelNotFoundError,
	ModelSource,
	SiblingFile,
	SourceUnavailableError,
	)

	_CONFIG_URL = "https://huggingface.co/{model_id}/resolve/{revision}/config.json"


	class HuggingFaceSource(ModelSource):
	    """Model source that reads repo metadata and ``config.json`` from HuggingFace.

	    ``fetch`` issues two HTTP requests: one ``model_info(files_metadata=True)`` call
	    for the sibling file list and commit sha, then one GET for ``config.json``.
	    Library and HTTP failures are translated into ``ModelNotFoundError``,
	    ``AuthRequiredError`` or ``SourceUnavailableError``.
	    """

	    name = "huggingface"

	    def __init__(self, endpoint: str | None = None, timeout_s: float = 30.0) -> None:
	        """Create the source.

	        Args:
	            endpoint: Base URL of the hub to query. ``None`` falls back to
	                ``https://huggingface.co`` for the config.json request.
	            timeout_s: Timeout, in seconds, applied to the config.json request.
	        """
	        # huggingface_hub picks up HF_ENDPOINT env; we pass through for explicitness
	        self._api = HfApi(endpoint=endpoint, token=get_hf_token())
	        self._timeout_s = timeout_s
	        self._endpoint = endpoint or "https://huggingface.co"

	    def fetch(self, model_id: str) -> ModelArtifact:
	        """Fetch sibling metadata, commit sha and config.json for ``model_id``.

	        Raises:
	            ModelNotFoundError: The repo does not exist or has no config.json.
	            AuthRequiredError: The repo is gated or the hub answered 401/403.
	            SourceUnavailableError: Rate limit, timeout or any other hub error.
	        """
	        token = get_hf_token()

	        # Step 1: siblings + commit sha in ONE request.
	        # CRITICAL: files_metadata=True — see module docstring.
	        try:
	            info = self._api.model_info(
	                repo_id=model_id,
	                files_metadata=True,
	                token=token,
	            )
	        except GatedRepoError as e:
	            # Must come before RepositoryNotFoundError: GatedRepoError is a subclass
	            # of it, so the reverse order would report gated repos as "not found".
	            raise AuthRequiredError(hf_auth_error_message(model_id)) from e
	        except RepositoryNotFoundError as e:
	            raise ModelNotFoundError(f"Model '{model_id}' not found on HuggingFace.") from e
	        except HfHubHTTPError as e:
	            status = getattr(e.response, "status_code", None)
	            if status in (401, 403):
	                raise AuthRequiredError(hf_auth_error_message(model_id)) from e
	            if status == 429:
	                retry = e.response.headers.get("Retry-After", "unknown")
	                raise SourceUnavailableError(
	                    f"HuggingFace rate limit (429). Retry-After: {retry}s. "
	                    "Setting HF_TOKEN increases your quota."
	                ) from e
	            raise SourceUnavailableError(f"HuggingFace error ({status}): {e}") from e
	        except (httpx.TimeoutException, TimeoutError) as e:
	            raise SourceUnavailableError(
	                f"HuggingFace request timed out after {self._timeout_s}s."
	            ) from e

	        siblings = tuple(
	            SiblingFile(filename=s.rfilename, size=s.size) for s in (info.siblings or [])
	        )
	        commit_sha = info.sha

	        # Step 2: fetch config.json. If commit sha is available, pin to it so we don't
	        # race with repo updates between the two calls.
	        config = self._fetch_config(model_id, commit_sha or "main", token)

	        return ModelArtifact(
	            source=self.name,
	            model_id=model_id,
	            commit_sha=commit_sha,
	            config=config,
	            siblings=siblings,
	        )

	    def _fetch_config(self, model_id: str, revision: str, token: str | None) -> dict[str, Any]:
	        """Download and parse ``config.json`` for ``model_id`` at ``revision``.

	        Raises:
	            ModelNotFoundError: The hub answered 404 for config.json.
	            AuthRequiredError: The hub answered 401 or 403.
	            SourceUnavailableError: Transport failure, 429, any other status >= 400,
	                or a body that is not a JSON object.
	        """
	        # Build the URL from the configured endpoint so a custom hub/mirror passed to
	        # __init__ is honoured here too, not only by the HfApi call.
	        base = self._endpoint.rstrip("/")
	        url = f"{base}/{model_id}/resolve/{revision}/config.json"
	        headers = {"Authorization": f"Bearer {token}"} if token else {}
	        try:
	            resp = httpx.get(url, headers=headers, timeout=self._timeout_s, follow_redirects=True)
	        except httpx.RequestError as e:
	            # RequestError covers timeouts and connect errors as well as read/protocol
	            # failures and redirect loops, so none of them leak as raw httpx exceptions.
	            raise SourceUnavailableError(f"config.json fetch failed: {e}") from e

	        if resp.status_code == 404:
	            raise ModelNotFoundError(
	                f"Model '{model_id}' exists but has no config.json. "
	                "May be a GGUF-only or dataset repo (not supported in v0.1)."
	            )
	        if resp.status_code in (401, 403):
	            raise AuthRequiredError(hf_auth_error_message(model_id))
	        if resp.status_code == 429:
	            retry = resp.headers.get("Retry-After", "unknown")
	            raise SourceUnavailableError(f"HuggingFace rate limit (429). Retry-After: {retry}s.")
	        if resp.status_code >= 400:
	            raise SourceUnavailableError(f"config.json fetch returned HTTP {resp.status_code}")

	        try:
	            parsed = json.loads(resp.text)
	        except json.JSONDecodeError as e:
	            raise SourceUnavailableError(
	                f"config.json is not valid JSON (line {e.lineno} col {e.colno}): {e.msg}"
	            ) from e
	        if not isinstance(parsed, dict):
	            # Valid JSON but not an object (e.g. a list or string); callers expect a dict.
	            raise SourceUnavailableError(
	                f"config.json is not a JSON object (got {type(parsed).__name__})."
	            )
	        return parsed