# paperhawk/providers/__init__.py
"""LLM provider factory — runtime injection via configurable_alternatives.
Usage::
from providers import get_chat_model
# Default profile (env: LLM_PROFILE)
llm = get_chat_model()
# Explicit profile selection
llm = get_chat_model("dummy")
# Runtime override inside a graph:
graph.invoke(state, config={"configurable": {"llm_profile": "ollama"}})
The configurable_alternatives pattern lets you switch the provider at runtime
after the graph is compiled — no restart required.
The 3 profiles:
* ``vllm`` — Qwen 2.5 served by vLLM on AMD MI300X (OpenAI-compatible API). Production default.
* ``ollama`` — local fallback (Qwen 2.5 7B Instruct via Ollama). Dev / data-privacy.
* ``dummy`` — deterministic stub (CI / eval / load tests). No network calls.
"""

from __future__ import annotations

from typing import Literal

from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.runnables import ConfigurableField, Runnable

from config import settings
from providers.dummy_provider import DummyChatModel, build_dummy_chat

# Cached singleton — the same configurable instance is returned every time.
_chat_model: Runnable | None = None
_embeddings = None  # lazy: SentenceTransformerEmbeddings | None


def get_chat_model(
    profile: Literal["vllm", "ollama", "dummy"] | None = None,
) -> Runnable:
    """Return the application chat model; the profile is selectable at runtime.

    If ``profile`` is ``None`` (the default), ``settings.llm_profile`` is used.

    Returns a Runnable that can switch providers at runtime via
    ``configurable_alternatives``. All three BaseChatModel implementations
    support ``bind_tools()`` and ``with_structured_output()``.
    """
    global _chat_model
    if _chat_model is None:
        env_profile = settings.llm_profile
        base = _build_base_chat(env_profile)
        # configurable_alternatives offers the two profiles besides the
        # default, BUT only if the underlying package can be imported. If
        # e.g. langchain-openai is not installed (CI dummy-only run), the
        # vllm alternative is skipped — a runtime switch to it then fails
        # fast instead of half-working.
        alternatives: dict[str, BaseChatModel] = {}
        for alt_profile in ("vllm", "ollama", "dummy"):
            if alt_profile == env_profile:
                continue
            try:
                alternatives[alt_profile] = _build_base_chat(alt_profile)
            except ImportError:  # ModuleNotFoundError is a subclass
                # Provider package is not installed — that's OK; that swap
                # is simply unavailable.
                continue
        _chat_model = base.configurable_alternatives(
            ConfigurableField(id="llm_profile"),
            default_key=env_profile,
            **alternatives,
        )
    if profile is None or profile == settings.llm_profile:
        return _chat_model
    # Explicit profile selection via Runnable.with_config
    return _chat_model.with_config({"configurable": {"llm_profile": profile}})
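
# Minimal sketch of runtime switching (illustrative only; assumes the
# "ollama" provider package was importable when the singleton was built):
#
#     llm = get_chat_model()
#     llm.invoke("hi")                      # default profile
#     llm.with_config(
#         {"configurable": {"llm_profile": "ollama"}}
#     ).invoke("hi")                        # same object, swapped provider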


def _build_base_chat(profile: str) -> BaseChatModel:
    """Build a BaseChatModel for a single profile.

    The vllm/ollama providers are lazy-imported so dummy-only runs do not
    require ``langchain-openai`` or ``langchain-ollama`` to be installed
    (CI-friendly).
    """
    if profile == "dummy":
        return build_dummy_chat()
    if profile == "vllm":
        from providers.vllm_provider import build_vllm_chat

        return build_vllm_chat()
    if profile == "ollama":
        from providers.ollama_provider import build_ollama_chat

        return build_ollama_chat()
    raise ValueError(
        f"Unknown LLM profile: {profile!r}. Available: vllm|ollama|dummy"
    )
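
# Fail-fast behaviour in a dummy-only environment (illustrative):
#
#     _build_base_chat("dummy")   # fine; no optional packages needed
#     _build_base_chat("vllm")    # raises ImportError when langchain-openai
#                                 # is missing; get_chat_model() catches it
#                                 # and simply omits that alternative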


def get_embeddings():
    """Embedding-model singleton (sentence-transformers, local).

    Lazy-imported: the sentence-transformers package is only loaded when
    embeddings are actually needed (Phase 3+). Phase 1 smoke tests do not
    require it, so the lazy import protects CI/dummy-only runs.
    """
    global _embeddings
    if _embeddings is None:
        from providers.embeddings import build_embeddings

        _embeddings = build_embeddings()
    return _embeddings
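
# Typical call site (illustrative; embed_documents is the standard LangChain
# Embeddings interface, and the model itself comes from providers/embeddings.py):
#
#     vectors = get_embeddings().embed_documents(["chunk one", "chunk two"])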


def get_dummy_handle() -> DummyChatModel:
    """Return a direct handle to the dummy provider (for state management).

    The UI calls ``set_docs_hint(filenames)`` after an upload so the dummy
    can read the actual file list when choosing tool parameters. This
    function returns a *fresh* DummyChatModel instance, because the
    configurable_alternatives Runnable does not expose its inner models via
    the public API. Caveat: the docs hint must be set on the SINGLETON
    instance held by the compiled LangGraph (not on this returned handle)
    right before invoking the graph. See the session-init code in
    ``app/main.py`` for the correct pattern.
    """
    return build_dummy_chat()
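
# Sketch of the session-init pattern the docstring refers to (names are
# assumptions; the real wiring lives in app/main.py):
#
#     live_dummy = ...                     # the singleton the graph holds
#     live_dummy.set_docs_hint(uploaded_filenames)
#     graph.invoke(state)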


__all__ = [
    "get_chat_model",
    "get_embeddings",
    "get_dummy_handle",
    "DummyChatModel",
]