| """LLM provider factory — runtime injection via configurable_alternatives. |
| |
| Usage:: |
| |
| from providers import get_chat_model |
| |
| # Default profile (env: LLM_PROFILE) |
| llm = get_chat_model() |
| |
| # Explicit profile selection |
| llm = get_chat_model("dummy") |
| |
| # Runtime override inside a graph: |
| graph.invoke(state, config={"configurable": {"llm_profile": "ollama"}}) |
| |
| The configurable_alternatives pattern lets you switch the provider at runtime |
| after the graph is compiled — no restart required. |
| |
| The 3 profiles: |
| * ``vllm`` — Qwen 2.5 served by vLLM on AMD MI300X (OpenAI-compatible API). Production default. |
| * ``ollama`` — local fallback (Qwen 2.5 7B Instruct via Ollama). Dev / data-privacy. |
| * ``dummy`` — deterministic stub (CI / eval / load tests). No network calls. |
| """ |
|
from __future__ import annotations

from typing import Literal

from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.runnables import ConfigurableField, Runnable

from config import settings
from providers.dummy_provider import DummyChatModel, build_dummy_chat

# Lazily-initialised module singletons.
_chat_model: Runnable | None = None
_embeddings = None


def get_chat_model(
    profile: Literal["vllm", "ollama", "dummy"] | None = None,
) -> Runnable:
    """Return the application chat model; the profile is selectable at runtime.

    If ``profile`` is ``None`` (the default), ``settings.llm_profile`` is used.

    Returns a Runnable that can switch providers at runtime via
    ``configurable_alternatives``. All three ``BaseChatModel`` implementations
    support ``bind_tools()`` and ``with_structured_output()``.
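
    Example (the prompt strings are illustrative)::

        llm = get_chat_model()            # profile from settings/env
        ci_llm = get_chat_model("dummy")  # pinned, e.g. for tests
        ci_llm.invoke("ping")             # deterministic, no network calls

        # Per-call override, as with any configurable Runnable:
        llm.invoke("ping", config={"configurable": {"llm_profile": "ollama"}})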
| """ |
| global _chat_model |
| if _chat_model is None: |
| env_profile = settings.llm_profile |
| base = _build_base_chat(env_profile) |
| |
| |
| |
| |
| |
| alternatives: dict[str, BaseChatModel] = {} |
| for alt_profile in ("vllm", "ollama", "dummy"): |
| if alt_profile == env_profile: |
| continue |
| try: |
| alternatives[alt_profile] = _build_base_chat(alt_profile) |
| except (ImportError, ModuleNotFoundError): |
| |
| continue |
| _chat_model = base.configurable_alternatives( |
| ConfigurableField(id="llm_profile"), |
| default_key=env_profile, |
| **alternatives, |
| ) |
|
|
    if profile is None or profile == settings.llm_profile:
        return _chat_model

    # Pin the requested profile onto the shared Runnable for this caller.
    return _chat_model.with_config({"configurable": {"llm_profile": profile}})


def _build_base_chat(profile: str) -> BaseChatModel:
    """Build a ``BaseChatModel`` for a single profile.

    The vllm/ollama providers are lazy-imported so that dummy-only runs do
    not require ``langchain-openai`` or ``langchain-ollama`` to be installed
    (CI-friendly).
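
    For example, building the dummy profile triggers no lazy provider
    imports::

        llm = _build_base_chat("dummy")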
| """ |
| if profile == "dummy": |
| return build_dummy_chat() |
| if profile == "vllm": |
| from providers.vllm_provider import build_vllm_chat |
| return build_vllm_chat() |
| if profile == "ollama": |
| from providers.ollama_provider import build_ollama_chat |
| return build_ollama_chat() |
| raise ValueError( |
| f"Unknown LLM profile: {profile!r}. Available: vllm|ollama|dummy" |
| ) |
|
|
|
|
def get_embeddings():
    """Embedding-model singleton (sentence-transformers, local).

    Lazy import: the sentence-transformers package is only loaded when
    embeddings are actually needed (Phase 3+). Phase 1 smoke tests do not
    require it, so the lazy import keeps CI and dummy-only runs free of the
    dependency.
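
    Usage sketch (assuming the returned object follows the LangChain
    ``Embeddings`` interface)::

        emb = get_embeddings()
        vectors = emb.embed_documents(["first chunk", "second chunk"])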
| """ |
| global _embeddings |
| if _embeddings is None: |
| from providers.embeddings import build_embeddings |
| _embeddings = build_embeddings() |
| return _embeddings |
|
|
|
|
def get_dummy_handle() -> DummyChatModel:
    """Return a direct handle to the dummy provider (for state management).

    After an upload, the UI calls ``set_docs_hint(filenames)`` so the dummy
    can read the actual file list when choosing tool parameters. This function
    returns a *fresh* ``DummyChatModel`` instance, because the inner state of
    the ``configurable_alternatives`` Runnable is not exposed via the public
    API. Caution: the docs hint must therefore be set on the SINGLETON
    instance held by the compiled LangGraph, not on the handle returned here,
    right before the graph is invoked.

    See the session-init code in ``app/main.py`` for the correct pattern.
    """
    return build_dummy_chat()


__all__ = [
    "get_chat_model",
    "get_embeddings",
    "get_dummy_handle",
    "DummyChatModel",
]