"""LLM provider factory — runtime injection via configurable_alternatives.

Usage::

    from providers import get_chat_model

    # Default profile (env: LLM_PROFILE)
    llm = get_chat_model()

    # Explicit profile selection
    llm = get_chat_model("dummy")

    # Runtime override inside a graph:
    graph.invoke(state, config={"configurable": {"llm_profile": "ollama"}})

The configurable_alternatives pattern lets you switch the provider at runtime
after the graph is compiled — no restart required.

The 3 profiles:

* ``vllm``   — Qwen 2.5 served by vLLM on AMD MI300X (OpenAI-compatible API). Production default.
* ``ollama`` — local fallback (Qwen 2.5 7B Instruct via Ollama). Dev / data-privacy.
* ``dummy``  — deterministic stub (CI / eval / load tests). No network calls.
"""
from __future__ import annotations
from typing import Literal
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.runnables import ConfigurableField, Runnable
from config import settings
from providers.dummy_provider import DummyChatModel, build_dummy_chat
# Cached singleton — the same configurable Runnable instance is returned on
# every get_chat_model() call (built on first use, see get_chat_model).
_chat_model: Runnable | None = None
# Lazily-built embeddings singleton (see get_embeddings); stays None until
# embeddings are first requested so sentence-transformers is never imported
# in dummy-only / smoke-test runs.
_embeddings = None  # lazy: SentenceTransformerEmbeddings | None
def get_chat_model(
    profile: Literal["vllm", "ollama", "dummy"] | None = None,
) -> Runnable:
    """Return the application chat-model. Profile selectable at runtime.

    Args:
        profile: Explicit profile to use for this handle. If ``None``
            (default), the environment-selected ``settings.llm_profile``
            is used.

    Returns:
        A Runnable that can switch providers at runtime via
        ``configurable_alternatives``. All three BaseChatModel
        implementations support ``bind_tools()`` and
        ``with_structured_output()``.

    The underlying configurable instance is a module-level singleton: it is
    built once (on first call) and reused afterwards, so the LangGraph
    compile and later callers share the same object.
    """
    global _chat_model
    if _chat_model is None:
        env_profile = settings.llm_profile
        base = _build_base_chat(env_profile)
        # configurable_alternatives offers the other 2 profiles besides the
        # default, BUT only if the underlying package can be imported. If
        # e.g. langchain-openai is not installed (CI dummy-only run), the
        # vllm alternative is skipped — runtime switching to it would then
        # fail-fast with a single ImportError.
        alternatives: dict[str, BaseChatModel] = {}
        for alt_profile in ("vllm", "ollama", "dummy"):
            if alt_profile == env_profile:
                continue
            try:
                alternatives[alt_profile] = _build_base_chat(alt_profile)
            except ImportError:
                # ModuleNotFoundError is a subclass of ImportError, so this
                # single clause covers both. A missing provider package is
                # fine — that profile is simply not swappable at runtime.
                continue
        _chat_model = base.configurable_alternatives(
            ConfigurableField(id="llm_profile"),
            default_key=env_profile,
            **alternatives,
        )
    if profile is None or profile == settings.llm_profile:
        return _chat_model
    # Explicit profile selection: via Runnable.with_config
    return _chat_model.with_config({"configurable": {"llm_profile": profile}})
def _build_base_chat(profile: str) -> BaseChatModel:
"""Build a BaseChatModel for a single profile.
The vllm/ollama providers are lazy-imported so dummy-only runs do not
require ``langchain-openai`` or ``langchain-ollama`` to be installed
(CI-friendly).
"""
if profile == "dummy":
return build_dummy_chat()
if profile == "vllm":
from providers.vllm_provider import build_vllm_chat
return build_vllm_chat()
if profile == "ollama":
from providers.ollama_provider import build_ollama_chat
return build_ollama_chat()
raise ValueError(
f"Unknown LLM profile: {profile!r}. Available: vllm|ollama|dummy"
)
def get_embeddings():
    """Return the process-wide embeddings singleton (sentence-transformers, local).

    The sentence-transformers stack is imported only on the first call, so
    Phase 1 smoke tests and CI/dummy-only runs never pay for (or require)
    that dependency. Subsequent calls return the cached instance.
    """
    global _embeddings
    if _embeddings is not None:
        return _embeddings
    from providers.embeddings import build_embeddings

    _embeddings = build_embeddings()
    return _embeddings
def get_dummy_handle() -> DummyChatModel:
    """Return a direct handle to the dummy provider (for state management).

    The UI calls ``set_docs_hint(filenames)``: after upload, the dummy reads
    the actual file list to choose tool parameters. A fresh DummyChatModel
    instance is returned here because the configurable_alternatives
    Runnable's inner state is not exposed via the public API.

    Caveat: the UI must set the docs_hint on the SINGLETON instance (not on
    this returned handle) right before invoking the graph — the LangGraph
    compile holds the singleton. See ``app/main.py`` session-init for the
    correct pattern.
    """
    handle = build_dummy_chat()
    return handle
# Explicit public API of this module (the import-star / documented surface).
__all__ = [
    "get_chat_model",
    "get_embeddings",
    "get_dummy_handle",
    "DummyChatModel",
]