| """Centralized configuration with dual LLM backend support.""" |
|
|
| from __future__ import annotations |
|
|
| import os |
| from dotenv import load_dotenv |
|
|
| load_dotenv() |
|
|
|
|
class Config:
    """All settings read from environment variables with sensible defaults.

    Values are resolved once at class-definition (import) time; changing the
    environment afterwards has no effect unless the module is reloaded.
    Numeric settings raise ``ValueError`` at import if the corresponding
    environment variable is set to a non-numeric string.

    NOTE: all defaults are written as strings so that ``os.getenv``'s
    documented str-in/str-out contract holds and the ``int()``/``float()``
    conversion path is identical whether the variable is set or not.
    """

    # Which LLM backend to use. Accepted values appear to be "ollama",
    # a Hugging Face backend, a GGUF backend, and LM Studio, matching the
    # sections below — confirm against the code that dispatches on this.
    LLM_BACKEND: str = os.getenv("LLM_BACKEND", "ollama")

    # --- Hugging Face transformers backend ---
    HF_MODEL_NAME: str = os.getenv("HF_MODEL_NAME", "Qwen/Qwen2-7B-Instruct")
    # Device spec passed to the HF loader ("auto", "cpu", "cuda", ...).
    HF_DEVICE: str = os.getenv("HF_DEVICE", "auto")
    HF_MAX_NEW_TOKENS: int = int(os.getenv("HF_MAX_NEW_TOKENS", "2048"))

    # --- Ollama backend ---
    OLLAMA_HOST: str = os.getenv("OLLAMA_HOST", "http://localhost:11434")
    OLLAMA_MODEL: str = os.getenv("OLLAMA_MODEL", "llama2")

    # --- llama.cpp / GGUF backend ---
    GGUF_MODEL_PATH: str = os.getenv("GGUF_MODEL_PATH", "")
    GGUF_N_CTX: int = int(os.getenv("GGUF_N_CTX", "4096"))
    # -1 conventionally means "offload all layers to GPU" in llama.cpp.
    GGUF_N_GPU_LAYERS: int = int(os.getenv("GGUF_N_GPU_LAYERS", "-1"))

    # --- LM Studio backend (OpenAI-compatible local server) ---
    LMSTUDIO_URL: str = os.getenv("LMSTUDIO_URL", "http://localhost:1234")
    LMSTUDIO_MODEL: str = os.getenv("LMSTUDIO_MODEL", "")

    # --- Embeddings ---
    EMBED_MODEL: str = os.getenv("EMBED_MODEL", "intfloat/multilingual-e5-large")

    # --- Vector store files ---
    FAISS_INDEX: str = os.getenv("FAISS_INDEX", "QModel.index")
    METADATA_FILE: str = os.getenv("METADATA_FILE", "metadata.json")

    # --- Retrieval: fetch TOP_K_SEARCH candidates, return TOP_K_RETURN ---
    TOP_K_SEARCH: int = int(os.getenv("TOP_K_SEARCH", "20"))
    TOP_K_RETURN: int = int(os.getenv("TOP_K_RETURN", "5"))

    # --- Generation parameters ---
    TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.2"))
    MAX_TOKENS: int = int(os.getenv("MAX_TOKENS", "2048"))

    # --- Response cache (entries / seconds-to-live) ---
    CACHE_SIZE: int = int(os.getenv("CACHE_SIZE", "512"))
    CACHE_TTL: int = int(os.getenv("CACHE_TTL", "3600"))

    # --- Re-ranking weights (semantics defined by the re-ranker; the
    # boost presumably favors hadith sources — confirm against its code) ---
    RERANK_ALPHA: float = float(os.getenv("RERANK_ALPHA", "0.6"))
    HADITH_BOOST: float = float(os.getenv("HADITH_BOOST", "0.08"))

    # Minimum confidence below which an answer is treated as unreliable.
    CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.30"))

    # CORS origins for the API ("*" allows all; comma-separated otherwise,
    # presumably — confirm against the server setup code).
    ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")

    # Maximum number of few-shot/context examples included per request.
    MAX_EXAMPLES: int = int(os.getenv("MAX_EXAMPLES", "3"))
|
|
|
|
# Shared module-level singleton; importers use `cfg` rather than
# instantiating Config themselves. All attributes live on the class,
# so instantiation carries no extra state.
cfg: Config = Config()
|
|