"""Centralized configuration with dual LLM backend support.""" from __future__ import annotations import os from dotenv import load_dotenv load_dotenv() class Config: """All settings read from environment variables with sensible defaults.""" # Backend selection LLM_BACKEND: str = os.getenv("LLM_BACKEND", "ollama") # Hugging Face backend HF_MODEL_NAME: str = os.getenv("HF_MODEL_NAME", "Qwen/Qwen2-7B-Instruct") HF_DEVICE: str = os.getenv("HF_DEVICE", "auto") HF_MAX_NEW_TOKENS: int = int(os.getenv("HF_MAX_NEW_TOKENS", 2048)) # Ollama backend OLLAMA_HOST: str = os.getenv("OLLAMA_HOST", "http://localhost:11434") OLLAMA_MODEL: str = os.getenv("OLLAMA_MODEL", "llama2") # GGUF backend (llama-cpp-python) GGUF_MODEL_PATH: str = os.getenv("GGUF_MODEL_PATH", "") GGUF_N_CTX: int = int(os.getenv("GGUF_N_CTX", 4096)) GGUF_N_GPU_LAYERS: int = int(os.getenv("GGUF_N_GPU_LAYERS", -1)) # LM Studio backend LMSTUDIO_URL: str = os.getenv("LMSTUDIO_URL", "http://localhost:1234") LMSTUDIO_MODEL: str = os.getenv("LMSTUDIO_MODEL", "") # Embedding model EMBED_MODEL: str = os.getenv("EMBED_MODEL", "intfloat/multilingual-e5-large") # Index & data FAISS_INDEX: str = os.getenv("FAISS_INDEX", "QModel.index") METADATA_FILE: str = os.getenv("METADATA_FILE", "metadata.json") # Retrieval TOP_K_SEARCH: int = int(os.getenv("TOP_K_SEARCH", 20)) TOP_K_RETURN: int = int(os.getenv("TOP_K_RETURN", 5)) # Generation TEMPERATURE: float = float(os.getenv("TEMPERATURE", 0.2)) MAX_TOKENS: int = int(os.getenv("MAX_TOKENS", 2048)) # Caching CACHE_SIZE: int = int(os.getenv("CACHE_SIZE", 512)) CACHE_TTL: int = int(os.getenv("CACHE_TTL", 3600)) # Ranking RERANK_ALPHA: float = float(os.getenv("RERANK_ALPHA", 0.6)) HADITH_BOOST: float = float(os.getenv("HADITH_BOOST", 0.08)) # Safety CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", 0.30)) # CORS ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*") MAX_EXAMPLES: int = int(os.getenv("MAX_EXAMPLES", 3)) cfg = Config()