QModel / .env.example

Upload folder using huggingface_hub

6ab1c8e 24 days ago

8 kB

	# QModel 6 Configuration Template
	# ==================================
	# Copy this file to .env and update the values for your environment

	# LLM Backend Selection
	# Options: "ollama", "hf" (HuggingFace), "gguf" (local GGUF file), or "lmstudio"
	LLM_BACKEND=ollama

	# ─────────────────────────────────────────────────────────────────────
	# OLLAMA BACKEND (if LLM_BACKEND=ollama)
	# ─────────────────────────────────────────────────────────────────────
	OLLAMA_HOST=http://localhost:11434
	OLLAMA_MODEL=minimax-m2.7:cloud
	# Other model examples: llama3.1, mistral, neural-chat, openhermes

	# ─────────────────────────────────────────────────────────────────────
	# HUGGINGFACE BACKEND (if LLM_BACKEND=hf)
	# ─────────────────────────────────────────────────────────────────────
	# HF_MODEL_NAME=Qwen/Qwen2-7B-Instruct
	# HF_DEVICE=auto # Options: auto, cuda, cpu
	# HF_MAX_NEW_TOKENS=2048
	# Popular models:
	# - Qwen/Qwen2-7B-Instruct (excellent Arabic)
	# - mistralai/Mistral-7B-Instruct-v0.2
	# - meta-llama/Llama-2-13b-chat-hf

	# ─────────────────────────────────────────────────────────────────────
	# GGUF BACKEND (if LLM_BACKEND=gguf)
	# ─────────────────────────────────────────────────────────────────────
	# GGUF_MODEL_PATH=./models/Qwen3-32B-Q4_K_M.gguf
	# GGUF_N_CTX=4096 # Context window size
	# GGUF_N_GPU_LAYERS=-1 # -1 = offload all layers to GPU (Metal on Mac)

	# ─────────────────────────────────────────────────────────────────────
	# LM STUDIO BACKEND (if LLM_BACKEND=lmstudio)
	# ─────────────────────────────────────────────────────────────────────
	# LMSTUDIO_URL=http://localhost:1234
	# LMSTUDIO_MODEL=qwen2.5-7b-instruct # Model loaded in LM Studio

	# ─────────────────────────────────────────────────────────────────────
	# EMBEDDING MODEL (shared by all backends)
	# ─────────────────────────────────────────────────────────────────────
	EMBED_MODEL=intfloat/multilingual-e5-large

	# ─────────────────────────────────────────────────────────────────────
	# DATA FILES
	# ─────────────────────────────────────────────────────────────────────
	FAISS_INDEX=QModel.index
	METADATA_FILE=metadata.json

	# ─────────────────────────────────────────────────────────────────────
	# RETRIEVAL SETTINGS
	# ─────────────────────────────────────────────────────────────────────
	TOP_K_SEARCH=20 # Candidate pool size
	TOP_K_RETURN=5 # Final results returned to user

	# ─────────────────────────────────────────────────────────────────────
	# GENERATION SETTINGS
	# ─────────────────────────────────────────────────────────────────────
	TEMPERATURE=0.2 # 0.0=deterministic, 1.0=creative
	MAX_TOKENS=2048 # Max output length

	# ─────────────────────────────────────────────────────────────────────
	# SAFETY & QUALITY
	# ─────────────────────────────────────────────────────────────────────
	# Confidence threshold: below this score, the LLM is skipped and "not found" is returned.
	# Reduces hallucinations, but may reject valid results.
	# Range: 0.0-1.0 (default 0.30)
	# Raise it (0.50+) for stricter filtering; lower it (0.20) for looser filtering.
	CONFIDENCE_THRESHOLD=0.30

	# Hadith boost: Score bonus when intent=hadith
	# Prevents Quran verses from outranking relevant Hadiths
	HADITH_BOOST=0.08

	# ─────────────────────────────────────────────────────────────────────
	# RANKING
	# ─────────────────────────────────────────────────────────────────────
	RERANK_ALPHA=0.6 # 60% dense (embedding), 40% sparse (BM25)

	# ─────────────────────────────────────────────────────────────────────
	# CACHING
	# ─────────────────────────────────────────────────────────────────────
	CACHE_SIZE=512 # Max cache entries
	CACHE_TTL=3600 # Cache expiry in seconds

	# ─────────────────────────────────────────────────────────────────────
	# SECURITY
	# ─────────────────────────────────────────────────────────────────────
	ALLOWED_ORIGINS=* # CORS origins (restrict in production, e.g. https://origin1.com,https://origin2.com)

	# ─────────────────────────────────────────────────────────────────────
	# USAGE EXAMPLES
	# ─────────────────────────────────────────────────────────────────────
	#
	# Development (Ollama):
	# LLM_BACKEND=ollama
	# OLLAMA_HOST=http://localhost:11434
	# OLLAMA_MODEL=llama3.1
	#
	# Production (HuggingFace GPU):
	# LLM_BACKEND=hf
	# HF_MODEL_NAME=Qwen/Qwen2-7B-Instruct
	# HF_DEVICE=cuda
	#
	# Production (HuggingFace CPU):
	# LLM_BACKEND=hf
	# HF_MODEL_NAME=Qwen/Qwen2-7B-Instruct
	# HF_DEVICE=cpu