aelgendy committed 24 days ago

Commit

eb1414a

1 Parent(s): 83f3c50

Upload folder using huggingface_hub

Browse files

Files changed (21) hide show

.dockerignore +0 -1
.env.example +15 -2
.gitattributes +1 -2
ARCHITECTURE.md +196 -97
app/__init__.py +1 -0
app/analysis.py +322 -0
app/arabic_nlp.py +98 -0
app/cache.py +49 -0
app/config.py +67 -0
app/llm.py +194 -0
app/models.py +174 -0
app/prompts.py +199 -0
app/routers/__init__.py +1 -0
app/routers/chat.py +163 -0
app/routers/hadith.py +212 -0
app/routers/ops.py +69 -0
app/routers/quran.py +149 -0
app/search.py +287 -0
app/state.py +235 -0
main.py +32 -1446
requirements.txt +15 -12

.dockerignore CHANGED Viewed

@@ -4,7 +4,6 @@ __pycache__
 .DS_Store
 .vscode
 .git
-.docker
 QModel.index
 metadata.json
 data/

 .DS_Store
 .vscode
 .git
 QModel.index
 metadata.json
 data/

.env.example CHANGED Viewed

@@ -3,7 +3,7 @@
 # Copy this to .env and update values for your environment
 # LLM Backend Selection
-# Options: "hf" (HuggingFace) or "ollama"
 LLM_BACKEND=ollama
 # ─────────────────────────────────────────────────────────────────────
@@ -25,7 +25,20 @@ OLLAMA_MODEL=minimax-m2.7:cloud
 #   - meta-llama/Llama-2-13b-chat-hf
 # ─────────────────────────────────────────────────────────────────────
-# EMBEDDING MODEL (shared by both backends)
 # ─────────────────────────────────────────────────────────────────────
 EMBED_MODEL=intfloat/multilingual-e5-large

 # Copy this to .env and update values for your environment
 # LLM Backend Selection
+# Options: "ollama", "hf" (HuggingFace), "gguf" (local GGUF file), or "lmstudio"
 LLM_BACKEND=ollama
 # ─────────────────────────────────────────────────────────────────────
 #   - meta-llama/Llama-2-13b-chat-hf
 # ─────────────────────────────────────────────────────────────────────
+# GGUF BACKEND (if LLM_BACKEND=gguf)
+# ─────────────────────────────────────────────────────────────────────
+# GGUF_MODEL_PATH=./models/qwen2-7b-instruct-q4_k_m.gguf
+# GGUF_N_CTX=4096              # Context window size
+# GGUF_N_GPU_LAYERS=-1         # -1 = offload all layers to GPU (Metal on Mac)
+# ─────────────────────────────────────────────────────────────────────
+# LM STUDIO BACKEND (if LLM_BACKEND=lmstudio)
+# ─────────────────────────────────────────────────────────────────────
+# LMSTUDIO_URL=http://localhost:1234
+# LMSTUDIO_MODEL=qwen2.5-7b-instruct   # Model loaded in LM Studio
+# ─────────────────────────────────────────────────────────────────────
+# EMBEDDING MODEL (shared by all backends)
 # ─────────────────────────────────────────────────────────────────────
 EMBED_MODEL=intfloat/multilingual-e5-large

.gitattributes CHANGED Viewed

@@ -1,4 +1,3 @@
 # Auto detect text files and perform LF normalization
 * text=auto
-metadata.json filter=lfs diff=lfs merge=lfs -text
-QModel.index filter=lfs diff=lfs merge=lfs -text

 # Auto detect text files and perform LF normalization
 * text=auto
+models/qwen2-7b-instruct-q8_0.gguf filter=lfs diff=lfs merge=lfs -text

ARCHITECTURE.md CHANGED Viewed

@@ -1,4 +1,4 @@
-# QModel v4 Architecture — Detailed System Design
 > For a quick overview, see [README.md](README.md#architecture-overview)
@@ -7,31 +7,66 @@ A RAG system specialized **exclusively** in authenticated Qur'an and Hadith. No
 ## Core Capabilities
-### 1. **Quran Analysis**
-- **Verse Lookup**: Find verses by topic, keyword, or Surah
-- **Word Frequency**: Count word/phrase occurrences across all 114 Surahs
-- **Topic Tafsir**: Retrieve and explain related Quranic verses
-- **Bilingual**: Arabic (Uthmani) + English (Saheeh International)
-### 2. **Hadith Operations**
-- **Authentication Status**: Verify if a Hadith is in an authenticated collection
-- **Grade Display**: Show authenticity grade (Sahih, Hasan, Da'if, etc.)
-- **Topic Search**: Find Hadiths related to topics across 7 major collections
-- **Collection Navigation**: Filter by Bukhari, Muslim, Abu Dawud, Tirmidhi, Ibn Majah, Nasa'i, Malik
-### 3. **Safety First**
 - **Confidence Gating**: Low-confidence queries return "not found" instead of LLM guess
 - **Source Attribution**: Every answer cites exact verse/Hadith with reference
 - **Grade Filtering**: Optional: only return Sahih-authenticated Hadiths
 - **Verbatim Quotes**: Copy text directly from data, no paraphrasing
 ---
 ## Data Pipeline
 The system follows a three-phase approach:
-**Metadata Schema**:
 ```json
 {
   "id": "surah:verse or hadith_prefix_number",
@@ -65,100 +100,137 @@ build_index.py
 ### Phase 3: Retrieval & Ranking
-**Hybrid Search Algorithm**:
 1. Dense retrieval: FAISS semantic scoring
 2. Sparse retrieval: BM25 term-frequency ranking
 3. Fusion: 60% dense + 40% sparse
 4. Intent-aware boost: +0.08 to Hadith items when intent=hadith
 5. Type filter: Optional (quran_only / hadith_only / authenticated_only)
 ---
-## Core Components
-### `fetch_data.py` — Data Acquisition
-- Fetches complete Quran and 7 Hadith collections
-- Handles network retries + CDN redirects
-- Normalizes and validates data
-- Exports `data/quran.json` and `data/hadith.json`
-### `build_index.py` — Index Construction
-- Loads datasets and embeddings model
-- Creates dual-language FAISS vectors
-- Serializes to `QModel.index` + `metadata.json`
-### `main.py` — Inference Engine
-**Three processing layers**:
-1. **Query Layer** (Rewriting & Intent Detection)
-   - `rewrite_query()` — dual-language normalization, spelling correction
-   - `detect_analysis_intent()` — detects word frequency queries
-   - `detect_language()` — routes to Arabic or English persona
-2. **Retrieval Layer** (Semantic Search)
-   - `hybrid_search()` — FAISS + BM25 fusion
-   - `count_occurrences()` — exact/stemmed word frequency across dataset
-   - Caching at query level for fast follow-ups
-3. **Generation Layer** (Safe LLM Call)
-   - `chat_with_fallback()` — Ollama with 3-model fallback chain
-   - `build_context()` — formats retrieved items with scores
-   - `build_messages()` — intent-aware prompts with few-shot examples
-   - Confidence gate: skips LLM if top_score < threshold
-**Anti-Hallucination Measures**:
-- Few-shot examples including "not found" refusal path
-- Hardcoded format rules (box/citation format required)
-- Verbatim copy rules (no reconstruction from memory)
-- Confidence threshold gating (default: 0.30)
----
-## API Endpoints
-### `GET /ask?q=<question>&top_k=5`
-Returns structured Islamic answer with full lineage.
-**Response**:
-```json
-{
-  "question": "...",
-  "answer": "...",
-  "language": "arabic | english | mixed",
-  "intent": "tafsir | hadith | fatwa | count | general",
-  "analysis": {
-    "keyword": "محمد",
-    "total_count": 157,
-    "examples": [...]
-  },
-  "sources": [
-    {
-      "rank": 1,
-      "source": "Sahih al-Bukhari 1",
-      "type": "hadith",
-      "grade": "Sahih",
-      "_score": 0.876
-    }
-  ],
-  "top_score": 0.876,
-  "latency_ms": 342
-}
-```
-### `GET /debug/scores?q=<question>&top_k=10`
-Inspect raw retrieval scores without LLM call. Use to calibrate `CONFIDENCE_THRESHOLD`.
-### `POST /v1/chat/completions`
-OpenAI-compatible endpoint for language model clients.
 ---
 ## Configuration
-**`.env` priority**:
 ```
 OLLAMA_HOST              # Ollama server URL
 LLM_MODEL                # Primary model (e.g. minimax-m2.7:cloud)
 EMBED_MODEL              # Embedding model (intfloat/multilingual-e5-large)
 FAISS_INDEX              # Path to QModel.index
 METADATA_FILE            # Path to metadata.json
@@ -189,32 +261,47 @@ docker-compose up
 ---
-## Testing the System
-### 1. Word Frequency Query
 ```
-Q: "How many times is the word 'mercy' mentioned in the Quran?"
-→ Detects 'count' intent
-→ Calls count_occurrences()
-→ Returns: 114 occurrences with examples
 ```
-### 2. Hadith Authenticity Check
 ```
-Q: "Is the Hadith 'Actions are judged by intentions' authentic?"
-→ Searches dataset
-→ Returns: "Sahih al-Bukhari 1 — Grade: Sahih"
-→ LLM elaborates on significance
 ```
-### 3. Topic-Based Aya Retrieval
 ```
-Q: "What does the Quran say about patience?"
-→ Retrieves top 5 verses about patience
-→ Returns: Verses with Tafsir and interconnections
 ```
-### 4. Confidence Gate in Action
 ```
 Q: "Who was Muhammad's 7th wife?" (not in dataset)
 → Retrieval score: 0.15 (below 0.30 threshold)
@@ -222,14 +309,26 @@ Q: "Who was Muhammad's 7th wife?" (not in dataset)
 → LLM not called (prevents hallucination)
 ```
 ---
-## Roadmap: v4 Enhancements
-- [ ] Grade-based filtering: `?grade=sahih` to return only authenticated Hadiths
 - [ ] Chain of narrators: Display Isnad with full narrator details
 - [ ] Synonym expansion: Better topic matching (e.g., "mercy" → "rahma, compassion")
 - [ ] Multi-Surah topics: Topics spanning multiple Surahs
 - [ ] Batch processing: Handle multiple questions in one request
-- [ ] Streaming responses: SSE for long-form answers
 - [ ] Islamic calendar integration: Hijri date references

+# QModel v6 Architecture — Detailed System Design
 > For a quick overview, see [README.md](README.md#architecture-overview)
 ## Core Capabilities
+### 1. **Quran Verse Lookup** (by partial text)
+- Text search: find any verse by typing part of its Arabic or English text
+- Exact substring + fuzzy word-overlap matching
+### 2. **Quran Topic Search**
+- Semantic hybrid search to find verses related to any topic
+- Full Tafsir-aware prompting
+### 3. **Quran Word Frequency & Analytics**
+- Count how many times a word appears across all 114 Surahs
+- Per-surah breakdown with example verses
+- Chapter-level analytics (verse count, revelation type)
+### 4. **Hadith Lookup** (by partial text)
+- Text search across 9 Hadith collections
+- Optional collection filter
+### 5. **Hadith Topic Search**
+- Semantic hybrid search for Hadiths by topic
+- Optional grade filter (sahih, hasan, etc.)
+### 6. **Hadith Authenticity Verification**
+- Dual-method verification: text search + semantic search
+- Grade inference from collection name when not explicitly provided
+- Sources: Bukhari, Muslim, Abu Dawud, Tirmidhi, Ibn Majah, Nasa'i, Malik, Ahmad, Darimi
+### 7. **Safety First**
 - **Confidence Gating**: Low-confidence queries return "not found" instead of LLM guess
 - **Source Attribution**: Every answer cites exact verse/Hadith with reference
 - **Grade Filtering**: Optional: only return Sahih-authenticated Hadiths
 - **Verbatim Quotes**: Copy text directly from data, no paraphrasing
+## Modular Architecture (v6)
+```
+main.py                    ← Thin launcher (73 lines)
+app/
+  config.py               ← Config class (env vars)
+  llm.py                  ← LLM providers (Ollama, HuggingFace)
+  cache.py                ← TTL-LRU async cache
+  arabic_nlp.py           ← Arabic normalisation, stemming, language detection
+  search.py               ← Hybrid FAISS+BM25, text search, query rewriting
+  analysis.py             ← Intent detection, analytics, counting
+  prompts.py              ← Prompt engineering (persona, task instructions)
+  models.py               ← Pydantic schemas
+  state.py                ← AppState, lifespan, RAG pipeline
+  routers/
+    quran.py              ← 6 Quran endpoints
+    hadith.py             ← 5 Hadith endpoints
+    chat.py               ← 2 endpoints (OpenAI-compatible chat completions + /ask inference)
+    ops.py                ← 3 operational endpoints (health, models, debug)
+```
 ---
 ## Data Pipeline
 The system follows a three-phase approach:
+**Metadata Schema** (47,179 entries: 6,236 Quran + 40,943 Hadith):
 ```json
 {
   "id": "surah:verse or hadith_prefix_number",
 ### Phase 3: Retrieval & Ranking
+**Hybrid Search Algorithm** (`app/search.py`):
 1. Dense retrieval: FAISS semantic scoring
 2. Sparse retrieval: BM25 term-frequency ranking
 3. Fusion: 60% dense + 40% sparse
 4. Intent-aware boost: +0.08 to Hadith items when intent=hadith
 5. Type filter: Optional (quran_only / hadith_only / authenticated_only)
+6. Phrase matching: Exact phrase + word-overlap scoring for text search
 ---
+## Module Reference
+### `app/config.py` — Configuration
+- `Config` dataclass with all environment variables
+- Singleton `cfg` instance
+- Loads `.env` via dotenv
+### `app/llm.py` — LLM Providers
+- `LLMProvider` abstract base class
+- `OllamaProvider` — primary (3-model fallback chain)
+- `HuggingFaceProvider` — alternative local inference
+- `create_llm_provider()` factory dispatches on `LLM_BACKEND` env var
+### `app/cache.py` — TTL-LRU Cache
+- `TTLCache` with size limit (1024) and TTL (300s)
+- Pre-built instances: `search_cache`, `analysis_cache`, `rewrite_cache`
+### `app/arabic_nlp.py` — Arabic NLP
+- `normalize_arabic()` — tashkeel removal, hamza normalization
+- `light_stem()` — prefix/suffix stripping
+- `tokenize_ar()` — Arabic-aware tokenization
+- `detect_language()` / `language_instruction()` — route persona by language
+### `app/search.py` — Retrieval Engine
+- `rewrite_query()` — dual-language normalization, LLM-assisted rewriting
+- `hybrid_search()` — FAISS + BM25 fusion with intent-aware boosting
+- `text_search()` — exact substring + word-overlap matching (for verse/hadith lookup by partial text)
+- `build_context()` — format retrieved items for LLM prompt
+### `app/analysis.py` — Analytics & Intent Detection
+- `detect_analysis_intent()` — detects word-frequency (count) queries and returns the keyword to count; surah-metadata questions are excluded and left to `detect_surah_info()`
+- `count_occurrences()` — word frequency across all Surahs
+- `get_quran_analytics()` — chapter-level stats
+- `get_hadith_analytics()` — collection-level stats
+- `get_chapter_info()` — single Surah metadata plus its verses, sorted by ayah
+- `get_verse()` — exact verse by surah:ayah
+- `detect_surah_info()` / `lookup_surah_info()` — Surah name resolution
+### `app/prompts.py` — Prompt Engineering
+- `PERSONA` — Islamic scholar persona definition
+- `TASK_INSTRUCTIONS` — verbatim-quoting, anti-hallucination rules
+- `FORMAT_RULES` — citation box format
+- `build_messages()` — intent-aware system + user message construction
+- `not_found_answer()` — safe "not in dataset" response
+### `app/models.py` — Pydantic Schemas
+All request/response models:
+- `ChatMessage`, `ChatCompletionRequest/Response/Choice` — OpenAI-compatible
+- `AskResponse`, `AnalysisResult`, `SourceItem` — RAG pipeline
+- `HadithVerifyResponse` — authenticity verification
+- `VerseItem`, `HadithItem`, `TextSearchResponse` — text search
+- `ChapterResponse`, `QuranAnalyticsResponse`, `HadithAnalyticsResponse` — analytics
+- `WordFrequencyResponse` — word counting
+- `ModelInfo`, `ModelsListResponse` — OpenAI models list
+### `app/state.py` — Application State & Lifecycle
+- `AppState` — holds FAISS index, metadata, embedder, LLM provider
+- `lifespan()` — async startup (loads index, model, metadata)
+- `check_ready()` — dependency guard for endpoints
+- `run_rag_pipeline()` — full RAG: rewrite → search → context → LLM → response
+- `infer_hadith_grade()` — grade detection from collection name
+---
+## API Endpoints (16 total)
+### Quran Router (`/quran/...`) — 6 endpoints
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/quran/search?q=...` | GET | Text search: find verses by partial Arabic/English text |
+| `/quran/topic?q=...&top_k=5` | GET | Semantic search: find verses related to a topic |
+| `/quran/word-frequency?word=...` | GET | Count word occurrences across all Surahs |
+| `/quran/analytics` | GET | Overall Quran stats (total verses, Surahs, types) |
+| `/quran/chapter/{number}` | GET | Single Surah metadata (name, verse count, type) |
+| `/quran/verse/{surah}:{ayah}` | GET | Exact verse lookup by reference |
+### Hadith Router (`/hadith/...`) — 5 endpoints
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/hadith/search?q=...&collection=...` | GET | Text search across collections |
+| `/hadith/topic?q=...&top_k=5&grade=...` | GET | Semantic search by topic with optional grade filter |
+| `/hadith/verify?q=...` | GET | Authenticity verification (text + semantic search) |
+| `/hadith/collection/{name}?limit=20` | GET | Browse a specific collection |
+| `/hadith/analytics` | GET | Collection-level statistics |
+### Chat Router — 2 endpoints
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/v1/chat/completions` | POST | OpenAI-compatible chat (SSE streaming supported) |
+| `/ask?q=...&top_k=5` | GET | Direct RAG query with full source attribution |
+### Ops Router — 3 endpoints
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/health` | GET | Readiness check |
+| `/v1/models` | GET | OpenAI-compatible model listing |
+| `/debug/scores?q=...&top_k=10` | GET | Raw retrieval scores (no LLM call) |
+---
+## Anti-Hallucination Measures
+- Few-shot examples including "not found" refusal path
+- Hardcoded format rules (box/citation format required)
+- Verbatim copy rules (no reconstruction from memory)
+- Confidence threshold gating (default: 0.30)
+- Grade inference for Hadith authenticity (collection-based)
 ---
 ## Configuration
+**`.env` variables**:
 ```
 OLLAMA_HOST              # Ollama server URL
 LLM_MODEL                # Primary model (e.g. minimax-m2.7:cloud)
+LLM_BACKEND              # "ollama" (default), "hf" (HuggingFace), "gguf", or "lmstudio"
 EMBED_MODEL              # Embedding model (intfloat/multilingual-e5-large)
 FAISS_INDEX              # Path to QModel.index
 METADATA_FILE            # Path to metadata.json
 ---
+## Testing Examples
+### 1. Quran Verse Lookup (Capability 1)
+```bash
+curl "http://localhost:8000/quran/search?q=bismillah"
 ```
+### 2. Quran Topic Search (Capability 2)
+```bash
+curl "http://localhost:8000/quran/topic?q=patience&top_k=5"
 ```
+### 3. Word Frequency (Capability 3)
+```bash
+curl "http://localhost:8000/quran/word-frequency?word=mercy"
+# → Returns: count per surah + total + examples
 ```
+### 4. Quran Analytics (Capability 3)
+```bash
+curl "http://localhost:8000/quran/analytics"
+curl "http://localhost:8000/quran/chapter/2"
 ```
+### 5. Hadith Text Search (Capability 4)
+```bash
+curl "http://localhost:8000/hadith/search?q=actions+are+judged+by+intentions"
 ```
+### 6. Hadith Topic Search (Capability 5)
+```bash
+curl "http://localhost:8000/hadith/topic?q=fasting&grade=sahih"
 ```
+### 7. Hadith Authenticity Verification (Capability 6)
+```bash
+curl "http://localhost:8000/hadith/verify?q=Actions+are+judged+by+intentions"
+# → Returns: found=true, grade="Sahih", source="Sahih al-Bukhari 1"
+```
+### 8. Confidence Gate in Action (Safety)
 ```
 Q: "Who was Muhammad's 7th wife?" (not in dataset)
 → Retrieval score: 0.15 (below 0.30 threshold)
 → LLM not called (prevents hallucination)
 ```
+### 9. OpenAI-Compatible Chat (Streaming)
+```bash
+curl -X POST http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model":"qmodel","messages":[{"role":"user","content":"What does Islam say about charity?"}],"stream":true}'
+```
 ---
+## Roadmap: v6+ Enhancements
+- [x] Grade-based filtering: `?grade=sahih` to return only authenticated Hadiths
+- [x] Streaming responses: SSE for long-form answers
+- [x] Modular architecture: Separate routers, models, and services
+- [x] Dual LLM backend: Ollama + HuggingFace support
+- [x] Text search: Exact substring + fuzzy word-overlap matching
+- [x] Expanded endpoints: 16 endpoints across 4 routers
 - [ ] Chain of narrators: Display Isnad with full narrator details
 - [ ] Synonym expansion: Better topic matching (e.g., "mercy" → "rahma, compassion")
 - [ ] Multi-Surah topics: Topics spanning multiple Surahs
 - [ ] Batch processing: Handle multiple questions in one request
 - [ ] Islamic calendar integration: Hijri date references
+- [ ] Tafsir integration: Dedicated Tafsir endpoint with scholar citations

app/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """QModel v6 — Islamic RAG API."""

app/analysis.py ADDED Viewed

	@@ -0,0 +1,322 @@

+"""Quran / Hadith analytics — occurrence counting, surah metadata, dataset stats."""
+from __future__ import annotations
+import re
+from typing import Dict, List, Literal, Optional
+from app.arabic_nlp import light_stem, normalize_arabic, tokenize_ar
+from app.cache import analysis_cache
+from app.config import cfg
+# ═══════════════════════════════════════════════════════════════════════
+# INTENT DETECTION — frequency / surah info queries
+# ═══════════════════════════════════════════════════════════════════════
+# English counting phrases ("how many", "number of", "how often", ...), matched
+# case-insensitively on word boundaries.
+_COUNT_EN = re.compile(
+    r"\b(how many|count|number of|frequency|occurrences? of|how often|"
+    r"times? (does|is|appears?))\b",
+    re.I,
+)
+# Arabic counting phrases (e.g. "how many times", "number of times", "repetition").
+_COUNT_AR = re.compile(
+    r"(كم مرة|كم عدد|كم تكرر|عدد مرات|تكرار|كم ذُكر|كم وردت?)"
+)
+# Arabic questions about the number of verses in a surah; both spellings of
+# "surah" (سورة / سوره) are accepted, in either word order.
+_SURAH_VERSES_AR = re.compile(
+    r"كم\s+(?:عدد\s+)?آيات?\s*(?:في\s+|فى\s+)?(?:سورة|سوره)"
+    r"|عدد\s+آيات?\s+(?:سورة|سوره)"
+    r"|كم\s+آية\s+(?:في|فى)\s+(?:سورة|سوره)"
+    r"|(?:سورة|سوره)\s+[\u0600-\u06FF\s]+\s+(?:كم\s+آية|عدد\s+آيات?)"
+)
+# English questions about the number of verses: either the counting phrase
+# followed by in/of/does, or "surah"/"sura" appearing earlier in the sentence.
+_SURAH_VERSES_EN = re.compile(
+    r"(?:how many|number of)\s+(?:verses?|ayat|ayahs?)\s+(?:in|of|does)\b"
+    r"|\bsurah?\b.*\b(?:how many|number of)\s+(?:verses?|ayat|ayahs?)",
+    re.I,
+)
+# Arabic questions about whether a surah is Meccan or Medinan.
+# NOTE(review): no English counterpart is defined among these patterns.
+_SURAH_TYPE_AR = re.compile(
+    r"(?:سورة|سوره)\s+[\u0600-\u06FF\s]+\s+(?:مكية|مدنية|مكي|مدني)"
+    r"|(?:هل|ما\s+نوع)\s+(?:سورة|سوره)\s+[\u0600-\u06FF\s]+\s+(?:مكية|مدنية)"
+)
+# Group 1 captures everything Arabic (letters and whitespace) after the word
+# "surah"; the match is greedy, so trailing question words are captured too.
+_SURAH_NAME_AR = re.compile(
+    r"(?:سورة|سوره)\s+([\u0600-\u06FF\u0750-\u077F\s]+)"
+)
+# Group 1 captures one or more Latin words (letters, apostrophes, hyphens)
+# following "surah" or "sura", case-insensitively.
+_SURAH_NAME_EN = re.compile(
+    r"\bsurah?\s+([a-zA-Z'\-]+(?:[\s\-][a-zA-Z'\-]+)*)",
+    re.I,
+)
+def _extract_surah_name(query: str) -> Optional[str]:
+    """Return the surah name mentioned in *query*, or ``None`` when there is none.
+    The Arabic pattern is tried before the English one; the first pattern that
+    yields a non-empty name after clean-up wins.
+    """
+    for pattern in (_SURAH_NAME_AR, _SURAH_NAME_EN):
+        match = pattern.search(query)
+        if match is None:
+            continue
+        candidate = match.group(1).strip()
+        # Trim trailing whitespace and question / exclamation marks.
+        candidate = re.sub(r'[\s؟?!]+$', '', candidate)
+        # Trim a single trailing Arabic question word or preposition that the
+        # greedy capture group swallowed.
+        candidate = re.sub(r'\s+(كم|عدد|هل|ما|في|فى)$', '', candidate)
+        if candidate:
+            return candidate
+    return None
+# ═══════════════════════════════════════════════════════════════════════
+# SURAH INFO DETECTION & LOOKUP
+# ═══════════════════════════════════════════════════════════════════════
+async def detect_surah_info(query: str, rewrite: dict) -> Optional[dict]:
+    """Decide whether *query* asks for surah metadata and, if so, about which surah.
+    Returns ``{"surah_query": <name>, "query_type": "verses" | "type"}`` or
+    ``None`` when the query is not a surah-metadata question or no surah name
+    can be extracted. The regex patterns are consulted first; the rewriter's
+    ``intent`` / ``keywords`` are only a fallback.
+    """
+    asks_verses = (
+        _SURAH_VERSES_AR.search(query) is not None
+        or _SURAH_VERSES_EN.search(query) is not None
+    )
+    asks_type = _SURAH_TYPE_AR.search(query) is not None
+    if not asks_verses and not asks_type:
+        intent = rewrite.get("intent")
+        if intent == "count":
+            # A "count" intent is only a surah question when the keywords talk
+            # about verses rather than about a word to be counted.
+            joined = " ".join(rewrite.get("keywords", []))
+            if not any(marker in joined for marker in ("آيات", "آية", "verses", "ayat")):
+                return None
+        elif intent != "surah_info":
+            return None
+        asks_verses = True
+    name = _extract_surah_name(query)
+    if not name:
+        return None
+    return {
+        "surah_query": name,
+        "query_type": "verses" if asks_verses else "type",
+    }
+async def lookup_surah_info(surah_query: str, dataset: list) -> Optional[dict]:
+    """Find the surah whose name matches *surah_query* and return its metadata.
+    Only entries with ``type == "quran"`` are considered. The query and each of
+    the entry's Arabic / English / transliterated names are normalized, and a
+    leading definite article ("ال" / "al-") is also stripped; a match is a
+    substring hit in either direction. The first matching entry wins.
+    Returns a dict with ``surah_number``, the three name fields,
+    ``total_verses`` and ``revelation_type``, or ``None`` when nothing matches
+    or the query is empty after normalization.
+    """
+    article = re.compile(r"^(ال|al[\-\s']*)", re.I)
+    # Strip so the ^-anchored article pattern works and substring tests are not
+    # defeated by padding left behind by normalization.
+    query_norm = normalize_arabic(surah_query, aggressive=True).lower().strip()
+    if not query_norm:
+        # "" is a substring of every string, so an empty query would otherwise
+        # "match" the very first Quran entry.
+        return None
+    query_clean = article.sub("", query_norm).strip()
+    for item in dataset:
+        if item.get("type") != "quran":
+            continue
+        for field in ("surah_name_ar", "surah_name_en", "surah_name_transliteration"):
+            val = item.get(field, "")
+            if not val:
+                continue
+            val_norm = normalize_arabic(val, aggressive=True).lower().strip()
+            if not val_norm:
+                # Same empty-substring trap as above, from the data side.
+                continue
+            val_clean = article.sub("", val_norm).strip()
+            if (query_norm in val_norm or val_norm in query_norm
+                    or (query_clean and val_clean
+                        and (query_clean in val_clean or val_clean in query_clean))
+                    or (query_clean and query_clean in val_norm)):
+                return {
+                    "surah_number":              item.get("surah_number"),
+                    "surah_name_ar":             item.get("surah_name_ar", ""),
+                    "surah_name_en":             item.get("surah_name_en", ""),
+                    "surah_name_transliteration": item.get("surah_name_transliteration", ""),
+                    "total_verses":              item.get("total_verses"),
+                    "revelation_type":           item.get("revelation_type", ""),
+                }
+    return None
+# ═══════════════════════════════════════════════════════════════════════
+# ANALYSIS INTENT (word frequency detection)
+# ═══════════════════════════════════════════════════════════════════════
+async def detect_analysis_intent(query: str, rewrite: dict) -> Optional[str]:
+    """Return the keyword to count when *query* is a word-frequency question.
+    Surah-metadata questions (verse count / revelation type) yield ``None``.
+    When the rewriter reports ``intent == "count"`` its first keyword is used,
+    unless the keywords talk about verses. Otherwise the counting regexes are
+    tried and the first whitespace-delimited token after the matched phrase is
+    returned verbatim (no stop-word or punctuation filtering is applied).
+    """
+    intent = rewrite.get("intent")
+    is_surah_question = (
+        _SURAH_VERSES_AR.search(query)
+        or _SURAH_VERSES_EN.search(query)
+        or _SURAH_TYPE_AR.search(query)
+        or intent == "surah_info"
+    )
+    if is_surah_question:
+        return None
+    if intent == "count":
+        keywords = rewrite.get("keywords", [])
+        joined = " ".join(keywords)
+        about_verses = any(
+            marker in joined for marker in ("آيات", "آية", "verses", "ayat")
+        )
+        if about_verses or not keywords:
+            return None
+        return keywords[0]
+    # Regex fallback: English phrases first, then Arabic.
+    for pattern in (_COUNT_EN, _COUNT_AR):
+        hit = pattern.search(query)
+        if hit is None:
+            continue
+        remainder = query[hit.end():].split()
+        if remainder:
+            return remainder[0]
+    return None
+# ═══════════════════════════════════════════════════════════════════════
+# OCCURRENCE COUNTING
+# ═══════════════════════════════════════════════════════════════════════
+async def count_occurrences(keyword: str, dataset: list) -> dict:
+    """Count substring occurrences of *keyword* in Quran entries, grouped by surah.
+    Only entries with ``type == "quran"`` are scanned. Each entry's normalized
+    Arabic text and its lower-cased English text are searched for the
+    normalized keyword and, when it differs, its light stem. Matching is by
+    substring, not whole word. Results are memoized in ``analysis_cache`` under
+    the raw keyword.
+    Returns a dict with ``keyword``, ``kw_stemmed``, ``total_count``,
+    ``by_surah`` (surah number -> ``{"name", "count"}``, sorted by number) and
+    up to ``cfg.MAX_EXAMPLES`` ``examples``. A keyword that is empty after
+    normalization yields a zero-count result and is not cached.
+    """
+    cached = await analysis_cache.get(keyword)
+    if cached:
+        return cached
+    # Strip so padding left by normalization cannot prevent matches.
+    kw_norm = normalize_arabic(keyword, aggressive=True).lower().strip()
+    if not kw_norm:
+        # str.count("") returns len(text) + 1, which would report a huge bogus
+        # total for every verse; answer with an explicit empty result instead.
+        return {
+            "keyword":     keyword,
+            "kw_stemmed":  "",
+            "total_count": 0,
+            "by_surah":    {},
+            "examples":    [],
+        }
+    kw_stem = light_stem(kw_norm)
+    # An empty stem has the same str.count("") problem, so it is ignored.
+    use_stem = bool(kw_stem) and kw_stem != kw_norm
+    count = 0
+    by_surah: Dict[int, Dict] = {}
+    examples: list = []
+    for item in dataset:
+        if item.get("type") != "quran":
+            continue
+        ar_norm = normalize_arabic(item.get("arabic", ""), aggressive=True).lower()
+        combined = f"{ar_norm} {item.get('english', '')}".lower()
+        exact = combined.count(kw_norm)
+        # Stem hits normally include the exact hits; max() keeps the total from
+        # dropping below the exact count if the stem is not a substring.
+        occ = max(exact, combined.count(kw_stem)) if use_stem else exact
+        if occ <= 0:
+            continue
+        count += occ
+        surah_num = item.get("surah_number", 0)
+        bucket = by_surah.setdefault(surah_num, {
+            "name": item.get("surah_name_en", f"Surah {surah_num}"),
+            "count": 0,
+        })
+        bucket["count"] += occ
+        if len(examples) < cfg.MAX_EXAMPLES:
+            examples.append({
+                "reference": item.get("source", ""),
+                "arabic":    item.get("arabic", ""),
+                "english":   item.get("english", ""),
+            })
+    result = {
+        "keyword":     keyword,
+        "kw_stemmed":  kw_stem,
+        "total_count": count,
+        "by_surah":    dict(sorted(by_surah.items())),
+        "examples":    examples,
+    }
+    await analysis_cache.set(result, keyword)
+    return result
+# ═══════════════════════════════════════════════════════════════════════
+# DATASET ANALYTICS — aggregate statistics
+# ═══════════════════════════════════════════════════════════════════════
+def get_quran_analytics(dataset: list) -> dict:
+    """Summarize the Quran entries of *dataset*.
+    Returns the number of Quran entries seen, the number of distinct surahs,
+    how many surahs are labelled "meccan" / "medinan" (case-insensitive), and
+    one record per surah ordered by surah number. Per-surah metadata is taken
+    from the first entry encountered for that surah.
+    """
+    per_surah: Dict[int, dict] = {}
+    verse_total = 0
+    for entry in dataset:
+        if entry.get("type") != "quran":
+            continue
+        verse_total += 1
+        number = entry.get("surah_number", 0)
+        record = per_surah.get(number)
+        if record is None:
+            record = per_surah[number] = {
+                "surah_number":              number,
+                "surah_name_ar":             entry.get("surah_name_ar", ""),
+                "surah_name_en":             entry.get("surah_name_en", ""),
+                "surah_name_transliteration": entry.get("surah_name_transliteration", ""),
+                "revelation_type":           entry.get("revelation_type", ""),
+                "total_verses":              entry.get("total_verses", 0),
+                "verses_in_dataset":         0,
+            }
+        record["verses_in_dataset"] += 1
+    kinds = [rec.get("revelation_type", "").lower() for rec in per_surah.values()]
+    return {
+        "total_verses_in_dataset": verse_total,
+        "total_surahs":            len(per_surah),
+        "meccan_surahs":           kinds.count("meccan"),
+        "medinan_surahs":          kinds.count("medinan"),
+        "surahs":                  [per_surah[number] for number in sorted(per_surah)],
+    }
+def get_hadith_analytics(dataset: list) -> dict:
+    """Summarize the Hadith entries of *dataset*.
+    Returns the total number of Hadith entries, one record per collection
+    (entry count plus a per-grade tally, largest collection first) and an
+    overall grade tally ordered by descending count. Entries without a
+    ``collection`` or ``grade`` are filed under "Unknown" / "Ungraded".
+    """
+    per_collection: Dict[str, dict] = {}
+    grade_totals: Dict[str, int] = {}
+    hadith_total = 0
+    for entry in dataset:
+        if entry.get("type") != "hadith":
+            continue
+        hadith_total += 1
+        name = entry.get("collection", "Unknown")
+        bucket = per_collection.setdefault(
+            name, {"collection": name, "count": 0, "grades": {}}
+        )
+        bucket["count"] += 1
+        grade = entry.get("grade", "Ungraded")
+        grade_totals[grade] = grade_totals.get(grade, 0) + 1
+        bucket["grades"][grade] = bucket["grades"].get(grade, 0) + 1
+    ranked_collections = sorted(
+        per_collection.values(), key=lambda c: c["count"], reverse=True
+    )
+    ranked_grades = sorted(grade_totals.items(), key=lambda x: x[1], reverse=True)
+    return {
+        "total_hadiths":  hadith_total,
+        "collections":    ranked_collections,
+        "grade_summary":  dict(ranked_grades),
+    }
+def get_chapter_info(chapter_number: int, dataset: list) -> Optional[dict]:
+    """Return metadata and all verses of surah *chapter_number*, or ``None``.
+    Surah-level metadata comes from the first matching Quran entry. Verses are
+    sorted by ayah number, read from ``ayah_number`` or, failing that,
+    ``verse_number``; entries lacking both sort as 0.
+    """
+    matching = [
+        entry for entry in dataset
+        if entry.get("type") == "quran"
+        and entry.get("surah_number") == chapter_number
+    ]
+    if not matching:
+        return None
+    first = matching[0]
+    verses = sorted(
+        (
+            {
+                "ayah":    entry.get("ayah_number") or entry.get("verse_number"),
+                "arabic":  entry.get("arabic", ""),
+                "english": entry.get("english", ""),
+                "source":  entry.get("source", ""),
+            }
+            for entry in matching
+        ),
+        key=lambda v: v.get("ayah") or 0,
+    )
+    return {
+        "surah_number":              first.get("surah_number"),
+        "surah_name_ar":             first.get("surah_name_ar", ""),
+        "surah_name_en":             first.get("surah_name_en", ""),
+        "surah_name_transliteration": first.get("surah_name_transliteration", ""),
+        "revelation_type":           first.get("revelation_type", ""),
+        "total_verses":              first.get("total_verses", 0),
+        "verses":                    verses,
+    }
+def get_verse(surah: int, ayah: int, dataset: list) -> Optional[dict]:
+    """Return the Quran entry for *surah*:*ayah* as a flat dict, or ``None``.
+    The ayah number is read from ``ayah_number`` or, failing that,
+    ``verse_number``. Missing text fields default to the empty string. The
+    first matching entry is returned.
+    """
+    for entry in dataset:
+        if entry.get("type") != "quran" or entry.get("surah_number") != surah:
+            continue
+        number = entry.get("ayah_number") or entry.get("verse_number")
+        if number != ayah:
+            continue
+        verse = {"surah_number": entry.get("surah_number")}
+        verse.update(
+            (key, entry.get(key, ""))
+            for key in ("surah_name_ar", "surah_name_en", "surah_name_transliteration")
+        )
+        verse["ayah"] = number
+        verse.update(
+            (key, entry.get(key, ""))
+            for key in (
+                "arabic", "english", "transliteration",
+                "tafsir_en", "tafsir_ar", "source", "revelation_type",
+            )
+        )
+        return verse
+    return None

app/arabic_nlp.py ADDED Viewed

	@@ -0,0 +1,98 @@

+"""Arabic NLP — normalisation, light stemming, language detection."""
+from __future__ import annotations
+import re
+from typing import Dict, List, Literal
# ── Normalization patterns ─────────────────────────────────────────────
# Each pattern below is paired with its replacement in normalize_arabic().
_DIACRITICS   = re.compile(r"[\u064B-\u0655\u0656-\u0658\u0670\u06D6-\u06ED]")
_ALEF_VARS    = re.compile(r"[أإآٱ]")
_WAW_HAMZA    = re.compile(r"ؤ")
_YA_HAMZA     = re.compile(r"ئ")
_TA_MARBUTA   = re.compile(r"ة\b")
_ALEF_MAQSURA = re.compile(r"ى")
_TATWEEL      = re.compile(r"\u0640+")
_PUNC_AR      = re.compile(r"[،؛؟!«»\u200c\u200d\u200f\u200e]")
_MULTI_SPACE  = re.compile(r"\s{2,}")
_NON_AR_EN    = re.compile(r"[^\u0600-\u06FF\u0750-\u077Fa-zA-Z0-9\s]")
# Applied AFTER the character-level substitutions, so the keys are written in
# their already-normalized form.
_SPELLING_MAP: Dict[str, str] = {
    "قران":    "قرآن",
    "القران":  "القرآن",
    "اللہ":    "الله",
}


def normalize_arabic(text: str, *, aggressive: bool = False) -> str:
    """Return *text* with Arabic orthographic variation folded away.

    Steps, in order: drop diacritics and tatweel, unify alef / hamza-carrier
    variants, map word-final ta marbuta to ha and alef maqsura to ya, turn
    Arabic punctuation and zero-width marks into spaces, apply the spelling
    map, and finally collapse runs of whitespace and strip the ends.

    With ``aggressive=True`` every character that is not Arabic, ASCII
    alphanumeric or whitespace is additionally replaced by a space.
    """
    # Order matters: later rules see the output of earlier ones.
    substitutions = (
        (_DIACRITICS, ""),
        (_TATWEEL, ""),
        (_ALEF_VARS, "ا"),
        (_WAW_HAMZA, "و"),
        (_YA_HAMZA, "ي"),
        (_TA_MARBUTA, "ه"),
        (_ALEF_MAQSURA, "ي"),
        (_PUNC_AR, " "),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)

    for variant in _SPELLING_MAP:
        cleaned = cleaned.replace(variant, _SPELLING_MAP[variant])

    if aggressive:
        cleaned = _NON_AR_EN.sub(" ", cleaned)

    collapsed = _MULTI_SPACE.sub(" ", cleaned)
    return collapsed.strip()
# ── Light stemming ─────────────────────────────────────────────────────
# Proclitic prefixes, longest first so that e.g. "وال" wins over "و".
# The previous pattern ended in ``\b``; since Arabic letters are word
# characters, that boundary could only match when the prefix was the whole
# token, so no prefix was ever stripped from a real word.  The whole-word
# entries it also listed (ومن، وفي، وعن، وإلى، وعلى) are dropped here: they
# never changed the output before, and as true prefixes they would mis-split
# words that merely start with those letters.
_AR_PREFIXES = re.compile(r"^(?:وال|فال|بال|كال|ولل|ال|لل|و|ف|ب|ل)")
_AR_SUFFIXES = re.compile(
    r"(ون|ين|ان|ات|ها|هم|هن|كم|كن|نا|ني|تي|ي|ه|ك|ا|وا)$"
)
# A prefix is only removed when at least this many characters remain, so that
# short words whose first root letter looks like a clitic are left intact.
_MIN_LEN_AFTER_PREFIX = 3
# Minimum length of an acceptable stem after suffix removal.
_MIN_STEM_LEN = 2


def light_stem(word: str) -> str:
    """Strip one common Arabic prefix and one common suffix from *word*.

    The prefix is removed only if at least ``_MIN_LEN_AFTER_PREFIX``
    characters remain.  The suffix is then removed; if that would leave fewer
    than ``_MIN_STEM_LEN`` characters, the suffix removal is undone.  Tokens
    that match no affix (including Latin-script tokens) are returned as-is.
    """
    base = word
    prefix = _AR_PREFIXES.match(word)
    if prefix and len(word) - prefix.end() >= _MIN_LEN_AFTER_PREFIX:
        base = word[prefix.end():]

    stem = _AR_SUFFIXES.sub("", base)
    return stem if len(stem) >= _MIN_STEM_LEN else base
def tokenize_ar(text: str) -> List[str]:
    """Split *text* into lower-cased, normalized and light-stemmed tokens.

    The text is normalized aggressively (non Arabic/ASCII-alphanumeric
    characters become spaces) before being split on whitespace.
    """
    normalized = normalize_arabic(text, aggressive=True).lower()
    stems: List[str] = []
    for token in normalized.split():
        if token:
            stems.append(light_stem(token))
    return stems
# ── Language detection ─────────────────────────────────────────────────
_ARABIC_SCRIPT = re.compile(
    r"[\u0600-\u06FF\u0750-\u077F\uFB50-\uFDFF\uFE70-\uFEFF]"
)


def detect_language(text: str) -> Literal["arabic", "english", "mixed"]:
    """Classify *text* by its share of Arabic-script characters.

    Only Arabic-script characters and ASCII letters are counted.  A share
    above 0.70 is ``"arabic"``, below 0.30 is ``"english"``, anything in
    between is ``"mixed"``.  Text with neither kind of character is
    reported as ``"english"``.
    """
    arabic_chars = sum(1 for _ in _ARABIC_SCRIPT.finditer(text))
    latin_chars = sum(1 for ch in text if "a" <= ch <= "z" or "A" <= ch <= "Z")

    counted = arabic_chars + latin_chars
    if counted == 0:
        counted = 1  # avoid division by zero; share becomes 0.0
    arabic_share = arabic_chars / counted

    if arabic_share > 0.70:
        return "arabic"
    return "english" if arabic_share < 0.30 else "mixed"
def language_instruction(lang: str) -> str:
    """Return the reply-language directive to embed in the system prompt.

    ``"arabic"`` and ``"mixed"`` have dedicated directives; every other
    value (including ``"english"``) yields the English-only directive.
    """
    if lang == "arabic":
        return (
            "يجب أن تكون الإجابة كاملةً باللغة العربية الفصحى تماماً. "
            "لا تستخدم الإنجليزية أو أي لغة أخرى في أي جزء من الإجابة."
        )
    if lang == "mixed":
        return (
            "The question mixes Arabic and English. Reply primarily in Arabic (الفصحى) "
            "but you may transliterate key terms in English where essential."
        )
    return "You MUST reply entirely in clear, formal English."

app/cache.py ADDED Viewed

	@@ -0,0 +1,49 @@

+"""Async-safe TTL-LRU cache."""
+from __future__ import annotations
+import asyncio
+import hashlib
+import json
+import time
+from collections import OrderedDict
+from app.config import cfg
class TTLCache:
    """LRU cache whose entries expire ``ttl`` seconds after being stored.

    All access goes through an ``asyncio.Lock``.  Keys are derived from the
    positional arguments, which therefore must be JSON-serializable.
    Expired entries are removed lazily, when they are next looked up.
    """

    def __init__(self, maxsize: int = 256, ttl: int = 3600):
        self._maxsize = maxsize
        self._ttl = ttl
        self._lock = asyncio.Lock()
        # key -> (value, monotonic timestamp); order tracks recency of use.
        self._cache: OrderedDict = OrderedDict()

    def _key(self, *args) -> str:
        """Hash *args* into a 20-hex-character cache key."""
        serialized = json.dumps(args, ensure_ascii=False, sort_keys=True)
        digest = hashlib.sha256(serialized.encode())
        return digest.hexdigest()[:20]

    async def get(self, *args):
        """Return the cached value for *args*, or ``None`` if absent/expired."""
        async with self._lock:
            key = self._key(*args)
            entry = self._cache.get(key)
            if entry is None:
                return None
            value, stored_at = entry
            if time.monotonic() - stored_at >= self._ttl:
                del self._cache[key]
                return None
            self._cache.move_to_end(key)
            return value

    async def set(self, value, *args):
        """Store *value* under *args*, evicting the least recently used entry
        when the cache grows beyond ``maxsize``."""
        async with self._lock:
            key = self._key(*args)
            self._cache[key] = (value, time.monotonic())
            self._cache.move_to_end(key)
            if len(self._cache) > self._maxsize:
                self._cache.popitem(last=False)
# Process-wide cache instances, sized and timed from the shared configuration.
search_cache = TTLCache(maxsize=cfg.CACHE_SIZE, ttl=cfg.CACHE_TTL)
analysis_cache = TTLCache(maxsize=cfg.CACHE_SIZE, ttl=cfg.CACHE_TTL)
# Rewrites are kept six times longer than the base TTL.
rewrite_cache = TTLCache(maxsize=cfg.CACHE_SIZE, ttl=cfg.CACHE_TTL * 6)

app/config.py ADDED Viewed

	@@ -0,0 +1,67 @@

+"""Centralized configuration with dual LLM backend support."""
+from __future__ import annotations
+import os
+from dotenv import load_dotenv
+load_dotenv()
def _env_int(name: str, default: int) -> int:
    """Read environment variable *name* as an int, using *default* if unset."""
    return int(os.getenv(name, default))


def _env_float(name: str, default: float) -> float:
    """Read environment variable *name* as a float, using *default* if unset."""
    return float(os.getenv(name, default))


class Config:
    """Application settings, read once from the environment at import time.

    Every attribute maps to the environment variable of the same name and
    falls back to the default shown here when that variable is not set.
    """

    # Backend selection
    LLM_BACKEND: str = os.getenv("LLM_BACKEND", "ollama")

    # Hugging Face backend
    HF_MODEL_NAME: str = os.getenv("HF_MODEL_NAME", "Qwen/Qwen2-7B-Instruct")
    HF_DEVICE: str = os.getenv("HF_DEVICE", "auto")
    HF_MAX_NEW_TOKENS: int = _env_int("HF_MAX_NEW_TOKENS", 2048)

    # Ollama backend
    OLLAMA_HOST: str = os.getenv("OLLAMA_HOST", "http://localhost:11434")
    OLLAMA_MODEL: str = os.getenv("OLLAMA_MODEL", "llama2")

    # GGUF backend (llama-cpp-python)
    GGUF_MODEL_PATH: str = os.getenv("GGUF_MODEL_PATH", "")
    GGUF_N_CTX: int = _env_int("GGUF_N_CTX", 4096)
    GGUF_N_GPU_LAYERS: int = _env_int("GGUF_N_GPU_LAYERS", -1)

    # LM Studio backend
    LMSTUDIO_URL: str = os.getenv("LMSTUDIO_URL", "http://localhost:1234")
    LMSTUDIO_MODEL: str = os.getenv("LMSTUDIO_MODEL", "")

    # Embedding model
    EMBED_MODEL: str = os.getenv("EMBED_MODEL", "intfloat/multilingual-e5-large")

    # Index & data
    FAISS_INDEX: str = os.getenv("FAISS_INDEX", "QModel.index")
    METADATA_FILE: str = os.getenv("METADATA_FILE", "metadata.json")

    # Retrieval
    TOP_K_SEARCH: int = _env_int("TOP_K_SEARCH", 20)
    TOP_K_RETURN: int = _env_int("TOP_K_RETURN", 5)

    # Generation
    TEMPERATURE: float = _env_float("TEMPERATURE", 0.2)
    MAX_TOKENS: int = _env_int("MAX_TOKENS", 2048)

    # Caching
    CACHE_SIZE: int = _env_int("CACHE_SIZE", 512)
    CACHE_TTL: int = _env_int("CACHE_TTL", 3600)

    # Ranking
    RERANK_ALPHA: float = _env_float("RERANK_ALPHA", 0.6)
    HADITH_BOOST: float = _env_float("HADITH_BOOST", 0.08)

    # Safety
    CONFIDENCE_THRESHOLD: float = _env_float("CONFIDENCE_THRESHOLD", 0.30)

    # CORS
    ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")

    # Example cap (its consumer is not visible in this module)
    MAX_EXAMPLES: int = _env_int("MAX_EXAMPLES", 3)


# Shared settings object imported by the rest of the application.
cfg = Config()

app/llm.py ADDED Viewed

	@@ -0,0 +1,194 @@

+"""LLM abstraction layer — Ollama and HuggingFace backends."""
+from __future__ import annotations
+import asyncio
+import logging
+from typing import List
+from app.config import cfg
+logger = logging.getLogger("qmodel.llm")
class LLMProvider:
    """Interface every LLM backend implements.

    Subclasses override :meth:`chat`; the base implementation always raises
    ``NotImplementedError``.
    """

    async def chat(
        self,
        messages: List[dict],
        temperature: float,
        max_tokens: int,
    ) -> str:
        """Generate a reply for *messages* and return it as text."""
        raise NotImplementedError
class OllamaProvider(LLMProvider):
    """LLM provider backed by an Ollama server."""

    def __init__(self, host: str, model: str):
        """Create an Ollama client for *host* that will query *model*.

        Raises:
            ImportError: if the ``ollama`` package is not installed.
        """
        self.host = host
        self.model = model
        # Only the import is guarded, so errors raised while building the
        # client are not mislabelled as a missing dependency.
        try:
            import ollama
        except ImportError as exc:
            raise ImportError("Install ollama:  pip install ollama") from exc
        self.client = ollama.Client(host=host)

    async def chat(
        self, messages: List[dict], temperature: float, max_tokens: int
    ) -> str:
        """Send *messages* to Ollama and return the stripped reply text.

        The blocking client call runs in the default executor so the event
        loop stays responsive.  Failures are logged and re-raised.
        """
        # We are inside a coroutine, so a running loop is guaranteed.
        loop = asyncio.get_running_loop()
        try:
            result = await loop.run_in_executor(
                None,
                lambda: self.client.chat(
                    model=self.model,
                    messages=messages,
                    options={"temperature": temperature, "num_predict": max_tokens},
                ),
            )
            return result["message"]["content"].strip()
        except Exception as exc:
            logger.error("Ollama chat failed: %s", exc)
            raise
class GGUFProvider(LLMProvider):
    """llama-cpp-python GGUF provider — runs GGUF models directly in-process."""

    def __init__(self, model_path: str, n_ctx: int = 4096, n_gpu_layers: int = -1):
        """Load the GGUF model at *model_path*.

        ``n_ctx`` and ``n_gpu_layers`` are forwarded unchanged to
        ``llama_cpp.Llama``.

        Raises:
            ImportError: if ``llama-cpp-python`` is not installed.
        """
        try:
            from llama_cpp import Llama
        except ImportError as exc:
            raise ImportError(
                "Install llama-cpp-python:  pip install llama-cpp-python"
            ) from exc
        self.llm = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_gpu_layers=n_gpu_layers,
            verbose=False,
        )

    async def chat(
        self, messages: List[dict], temperature: float, max_tokens: int
    ) -> str:
        """Run a chat completion and return the stripped text of the first choice.

        The blocking inference call runs in the default executor.  Failures
        are logged and re-raised.
        """
        # We are inside a coroutine, so a running loop is guaranteed.
        loop = asyncio.get_running_loop()
        try:
            result = await loop.run_in_executor(
                None,
                lambda: self.llm.create_chat_completion(
                    messages=messages,
                    temperature=temperature,
                    max_tokens=max_tokens,
                ),
            )
            return result["choices"][0]["message"]["content"].strip()
        except Exception as exc:
            logger.error("GGUF chat failed: %s", exc)
            raise
class LMStudioProvider(LLMProvider):
    """LM Studio provider — talks to LM Studio's OpenAI-compatible local API."""

    def __init__(self, base_url: str, model: str):
        """Remember the server *base_url* (trailing slashes removed) and *model*."""
        self.model = model
        self.base_url = base_url.rstrip("/")

    async def chat(
        self, messages: List[dict], temperature: float, max_tokens: int
    ) -> str:
        """POST *messages* to ``/v1/chat/completions`` and return the reply text.

        Uses a 120-second client timeout.  HTTP errors and malformed
        responses are logged and re-raised.
        """
        import httpx

        endpoint = f"{self.base_url}/v1/chat/completions"
        request_body = {
            "model": self.model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        try:
            async with httpx.AsyncClient(timeout=120) as http:
                response = await http.post(endpoint, json=request_body)
                response.raise_for_status()
                body = response.json()
            first_choice = body["choices"][0]
            return first_choice["message"]["content"].strip()
        except Exception as exc:
            logger.error("LM Studio chat failed: %s", exc)
            raise
class HuggingFaceProvider(LLMProvider):
    """Hugging Face transformers-based LLM provider."""

    def __init__(self, model_name: str, device: str):
        """Load tokenizer, model and text-generation pipeline for *model_name*.

        Raises:
            ImportError: if ``transformers`` is not installed.
        """
        self.model_name = model_name
        self.device = device
        # Guard only the import: an ImportError raised while loading the model
        # (e.g. a missing optional dependency) must surface with its own
        # message instead of being reported as "Install transformers".
        try:
            from transformers import (
                AutoModelForCausalLM,
                AutoTokenizer,
                TextGenerationPipeline,
            )
        except ImportError as exc:
            raise ImportError(
                "Install transformers:  pip install transformers torch"
            ) from exc

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map=device,
            torch_dtype="auto",
        )
        # NOTE(review): the model is placed via ``device_map`` above and the
        # pipeline is additionally given ``device``; confirm this combination
        # is accepted by the installed transformers version.
        self.pipeline = TextGenerationPipeline(
            model=self.model,
            tokenizer=self.tokenizer,
            device=0 if device != "cpu" else None,
        )

    async def chat(
        self, messages: List[dict], temperature: float, max_tokens: int
    ) -> str:
        """Generate a reply for *messages* and return only the newly generated text.

        Sampling is enabled only when *temperature* is positive.  The blocking
        pipeline call runs in the default executor.  Failures are logged and
        re-raised.
        """
        prompt = self._format_messages(messages)
        # We are inside a coroutine, so a running loop is guaranteed.
        loop = asyncio.get_running_loop()
        try:
            result = await loop.run_in_executor(
                None,
                lambda: self.pipeline(
                    prompt,
                    max_new_tokens=max_tokens,
                    temperature=temperature,
                    do_sample=temperature > 0,
                ),
            )
            generated = result[0]["generated_text"]
            # The output is sliced as if it begins with the prompt.
            return generated[len(prompt):].strip()
        except Exception as exc:
            logger.error("HF chat failed: %s", exc)
            raise

    def _format_messages(self, messages: List[dict]) -> str:
        """Flatten chat *messages* into a plain-text prompt ending in ``Assistant: ``.

        Messages with a role other than system/user/assistant are skipped.
        """
        parts: List[str] = []
        for msg in messages:
            role = msg["role"]
            content = msg["content"]
            if role == "system":
                parts.append(f"{content}\n\n")
            elif role == "user":
                parts.append(f"User: {content}\n")
            elif role == "assistant":
                parts.append(f"Assistant: {content}\n")
        parts.append("Assistant: ")
        return "".join(parts)
def get_llm_provider() -> LLMProvider:
    """Build the provider selected by ``cfg.LLM_BACKEND``.

    Supported values are ``"ollama"``, ``"hf"``, ``"gguf"`` and
    ``"lmstudio"``.

    Raises:
        ValueError: for any other backend name.
    """
    backend = cfg.LLM_BACKEND

    if backend == "ollama":
        logger.info("Using Ollama backend: %s @ %s", cfg.OLLAMA_MODEL, cfg.OLLAMA_HOST)
        return OllamaProvider(cfg.OLLAMA_HOST, cfg.OLLAMA_MODEL)

    if backend == "hf":
        logger.info("Using HuggingFace backend: %s on %s", cfg.HF_MODEL_NAME, cfg.HF_DEVICE)
        return HuggingFaceProvider(cfg.HF_MODEL_NAME, cfg.HF_DEVICE)

    if backend == "gguf":
        logger.info(
            "Using GGUF backend: %s (ctx=%d, gpu_layers=%d)",
            cfg.GGUF_MODEL_PATH,
            cfg.GGUF_N_CTX,
            cfg.GGUF_N_GPU_LAYERS,
        )
        return GGUFProvider(cfg.GGUF_MODEL_PATH, cfg.GGUF_N_CTX, cfg.GGUF_N_GPU_LAYERS)

    if backend == "lmstudio":
        logger.info("Using LM Studio backend: %s @ %s", cfg.LMSTUDIO_MODEL, cfg.LMSTUDIO_URL)
        return LMStudioProvider(cfg.LMSTUDIO_URL, cfg.LMSTUDIO_MODEL)

    raise ValueError(f"Unknown LLM_BACKEND: {cfg.LLM_BACKEND}")

app/models.py ADDED Viewed

	@@ -0,0 +1,174 @@

+"""Pydantic schemas for request / response models."""
+from __future__ import annotations
+from typing import Dict, List, Optional
+from pydantic import BaseModel, Field
+from app.config import cfg
+# ═══════════════════════════════════════════════════════════════════════
+# CORE SCHEMAS
+# ═══════════════════════════════════════════════════════════════════════
class ChatMessage(BaseModel):
    # One chat turn with a validated role.
    # role: must be exactly "system", "user" or "assistant".
    role: str = Field(..., pattern="^(system|user|assistant)$")
    # content: 1 to 4000 characters.
    content: str = Field(..., min_length=1, max_length=4000)
class AnalysisResult(BaseModel):
    # Keyword-frequency analysis attached to an answer.
    keyword: str                # keyword as given
    kw_stemmed: str             # stemmed form of the keyword
    total_count: int            # total number of occurrences
    by_surah: Dict[int, Dict]   # surah number -> per-surah details
    examples: List[dict]        # sample occurrences
class SourceItem(BaseModel):
    # One retrieved source returned alongside an answer.
    #
    # The relevance score used to be declared as ``_score``.  Pydantic treats
    # underscore-prefixed annotations as private attributes rather than
    # fields, so the value passed by callers was dropped and never
    # serialized.  It is now a real field named ``score`` whose alias is
    # ``_score``: callers can keep passing ``_score=...`` (or use
    # ``score=...``), and alias-based serialization emits the ``_score`` key.
    model_config = {"populate_by_name": True}

    source: str                    # citation / reference label
    type: str                      # source kind, e.g. "quran" or "hadith"
    grade: Optional[str] = None    # hadith grade when available
    arabic: str                    # Arabic text
    english: str                   # English text
    score: float = Field(..., alias="_score")  # retrieval relevance score
class AskResponse(BaseModel):
    # Response body of the /ask endpoint.
    question: str                              # the question as received
    answer: str                                # generated answer text
    language: str                              # detected question language
    intent: str                                # detected intent label
    analysis: Optional[AnalysisResult] = None  # keyword analysis, if any
    sources: List[SourceItem]                  # retrieved supporting sources
    top_score: float                           # best retrieval score
    latency_ms: int                            # pipeline latency in milliseconds
class HadithVerifyResponse(BaseModel):
    # Result of a hadith verification lookup; the optional fields stay None
    # unless a value is supplied (presumably only when `found` is true —
    # the producing endpoint is not visible here).
    query: str                        # text that was looked up
    found: bool                       # whether a match exists
    collection: Optional[str] = None
    grade: Optional[str] = None
    reference: Optional[str] = None
    arabic: Optional[str] = None
    english: Optional[str] = None
    latency_ms: int                   # lookup latency in milliseconds
+# ═══════════════════════════════════════════════════════════════════════
+# OPENAI-COMPATIBLE SCHEMAS
+# ═══════════════════════════════════════════════════════════════════════
class ChatCompletionMessage(BaseModel):
    # Message in the OpenAI-compatible chat schema; unlike ChatMessage the
    # role and content are not constrained.
    role: str = Field(..., description="Message role: system, user, or assistant")
    content: str = Field(..., description="Message content")
class ChatCompletionRequest(BaseModel):
    # Request body of POST /v1/chat/completions.  Generation defaults come
    # from the shared configuration; `top_k` is a non-OpenAI extension.
    model: str = Field(default="QModel", description="Model name")
    messages: List[ChatCompletionMessage] = Field(..., description="Messages")
    temperature: Optional[float] = Field(default=cfg.TEMPERATURE, ge=0.0, le=2.0)
    top_p: Optional[float] = Field(default=1.0, ge=0.0, le=1.0)
    max_tokens: Optional[int] = Field(default=cfg.MAX_TOKENS, ge=1, le=8000)
    top_k: Optional[int] = Field(default=5, ge=1, le=20, description="Islamic sources to retrieve")
    stream: Optional[bool] = Field(default=False, description="Enable streaming responses")
class ChatCompletionChoice(BaseModel):
    # One entry of the `choices` array in a chat completion response.
    index: int
    message: ChatCompletionMessage
    finish_reason: str = "stop"
class ChatCompletionResponse(BaseModel):
    # Non-streaming response of POST /v1/chat/completions.
    id: str
    object: str = "chat.completion"
    created: int                       # Unix timestamp (seconds)
    model: str
    choices: List[ChatCompletionChoice]
    usage: dict                        # token accounting
    x_metadata: Optional[dict] = None  # extension: retrieval metadata
class ModelInfo(BaseModel):
    # Description of one model in the OpenAI-compatible models listing.
    id: str
    object: str = "model"
    created: int
    owned_by: str = "elgendy"
    permission: List[dict] = Field(default_factory=list)
    root: Optional[str] = None
    parent: Optional[str] = None
class ModelsListResponse(BaseModel):
    # Envelope around the list of available models.
    object: str = "list"
    data: List[ModelInfo]
+# ═══════════════════════════════════════════════════════════════════════
+# NEW ENDPOINT SCHEMAS
+# ═══════════════════════════════════════════════════════════════════════
class VerseItem(BaseModel):
    # A single Quran verse; every text field defaults to an empty string.
    surah_number: Optional[int] = None
    surah_name_ar: str = ""
    surah_name_en: str = ""
    surah_name_transliteration: str = ""
    ayah: Optional[int] = None
    arabic: str = ""
    english: str = ""
    transliteration: str = ""
    tafsir_en: str = ""
    tafsir_ar: str = ""
    source: str = ""
    revelation_type: str = ""
    score: Optional[float] = None  # relevance score, when from a search
class HadithItem(BaseModel):
    # A single hadith record; every text field defaults to an empty string.
    collection: str = ""
    reference: str = ""
    hadith_number: Optional[int] = None
    chapter: str = ""
    arabic: str = ""
    english: str = ""
    grade: Optional[str] = None
    author: str = ""
    score: Optional[float] = None  # relevance score, when from a search
class TextSearchResponse(BaseModel):
    # Result envelope of a text search.
    query: str           # search text as received
    count: int           # number of results
    results: List[dict]  # untyped result records
class ChapterResponse(BaseModel):
    # A whole surah: chapter-level metadata plus its verses.
    surah_number: int
    surah_name_ar: str
    surah_name_en: str
    surah_name_transliteration: str
    revelation_type: str
    total_verses: int
    verses: List[dict]
class QuranAnalyticsResponse(BaseModel):
    # Aggregate statistics over the Quran portion of the dataset.
    total_verses_in_dataset: int
    total_surahs: int
    meccan_surahs: int
    medinan_surahs: int
    surahs: List[dict]  # per-surah records
class HadithAnalyticsResponse(BaseModel):
    # Aggregate statistics over the hadith portion of the dataset.
    total_hadiths: int
    collections: List[dict]  # per-collection records
    grade_summary: dict      # summary keyed by grade
class WordFrequencyResponse(BaseModel):
    # Word-frequency result; same fields as AnalysisResult, but `by_surah`
    # is an untyped dict here.
    keyword: str
    kw_stemmed: str
    total_count: int
    by_surah: dict
    examples: List[dict]

app/prompts.py ADDED Viewed

	@@ -0,0 +1,199 @@

+"""Prompt engineering — system templates and message builders."""
+from __future__ import annotations
+from typing import Dict, List, Optional
+from app.arabic_nlp import language_instruction
+# ═══════════════════════════════════════════════════════════════════════
+# PROMPT TEMPLATES
+# ═══════════════════════════════════════════════════════════════════════
+PERSONA = (
+    "You are Sheikh QModel, a meticulous Islamic scholar with expertise "
+    "in Tafsir (Quranic exegesis), Hadith sciences, Fiqh, and Arabic. "
+    "You respond with scholarly rigor and modern clarity."
+)
+TASK_INSTRUCTIONS: Dict[str, str] = {
+    "tafsir": (
+        "The user asks about a Quranic verse. Steps:\n"
+        "1. Identify the verse(s) from context.\n"
+        "2. Provide Tafsir: linguistic analysis and deeper meaning.\n"
+        "3. Draw connections to related verses.\n"
+        "4. Answer the user's question directly."
+    ),
+    "hadith": (
+        "The user asks about a Hadith. Structure your answer:\n\n"
+        "1. الجواب — Give a direct answer to the question first.\n\n"
+        "2. نص الحديث — Quote the hadith text EXACTLY from context\n"
+        "   in the evidence box format. Show ALL relevant narrations found.\n\n"
+        "3. الشرح والتوضيح — Explain the meaning and implications.\n"
+        "   Mention notable scholars, narrators, or jurisprudential points.\n"
+        "   Draw connections to related Hadiths from the context.\n\n"
+        "4. الخلاصة — Summarize the key takeaway.\n\n"
+        "CRITICAL: If the Hadith is NOT in context, say so clearly.\n"
+        "Quote hadith text VERBATIM from context — never paraphrase the matn."
+    ),
+    "auth": (
+        "The user asks about Hadith authenticity. Structure your answer:\n\n"
+        "الجواب — Start with a CLEAR, CONFIDENT verdict (صحيح/حسن/ضعيف/موضوع).\n"
+        "Give a one-line ruling summary.\n\n"
+        "أولًا: متن الحديث\n"
+        "Quote ALL matching narrations from the context in evidence boxes.\n"
+        "Show every relevant version found across different collections.\n\n"
+        "ثانيًا: الأدلة على صحته (أو ضعفه)\n"
+        "Provide numbered evidence points (use ١، ٢، ٣):\n"
+        "  - Which authoritative collections contain it\n"
+        "  - The grading given by scholars (from the grade field in context)\n"
+        "  - Notable narrators and scholars who transmitted or commented on it\n\n"
+        "ثالثًا: أهمية الحديث\n"
+        "Explain the hadith's significance, its place in Islamic scholarship,\n"
+        "and any jurisprudential implications.\n\n"
+        "الخلاصة — Comprehensive summary restating the verdict with key evidence.\n\n"
+        "RULES:\n"
+        "• If found in Sahih Bukhari or Sahih Muslim → assert AUTHENTIC (Sahih).\n"
+        "• Quote hadith text VERBATIM from context — never paraphrase the matn.\n"
+        "• You may add scholarly commentary to explain significance and context.\n"
+        "• If NOT found in context → clearly state it is absent from the dataset.\n"
+        "• NEVER fabricate hadith text, grades, or source citations."
+    ),
+    "fatwa": (
+        "The user seeks a religious ruling. Steps:\n"
+        "1. Gather evidence from Quran + Sunnah in context.\n"
+        "2. Reason step-by-step to a conclusion.\n"
+        "3. If insufficient, state so explicitly."
+    ),
+    "count": (
+        "The user asks for word frequency. Steps:\n"
+        "1. State the ANALYSIS RESULT prominently.\n"
+        "2. List example occurrences with Surah names.\n"
+        "3. Comment on significance."
+    ),
+    "surah_info": (
+        "The user asks about surah metadata. Steps:\n"
+        "1. State the answer from the SURAH INFORMATION block EXACTLY.\n"
+        "2. Use the total_verses number precisely — do NOT guess or calculate.\n"
+        "3. Mention the revelation type (Meccan/Medinan) if available.\n"
+        "4. Optionally add brief scholarly context about the surah."
+    ),
+    "general": (
+        "The user has a general Islamic question. Structure your answer:\n\n"
+        "1. الجواب — Give a direct, clear answer first.\n\n"
+        "2. الأدلة — Support with evidence from context, quoting relevant\n"
+        "   texts in evidence boxes. Explain the evidence with scholarly depth.\n\n"
+        "3. الخلاصة — Conclude with a comprehensive summary."
+    ),
+}
+FORMAT_RULES = """\
+For EVERY supporting evidence, use this exact format:
+┌─────────────────────────────────────────────┐
+│  ❝ {Arabic text} ❞
+│  📝 Translation: {English translation}
+│  📖 Source: {exact citation from context}
+└─────────────────────────────────────────────┘
+ABSOLUTE RULES:
+• Copy Arabic hadith text, translations, and sources VERBATIM from context. Never paraphrase.
+• You may add scholarly commentary, explanation, and analysis around the quoted evidence.
+• NEVER fabricate hadith text, grades, verse numbers, or source citations.
+• If a specific Hadith/verse is NOT in context → respond with:
+    "هذا الحديث/الآية غير موجود في قاعدة البيانات." (Arabic)
+    or "This Hadith/verse is not in the available dataset." (English)
+• Never invent or guess content.
+• End with: "والله أعلم." (Arabic) or "And Allah knows best." (English)
+"""
+_SYSTEM_TEMPLATE = """\
+{persona}
+{lang_instruction}
+=== YOUR TASK ===
+{task}
+=== OUTPUT FORMAT ===
+{fmt}
+"""
+_CONTEXT_TEMPLATE = """\
+IMPORTANT: The database has already been searched for you.
+The relevant results are provided below — use ONLY this data to formulate your answer.
+Do NOT state that you need a database or ask the user for data. Answer from the context below.
+=== RETRIEVED DATABASE RESULTS ===
+{context}
+=== END DATABASE RESULTS ===
+Now answer the following question using ONLY the data above:
+"""
def build_messages(
    context: str,
    question: str,
    lang: str,
    intent: str,
    analysis: Optional[dict] = None,
    surah_info: Optional[dict] = None,
) -> List[dict]:
    """Assemble the ``[system, user]`` message pair sent to the LLM.

    When given, the surah-information block and then the analysis block are
    prepended to *context*, so the analysis block ends up first.  The system
    message combines persona, language directive, the task instructions for
    *intent* (falling back to ``"general"``) and the format rules.  The user
    message is the wrapped context, a step-by-step cue and the *question*.
    """
    if surah_info:
        surah_lines = [
            "\n[SURAH INFORMATION]\n",
            f"Surah Name (Arabic): {surah_info['surah_name_ar']}\n",
            f"Surah Name (English): {surah_info['surah_name_en']}\n",
            f"Surah Number: {surah_info['surah_number']}\n",
            f"Total Verses: {surah_info['total_verses']}\n",
            f"Revelation Type: {surah_info['revelation_type']}\n",
            f"Transliteration: {surah_info['surah_name_transliteration']}\n",
        ]
        context = "".join(surah_lines) + context

    if analysis:
        per_surah = "\n  ".join(
            f"Surah {number}: {stats['name']} ({stats['count']} times)"
            for number, stats in analysis["by_surah"].items()
        )
        analysis_lines = [
            "\n[ANALYSIS RESULT]\n",
            f"The keyword «{analysis['keyword']}» appears {analysis['total_count']} times.\n",
            f"  {per_surah}\n",
        ]
        context = "".join(analysis_lines) + context

    task = TASK_INSTRUCTIONS.get(intent, TASK_INSTRUCTIONS["general"])
    system_prompt = _SYSTEM_TEMPLATE.format(
        persona=PERSONA,
        lang_instruction=language_instruction(lang),
        task=task,
        fmt=FORMAT_RULES,
    )

    # Only Arabic questions get the Arabic step-by-step cue.
    if lang == "arabic":
        step_cue = "فكّر خطوةً بخطوة، ثم أجب: "
    else:
        step_cue = "Think step by step: "

    user_prompt = _CONTEXT_TEMPLATE.format(context=context) + step_cue + question
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
def not_found_answer(lang: str) -> str:
    """Return the canned "insufficient data" reply.

    The Arabic text is used when *lang* is ``"arabic"``; every other value
    gets the English text.
    """
    if lang == "arabic":
        lines = [
            "لم أجد في قاعدة البيانات ما يكفي للإجابة على هذا السؤال بدقة.",
            "يُرجى الرجوع إلى مصادر إسلامية موثوقة.",
            "والله أعلم.",
        ]
    else:
        lines = [
            "The available dataset does not contain sufficient information to answer "
            "this question accurately.",
            "Please refer to trusted Islamic sources.",
            "And Allah knows best.",
        ]
    return "\n".join(lines)

app/routers/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """QModel API routers."""

app/routers/chat.py ADDED Viewed

	@@ -0,0 +1,163 @@

+"""Chat / inference endpoints — OpenAI-compatible + /ask."""
+from __future__ import annotations
+import json
+import logging
+import time
+from typing import Optional
+from fastapi import APIRouter, HTTPException, Query
+from fastapi.responses import StreamingResponse
+from app.config import cfg
+from app.models import (
+    AskResponse,
+    ChatCompletionChoice,
+    ChatCompletionMessage,
+    ChatCompletionRequest,
+    ChatCompletionResponse,
+    SourceItem,
+)
+from app.state import check_ready, run_rag_pipeline, state
+logger = logging.getLogger("qmodel.chat")
+router = APIRouter(tags=["inference"])
+# ───────────────────────────────────────────────────────
+# POST /v1/chat/completions — OpenAI-compatible
+# ───────────────────────────────────────────────────────
@router.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(request: ChatCompletionRequest):
    """OpenAI-compatible chat completions endpoint (for Open-WebUI integration).

    The last ``user`` message is used as the question for the RAG pipeline.
    With ``stream=True`` the finished answer is replayed as server-sent
    events; otherwise a single completion object is returned with retrieval
    metadata under ``x_metadata``.

    Raises:
        HTTPException: 400 when the request has no user message, 500 when the
            pipeline fails unexpectedly.  ``HTTPException`` raised by the
            pipeline itself is passed through unchanged.
    """
    check_ready()

    user_messages = [m.content for m in request.messages if m.role == "user"]
    if not user_messages:
        raise HTTPException(status_code=400, detail="No user message in request")
    question = user_messages[-1]
    top_k = request.top_k or cfg.TOP_K_RETURN

    try:
        result = await run_rag_pipeline(question, top_k=top_k)
    except HTTPException:
        raise
    except Exception as exc:
        # logger.exception keeps the traceback; chaining keeps the cause.
        logger.exception("Pipeline error: %s", exc)
        raise HTTPException(status_code=500, detail="Internal pipeline error") from exc

    if request.stream:
        return StreamingResponse(
            _stream_response(result, request.model),
            media_type="text/event-stream",
        )

    # Read the sources once so the count and the preview cannot disagree.
    sources = result.get("sources", [])
    return ChatCompletionResponse(
        id=f"qmodel-{int(time.time() * 1000)}",
        created=int(time.time()),
        model=request.model,
        choices=[
            ChatCompletionChoice(
                index=0,
                message=ChatCompletionMessage(
                    role="assistant",
                    content=result["answer"],
                ),
            )
        ],
        # Token usage is not measured; -1 marks the values as unknown.
        usage={
            "prompt_tokens":     -1,
            "completion_tokens": -1,
            "total_tokens":      -1,
        },
        x_metadata={
            "language":      result["language"],
            "intent":        result["intent"],
            "top_score":     round(result["top_score"], 4),
            "latency_ms":    result["latency_ms"],
            "sources_count": len(sources),
            # Only the first five sources are summarized.
            "sources": [
                {
                    "source": s.get("source") or s.get("reference", ""),
                    "type":   s.get("type", ""),
                    "grade":  s.get("grade"),
                    "score":  round(s.get("_score", 0), 4),
                }
                for s in sources[:5]
            ],
            "analysis": result.get("analysis"),
        },
    )
+async def _stream_response(result: dict, model: str):
+    """Stream response chunks in OpenAI SSE format."""
+    answer = result.get("answer", "")
+    for line in answer.split("\n"):
+        chunk = {
+            "id":      f"qmodel-{int(time.time() * 1000)}",
+            "object":  "chat.completion.chunk",
+            "created": int(time.time()),
+            "model":   model,
+            "choices": [{
+                "index": 0,
+                "delta": {"content": line + "\n"},
+                "finish_reason": None,
+            }],
+        }
+        yield f"data: {json.dumps(chunk)}\n\n"
+    final = {
+        "id":      f"qmodel-{int(time.time() * 1000)}",
+        "object":  "chat.completion.chunk",
+        "created": int(time.time()),
+        "model":   model,
+        "choices": [{
+            "index": 0,
+            "delta": {},
+            "finish_reason": "stop",
+        }],
+    }
+    yield f"data: {json.dumps(final)}\n\n"
+    yield "data: [DONE]\n\n"
+# ───────────────────────────────────────────────────────
+# GET /ask — main inference endpoint
+# ───────────────────────────────────────────────────────
@router.get("/ask", response_model=AskResponse)
async def ask(
    q: str = Query(..., min_length=1, max_length=1000, description="Your Islamic question"),
    top_k: int = Query(cfg.TOP_K_RETURN, ge=1, le=20, description="Number of sources"),
    source_type: Optional[str] = Query(None, description="Filter: quran|hadith"),
    grade_filter: Optional[str] = Query(None, description="Filter Hadith: sahih|hasan|all"),
):
    """Main inference endpoint — runs the full RAG pipeline."""
    # Refuse with 503 until startup has finished loading models and data.
    check_ready()
    outcome = await run_rag_pipeline(q, top_k, source_type, grade_filter)

    # Convert each raw hit into the public SourceItem schema.
    source_items = []
    for hit in outcome["sources"]:
        label = hit.get("source") or hit.get("reference") or "Unknown"
        source_items.append(
            SourceItem(
                source=label,
                type=hit.get("type", "unknown"),
                grade=hit.get("grade"),
                arabic=hit.get("arabic", ""),
                english=hit.get("english", ""),
                _score=hit.get("_score", 0.0),
            )
        )

    return AskResponse(
        question=q,
        answer=outcome["answer"],
        language=outcome["language"],
        intent=outcome["intent"],
        analysis=outcome["analysis"],
        sources=source_items,
        top_score=outcome["top_score"],
        latency_ms=outcome["latency_ms"],
    )

app/routers/hadith.py ADDED Viewed

	@@ -0,0 +1,212 @@

+"""Hadith endpoints — search, topic, verify, collection browse, analytics."""
+from __future__ import annotations
+import time
+from typing import Optional
+from fastapi import APIRouter, HTTPException, Query
+from app.analysis import get_hadith_analytics
+from app.models import (
+    HadithAnalyticsResponse,
+    HadithVerifyResponse,
+    TextSearchResponse,
+)
+from app.search import hybrid_search, rewrite_query, text_search
+from app.state import check_ready, state
+router = APIRouter(prefix="/hadith", tags=["hadith"])
+# ───────────────────────────────────────────────────────
+# GET /hadith/search — text-based hadith lookup (#5)
+# ───────────────────────────────────────────────────────
@router.get("/search", response_model=TextSearchResponse)
async def hadith_text_search(
    q: str = Query(..., min_length=1, max_length=500, description="Text to search for (Arabic or English)"),
    collection: Optional[str] = Query(None, description="Filter by collection name (e.g. bukhari, muslim)"),
    limit: int = Query(10, ge=1, le=50),
):
    """Search for Hadith by partial text match (Arabic or English).
    Performs exact substring matching plus word-overlap scoring.
    Use this to find a hadith when you know part of the text.
    """
    check_ready()
    if collection:
        # Filter BEFORE truncating: cutting to `limit` first and filtering
        # afterwards drops valid matches from the requested collection
        # whenever other collections rank higher.
        col_lower = collection.lower()
        ranked = text_search(
            q, state.dataset, source_type="hadith", limit=len(state.dataset)
        )
        results = [
            r for r in ranked
            # `or ""` keeps .lower() safe when both fields are missing/None.
            if col_lower in (r.get("collection") or r.get("reference") or "").lower()
        ][:limit]
    else:
        results = text_search(q, state.dataset, source_type="hadith", limit=limit)

    return TextSearchResponse(
        query=q,
        count=len(results),
        results=[
            {
                "collection":    r.get("collection", ""),
                "reference":     r.get("reference", ""),
                "hadith_number": r.get("hadith_number"),
                "chapter":       r.get("chapter", ""),
                "arabic":        r.get("arabic", ""),
                "english":       r.get("english", ""),
                "grade":         r.get("grade"),
                "score":         round(r.get("_score", 0), 4),
            }
            for r in results
        ],
    )
+# ───────────────────────────────────────────────────────
+# GET /hadith/topic — semantic topic search (#6)
+# ───────────────────────────────────────────────────────
@router.get("/topic", response_model=TextSearchResponse)
async def hadith_topic_search(
    topic: str = Query(..., min_length=1, max_length=500, description="Topic or theme to search for"),
    top_k: int = Query(10, ge=1, le=20),
    grade_filter: Optional[str] = Query(None, description="Grade filter: sahih|hasan|all"),
):
    """Search for Hadith related to a topic/theme using semantic search."""
    check_ready()

    # Rewrite the topic first, then run the hybrid retriever restricted to
    # hadith entries.
    rewritten = await rewrite_query(topic, state.llm)
    hits = await hybrid_search(
        topic,
        rewritten,
        state.embed_model,
        state.faiss_index,
        state.dataset,
        top_n=top_k,
        source_type="hadith",
        grade_filter=grade_filter,
    )

    # Project each hit onto the public result shape.
    rows = []
    for hit in hits:
        rows.append({
            "collection":    hit.get("collection", ""),
            "reference":     hit.get("reference", ""),
            "hadith_number": hit.get("hadith_number"),
            "chapter":       hit.get("chapter", ""),
            "arabic":        hit.get("arabic", ""),
            "english":       hit.get("english", ""),
            "grade":         hit.get("grade"),
            "score":         round(hit.get("_score", 0), 4),
        })

    return TextSearchResponse(query=topic, count=len(hits), results=rows)
+# ───────────────────────────────────────────────────────
+# GET /hadith/verify — authenticity check (#7)
+# ───────────────────────────────────────────────────────
@router.get("/verify", response_model=HadithVerifyResponse)
async def verify_hadith(
    q: str = Query(..., min_length=1, description="Hadith text or first few words"),
    collection: Optional[str] = Query(None, description="Filter: bukhari|muslim|all"),
):
    """Verify if a Hadith is in authenticated collections and check its grade.
    Uses both semantic search and text matching for best accuracy.
    """
    check_ready()
    t0 = time.perf_counter()

    def _elapsed_ms() -> int:
        return int((time.perf_counter() - t0) * 1000)

    # A blank query would substring-match every record; report "not found".
    if not q.strip():
        return HadithVerifyResponse(query=q, found=False, latency_ms=_elapsed_ms())

    # "all" is the documented wildcard: treat it (and empty) as "no filter".
    col_lower = (collection or "").strip().lower()
    if col_lower == "all":
        col_lower = ""

    def _in_collection(item: dict) -> bool:
        if not col_lower:
            return True
        label = item.get("collection") or item.get("reference") or ""
        return col_lower in label.lower()

    # 1. Text search for exact matches. With a collection filter, rank the
    #    full match set and filter before truncating so that matches from the
    #    requested collection are not cut off by higher-ranked other ones.
    text_limit = len(state.dataset) if col_lower else 5
    text_results = [
        r for r in text_search(q, state.dataset, source_type="hadith", limit=text_limit)
        if _in_collection(r)
    ][:5]

    # 2. Semantic search, using the raw text as both language queries.
    semantic_hits = await hybrid_search(
        q,
        {"ar_query": q, "en_query": q, "keywords": q.split()[:7], "intent": "auth"},
        state.embed_model, state.faiss_index, state.dataset,
        top_n=5, source_type="hadith",
    )
    # Apply the same collection filter here; otherwise a request limited to
    # one collection could be answered with a hadith from another.
    semantic_results = [r for r in semantic_hits if _in_collection(r)]

    # 3. Pick the best result: a strong text match wins, then the top
    #    semantic hit, then any remaining text match.
    best = None
    if text_results and text_results[0].get("_score", 0) > 2.0:
        best = text_results[0]
    elif semantic_results:
        best = semantic_results[0]
    elif text_results:
        best = text_results[0]

    if best:
        return HadithVerifyResponse(
            query=q,
            found=True,
            collection=best.get("collection"),
            grade=best.get("grade"),
            reference=best.get("reference"),
            arabic=best.get("arabic"),
            english=best.get("english"),
            latency_ms=_elapsed_ms(),
        )
    return HadithVerifyResponse(
        query=q,
        found=False,
        latency_ms=_elapsed_ms(),
    )
+# ───────────────────────────────────────────────────────
+# GET /hadith/collection/{name} — browse a collection
+# ───────────────────────────────────────────────────────
@router.get("/collection/{name}")
async def hadith_collection(
    name: str,
    limit: int = Query(20, ge=1, le=100),
    offset: int = Query(0, ge=0),
):
    """Browse hadiths from a specific collection (e.g. bukhari, muslim, tirmidhi)."""
    check_ready()

    # Case-insensitive substring match against the collection name, falling
    # back to the reference string when the collection field is empty.
    needle = name.lower()
    matches = []
    for item in state.dataset:
        if item.get("type") != "hadith":
            continue
        label = item.get("collection", "") or item.get("reference", "")
        if needle in label.lower():
            matches.append(item)

    if not matches:
        raise HTTPException(status_code=404, detail=f"Collection '{name}' not found")

    # Serialise only the requested window; `total` still reports every match.
    window = matches[offset:offset + limit]
    rows = []
    for item in window:
        rows.append({
            "reference":     item.get("reference", ""),
            "hadith_number": item.get("hadith_number"),
            "chapter":       item.get("chapter", ""),
            "arabic":        item.get("arabic", ""),
            "english":       item.get("english", ""),
            "grade":         item.get("grade"),
        })

    return {
        "collection": name,
        "total": len(matches),
        "offset": offset,
        "limit": limit,
        "results": rows,
    }
+# ───────────────────────────────────────────────────────
+# GET /hadith/analytics — aggregate hadith statistics
+# ───────────────────────────────────────────────────────
@router.get("/analytics", response_model=HadithAnalyticsResponse)
async def hadith_analytics():
    """Get aggregate Hadith analytics: collection counts, grade distribution."""
    check_ready()
    # The analysis helper returns a mapping that is unpacked as the response
    # model's keyword arguments.
    stats = get_hadith_analytics(state.dataset)
    return HadithAnalyticsResponse(**stats)

app/routers/ops.py ADDED Viewed

	@@ -0,0 +1,69 @@

+"""Operational endpoints — health, models, debug."""
+from __future__ import annotations
+import time
+from typing import Optional
+from fastapi import APIRouter, Query
+from app.config import cfg
+from app.models import ModelInfo, ModelsListResponse
+from app.search import hybrid_search, rewrite_query
+from app.state import check_ready, state
+router = APIRouter(tags=["ops"])
@router.get("/health")
def health():
    """Health check endpoint."""
    # Deliberately not guarded by check_ready(): this must answer while the
    # service is still loading, so sizes fall back to 0 for unset resources.
    dataset = state.dataset
    index = state.faiss_index

    status = "initialising"
    if state.ready:
        status = "ok"

    return {
        "status":               status,
        "version":              "5.0.0",
        "llm_backend":          cfg.LLM_BACKEND,
        "dataset_size":         len(dataset) if dataset else 0,
        "faiss_total":          index.ntotal if index else 0,
        "confidence_threshold": cfg.CONFIDENCE_THRESHOLD,
    }
@router.get("/v1/models", response_model=ModelsListResponse, tags=["models"])
def list_models():
    """List available models (OpenAI-compatible)."""
    # The same model is advertised under both spellings of its id; each entry
    # is stamped with the current time.
    entries = [
        ModelInfo(id=model_id, created=int(time.time()), owned_by="elgendy")
        for model_id in ("QModel", "qmodel")
    ]
    return ModelsListResponse(data=entries)
@router.get("/debug/scores")
async def debug_scores(
    q: str = Query(..., min_length=1, max_length=1000),
    top_k: int = Query(10, ge=1, le=20),
):
    """Debug: inspect raw retrieval scores without LLM generation."""
    check_ready()

    # Same rewrite + retrieval steps as the main pipeline, without filters.
    rewritten = await rewrite_query(q, state.llm)
    hits = await hybrid_search(
        q,
        rewritten,
        state.embed_model,
        state.faiss_index,
        state.dataset,
        top_k,
    )

    # Expose the dense, sparse and fused score of every hit, ranked from 1.
    rows = []
    for rank, hit in enumerate(hits, start=1):
        rows.append({
            "rank":    rank,
            "source":  hit.get("source") or hit.get("reference"),
            "type":    hit.get("type"),
            "grade":   hit.get("grade"),
            "_dense":  round(hit.get("_dense", 0), 4),
            "_sparse": round(hit.get("_sparse", 0), 4),
            "_score":  round(hit.get("_score", 0), 4),
        })

    return {
        "intent":    rewritten.get("intent"),
        "threshold": cfg.CONFIDENCE_THRESHOLD,
        "results":   rows,
    }

app/routers/quran.py ADDED Viewed

	@@ -0,0 +1,149 @@

+"""Quran endpoints — search, topic, analytics, chapter, verse, word-frequency."""
+from __future__ import annotations
+import time
+from fastapi import APIRouter, HTTPException, Query
+from app.analysis import (
+    count_occurrences,
+    get_chapter_info,
+    get_quran_analytics,
+    get_verse,
+)
+from app.models import (
+    ChapterResponse,
+    QuranAnalyticsResponse,
+    TextSearchResponse,
+    VerseItem,
+    WordFrequencyResponse,
+)
+from app.search import hybrid_search, rewrite_query, text_search
+from app.state import check_ready, state
+router = APIRouter(prefix="/quran", tags=["quran"])
+# ───────────────────────────────────────────────────────
+# GET /quran/search — text-based verse lookup (#1)
+# ───────────────────────────────────────────────────────
@router.get("/search", response_model=TextSearchResponse)
async def quran_text_search(
    q: str = Query(..., min_length=1, max_length=500, description="Text to search for (Arabic or English)"),
    limit: int = Query(10, ge=1, le=50),
):
    """Search for Quran verses by partial text match (Arabic or English).
    This performs exact substring matching plus fuzzy word-overlap matching.
    Use this to find a verse when you know part of the text.
    """
    check_ready()
    hits = text_search(q, state.dataset, source_type="quran", limit=limit)

    # Project each hit onto the public verse shape; the ayah number may be
    # stored under either of two keys.
    rows = []
    for hit in hits:
        ayah = hit.get("ayah_number") or hit.get("verse_number")
        rows.append({
            "surah_number":   hit.get("surah_number"),
            "surah_name_ar":  hit.get("surah_name_ar", ""),
            "surah_name_en":  hit.get("surah_name_en", ""),
            "ayah":           ayah,
            "arabic":         hit.get("arabic", ""),
            "english":        hit.get("english", ""),
            "source":         hit.get("source", ""),
            "score":          round(hit.get("_score", 0), 4),
        })

    return TextSearchResponse(query=q, count=len(hits), results=rows)
+# ───────────────────────────────────────────────────────
+# GET /quran/topic — semantic topic search (#2)
+# ───────────────────────────────────────────────────────
@router.get("/topic", response_model=TextSearchResponse)
async def quran_topic_search(
    topic: str = Query(..., min_length=1, max_length=500, description="Topic or theme to search for"),
    top_k: int = Query(10, ge=1, le=20),
):
    """Search for Quran verses related to a topic/theme using semantic search."""
    check_ready()

    # Rewrite the topic, then run the hybrid retriever restricted to Quran
    # entries.
    rewritten = await rewrite_query(topic, state.llm)
    hits = await hybrid_search(
        topic,
        rewritten,
        state.embed_model,
        state.faiss_index,
        state.dataset,
        top_n=top_k,
        source_type="quran",
    )

    rows = []
    for hit in hits:
        ayah = hit.get("ayah_number") or hit.get("verse_number")
        rows.append({
            "surah_number":   hit.get("surah_number"),
            "surah_name_ar":  hit.get("surah_name_ar", ""),
            "surah_name_en":  hit.get("surah_name_en", ""),
            "ayah":           ayah,
            "arabic":         hit.get("arabic", ""),
            "english":        hit.get("english", ""),
            "source":         hit.get("source", ""),
            "score":          round(hit.get("_score", 0), 4),
        })

    return TextSearchResponse(query=topic, count=len(hits), results=rows)
+# ───────────────────────────────────────────────────────
+# GET /quran/word-frequency — count word occurrences (#3)
+# ───────────────────────────────────────────────────────
@router.get("/word-frequency", response_model=WordFrequencyResponse)
async def quran_word_frequency(
    word: str = Query(..., min_length=1, max_length=100, description="Word to count occurrences for"),
):
    """Count occurrences of a word in the Quran with surah breakdown."""
    check_ready()
    # The counting helper is awaited and its mapping unpacked as the response
    # model's keyword arguments.
    stats = await count_occurrences(word, state.dataset)
    return WordFrequencyResponse(**stats)
+# ───────────────────────────────────────────────────────
+# GET /quran/analytics — aggregate statistics (#4)
+# ───────────────────────────────────────────────────────
@router.get("/analytics", response_model=QuranAnalyticsResponse)
async def quran_analytics():
    """Get aggregate Quran analytics: surah list, verse counts, Meccan/Medinan breakdown."""
    check_ready()
    # The analysis helper returns a mapping that is unpacked as the response
    # model's keyword arguments.
    stats = get_quran_analytics(state.dataset)
    return QuranAnalyticsResponse(**stats)
+# ───────────────────────────────────────────────────────
+# GET /quran/chapter/{number} — all verses in a chapter
+# ───────────────────────────────────────────────────────
@router.get("/chapter/{number}", response_model=ChapterResponse)
async def quran_chapter(number: int):
    """Get all verses and metadata for a specific surah (chapter)."""
    check_ready()

    # Out-of-range numbers are a client error (400); an in-range surah that
    # is absent from the loaded dataset is reported as 404.
    if not 1 <= number <= 114:
        raise HTTPException(status_code=400, detail="Surah number must be between 1 and 114")

    chapter = get_chapter_info(number, state.dataset)
    if chapter:
        return ChapterResponse(**chapter)
    raise HTTPException(status_code=404, detail=f"Surah {number} not found in dataset")
+# ───────────────────────────────────────────────────────
+# GET /quran/verse/{surah}:{ayah} — specific verse
+# ───────────────────────────────────────────────────────
@router.get("/verse/{surah}:{ayah}")
async def quran_verse(surah: int, ayah: int):
    """Get a specific verse by surah number and ayah number (e.g. /quran/verse/2:255)."""
    check_ready()

    # Validate both path components before touching the dataset.
    if not 1 <= surah <= 114:
        raise HTTPException(status_code=400, detail="Surah number must be between 1 and 114")
    if not ayah >= 1:
        raise HTTPException(status_code=400, detail="Ayah number must be >= 1")

    found = get_verse(surah, ayah, state.dataset)
    if found:
        return found
    raise HTTPException(status_code=404, detail=f"Verse {surah}:{ayah} not found")

app/search.py ADDED Viewed

	@@ -0,0 +1,287 @@

+"""Hybrid search engine — dense FAISS + BM25 re-ranking + text search."""
+from __future__ import annotations
+import json
+import logging
+import re
+from collections import Counter
+from typing import Dict, List, Literal, Optional
+import faiss
+import numpy as np
+from sentence_transformers import SentenceTransformer
+from app.arabic_nlp import light_stem, normalize_arabic, tokenize_ar
+from app.cache import rewrite_cache, search_cache
+from app.config import cfg
+from app.llm import LLMProvider
+logger = logging.getLogger("qmodel.search")
+# ═══════════════════════════════════════════════════════════════════════
+# QUERY REWRITING
+# ═══════════════════════════════════════════════════════════════════════
+_REWRITE_SYSTEM = """\
+You are an Islamic-scholarship search query optimizer.
+Your ONLY job: rewrite the user's question to maximise retrieval from a Quranic + Hadith dataset.
+Reply ONLY with a valid JSON object — no markdown, no preamble:
+{
+  "ar_query": "<query in clear Arabic فصحى, ≤25 words>",
+  "en_query": "<query in clear English, ≤25 words>",
+  "keywords": ["<3-7 key Arabic or English terms from the question>"],
+  "intent": "<one of: fatwa | tafsir | hadith | count | surah_info | auth | general>"
+}
+Intent Detection Rules (CRITICAL):
+- 'surah_info' intent = asking about surah metadata: verse count, revelation type, surah number
+  (كم عدد آيات سورة, كم آية في سورة, how many verses in surah, is surah X meccan/medinan)
+- 'count' intent = asking for WORD frequency/occurrence count (كم مرة ذُكرت كلمة, how many times is word X mentioned)
+  NOTE: "كم عدد آيات سورة" is surah_info NOT count!
+- 'auth' intent = asking about authenticity (صحيح؟, هل صحيح, is it authentic, verify hadith grade)
+- 'hadith' intent = asking about specific hadith meaning/text (not authenticity)
+- 'tafsir' intent = asking about Quranic verses or Islamic ruling (fatwa)
+- 'general' intent = other questions
+Examples:
+- "كم عدد آيات سورة آل عمران" → intent: surah_info (asking about surah metadata!)
+- "كم آية في سورة البقرة" → intent: surah_info
+- "how many verses in surah al-baqara" → intent: surah_info
+- "هل سورة الفاتحة مكية أم مدنية" → intent: surah_info
+- "كم مرة ذُكرت كلمة مريم" → intent: count (asking about WORD frequency!)
+- "هل حديث إنما الأعمال بالنيات صحيح" → intent: auth (asking if authentic!)
+- "ما معنى حديث إنما الأعمال" → intent: hadith
+- "ما حكم الربا في الإسلام" → intent: fatwa
+"""
async def rewrite_query(raw: str, llm: LLMProvider) -> Dict:
    """Rewrite a user question into retrieval-friendly queries via the LLM.

    Args:
        raw: The question exactly as the user asked it.
        llm: Provider whose ``chat`` coroutine performs the rewrite.

    Returns:
        A dict with ``ar_query`` (str), ``en_query`` (str), ``keywords``
        (list of str) and ``intent`` (str). If the LLM call or JSON parsing
        fails, a fallback built from ``raw`` is returned instead; fallbacks
        are not cached.
    """
    cached = await rewrite_cache.get(raw)
    if cached:
        return cached

    fallback = {
        "ar_query": normalize_arabic(raw),
        "en_query": raw,
        "keywords": raw.split()[:7],
        "intent":   "general",
    }
    try:
        text = await llm.chat(
            messages=[
                {"role": "system", "content": _REWRITE_SYSTEM},
                {"role": "user",   "content": raw},
            ],
            max_tokens=220,
            temperature=0.0,
        )
        # Models often wrap JSON in a markdown fence despite instructions.
        text   = re.sub(r"```(?:json)?\n?|\n?```", "", text).strip()
        parsed = json.loads(text)
        if not isinstance(parsed, dict):
            raise ValueError("rewrite is not a JSON object")

        # Validate every field BEFORE caching. A wrongly-typed value such as
        # "ar_query": null would otherwise be cached and break every later
        # search that concatenates or iterates these fields.
        result = dict(parsed)
        for key in ("ar_query", "en_query", "intent"):
            value = result.get(key)
            if not isinstance(value, str) or not value.strip():
                result[key] = fallback[key]

        keywords = result.get("keywords")
        if isinstance(keywords, str):
            keywords = keywords.split()
        if isinstance(keywords, list):
            keywords = [kw for kw in keywords if isinstance(kw, str) and kw.strip()]
        else:
            keywords = []
        result["keywords"] = keywords or fallback["keywords"]

        await rewrite_cache.set(result, raw)
        logger.info("Rewrite: intent=%s ar=%s", result["intent"], result["ar_query"][:60])
        return result
    except Exception as exc:
        # Best-effort boundary: any provider, parsing or cache failure falls
        # back to the raw query rather than failing the request.
        logger.warning("Query rewrite failed (%s) — using fallback", exc)
        return fallback
+# ═══════════════════════════════════════════════════════════════════════
+# BM25 SCORING
+# ═══════════════════════════════════════════════════════════════════════
def _bm25_score(
    query_terms: List[str],
    doc_text: str,
    avg_dl: float,
    k1: float = 1.5,
    b: float  = 0.75,
) -> float:
    """Score a document against the query terms with BM25-style TF saturation.

    Only the term-frequency part with length normalisation is computed; no
    inverse-document-frequency weight is applied. A term that occurs several
    times in ``query_terms`` contributes once per occurrence.
    """
    tokens = tokenize_ar(doc_text)
    counts = Counter(tokens)
    # The length-normalisation part of the denominator does not depend on the
    # term, so it is computed once.
    length_norm = k1 * (1 - b + b * len(tokens) / max(avg_dl, 1))

    total = 0.0
    for term in query_terms:
        freq   = counts.get(term, 0)
        total += (freq * (k1 + 1)) / (freq + length_norm)
    return total
+# ═══════════════════════════════════════════════════════════════════════
+# HYBRID SEARCH  — dense FAISS + BM25 re-ranking + filtering
+# ═══════════════════════════════════════════════════════════════════════
async def hybrid_search(
    raw_query: str,
    rewrite: Dict,
    embed_model: SentenceTransformer,
    index: faiss.Index,
    dataset: list,
    top_n: int = cfg.TOP_K_RETURN,
    source_type: Optional[Literal["quran", "hadith"]] = None,
    grade_filter: Optional[str] = None,
) -> list:
    """Hybrid search: dense FAISS retrieval re-ranked with a sparse score.

    Args:
        raw_query: The user's original text; used for phrase boosting and as
            part of the cache key.
        rewrite: Rewrite dict; ``ar_query`` and ``en_query`` are required,
            ``keywords`` and ``intent`` are optional.
        embed_model: Encoder used to embed the two rewritten queries.
        index: FAISS index whose row ids are positions in ``dataset``.
        dataset: Records addressed by FAISS row id.
        top_n: Maximum number of results to return.
        source_type: When set, keep only items whose ``type`` equals it.
        grade_filter: When set, keep only hadith whose grade contains this
            text (case-insensitive). ``"all"`` and empty values disable it.

    Returns:
        Up to ``top_n`` copies of dataset items, each extended with
        ``_dense``, ``_sparse`` and ``_score``, sorted by ``_score``
        descending. An empty list when no candidate survives the filters.
    """
    intent = str(rewrite.get("intent") or "general")

    # "all" is the wildcard the API documents; matching it as a substring of
    # the grade would wrongly discard every hadith.
    grade_norm = (grade_filter or "").strip().lower()
    if grade_norm == "all":
        grade_norm = ""

    # The rewrite drives both retrieval and fusion, so it belongs in the key:
    # callers passing different rewrites for the same raw query must not
    # share cached results.
    cache_key = (
        raw_query, top_n, source_type, grade_norm or None,
        intent, rewrite["ar_query"], rewrite["en_query"],
    )
    cached = await search_cache.get(*cache_key)
    if cached:
        return cached

    # ── 1. Dual-language dense retrieval ──────────────────────────────
    # NOTE(review): encode() and index.search() are called synchronously
    # inside this coroutine (no await on either call).
    ar_q = "query: " + rewrite["ar_query"]
    en_q = "query: " + rewrite["en_query"]
    embeddings = embed_model.encode(
        [ar_q, en_q], normalize_embeddings=True, batch_size=2
    ).astype("float32")
    fused = embeddings[0] + embeddings[1]
    norm  = np.linalg.norm(fused)
    if norm > 0:
        fused /= norm
    else:
        # The two vectors cancelled out; fall back to the Arabic query alone
        # instead of dividing by zero.
        fused = embeddings[0]
    distances, indices = index.search(fused.reshape(1, -1), cfg.TOP_K_SEARCH)

    # ── 2. De-duplicate candidates & apply filters ─────────────────────
    seen: set  = set()
    candidates = []
    for dist, idx in zip(distances[0], indices[0]):
        item_idx = int(idx)
        # Ids outside the dataset (including negative ones) are skipped.
        if item_idx in seen or not 0 <= item_idx < len(dataset):
            continue
        seen.add(item_idx)
        item = dataset[item_idx]
        if source_type and item.get("type") != source_type:
            continue
        if grade_norm and item.get("type") == "hadith":
            # `or ""` tolerates a grade that is present but None.
            if grade_norm not in (item.get("grade") or "").lower():
                continue
        candidates.append({**item, "_dense": float(dist)})
    if not candidates:
        return []

    # ── 3. BM25 sparse scoring ─────────────────────────────────────────
    keywords = rewrite.get("keywords") or raw_query.split()
    if isinstance(keywords, str):
        keywords = keywords.split()
    query_terms = [light_stem(kw) for kw in keywords if isinstance(kw, str)]

    docs = [
        (c.get("arabic") or "") + " " + (c.get("english") or "")
        for c in candidates
    ]
    avg_dl = sum(len(tokenize_ar(doc)) for doc in docs) / len(candidates)
    for c, doc in zip(candidates, docs):
        c["_sparse"] = _bm25_score(query_terms, doc, avg_dl)

    # ── 3.5. Phrase matching boost for exact snippets ───────────────────
    # The 3-word windows depend only on the query, so build them once.
    fragments = normalize_arabic(raw_query, aggressive=False).lower().split()
    phrases = [
        phrase
        for phrase in (" ".join(fragments[i:i + 3]) for i in range(len(fragments) - 2))
        if len(phrase) > 5
    ]
    if phrases:
        for c in candidates:
            if c.get("type") != "hadith":
                continue
            ar_norm = normalize_arabic(c.get("arabic") or "", aggressive=False).lower()
            if any(phrase in ar_norm for phrase in phrases):
                c["_sparse"] += 2.0

    # ── 4. Score fusion ────────────────────────────────────────────────
    alpha = cfg.RERANK_ALPHA
    if intent == "auth":
        alpha = 0.75
    # Sparse scores are scaled by the batch maximum; guard against all-zero.
    max_sparse = max((c["_sparse"] for c in candidates), default=1.0) or 1.0
    for c in candidates:
        base_score = alpha * c["_dense"] + (1 - alpha) * c["_sparse"] / max_sparse
        if intent == "hadith" and c.get("type") == "hadith":
            base_score += cfg.HADITH_BOOST
        c["_score"] = base_score

    candidates.sort(key=lambda x: x["_score"], reverse=True)
    results = candidates[:top_n]
    await search_cache.set(results, *cache_key)
    return results
+# ═══════════════════════════════════════════════════════════════════════
+# TEXT-BASED SEARCH  (exact substring + fuzzy matching)
+# ═══════════════════════════════════════════════════════════════════════
def text_search(
    query: str,
    dataset: list,
    source_type: Optional[Literal["quran", "hadith"]] = None,
    limit: int = 10,
) -> list:
    """Search dataset by exact text match (Arabic or English).

    Scoring, highest applicable value wins:
      * query found in the raw Arabic text: at least 4.0
      * query found in the normalised Arabic: 3.0 plus a bonus that grows as
        the document gets shorter
      * query found in the lower-cased English: 2.0 plus the same kind of bonus
      * otherwise, word overlap of at least 2 tokens and half the query
        tokens: overlap divided by the number of query tokens

    Args:
        query: Text to look for.
        dataset: Records with optional ``arabic``, ``english`` and ``type``.
        source_type: When set, only items of this ``type`` are considered.
        limit: Maximum number of results.

    Returns:
        Copies of matching items with a ``_score`` key, best first. A blank
        query returns an empty list.
    """
    q_raw = query.strip()
    # An empty string is a substring of everything, so a blank query would
    # "match" every record with the top score.
    if not q_raw:
        return []

    q_norm  = normalize_arabic(query, aggressive=True).lower()
    q_lower = query.lower().strip()
    # Query tokens and the overlap threshold are the same for every item.
    q_tokens    = set(q_norm.split())
    min_overlap = max(2, len(q_tokens) * 0.5)

    results = []
    for item in dataset:
        if source_type and item.get("type") != source_type:
            continue
        # `or ""` tolerates fields that are present but None.
        ar_raw   = item.get("arabic") or ""
        en_raw   = item.get("english") or ""
        ar_norm  = normalize_arabic(ar_raw, aggressive=True).lower()
        en_lower = en_raw.lower()

        score = 0.0
        # Exact substring in normalized Arabic; shorter docs score higher
        # because the match is more specific.
        if q_norm and q_norm in ar_norm:
            score = 3.0 + (1.0 / max(len(ar_norm), 1)) * 100
        # Exact substring in English
        if q_lower and q_lower in en_lower:
            score = max(score, 2.0 + (1.0 / max(len(en_lower), 1)) * 100)
        # Exact substring in raw Arabic (with diacritics)
        if q_raw in ar_raw:
            score = max(score, 4.0)

        # Word-level overlap for lower-confidence matches
        if score == 0.0:
            ar_overlap   = len(q_tokens & set(ar_norm.split()))
            en_overlap   = len(q_tokens & set(en_lower.split()))
            best_overlap = max(ar_overlap, en_overlap)
            if best_overlap >= min_overlap:
                score = best_overlap / max(len(q_tokens), 1)

        if score > 0:
            results.append({**item, "_score": score})

    results.sort(key=lambda x: x["_score"], reverse=True)
    return results[:limit]
def build_context(results: list) -> str:
    """Render ranked search hits as the numbered context block given to the LLM.

    Each hit becomes a three-line entry (header, Arabic text, English text);
    entries are numbered from 1 and separated by a blank line.
    """

    def _render(rank: int, hit: dict) -> str:
        label = hit.get("source") or hit.get("reference") or "Unknown Source"
        # Anything that is not explicitly a Quran entry is labelled a Hadith.
        kind = "Hadith"
        if hit.get("type") == "quran":
            kind = "Quranic Verse"
        grade = hit.get("grade")
        grade_tag = f" [Grade: {grade}]" if grade else ""
        header = f"[{rank}] 📌 {kind}{grade_tag} | {label} | score: {hit.get('_score', 0):.3f}"
        return "\n".join((
            header,
            f"    Arabic : {hit.get('arabic', '')}",
            f"    English: {hit.get('english', '')}",
        ))

    return "\n\n".join(_render(rank, hit) for rank, hit in enumerate(results, 1))

app/state.py ADDED Viewed

	@@ -0,0 +1,235 @@

+"""Application state, lifespan, and core RAG pipeline."""
+from __future__ import annotations
+import asyncio
+import json
+import logging
+import time
+from contextlib import asynccontextmanager
+from typing import Literal, Optional
+import faiss
+from fastapi import FastAPI, HTTPException
+from sentence_transformers import SentenceTransformer
+from app.analysis import (
+    count_occurrences,
+    detect_analysis_intent,
+    detect_surah_info,
+    lookup_surah_info,
+)
+from app.arabic_nlp import detect_language
+from app.config import cfg
+from app.llm import LLMProvider, get_llm_provider
+from app.prompts import build_messages, not_found_answer
+from app.search import build_context, hybrid_search, rewrite_query, text_search
+logger = logging.getLogger("qmodel.state")
+# ═══════════════════════════════════════════════════════════════════════
+# HADITH GRADE INFERENCE
+# ═══════════════════════════════════════════════════════════════════════
# Ordered (substrings, grade) rules: the first rule with any substring present
# in "<collection> <reference>" (lower-cased) decides the grade.
_COLLECTION_GRADE_RULES = (
    (("sahih al-bukhari", "sahih bukhari", "bukhari"), "Sahih"),
    (("sahih muslim", "sahih al-muslim"), "Sahih"),
    (("sunan an-nasai", "sunan an-nasa", "nasa'i", "nasa"), "Sahih"),
    (("jami at-tirmidhi", "tirmidhi", "at-tirmidhi"), "Hasan"),
    (("sunan abu dawood", "abu dawood", "abo daud", "abou daoude"), "Hasan"),
    (("sunan ibn majah", "ibn majah", "ibn maja"), "Hasan"),
    (("muwatta malik", "muwatta", "malik"), "Hasan"),
    (("musnad ahmad", "ahmad", "ahmed"), "Hasan/Sahih"),
    (("sunan al-darimi", "darimi", "al-darimi"), "Hasan"),
)


def infer_hadith_grade(item: dict) -> dict:
    """Infer hadith grade from collection name if not present.

    Args:
        item: A dataset record. It is modified in place when a grade can be
            inferred.

    Returns:
        The same ``item`` object. Records that are not hadith, that already
        have a truthy ``grade``, or whose collection/reference matches no
        rule are returned unchanged.
    """
    if item.get("type") != "hadith" or item.get("grade"):
        return item

    # `or ""` covers fields that are present but None; str() covers
    # non-string values, so .lower() cannot fail.
    collection = str(item.get("collection") or "").lower()
    reference  = str(item.get("reference") or "").lower()
    combined   = f"{collection} {reference}"

    for needles, grade in _COLLECTION_GRADE_RULES:
        if any(needle in combined for needle in needles):
            item["grade"] = grade
            break
    return item
+# ═══════════════════════════════════════════════════════════════════════
+# APP STATE
+# ═══════════════════════════════════════════════════════════════════════
class AppState:
    """Holder for the resources shared by every request handler."""

    # Heavy resources start as None and are assigned by `lifespan` at startup.
    embed_model: Optional[SentenceTransformer] = None
    faiss_index: Optional[faiss.Index] = None
    dataset: Optional[list] = None
    llm: Optional[LLMProvider] = None
    # True only between a completed startup and shutdown.
    ready: bool = False


# Single module-level instance used throughout the application.
state = AppState()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load models and data on startup; mark the service not-ready on shutdown.

    Startup loads the embedding model, the FAISS index and the metadata JSON,
    fills in missing hadith grades, creates the LLM provider and then sets
    ``state.ready``. Whatever happens while the app is running, the
    ``finally`` clause clears ``state.ready`` again.
    """
    logger.info("Loading embed model:  %s", cfg.EMBED_MODEL)
    state.embed_model = SentenceTransformer(cfg.EMBED_MODEL)

    logger.info("Loading FAISS index:  %s", cfg.FAISS_INDEX)
    state.faiss_index = faiss.read_index(cfg.FAISS_INDEX)

    logger.info("Loading metadata:     %s", cfg.METADATA_FILE)
    with open(cfg.METADATA_FILE, "r", encoding="utf-8") as f:
        state.dataset = json.load(f)
    state.dataset = [infer_hadith_grade(item) for item in state.dataset]

    # Search maps FAISS row ids straight onto dataset positions, so a size
    # mismatch means the index and metadata come from different builds.
    if state.faiss_index.ntotal != len(state.dataset):
        logger.warning(
            "FAISS index size (%d) does not match metadata size (%d)",
            state.faiss_index.ntotal,
            len(state.dataset),
        )

    logger.info("Initializing LLM provider: %s", cfg.LLM_BACKEND)
    state.llm = get_llm_provider()

    state.ready = True
    logger.info(
        "QModel v6 ready | backend=%s | dataset=%d | faiss=%d | threshold=%.2f",
        cfg.LLM_BACKEND,
        len(state.dataset) if state.dataset else 0,
        state.faiss_index.ntotal if state.faiss_index else 0,
        cfg.CONFIDENCE_THRESHOLD,
    )
    try:
        yield
    finally:
        # Runs even when an exception or cancellation is raised at the yield,
        # so the readiness flag never stays stale.
        state.ready = False
        logger.info("QModel shutdown")
+def check_ready():
+    """Raise 503 if service isn't ready."""
+    if not state.ready:
+        raise HTTPException(
+            status_code=503,
+            detail="Service is still initialising. Please retry shortly.",
+        )
+# ═══════════════════════════════════════════════════════════════════════
+# CORE RAG PIPELINE
+# ═══════════════════════════════════════════════════════════════════════
+async def run_rag_pipeline(
+    question: str,
+    top_k: int = cfg.TOP_K_RETURN,
+    source_type: Optional[Literal["quran", "hadith"]] = None,
+    grade_filter: Optional[str] = None,
+) -> dict:
+    """Core RAG pipeline: rewrite -> search -> verify -> generate."""
+    t0 = time.perf_counter()
+    # 1. Query rewriting
+    rewrite = await rewrite_query(question, state.llm)
+    intent  = rewrite.get("intent", "general")
+    # 2. Concurrent: surah info + analysis intent + hybrid search + text search
+    surah_task  = detect_surah_info(question, rewrite)
+    kw_task     = detect_analysis_intent(question, rewrite)
+    search_task = hybrid_search(
+        question, rewrite,
+        state.embed_model, state.faiss_index, state.dataset,
+        top_k, source_type, grade_filter,
+    )
+    surah_det, analysis_kw, results = await asyncio.gather(
+        surah_task, kw_task, search_task,
+    )
+    # 2b. Text search fallback — catches exact matches missed by FAISS
+    #     (e.g. hadith text buried in long isnad chains)
+    #     Use rewritten ar_query (clean hadith text) + raw question for coverage.
+    seen_ids = {r.get("id") for r in results}
+    ar_q = rewrite.get("ar_query", "")
+    for q in dict.fromkeys([ar_q, question]):  # deduplicated, ar_query first
+        if not q:
+            continue
+        for hit in text_search(q, state.dataset, source_type, limit=top_k):
+            if hit.get("id") not in seen_ids:
+                results.append(hit)
+                seen_ids.add(hit.get("id"))
+    if len(results) > top_k:
+        results.sort(key=lambda x: x.get("_score", 0), reverse=True)
+        results = results[:top_k]
+    # 3a. Surah metadata lookup
+    surah_info = None
+    if surah_det:
+        surah_info = await lookup_surah_info(surah_det["surah_query"], state.dataset)
+        if surah_info:
+            intent = "surah_info"
+            logger.info(
+                "Surah info: %s -> %s (%d verses)",
+                surah_det["surah_query"],
+                surah_info["surah_name_en"],
+                surah_info.get("total_verses", 0),
+            )
+    # 3b. Word frequency count
+    analysis = None
+    if analysis_kw and not surah_info:
+        analysis = await count_occurrences(analysis_kw, state.dataset)
+        logger.info("Analysis: kw=%s count=%d", analysis_kw, analysis["total_count"])
+    # 4. Language detection
+    lang      = detect_language(question)
+    top_score = results[0].get("_score", 0.0) if results else 0.0
+    logger.info(
+        "Search done | intent=%s | top_score=%.3f | threshold=%.2f",
+        intent, top_score, cfg.CONFIDENCE_THRESHOLD,
+    )
+    # 5. Confidence gate (skip for surah_info)
+    if not surah_info and top_score < cfg.CONFIDENCE_THRESHOLD:
+        logger.warning(
+            "Low confidence (%.3f < %.2f) — returning safe fallback",
+            top_score, cfg.CONFIDENCE_THRESHOLD,
+        )
+        return {
+            "answer":     not_found_answer(lang),
+            "language":   lang,
+            "intent":     intent,
+            "analysis":   analysis,
+            "sources":    results,
+            "top_score":  top_score,
+            "latency_ms": int((time.perf_counter() - t0) * 1000),
+        }
+    # 6. Build context + prompt + LLM call
+    context  = build_context(results)
+    messages = build_messages(context, question, lang, intent, analysis, surah_info)
+    try:
+        answer = await state.llm.chat(
+            messages,
+            max_tokens=cfg.MAX_TOKENS,
+            temperature=cfg.TEMPERATURE,
+        )
+    except Exception as exc:
+        logger.error("LLM call failed: %s", exc)
+        raise HTTPException(status_code=502, detail="LLM service unavailable")
+    latency = int((time.perf_counter() - t0) * 1000)
+    logger.info(
+        "Pipeline done | intent=%s | lang=%s | top_score=%.3f | %d ms",
+        intent, lang, top_score, latency,
+    )
+    return {
+        "answer":     answer,
+        "language":   lang,
+        "intent":     intent,
+        "analysis":   analysis,
+        "sources":    results,
+        "top_score":  top_score,
+        "latency_ms": latency,
+    }

main.py CHANGED Viewed

@@ -1,1029 +1,55 @@
 """
-QModel v4 — Islamic RAG API
 ===========================
 Specialized Quran & Hadith system with dual LLM backend support.
-Features:
-  • Dual backend: Hugging Face (transformers) + Ollama
-  • Grade filtering: Return only Sahih/Hasan Hadiths
-  • Source filtering: Quran-only or Hadith-only queries
-  • Hadith verification: Quick auth check endpoint
-  • Word frequency: Enhanced with Surah grouping
-  • No hallucinations: Confidence gating + few-shot anti-hallucination
-  • Arabic & English: Full bilingual support with proper normalization
-Configuration via .env:
-  LLM_BACKEND=hf|ollama (default: hf)
-  HF_MODEL_NAME=<hf-model-id> (e.g. gpt2, default: Qwen/Qwen2-7B-Instruct)
-  OLLAMA_HOST=<url> (e.g. http://localhost:11434, default: http://localhost:11434)
-  OLLAMA_MODEL=<model> (e.g. llama2, default: llama2)
-  EMBED_MODEL=intfloat/multilingual-e5-large (embedding model)
 """
 from __future__ import annotations
-import asyncio
-import hashlib
-import json
 import logging
-import os
-import re
-import time
-from collections import Counter, OrderedDict
-from contextlib import asynccontextmanager
-from typing import Dict, List, Literal, Optional
-import faiss
-import numpy as np
 from dotenv import load_dotenv
-from fastapi import FastAPI, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import StreamingResponse
-from pydantic import BaseModel, Field, validator
-from sentence_transformers import SentenceTransformer
 load_dotenv()
-# ═══════════════════════════════════════════════════════════════════════
-# LOGGING
-# ═══════════════════════════════════════════════════════════════════════
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
 )
-logger = logging.getLogger("qmodel")
-# ═══════════════════════════════════════════════════════════════════════
-# CONFIG & LLM FACTORY
-# ═══════════════════════════════════════════════════════════════════════
-class Config:
-    """Centralized configuration with dual backend support."""
-    # Backend selection
-    LLM_BACKEND:          str   = os.getenv("LLM_BACKEND", "ollama")  # "hf" or "ollama"
-    # Hugging Face backend
-    HF_MODEL_NAME:        str   = os.getenv("HF_MODEL_NAME", "Qwen/Qwen2-7B-Instruct")
-    HF_DEVICE:            str   = os.getenv("HF_DEVICE", "auto")
-    HF_MAX_NEW_TOKENS:    int   = int(os.getenv("HF_MAX_NEW_TOKENS", 2048))
-    # Ollama backend
-    OLLAMA_HOST:          str   = os.getenv("OLLAMA_HOST", "http://localhost:11434")
-    OLLAMA_MODEL:         str   = os.getenv("OLLAMA_MODEL", "llama2")
-    # Embedding model
-    EMBED_MODEL:          str   = os.getenv("EMBED_MODEL", "intfloat/multilingual-e5-large")
-    # Index & data
-    FAISS_INDEX:          str   = os.getenv("FAISS_INDEX", "QModel.index")
-    METADATA_FILE:        str   = os.getenv("METADATA_FILE", "metadata.json")
-    # Retrieval
-    TOP_K_SEARCH:         int   = int(os.getenv("TOP_K_SEARCH", 20))     # candidate pool
-    TOP_K_RETURN:         int   = int(os.getenv("TOP_K_RETURN", 5))      # final results
-    # Generation
-    TEMPERATURE:          float = float(os.getenv("TEMPERATURE", 0.2))
-    MAX_TOKENS:           int   = int(os.getenv("MAX_TOKENS", 2048))
-    # Caching
-    CACHE_SIZE:           int   = int(os.getenv("CACHE_SIZE", 512))
-    CACHE_TTL:            int   = int(os.getenv("CACHE_TTL", 3600))
-    # Ranking
-    RERANK_ALPHA:         float = float(os.getenv("RERANK_ALPHA", 0.6))  # 60% dense, 40% sparse
-    HADITH_BOOST:         float = float(os.getenv("HADITH_BOOST", 0.08))
-    # Safety
-    CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", 0.30))
-    # CORS
-    ALLOWED_ORIGINS:      str   = os.getenv("ALLOWED_ORIGINS", "*")
-    MAX_EXAMPLES:         int   = int(os.getenv("MAX_EXAMPLES", 3))
-cfg = Config()
-# ═══════════════════════════════════════════════════════════════════════
-# LLM ABSTRACTION LAYER
-# ═══════════════════════════════════════════════════════════════════════
-class LLMProvider:
-    """Abstract base for LLM providers."""
-    async def chat(
-        self, messages: List[dict], temperature: float, max_tokens: int
-    ) -> str:
-        raise NotImplementedError
-class OllamaProvider(LLMProvider):
-    """Ollama-based LLM provider."""
-    def __init__(self, host: str, model: str):
-        self.host = host
-        self.model = model
-        try:
-            import ollama
-            self.client = ollama.Client(host=host)
-        except ImportError:
-            raise ImportError("Install ollama:  pip install ollama")
-    async def chat(
-        self, messages: List[dict], temperature: float, max_tokens: int
-    ) -> str:
-        loop = asyncio.get_event_loop()
-        try:
-            result = await loop.run_in_executor(
-                None,
-                lambda: self.client.chat(
-                    model=self.model,
-                    messages=messages,
-                    options={"temperature": temperature, "num_predict": max_tokens},
-                ),
-            )
-            return result["message"]["content"].strip()
-        except Exception as exc:
-            logger.error("Ollama chat failed: %s", exc)
-            raise
-class HuggingFaceProvider(LLMProvider):
-    """Hugging Face transformers-based LLM provider."""
-    def __init__(self, model_name: str, device: str):
-        self.model_name = model_name
-        self.device = device
-        try:
-            from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-            self.model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                device_map=device,
-                torch_dtype="auto",
-            )
-            self.pipeline = TextGenerationPipeline(
-                model=self.model,
-                tokenizer=self.tokenizer,
-                device=0 if device != "cpu" else None,
-            )
-        except ImportError:
-            raise ImportError("Install transformers:  pip install transformers torch")
-    async def chat(
-        self, messages: List[dict], temperature: float, max_tokens: int
-    ) -> str:
-        # Format messages for the model
-        prompt = self._format_messages(messages)
-        loop = asyncio.get_event_loop()
-        try:
-            result = await loop.run_in_executor(
-                None,
-                lambda: self.pipeline(
-                    prompt,
-                    max_new_tokens=max_tokens,
-                    temperature=temperature,
-                    do_sample=temperature > 0,
-                ),
-            )
-            # Extract generated text
-            generated = result[0]["generated_text"]
-            # Remove the prompt from generated text
-            output = generated[len(prompt):].strip()
-            return output
-        except Exception as exc:
-            logger.error("HF chat failed: %s", exc)
-            raise
-    def _format_messages(self, messages: List[dict]) -> str:
-        """Format messages for the model."""
-        prompt = ""
-        for msg in messages:
-            role = msg["role"]
-            content = msg["content"]
-            if role == "system":
-                prompt += f"{content}\n\n"
-            elif role == "user":
-                prompt += f"User: {content}\n"
-            elif role == "assistant":
-                prompt += f"Assistant: {content}\n"
-        prompt += "Assistant: "
-        return prompt
-def get_llm_provider() -> LLMProvider:
-    """Factory function to get the configured LLM provider."""
-    if cfg.LLM_BACKEND == "ollama":
-        logger.info("Using Ollama backend: %s @ %s", cfg.OLLAMA_MODEL, cfg.OLLAMA_HOST)
-        return OllamaProvider(cfg.OLLAMA_HOST, cfg.OLLAMA_MODEL)
-    elif cfg.LLM_BACKEND == "hf":
-        logger.info("Using HuggingFace backend: %s on %s", cfg.HF_MODEL_NAME, cfg.HF_DEVICE)
-        return HuggingFaceProvider(cfg.HF_MODEL_NAME, cfg.HF_DEVICE)
-    else:
-        raise ValueError(f"Unknown LLM_BACKEND: {cfg.LLM_BACKEND}")
-# ═══════════════════════════════════════════════════════════════════════
-# ASYNC TTL-LRU CACHE
-# ═══════════════════════════════════════════════════════════════════════
-class TTLCache:
-    """Async-safe LRU cache with per-entry TTL."""
-    def __init__(self, maxsize: int = 256, ttl: int = 3600):
-        self._cache: OrderedDict = OrderedDict()
-        self._maxsize = maxsize
-        self._ttl = ttl
-        self._lock = asyncio.Lock()
-    def _key(self, *args) -> str:
-        payload = json.dumps(args, ensure_ascii=False, sort_keys=True)
-        return hashlib.sha256(payload.encode()).hexdigest()[:20]
-    async def get(self, *args):
-        async with self._lock:
-            k = self._key(*args)
-            if k in self._cache:
-                value, ts = self._cache[k]
-                if time.monotonic() - ts < self._ttl:
-                    self._cache.move_to_end(k)
-                    return value
-                del self._cache[k]
-        return None
-    async def set(self, value, *args):
-        async with self._lock:
-            k = self._key(*args)
-            self._cache[k] = (value, time.monotonic())
-            self._cache.move_to_end(k)
-            if len(self._cache) > self._maxsize:
-                self._cache.popitem(last=False)
-search_cache   = TTLCache(maxsize=cfg.CACHE_SIZE, ttl=cfg.CACHE_TTL)
-analysis_cache = TTLCache(maxsize=cfg.CACHE_SIZE, ttl=cfg.CACHE_TTL)
-rewrite_cache  = TTLCache(maxsize=cfg.CACHE_SIZE, ttl=cfg.CACHE_TTL * 6)
-# ═══════════════════════════════════════════════════════════════════════
-# ARABIC NLP  — normalisation + light stemming
-# ═══════════════════════════════════════════════════════════════════════
-_DIACRITICS   = re.compile(r"[\u064B-\u0655\u0656-\u0658\u0670\u06D6-\u06ED]")
-_ALEF_VARS    = re.compile(r"[أإآٱ]")
-_WAW_HAMZA    = re.compile(r"ؤ")
-_YA_HAMZA     = re.compile(r"ئ")
-_TA_MARBUTA   = re.compile(r"ة\b")
-_ALEF_MAQSURA = re.compile(r"ى")
-_TATWEEL      = re.compile(r"\u0640+")
-_PUNC_AR      = re.compile(r"[،؛؟!«»\u200c\u200d\u200f\u200e]")
-_MULTI_SPACE  = re.compile(r"\s{2,}")
-_NON_AR_EN    = re.compile(r"[^\u0600-\u06FF\u0750-\u077Fa-zA-Z0-9\s]")
-_SPELLING_MAP: Dict[str, str] = {
-    "قران":    "قرآن",
-    "القران":  "القرآن",
-    "اللہ":    "الله",
-}
-def normalize_arabic(text: str, *, aggressive: bool = False) -> str:
-    """Normalize Arabic text: diacritics, hamza, ta marbuta, etc."""
-    text = _DIACRITICS.sub("", text)
-    text = _TATWEEL.sub("", text)
-    text = _ALEF_VARS.sub("ا", text)
-    text = _WAW_HAMZA.sub("و", text)
-    text = _YA_HAMZA.sub("ي", text)
-    text = _TA_MARBUTA.sub("ه", text)
-    text = _ALEF_MAQSURA.sub("ي", text)
-    text = _PUNC_AR.sub(" ", text)
-    for variant, canonical in _SPELLING_MAP.items():
-        text = text.replace(variant, canonical)
-    if aggressive:
-        text = _NON_AR_EN.sub(" ", text)
-    return _MULTI_SPACE.sub(" ", text).strip()
-_AR_PREFIXES = re.compile(
-    r"^(و|ف|ب|ل|ال|لل|وال|فال|بال|كال|ولل|ومن|وفي|وعن|وإلى|وعلى)\b"
-)
-_AR_SUFFIXES = re.compile(
-    r"(ون|ين|ان|ات|ها|هم|هن|كم|كن|نا|ني|تي|ي|ه|ك|ا|وا)$"
-)
-def light_stem(word: str) -> str:
-    """Light stemming: remove common Arabic affixes."""
-    w = _AR_PREFIXES.sub("", word)
-    w = _AR_SUFFIXES.sub("", w)
-    return w if len(w) >= 2 else word
-def tokenize_ar(text: str) -> List[str]:
-    """Tokenize and stem Arabic text."""
-    norm = normalize_arabic(text, aggressive=True).lower()
-    return [light_stem(t) for t in norm.split() if t]
-# ═══════════════════════════════════════════════════════════════════════
-# LANGUAGE DETECTION
-# ═══════════════════════════════════════════════════════════════════════
-_ARABIC_SCRIPT = re.compile(
-    r"[\u0600-\u06FF\u0750-\u077F\uFB50-\uFDFF\uFE70-\uFEFF]"
-)
-def detect_language(text: str) -> Literal["arabic", "english", "mixed"]:
-    """Detect if text is Arabic, English, or mixed."""
-    ar    = len(_ARABIC_SCRIPT.findall(text))
-    en    = len(re.findall(r"[a-zA-Z]", text))
-    tot   = ar + en or 1
-    ratio = ar / tot
-    if ratio > 0.70:
-        return "arabic"
-    if ratio < 0.30:
-        return "english"
-    return "mixed"
-def language_instruction(lang: str) -> str:
-    """Generate language-specific instruction for LLM."""
-    return {
-        "arabic": (
-            "يجب أن تكون الإجابة كاملةً باللغة العربية الفصحى تماماً. "
-            "لا تستخدم الإنجليزية أو أي لغة أخرى في أي جزء من الإجابة."
-        ),
-        "mixed": (
-            "The question mixes Arabic and English. Reply primarily in Arabic (الفصحى) "
-            "but you may transliterate key terms in English where essential."
-        ),
-        "english": "You MUST reply entirely in clear, formal English.",
-    }.get(lang, "You MUST reply entirely in clear, formal English.")
-# ═══════════════════════════════════════════════════════════════════════
-# QUERY REWRITING
-# ═══════════════════════════════════════════════════════════════════════
-_REWRITE_SYSTEM = """\
-You are an Islamic-scholarship search query optimizer.
-Your ONLY job: rewrite the user's question to maximise retrieval from a Quranic + Hadith dataset.
-Reply ONLY with a valid JSON object — no markdown, no preamble:
-{
-  "ar_query": "<query in clear Arabic فصحى, ≤25 words>",
-  "en_query": "<query in clear English, ≤25 words>",
-  "keywords": ["<3-7 key Arabic or English terms from the question>"],
-  "intent": "<one of: fatwa | tafsir | hadith | count | surah_info | auth | general>"
-}
-Intent Detection Rules (CRITICAL):
-- 'surah_info' intent = asking about surah metadata: verse count, revelation type, surah number
-  (كم عدد آيات سورة, كم آية في سورة, how many verses in surah, is surah X meccan/medinan)
-- 'count' intent = asking for WORD frequency/occurrence count (كم مرة ذُكرت كلمة, how many times is word X mentioned)
-  NOTE: "كم عدد آيات سورة" is surah_info NOT count!
-- 'auth' intent = asking about authenticity (صحيح؟, هل صحيح, is it authentic, verify hadith grade)
-- 'hadith' intent = asking about specific hadith meaning/text (not authenticity)
-- 'tafsir' intent = asking about Quranic verses or Islamic ruling (fatwa)
-- 'general' intent = other questions
-Examples:
-- "كم عدد آيات سورة آل عمران" → intent: surah_info (asking about surah metadata!)
-- "كم آية في سورة البقرة" → intent: surah_info
-- "how many verses in surah al-baqara" → intent: surah_info
-- "هل سورة الفاتحة مكية أم مدنية" → intent: surah_info
-- "كم مرة ذُكرت كلمة مريم" → intent: count (asking about WORD frequency!)
-- "هل حديث إنما الأعمال بالنيات صحيح" → intent: auth (asking if authentic!)
-- "ما معنى حديث إنما الأعمال" → intent: hadith
-- "ما حكم الربا في الإسلام" → intent: fatwa
-"""
-async def rewrite_query(raw: str, llm: LLMProvider) -> Dict:
-    """Rewrite query for better retrieval."""
-    cached = await rewrite_cache.get(raw)
-    if cached:
-        return cached
-    fallback = {
-        "ar_query": normalize_arabic(raw),
-        "en_query": raw,
-        "keywords": raw.split()[:7],
-        "intent":   "general",
-    }
-    try:
-        text = await llm.chat(
-            messages=[
-                {"role": "system", "content": _REWRITE_SYSTEM},
-                {"role": "user",   "content": raw},
-            ],
-            max_tokens=220,
-            temperature=0.0,
-        )
-        text   = re.sub(r"```(?:json)?\n?|\n?```", "", text).strip()
-        result = json.loads(text)
-        for k in ("ar_query", "en_query", "keywords", "intent"):
-            result.setdefault(k, fallback[k])
-        await rewrite_cache.set(result, raw)
-        logger.info("Rewrite: intent=%s ar=%s", result["intent"], result["ar_query"][:60])
-        return result
-    except Exception as exc:
-        logger.warning("Query rewrite failed (%s) — using fallback", exc)
-        return fallback
-# ═══════════════════════════════════════════════════════════════════════
-# INTENT DETECTION  (frequency / count queries / hadith auth)
-# ═══════════════════════════════════════════════════════════════════════
-_COUNT_EN = re.compile(
-    r"\b(how many|count|number of|frequency|occurrences? of|how often|"
-    r"times? (does|is|appears?))\b",
-    re.I,
-)
-_COUNT_AR = re.compile(
-    r"(كم مرة|كم عدد|كم تكرر|عدد مرات|تكرار|كم ذُكر|كم وردت?)"
-)
-_AUTH_EN = re.compile(
-    r"\b(authentic|is.*authentic|authenticity|sahih|hasan|weak|daif|verify)\b",
-    re.I,
-)
-_AUTH_AR = re.compile(
-    r"(صحيح|حسن|ضعيف|درجة|صحة|تصحيح|هل.*صحيح|هل.*ضعيف)"
-)
-# ── Surah metadata queries (verse count, revelation type, etc.) ───────
-_SURAH_VERSES_AR = re.compile(
-    r"كم\s+(?:عدد\s+)?آيات?\s*(?:في\s+|فى\s+)?(?:سورة|سوره)"
-    r"|عدد\s+آيات?\s+(?:سورة|سوره)"
-    r"|كم\s+آية\s+(?:في|فى)\s+(?:سورة|سوره)"
-    r"|(?:سورة|سوره)\s+[\u0600-\u06FF\s]+\s+(?:كم\s+آية|عدد\s+آيات?)"
-)
-_SURAH_VERSES_EN = re.compile(
-    r"(?:how many|number of)\s+(?:verses?|ayat|ayahs?)\s+(?:in|of|does)\b"
-    r"|\bsurah?\b.*\b(?:how many|number of)\s+(?:verses?|ayat|ayahs?)",
-    re.I,
-)
-_SURAH_TYPE_AR = re.compile(
-    r"(?:سورة|سوره)\s+[\u0600-\u06FF\s]+\s+(?:مكية|مدنية|مكي|مدني)"
-    r"|(?:هل|ما\s+نوع)\s+(?:سورة|سوره)\s+[\u0600-\u06FF\s]+\s+(?:مكية|مدنية)"
-)
-_SURAH_NAME_AR = re.compile(
-    r"(?:سورة|سوره)\s+([\u0600-\u06FF\u0750-\u077F\s]+)"
-)
-_SURAH_NAME_EN = re.compile(
-    r"\bsurah?\s+([a-zA-Z'\-]+(?:[\s\-][a-zA-Z'\-]+)*)",
-    re.I,
-)
-def _extract_surah_name(query: str) -> Optional[str]:
-    """Extract surah name from a query string."""
-    for pat in (_SURAH_NAME_AR, _SURAH_NAME_EN):
-        m = pat.search(query)
-        if m:
-            name = m.group(1).strip()
-            # Clean trailing punctuation and question words
-            name = re.sub(r'[\s؟?!]+$', '', name)
-            name = re.sub(r'\s+(كم|عدد|هل|ما|في|فى)$', '', name)
-            if name:
-                return name
-    return None
-async def detect_surah_info(query: str, rewrite: dict) -> Optional[dict]:
-    """Detect if query asks about surah metadata (verse count, type, etc.)."""
-    is_verse_q = bool(_SURAH_VERSES_AR.search(query) or _SURAH_VERSES_EN.search(query))
-    is_type_q  = bool(_SURAH_TYPE_AR.search(query))
-    if not (is_verse_q or is_type_q):
-        # Also check LLM rewrite intent
-        if rewrite.get("intent") == "surah_info":
-            is_verse_q = True
-        elif rewrite.get("intent") == "count":
-            kw_text = " ".join(rewrite.get("keywords", []))
-            if any(w in kw_text for w in ("آيات", "آية", "verses", "ayat")):
-                is_verse_q = True
-            else:
-                return None
-        else:
-            return None
-    surah_name = _extract_surah_name(query)
-    if not surah_name:
-        return None
-    return {
-        "surah_query": surah_name,
-        "query_type": "verses" if is_verse_q else "type",
-    }
-async def lookup_surah_info(surah_query: str, dataset: list) -> Optional[dict]:
-    """Look up surah metadata from dataset entries."""
-    query_norm = normalize_arabic(surah_query, aggressive=True).lower()
-    query_clean = re.sub(r"^(ال|al[\-\s']*)", "", query_norm, flags=re.I).strip()
-    for item in dataset:
-        if item.get("type") != "quran":
-            continue
-        for field in ("surah_name_ar", "surah_name_en", "surah_name_transliteration"):
-            val = item.get(field, "")
-            if not val:
-                continue
-            val_norm = normalize_arabic(val, aggressive=True).lower()
-            val_clean = re.sub(r"^(ال|al[\-\s']*)", "", val_norm, flags=re.I).strip()
-            if (query_norm in val_norm or val_norm in query_norm
-                    or (query_clean and val_clean
-                        and (query_clean in val_clean or val_clean in query_clean))
-                    or (query_clean and query_clean in val_norm)):
-                return {
-                    "surah_number": item.get("surah_number"),
-                    "surah_name_ar": item.get("surah_name_ar", ""),
-                    "surah_name_en": item.get("surah_name_en", ""),
-                    "surah_name_transliteration": item.get("surah_name_transliteration", ""),
-                    "total_verses": item.get("total_verses"),
-                    "revelation_type": item.get("revelation_type", ""),
-                }
-    return None
-async def detect_analysis_intent(query: str, rewrite: Dict) -> Optional[str]:
-    """Detect if query is asking for word frequency analysis."""
-    # Skip surah metadata queries — those are handled by detect_surah_info
-    if (_SURAH_VERSES_AR.search(query) or _SURAH_VERSES_EN.search(query)
-            or _SURAH_TYPE_AR.search(query)
-            or rewrite.get("intent") == "surah_info"):
-        return None
-    if rewrite.get("intent") == "count":
-        kws = rewrite.get("keywords", [])
-        # Skip if keywords suggest surah metadata, not word frequency
-        kw_text = " ".join(kws)
-        if any(w in kw_text for w in ("آيات", "آية", "verses", "ayat")):
-            return None
-        return kws[0] if kws else None
-    if not (_COUNT_EN.search(query) or _COUNT_AR.search(query)):
-        return None
-    # Simple heuristic: last word after "how many"
-    for pat in (_COUNT_EN, _COUNT_AR):
-        m = pat.search(query)
-        if m:
-            tail = query[m.end():].strip().split()
-            if tail:
-                return tail[0]
-    return None
-# ═══════════════════════════════════════════════════════════════════════
-# OCCURRENCE ANALYSIS  (exact + stemmed matching)
-# ═══════════════════════════════════════════════════════════════════════
-async def count_occurrences(keyword: str, dataset: list) -> dict:
-    """Count keyword occurrences with Surah grouping."""
-    cached = await analysis_cache.get(keyword)
-    if cached:
-        return cached
-    kw_norm  = normalize_arabic(keyword, aggressive=True).lower()
-    kw_stem  = light_stem(kw_norm)
-    count    = 0
-    by_surah: Dict[int, Dict] = {}
-    examples: list = []
-    for item in dataset:
-        if item.get("type") != "quran":
-            continue
-        ar_norm  = normalize_arabic(item.get("arabic", ""), aggressive=True).lower()
-        combined = f"{ar_norm} {item.get('english', '')}".lower()
-        exact    = combined.count(kw_norm)
-        stemmed  = combined.count(kw_stem) - exact if kw_stem != kw_norm else 0
-        occ      = exact + stemmed
-        if occ > 0:
-            count += occ
-            surah_num = item.get("surah_number", 0)
-            if surah_num not in by_surah:
-                by_surah[surah_num] = {
-                    "name": item.get("surah_name_en", f"Surah {surah_num}"),
-                    "count": 0,
-                }
-            by_surah[surah_num]["count"] += occ
-            if len(examples) < cfg.MAX_EXAMPLES:
-                examples.append({
-                    "reference": item.get("source", ""),
-                    "arabic": item.get("arabic", ""),
-                    "english": item.get("english", ""),
-                })
-    result = {
-        "keyword":     keyword,
-        "kw_stemmed":  kw_stem,
-        "total_count": count,
-        "by_surah": dict(sorted(by_surah.items())),
-        "examples":    examples,
-    }
-    await analysis_cache.set(result, keyword)
-    return result
-# ═══════════════════════════════════════════════════════════════════════
-# HYBRID SEARCH  — dense FAISS + BM25 re-ranking + filtering
-# ═══════════════════════════════════════════════════════════════════════
-def _bm25_score(
-    query_terms: List[str],
-    doc_text: str,
-    avg_dl: float,
-    k1: float = 1.5,
-    b: float  = 0.75,
-) -> float:
-    """BM25 term-frequency scoring."""
-    doc_tokens = tokenize_ar(doc_text)
-    dl         = len(doc_tokens)
-    tf         = Counter(doc_tokens)
-    score      = 0.0
-    for term in query_terms:
-        f      = tf.get(term, 0)
-        score += (f * (k1 + 1)) / (f + k1 * (1 - b + b * dl / max(avg_dl, 1)))
-    return score
-async def hybrid_search(
-    raw_query: str,
-    rewrite: Dict,
-    embed_model: SentenceTransformer,
-    index: faiss.Index,
-    dataset: list,
-    top_n: int = cfg.TOP_K_RETURN,
-    source_type: Optional[Literal["quran", "hadith"]] = None,
-    grade_filter: Optional[str] = None,
-) -> list:
-    """Hybrid search: dense + sparse with optional filtering."""
-    cache_key = (raw_query, top_n, source_type, grade_filter)
-    cached = await search_cache.get(*cache_key)
-    if cached:
-        return cached
-    # ── 1. Dual-language dense retrieval ──────────────────────────────
-    ar_q = "query: " + rewrite["ar_query"]
-    en_q = "query: " + rewrite["en_query"]
-    embeddings = embed_model.encode(
-        [ar_q, en_q], normalize_embeddings=True, batch_size=2
-    ).astype("float32")
-    fused  = embeddings[0] + embeddings[1]
-    fused /= np.linalg.norm(fused)
-    distances, indices = index.search(fused.reshape(1, -1), cfg.TOP_K_SEARCH)
-    # ── 2. De-duplicate candidates & apply filters ─────────────────────
-    seen: set  = set()
-    candidates = []
-    for dist, idx in zip(distances[0], indices[0]):
-        item_idx = int(idx)
-        if item_idx not in seen and 0 <= item_idx < len(dataset):
-            seen.add(item_idx)
-            item = dataset[item_idx]
-            # Source type filter
-            if source_type and item.get("type") != source_type:
-                continue
-            # Grade filter (Hadith only)
-            if grade_filter and item.get("type") == "hadith":
-                item_grade = item.get("grade", "").lower()
-                if grade_filter.lower() not in item_grade:
-                    continue
-            candidates.append({**item, "_dense": float(dist)})
-    if not candidates:
-        return []
-    # ── 3. BM25 sparse scoring ─────────────────────────────────────────
-    query_terms = [
-        light_stem(kw) for kw in rewrite.get("keywords", raw_query.split())
-    ]
-    avg_dl = sum(
-        len(tokenize_ar(c.get("arabic", "") + " " + c.get("english", "")))
-        for c in candidates
-    ) / max(len(candidates), 1)
-    for c in candidates:
-        doc        = c.get("arabic", "") + " " + c.get("english", "")
-        c["_sparse"] = _bm25_score(query_terms, doc, avg_dl)
-    # ── 3.5. Phrase matching boost for exact snippets ───────────────────
-    query_norm = normalize_arabic(raw_query, aggressive=False).lower()
-    for c in candidates:
-        # For hadiths: if query contains specific text, boost exact match
-        if c.get("type") == "hadith":
-            ar_norm = normalize_arabic(c.get("arabic", ""), aggressive=False).lower()
-            # Check if any significant phrase (3+ words) from query appears in hadith
-            query_fragments = query_norm.split()
-            for i in range(len(query_fragments) - 2):
-                phrase = " ".join(query_fragments[i:i+3])
-                if len(phrase) > 5 and phrase in ar_norm:  # phrase is 5+ chars
-                    c["_sparse"] += 2.0  # boost exact phrase match
-                    break
-    # ── 4. Score fusion ────────────────────────────────────────────────
-    α          = cfg.RERANK_ALPHA
-    intent     = rewrite.get("intent", "general")
-    # For hadith authenticity queries, rely more on semantic search
-    if intent == "auth":
-        α = 0.75  # 75% dense, 25% sparse (vs default 60/40)
-    max_sparse = max((c["_sparse"] for c in candidates), default=1.0) or 1.0
-    for c in candidates:
-        base_score = α * c["_dense"] + (1 - α) * c["_sparse"] / max_sparse
-        if intent == "hadith" and c.get("type") == "hadith":
-            base_score += cfg.HADITH_BOOST
-        c["_score"] = base_score
-    candidates.sort(key=lambda x: x["_score"], reverse=True)
-    results = candidates[:top_n]
-    await search_cache.set(results, *cache_key)
-    return results
-def build_context(results: list) -> str:
-    """Format search results into context block for LLM."""
-    lines = []
-    for i, r in enumerate(results, 1):
-        source    = r.get("source") or r.get("reference") or "Unknown Source"
-        item_type = "Quranic Verse" if r.get("type") == "quran" else "Hadith"
-        grade_str = f" [Grade: {r.get('grade')}]" if r.get("grade") else ""
-        lines.append(
-            f"[{i}] 📌 {item_type}{grade_str} | {source} | score: {r.get('_score', 0):.3f}\n"
-            f"    Arabic : {r.get('arabic', '')}\n"
-            f"    English: {r.get('english', '')}"
-        )
-    return "\n\n".join(lines)
-# ═══════════════════════════════════════════════════════════════════════
-# PROMPT ENGINEERING
-# ═══════════════════════════════════════════════════════════════════════
-_PERSONA = (
-    "You are Sheikh QModel, a meticulous Islamic scholar with expertise "
-    "in Tafsir (Quranic exegesis), Hadith sciences, Fiqh, and Arabic. "
-    "You respond with scholarly rigor and modern clarity."
-)
-_TASK_INSTRUCTIONS: Dict[str, str] = {
-    "tafsir": (
-        "The user asks about a Quranic verse. Steps:\n"
-        "1. Identify the verse(s) from context.\n"
-        "2. Provide Tafsir: linguistic analysis and deeper meaning.\n"
-        "3. Draw connections to related verses.\n"
-        "4. Answer the user's question directly."
-    ),
-    "hadith": (
-        "The user asks about a Hadith. Steps:\n"
-        "1. Quote the text EXACTLY from the context below.\n"
-        "2. Explain the meaning and implications.\n"
-        "3. Note any related Hadiths.\n"
-        "CRITICAL: If the Hadith is NOT in context, say so clearly."
-    ),
-    "auth": (
-        "The user asks about Hadith authenticity. YOU MUST:\n"
-        "1. Check if the Hadith is in the context below.\n"
-        "2. If FOUND, state the grade (Sahih, Hasan, Da'if, etc.) confidently.\n"
-        "3. If found in Sahih Bukhari or Sahih Muslim, assert it is AUTHENTIC (Sahih).\n"
-        "4. Provide the Hadith text from context and explain its authenticity basis.\n"
-        "5. If NOT found after careful search, clearly state it's absent from the dataset.\n"
-        "CRITICAL: Use the context provided. Do not rely on your training data."
-    ),
-    "fatwa": (
-        "The user seeks a religious ruling. Steps:\n"
-        "1. Gather evidence from Quran + Sunnah in context.\n"
-        "2. Reason step-by-step to a conclusion.\n"
-        "3. If insufficient, state so explicitly."
-    ),
-    "count": (
-        "The user asks for word frequency. Steps:\n"
-        "1. State the ANALYSIS RESULT prominently.\n"
-        "2. List example occurrences with Surah names.\n"
-        "3. Comment on significance."
-    ),
-    "surah_info": (
-        "The user asks about surah metadata. Steps:\n"
-        "1. State the answer from the SURAH INFORMATION block EXACTLY.\n"
-        "2. Use the total_verses number precisely — do NOT guess or calculate.\n"
-        "3. Mention the revelation type (Meccan/Medinan) if available.\n"
-        "4. Optionally add brief scholarly context about the surah."
-    ),
-    "general": (
-        "The user has a general Islamic question. Steps:\n"
-        "1. Give a direct answer first.\n"
-        "2. Support with evidence from context.\n"
-        "3. Conclude with a summary."
-    ),
-}
-_FORMAT_RULES = """\
-For EVERY supporting evidence, use this exact format:
-┌─────────────────────────────────────────────┐
-│  ❝ {Arabic text} ❞
-│  📝 Translation: {English translation}
-│  📖 Source: {exact citation from context}
-└─────────────────────────────────────────────┘
-ABSOLUTE RULES:
-• Use ONLY content from the Islamic Context block. Zero outside knowledge.
-• Copy Arabic text and translations VERBATIM from context. Never paraphrase.
-• If a specific Hadith/verse is NOT in context → respond with:
-    "هذا الحديث/الآية غير موجود في قاعدة البيانات." (Arabic)
-    or "This Hadith/verse is not in the available dataset." (English)
-• Never invent or guess content.
-• End with: "والله أعلم." (Arabic) or "And Allah knows best." (English)
-"""
-_SYSTEM_TEMPLATE = """\
-{persona}
-{lang_instruction}
-=== YOUR TASK ===
-{task}
-=== OUTPUT FORMAT ===
-{fmt}
-=== ISLAMIC CONTEXT ===
-{context}
-=== END CONTEXT ===
-"""
-def build_messages(
-    context: str,
-    question: str,
-    lang: str,
-    intent: str,
-    analysis: Optional[dict] = None,
-    surah_info: Optional[dict] = None,
-) -> List[dict]:
-    """Build system and user messages for LLM."""
-    if surah_info:
-        info_block = (
-            f"\n[SURAH INFORMATION]\n"
-            f"Surah Name (Arabic): {surah_info['surah_name_ar']}\n"
-            f"Surah Name (English): {surah_info['surah_name_en']}\n"
-            f"Surah Number: {surah_info['surah_number']}\n"
-            f"Total Verses: {surah_info['total_verses']}\n"
-            f"Revelation Type: {surah_info['revelation_type']}\n"
-            f"Transliteration: {surah_info['surah_name_transliteration']}\n"
-        )
-        context = info_block + context
-    if analysis:
-        by_surah_str = "\n  ".join([
-            f"Surah {s}: {data['name']} ({data['count']} times)"
-            for s, data in analysis["by_surah"].items()
-        ])
-        analysis_block = (
-            f"\n[ANALYSIS RESULT]\n"
-            f"The keyword «{analysis['keyword']}» appears {analysis['total_count']} times.\n"
-            f"  {by_surah_str}\n"
-        )
-        context = analysis_block + context
-    system = _SYSTEM_TEMPLATE.format(
-        persona=_PERSONA,
-        lang_instruction=language_instruction(lang),
-        task=_TASK_INSTRUCTIONS.get(intent, _TASK_INSTRUCTIONS["general"]),
-        fmt=_FORMAT_RULES,
-        context=context,
-    )
-    cot = {
-        "arabic": "فكّر خطوةً بخطوة، ثم أجب: ",
-        "mixed":  "Think step by step: ",
-    }.get(lang, "Think step by step: ")
-    return [
-        {"role": "system", "content": system},
-        {"role": "user",   "content": cot + question},
-    ]
-def _not_found_answer(lang: str) -> str:
-    """Safe fallback when confidence is too low."""
-    if lang == "arabic":
-        return (
-            "لم أجد في قاعدة البيانات ما يكفي للإجابة على هذا السؤال بدقة.\n"
-            "يُرجى الرجوع إلى مصادر إسلامية موثوقة.\n"
-            "والله أعلم."
-        )
-    return (
-        "The available dataset does not contain sufficient information to answer "
-        "this question accurately.\nPlease refer to trusted Islamic sources.\n"
-        "And Allah knows best."
-    )
-# ═══════════════════════════════════════════════════════════════════════
-# HADITH GRADE INFERENCE
-# ═══════════════════════════════════════════════════════════════════════
-def infer_hadith_grade(item: dict) -> dict:
-    """Infer hadith grade from collection name if not present."""
-    if item.get("type") != "hadith" or item.get("grade"):
-        return item
-    # Map collection names to grades
-    collection = item.get("collection", "").lower()
-    reference = item.get("reference", "").lower()
-    combined = f"{collection} {reference}"
-    # Sahih collections (highest authenticity)
-    if any(s in combined for s in ["sahih al-bukhari", "sahih bukhari", "bukhari"]):
-        item["grade"] = "Sahih"
-    elif any(s in combined for s in ["sahih muslim", "sahih al-muslim"]):
-        item["grade"] = "Sahih"
-    elif any(s in combined for s in ["sunan an-nasai", "sunan an-nasa", "nasa'i", "nasa"]):
-        item["grade"] = "Sahih"
-    # Hasan collections
-    elif any(s in combined for s in ["jami at-tirmidhi", "tirmidhi", "at-tirmidhi"]):
-        item["grade"] = "Hasan"
-    elif any(s in combined for s in ["sunan abu dawood", "abu dawood", "abo daud", "abou daoude"]):
-        item["grade"] = "Hasan"
-    elif any(s in combined for s in ["sunan ibn majah", "ibn majah", "ibn maja"]):
-        item["grade"] = "Hasan"
-    elif any(s in combined for s in ["muwatta malik", "muwatta", "malik"]):
-        item["grade"] = "Hasan"
-    # New collections from enrichment
-    elif any(s in combined for s in ["musnad ahmad", "ahmad", "ahmed"]):
-        item["grade"] = "Hasan/Sahih"
-    elif any(s in combined for s in ["sunan al-darimi", "darimi", "al-darimi"]):
-        item["grade"] = "Hasan"
-    return item
-# ═══════════════════════════════════════════════════════════════════════
-# APP STATE
-# ═══════════════════════════════════════════════════════════════════════
-class AppState:
-    embed_model: Optional[SentenceTransformer] = None
-    faiss_index: Optional[faiss.Index]         = None
-    dataset:     Optional[list]                = None
-    llm:         Optional[LLMProvider]         = None
-    ready:       bool                          = False
-state = AppState()
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Initialize state on startup."""
-    logger.info("⏳  Loading embed model:  %s", cfg.EMBED_MODEL)
-    state.embed_model = SentenceTransformer(cfg.EMBED_MODEL)
-    logger.info("⏳  Loading FAISS index:  %s", cfg.FAISS_INDEX)
-    state.faiss_index = faiss.read_index(cfg.FAISS_INDEX)
-    logger.info("⏳  Loading metadata:     %s", cfg.METADATA_FILE)
-    with open(cfg.METADATA_FILE, "r", encoding="utf-8") as f:
-        state.dataset = json.load(f)
-    # Infer hadith grades from collection names
-    state.dataset = [infer_hadith_grade(item) for item in state.dataset]
-    logger.info("⏳  Initializing LLM provider: %s", cfg.LLM_BACKEND)
-    state.llm = get_llm_provider()
-    state.ready = True
-    logger.info(
-        "✅  QModel v4 ready | backend=%s | dataset=%d | faiss=%d | threshold=%.2f",
-        cfg.LLM_BACKEND,
-        len(state.dataset) if state.dataset else 0,
-        state.faiss_index.ntotal if state.faiss_index else 0,
-        cfg.CONFIDENCE_THRESHOLD,
-    )
-    yield
-    state.ready = False
-    logger.info("🛑  QModel shutdown")
 # ═══════════════════════════════════════════════════════════════════════
 # FASTAPI APP
 # ═══════════════════════════════════════════════════════════════════════
 app = FastAPI(
-    title="QModel v4 — Islamic RAG API",
-    description="Specialized Quran & Hadith system with dual LLM backend",
-    version="4.0.0",
     lifespan=lifespan,
 )
@@ -1035,451 +61,11 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# ═══════════════════════════════════════════════════════════════════════
-# SCHEMAS
-# ═══════════════════════════════════════════════════════════════════════
-class ChatMessage(BaseModel):
-    role:    str = Field(..., pattern="^(system|user|assistant)$")
-    content: str = Field(..., min_length=1, max_length=4000)
-class AnalysisResult(BaseModel):
-    keyword:     str
-    kw_stemmed:  str
-    total_count: int
-    by_surah:    Dict[int, Dict]
-    examples:    List[dict]
-class SourceItem(BaseModel):
-    source:        str
-    type:          str
-    grade:         Optional[str] = None
-    arabic:        str
-    english:       str
-    _score:        float
-class AskResponse(BaseModel):
-    question:     str
-    answer:       str
-    language:     str
-    intent:       str
-    analysis:     Optional[AnalysisResult] = None
-    sources:      List[SourceItem]
-    top_score:    float
-    latency_ms:   int
-class HadithVerifyResponse(BaseModel):
-    query:        str
-    found:        bool
-    collection:   Optional[str] = None
-    grade:        Optional[str] = None
-    reference:    Optional[str] = None
-    arabic:       Optional[str] = None
-    english:      Optional[str] = None
-    latency_ms:   int
-# ═══════════════════════════════════════════════════════════════════════
-# OPENAI-COMPATIBLE SCHEMAS  (for Open-WebUI integration)
-# ═══════════════════════════════════════════════════════════════════════
-class ChatCompletionMessage(BaseModel):
-    role:    str = Field(..., description="Message role: system, user, or assistant")
-    content: str = Field(..., description="Message content")
-class ChatCompletionRequest(BaseModel):
-    model:       str            = Field(default="QModel", description="Model name")
-    messages:    List[ChatCompletionMessage] = Field(..., description="Messages for the model")
-    temperature: Optional[float] = Field(default=cfg.TEMPERATURE, ge=0.0, le=2.0)
-    top_p:       Optional[float] = Field(default=1.0, ge=0.0, le=1.0)
-    max_tokens:  Optional[int]   = Field(default=cfg.MAX_TOKENS, ge=1, le=8000)
-    top_k:       Optional[int]   = Field(default=5, ge=1, le=20, description="Islamic sources to retrieve")
-    stream:      Optional[bool]  = Field(default=False, description="Enable streaming responses")
-class ChatCompletionChoice(BaseModel):
-    index:        int
-    message:      ChatCompletionMessage
-    finish_reason: str = "stop"
-class ChatCompletionResponse(BaseModel):
-    id:      str
-    object:  str = "chat.completion"
-    created: int
-    model:   str
-    choices: List[ChatCompletionChoice]
-    usage:   dict
-    x_metadata: Optional[dict] = None  # QModel-specific metadata
-class ModelInfo(BaseModel):
-    id:       str
-    object:   str = "model"
-    created:  int
-    owned_by: str = "elgendy"
-    permission: List[dict] = Field(default_factory=list)
-    root:     Optional[str] = None
-    parent:   Optional[str] = None
-class ModelsListResponse(BaseModel):
-    object: str = "list"
-    data:   List[ModelInfo]
-# ═══════════════════════════════════════════════════════════════════════
-# CORE RAG PIPELINE
-# ═══════════════════════════════════════════════════════════════════════
-async def run_rag_pipeline(
-    question: str,
-    top_k: int = cfg.TOP_K_RETURN,
-    source_type: Optional[Literal["quran", "hadith"]] = None,
-    grade_filter: Optional[str] = None,
-) -> dict:
-    """Core RAG pipeline: rewrite → search → verify → generate."""
-    t0 = time.perf_counter()
-    # 1. Query rewriting
-    rewrite = await rewrite_query(question, state.llm)
-    intent  = rewrite.get("intent", "general")
-    # 2. Surah info detection + analysis intent + hybrid search — concurrently
-    surah_task = detect_surah_info(question, rewrite)
-    kw_task, search_task = (
-        detect_analysis_intent(question, rewrite),
-        hybrid_search(
-            question, rewrite,
-            state.embed_model, state.faiss_index, state.dataset,
-            top_k, source_type, grade_filter,
-        ),
-    )
-    surah_det, analysis_kw, results = await asyncio.gather(
-        surah_task, kw_task, search_task,
-    )
-    # 3a. Surah metadata lookup (if detected)
-    surah_info = None
-    if surah_det:
-        surah_info = await lookup_surah_info(surah_det["surah_query"], state.dataset)
-        if surah_info:
-            intent = "surah_info"
-            logger.info(
-                "Surah info: %s → %s (%d verses)",
-                surah_det["surah_query"],
-                surah_info["surah_name_en"],
-                surah_info.get("total_verses", 0),
-            )
-    # 3b. Keyword frequency count (if needed and NOT a surah info query)
-    analysis = None
-    if analysis_kw and not surah_info:
-        analysis = await count_occurrences(analysis_kw, state.dataset)
-        logger.info("Analysis: kw=%s count=%d", analysis_kw, analysis["total_count"])
-    # 4. Language detection
-    lang      = detect_language(question)
-    top_score = results[0].get("_score", 0.0) if results else 0.0
-    logger.info(
-        "Search done | intent=%s | top_score=%.3f | threshold=%.2f",
-        intent, top_score, cfg.CONFIDENCE_THRESHOLD,
-    )
-    # 5. Confidence gate — skip for surah_info (metadata is from dataset, not search)
-    if not surah_info and top_score < cfg.CONFIDENCE_THRESHOLD:
-        logger.warning(
-            "Low confidence (%.3f < %.2f) — returning safe fallback",
-            top_score, cfg.CONFIDENCE_THRESHOLD,
-        )
-        return {
-            "answer":     _not_found_answer(lang),
-            "language":   lang,
-            "intent":     intent,
-            "analysis":   analysis,
-            "sources":    results,
-            "top_score":  top_score,
-            "latency_ms": int((time.perf_counter() - t0) * 1000),
-        }
-    # 6. Build context + prompt + LLM call
-    context  = build_context(results)
-    messages = build_messages(context, question, lang, intent, analysis, surah_info)
-    try:
-        answer = await state.llm.chat(
-            messages,
-            max_tokens=cfg.MAX_TOKENS,
-            temperature=cfg.TEMPERATURE,
-        )
-    except Exception as exc:
-        logger.error("LLM call failed: %s", exc)
-        raise HTTPException(status_code=502, detail="LLM service unavailable")
-    latency = int((time.perf_counter() - t0) * 1000)
-    logger.info(
-        "Pipeline done | intent=%s | lang=%s | top_score=%.3f | %d ms",
-        intent, lang, top_score, latency,
-    )
-    return {
-        "answer":     answer,
-        "language":   lang,
-        "intent":     intent,
-        "analysis":   analysis,
-        "sources":    results,
-        "top_score":  top_score,
-        "latency_ms": latency,
-    }
-def _check_ready():
-    if not state.ready:
-        raise HTTPException(
-            status_code=503,
-            detail="Service is still initialising. Please retry shortly.",
-        )
-# ═══════════════════════════════════════════════════════════════════════
-# ENDPOINTS
-# ═══════════════════════════════════════════════════════════════════════
-@app.get("/health", tags=["ops"])
-def health():
-    """Health check endpoint."""
-    return {
-        "status":               "ok" if state.ready else "initialising",
-        "version":              "4.0.0",
-        "llm_backend":          cfg.LLM_BACKEND,
-        "dataset_size":         len(state.dataset)        if state.dataset    else 0,
-        "faiss_total":          state.faiss_index.ntotal  if state.faiss_index else 0,
-        "confidence_threshold": cfg.CONFIDENCE_THRESHOLD,
-    }
-@app.get("/v1/models", response_model=ModelsListResponse, tags=["models"])
-def list_models():
-    """List available models (OpenAI-compatible)."""
-    return ModelsListResponse(
-        data=[
-            ModelInfo(
-                id="QModel",
-                created=int(time.time()),
-                owned_by="elgendy",
-            ),
-            ModelInfo(
-                id="qmodel",  # Lowercase variant for compatibility
-                created=int(time.time()),
-                owned_by="elgendy",
-            ),
-        ]
-    )
-@app.post("/v1/chat/completions", response_model=ChatCompletionResponse, tags=["inference"])
-async def chat_completions(request: ChatCompletionRequest):
-    """OpenAI-compatible chat completions endpoint (for Open-WebUI integration)."""
-    _check_ready()
-    # Extract user message (last message with role="user")
-    user_messages = [m.content for m in request.messages if m.role == "user"]
-    if not user_messages:
-        raise HTTPException(status_code=400, detail="No user message in request")
-    question = user_messages[-1]
-    top_k = request.top_k or cfg.TOP_K_RETURN
-    temperature = request.temperature or cfg.TEMPERATURE
-    max_tokens = request.max_tokens or cfg.MAX_TOKENS
-    try:
-        result = await run_rag_pipeline(question, top_k=top_k)
-    except HTTPException:
-        raise
-    except Exception as exc:
-        logger.error("Pipeline error: %s", exc)
-        raise HTTPException(status_code=500, detail=str(exc))
-    # Handle streaming if requested
-    if request.stream:
-        return StreamingResponse(
-            _stream_response(result, request.model),
-            media_type="text/event-stream",
-        )
-    # Format response in OpenAI schema
-    return ChatCompletionResponse(
-        id=f"qmodel-{int(time.time() * 1000)}",
-        created=int(time.time()),
-        model=request.model,
-        choices=[
-            ChatCompletionChoice(
-                index=0,
-                message=ChatCompletionMessage(
-                    role="assistant",
-                    content=result["answer"],
-                ),
-            )
-        ],
-        usage={
-            "prompt_tokens":     -1,
-            "completion_tokens": -1,
-            "total_tokens":      -1,
-        },
-        x_metadata={
-            "language":       result["language"],
-            "intent":         result["intent"],
-            "top_score":      round(result["top_score"], 4),
-            "latency_ms":     result["latency_ms"],
-            "sources_count":  len(result["sources"]),
-            "sources": [
-                {
-                    "source": s.get("source") or s.get("reference", ""),
-                    "type":   s.get("type", ""),
-                    "grade":  s.get("grade"),
-                    "score":  round(s.get("_score", 0), 4),
-                }
-                for s in result.get("sources", [])[:5]
-            ],
-            "analysis": result.get("analysis"),
-        },
-    )
-async def _stream_response(result: dict, model: str):
-    """Stream response chunks in OpenAI format."""
-    import json
-    # Send answer in chunks
-    answer = result.get("answer", "")
-    for line in answer.split("\n"):
-        chunk = {
-            "id": f"qmodel-{int(time.time() * 1000)}",
-            "object": "chat.completion.chunk",
-            "created": int(time.time()),
-            "model": model,
-            "choices": [{
-                "index": 0,
-                "delta": {"content": line + "\n"},
-                "finish_reason": None,
-            }],
-        }
-        yield f"data: {json.dumps(chunk)}\n\n"
-    # Send final chunk
-    final_chunk = {
-        "id": f"qmodel-{int(time.time() * 1000)}",
-        "object": "chat.completion.chunk",
-        "created": int(time.time()),
-        "model": model,
-        "choices": [{
-            "index": 0,
-            "delta": {},
-            "finish_reason": "stop",
-        }],
-    }
-    yield f"data: {json.dumps(final_chunk)}\n\n"
-    yield "data: [DONE]\n\n"
-@app.get("/ask", response_model=AskResponse, tags=["inference"])
-async def ask(
-    q: str = Query(..., min_length=1, max_length=1000, description="Your Islamic question"),
-    top_k: int = Query(cfg.TOP_K_RETURN, ge=1, le=20, description="Number of sources"),
-    source_type: Optional[str] = Query(None, description="Filter: quran|hadith"),
-    grade_filter: Optional[str] = Query(None, description="Filter Hadith: sahih|hasan|,all"),
-):
-    """Main inference endpoint."""
-    _check_ready()
-    result = await run_rag_pipeline(q, top_k, source_type, grade_filter)
-    sources = [
-        SourceItem(
-            source=r.get("source") or r.get("reference") or "Unknown",
-            type=r.get("type", "unknown"),
-            grade=r.get("grade"),
-            arabic=r.get("arabic", ""),
-            english=r.get("english", ""),
-            _score=r.get("_score", 0.0),
-        )
-        for r in result["sources"]
-    ]
-    return AskResponse(
-        question=q,
-        answer=result["answer"],
-        language=result["language"],
-        intent=result["intent"],
-        analysis=result["analysis"],
-        sources=sources,
-        top_score=result["top_score"],
-        latency_ms=result["latency_ms"],
-    )
-@app.get("/hadith/verify", response_model=HadithVerifyResponse, tags=["hadith"])
-async def verify_hadith(
-    q: str = Query(..., description="First few words or query of Hadith"),
-    collection: Optional[str] = Query(None, description="Filter: bukhari|muslim|all"),
-):
-    """Verify if a Hadith is in authenticated collections."""
-    _check_ready()
-    t0 = time.perf_counter()
-    results = await hybrid_search(
-        q, {"ar_query": q, "en_query": q, "keywords": q.split(), "intent": "hadith"},
-        state.embed_model, state.faiss_index, state.dataset,
-        top_n=5, source_type="hadith", grade_filter="sahih",
-    )
-    if results:
-        r = results[0]
-        return HadithVerifyResponse(
-            query=q,
-            found=True,
-            collection=r.get("collection"),
-            grade=r.get("grade"),
-            reference=r.get("reference"),
-            arabic=r.get("arabic"),
-            english=r.get("english"),
-            latency_ms=int((time.perf_counter() - t0) * 1000),
-        )
-    return HadithVerifyResponse(
-        query=q,
-        found=False,
-        latency_ms=int((time.perf_counter() - t0) * 1000),
-    )
-@app.get("/debug/scores", tags=["ops"])
-async def debug_scores(
-    q: str = Query(..., min_length=1, max_length=1000),
-    top_k: int = Query(10, ge=1, le=20),
-):
-    """Debug: inspect raw retrieval scores without LLM."""
-    _check_ready()
-    rewrite = await rewrite_query(q, state.llm)
-    results = await hybrid_search(q, rewrite, state.embed_model, state.faiss_index, state.dataset, top_k)
-    return {
-        "intent":    rewrite.get("intent"),
-        "threshold": cfg.CONFIDENCE_THRESHOLD,
-        "results": [
-            {
-                "rank":    i + 1,
-                "source":  r.get("source") or r.get("reference"),
-                "type":    r.get("type"),
-                "grade":   r.get("grade"),
-                "_dense":  round(r.get("_dense", 0), 4),
-                "_sparse": round(r.get("_sparse", 0), 4),
-                "_score":  round(r.get("_score", 0), 4),
-            }
-            for i, r in enumerate(results)
-        ],
-    }
 if __name__ == "__main__":

 """
+QModel v6 — Islamic RAG API
 ===========================
 Specialized Quran & Hadith system with dual LLM backend support.
+Modular architecture — see app/ package for implementation:
+  app/config.py      – Config (env vars)
+  app/llm.py         – LLM providers (Ollama, HuggingFace)
+  app/cache.py       – TTL-LRU async cache
+  app/arabic_nlp.py  – Arabic normalisation & stemming
+  app/search.py      – Hybrid FAISS + BM25 search, text search
+  app/analysis.py    – Intent detection, analytics, counting
+  app/prompts.py     – Prompt engineering
+  app/models.py      – Pydantic schemas
+  app/state.py       – AppState, lifespan, RAG pipeline
+  app/routers/       – FastAPI routers (quran, hadith, chat, ops)
 """
 from __future__ import annotations
 import logging
 from dotenv import load_dotenv
+from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 load_dotenv()
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
 )
+from app.config import cfg
+from app.state import lifespan
+from app.routers import chat, hadith, ops, quran
 # ═══════════════════════════════════════════════════════════════════════
 # FASTAPI APP
 # ═══════════════════════════════════════════════════════════════════════
 app = FastAPI(
+    title="QModel v6 — Islamic RAG API",
+    description=(
+        "Specialized Quran & Hadith system with dual LLM backend.\n\n"
+        "**Capabilities:**\n"
+        "- Quran verse lookup by text or topic\n"
+        "- Quran word frequency & analytics\n"
+        "- Hadith lookup by text or topic\n"
+        "- Hadith authenticity verification\n"
+        "- OpenAI-compatible chat completions"
+    ),
+    version="5.0.0",
     lifespan=lifespan,
 )
     allow_headers=["*"],
 )
+# Register routers
+app.include_router(ops.router)
+app.include_router(chat.router)
+app.include_router(quran.router)
+app.include_router(hadith.router)
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,21 +1,24 @@
 # Web framework
-fastapi==0.104.1
-uvicorn[standard]==0.24.0
-pydantic==2.4.2
 # Core: Embeddings & Search
-sentence-transformers==2.2.2
-faiss-cpu==1.7.4
-numpy==1.24.3
 # Optional: HuggingFace backend
-transformers==4.34.1
-torch==2.1.1
-accelerate==0.24.1
 # Optional: Ollama backend
-ollama==0.0.48
 # Configuration & Data
-python-dotenv==1.0.0
-requests==2.31.0

 # Web framework
+fastapi>=0.104.1
+uvicorn[standard]>=0.24.0
+pydantic>=2.4.2
 # Core: Embeddings & Search
+sentence-transformers>=2.2.2
+faiss-cpu>=1.7.4
+numpy>=1.24.3
 # Optional: HuggingFace backend
+transformers>=4.34.1
+torch>=2.1.1
+accelerate>=0.24.1
 # Optional: Ollama backend
+ollama>=0.0.48
+# Optional: GGUF backend (llama-cpp-python)
+llama-cpp-python>=0.2.0
 # Configuration & Data
+python-dotenv>=1.0.0
+requests>=2.31.0