Spaces:
Runtime error
Runtime error
Commit ·
b62e029
0
Parent(s):
Initial commit
Browse files- .gitattributes +35 -0
- .github/workflows/deploy.yml +45 -0
- .gitignore +1 -0
- Dockerfile +22 -0
- README.md +106 -0
- api/dependencies.py +12 -0
- api/schemas/search.py +44 -0
- api/v1/search.py +91 -0
- api/v1/system.py +12 -0
- core/config.py +54 -0
- core/exceptions.py +72 -0
- core/logger.py +45 -0
- main.py +109 -0
- models/embedder.py +98 -0
- models/reranker.py +77 -0
- requirements.txt +31 -0
- scripts/data_pipeline.py +387 -0
- scripts/setup_db.py +51 -0
- services/search_service.py +146 -0
- storage/qdrant_client.py +74 -0
- storage/sqlite_client.py +72 -0
- templates/index.html +92 -0
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.github/workflows/deploy.yml
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CI/CD: mirror the repository to the Hugging Face Space on every push to main
# that touches runtime code, templates, or deployment configuration.
name: Deploy to Hugging Face Spaces

on:
  push:
    branches: [ main ]
    # Only redeploy when files that affect the running Space change.
    paths:
      - "api/**"
      - "core/**"
      - "models/**"
      - "services/**"
      - "storage/**"
      - "scripts/setup_db.py"
      - "templates/**"
      - "static/**"
      - "utils/**"
      - "main.py"
      - "Dockerfile"
      - "requirements.txt"
      - ".github/workflows/deploy.yml"
      - "README.md"

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history so the force-push below carries all commits.
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install HF CLI
        run: pip install "huggingface_hub[cli]"

      - name: Push to HF Space
        env:
          # NOTE(review): the token ends up embedded in the git remote URL
          # below. Actions masks secrets in logs, but a credential helper
          # would avoid tokens in URLs entirely — consider confirming/changing.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git remote add hf https://m97j:$HF_TOKEN@huggingface.co/spaces/m97j/knowledge-engine

          git push --force hf main
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
_old.git/
|
Dockerfile
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dockerfile

# Slim Python base keeps the image small.
FROM python:3.11-slim

# Do not write .pyc files; flush stdout/stderr immediately (container-friendly logging).
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# build-essential lets pip compile any native wheels; the apt cache is removed
# in the same layer to keep the image size down.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency-install layer is cached
# independently of source-code changes.
COPY requirements.txt .
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

COPY . .

# Persist the knowledge-base data (Qdrant + SQLite) outside the image layers.
VOLUME ["/app/data"]

# 7860 is the port Hugging Face Spaces expects for Docker apps (see README app_port).
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Knowledge Engine
|
| 3 |
+
emoji: 🔍
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: gray
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
license: apache-2.0
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# 🔍 Knowledge Engine
|
| 13 |
+
|
| 14 |
+
[](https://huggingface.co/spaces/m97j/knowledge-engine)
|
| 15 |
+
[](https://www.python.org/downloads/release/python-3100/)
|
| 16 |
+
[](https://opensource.org/licenses/Apache-2.0)
|
| 17 |
+
|
| 18 |
+
> **High-performance Hybrid Search & Reranking Engine based on BGE-M3.**
> An advanced knowledge retrieval API system that combines Dense/Sparse embeddings and optimizes precision with Cross-Encoders.
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## 🚀 Key Features
|
| 24 |
+
* **Hybrid Search:** Seamlessly combines Dense & Sparse vector retrieval using Qdrant's Native Fusion API (BGE-M3).
|
| 25 |
+
* **Re-ranking:** Ensures top-tier precision by re-ordering search results via Cross-Encoder models.
|
| 26 |
+
* **Clean Architecture:** Highly modularized layers (API, Service, Storage, Models) for superior maintainability and scalability.
|
| 27 |
+
* **CI/CD Pipeline:** Fully automated deployment to Hugging Face Spaces using GitHub Actions and Docker.
|
| 28 |
+
* **Auto-Healing Data:** Robust startup logic via FastAPI `lifespan` that automatically synchronizes and validates the knowledge base.
|
| 29 |
+
|
| 30 |
+
---
|
| 31 |
+
|
| 32 |
+
## 🏗 Project Structure
|
| 33 |
+
This project follows the **Separation of Concerns (SoC)** principle to ensure the system remains extensible and testable.
|
| 34 |
+
|
| 35 |
+
```text
|
| 36 |
+
├── api/ # API Routing & Dependency Injection (DI)
|
| 37 |
+
├── core/ # Global Configuration (Pydantic Settings) & Exception Handling
|
| 38 |
+
├── models/ # AI Model Inference (Embedder, Reranker)
|
| 39 |
+
├── services/ # Business Logic & Search Pipeline Orchestration
|
| 40 |
+
├── storage/ # Infrastructure Layer (Qdrant, SQLite Clients)
|
| 41 |
+
├── scripts/ # Data Pipeline & Database Setup Scripts
|
| 42 |
+
├── templates/ # Demo UI (Jinja2 Templates)
|
| 43 |
+
└── main.py # App Entry Point & Lifespan Management
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
---
|
| 47 |
+
|
| 48 |
+
## 🛠 Tech Stack
|
| 49 |
+
* **Framework:** FastAPI
|
| 50 |
+
* **Vector DB:** Qdrant (Local Path Mode)
|
| 51 |
+
* **RDBMS:** SQLite (Metadata & Corpus Storage)
|
| 52 |
+
* **ML Models:**
|
| 53 |
+
* `BAAI/bge-m3` (Multi-functional Embedding)
|
| 54 |
+
* `BAAI/bge-reranker-v2-m3` (Cross-Encoder)
|
| 55 |
+
* **DevOps:** Docker, GitHub Actions, Hugging Face Hub
|
| 56 |
+
|
| 57 |
+
---
|
| 58 |
+
|
| 59 |
+
## 🔧 Installation & Setup
|
| 60 |
+
|
| 61 |
+
### Prerequisites
|
| 62 |
+
* Python 3.10 or higher
|
| 63 |
+
* Hugging Face Access Token (Read/Write)
|
| 64 |
+
|
| 65 |
+
### Running Locally
|
| 66 |
+
1. Clone the repository:
|
| 67 |
+
```bash
|
| 68 |
+
git clone https://github.com/m97j/knowledge-engine.git
|
| 69 |
+
cd knowledge-engine
|
| 70 |
+
```
|
| 71 |
+
2. Install dependencies:
|
| 72 |
+
```bash
|
| 73 |
+
pip install -r requirements.txt
|
| 74 |
+
```
|
| 75 |
+
3. Run the application (The system will automatically download the necessary DB files on startup):
|
| 76 |
+
```bash
|
| 77 |
+
python main.py
|
| 78 |
+
# OR using uvicorn
|
| 79 |
+
uvicorn main:app --host 0.0.0.0 --port 7860
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
---
|
| 83 |
+
|
| 84 |
+
## 📡 API Endpoints
|
| 85 |
+
| Method | Endpoint | Description |
|
| 86 |
+
| :--- | :--- | :--- |
|
| 87 |
+
| `GET` | `/` | Redirects to Search Demo UI |
|
| 88 |
+
| `POST` | `/api/v1/search/` | Executes JSON-based Hybrid Search |
|
| 89 |
+
| `GET` | `/api/v1/system/health/ping` | System health check (Heartbeat) |
|
| 90 |
+
|
| 91 |
+
---
|
| 92 |
+
|
| 93 |
+
## 💡 Architecture Insights
|
| 94 |
+
1. **Dependency Injection:** Uses FastAPI `app.state` to manage singletons of AI models and DB clients, allowing for easy mocking during unit testing.
|
| 95 |
+
2. **Hybrid RAG Pipeline:** Beyond simple vector similarity, this engine leverages Sparse embeddings for keyword-level precision, merged via Reciprocal Rank Fusion (RRF).
|
| 96 |
+
3. **Deployment Ready:** Optimized for PaaS environments (like HF Spaces) through a containerized Docker setup and automated CI/CD.
|
| 97 |
+
|
| 98 |
+
---
|
| 99 |
+
|
| 100 |
+
## 📄 Documentation
|
| 101 |
+
For more detailed technical documentation, design decisions, and troubleshooting, please visit:
|
| 102 |
+
* [Personal Archive Link](https://minjae-portfolio.vercel.app/projects/ke)
|
| 103 |
+
* [Technical Design Blog](https://minjae-portfolio.vercel.app/blogs/ke-pd)
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
---
|
api/dependencies.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# api/dependencies.py
|
| 2 |
+
|
| 3 |
+
from fastapi import Request
|
| 4 |
+
|
| 5 |
+
from services.search_service import HybridSearchService
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def get_search_service(request: Request) -> HybridSearchService:
    """
    FastAPI dependency resolver for the shared search service.

    The service is constructed once during the app's lifespan and stored on
    ``app.state``; every request receives that same singleton instance.
    """
    service: HybridSearchService = request.app.state.search_service
    return service
|
api/schemas/search.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# api/schemas/search.py
|
| 2 |
+
|
| 3 |
+
from typing import Any, Dict, List, Optional
|
| 4 |
+
|
| 5 |
+
from pydantic import BaseModel, Field
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# ---------------------------
|
| 9 |
+
# Request
|
| 10 |
+
# ---------------------------
|
| 11 |
+
class SearchRequest(BaseModel):
    """JSON request body for the hybrid-search endpoint."""

    # Free-text search query (required).
    query: str = Field(..., description="Search query")
    # Number of results to return; validation clamps it to 1..50.
    top_k: int = Field(default=5, ge=1, le=50)

    # optional
    # NOTE(review): this flag is accepted here but the visible endpoints do
    # not forward it to the search service — confirm before relying on it.
    use_reranker: Optional[bool] = True
| 17 |
+
|
| 18 |
+
# ---------------------------
|
| 19 |
+
# Document metadata
|
| 20 |
+
# ---------------------------
|
| 21 |
+
class DocumentMetadata(BaseModel):
    """Document-level metadata attached to each returned chunk."""

    # Identifier of the source document (distinct from the chunk id).
    doc_id: int
    title: str
    # Language code of the document; exact format not shown here — presumably
    # an ISO code, verify against the data pipeline.
    lang: str
    # Source URL, when the document has one.
    url: Optional[str] = None
    # Last-modified date as a string; serialization format defined upstream.
    date_modified: Optional[str] = None
|
| 27 |
+
|
| 28 |
+
# ---------------------------
|
| 29 |
+
# Result item (LLM-friendly)
|
| 30 |
+
# ---------------------------
|
| 31 |
+
class SearchResultItem(BaseModel):
    """A single ranked chunk, shaped to be easy to feed to an LLM."""

    chunk_id: int
    # Raw text content of the chunk.
    text: str
    score: float = Field(..., description="Reranking score (0.0 to 1.0)")
    # Parent-document metadata for attribution/citation.
    metadata: DocumentMetadata
    # Extra per-stage scoring info (populated only when requested upstream).
    scoring_details: Optional[Dict[str, Any]] = None  # optional
|
| 37 |
+
|
| 38 |
+
# ---------------------------
|
| 39 |
+
# Response
|
| 40 |
+
# ---------------------------
|
| 41 |
+
class SearchResponse(BaseModel):
    """Response envelope returned by the search endpoint."""

    # Echo of the query that produced these results.
    query: str
    results: List[SearchResultItem]
    # End-to-end search latency in whole milliseconds.
    latency_ms: int
|
api/v1/search.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# api/v1/search.py
|
| 2 |
+
|
| 3 |
+
from fastapi import APIRouter, Depends, Form, HTTPException, Request
|
| 4 |
+
from fastapi.responses import HTMLResponse
|
| 5 |
+
from fastapi.templating import Jinja2Templates
|
| 6 |
+
|
| 7 |
+
from api.dependencies import get_search_service
|
| 8 |
+
from api.schemas.search import SearchRequest, SearchResponse
|
| 9 |
+
from core.logger import setup_logger
|
| 10 |
+
from services.search_service import HybridSearchService
|
| 11 |
+
|
| 12 |
+
# Module-level singletons: logger, the router (mounted under /api/v1 by
# main.py), and the Jinja2 loader used by the HTML demo endpoints.
logger = setup_logger("search_api")

router = APIRouter(prefix="/search", tags=["Search"])
templates = Jinja2Templates(directory="templates")
|
| 16 |
+
|
| 17 |
+
# -------------------------------------
|
| 18 |
+
# Json API Endpoint for Hybrid Search
|
| 19 |
+
# -------------------------------------
|
| 20 |
+
@router.post("/", response_model=SearchResponse, summary="Execute Hybrid Search (JSON)")
async def execute_search(
    request_data: SearchRequest,
    search_service: HybridSearchService = Depends(get_search_service)
):
    """
    Execute a hybrid search using the provided query and parameters.

    Returns a ``SearchResponse`` with ranked results and total latency.
    Maps ``ValueError`` from the service to HTTP 400 and any other failure
    to a generic HTTP 500.

    NOTE(review): ``request_data.use_reranker`` is accepted by the schema but
    not passed to ``search_service.search`` here — confirm intent.
    NOTE(review): ``search_service.search`` is called synchronously inside an
    async endpoint; if it does heavy work it blocks the event loop — consider
    ``starlette.concurrency.run_in_threadpool``.
    """
    try:
        # Delegate the full pipeline (embed -> retrieve -> rerank) to the service.
        search_output = search_service.search(
            query=request_data.query,
            top_k=request_data.top_k
        )
        return SearchResponse(
            query=search_output["query"],
            results=search_output["results"],
            latency_ms=search_output["latency_ms"]
        )

    except ValueError as ve:
        # Client-side problem (e.g. invalid query): surface as 400 with detail.
        logger.warning(f"Invalid search request: {ve}")
        raise HTTPException(status_code=400, detail=str(ve))
    except Exception as e:
        # Server fault: log the stack trace but hide internals from the client.
        logger.error(f"Search Execution Failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error during search.")
|
| 45 |
+
|
| 46 |
+
# -------------------------------------
|
| 47 |
+
# HTML Demo Endpoint for Manual Testing
|
| 48 |
+
# -------------------------------------
|
| 49 |
+
@router.get("/demo", response_class=HTMLResponse, summary="Search Demo UI (GET)")
async def demo_page_get(request: Request):
    """
    Serve the empty demo search form.

    Renders ``index.html`` with no results and a blank query string so the
    page can be used for manual, browser-based testing of hybrid search.
    """
    context = {"request": request, "results": None, "query": ""}
    return templates.TemplateResponse("index.html", context)
|
| 58 |
+
|
| 59 |
+
@router.post("/demo", response_class=HTMLResponse, summary="Search Demo UI (POST)")
async def demo_page_post(
    request: Request,
    query: str = Form(...),
    search_service: HybridSearchService = Depends(get_search_service)
):
    """
    Handle form submission from the demo page, execute the search, and render results in the same template.

    On failure the same template is re-rendered with an error banner and a
    500 status (HTML, not JSON) so the demo UX stays in the browser.
    """
    try:
        # Fixed top_k=5 for the demo page; the JSON API exposes the knob.
        search_output = search_service.search(query=query, top_k=5)

        return templates.TemplateResponse(
            "index.html",
            {
                "request": request,
                "results": search_output["results"],
                "query": query,
                "latency_ms": search_output["latency_ms"]
            }
        )
    except Exception as e:
        # Log with stack trace, then re-render the form with a friendly banner.
        logger.error(f"Demo Search Failed: {e}", exc_info=True)
        return templates.TemplateResponse(
            "index.html",
            {
                "request": request,
                "results": None,
                "query": query,
                "error_message": "An error occurred while processing your search. Please try again."
            },
            status_code=500
        )
|
api/v1/system.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# api/v1/system.py
|
| 2 |
+
|
| 3 |
+
from fastapi import APIRouter
|
| 4 |
+
|
| 5 |
+
# Router is mounted under /api/v1 by main.py -> final path: /api/v1/health/ping.
router = APIRouter(prefix="/health", tags=["Health Check"])

# ---------------------------
# Debug endpoint (optional)
# ---------------------------
@router.get("/ping")
def ping():
    """Liveness probe: returns a constant payload so monitors can verify the API is up."""
    return {"message": "pong"}
|
core/config.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# core/config.py
|
| 2 |
+
|
| 3 |
+
from functools import lru_cache
|
| 4 |
+
|
| 5 |
+
from pydantic import Field
|
| 6 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class Settings(BaseSettings):
    """
    Application-wide configuration.

    Reads values from a ``.env`` file or system environment variables and
    strictly validates types using Pydantic. Access via ``get_settings()``
    or the module-level ``settings`` singleton.
    """

    # 1. Project Info
    PROJECT_NAME: str = Field(default="Knowledge Engine", description="Project name")
    VERSION: str = Field(default="1.0.0", description="API version")
    ENVIRONMENT: str = Field(default="development", description="Execution environment (development, staging, production)")
    LOG_LEVEL: str = Field(default="INFO", description="Global logging level")
    DATA_DIR: str = Field(default="./data", description="Data storage directory path")
    REPO_ID: str = Field(default="m97j/ke-store", description="Hugging Face repository ID")

    # 2. Storage Settings (Vector DB & RDBMS)
    QDRANT_PATH: str = Field(default="./data/qdrant", description="Qdrant local storage path")
    QDRANT_COLLECTION: str = Field(default="knowledge_base", description="Qdrant collection name")
    SQLITE_PATH: str = Field(default="./data/corpus/corpus.sqlite", description="SQLite DB file path")

    # 3. Model Settings (Embedder & Reranker)
    EMBEDDER_NAME: str = Field(default="BAAI/bge-m3", description="FlagEmbedding model name")
    RERANKER_NAME: str = Field(default="BAAI/bge-reranker-v2-m3", description="Cross-Encoder model name")
    USE_FP16: bool = Field(default=True, description="Whether to use FP16 precision in GPU environment")

    # 4. Search Hyperparameters
    DEFAULT_TOP_K: int = Field(default=5, description="Final number of documents to return")
    QDRANT_FETCH_LIMIT: int = Field(default=50, description="Number of candidates to fetch from Vector DB before reranking")

    # Pydantic v2 settings
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=True,  # case-sensitive environment variables
        extra="ignore"  # ignore unexpected fields in .env or environment variables
    )
|
| 44 |
+
|
| 45 |
+
@lru_cache()
def get_settings() -> Settings:
    """
    Return the Settings object, cached as a process-wide singleton.

    ``lru_cache`` guarantees the environment/.env file is read and validated
    only once, no matter how many modules call this.
    """
    return Settings()

# Instantiate as a global variable so that it can be easily imported from other modules
settings = get_settings()
|
core/exceptions.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# core/exceptions.py
|
| 2 |
+
|
| 3 |
+
from fastapi import Request, status
|
| 4 |
+
from fastapi.responses import JSONResponse
|
| 5 |
+
|
| 6 |
+
from core.logger import setup_logger
|
| 7 |
+
|
| 8 |
+
logger = setup_logger("exception_handler")
|
| 9 |
+
|
| 10 |
+
# ---------------------------------------------------
|
| 11 |
+
# Base Exception (Parent class of all custom errors)
|
| 12 |
+
# ---------------------------------------------------
|
| 13 |
+
class KnowledgeEngineException(Exception):
    """Base custom exception class for the Knowledge Engine application.

    Carries an HTTP status alongside the message so the global handler can
    turn any domain error into a consistent JSON response.
    """
    def __init__(self, message: str, status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR):
        # Human-readable description, echoed in the JSON error body.
        self.message = message
        # HTTP status the exception handler should respond with.
        self.status_code = status_code
        super().__init__(self.message)
|
| 19 |
+
|
| 20 |
+
# ---------------------------------------------------
|
| 21 |
+
# Domain Specific Exceptions (Hierarchical error)
|
| 22 |
+
# ---------------------------------------------------
|
| 23 |
+
class ModelLoadError(KnowledgeEngineException):
    """models/ layer where model (Embedder/Reranker) loading fails."""
    def __init__(self, message: str):
        # 503: the service is up but a required model is unavailable.
        super().__init__(message, status_code=status.HTTP_503_SERVICE_UNAVAILABLE)
|
| 27 |
+
|
| 28 |
+
class DatabaseError(KnowledgeEngineException):
    """storage/ layer where Qdrant or SQLite integration fails."""
    def __init__(self, message: str):
        # 503: backing store unreachable/broken — retryable from the client's view.
        super().__init__(message, status_code=status.HTTP_503_SERVICE_UNAVAILABLE)
|
| 32 |
+
|
| 33 |
+
class SearchExecutionError(KnowledgeEngineException):
    """services/ layer where the search pipeline (Hybrid Search) encounters a logical error."""
    def __init__(self, message: str):
        # 500: internal pipeline fault, not attributable to the caller.
        super().__init__(message, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
| 37 |
+
|
| 38 |
+
class InvalidQueryError(KnowledgeEngineException):
    """api/ layer where user input is invalid (e.g., empty query, unsupported parameters)."""
    def __init__(self, message: str):
        # 400: the caller can fix this by changing the request.
        super().__init__(message, status_code=status.HTTP_400_BAD_REQUEST)
|
| 42 |
+
|
| 43 |
+
# -----------------------------------
|
| 44 |
+
# FastAPI Exception Handler
|
| 45 |
+
# -----------------------------------
|
| 46 |
+
async def custom_exception_handler(request: Request, exc: KnowledgeEngineException):
    """
    When a custom exception occurs in a FastAPI app,
    catch it and convert it into a consistent JSON error response.
    """
    # One-line audit trail: status, method, full URL and the domain message.
    logger.error(f"[{exc.status_code}] {request.method} {request.url} - {exc.message}")
    return JSONResponse(
        status_code=exc.status_code,
        content={
            # The exception class name doubles as a machine-readable error code.
            "error": exc.__class__.__name__,
            "message": exc.message,
            "path": str(request.url.path)
        }
    )
|
| 60 |
+
async def global_exception_handler(request: Request, exc: Exception):
    """Catch any unhandled exceptions that are not instances of KnowledgeEngineException,
    log them, and return a generic error response."""
    logger.critical(f"Unhandled Exception: {str(exc)}", exc_info=True)  # Log stack trace for debugging
    return JSONResponse(
        status_code=500,
        # Deliberately generic body: never leak internal details to clients.
        content={"error": "InternalServerError", "message": "An unexpected error occurred."}
    )
|
| 68 |
+
|
| 69 |
+
def setup_exception_handlers(app):
    """Register custom exception handlers to the FastAPI app."""
    # Domain errors get the structured handler; everything else falls through
    # to the generic 500 handler.
    handler_map = (
        (KnowledgeEngineException, custom_exception_handler),
        (Exception, global_exception_handler),
    )
    for exc_type, handler in handler_map:
        app.add_exception_handler(exc_type, handler)
|
core/logger.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# core/logger.py
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
import sys
|
| 5 |
+
from typing import Optional
|
| 6 |
+
|
| 7 |
+
# Resolve the default log level from app settings when available; fall back
# to INFO when core.config cannot be imported (e.g. standalone use of this module).
try:
    from core.config import settings
    DEFAULT_LOG_LEVEL = settings.LOG_LEVEL
except ImportError:
    DEFAULT_LOG_LEVEL = "INFO"

# logging format: timestamp | log level | logger name | message
LOG_FORMAT = "%(asctime)s | %(levelname)-8s | %(name)s | %(message)s"
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

def setup_logger(name: str, level: Optional[str] = None) -> logging.Logger:
    """
    Return a standardized, console-only logger for the given module name.

    Repeated calls with the same ``name`` hand back the already-configured
    instance untouched, so handlers are never duplicated.
    Usage: logger = setup_logger(__name__)
    """
    log = logging.getLogger(name)

    # Configured by a previous call: return it unchanged (the new level, if
    # any, is intentionally ignored).
    if log.handlers:
        return log

    # Resolve the effective level; unrecognized names silently fall back to INFO.
    chosen = (level or DEFAULT_LOG_LEVEL).upper()
    log.setLevel(getattr(logging, chosen, logging.INFO))

    # Keep records out of ancestor loggers to avoid double emission.
    log.propagate = False

    # Single stdout handler mirroring the logger's own level.
    stream = logging.StreamHandler(sys.stdout)
    stream.setLevel(log.level)
    stream.setFormatter(logging.Formatter(fmt=LOG_FORMAT, datefmt=DATE_FORMAT))
    log.addHandler(stream)

    return log
|
main.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# main.py
|
| 2 |
+
|
| 3 |
+
from contextlib import asynccontextmanager
|
| 4 |
+
|
| 5 |
+
from fastapi import FastAPI
|
| 6 |
+
from fastapi.responses import RedirectResponse
|
| 7 |
+
from starlette.middleware.cors import CORSMiddleware
|
| 8 |
+
|
| 9 |
+
from api.v1 import search, system
|
| 10 |
+
from core.config import settings
|
| 11 |
+
from core.exceptions import setup_exception_handlers
|
| 12 |
+
from core.logger import setup_logger
|
| 13 |
+
from models.embedder import TextEmbedder
|
| 14 |
+
from models.reranker import TextReranker
|
| 15 |
+
from scripts.setup_db import download_knowledge_base
|
| 16 |
+
from services.search_service import HybridSearchService
|
| 17 |
+
from storage.qdrant_client import QdrantStorage
|
| 18 |
+
from storage.sqlite_client import SQLiteStorage
|
| 19 |
+
|
| 20 |
+
logger = setup_logger("knowledge_engine")
|
| 21 |
+
|
| 22 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    FastAPI Lifespan function to manage startup and shutdown events.
    On startup, it initializes all necessary components (DB connections, models, services) and injects them into the app state.
    On shutdown, it ensures that all resources are properly cleaned up (e.g., closing DB connections).
    - This approach centralizes all initialization logic in one place, making it easier to manage dependencies and handle errors during startup.
    - If any critical error occurs during startup, it logs the error and prevents the server from starting in an unstable state.
    """
    logger.info("🚀 Starting Knowledge Engine API...")

    # Pre-declare so the finally block can close whichever clients were created.
    qdrant_client = None
    sqlite_client = None

    try:
        # 0. Prepare dependency data (DB) (Download if unavailable, skip if available)
        logger.info("Checking and preparing Knowledge Base data...")
        download_knowledge_base()

        # 1. Infrastructure Connection (Database)
        qdrant_client = QdrantStorage(path=settings.QDRANT_PATH, collection_name=settings.QDRANT_COLLECTION)
        sqlite_client = SQLiteStorage(db_path=settings.SQLITE_PATH)

        # 2. Load AI Model (Singleton)
        # NOTE(review): use_fp16 is hard-coded True here instead of using
        # settings.USE_FP16 — confirm this is intentional.
        embedder = TextEmbedder(model_name=settings.EMBEDDER_NAME, use_fp16=True)
        reranker = TextReranker(model_name=settings.RERANKER_NAME)

        # 3. Business Service Orchestration (Instantiate the HybridSearchService with all dependencies)
        search_service = HybridSearchService(
            qdrant=qdrant_client,
            sqlite=sqlite_client,
            embedder=embedder,
            reranker=reranker
        )

        # 4. Injecting services into FastAPI app state for global accessibility in routers
        app.state.search_service = search_service
        logger.info("✅ All services and models initialized successfully.")

        yield  # --- From this point, the server starts receiving traffic ---

    except Exception as e:
        # NOTE(review): because ``yield`` is inside this try, the branch also
        # catches exceptions raised while the app is running, not only
        # startup failures — the "failed to start" wording can mislead then.
        logger.critical(f"❌ Application failed to start: {e}", exc_info=True)
        raise e

    finally:
        logger.info("🛑 Shutting down. Cleaning up resources...")
        # Safe termination of DB connections, etc.
        if qdrant_client is not None: qdrant_client.close()
        if sqlite_client is not None: sqlite_client.close()
        logger.info("Resources cleaned up.")
|
| 73 |
+
|
| 74 |
+
# ---------------------------
|
| 75 |
+
# FastAPI Instance Creation
|
| 76 |
+
# ---------------------------
|
| 77 |
+
app = FastAPI(
    title="Hybrid RAG Knowledge Engine API",
    description="Qdrant and BGE-M3-based high-performance hybrid search engine API",
    version="0.1.0",
    lifespan=lifespan  # startup/shutdown wiring defined above
)

# CORS Setup
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# effectively rejected by browsers under the CORS spec for credentialed
# requests — consider listing explicit origins for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount static files (CSS, JS, etc.) if needed (e.g., for demo pages)
# app.mount("/static", StaticFiles(directory="static"), name="static")

# ---------------------------
# Router Registration
# ---------------------------
# Final paths: /api/v1/health/... and /api/v1/search/...
app.include_router(system.router, prefix="/api/v1")
app.include_router(search.router, prefix="/api/v1")
|
| 101 |
+
|
| 102 |
+
@app.get("/", include_in_schema=False)
|
| 103 |
+
async def root():
|
| 104 |
+
return RedirectResponse(url="/api/v1/search/demo")
|
| 105 |
+
|
| 106 |
+
# -----------------------------------
|
| 107 |
+
# Register global exception handlers
|
| 108 |
+
# -----------------------------------
|
| 109 |
+
setup_exception_handlers(app)
|
models/embedder.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# models/embedder.py
|
| 2 |
+
|
| 3 |
+
from typing import Any, Dict, List
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
from FlagEmbedding import BGEM3FlagModel
|
| 7 |
+
from pydantic import BaseModel
|
| 8 |
+
|
| 9 |
+
from core.exceptions import ModelLoadError
|
| 10 |
+
from core.logger import setup_logger
|
| 11 |
+
|
| 12 |
+
logger = setup_logger("embedder")
|
| 13 |
+
|
| 14 |
+
# Data structure for return (Type Hinting)
|
| 15 |
+
class EmbedderResult(BaseModel):
    """Hybrid-search embedding of a single query, in Qdrant-ready form."""
    # Dense semantic vector produced by BGE-M3 (1024-dim by default).
    dense_vector: List[float]
    # Sparse lexical vector as two parallel arrays: token ids ...
    sparse_indices: List[int]
    # ... and their corresponding lexical weights (same order/length).
    sparse_values: List[float]
|
| 19 |
+
|
| 20 |
+
class TextEmbedder:
    """
    Converts input text into a dense vector and a sparse vector (lexical
    weights) using the BGE-M3 model, in the format expected by Qdrant
    hybrid search.
    """

    def __init__(self, model_name: str = "BAAI/bge-m3", use_fp16: bool = False):
        """
        :param model_name: Hugging Face model identifier to load.
        :param use_fp16: Request half precision (only honored on CUDA).
        :raises ModelLoadError: If the underlying model cannot be loaded.
        """
        self.model_name = model_name
        self.device = self._get_device()

        try:
            logger.info(f"⏳ Loading Embedder Model: {self.model_name} on {self.device}")
            # fp16 is only safe on CUDA; force fp32 on CPU/MPS.
            self.model = BGEM3FlagModel(
                self.model_name,
                use_fp16=(use_fp16 and self.device.startswith("cuda"))
            )
            self._warmup()
            logger.info("✅ Embedder Model loaded successfully.")
        except Exception as e:
            logger.critical(f"❌ Failed to load Embedder Model: {e}", exc_info=True)
            raise ModelLoadError(f"Embedder initialization failed: {e}")

    def _get_device(self) -> str:
        # Best available device: CUDA > Apple Silicon (MPS) > CPU.
        if torch.cuda.is_available():
            return "cuda"
        elif torch.backends.mps.is_available():
            return "mps"  # Apple Silicon
        return "cpu"

    def _warmup(self):
        # One dummy forward pass so the first real request does not pay
        # the lazy-initialization cost.
        logger.info("Warming up embedder model with a dummy input.")
        self.encode_query("Hello world")

    def encode_query(self, text: str) -> EmbedderResult:
        """
        Converts a single query text into Qdrant hybrid search format.

        :param text: Query string to embed.
        :return: EmbedderResult with the dense vector plus sparse indices/values.
        :raises RuntimeError: If model inference fails.
        """
        try:
            # 1. Model inference: dense vector and sparse lexical weights.
            output = self.model.encode(
                text,
                return_dense=True,
                return_sparse=True,
                return_colbert_vecs=False
            )

            dense_vec = output['dense_vecs'].tolist()
            lexical_weights: Dict[str, float] = output['lexical_weights']

            # 2. Sparse vector transformation (Qdrant spec: token-id array + weight array).
            sparse_indices = []
            sparse_values = []

            # FIX: the keys of `lexical_weights` are token IDs rendered as
            # strings — the indexing pipeline (scripts/data_pipeline.py,
            # build_qdrant_index) stores sparse indices via `int(key)`.
            # Feeding the keys through `convert_tokens_to_ids` treats them
            # as surface tokens and yields different (mostly <unk>) IDs, so
            # the query-side sparse vector could never match the index.
            # Parse the key as an integer first; fall back to the tokenizer
            # only if the key is a genuine token string.
            for token_str, weight in lexical_weights.items():
                try:
                    token_id = int(token_str)
                except (TypeError, ValueError):
                    token_id = self.model.tokenizer.convert_tokens_to_ids(token_str)
                if token_id is not None:
                    sparse_indices.append(token_id)
                    sparse_values.append(float(weight))

            return EmbedderResult(
                dense_vector=dense_vec,
                sparse_indices=sparse_indices,
                sparse_values=sparse_values
            )

        except Exception as e:
            logger.error(f"Failed to encode query '{text}': {e}")
            raise RuntimeError(f"Embedding generation failed: {e}")

    # Batch encoding option, kept for future document-ingestion needs.
    def encode_documents(self, texts: List[str], batch_size: int = 12) -> Dict[str, Any]:
        """Embed a batch of documents; returns the raw FlagEmbedding output dict."""
        return self.model.encode(
            texts,
            batch_size=batch_size,
            max_length=8192,  # BGE-M3's max token length
            return_dense=True,
            return_sparse=True,
            return_colbert_vecs=False
        )
|
models/reranker.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# models/reranker.py
|
| 2 |
+
|
| 3 |
+
from typing import Any, Dict, List
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
from FlagEmbedding import FlagReranker
|
| 7 |
+
|
| 8 |
+
from core.exceptions import ModelLoadError
|
| 9 |
+
from core.logger import setup_logger
|
| 10 |
+
|
| 11 |
+
logger = setup_logger("reranker")
|
| 12 |
+
|
| 13 |
+
class TextReranker:
    """
    Re-orders first-stage retrieval hits by cross-encoding each
    (query, document) pair with the BGE reranker model.
    """

    def __init__(self, model_name: str = "BAAI/bge-reranker-v2-m3", use_fp16: bool = False):
        """
        :param model_name: Hugging Face model identifier to load.
        :param use_fp16: Request half precision (only honored on CUDA).
        :raises ModelLoadError: If the reranker model cannot be loaded.
        """
        self.model_name = model_name
        self.device = self._get_device()

        try:
            logger.info(f"⏳ Loading Reranker Model: {self.model_name} on {self.device}")
            self.reranker = FlagReranker(
                self.model_name,
                use_fp16=(use_fp16 and self.device.startswith("cuda"))
            )
            # FIX: warm-up must run AFTER the model is loaded. The original
            # called self._warmup() before this try-block, which raised
            # AttributeError because self.reranker did not exist yet
            # (the sibling TextEmbedder also warms up after loading).
            self._warmup()
            logger.info("✅ Reranker Model loaded successfully.")
        except Exception as e:
            logger.critical(f"❌ Failed to load Reranker Model: {e}", exc_info=True)
            raise ModelLoadError(f"Reranker initialization failed: {e}")

    def _get_device(self) -> str:
        # Best available device: CUDA > Apple Silicon (MPS) > CPU.
        if torch.cuda.is_available():
            return "cuda"
        elif torch.backends.mps.is_available():
            return "mps"
        return "cpu"

    def _warmup(self):
        # One dummy scoring pass so the first real request is fast.
        logger.info("Warming up reranker model with a dummy input.")
        self.rerank(query="Hello world", documents=[{"text": "Hello world"}])

    def rerank(self, query: str, documents: List[Dict[str, Any]], text_key: str = "text") -> List[Dict[str, Any]]:
        """
        Takes a list of documents as input, recalculates their similarity to the query, and returns the results sorted by score.

        :param query: The original search query string
        :param documents: A list of dictionaries in the form [{'chunk_id': 1, 'text': '...'}, ...]
        :param text_key: The key name in the document dictionary containing the body text
        :return: The same dicts, each with a ``rerank_score`` added, sorted descending.
        :raises RuntimeError: If score computation fails.
        """
        if not documents:
            return []

        # Generate pairs for Cross-Encoder input: [[query, doc1], [query, doc2], ...]
        sentence_pairs = [[query, doc[text_key]] for doc in documents]

        try:
            # 1. Batch score calculation (normalized to [0, 1]).
            scores = self.reranker.compute_score(sentence_pairs, normalize=True)

            # compute_score returns a bare float for a single pair — wrap it.
            if isinstance(scores, float):
                scores = [scores]

            # 2. Inject rerank_score into the source document dictionaries.
            for i, doc in enumerate(documents):
                doc["rerank_score"] = float(scores[i])

            # 3. Sort by score (descending).
            reranked_docs = sorted(documents, key=lambda x: x["rerank_score"], reverse=True)

            return reranked_docs

        except Exception as e:
            logger.error(f"Reranking failed for query '{query}': {e}")
            raise RuntimeError(f"Reranking process failed: {e}")
|
requirements.txt
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Web Server
|
| 2 |
+
fastapi==0.135.1
|
| 3 |
+
uvicorn[standard]==0.40.0
|
| 4 |
+
jinja2==3.1.5
|
| 5 |
+
python-multipart==0.0.22
|
| 6 |
+
starlette==0.52.1
|
| 7 |
+
|
| 8 |
+
# Vector Search & Embeddings
|
| 9 |
+
qdrant-client==1.16.2
|
| 10 |
+
FlagEmbedding==1.3.5
|
| 11 |
+
# CPU-only PyTorch wheels live on a separate index. Appending `--index-url`
# to a requirement line is not valid requirements.txt syntax; the index
# option must be declared on its own line.
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.9.1
|
| 12 |
+
numpy==2.4.2
|
| 13 |
+
sentence-transformers==5.2.3
|
| 14 |
+
|
| 15 |
+
# Data Modeling & Settings
|
| 16 |
+
pydantic==2.12.2
|
| 17 |
+
pydantic-settings==2.13.1
|
| 18 |
+
python-dotenv==1.2.1
|
| 19 |
+
|
| 20 |
+
# Hugging Face Stack
|
| 21 |
+
huggingface_hub>=0.25.0
|
| 22 |
+
transformers>=4.44.0
|
| 23 |
+
tokenizers>=0.19.0
|
| 24 |
+
accelerate>=0.34.0
|
| 25 |
+
|
| 26 |
+
# ONNX Runtime
|
| 27 |
+
onnxruntime>=1.19.0
|
| 28 |
+
|
| 29 |
+
# Utils
|
| 30 |
+
tqdm==4.67.2
|
| 31 |
+
requests==2.32.5
|
scripts/data_pipeline.py
ADDED
|
@@ -0,0 +1,387 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# scripts/data_pipeline.py
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
import re
|
| 6 |
+
import sqlite3
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
from datasets import load_dataset
|
| 10 |
+
from FlagEmbedding import BGEM3FlagModel
|
| 11 |
+
from qdrant_client import QdrantClient
|
| 12 |
+
from qdrant_client.models import (Distance, OptimizersConfigDiff, PointStruct,
|
| 13 |
+
ScalarQuantization, ScalarQuantizationConfig,
|
| 14 |
+
ScalarType, SparseIndexParams, SparseVector,
|
| 15 |
+
SparseVectorParams, VectorParams)
|
| 16 |
+
from tqdm import tqdm
|
| 17 |
+
from transformers import AutoTokenizer
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class KnowledgeEngineBuilder:
    """
    Offline pipeline that builds the knowledge base in three phases:

    1. ``ingest``             - stream a wiki dump into SQLite (documents/chunks/spans).
    2. ``embed_corpus``       - embed every chunk with BGE-M3 and cache vectors on disk.
    3. ``build_qdrant_index`` - bulk-load the cached vectors into a Qdrant hybrid index.
    """

    def __init__(self, base_dir="ke_store", dim=1024):
        """
        :param base_dir: Root directory for SQLite, Qdrant and embedding caches.
        :param dim: Dense vector dimensionality (BGE-M3 emits 1024-d vectors).
        """
        self.base_dir = base_dir
        self.dim = dim

        print("Loading BGE-M3 Model and Tokenizer...")
        self.model = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)
        self.tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-m3')

        # Chunking policy: token budget per chunk and how many trailing
        # sentences are carried into the next chunk for context.
        self.max_tokens = 384
        self.overlap_count = 2

        self._init_dirs()
        self._init_sqlite()
        self._init_meta()
        self._init_qdrant()

    # ---------------------------
    # INIT & SETUP
    # ---------------------------
    def _init_dirs(self):
        # Create the on-disk layout (idempotent).
        for d in ["corpus", "qdrant", "build_cache/embeddings"]:
            os.makedirs(os.path.join(self.base_dir, d), exist_ok=True)

    def _init_qdrant(self):
        """Open the embedded Qdrant store and create the hybrid collection if missing."""
        self.qdrant_path = f"{self.base_dir}/qdrant"
        self.qdrant_client = QdrantClient(path=self.qdrant_path)
        self.collection_name = "knowledge_base"

        if not self.qdrant_client.collection_exists(self.collection_name):
            print(f"Creating Qdrant collection: {self.collection_name}")
            self.qdrant_client.create_collection(
                collection_name=self.collection_name,
                vectors_config={
                    "dense": VectorParams(size=self.dim, distance=Distance.COSINE, on_disk=True)
                },
                sparse_vectors_config={
                    "sparse": SparseVectorParams(index=SparseIndexParams(on_disk=True))
                },
                # INT8 scalar quantization kept in RAM for fast scoring.
                quantization_config=ScalarQuantization(
                    scalar=ScalarQuantizationConfig(type=ScalarType.INT8, always_ram=True)
                ),
                # Disable HNSW indexing during bulk upload; re-enabled in
                # build_qdrant_index() once all points are in.
                optimizers_config=OptimizersConfigDiff(indexing_threshold=0)
            )

    def _optimize_sqlite(self, conn):
        # Bulk-load friendly PRAGMAs: WAL journaling, relaxed fsync,
        # in-memory temp storage, ~2 GB page cache.
        conn.execute("PRAGMA journal_mode=WAL;")
        conn.execute("PRAGMA synchronous=NORMAL;")
        conn.execute("PRAGMA temp_store=MEMORY;")
        conn.execute("PRAGMA cache_size=-2000000")

    def _init_sqlite(self):
        """Open the corpus DB and create the documents/chunks/spans schema."""
        self.conn = sqlite3.connect(f"{self.base_dir}/corpus/corpus.sqlite")
        self._optimize_sqlite(self.conn)
        cur = self.conn.cursor()

        cur.execute("""
            CREATE TABLE IF NOT EXISTS documents (
                doc_id INTEGER PRIMARY KEY AUTOINCREMENT,
                external_id TEXT, title TEXT, lang TEXT, url TEXT,
                wikidata_id TEXT, date_modified TEXT, full_text TEXT)
        """)

        cur.execute("""
            CREATE TABLE IF NOT EXISTS chunks (
                chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,
                doc_id INTEGER, chunk_index INTEGER, text TEXT,
                token_length INTEGER, section TEXT, lang TEXT)
        """)

        cur.execute("""
            CREATE TABLE IF NOT EXISTS spans (
                span_id INTEGER PRIMARY KEY AUTOINCREMENT,
                chunk_id INTEGER, span_index INTEGER, text TEXT, char_length INTEGER)
        """)

        cur.execute("CREATE INDEX IF NOT EXISTS idx_chunks_doc_id ON chunks(doc_id)")
        cur.execute("CREATE INDEX IF NOT EXISTS idx_spans_chunk_id ON spans(chunk_id)")
        cur.execute("CREATE INDEX IF NOT EXISTS idx_chunks_lang ON chunks(lang)")
        self.conn.commit()

    def _init_meta(self):
        """Derive the next free IDs from the DB (authoritative) and persist them."""
        self.meta_path = f"{self.base_dir}/corpus/meta.json"
        cur = self.conn.cursor()
        cur.execute("SELECT MAX(doc_id) FROM documents")
        db_doc = cur.fetchone()[0] or 0
        cur.execute("SELECT MAX(chunk_id) FROM chunks")
        db_chunk = cur.fetchone()[0] or 0
        cur.execute("SELECT MAX(span_id) FROM spans")
        db_span = cur.fetchone()[0] or 0

        self.meta = {
            "last_doc_id": db_doc + 1,
            "last_chunk_id": db_chunk + 1,
            "last_span_id": db_span + 1
        }
        self._save_meta()

    def _save_meta(self):
        # Persist the ID counters so progress is visible between runs.
        with open(self.meta_path, "w") as f:
            json.dump(self.meta, f, indent=4)

    # ---------------------------
    # TEXT PROCESSING & INGESTION
    # ---------------------------
    def split_sentences(self, text):
        """Split raw text into sentence-like spans.

        Splits on sentence-final punctuation (Latin and CJK full-width),
        skipping common abbreviations and single-capital initials, then
        further splits on newlines and drops trivial fragments.
        """
        text = re.sub(r'[ \t]+', ' ', text)
        # FIX: the original pattern used `[Ar|Dr|Mr|Ms|St]\.` in a
        # lookbehind — that is a character class matching any SINGLE one of
        # those letters (or '|') before the dot, not the abbreviations
        # Dr./Mr./Ms./St. as intended. Python `re` only supports fixed-width
        # lookbehinds, so each abbreviation gets its own assertion.
        pattern = r'(?<=[.!?。!?])(?<!Dr\.)(?<!Mr\.)(?<!Ms\.)(?<!St\.)(?<![A-Z]\.)\s+'
        sentences = re.split(pattern, text)
        final_sentences = []
        for s in sentences:
            sub_parts = [p.strip() for p in s.split('\n') if p.strip()]
            final_sentences.extend(sub_parts)
        return [s for s in final_sentences if len(s) > 1]

    def count_tokens(self, text):
        """Token count of *text* under the BGE-M3 tokenizer (no special tokens)."""
        return len(self.tokenizer.encode(text, add_special_tokens=False))

    def get_token_counts_batch(self, texts):
        """Batched token counting — one tokenizer call for the whole list."""
        if not texts: return []
        encodings = self.tokenizer(texts, add_special_tokens=False, padding=False, truncation=False)
        return [len(ids) for ids in encodings['input_ids']]

    def _split_monster_sentence(self, sentence):
        """Break a sentence longer than max_tokens into word-bounded sub-spans."""
        words = sentence.split(' ')
        sub_spans, current_sub, current_toks = [], [], 0

        for word in words:
            word_toks = self.count_tokens(word)
            if word_toks > self.max_tokens:
                # A single pathological "word" over the budget: flush the
                # accumulator and split the word in half by characters.
                if current_sub:
                    sub_spans.append(" ".join(current_sub))
                    current_sub, current_toks = [], 0
                half = len(word) // 2
                sub_spans.extend([word[:half], word[half:]])
                continue

            space_tok = 1 if current_sub else 0
            if current_toks + word_toks + space_tok > self.max_tokens and current_sub:
                sub_spans.append(" ".join(current_sub))
                current_sub, current_toks = [word], word_toks
            else:
                current_sub.append(word)
                current_toks += word_toks + space_tok

        if current_sub: sub_spans.append(" ".join(current_sub))
        return sub_spans

    def chunk_text(self, text):
        """Group sentence spans into token-bounded chunks with sentence overlap.

        :return: List of ``(chunk_body, token_count, span_list)`` tuples.
        """
        raw_sentences = self.split_sentences(text)
        sentence_lengths = self.get_token_counts_batch(raw_sentences)

        refined_spans = []
        for s, length in zip(raw_sentences, sentence_lengths):
            if length > self.max_tokens: refined_spans.extend(self._split_monster_sentence(s))
            else: refined_spans.append(s)

        span_toks_list = self.get_token_counts_batch(refined_spans)
        chunks, current_spans, current_tokens = [], [], 0

        for span, span_toks in zip(refined_spans, span_toks_list):
            if current_tokens + span_toks > self.max_tokens and current_spans:
                # (renamed from `chunk_text` to avoid shadowing this method)
                chunk_body = " ".join(current_spans)
                chunks.append((chunk_body, self.count_tokens(chunk_body), list(current_spans)))

                # Carry the last few sentences into the next chunk for context.
                actual_overlap = min(self.overlap_count, len(current_spans) - 1)
                if actual_overlap > 0:
                    current_spans = current_spans[-actual_overlap:]
                    current_tokens = self.count_tokens(" ".join(current_spans)) + 1
                else:
                    current_spans, current_tokens = [], 0

            current_spans.append(span)
            current_tokens += span_toks + 1

        if current_spans:
            chunk_body = " ".join(current_spans)
            chunks.append((chunk_body, self.count_tokens(chunk_body), list(current_spans)))
        return chunks

    def ingest(self, lang="ko", batch_size=32, limit=None):
        """
        - The dataset is read in a streaming manner to handle large corpora without memory issues.
        - Each document is processed to create chunks based on token limits, with an overlap strategy to ensure comprehensive coverage of the text.
        - The processed documents, chunks, and spans are stored in SQLite with appropriate indexing for efficient retrieval during search.
        """
        ds = load_dataset("HuggingFaceFW/finewiki", lang, split="train", streaming=True)
        cur = self.conn.cursor()
        count = 0
        batch_docs, batch_chunks, batch_spans = [], [], []

        for item in tqdm(ds, desc=f"Ingesting {lang}"):
            if limit and count >= limit: break
            doc_id = self.meta["last_doc_id"]
            batch_docs.append((doc_id, item["id"], item["title"], lang, item["url"], item.get("wikidata_id", ""), item.get("date_modified", ""), item["text"]))

            for c_idx, (c_text, token_len, span_list) in enumerate(self.chunk_text(item["text"])):
                chunk_id = self.meta["last_chunk_id"]
                batch_chunks.append((chunk_id, doc_id, c_idx, c_text, token_len, item["title"], lang))
                for s_idx, span_text in enumerate(span_list):
                    batch_spans.append((self.meta["last_span_id"], chunk_id, s_idx, span_text, len(span_text)))
                    self.meta["last_span_id"] += 1
                self.meta["last_chunk_id"] += 1
            self.meta["last_doc_id"] += 1
            count += 1

            if len(batch_docs) >= batch_size:
                self._commit_batch(cur, batch_docs, batch_chunks, batch_spans)
                batch_docs, batch_chunks, batch_spans = [], [], []
                if count % (batch_size * 10) == 0: self._save_meta()

        # Flush the final partial batch, checkpoint the WAL and persist IDs.
        self._commit_batch(cur, batch_docs, batch_chunks, batch_spans)
        self.conn.commit()
        self.conn.execute("PRAGMA wal_checkpoint(FULL);")
        self._save_meta()

    def _commit_batch(self, cur, docs, chunks, spans):
        """Bulk-insert one accumulated batch and commit it.

        FIX: the original inserted without committing, so no row was durable
        until the very end of ``ingest`` even though ``_save_meta`` persisted
        progress along the way; a crash lost everything. Committing per
        batch bounds the possible data loss.
        """
        if not docs: return
        cur.executemany("INSERT INTO documents VALUES (?,?,?,?,?,?,?,?)", docs)
        cur.executemany("INSERT INTO chunks VALUES (?,?,?,?,?,?,?)", chunks)
        cur.executemany("INSERT INTO spans VALUES (?,?,?,?,?)", spans)
        self.conn.commit()

    # ---------------------------
    # EMBED TO DISK
    # ---------------------------
    def embed_corpus(self, lang="ko", batch_size=128, save_interval=100000):
        """
        Text is read in batches from SQLite, embeddings are generated using BGE-M3, and then saved to disk.
        - Embedding generation is performed on the GPU, and data is saved to disk in fixed batches to manage memory.
        - Dense vectors are saved in NumPy's .npz format to ensure fast loading and low disk usage.
        - Sparse vectors are saved in JSONL format to provide flexibility and readability.
        - The saved embeddings are subsequently uploaded to Qdrant for use in searches.
        - This method is designed to reliably generate and save embeddings even on large-scale datasets.
        """
        cur = self.conn.cursor()
        cur.execute("SELECT chunk_id, text FROM chunks WHERE lang=?", (lang,))
        rows = cur.fetchall()

        part_id = 0
        id_buffer = []
        dense_buffer = []
        sparse_buffer = []

        save_dir = f"{self.base_dir}/build_cache/embeddings"

        for i in tqdm(range(0, len(rows), batch_size), desc=f"1/2 GPU Embedding ({lang})"):
            batch = rows[i:i+batch_size]
            ids = [r[0] for r in batch]
            texts = [r[1] for r in batch]

            output = self.model.encode(
                texts, batch_size=len(texts), max_length=self.max_tokens,
                return_dense=True, return_sparse=True, return_colbert_vecs=False
            )

            id_buffer.extend(ids)
            dense_buffer.append(output['dense_vecs'])

            # lexical_weights keys are token ids (as strings) → keep as str here;
            # build_qdrant_index converts them back with int().
            for sp_dict in output['lexical_weights']:
                sparse_buffer.append({str(k): float(v) for k, v in sp_dict.items()})

            # Save to disk when a certain number is reached (prevents memory explosion)
            if len(id_buffer) >= save_interval:
                self._save_embedding_part(save_dir, lang, part_id, id_buffer, dense_buffer, sparse_buffer)
                part_id += 1
                id_buffer, dense_buffer, sparse_buffer = [], [], []

        # Save the last remaining scraps
        self._save_embedding_part(save_dir, lang, part_id, id_buffer, dense_buffer, sparse_buffer)
        print(f"Embedding Generation Complete. Saved to {save_dir}")

    def _save_embedding_part(self, save_dir, lang, part_id, ids, dense_chunks, sparse_list):
        """Persist one part: dense vectors + ids as .npz, sparse dicts as JSONL."""
        if not ids: return

        # Dense & IDs: high-speed storage as NumPy binaries.
        np.savez(f"{save_dir}/ebd_{lang}_{part_id}.npz",
                 ids=np.array(ids, dtype=np.int64),
                 dense=np.vstack(dense_chunks))

        # Sparse: save in JSONL format (one record per line).
        with open(f"{save_dir}/sparse_{lang}_{part_id}.jsonl", 'w', encoding='utf-8') as f:
            for sp in sparse_list:
                f.write(json.dumps(sp) + '\n')

    # ---------------------------
    # BUILD QDRANT INDEX
    # ---------------------------
    def build_qdrant_index(self, lang="ko", batch_size=2000):
        """
        The generated embeddings are read from disk and uploaded to Qdrant in batches.
        - This method reads the saved dense and sparse embeddings, constructs the appropriate data structures for Qdrant, and uploads them in batches to manage memory and ensure efficient indexing.
        - After all data is uploaded, it triggers Qdrant's indexing process to optimize search performance.
        - The use of batch uploads and on-disk storage allows this process to scale to large datasets without overwhelming system memory.
        """
        save_dir = f"{self.base_dir}/build_cache/embeddings"
        files = sorted([f for f in os.listdir(save_dir) if f.startswith(f"ebd_{lang}_") and f.endswith(".npz")])

        for file_name in files:
            part_id = file_name.split("_")[-1].split(".")[0]

            # 1. Load one part and pair it with its sparse JSONL file.
            npz_path = os.path.join(save_dir, file_name)
            sparse_path = os.path.join(save_dir, f"sparse_{lang}_{part_id}.jsonl")

            data = np.load(npz_path)
            ids = data['ids']
            dense_vecs = data['dense']

            with open(sparse_path, 'r', encoding='utf-8') as f:
                sparse_vecs = [json.loads(line) for line in f]

            points_batch = []

            # 2. Qdrant upload loop (point id == SQLite chunk_id).
            for i in tqdm(range(len(ids)), desc=f"2/2 Qdrant Uploading (Part {part_id})"):
                chunk_id = int(ids[i])
                sparse_dict = sparse_vecs[i]

                point = PointStruct(
                    id=chunk_id,
                    vector={
                        "dense": dense_vecs[i].tolist(),
                        "sparse": SparseVector(
                            indices=[int(k) for k in sparse_dict.keys()],
                            values=list(sparse_dict.values())
                        )
                    },
                    payload={"chunk_id": chunk_id, "lang": lang}
                )
                points_batch.append(point)

                # Upload when stacked to batch size
                if len(points_batch) >= batch_size:
                    self.qdrant_client.upload_points(
                        collection_name=self.collection_name,
                        points=points_batch
                    )
                    points_batch = []

            # Uploading leftover scraps
            if points_batch:
                self.qdrant_client.upload_points(
                    collection_name=self.collection_name,
                    points=points_batch
                )

        print("Data upload complete. Enabling HNSW Indexing...")

        # 3. [Key] After all uploads are complete, re-enable indexing (default 20,000) to optimize the graph
        # FIX: the keyword is `optimizers_config` (as used by create_collection
        # above); the original passed `optimizer_config`, which the qdrant
        # client does not accept.
        self.qdrant_client.update_collection(
            collection_name=self.collection_name,
            optimizers_config=OptimizersConfigDiff(indexing_threshold=20000)
        )
        print("Qdrant Indexing Complete!")

    def close(self):
        """Close the SQLite connection if it was opened."""
        if hasattr(self, 'conn') and self.conn:
            self.conn.close()
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
if __name__ == "__main__":
    # Example run: small Korean corpus end-to-end (ingest → embed → index).
    pipeline = KnowledgeEngineBuilder()
    try:
        # Process only 10,000 documents as an example
        pipeline.ingest(lang="ko", batch_size=32, limit=10000)
        pipeline.embed_corpus(lang="ko", batch_size=128, save_interval=5000)
        pipeline.build_qdrant_index(lang="ko", batch_size=2000)
    finally:
        # Always release the SQLite handle, even on failure.
        pipeline.close()
|
scripts/setup_db.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# scripts/setup_db.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import sys
|
| 5 |
+
|
| 6 |
+
from huggingface_hub import snapshot_download
|
| 7 |
+
from huggingface_hub.utils import HfHubHTTPError
|
| 8 |
+
|
| 9 |
+
from core.config import settings
|
| 10 |
+
from core.logger import setup_logger
|
| 11 |
+
|
| 12 |
+
logger = setup_logger("setup_db")
|
| 13 |
+
|
| 14 |
+
def download_knowledge_base():
    """
    Ensure the knowledge-base data is present locally.

    Checks for the SQLite DB file and the Qdrant data directory; when either
    is missing, fetches both from the configured Hugging Face dataset repo
    via ``snapshot_download`` (restricted to corpus/qdrant files via
    ``allow_patterns`` to save bandwidth and disk). Exits the process with
    status 1 on any download failure.
    """
    sqlite_path = settings.SQLITE_PATH
    qdrant_dir = settings.QDRANT_PATH

    data_ready = os.path.exists(sqlite_path) and os.path.isdir(qdrant_dir)
    if data_ready:
        logger.info(f"⚡ SQLite DB and Qdrant data already exist at {sqlite_path} and {qdrant_dir}. Skipping download.")
        return

    repo_id = settings.REPO_ID
    local_dir = settings.DATA_DIR

    logger.info(f"📥 Downloading DBs from HF Repo: {repo_id} to {local_dir}...")

    try:
        download_path = snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            local_dir=local_dir,
            allow_patterns=["corpus/*", "qdrant/*"],
            ignore_patterns=["build_cache/*", ".gitattributes"],
            max_workers=4
        )
    except HfHubHTTPError as e:
        logger.error(f"❌ HTTP Error during download: {e}")
        sys.exit(1)
    except Exception as e:
        logger.error(f"❌ Unexpected error during download: {e}", exc_info=True)
        sys.exit(1)
    else:
        logger.info(f"✅ Download complete! Data is ready at: {download_path}")

if __name__ == "__main__":
    download_knowledge_base()
|
services/search_service.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# services/search_service.py
|
| 2 |
+
|
| 3 |
+
import time
|
| 4 |
+
from typing import Any, Dict, List
|
| 5 |
+
|
| 6 |
+
from api.schemas.search import DocumentMetadata, SearchResultItem
|
| 7 |
+
from core.exceptions import SearchExecutionError
|
| 8 |
+
from core.logger import setup_logger
|
| 9 |
+
from models.embedder import TextEmbedder
|
| 10 |
+
from models.reranker import TextReranker
|
| 11 |
+
from storage.qdrant_client import QdrantStorage
|
| 12 |
+
from storage.sqlite_client import SQLiteStorage
|
| 13 |
+
|
| 14 |
+
logger = setup_logger("search_service")
|
| 15 |
+
|
| 16 |
+
class HybridSearchService:
    """
    Business-logic service that derives final search results by integrating
    Qdrant (vector DB), SQLite (RDBMS), an Embedder, and a Reranker.
    """

    def __init__(self, qdrant: QdrantStorage, sqlite: SQLiteStorage, embedder: TextEmbedder, reranker: TextReranker):
        self.qdrant = qdrant      # vector store (hybrid dense/sparse search)
        self.sqlite = sqlite      # source of chunk text + document metadata
        self.embedder = embedder  # query -> dense/sparse vector encoder
        self.reranker = reranker  # cross-encoder for precise rescoring

    def search(self, query: str, top_k: int = 5, limit: int = 50) -> Dict[str, Any]:
        """
        Receive a user query and perform hybrid search and reranking.

        :param query: User search query.
        :param top_k: Number of documents to return (after reranking).
        :param limit: Number of candidate documents to fetch from Qdrant
                      (after RRF fusion, before reranking).
        :returns: ``{"query": ..., "results": [...], "latency_ms": ...}``.
        :raises SearchExecutionError: if any stage of the pipeline fails.
        """
        start_time = time.time()
        logger.info(f"🔍 Starting search pipeline for query: '{query}'")

        try:
            # 1. Query embedding (dense + sparse extraction)
            encoded_query = self.embedder.encode_query(query)

            # 2. Qdrant hybrid search (extract `limit` candidates fused via RRF)
            qdrant_results = self.qdrant.hybrid_search(
                dense_vector=encoded_query.dense_vector,
                sparse_indices=encoded_query.sparse_indices,
                sparse_values=encoded_query.sparse_values,
                limit=limit
            )

            if not qdrant_results:
                logger.warning("No results found in Vector DB.")
                return self._build_empty_response(query, start_time)

            chunk_ids = [res.id for res in qdrant_results]

            # 3. Fetch a dict from SQLite for O(1) mapping of source text and metadata
            sqlite_data_map = self.sqlite.get_enriched_chunks_dict(chunk_ids)

            # 4. Merge Qdrant scores with SQLite text/metadata for reranking
            chunks_for_reranking = []
            for rank, res in enumerate(qdrant_results, start=1):
                # Defense logic: skip chunks present in the vector DB but missing in SQLite (desync)
                chunk_info = sqlite_data_map.get(res.id)
                if not chunk_info:
                    logger.warning(f"Data Desync: chunk_id {res.id} found in Qdrant but missing in SQLite.")
                    continue

                chunks_for_reranking.append({
                    "chunk_id": res.id,
                    "text": chunk_info["text"],
                    "metadata": chunk_info["metadata"],
                    "rrf_score": res.score,
                    "rrf_rank": rank
                })

            if not chunks_for_reranking:
                return self._build_empty_response(query, start_time)

            # 5. Cross-encoder reranking: returns a list sorted by precise,
            #    context-aware score in descending order.
            reranked_docs = self.reranker.rerank(
                query=query,
                documents=chunks_for_reranking,
                text_key="text"
            )

            # 6. Top-K truncation and mapping onto the Pydantic schema (SearchResultItem)
            final_results = []
            for doc in reranked_docs[:top_k]:
                final_results.append(SearchResultItem(
                    chunk_id=doc["chunk_id"],
                    text=doc["text"],
                    score=round(doc["rerank_score"], 4),  # neatly rounded to 4 decimal places
                    metadata=DocumentMetadata(**doc["metadata"])
                ).model_dump())  # convert to dict for FastAPI compatibility

            latency_ms = int((time.time() - start_time) * 1000)
            logger.info(f"✅ Search completed in {latency_ms}ms. Found {len(final_results)} final chunks.")

            return {
                "query": query,
                "results": final_results,
                "latency_ms": latency_ms
            }

        except Exception as e:
            # Wrap unexpected errors in a custom error and throw them to the router
            logger.error(f"❌ Pipeline failed: {str(e)}", exc_info=True)
            raise SearchExecutionError(f"Search pipeline failed: {str(e)}")

    def _build_empty_response(self, query: str, start_time: float) -> Dict[str, Any]:
        """Build the standard response format when no search results are found."""
        return {
            "query": query,
            "results": [],
            "latency_ms": int((time.time() - start_time) * 1000)
        }

    # ---------------------------------------------------------
    # LLM-Friendly Prompt Formatter
    # (Utility used when injecting into Agents or VLMs)
    # ---------------------------------------------------------
    def format_for_llm(self, search_results: List[Dict[str, Any]]) -> str:
        """
        Convert retrieved JSON results into a Markdown/XML mixed format best understood by LLMs.
        (This method can be optionally called by API routers or other Agent systems.)

        Note: SQLite NULL columns surface here as ``None`` *values* under
        present keys, so ``dict.get(key, default)`` does not fall back for
        them; ``or`` fallbacks are used instead to avoid emitting the literal
        string "None" into the prompt.
        """
        if not search_results:
            return "No relevant knowledge (documents) available."

        context_blocks = []
        for i, res in enumerate(search_results, start=1):
            meta = res["metadata"]
            # `or` (not a .get default) so a present-but-None title still falls back.
            source = meta.get("title") or f"Document_{meta.get('doc_id')}"

            # LLMs recognize text enclosed in XML tags (<doc>) as the clearest 'referencing context'.
            block = (
                f"<doc id=\"{i}\" source=\"{source}\" "
                f"url=\"{meta.get('url') or 'N/A'}\" "
                f"relevance_score=\"{res['score']}\">\n"
                f"{res['text']}\n"
                f"</doc>"
            )
            context_blocks.append(block)

        return "\n\n".join(context_blocks)
|
storage/qdrant_client.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# storage/qdrant_client.py
|
| 2 |
+
|
| 3 |
+
from typing import List
|
| 4 |
+
|
| 5 |
+
from qdrant_client import QdrantClient, models
|
| 6 |
+
|
| 7 |
+
from core.exceptions import DatabaseError
|
| 8 |
+
from core.logger import setup_logger
|
| 9 |
+
|
| 10 |
+
logger = setup_logger("qdrant_client")
|
| 11 |
+
|
| 12 |
+
class QdrantStorage:
    """
    Qdrant client performing hybrid search based on dense and sparse vectors.
    """

    def __init__(self, path: str, collection_name: str = "knowledge_base"):
        self.path = path
        self.collection_name = collection_name
        try:
            # Embedded, file-system-backed Qdrant connection (v1.10+).
            self.client = QdrantClient(path=self.path)
            logger.info(f"✅ Connected to local Qdrant at {self.path} (Collection: {self.collection_name})")
        except Exception as e:
            logger.critical(f"❌ Qdrant connection failed: {e}")
            raise e

    def hybrid_search(
        self,
        dense_vector: List[float],
        sparse_indices: List[int],
        sparse_values: List[float],
        limit: int = 100
    ) -> List[models.ScoredPoint]:
        """
        Run a hybrid (dense + sparse) search through Qdrant's native fusion API.

        RRF (Reciprocal Rank Fusion) is computed at the database level, so the
        returned points already carry fused scores.
        """
        try:
            # Qdrant v1.10+ syntax: run both branches as Prefetch, then fuse.
            sparse_branch = models.Prefetch(
                query=models.SparseVector(
                    indices=sparse_indices,
                    values=sparse_values
                ),
                using="sparse",
                limit=limit,
            )
            dense_branch = models.Prefetch(
                query=dense_vector,
                using="dense",
                limit=limit,
            )
            # Merge the two candidate sets into one ranking via RRF.
            response = self.client.query_points(
                collection_name=self.collection_name,
                prefetch=[sparse_branch, dense_branch],
                query=models.FusionQuery(fusion=models.Fusion.RRF),
                limit=limit,
                with_payload=True
            )
            return response.points

        except Exception as e:
            logger.error(f"❌ Hybrid search failed: {e}", exc_info=True)
            raise DatabaseError(f"Qdrant Hybrid search execution failed: {e}")

    def close(self):
        """Release the embedded Qdrant client handle, if one was created."""
        client = getattr(self, 'client', None)
        if client:
            client.close()
            logger.info("🛑 Qdrant client connection closed.")
|
storage/sqlite_client.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# storage/sqlite_client.py
|
| 2 |
+
|
| 3 |
+
import sqlite3
|
| 4 |
+
from typing import Any, Dict, List
|
| 5 |
+
|
| 6 |
+
from core.exceptions import DatabaseError
|
| 7 |
+
from core.logger import setup_logger
|
| 8 |
+
|
| 9 |
+
logger = setup_logger("sqlite_client")
|
| 10 |
+
|
| 11 |
+
class SQLiteStorage:
    """Read-only access layer for the chunk/document corpus stored in SQLite."""

    def __init__(self, db_path: str):
        self.db_path = db_path
        try:
            # NOTE(review): a single connection shared across threads
            # (check_same_thread=False) assumes serialized access upstream.
            self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
            self.conn.row_factory = sqlite3.Row
            logger.info(f"✅ Connected to SQLite at {self.db_path}")
        except sqlite3.Error as e:
            logger.critical(f"❌ SQLite connection failed: {e}")
            raise DatabaseError(f"Database connection failed: {e}")

    def get_enriched_chunks_dict(self, chunk_ids: List[int]) -> Dict[int, Dict[str, Any]]:
        """
        Map each chunk_id to its text plus parent-document metadata.

        Returns ``{chunk_id: {"text": ..., "metadata": {...}}}`` so the search
        service can enrich Qdrant hits via O(1) lookups. A single JOIN over the
        ``chunks`` and ``documents`` tables fetches everything at once; an
        empty input short-circuits to ``{}`` without touching the database.
        Database failures are logged and re-raised as ``DatabaseError``.
        """
        if not chunk_ids:
            return {}

        marks = ",".join("?" * len(chunk_ids))
        sql = f"""
            SELECT
                c.chunk_id, c.text AS chunk_text,
                d.doc_id, d.title, d.lang, d.url, d.date_modified
            FROM chunks c
            JOIN documents d ON c.doc_id = d.doc_id
            WHERE c.chunk_id IN ({marks})
        """

        try:
            rows = self.conn.execute(sql, chunk_ids).fetchall()
        except sqlite3.Error as e:
            logger.error(f"Failed to fetch enriched chunks: {e}")
            raise DatabaseError(f"Query execution failed: {e}")

        # Shape rows into { chunk_id: { "text": ..., "metadata": {...} } }
        return {
            row["chunk_id"]: {
                "text": row["chunk_text"],
                "metadata": {
                    "doc_id": row["doc_id"],
                    "title": row["title"],
                    "lang": row["lang"],
                    "url": row["url"],
                    "date_modified": row["date_modified"],
                },
            }
            for row in rows
        }

    def close(self):
        """Close the underlying SQLite connection, if one was opened."""
        conn = getattr(self, 'conn', None)
        if conn:
            conn.close()
            logger.info("🛑 SQLite connection closed.")
logger.info("🛑 SQLite connection closed.")
|
templates/index.html
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html>

<head>
    <meta charset="UTF-8">
    <title>Hybrid Knowledge Engine</title>
    <style>
        body {
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            line-height: 1.6;
        }

        .search-box {
            background: #f4f4f4;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 30px;
        }

        .result-item {
            border-bottom: 1px solid #eee;
            padding: 15px 0;
        }

        .metadata {
            font-size: 0.85em;
            color: #666;
            margin-bottom: 5px;
        }

        .score {
            color: #2c3e50;
            font-weight: bold;
            background: #ecf0f1;
            padding: 2px 6px;
            border-radius: 4px;
        }

        .content {
            margin-top: 10px;
            color: #333;
        }

        .latency {
            color: #999;
            font-size: 0.9em;
            text-align: right;
        }
    </style>
</head>

<body>
    <h1>Knowledge Engine</h1>

    {# Search form: posts the query string to the server-rendered demo endpoint. #}
    <div class="search-box">
        <form method="post" action="/api/v1/search/demo">
            <input type="text" name="query" value="{{ query }}" placeholder="Enter Query" style="width: 80%; padding: 10px;"
                required>
            <button type="submit" style="padding: 10px 20px; cursor: pointer;">Search</button>
        </form>
    </div>

    {# `results` is None on the initial page load (no search performed yet),
       and a (possibly empty) list after a search — hence the two-level check. #}
    {% if results is not none %}
    <div class="latency">Search time: {{ latency_ms }}ms</div>
    <h2>Results for "{{ query }}"</h2>

    {% if results|length > 0 %}
    {% for r in results %}
    <div class="result-item">
        <div class="metadata">
            <span class="score">Score: {{ r.score }}</span> |
            <strong>source: {{ r.metadata.title }}</strong>
            {% if r.metadata.url %} | <a href="{{ r.metadata.url }}" target="_blank">Link</a>{% endif %}
        </div>
        <div class="content">
            {{ r.text }}
        </div>
    </div>
    {% endfor %}
    {% else %}
    <p>No search results found.</p>
    {% endif %}
    {% endif %}

    {# Server-side error message (e.g. pipeline failure), if any. #}
    {% if error_message %}
    <p style="color: red;">{{ error_message }}</p>
    {% endif %}
</body>

</html>
|