Spaces:

LazyHuman
/

plexi-api

Running

App Files Files Community

LazyHuman10 committed on Apr 6

Commit

3b6130d

0 Parent(s):

Initial commit for HF Space

Browse files

Files changed (7) hide show

Dockerfile +21 -0
README.md +40 -0
__pycache__/main.cpython-313.pyc +0 -0
__pycache__/rag.cpython-313.pyc +0 -0
main.py +211 -0
rag.py +196 -0
requirements.txt +9 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,21 @@

+# HuggingFace Spaces — Plexi API
+# Uses Python 3.11 slim. HF Spaces expects the app on port 7860.
+FROM python:3.11-slim
+WORKDIR /app
+# System deps for sentence-transformers (tokenizers, etc.)
+# The apt package lists are removed in the same layer to keep the image small.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+# requirements.txt is copied and installed before the rest of the source so
+# the dependency layer can be reused when only application code changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+# HuggingFace Spaces default port
+EXPOSE 7860
+# "main:app" is the FastAPI instance named `app` in main.py.
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md ADDED Viewed

	@@ -0,0 +1,40 @@

+# Plexi API
+FastAPI RAG backend deployed on HuggingFace Spaces.
+## What It Does
+- Loads the pre-built LlamaIndex vector store from the `plexi-materials` GitHub repo at startup
+- Exposes three endpoints consumed by the Cloudflare Worker middleman
+## Endpoints
+| Method | Path | Purpose |
+|---|---|---|
+| `GET` | `/health` | Liveness probe — used by keep-alive GitHub Actions |
+| `GET` | `/manifest` | Proxies + caches `manifest.json` from the materials repo |
+| `POST` | `/retrieve` | Embeds query, searches index, returns scoped top-k chunks |
+## Local Development
+```bash
+pip install -r requirements.txt
+uvicorn main:app --reload --port 7860
+```
+Visit `http://localhost:7860/docs` for the interactive API docs.
+## Environment Variables
+| Variable | Default | Purpose |
+|---|---|---|
+| `MATERIALS_REPO` | `KunalGupta25/plexi-materials` | GitHub repo with study materials |
+| `MANIFEST_BRANCH` | `main` | Branch that holds `manifest.json` and `index/` |
+| `ALLOWED_ORIGINS` | `https://plexi.lazyhideout.tech` plus `http://localhost:5173` and `http://localhost:4173` | Comma-separated list of CORS allowed origins |
+## Deploy to HuggingFace Spaces
+1. Create a new Space with **Docker** SDK
+2. Push this folder as the Space repo
+3. Set environment variables in the Space settings
+4. HF will build and run the Dockerfile automatically

__pycache__/main.cpython-313.pyc ADDED Viewed

Binary file (7.69 kB). View file

__pycache__/rag.cpython-313.pyc ADDED Viewed

Binary file (7.94 kB). View file

main.py ADDED Viewed

	@@ -0,0 +1,211 @@

+"""
+main.py — Plexi API (FastAPI service for HuggingFace Spaces)
+============================================================
+Endpoints:
+  POST /retrieve   — embed query + vector search (scope-filtered)
+  GET  /manifest   — proxy + cache the materials manifest.json
+  GET  /health     — liveness probe (also used by keep-alive cron)
+The heavy resources (index + embedding model) are loaded ONCE at startup via
+FastAPI's lifespan context manager and shared across all requests.
+"""
+import os
+import time
+from contextlib import asynccontextmanager
+from functools import lru_cache
+import requests
+from fastapi import FastAPI, HTTPException, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, Field
+from rag import (
+    DEFAULT_TOP_K,
+    MATERIALS_REPO,
+    MANIFEST_BRANCH,
+    format_context,
+    load_index,
+    retrieve_chunks,
+)
+# ---------------------------------------------------------------------------
+# Config
+# ---------------------------------------------------------------------------
+# Comma-separated list of origins allowed by CORS. Each entry is trimmed and
+# empty entries are dropped, so values such as "a, b" or a trailing comma in
+# the environment variable still produce exact origin strings.
+ALLOWED_ORIGINS = [
+    origin.strip()
+    for origin in os.getenv(
+        "ALLOWED_ORIGINS",
+        # Default: allow the Cloudflare Pages domain + localhost for dev
+        "https://plexi.lazyhideout.tech,http://localhost:5173,http://localhost:4173",
+    ).split(",")
+    if origin.strip()
+]
+# ---------------------------------------------------------------------------
+# Startup / Shutdown — load heavy resources once
+# ---------------------------------------------------------------------------
+# Process-wide holder for values created at startup and read by the routes.
+_state: dict = {}
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Fill ``_state`` with the RAG index before serving; empty it on exit."""
+    print("Loading RAG index from GitHub…")
+    started = time.time()
+    index, error = load_index()
+    took = round(time.time() - started, 2)
+    if not error:
+        print(f"✅ RAG index loaded in {took}s")
+    else:
+        print(f"⚠️  RAG index unavailable: {error}")
+    # On failure the index slot is None and the message is kept for /health.
+    _state["index"] = None if error else index
+    _state["index_error"] = error if error else None
+    _state["index_loaded"] = index is not None
+    _state["startup_ts"] = time.time()
+    yield
+    # Nothing heavy to release; just drop the stored references.
+    _state.clear()
+# ---------------------------------------------------------------------------
+# App
+# ---------------------------------------------------------------------------
+# Application object. `lifespan` is passed in so the RAG index is loaded once
+# at startup rather than per request.
+app = FastAPI(
+    title="Plexi API",
+    description=(
+        "RAG retrieval backend for Plexi. "
+        "Accepts student queries and returns relevant study material chunks."
+    ),
+    version="1.0.0",
+    lifespan=lifespan,
+)
+# CORS: only origins listed in ALLOWED_ORIGINS are allowed, limited to the
+# methods and the single request header named below.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=ALLOWED_ORIGINS,
+    allow_credentials=False,
+    allow_methods=["GET", "POST", "OPTIONS"],
+    allow_headers=["Content-Type"],
+)
+# ---------------------------------------------------------------------------
+# Request / Response models
+# ---------------------------------------------------------------------------
+# Request body for POST /retrieve. The three text fields are required and
+# length-bounded; top_k falls back to DEFAULT_TOP_K and must be within 1..20.
+class RetrieveRequest(BaseModel):
+    query: str = Field(..., min_length=1, max_length=2000)
+    semester: str = Field(..., min_length=1, max_length=100)
+    subject: str = Field(..., min_length=1, max_length=100)
+    top_k: int = Field(default=DEFAULT_TOP_K, ge=1, le=20)
+# One retrieved chunk as returned to the client. The keys mirror the dicts
+# produced by rag.retrieve_chunks (text, score, filename, subject); score,
+# filename and subject may be None.
+class ChunkResult(BaseModel):
+    text: str
+    score: float | None
+    filename: str | None
+    subject: str | None
+# Response body for POST /retrieve. Echoes the request scope alongside the
+# chunks; rag_active reports whether an index was loaded, and
+# context_formatted is the output of rag.format_context for the same chunks.
+class RetrieveResponse(BaseModel):
+    chunks: list[ChunkResult]
+    query: str
+    semester: str
+    subject: str
+    rag_active: bool
+    context_formatted: str
+# ---------------------------------------------------------------------------
+# Manifest caching (simple in-memory, 5-minute TTL)
+# ---------------------------------------------------------------------------
+_manifest_cache: dict = {"data": None, "fetched_at": 0}
+MANIFEST_TTL = 300  # seconds
+def _get_manifest() -> dict:
+    """
+    Return the materials manifest, using the in-memory cache when it is fresh.
+    A cached value younger than MANIFEST_TTL seconds is returned as-is;
+    otherwise manifest.json is fetched from raw.githubusercontent.com, stored
+    in the cache and returned.
+    Raises whatever ``requests`` raises on network/HTTP failure or an invalid
+    JSON body; the cache is left unchanged in that case.
+    """
+    now = time.time()
+    cached = _manifest_cache["data"]
+    # Compare against None rather than truthiness so an empty manifest is
+    # cached too instead of triggering a GitHub request on every call.
+    if cached is not None and (now - _manifest_cache["fetched_at"]) < MANIFEST_TTL:
+        return cached
+    url = f"https://raw.githubusercontent.com/{MATERIALS_REPO}/{MANIFEST_BRANCH}/manifest.json"
+    resp = requests.get(url, timeout=15)
+    resp.raise_for_status()
+    data = resp.json()
+    _manifest_cache["data"] = data
+    _manifest_cache["fetched_at"] = now
+    return data
+# ---------------------------------------------------------------------------
+# Routes
+# ---------------------------------------------------------------------------
+@app.get("/health")
+def health():
+    """Liveness probe — also pinged by the GitHub Actions keep-alive cron."""
+    now = time.time()
+    # When startup has not recorded a timestamp yet, fall back to the
+    # current time so the reported uptime is (approximately) zero.
+    started_at = _state.get("startup_ts", time.time())
+    payload = {
+        "status": "ok",
+        "index_loaded": _state.get("index_loaded", False),
+        "index_error": _state.get("index_error"),
+        "embed_model": "sentence-transformers/all-MiniLM-L6-v2",
+        "uptime_seconds": round(now - started_at, 1),
+    }
+    return payload
+@app.get("/manifest")
+def get_manifest():
+    """
+    Proxy and cache the study materials manifest.json from GitHub.
+    The Cloudflare Worker also caches this in KV — this is a double layer.
+    Responds 502 when the upstream request to GitHub fails (HTTP error status,
+    timeout, connection error) and 500 for any other unexpected error.
+    """
+    try:
+        data = _get_manifest()
+        return JSONResponse(content=data)
+    except requests.RequestException as err:
+        # RequestException is the base class of HTTPError, Timeout and
+        # ConnectionError, so every upstream failure is reported as a bad
+        # gateway rather than only non-2xx responses.
+        raise HTTPException(
+            status_code=502, detail=f"GitHub fetch failed: {err}"
+        ) from err
+    except Exception as err:
+        raise HTTPException(status_code=500, detail=str(err)) from err
+@app.post("/retrieve", response_model=RetrieveResponse)
+def retrieve(body: RetrieveRequest):
+    """
+    Core RAG endpoint.
+    1. Embeds the query using all-MiniLM-L6-v2 (local, fast ~5-10ms)
+    2. Searches the pre-built LlamaIndex vector store
+    3. Filters results by semester + subject metadata
+    4. Returns top-k chunks + a formatted context string for the LLM prompt
+    """
+    active_index = _state.get("index")
+    # retrieve_chunks returns an empty list when no index is loaded, so this
+    # endpoint still answers; rag_active reports which case applied.
+    hits = retrieve_chunks(
+        index=active_index,
+        query=body.query,
+        semester=body.semester,
+        subject=body.subject,
+        top_k=body.top_k,
+    )
+    prompt_context = format_context(hits)
+    response_fields = {
+        "chunks": hits,
+        "query": body.query,
+        "semester": body.semester,
+        "subject": body.subject,
+        "rag_active": active_index is not None,
+        "context_formatted": prompt_context,
+    }
+    return RetrieveResponse(**response_fields)
+# ---------------------------------------------------------------------------
+# Run (for local development only — HF uses Dockerfile CMD)
+# ---------------------------------------------------------------------------
+if __name__ == "__main__":
+    # uvicorn is imported lazily: it is only needed as a module when this
+    # file is executed directly.
+    import uvicorn
+    # Same host/port as the Dockerfile CMD, plus auto-reload for development.
+    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)

rag.py ADDED Viewed

	@@ -0,0 +1,196 @@

+"""
+rag.py — Plexi RAG Engine
+=========================
+Handles everything related to the LlamaIndex vector index:
+  - Downloading the pre-built index from GitHub
+  - Loading HuggingFace sentence-transformer embeddings
+  - Embedding queries and retrieving top-k chunks scoped by semester + subject
+  - Extracting text from PDFs for full-context fallback
+  - Formatting retrieved chunks for the LLM system prompt
+"""
+import io
+import os
+import tempfile
+from pathlib import Path
+import requests
+# ---------------------------------------------------------------------------
+# Optional LlamaIndex — graceful degradation if not installed
+# ---------------------------------------------------------------------------
+# Each optional dependency sets a module-level flag instead of failing the
+# import of this module; load_index/load_embed_model check
+# LLAMA_INDEX_AVAILABLE and read_pdf_text checks PYPDF2_AVAILABLE.
+try:
+    from llama_index.core import Settings, StorageContext, load_index_from_storage
+    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+    LLAMA_INDEX_AVAILABLE = True
+except ImportError:
+    LLAMA_INDEX_AVAILABLE = False
+try:
+    import PyPDF2
+    PYPDF2_AVAILABLE = True
+except ImportError:
+    PYPDF2_AVAILABLE = False
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+# GitHub "owner/repo" and branch that the index files are downloaded from.
+MATERIALS_REPO = os.getenv("MATERIALS_REPO", "KunalGupta25/plexi-materials")
+MANIFEST_BRANCH = os.getenv("MANIFEST_BRANCH", "main")
+# Embedding model passed to HuggingFaceEmbedding.
+EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
+# Files fetched from the repo's index/ directory by load_index. Every one is
+# required: a failed download of any file aborts loading.
+INDEX_FILES = [
+    "default__vector_store.json",
+    "docstore.json",
+    "graph_store.json",
+    "image__vector_store.json",
+    "index_store.json",
+]
+# Number of chunks returned when the caller does not specify top_k.
+DEFAULT_TOP_K = 5
+# ---------------------------------------------------------------------------
+# Index loading (called once at FastAPI startup)
+# ---------------------------------------------------------------------------
+def load_index():
+    """
+    Download the pre-built LlamaIndex from the materials repo and return an
+    index ready for querying.
+    Each file in INDEX_FILES is fetched from the repo's ``index/`` directory
+    into a fresh temporary directory, which is then used as the persist_dir
+    for ``load_index_from_storage``. The global LlamaIndex ``Settings`` are
+    set to the HuggingFace embedding model with the LLM disabled.
+    Returns (index, error_msg). index is None if loading failed, in which
+    case error_msg describes the step that failed. This function does not
+    raise for download or load errors.
+    """
+    import shutil
+    if not LLAMA_INDEX_AVAILABLE:
+        return None, "llama-index-core is not installed."
+    index_base_url = (
+        f"https://raw.githubusercontent.com/{MATERIALS_REPO}/{MANIFEST_BRANCH}/index"
+    )
+    index_dir = tempfile.mkdtemp(prefix="plexi_index_")
+    def _fail(message: str):
+        # Remove the partially populated directory so failed attempts do not
+        # accumulate temp dirs on disk.
+        shutil.rmtree(index_dir, ignore_errors=True)
+        return None, message
+    for filename in INDEX_FILES:
+        url = f"{index_base_url}/{filename}"
+        try:
+            resp = requests.get(url, timeout=30)
+            resp.raise_for_status()
+            with open(os.path.join(index_dir, filename), "wb") as fh:
+                fh.write(resp.content)
+        except Exception as err:
+            return _fail(f"Failed to download index file '{filename}': {err}")
+    try:
+        embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_ID)
+        Settings.embed_model = embed_model
+        Settings.llm = None
+        storage_ctx = StorageContext.from_defaults(persist_dir=index_dir)
+        index = load_index_from_storage(storage_ctx)
+        # On success the directory is deliberately left in place, matching
+        # the previous behaviour.
+        return index, None
+    except Exception as err:
+        return _fail(f"Failed to load index from storage: {err}")
+def load_embed_model():
+    """Build a HuggingFace embedding model, or return None without LlamaIndex."""
+    return (
+        HuggingFaceEmbedding(model_name=EMBED_MODEL_ID)
+        if LLAMA_INDEX_AVAILABLE
+        else None
+    )
+# ---------------------------------------------------------------------------
+# Retrieval
+# ---------------------------------------------------------------------------
+def _matches_scope(node, semester: str, subject: str) -> bool:
+    """Tell whether a retrieved node's metadata matches both semester and subject exactly."""
+    # A missing or empty metadata attribute is treated as an empty dict.
+    meta = getattr(node.node, "metadata", {}) or {}
+    if meta.get("semester") != semester:
+        return False
+    return meta.get("subject") == subject
+def retrieve_chunks(
+    index,
+    query: str,
+    semester: str,
+    subject: str,
+    top_k: int = DEFAULT_TOP_K,
+) -> list[dict]:
+    """
+    Run a similarity search for ``query`` and keep only hits whose metadata
+    matches the given semester + subject, returning at most ``top_k`` of them.
+    Each result is a dict:
+        { text, score, filename, subject }
+    An empty list is returned when ``index`` is None or when retrieval raises
+    (the error is printed, not propagated).
+    """
+    if index is None:
+        return []
+    try:
+        # Over-fetch so that enough candidates survive the scope filter.
+        candidate_count = max(top_k * 5, 10)
+        candidates = index.as_retriever(similarity_top_k=candidate_count).retrieve(query)
+        in_scope = [hit for hit in candidates if _matches_scope(hit, semester, subject)]
+        results = []
+        for hit in in_scope[:top_k]:
+            text = hit.node.get_content()
+            score = None if hit.score is None else round(float(hit.score), 4)
+            meta = getattr(hit.node, "metadata", {}) or {}
+            results.append(
+                {
+                    "text": text,
+                    "score": score,
+                    "filename": meta.get("filename"),
+                    "subject": meta.get("subject"),
+                }
+            )
+        return results
+    except Exception as err:
+        print(f"Retrieval error: {err}")
+        return []
+# ---------------------------------------------------------------------------
+# Context formatting (for system prompt injection)
+# ---------------------------------------------------------------------------
+def format_context(chunks: list[dict]) -> str:
+    """
+    Format retrieved chunks as a numbered block for the LLM system prompt.
+    Each chunk dict must have a "text" key; "score", "filename" and "subject"
+    are optional. The header of each entry names the filename (falling back
+    to the subject, then "Unknown source") and, when a score is present, its
+    relevance. Returns a fixed placeholder sentence when ``chunks`` is empty.
+    """
+    if not chunks:
+        return "(No relevant context retrieved for this query.)"
+    parts = []
+    for i, chunk in enumerate(chunks, start=1):
+        # Test against None, not truthiness: a score of 0.0 is a real score
+        # and must still be shown.
+        score = chunk.get("score")
+        score_info = f"  [relevance: {score}]" if score is not None else ""
+        source = chunk.get("filename") or chunk.get("subject") or "Unknown source"
+        parts.append(
+            f"--- Chunk {i} | {source}{score_info} ---\n{chunk['text']}\n"
+        )
+    return "\n".join(parts)
+# ---------------------------------------------------------------------------
+# PDF text extraction (used for full-context fallback loading)
+# ---------------------------------------------------------------------------
+def read_pdf_text(pdf_bytes: bytes) -> str:
+    """
+    Extract plain text from PDF bytes.
+    Returns the per-page text joined with newlines. Pages whose extraction
+    raises, or that yield no text, are skipped.
+    Returns an empty string when PyPDF2 is not installed. If the reader
+    cannot be created or iterating the pages fails, any text collected so
+    far is discarded and the raw bytes are decoded as UTF-8 instead
+    (undecodable bytes ignored; empty string for empty input).
+    """
+    if not PYPDF2_AVAILABLE:
+        return ""
+    text_parts = []
+    try:
+        reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
+        for page in reader.pages:
+            try:
+                page_text = page.extract_text()
+                if page_text:
+                    # Sanitise surrogate pairs that can appear in some PDFs
+                    # by round-tripping through UTF-16: "surrogatepass" lets
+                    # them be encoded, "ignore" drops what cannot be decoded.
+                    filtered = page_text.encode("utf-16", "surrogatepass").decode(
+                        "utf-16", "ignore"
+                    )
+                    text_parts.append(filtered)
+            except Exception:
+                # Best effort: one unreadable page must not lose the others.
+                pass
+    except Exception:
+        # Not parseable as a PDF: fall back to treating the bytes as text.
+        return pdf_bytes.decode("utf-8", errors="ignore") if pdf_bytes else ""
+    return "\n".join(text_parts)

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+fastapi>=0.115.0,<1.0.0
+uvicorn[standard]>=0.30.0,<1.0.0
+pydantic>=2.0.0,<3.0.0
+requests>=2.31.0,<3.0.0
+python-dotenv>=1.0.0
+PyPDF2>=3.0.0,<4.0.0
+llama-index-core>=0.11.0,<0.13.0
+llama-index-embeddings-huggingface>=0.3.0,<1.0.0
+sentence-transformers>=3.0.0,<4.0.0