Spaces:
Running
Running
| """ | |
| LexMind — FastAPI Backend (Pinecone + HuggingFace Inference API) | |
| Run with: uvicorn main:app --reload --port 8000 | |
| """ | |
| import os | |
| import re | |
| import json | |
| from pathlib import Path | |
| from typing import Optional | |
| import httpx | |
| import fitz # PyMuPDF | |
| import torch | |
| from fastapi import FastAPI, UploadFile, File, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.staticfiles import StaticFiles | |
| from fastapi.responses import FileResponse | |
| from pydantic import BaseModel | |
| from sentence_transformers import SentenceTransformer | |
| from pinecone import Pinecone | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| # ── Configuration ───────────────────────────────────────────────────────────── | |
| PINECONE_API_KEY = os.getenv("pinecone", "") | |
| HF_API_KEY = os.getenv("hf_tokens", "") | |
| JUDGEMENTS_INDEX = "legal-judgements" | |
| LEGAL_FRAMEWORK_INDEX = "legal-framework" | |
| LOCAL_MODEL_DIR = "./models/bge-small" | |
| EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5" | |
| DEVICE = "cuda" if torch.cuda.is_available() else "cpu" | |
| # Both stages use the same model — change here to use different ones | |
| HF_ROUTER_MODEL = "meta-llama/Llama-3.1-8B-Instruct" # Stage 1: conversation + routing | |
| HF_LEGAL_MODEL = "meta-llama/Llama-3.1-8B-Instruct" # Stage 2: legal RAG answer | |
| HF_CHAT_URL = "https://router.huggingface.co/v1/chat/completions" | |
| BGE_PREFIX = "Represent this sentence for searching relevant passages: " | |
| TOP_K = 10 | |
| CONSTITUTION_TOP_K = 5 | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| # ── Load embedding model ────────────────────────────────────────────────────── | |
| def load_embed_model() -> SentenceTransformer: | |
| local = Path(LOCAL_MODEL_DIR) | |
| if local.exists() and any(local.iterdir()): | |
| print(f"✅ Loading bge-small from '{LOCAL_MODEL_DIR}'") | |
| else: | |
| print(f"📥 Downloading {EMBED_MODEL_NAME} (~130 MB)…") | |
| local.mkdir(parents=True, exist_ok=True) | |
| m = SentenceTransformer(EMBED_MODEL_NAME) | |
| m.save(str(local)) | |
| print(f"✅ Model saved to '{LOCAL_MODEL_DIR}'") | |
| model = SentenceTransformer(str(local)) | |
| model = model.to(DEVICE) | |
| print(f" Embedding device: {DEVICE}") | |
| return model | |
| embed_model = load_embed_model() | |
| # ── Connect to Pinecone ─────────────────────────────────────────────────────── | |
| print("🔌 Connecting to Pinecone…") | |
| pc = Pinecone(api_key=PINECONE_API_KEY) | |
| judgements_index = pc.Index(JUDGEMENTS_INDEX) | |
| print(f"✅ Judgements index | vectors: {judgements_index.describe_index_stats().total_vector_count}") | |
| try: | |
| legal_index = pc.Index(LEGAL_FRAMEWORK_INDEX) | |
| print(f"✅ Legal framework index | vectors: {legal_index.describe_index_stats().total_vector_count}") | |
| except Exception: | |
| legal_index = None | |
| print("⚠️ Legal framework index not found — run build_pinecone_legal.py.") | |
| # ── FastAPI app ─────────────────────────────────────────────────────────────── | |
| app = FastAPI(title="LexMind API", version="3.0.0") | |
| app.add_middleware( | |
| CORSMiddleware, | |
| allow_origins=["*"], | |
| allow_methods=["*"], | |
| allow_headers=["*"], | |
| ) | |
| # ── Pydantic models ─────────────────────────────────────────────────────────── | |
| class SearchRequest(BaseModel): | |
| query: str | |
| top_k: int = 10 | |
| offset: int = 0 | |
| year_from: Optional[int] = None | |
| year_to: Optional[int] = None | |
| class ChatRequest(BaseModel): | |
| message: str | |
| context: str = "" | |
| system_prompt: str = "" | |
| model_override: str = "" | |
| class DroppedCitationModel(BaseModel): | |
| file_name: str = "" | |
| year: str = "" | |
| content: str = "" | |
| score: float = 0.0 | |
| class SmartChatRequest(BaseModel): | |
| message: str | |
| case_text: str = "" # user's case description | |
| dropped_citation: Optional[DroppedCitationModel] = None # only if user dragged a doc | |
| # ── HuggingFace helper ──────────────────────────────────────────────────────── | |
| async def call_hf( | |
| model: str, | |
| system: str, | |
| user: str, | |
| temperature: float = 0.4, | |
| max_tokens: int = 1024, | |
| timeout: int = 120, | |
| ) -> str: | |
| headers = { | |
| "Authorization": f"Bearer {HF_API_KEY}", | |
| "Content-Type": "application/json", | |
| } | |
| payload = { | |
| "model": model, | |
| "messages": [ | |
| {"role": "system", "content": system}, | |
| {"role": "user", "content": user}, | |
| ], | |
| "max_tokens": max_tokens, | |
| "temperature": temperature, | |
| "top_p": 0.9, | |
| "stream": False, | |
| } | |
| async with httpx.AsyncClient(timeout=timeout) as client: | |
| r = await client.post(HF_CHAT_URL, headers=headers, json=payload) | |
| if r.status_code != 200: | |
| print(f"[HF ERROR] status={r.status_code} model={model} body={r.text[:400]}") | |
| if r.status_code == 401: | |
| raise HTTPException(status_code=401, | |
| detail="Invalid HuggingFace API key.") | |
| if r.status_code == 403: | |
| raise HTTPException(status_code=403, | |
| detail=f"Access denied for '{model}'. Accept the license at huggingface.co/{model}") | |
| if r.status_code == 404: | |
| raise HTTPException(status_code=404, | |
| detail=f"Model '{model}' not found.") | |
| if r.status_code == 429: | |
| raise HTTPException(status_code=429, | |
| detail="HuggingFace rate limit hit. Please wait and retry.") | |
| if r.status_code == 503: | |
| raise HTTPException(status_code=503, | |
| detail=f"Model '{model}' is loading (~20s). Please retry.") | |
| r.raise_for_status() | |
| data = r.json() | |
| choices = data.get("choices", []) | |
| if choices: | |
| content = choices[0].get("message", {}).get("content", "") | |
| if content: | |
| return content.strip() | |
| if isinstance(data, list) and data: | |
| return data[0].get("generated_text", "").strip() | |
| raise HTTPException(status_code=500, | |
| detail=f"Unexpected HF response: {str(data)[:200]}") | |
| # ── Embed helper ────────────────────────────────────────────────────────────── | |
| def embed_query(text: str) -> list[float]: | |
| return embed_model.encode( | |
| BGE_PREFIX + text, | |
| normalize_embeddings=True, | |
| device=DEVICE | |
| ).tolist() | |
| # ── Routes ──────────────────────────────────────────────────────────────────── | |
| async def health(): | |
| hf_ok = False | |
| try: | |
| async with httpx.AsyncClient(timeout=5) as client: | |
| r = await client.get( | |
| "https://huggingface.co/api/whoami", | |
| headers={"Authorization": f"Bearer {HF_API_KEY}"} | |
| ) | |
| hf_ok = r.status_code == 200 | |
| except Exception: | |
| pass | |
| j_stats = judgements_index.describe_index_stats() | |
| l_stats = legal_index.describe_index_stats() if legal_index else None | |
| return { | |
| "status": "ok", | |
| "huggingface": "authenticated" if hf_ok else "check HF_API_KEY", | |
| "router_model": HF_ROUTER_MODEL, | |
| "legal_model": HF_LEGAL_MODEL, | |
| "judgements_vectors": j_stats.total_vector_count, | |
| "legal_vectors": l_stats.total_vector_count if l_stats else 0, | |
| "embed_device": DEVICE, | |
| } | |
| async def search(req: SearchRequest): | |
| """Semantic search over judgements Pinecone index with pagination and optional year filtering.""" | |
| if not req.query.strip(): | |
| raise HTTPException(status_code=400, detail="Query cannot be empty.") | |
| has_year_filter = req.year_from is not None and req.year_to is not None | |
| if has_year_filter: | |
| fetch_k = min(300, max(req.offset + req.top_k * 10, 150)) | |
| else: | |
| fetch_k = min(req.offset + req.top_k, 100) | |
| try: | |
| result = judgements_index.query( | |
| vector=embed_query(req.query), | |
| top_k=fetch_k, | |
| include_metadata=True, | |
| ) | |
| except Exception as e: | |
| raise HTTPException(status_code=500, detail=f"Search failed: {str(e)}") | |
| output = [] | |
| for m in result.get("matches", []): | |
| meta = m.get("metadata", {}) | |
| year_str = meta.get("year", "unknown") | |
| if has_year_filter: | |
| try: | |
| year_int = int(year_str) | |
| if not (req.year_from <= year_int <= req.year_to): | |
| continue | |
| except (ValueError, TypeError): | |
| continue | |
| output.append({ | |
| "file_name": meta.get("file_name", "Unknown"), | |
| "year": year_str, | |
| "source": meta.get("source", ""), | |
| "score": round(float(m.get("score", 0)), 4), | |
| "content": meta.get("content", ""), | |
| }) | |
| output.sort(key=lambda x: x["score"], reverse=True) | |
| paginated = output[req.offset: req.offset + req.top_k] | |
| return { | |
| "results": paginated, | |
| "count": len(output), | |
| } | |
| async def extract_pdf(file: UploadFile = File(...)): | |
| """Extract full text from an uploaded PDF.""" | |
| if not file.filename.lower().endswith(".pdf"): | |
| raise HTTPException(status_code=400, detail="Only PDF files are accepted.") | |
| contents = await file.read() | |
| try: | |
| doc = fitz.open(stream=contents, filetype="pdf") | |
| pages = [page.get_text() for page in doc] | |
| doc.close() | |
| text = "\n\n".join(pages).strip() | |
| except Exception as e: | |
| raise HTTPException(status_code=500, detail=f"PDF extraction failed: {str(e)}") | |
| return {"text": text, "pages": len(pages), "filename": file.filename} | |
| async def legal_context(req: SearchRequest): | |
| """Retrieve legal framework chunks from Pinecone.""" | |
| if not legal_index: | |
| return {"results": [], "count": 0} | |
| if not req.query.strip(): | |
| raise HTTPException(status_code=400, detail="Query cannot be empty.") | |
| try: | |
| result = legal_index.query( | |
| vector=embed_query(req.query), | |
| top_k=min(req.top_k or CONSTITUTION_TOP_K, 10), | |
| include_metadata=True, | |
| ) | |
| except Exception as e: | |
| raise HTTPException(status_code=500, detail=f"Legal context search failed: {str(e)}") | |
| output = [] | |
| for m in result.get("matches", []): | |
| meta = m.get("metadata", {}) | |
| output.append({ | |
| "source": meta.get("source", "Unknown"), | |
| "type": meta.get("type", ""), | |
| "section": meta.get("section", ""), | |
| "score": round(float(m.get("score", 0)), 4), | |
| "content": meta.get("content", ""), | |
| }) | |
| output.sort(key=lambda x: x["score"], reverse=True) | |
| return {"results": output, "count": len(output)} | |
| async def chat_legacy(req: ChatRequest): | |
| """Legacy endpoint — used by CitationCard summarize and AI compare features.""" | |
| system = ( | |
| "You are LexMind, a professional Indian legal research assistant. " | |
| "Answer concisely and professionally based only on the provided context." | |
| ) | |
| user = ( | |
| f"CONTEXT:\n{req.context}\n\nQUESTION: {req.message}" | |
| if req.context.strip() else req.message | |
| ) | |
| try: | |
| reply = await call_hf(HF_LEGAL_MODEL, system, user) | |
| return {"reply": reply} | |
| except HTTPException: | |
| raise | |
| except Exception as e: | |
| raise HTTPException(status_code=500, detail=f"Chat failed: {str(e)}") | |
| async def smart_chat(req: SmartChatRequest): | |
| """ | |
| Two-stage conversational chat: | |
| Stage 1 — LLM1 (Llama-3.1-8B): | |
| - Always knows the user's case description | |
| - Handles casual conversation naturally | |
| - If legal question detected, produces a precise rag_query for LLM2 | |
| - Has NO knowledge of retrieved judgements | |
| - Only knows about a dropped citation if user explicitly dragged one in | |
| Stage 2 — LLM2 (Llama-3.1-8B): | |
| - Only called when Stage 1 detects a legal question | |
| - Gets: legal framework from Pinecone + dropped citation (if any) | |
| - Returns grounded legal answer with [LAW: source] citations | |
| """ | |
| # ── Build case context for LLM1 ────────────────────────────────────────── | |
| case_ctx = "" | |
| if req.case_text.strip(): | |
| case_ctx = f"\nCURRENT USER CASE:\n{req.case_text[:800]}\n" | |
| dropped_ctx = "" | |
| if req.dropped_citation and req.dropped_citation.content.strip(): | |
| name = (req.dropped_citation.file_name or '').replace('_', ' ').strip() | |
| dropped_ctx = ( | |
| f"\nUSER HAS SHARED THIS JUDGEMENT FOR DISCUSSION:\n" | |
| f"Case: {name} ({req.dropped_citation.year or '?'})\n" | |
| f"{req.dropped_citation.content[:2000]}\n" | |
| ) | |
| # ── Stage 1: Router + conversationalist ────────────────────────────────── | |
| router_system = f"""You are LexMind, a friendly and professional Indian legal research assistant. | |
| {case_ctx}{dropped_ctx} | |
| YOUR BEHAVIOUR: | |
| - For casual messages (greetings, thanks, small talk): reply naturally and warmly in 1-2 sentences. | |
| - For questions about the shared judgement above (if any): you can answer directly from it. | |
| - For legal questions requiring Constitution/IPC/CrPC/BSA knowledge: identify what needs to be looked up. | |
| - Never make up legal information you are not sure about. | |
| Respond ONLY with valid JSON, no extra text, no markdown fences: | |
| For casual chat: | |
| {{"intent": "chat", "response": "your warm friendly reply here", "rag_query": null}} | |
| For a legal question you can answer from the shared judgement: | |
| {{"intent": "citation", "response": "your answer from the judgement", "rag_query": null}} | |
| For a legal question needing Constitution/IPC/CrPC/BSA lookup: | |
| {{"intent": "legal", "response": null, "rag_query": "precise 3-8 word search query"}}""" | |
| router_user = f'User message: "{req.message}"' | |
| try: | |
| raw = await call_hf( | |
| HF_ROUTER_MODEL, | |
| router_system, | |
| router_user, | |
| temperature=0.2, | |
| max_tokens=300, | |
| timeout=60, | |
| ) | |
| except HTTPException: | |
| raise | |
| except Exception as e: | |
| raise HTTPException(status_code=500, detail=f"Stage 1 failed: {str(e)}") | |
| # ── Parse Stage 1 JSON ──────────────────────────────────────────────────── | |
| intent = "chat" | |
| response = None | |
| rag_query = None | |
| try: | |
| clean = re.sub(r"```json|```", "", raw).strip() | |
| match = re.search(r"\{.*\}", clean, re.DOTALL) | |
| parsed = json.loads(match.group(0) if match else clean) | |
| intent = parsed.get("intent", "chat") | |
| response = parsed.get("response") | |
| rag_query = parsed.get("rag_query") | |
| except Exception: | |
| # JSON parse failed — treat raw text as a casual reply | |
| intent = "chat" | |
| response = raw.strip() if raw.strip() else "How can I help you?" | |
| # ── Stage 1 exits: casual or citation answer ────────────────────────────── | |
| if intent in ("chat", "citation"): | |
| return { | |
| "reply": response or "How can I help you today?", | |
| "intent": intent, | |
| } | |
| # ── Stage 2: Legal RAG answer ───────────────────────────────────────────── | |
| search_q = rag_query or req.message | |
| # 2a. Search Pinecone legal-framework index | |
| legal_ctx = "" | |
| if legal_index and search_q: | |
| try: | |
| law_result = legal_index.query( | |
| vector=embed_query(search_q), | |
| top_k=CONSTITUTION_TOP_K, | |
| include_metadata=True, | |
| ) | |
| matches = sorted( | |
| law_result.get("matches", []), | |
| key=lambda x: x.get("score", 0), | |
| reverse=True, | |
| ) | |
| if matches: | |
| legal_ctx = "RELEVANT LEGAL FRAMEWORK (Constitution / IPC / CrPC / BSA):\n\n" | |
| for m in matches: | |
| meta = m.get("metadata", {}) | |
| src = meta.get("source", "Law") | |
| sec = meta.get("section", "") | |
| legal_ctx += f"[LAW: {src}{' S.' + str(sec) if sec else ''}]\n" | |
| legal_ctx += f"{meta.get('content', '')[:600]}\n\n---\n\n" | |
| except Exception: | |
| pass # continue without legal context | |
| # 2b. Build Stage 2 context | |
| # Includes: case description + dropped citation (if any) + legal framework | |
| # Does NOT include retrieved judgements | |
| stage2_context = "" | |
| if req.case_text.strip(): | |
| stage2_context += f"USER'S CASE:\n{req.case_text[:800]}\n\n" | |
| if dropped_ctx: | |
| stage2_context += dropped_ctx + "\n" | |
| if legal_ctx: | |
| stage2_context += legal_ctx | |
| legal_system = """You are LexMind, a professional Indian legal research assistant. | |
| KNOWLEDGE BASE YOU CAN USE: | |
| - The user's case description (if provided) | |
| - A shared judgement (if user dragged one in) | |
| - Indian Constitution, IPC, CrPC, BSA 2023 — cited as [LAW: source S.section] | |
| KNOWLEDGE GAPS — be honest if asked about these: | |
| - Code of Civil Procedure (CPC) — not in your knowledge base | |
| - Indian Contract Act — not in your knowledge base | |
| - Transfer of Property Act — not in your knowledge base | |
| RULES: | |
| 1. Answer ONLY from the provided context. Never fabricate. | |
| 2. Cite laws as [LAW: IPC S.302] or [LAW: Indian Constitution Art.21]. | |
| 3. If context is insufficient: "I don't have enough information on this. Please search for relevant citations." | |
| 4. Be concise, clear, and professional. | |
| 5. Answer directly — no preamble like "Based on the context provided…".""" | |
| legal_user = ( | |
| f"QUESTION: {req.message}\n\nCONTEXT:\n{stage2_context}" | |
| if stage2_context.strip() | |
| else req.message | |
| ) | |
| try: | |
| reply = await call_hf( | |
| HF_LEGAL_MODEL, | |
| legal_system, | |
| legal_user, | |
| temperature=0.2, | |
| max_tokens=1024, | |
| timeout=120, | |
| ) | |
| return {"reply": reply, "intent": "legal"} | |
| except HTTPException: | |
| raise | |
| except Exception as e: | |
| raise HTTPException(status_code=500, detail=f"Stage 2 failed: {str(e)}") | |
| # ── Serve React frontend ────────────────────────────────────────────────────── | |
| # Built frontend output is generated under ../frontend/dist (relative to backend/) | |
| dist_path = Path("../frontend/dist") | |
| if dist_path.exists(): | |
| app.mount("/assets", StaticFiles(directory=str(dist_path / "assets")), name="assets") | |
| async def serve_frontend(): | |
| return FileResponse(str(dist_path / "index.html")) | |
| async def serve_spa(full_path: str): | |
| return FileResponse(str(dist_path / "index.html")) | |