"""HTTP API for document analysis: summary, entity extraction and sentiment."""

import base64
import logging
import os
import secrets
from typing import Optional, List

from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# Importing your internal logic
from src.processor import get_text_from_base64
from src.analyzer import run_analysis

logger = logging.getLogger(__name__)

app = FastAPI(
    title="Intelligent Document Processing API",
    description="Backend Engine for AI-powered Summary, Entity Extraction, and Sentiment Analysis.",
    version="1.0.0"
)

# 1. ADD CORS MIDDLEWARE HERE
# This allows the task site's "Test" button to talk to your API
# NOTE(review): wildcard origins together with allow_credentials=True is
# discouraged by the FastAPI CORS documentation. Authentication here uses the
# x-api-key header, so consider listing explicit origins before production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Configuration
# NOTE(review): the fallback key is hard-coded in source and therefore not a
# secret. It is kept for backward compatibility; set the API_KEY environment
# variable in every real deployment.
API_KEY = os.getenv("API_KEY", "sk_track2_987654321")


# --- Data Models ---
class AnalysisRequest(BaseModel):
    """Request body: a file name, its declared type and base64 content."""

    fileName: str
    fileType: str
    fileBase64: str


class EntityResults(BaseModel):
    """Entities extracted from the document, grouped by category."""

    names: List[str]
    organizations: List[str]
    dates: List[str]
    amounts: List[str]


# Simplified to match the task site's expected fields exactly
class AnalysisResponse(BaseModel):
    """Response body returned by the analysis endpoint."""

    fileName: str
    summary: str
    entities: EntityResults
    sentiment: str


# 2. ADD ROOT ROUTE HERE


def _is_valid_api_key(candidate: Optional[str]) -> bool:
    """Return True when *candidate* equals API_KEY (constant-time comparison)."""
    if candidate is None:
        return False
    # Compare as bytes: compare_digest rejects non-ASCII str arguments.
    return secrets.compare_digest(candidate.encode("utf-8"), API_KEY.encode("utf-8"))


def _decode_file_payload(file_b64: str) -> bytes:
    """Decode the base64 payload, repairing missing '=' padding.

    Raises:
        HTTPException: 400 when the payload cannot be decoded.
    """
    # Whitespace (e.g. line wraps or a trailing newline) would skew the
    # length-based padding calculation below, so drop it first.
    compact = "".join(file_b64.split())

    # Base64 Padding Fix
    missing_padding = len(compact) % 4
    if missing_padding:
        compact += "=" * (4 - missing_padding)

    try:
        return base64.b64decode(compact)
    except ValueError:
        # binascii.Error is a ValueError subclass; non-ASCII input also
        # raises ValueError.
        raise HTTPException(status_code=400, detail="Corrupted file data") from None


def _unique(values: List[str]) -> List[str]:
    """Remove duplicates while keeping first-seen order (deterministic output)."""
    return list(dict.fromkeys(values))


# --- API Endpoint ---
@app.post("/api/document-analyze", response_model=AnalysisResponse)
async def analyze_document(
    data: AnalysisRequest,
    x_api_key: Optional[str] = Header(None)
):
    """Analyze an uploaded document and return summary, entities and sentiment.

    Args:
        data: Request body carrying the file name, file type and base64 content.
        x_api_key: Value of the ``x-api-key`` request header.

    Returns:
        A dict matching ``AnalysisResponse``.

    Raises:
        HTTPException: 401 for a missing or wrong API key, 400 for undecodable
            base64, 422 when a ValueError is raised during extraction or
            analysis (including too-short text), 500 for any other failure.
    """
    # 1. Security Check
    if not _is_valid_api_key(x_api_key):
        raise HTTPException(status_code=401, detail="Invalid API Key")

    # 2. Extract & Decode
    # Done outside the try block below so the 400 response is not caught by
    # the generic handler and turned into a 500.
    file_bytes = _decode_file_payload(data.fileBase64)

    try:
        # 3. OCR / Extraction
        text = get_text_from_base64(file_bytes, data.fileType.lower())

        if not text or len(text.strip()) < 10:
            raise ValueError("Document is empty or text is unreadable (OCR Failed)")

        # 4. AI Analysis
        analysis = run_analysis(text)
        entities = analysis["entities"]

        # 5. Return Structured Response (Matched to Task Site)
        return {
            "fileName": data.fileName,
            "summary": analysis["summary"],
            "entities": {
                "names": _unique(entities.get("names", [])),
                "organizations": _unique(entities.get("organizations", [])),
                "dates": _unique(entities.get("dates", [])),
                "amounts": _unique(entities.get("amounts", [])),
            },
            "sentiment": analysis["sentiment"],
        }

    except HTTPException:
        # Let deliberate HTTP errors through unchanged.
        raise
    except ValueError as ve:
        raise HTTPException(status_code=422, detail=str(ve))
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("🔥 DEPLOYMENT ERROR: %s", e)
        raise HTTPException(status_code=500, detail="Internal Server Error during AI analysis")