Spaces:
Sleeping
Sleeping
import base64
import hmac
import os
from typing import Optional, List

from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# Importing your internal logic
from src.processor import get_text_from_base64
from src.analyzer import run_analysis
# ASGI application object; the metadata below is passed to FastAPI as the
# API's title, version and description.
app = FastAPI(
    version="1.0.0",
    title="Intelligent Document Processing API",
    description="Backend Engine for AI-powered Summary, Entity Extraction, and Sentiment Analysis.",
)
# CORS: let browser front-ends on any origin (e.g. the task site's "Test"
# button) call this API with any method and any request header.
#
# Credentials are deliberately disabled. The CORS specification does not allow
# credentialed requests together with wildcard origins/methods/headers, and the
# FastAPI documentation requires explicit lists when ``allow_credentials`` is
# True. The handler in this file authenticates with the ``X-API-Key`` request
# header (covered by ``allow_headers``), not with cookies.
# NOTE(review): if a client really needs cookies, replace the "*" entries with
# explicit origins/methods/headers before re-enabling ``allow_credentials``.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Configuration
# Shared secret compared against the client's API-key header. It is read from
# the API_KEY environment variable at import time.
# NOTE(review): the fallback below is a key hard-coded in source; set API_KEY
# in the deployment environment rather than relying on it.
API_KEY = os.environ.get("API_KEY", "sk_track2_987654321")
# --- Data Models ---
class AnalysisRequest(BaseModel):
    """Request body for a document-analysis call."""

    # Name of the uploaded file; echoed back unchanged in the response.
    fileName: str
    # File type label; lower-cased and handed to the text-extraction helper.
    fileType: str
    # File content encoded as base64 text.
    fileBase64: str
class EntityResults(BaseModel):
    """Entities found in a document, grouped into four lists of strings."""

    # Entries from the analysis result's "names" list.
    names: List[str]
    # Entries from the analysis result's "organizations" list.
    organizations: List[str]
    # Entries from the analysis result's "dates" list.
    dates: List[str]
    # Entries from the analysis result's "amounts" list.
    amounts: List[str]
# Simplified to match the task site's expected fields exactly
class AnalysisResponse(BaseModel):
    """Shape of a successful analysis result.

    The field names mirror the keys of the dict returned by the analysis
    handler in this file.
    """

    # File name copied from the request.
    fileName: str
    # Summary text taken from the analysis result.
    summary: str
    # Extracted entities, grouped by category.
    entities: EntityResults
    # Sentiment value taken from the analysis result.
    sentiment: str
# --- API Endpoint ---
# NOTE(review): no route decorator (e.g. ``@app.post(...)``) and no root route
# are visible at this point in the file; confirm how this handler is
# registered with ``app``.
async def analyze_document(
    data: AnalysisRequest,
    x_api_key: Optional[str] = Header(None),
):
    """Authenticate the caller, extract the document text and analyse it.

    Args:
        data: Request body carrying the file name, file type and the
            base64-encoded file content.
        x_api_key: API key supplied by the client as a request header;
            ``None`` when the header is absent.

    Returns:
        A dict with the keys ``fileName``, ``summary``, ``entities`` (lists
        for ``names``, ``organizations``, ``dates`` and ``amounts``, each
        de-duplicated in first-seen order) and ``sentiment``.

    Raises:
        HTTPException: 401 when the API key is missing or wrong; 400 when the
            payload is not decodable base64; 422 when extraction yields fewer
            than 10 non-blank characters or a ``ValueError`` is raised while
            processing; 500 for any other failure.
    """
    # 1. Security check. compare_digest runs in time independent of where the
    # two values differ, unlike ``!=``; encoding to bytes keeps it from
    # raising on non-ASCII header values.
    if x_api_key is None or not hmac.compare_digest(
        x_api_key.encode("utf-8"), API_KEY.encode("utf-8")
    ):
        raise HTTPException(status_code=401, detail="Invalid API Key")

    try:
        # 2. Extract & decode. Clients may strip the trailing '=' padding, so
        # restore it before decoding.
        file_b64 = data.fileBase64
        missing_padding = len(file_b64) % 4
        if missing_padding:
            file_b64 += "=" * (4 - missing_padding)

        try:
            file_bytes = base64.b64decode(file_b64)
        except ValueError as exc:
            # binascii.Error is a ValueError subclass, so this covers both
            # malformed base64 and non-ASCII input.
            raise HTTPException(
                status_code=400, detail="Corrupted file data"
            ) from exc

        # 3. OCR / extraction
        text = get_text_from_base64(file_bytes, data.fileType.lower())
        if not text or len(text.strip()) < 10:
            raise ValueError("Document is empty or text is unreadable (OCR Failed)")

        # 4. AI analysis
        analysis = run_analysis(text)
        entities = analysis["entities"]

        # 5. Structured response. dict.fromkeys removes duplicates while
        # keeping first-seen order, so the output is deterministic.
        return {
            "fileName": data.fileName,
            "summary": analysis["summary"],
            "entities": {
                field: list(dict.fromkeys(entities.get(field, [])))
                for field in ("names", "organizations", "dates", "amounts")
            },
            "sentiment": analysis["sentiment"],
        }
    except HTTPException:
        # Already a deliberate HTTP error (e.g. the 400 above); without this
        # clause the generic handler below would turn it into a 500.
        raise
    except ValueError as ve:
        raise HTTPException(status_code=422, detail=str(ve))
    except Exception as e:
        print(f"🔥 DEPLOYMENT ERROR: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal Server Error during AI analysis")