File size: 3,152 Bytes
082ea80
 
6e91407
 
f2af3a8
 
 
 
082ea80
 
 
f2af3a8
 
 
 
 
 
6e91407
 
c57f05e
 
6e91407
c57f05e
 
 
 
 
6e91407
082ea80
 
6e91407
f2af3a8
 
6e91407
f2af3a8
 
 
 
 
 
 
 
6e91407
f2af3a8
 
 
 
 
 
6e91407
 
f2af3a8
 
 
 
 
 
6e91407
082ea80
0913538
082ea80
 
0913538
f2af3a8
082ea80
0913538
f2af3a8
 
 
082ea80
f2af3a8
 
 
0913538
 
 
 
082ea80
0913538
 
f2af3a8
0913538
f2af3a8
082ea80
6e91407
082ea80
0913538
f2af3a8
082ea80
f2af3a8
 
 
 
082ea80
6e91407
082ea80
f2af3a8
 
 
082ea80
0913538
6e91407
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import base64
import hmac
import logging
import os
from typing import Optional, List

from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# Importing your internal logic
from src.processor import get_text_from_base64 
from src.analyzer import run_analysis

# ASGI application object; the metadata below is what FastAPI publishes in
# the generated OpenAPI document.
app = FastAPI(
    version="1.0.0",
    title="Intelligent Document Processing API",
    description="Backend Engine for AI-powered Summary, Entity Extraction, and Sentiment Analysis.",
)

# CORS: allow browser clients on any origin (e.g. the task site's "Test"
# button) to call this API with any method and any request header.
#
# Credentials are disabled on purpose: the FastAPI/Starlette CORS docs state
# that allow_origins, allow_methods and allow_headers must all be listed
# explicitly (no "*") when allow_credentials=True. This API authenticates
# with the custom x-api-key header, which is governed by allow_headers and is
# not a CORS "credential" (cookies / HTTP auth), so nothing here needs them.
# If cookie-based auth is ever added, replace the wildcards with explicit
# values before turning credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Configuration
# Shared secret that callers must send in the x-api-key request header.
# Read from the API_KEY environment variable, with a built-in fallback.
# NOTE(review): the fallback literal lives in source control, so any
# deployment that forgets to set API_KEY accepts a publicly known key --
# consider requiring the environment variable instead.
API_KEY = os.environ.get("API_KEY", "sk_track2_987654321")

# --- Data Models ---
class AnalysisRequest(BaseModel):
    # JSON request body accepted by POST /api/document-analyze.
    # fileName   -- echoed back unchanged in the response.
    # fileType   -- lower-cased by the endpoint and handed to
    #               get_text_from_base64 together with the decoded bytes.
    # fileBase64 -- the file content as a base64 string; the endpoint pads
    #               and decodes it before text extraction.
    fileName: str
    fileType: str 
    fileBase64: str

class EntityResults(BaseModel):
    # Entity lists nested under AnalysisResponse.entities. The endpoint fills
    # each list from the same-named key of the analyzer's "entities" mapping
    # after removing duplicate values.
    names: List[str]
    organizations: List[str]
    dates: List[str]
    amounts: List[str]

# Simplified to match the task site's expected fields exactly
class AnalysisResponse(BaseModel):
    # Response schema (response_model) of POST /api/document-analyze.
    # fileName  -- copied from the request.
    # summary   -- the "summary" value returned by run_analysis.
    # entities  -- de-duplicated entity lists (see EntityResults).
    # sentiment -- the "sentiment" value returned by run_analysis.
    fileName: str
    summary: str
    entities: EntityResults
    sentiment: str

# TODO: add a root ("/") route here.

# --- Helpers ---
def _decode_base64_payload(encoded: str) -> bytes:
    """Decode a base64 string, tolerating whitespace and missing padding.

    Raises:
        ValueError: if the payload is not decodable base64
            (``binascii.Error`` is a ``ValueError`` subclass).
    """
    # The decoder skips whitespace, so whitespace must not count towards the
    # length used to work out how many '=' characters are missing.
    compact = "".join(encoded.split())
    compact += "=" * (-len(compact) % 4)
    return base64.b64decode(compact)


def _unique_in_order(values) -> List[str]:
    """Return *values* without duplicates, keeping first-seen order.

    A missing/None value is treated as an empty list.
    """
    return list(dict.fromkeys(values or []))


# --- API Endpoint ---
@app.post("/api/document-analyze", response_model=AnalysisResponse)
async def analyze_document(
    data: AnalysisRequest,
    x_api_key: Optional[str] = Header(None)
):
    """Analyze a base64-encoded document and return summary, entities and sentiment.

    Responses:
        401 -- the x-api-key header is missing or does not match.
        400 -- fileBase64 is not valid base64.
        422 -- fewer than 10 characters of text could be extracted, or a
               processing step raised ValueError.
        500 -- any other failure during extraction or analysis.
    """
    # 1. Security check. Compare as bytes in constant time so the response
    #    time does not reveal how much of the key matched; bytes also avoids
    #    the TypeError compare_digest raises for non-ASCII str input.
    if x_api_key is None or not hmac.compare_digest(
        x_api_key.encode("utf-8"), API_KEY.encode("utf-8")
    ):
        raise HTTPException(status_code=401, detail="Invalid API Key")

    # 2. Decode the upload. Done outside the generic handler below so the
    #    400 reaches the client instead of being rewritten to a 500.
    try:
        file_bytes = _decode_base64_payload(data.fileBase64)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="Corrupted file data") from exc

    # NOTE(review): the two calls below are synchronous and run on the event
    # loop; if they are slow, other requests wait -- confirm whether they
    # should be moved to a worker thread.
    try:
        # 3. Text extraction
        text = get_text_from_base64(file_bytes, data.fileType.lower())

        if not text or len(text.strip()) < 10:
            raise ValueError("Document is empty or text is unreadable (OCR Failed)")

        # 4. AI analysis
        analysis = run_analysis(text)
        entities = analysis["entities"]

        # 5. Structured response. Entities are de-duplicated in first-seen
        #    order so identical input yields identical output.
        return {
            "fileName": data.fileName,
            "summary": analysis["summary"],
            "entities": {
                "names": _unique_in_order(entities.get("names")),
                "organizations": _unique_in_order(entities.get("organizations")),
                "dates": _unique_in_order(entities.get("dates")),
                "amounts": _unique_in_order(entities.get("amounts")),
            },
            "sentiment": analysis["sentiment"],
        }

    except HTTPException:
        # Deliberate HTTP errors must pass through untouched.
        raise
    except ValueError as ve:
        raise HTTPException(status_code=422, detail=str(ve)) from ve
    except Exception as exc:
        # Log with traceback; the client only sees a generic message.
        logging.getLogger(__name__).exception("🔥 DEPLOYMENT ERROR: %s", exc)
        raise HTTPException(
            status_code=500,
            detail="Internal Server Error during AI analysis",
        ) from exc