import io

import pytesseract
from fastapi import FastAPI, UploadFile, File
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse
from pdf2image import convert_from_bytes
from PIL import Image
|
|
# Application instance on which this module's routes (e.g. POST /ocr) are registered.
app = FastAPI()
|
|
# File extensions (lower-case, with leading dot) accepted by the /ocr endpoint.
_ALLOWED_EXTENSIONS = (".jpg", ".jpeg", ".png", ".pdf")

# Tesseract language spec: Hindi and English models are applied together.
_OCR_LANGUAGES = "hin+eng"


def _ocr_bytes(contents: bytes, is_pdf: bool) -> str:
    """Run OCR over raw upload bytes and return the recognized text.

    This is a plain synchronous function on purpose: the PDF conversion and
    the Tesseract calls are blocking, so the endpoint runs it in a worker
    thread instead of on the event loop.

    Args:
        contents: Raw bytes of the uploaded file.
        is_pdf: True to treat ``contents`` as a PDF and OCR every page;
            False to treat it as a single image.

    Returns:
        The recognized text. For PDFs, page texts are separated by a blank
        line ("\n\n").

    Raises:
        Exception: Whatever ``convert_from_bytes``, ``Image.open`` or
            ``pytesseract.image_to_string`` raise for unreadable input.
    """
    if is_pdf:
        pages = convert_from_bytes(contents)
        return "\n\n".join(
            pytesseract.image_to_string(page, lang=_OCR_LANGUAGES)
            for page in pages
        )

    # The context manager releases the image's resources once OCR is done.
    with Image.open(io.BytesIO(contents)) as image:
        return pytesseract.image_to_string(image, lang=_OCR_LANGUAGES)


@app.post("/ocr")
async def extract_text(file: UploadFile = File(...)):
    """Extract text from an uploaded JPG, PNG, or PDF file via OCR.

    Args:
        file: The uploaded file. Its type is decided from the filename
            extension only (case-insensitive).

    Returns:
        ``{"text": ...}`` with the stripped OCR result, or a placeholder
        message when no text was recognized. A 400 ``JSONResponse`` is
        returned for an unsupported or missing filename, and a 500
        ``JSONResponse`` (with the exception text under ``"details"``) when
        processing fails.
    """
    # UploadFile.filename may be None; treat that as "no extension" so the
    # request is rejected with a 400 instead of crashing on .lower().
    filename = (file.filename or "").lower()

    if not filename.endswith(_ALLOWED_EXTENSIONS):
        return JSONResponse(
            content={"error": "❌ Unsupported file format! Please upload JPG, PNG, or PDF."},
            status_code=400,
        )

    contents = await file.read()

    try:
        # OCR is blocking work; run it in the threadpool so other requests
        # are still served while it runs.
        extracted_text = await run_in_threadpool(
            _ocr_bytes, contents, filename.endswith(".pdf")
        )
    except Exception as e:
        # Endpoint boundary: report any processing failure as a 500 payload.
        return JSONResponse(
            content={"error": "🚫 Failed to process file", "details": str(e)},
            status_code=500,
        )

    return {"text": extracted_text.strip() or "⚠️ No text found."}
|
|