Spaces:
Running
Running
| from fastapi import FastAPI, File, UploadFile | |
| import pdfplumber | |
| import io | |
# Module-level FastAPI application instance, created with default settings.
# NOTE(review): no route registrations referencing `app` are visible in this
# view of the file - confirm the handlers below are attached to it.
| app = FastAPI() | |
async def extract(file: UploadFile = File(...)):
    """Extract text and tables from an uploaded PDF, page by page.

    The upload is read fully into memory and then parsed with pdfplumber.
    Parsing is synchronous, CPU-bound work, so it is run in a worker thread
    instead of directly on the event loop; otherwise one large PDF would
    stall every other request served by this process while it is parsed.

    Args:
        file: The uploaded document (required multipart form field).

    Returns:
        ``{"stream": [...]}`` where the list holds, in page order, an
        optional ``{"type": "text", "content": ...}`` item followed by one
        ``{"type": "table", "content": ...}`` item per table on that page.
    """
    # NOTE(review): no route decorator is visible on this handler in the
    # reviewed source - confirm it is registered on `app`.

    # Function-scope import: the module's import block only brings in
    # FastAPI, File and UploadFile from the fastapi package.
    from fastapi.concurrency import run_in_threadpool

    pdf_bytes = await file.read()
    stream = await run_in_threadpool(_collect_pdf_items, pdf_bytes)
    return {"stream": stream}


def _collect_pdf_items(pdf_bytes):
    """Parse PDF bytes and return the text/table items in page order.

    Kept synchronous so the async handler can hand the entire parse to a
    worker thread in a single call.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        A list of ``{"type": ..., "content": ...}`` dicts. For each page the
        text item comes first (omitted when ``extract_text()`` returns a
        falsy value), then one item per table from ``extract_tables()``.
    """
    items = []
    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if text:
                items.append({"type": "text", "content": text})
            items.extend(
                {"type": "table", "content": table}
                for table in page.extract_tables()
            )
    return items
async def health():
    """Report liveness with a fixed ``{"status": "ok"}`` payload."""
    # NOTE(review): no route decorator is visible on this handler in the
    # reviewed source - confirm it is registered on the application.
    payload = {"status": "ok"}
    return payload