pdf_analysis / app.py
randusertry's picture
Create app.py
a96d54a verified
raw
history blame contribute delete
877 Bytes
from fastapi import FastAPI, File, UploadFile
import pdfplumber
import io
# Application instance; the @app.post / @app.get decorators below register
# their route handlers on this object.
app = FastAPI()
def _extract_stream(pdf_bytes):
    """Parse an in-memory PDF into an ordered list of content items.

    Pages are visited in document order. For each page, a ``"text"`` item is
    emitted first (only when ``extract_text()`` returns a non-empty value),
    followed by one ``"table"`` item per table from ``extract_tables()``.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        A list of ``{"type": ..., "content": ...}`` dicts.
    """
    stream = []
    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        for page in pdf.pages:
            # Text first, so it precedes the same page's tables in the output.
            text = page.extract_text()
            if text:
                stream.append({"type": "text", "content": text})

            stream.extend(
                {"type": "table", "content": table}
                for table in page.extract_tables()
            )
    return stream


@app.post("/extract")
async def extract(file: UploadFile = File(...)):
    """Extract text and tables from an uploaded PDF.

    Args:
        file: The uploaded PDF (multipart form field ``file``).

    Returns:
        ``{"stream": [...]}`` where each entry is a dict with ``"type"``
        (``"text"`` or ``"table"``) and ``"content"``, in page order.

    Raises:
        HTTPException: 400 when the uploaded file contains no bytes.
    """
    # Imported at function scope so this block does not depend on edits to
    # the module-level import list.
    from fastapi import HTTPException
    from fastapi.concurrency import run_in_threadpool

    pdf_bytes = await file.read()
    if not pdf_bytes:
        # Reject up front instead of letting the parser fail on empty input.
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    # PDF parsing is synchronous and CPU-heavy; running it directly inside
    # this coroutine would block the event loop for every other request.
    stream = await run_in_threadpool(_extract_stream, pdf_bytes)
    return {"stream": stream}
@app.get("/health")
async def health():
    """Health-check endpoint.

    Returns:
        A mapping with the single key ``"status"`` set to ``"ok"``.
    """
    payload = {"status": "ok"}
    return payload