honesdev's picture
Update app.py
8f54669 verified
import json
import torch
import gradio as gr
from PIL import Image
from transformers import AutoModelForImageTextToText, AutoProcessor
from peft import PeftModel
from pdf2image import convert_from_path
import tempfile
import os
# Identifiers passed to the `from_pretrained` loaders: the base
# vision-language model and the adapter that is applied on top of it.
BASE_MODEL = "HuggingFaceTB/SmolVLM2-500M-Instruct"
ADAPTER = "honesdev/smolvlm2-500-invoice-extractor-v3"

# Text instruction sent to the model together with every invoice image.
PROMPT = (
    "Extract the invoice details and return a JSON object only. "
    "Return null for any missing fields."
)
# Everything below runs once at import time, so each request handled later
# only pays for inference and not for loading weights.

print("Loading processor...")
processor = AutoProcessor.from_pretrained(BASE_MODEL)

print("Loading base model...")
model = AutoModelForImageTextToText.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float32,  # CPU requires float32
    device_map="cpu",
)

print("Loading adapter...")
# Rebind `model` to the adapter-wrapped version; the rest of the file uses
# this name for generation.
model = PeftModel.from_pretrained(model, ADAPTER)
# Switch to evaluation mode; this script only runs generation.
model.eval()
print("Model ready ✓")
def _resolve_upload_path(file):
    """Return the on-disk path of an upload as a plain string.

    The upload may arrive either as a path (``str`` / ``os.PathLike``) or as
    an object that exposes its path through a ``name`` attribute; both forms
    are accepted.
    """
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def _load_invoice_image(path):
    """Open the invoice at *path* as an RGB image.

    PDFs are rasterized (first page only); anything else is opened as an
    image. Returns ``None`` when a PDF yields no pages.
    """
    if path.lower().endswith(".pdf"):
        # Only the first page is ever used, so only that page is rasterized.
        pages = convert_from_path(path, dpi=150, first_page=1, last_page=1)
        if not pages:
            return None
        return pages[0].convert("RGB")

    # The context manager releases the file handle once the pixel data has
    # been converted into a new image object.
    with Image.open(path) as img:
        return img.convert("RGB")


def extract_invoice(file):
    """Extract invoice fields from an uploaded PDF or image.

    Args:
        file: The uploaded invoice, or ``None`` when nothing was uploaded.
            Either a filesystem path or an object with a ``name`` attribute
            holding the path. Paths ending in ``.pdf`` (case-insensitive)
            are treated as PDFs and only their first page is read; anything
            else is opened as an image.

    Returns:
        A string: pretty-printed JSON when the model output parses as JSON,
        otherwise the stripped raw model output. A short message is returned
        instead when no file was given or the PDF has no pages.
    """
    if file is None:
        return "Please upload an invoice image or PDF."

    image = _load_invoice_image(_resolve_upload_path(file))
    if image is None:
        return "The uploaded PDF contains no pages."

    # Single-turn chat: one image placeholder followed by the instruction.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": PROMPT},
            ],
        }
    ]
    text = processor.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = processor(
        text=text,
        images=[image],
        return_tensors="pt",
    )

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,
        )

    # Skip the first `prompt_length` positions so that only the tokens after
    # the input sequence are decoded.
    prompt_length = inputs["input_ids"].shape[1]
    generated = outputs[0][prompt_length:]
    response = processor.decode(generated, skip_special_tokens=True).strip()

    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        # Not valid JSON: show the raw model output rather than failing.
        return response
    # ensure_ascii=False keeps non-ASCII text (names, currency symbols)
    # readable instead of emitting \uXXXX escapes.
    return json.dumps(parsed, indent=2, ensure_ascii=False)
# Upload widget restricted to the file extensions the extractor accepts.
invoice_upload = gr.File(
    label="Upload Invoice (PDF or Image)",
    file_types=[".pdf", ".png", ".jpg", ".jpeg"],
)

# Multi-line text area that shows the string returned by `extract_invoice`.
extracted_fields_box = gr.Textbox(
    label="Extracted Fields (JSON)",
    lines=25,
)

# Wire the single input and single output to the extraction function.
demo = gr.Interface(
    fn=extract_invoice,
    inputs=invoice_upload,
    outputs=extracted_fields_box,
    title="🧾 Invoice Extractor",
    description="""
Upload an invoice as a PDF or image. The model extracts key fields automatically.
**Extracts:** vendor name · customer name · bank account · issue date · due date · currency · line items · total amount
**Model:** SmolVLM2-500M fine-tuned on 4924 invoices (synthetic + real)
⚠️ Running on CPU — extraction takes 30-60 seconds per invoice.
""",
    theme=gr.themes.Soft(),
)

# Start the web UI as soon as the module is executed.
demo.launch()