Spaces:
Sleeping
Sleeping
| import json | |
| import torch | |
| import gradio as gr | |
| from PIL import Image | |
| from transformers import AutoModelForImageTextToText, AutoProcessor | |
| from peft import PeftModel | |
| from pdf2image import convert_from_path | |
| import tempfile | |
| import os | |
# --- Configuration -----------------------------------------------------------
# Hub identifiers of the base vision-language model and of the adapter that is
# applied on top of it, plus the fixed instruction sent with every invoice.
BASE_MODEL = "HuggingFaceTB/SmolVLM2-500M-Instruct"
ADAPTER = "honesdev/smolvlm2-500-invoice-extractor-v3"
PROMPT = "Extract the invoice details and return a JSON object only. Return null for any missing fields."

# --- One-time model loading (runs at import) ---------------------------------
print("Loading processor...")
processor = AutoProcessor.from_pretrained(BASE_MODEL)

print("Loading base model...")
base_model = AutoModelForImageTextToText.from_pretrained(
    BASE_MODEL,
    device_map="cpu",
    torch_dtype=torch.float32,  # CPU requires float32
)

print("Loading adapter...")
# Wrap the base model with the adapter weights and switch to eval mode.
model = PeftModel.from_pretrained(base_model, ADAPTER)
model.eval()
print("Model ready ✓")
def extract_invoice(file):
    """Extract invoice fields from an uploaded PDF or image.

    For a PDF only the first page is rendered and analysed. The resulting
    image is sent, together with ``PROMPT``, through the module-level
    ``processor`` and ``model``; the newly generated tokens are decoded and
    returned.

    Args:
        file: The upload as handed over by the UI. Either a filesystem path
            string or an object exposing the path as ``.name``. ``None``
            when nothing was uploaded.

    Returns:
        str: Pretty-printed JSON when the model output parses as JSON,
        otherwise the stripped raw model output. A short human-readable
        message is returned when no file was given or a PDF yields no page.
    """
    if file is None:
        return "Please upload an invoice image or PDF."

    # Accept both a plain path string and a wrapper object carrying `.name`.
    path = file if isinstance(file, str) else file.name

    if path.lower().endswith(".pdf"):
        # Only the first page is used, so render just that page instead of
        # rasterising the whole document.
        pages = convert_from_path(path, dpi=150, first_page=1, last_page=1)
        if not pages:
            return "Could not read any pages from the uploaded PDF."
        image = pages[0].convert("RGB")
    else:
        # Close the file handle deterministically; convert() returns a loaded
        # copy that stays usable after the source image is closed.
        with Image.open(path) as source_image:
            image = source_image.convert("RGB")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": PROMPT},
            ],
        }
    ]
    text = processor.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = processor(
        text=text,
        images=[image],
        return_tensors="pt",
    )

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,
        )

    # Drop the prompt tokens so only the newly generated answer is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    generated = outputs[0][prompt_length:]
    response = processor.decode(generated, skip_special_tokens=True).strip()

    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        # Not valid JSON: show the raw model output rather than failing.
        return response
    # ensure_ascii=False keeps non-ASCII text (names, currency symbols)
    # readable instead of emitting \uXXXX escapes.
    return json.dumps(parsed, indent=2, ensure_ascii=False)
# --- Gradio UI ---------------------------------------------------------------
# Markdown blurb shown under the title.
DESCRIPTION = """
Upload an invoice as a PDF or image. The model extracts key fields automatically.
**Extracts:** vendor name · customer name · bank account · issue date · due date · currency · line items · total amount
**Model:** SmolVLM2-500M fine-tuned on 4924 invoices (synthetic + real)
⚠️ Running on CPU — extraction takes 30-60 seconds per invoice.
"""

# Upload widget restricted to the file types extract_invoice handles.
invoice_input = gr.File(
    label="Upload Invoice (PDF or Image)",
    file_types=[".pdf", ".png", ".jpg", ".jpeg"],
)

# Tall textbox so a full pretty-printed JSON result is visible.
json_output = gr.Textbox(
    label="Extracted Fields (JSON)",
    lines=25,
)

demo = gr.Interface(
    fn=extract_invoice,
    inputs=invoice_input,
    outputs=json_output,
    title="🧾 Invoice Extractor",
    description=DESCRIPTION,
    theme=gr.themes.Soft(),
)

demo.launch()