Spaces:

honesdev
/

invoice-extractor

Sleeping

App Files Files Community

invoice-extractor / app.py

honesdev

Update app.py

8f54669 verified 16 days ago

raw

history blame contribute delete

2.87 kB

	import json
	import torch
	import gradio as gr
	from PIL import Image
	from transformers import AutoModelForImageTextToText, AutoProcessor
	from peft import PeftModel
	from pdf2image import convert_from_path
	import tempfile
	import os

	# Hub identifiers of the base vision-language model and the fine-tuned
	# adapter that is layered on top of it.
	BASE_MODEL = "HuggingFaceTB/SmolVLM2-500M-Instruct"
	ADAPTER = "honesdev/smolvlm2-500-invoice-extractor-v3"

	# Instruction sent to the model together with the invoice image.
	PROMPT = (
	    "Extract the invoice details and return a JSON object only. "
	    "Return null for any missing fields."
	)

	# Everything below runs once at import time; progress is printed so the
	# start-up log shows which stage is currently loading.
	print("Loading processor...")
	processor = AutoProcessor.from_pretrained(BASE_MODEL)

	print("Loading base model...")
	# Weights are loaded on the CPU in float32 (the original author notes that
	# CPU requires float32).
	_base_load_options = {"torch_dtype": torch.float32, "device_map": "cpu"}
	_base_model = AutoModelForImageTextToText.from_pretrained(
	    BASE_MODEL, **_base_load_options
	)

	print("Loading adapter...")
	# Wrap the base model with the adapter weights and switch to eval mode.
	model = PeftModel.from_pretrained(_base_model, ADAPTER)
	model.eval()
	print("Model ready ✓")


	def extract_invoice(file):
	    """Extract invoice fields from an uploaded PDF or image.

	    For PDFs only the first page is rendered and analysed.

	    Args:
	        file: The upload received from the Gradio ``File`` input. Either a
	            path (``str`` / ``os.PathLike``) or an object exposing the path
	            as ``.name``; ``None`` when nothing was uploaded.

	    Returns:
	        Pretty-printed JSON when the model output parses as JSON, the
	        stripped raw model output when it does not, or a short
	        human-readable message when no usable input was supplied.
	    """
	    if file is None:
	        return "Please upload an invoice image or PDF."

	    # Accept a bare path as well as a file wrapper carrying the path in
	    # ``.name``. A ``str`` subclass that also has ``.name`` takes the first
	    # branch and is used as the path directly.
	    if isinstance(file, (str, os.PathLike)):
	        path = os.fspath(file)
	    else:
	        path = file.name

	    if path.lower().endswith(".pdf"):
	        # Render only page 1: the rest of the document is never used, and
	        # rasterizing every page of a long PDF wastes time and memory.
	        pages = convert_from_path(path, dpi=150, first_page=1, last_page=1)
	        if not pages:
	            return "The PDF does not contain any pages."
	        image = pages[0].convert("RGB")
	    else:
	        # ``convert`` returns an independent image, so the file handle can
	        # be released as soon as the conversion is done.
	        with Image.open(path) as opened:
	            image = opened.convert("RGB")

	    # Single user turn: one image placeholder followed by the instruction.
	    messages = [
	        {
	            "role": "user",
	            "content": [
	                {"type": "image"},
	                {"type": "text", "text": PROMPT},
	            ],
	        }
	    ]

	    text = processor.apply_chat_template(
	        messages,
	        tokenize=False,
	        add_generation_prompt=True,
	    )

	    inputs = processor(
	        text=text,
	        images=[image],
	        return_tensors="pt",
	    )

	    # Greedy decoding without gradient tracking.
	    with torch.no_grad():
	        outputs = model.generate(
	            **inputs,
	            max_new_tokens=512,
	            do_sample=False,
	        )

	    # Drop the prompt tokens so that only the newly generated part is decoded.
	    prompt_length = inputs["input_ids"].shape[1]
	    generated = outputs[0][prompt_length:]
	    response = processor.decode(generated, skip_special_tokens=True).strip()

	    try:
	        parsed = json.loads(response)
	    except json.JSONDecodeError:
	        # Not valid JSON: show the raw model output instead of failing.
	        return response

	    # ensure_ascii=False keeps non-ASCII text (e.g. vendor names) readable
	    # in the output textbox instead of showing \uXXXX escapes.
	    return json.dumps(parsed, indent=2, ensure_ascii=False)


	# Upload widget restricted to the file types the extractor handles.
	_invoice_upload = gr.File(
	    label="Upload Invoice (PDF or Image)",
	    file_types=[".pdf", ".png", ".jpg", ".jpeg"],
	)

	# Tall text area that shows the string returned by extract_invoice.
	_json_output = gr.Textbox(label="Extracted Fields (JSON)", lines=25)

	# Description text displayed by the interface; the literal is kept verbatim.
	_DESCRIPTION = """
	Upload an invoice as a PDF or image. The model extracts key fields automatically.

	Extracts: vendor name · customer name · bank account · issue date · due date · currency · line items · total amount

	Model: SmolVLM2-500M fine-tuned on 4924 invoices (synthetic + real)

	⚠️ Running on CPU — extraction takes 30-60 seconds per invoice.
	"""

	# Wire the upload widget, the extractor and the output box together.
	demo = gr.Interface(
	    fn=extract_invoice,
	    inputs=_invoice_upload,
	    outputs=_json_output,
	    title="🧾 Invoice Extractor",
	    description=_DESCRIPTION,
	    theme=gr.themes.Soft(),
	)

	# Start the web app as soon as the module is executed.
	demo.launch()