import json
import torch
import gradio as gr
from PIL import Image
from transformers import AutoModelForImageTextToText, AutoProcessor
from peft import PeftModel
from pdf2image import convert_from_path
import tempfile
import os

# Hub identifier of the base vision-language checkpoint; used to load both the
# processor and the base model weights below.
BASE_MODEL = "HuggingFaceTB/SmolVLM2-500M-Instruct"
# Identifier of the PEFT adapter that is applied on top of BASE_MODEL.
ADAPTER = "honesdev/smolvlm2-500-invoice-extractor-v3"
# Instruction sent as the text part of the user message, alongside the invoice
# image, on every extraction request.
PROMPT = "Extract the invoice details and return a JSON object only. Return null for any missing fields."

# The processor and model are loaded once, at import time, and kept as module
# globals; extract_invoice() reuses them for every request.
print("Loading processor...")
processor = AutoProcessor.from_pretrained(BASE_MODEL)

print("Loading base model...")
model = AutoModelForImageTextToText.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float32,  # CPU requires float32
    device_map="cpu"
)

# Wrap the base model with the ADAPTER weights; from here on `model` refers to
# the PeftModel wrapper rather than the bare base model.
print("Loading adapter...")
model = PeftModel.from_pretrained(model, ADAPTER)
model.eval()  # inference only: put the model in evaluation mode
print("Model ready ✓")


def _uploaded_path(file):
    """Return the filesystem path of a Gradio upload as a string.

    Depending on the Gradio version and the ``type`` of the ``gr.File``
    component, the callback receives either a plain path (``str`` /
    ``os.PathLike``) or a temp-file wrapper that exposes the path as ``.name``.
    """
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def _load_first_page(path):
    """Load the invoice at *path* as an RGB image.

    PDFs are rasterised at 150 dpi and only their first page is rendered.
    Returns ``None`` when the PDF yields no pages.
    """
    if path.lower().endswith(".pdf"):
        # Only page 1 is used, so do not pay for rendering the whole document.
        pages = convert_from_path(path, dpi=150, first_page=1, last_page=1)
        if not pages:
            return None
        return pages[0].convert("RGB")

    # convert() returns a fully loaded copy, so the file handle can be closed
    # as soon as the conversion is done.
    with Image.open(path) as img:
        return img.convert("RGB")


def _format_response(response):
    """Pretty-print the model output as JSON when it contains a JSON object.

    The whole text is tried first; if that fails, the span from the first
    ``{`` to the last ``}`` is tried, which covers answers wrapped in markdown
    code fences or surrounded by extra words. When nothing parses, the
    stripped raw text is returned unchanged.
    """
    text = response.strip()

    candidates = [text]
    start, end = text.find("{"), text.rfind("}")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # ensure_ascii=False keeps non-ASCII names/currencies readable in the UI.
        return json.dumps(parsed, indent=2, ensure_ascii=False)

    return text


def extract_invoice(file):
    """
    Accepts a PDF or image invoice and returns extracted fields as JSON.

    Args:
        file: The upload handed over by the ``gr.File`` input — either a path
            string or a temp-file object with a ``.name`` attribute — or
            ``None`` when nothing was uploaded.

    Returns:
        A string: indented JSON when the model output can be parsed, the raw
        model output otherwise, or a short message when there is no usable
        input.
    """
    if file is None:
        return "Please upload an invoice image or PDF."

    image = _load_first_page(_uploaded_path(file))
    if image is None:
        return "The uploaded PDF does not contain any pages."

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": PROMPT},
            ],
        }
    ]

    # Render the chat template to text first; the processor then pairs the
    # image placeholder in that text with the actual image.
    text = processor.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    inputs = processor(
        text=text,
        images=[image],
        return_tensors="pt",
    )

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,  # greedy decoding for repeatable extraction
        )

    # generate() returns prompt + completion; keep only the newly generated
    # tokens by slicing off the prompt length.
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    response = processor.decode(generated, skip_special_tokens=True)

    return _format_response(response)


# --- Gradio UI -------------------------------------------------------------
# The pieces of the interface are built first and then assembled, in the same
# order in which they were previously constructed inline.

# Upload widget restricted to the formats extract_invoice understands.
invoice_upload = gr.File(
    file_types=[".pdf", ".png", ".jpg", ".jpeg"],
    label="Upload Invoice (PDF or Image)",
)

# Tall text area that shows the JSON (or raw model text) returned by the model.
fields_output = gr.Textbox(lines=25, label="Extracted Fields (JSON)")

# Markdown blurb displayed under the title.
_DESCRIPTION = """
Upload an invoice as a PDF or image. The model extracts key fields automatically.

**Extracts:** vendor name · customer name · bank account · issue date · due date · currency · line items · total amount

**Model:** SmolVLM2-500M fine-tuned on 4924 invoices (synthetic + real)

⚠️ Running on CPU — extraction takes 30-60 seconds per invoice.
    """

demo = gr.Interface(
    fn=extract_invoice,
    inputs=invoice_upload,
    outputs=fields_output,
    title="🧾 Invoice Extractor",
    description=_DESCRIPTION,
    theme=gr.themes.Soft(),
)

# Start the web server as soon as the script is executed.
demo.launch()