# Copyright (C) 2026 Embedl AB

"""Run inference on the Embedl Mobilevit Small INT8 model via torch.export.

This script loads the shipped ``embedl_mobilevit_small_int8.pt2`` artifact
with ``torch.export.load`` and runs a single image through it. No TensorRT
or ONNX runtime is required — just PyTorch.

Usage::

    python infer_pt2.py --image path/to/image.jpg
"""

import argparse
from pathlib import Path

import numpy as np
import torch
from PIL import Image

# The exported artifact is expected to sit next to this script.
PT2_PATH = Path(__file__).with_name("embedl_mobilevit_small_int8.pt2")
INPUT_SIZE = (256, 256)
# Identity mean/std: the upstream HF processor ships do_normalize=None, so
# (arr - MEAN) / STD below is a deliberate no-op kept as a hook for models
# that do require normalization.
MEAN = np.array([0.0, 0.0, 0.0], dtype=np.float32)
STD = np.array([1.0, 1.0, 1.0], dtype=np.float32)


def preprocess(image_path: Path) -> torch.Tensor:
    """Load *image_path* and convert it to the model's input tensor.

    MobileViT-Small uses BGR channel order, [0, 1] range, NO mean/std
    normalization (matches the upstream HF processor: do_normalize=None).

    Returns a float32 tensor of shape (1, 3, 256, 256) in NCHW layout.
    """
    image = Image.open(image_path).convert("RGB").resize(INPUT_SIZE)
    arr = np.asarray(image, dtype=np.float32) / 255.0
    arr = (arr - MEAN) / STD  # identity transform, see MEAN/STD note above
    arr = arr[..., ::-1].copy()  # RGB -> BGR
    arr = arr.transpose(2, 0, 1)[None]  # HWC -> NCHW with batch dim
    return torch.from_numpy(arr)


def main() -> None:
    """Parse CLI args, run one image through the .pt2 model, print top-k."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--image", required=True, type=Path)
    parser.add_argument("--topk", type=int, default=5)
    args = parser.parse_args()

    if not PT2_PATH.exists():
        raise SystemExit(
            f"Expected {PT2_PATH.name} next to this script. "
            "Did you `huggingface-cli download` the repo?"
        )

    # The ExportedProgram captured the model in eval mode at export
    # time, so no further .eval() / no_grad toggling is needed (and
    # neither is supported on the .module() wrapper).
    model = torch.export.load(str(PT2_PATH)).module()

    x = preprocess(args.image)
    logits = model(x)
    probs = torch.softmax(logits, dim=-1).squeeze(0)
    topk_vals, topk_idx = probs.topk(args.topk)

    print(f"Top-{args.topk} predictions for {args.image}:")
    for rank, (idx, val) in enumerate(
        zip(topk_idx.tolist(), topk_vals.tolist()), 1
    ):
        # NOTE: the original source had a literal newline inside this
        # f-string (a SyntaxError); rejoined into a single format string.
        print(f"  {rank}. class {idx:>5d} ({val * 100:5.2f}%)")


if __name__ == "__main__":
    main()