|
|
| |
| import re, json, torch, torch.nn.functional as F |
| from pathlib import Path |
| from PIL import Image |
| from torchvision import transforms |
| from transformers import BertTokenizer |
| |
| |
|
|
# Run on GPU when available, otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model metadata (label names, image stats, training config).
# Path.read_text closes the file deterministically; the previous
# json.load(open(...)) left the descriptor for the GC to reclaim.
META = json.loads(Path("deployment/model_meta.json").read_text(encoding="utf-8"))
CONFIG = META["config"]

# Tokenizer matching the text encoder the checkpoint was trained with.
tokenizer = BertTokenizer.from_pretrained(CONFIG["BERT_MODEL"])

# Image preprocessing: resize to the training resolution, convert to a
# float tensor in [0, 1], then normalize with the training-set statistics.
img_transform = transforms.Compose([
    transforms.Resize((CONFIG["IMAGE_SIZE"], CONFIG["IMAGE_SIZE"])),
    transforms.ToTensor(),
    transforms.Normalize(META["img_mean"], META["img_std"]),
])
|
|
def load_model(ckpt_path: str = "deployment/best_model.pt"):
    """Build the multimodal model and restore its best checkpoint.

    Args:
        ckpt_path: Checkpoint file to load. Defaults to the deployed
            checkpoint, so existing callers are unaffected.

    Returns:
        The model on DEVICE, switched to eval mode.
    """
    # NOTE(review): assumes MultimodalSentimentModel is imported/defined
    # elsewhere in this file — confirm.
    model = MultimodalSentimentModel(CONFIG).to(DEVICE)
    # NOTE(review): torch.load unpickles arbitrary objects; only load
    # checkpoints from trusted sources.
    ckpt = torch.load(ckpt_path, map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    model.eval()
    return model
|
|
def predict(model, text: str, image_path: str) -> dict:
    """Run one multimodal sentiment prediction on a (text, image) pair.

    Args:
        model: Model returned by load_model(), already in eval mode.
        text: Raw input text; URLs and @mentions are stripped and
            hashtags reduced to their word before tokenization.
        image_path: Path to the input image file.

    Returns:
        dict with "label" (predicted class name), "confidence"
        (probability of that class), and "probabilities" (per-class map).
    """
    # Mirror the training-time text cleanup; fall back to a placeholder
    # so the tokenizer never sees an empty string.
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"@\w+", "", text)
    text = re.sub(r"#(\w+)", r"\1", text).strip() or "no text"

    enc = tokenizer(text, max_length=CONFIG["MAX_TEXT_LEN"],
                    padding="max_length", truncation=True, return_tensors="pt")
    input_ids = enc["input_ids"].to(DEVICE)
    attention_mask = enc["attention_mask"].to(DEVICE)

    # Close the image file promptly; a bare Image.open leaks the file
    # descriptor until the garbage collector gets to it.
    with Image.open(image_path) as pil_img:
        img = img_transform(pil_img.convert("RGB")).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        logits = model(input_ids, attention_mask, img)
        probs = F.softmax(logits, dim=-1).cpu().numpy()[0]

    # int(): numpy integer scalars are not JSON-serializable downstream.
    pred_idx = int(probs.argmax())
    return {
        "label": META["label_names"][pred_idx],
        "confidence": float(probs[pred_idx]),
        "probabilities": {n: float(p) for n, p in zip(META["label_names"], probs)},
    }
|
|