""" Inference script for multimodal fraudulent paper detection. """ import os import sys import torch import numpy as np from transformers import AutoTokenizer import argparse import json from model import MultimodalFraudDetector def predict_fraud(model, tokenizer, text, tabular, metadata, device): """Predict fraud probability for a single paper.""" model.eval() # Tokenize text encoding = tokenizer( text, max_length=512, padding='max_length', truncation=True, return_tensors='pt' ) input_ids = encoding['input_ids'].to(device) attention_mask = encoding['attention_mask'].to(device) tabular = torch.tensor(tabular, dtype=torch.float32).unsqueeze(0).to(device) metadata = torch.tensor(metadata, dtype=torch.float32).unsqueeze(0).to(device) with torch.no_grad(): outputs = model( text_input_ids=input_ids, text_attention_mask=attention_mask, tabular_features=tabular, metadata_features=metadata ) logits = outputs['logits'] probs = torch.softmax(logits, dim=1) fraud_prob = probs[0, 1].item() modality_scores = outputs['modality_scores'][0].cpu().numpy() anomaly_score = outputs['anomaly_score'][0].item() return { 'fraud_probability': fraud_prob, 'is_fraudulent': fraud_prob > 0.5, 'modality_contributions': { 'text': float(modality_scores[0]), 'image': float(modality_scores[1]), 'tabular': float(modality_scores[2]), 'metadata': float(modality_scores[3]) }, 'anomaly_score': anomaly_score } def explain_prediction(result): """Generate human-readable explanation.""" explanations = [] if result['fraud_probability'] > 0.5: explanations.append(f"FRAUDULENT (probability: {result['fraud_probability']:.2%})") else: explanations.append(f"AUTHENTIC (fraud probability: {result['fraud_probability']:.2%})") # Modality contributions contrib = result['modality_contributions'] max_modality = max(contrib, key=contrib.get) explanations.append(f"Primary fraud indicator: {max_modality} modality (score: {contrib[max_modality]:.3f})") if result['anomaly_score'] > 0.7: explanations.append(f"High anomaly score ({result['anomaly_score']:.3f}): Paper shows strong outlier patterns") return "\n".join(explanations) def main(): parser = argparse.ArgumentParser() parser.add_argument('--model_path', required=True) parser.add_argument('--text', default='') parser.add_argument('--title', default='') parser.add_argument('--output', default='prediction.json') args = parser.parse_args() device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Load model checkpoint = torch.load(args.model_path, map_location=device) model_args = checkpoint.get('args', {}) model = MultimodalFraudDetector( text_model=model_args.get('text_model', 'allenai/scibert_scivocab_uncased'), tabular_features=10, metadata_features=12 ).to(device) model.load_state_dict(checkpoint['model_state_dict']) model.eval() tokenizer = AutoTokenizer.from_pretrained(model_args.get('text_model')) # Prepare input text = f"{args.title} [SEP] {args.text}" # Dummy features for demo (in production, extract from actual paper) tabular = np.random.randn(10).astype(np.float32) metadata = np.random.randn(12).astype(np.float32) # Predict result = predict_fraud(model, tokenizer, text, tabular, metadata, device) result['explanation'] = explain_prediction(result) print(result['explanation']) with open(args.output, 'w') as f: json.dump(result, f, indent=2) print(f"\nSaved to {args.output}") if __name__ == '__main__': main()