""" Model loaders for the AI detection pipeline. Uses `desklib/ai-text-detector-v1.01` — a DeBERTa-v3-large classifier that currently tops the RAID benchmark for modern LLM detection (ChatGPT, Claude, Gemini, Llama, Grok, etc). The model ships a custom head, so we load it via the `DesklibAIDetectionModel` wrapper defined in `utils.desklib_model`. """ import logging from functools import lru_cache import torch from transformers import AutoTokenizer from utils.desklib_model import DesklibAIDetectionModel logger = logging.getLogger(__name__) DETECTOR_MODEL_ID = "desklib/ai-text-detector-v1.01" @lru_cache(maxsize=1) def load_detector_model(): """Load the desklib AI detector (DeBERTa-v3-large + custom head). Returns (model, tokenizer, device). First call downloads ~1.75 GB and caches it under `~/.cache/huggingface`. Subsequent calls return the cached in-process instance. """ if torch.cuda.is_available(): device = torch.device("cuda") elif torch.backends.mps.is_available(): device = torch.device("mps") else: device = torch.device("cpu") logger.info("Loading detector %s on %s", DETECTOR_MODEL_ID, device) tokenizer = AutoTokenizer.from_pretrained(DETECTOR_MODEL_ID) model = DesklibAIDetectionModel.from_pretrained(DETECTOR_MODEL_ID) model.to(device) model.eval() logger.info("Detector ready") return model, tokenizer, device @torch.no_grad() def predict_ai_probability(text, model, tokenizer, device, max_len=768): """Return probability (0..1) that `text` is AI-generated.""" encoded = tokenizer( text, padding="max_length", truncation=True, max_length=max_len, return_tensors="pt", ) input_ids = encoded["input_ids"].to(device) attention_mask = encoded["attention_mask"].to(device) outputs = model(input_ids=input_ids, attention_mask=attention_mask) logits = outputs["logits"] return torch.sigmoid(logits).item()