Spaces:

bughead
/

humanzise-api

Sleeping

App Files Files Community

humanzise-api / utils /model_loaders.py

bughead

Initial Humanzise backend deployment

325e5a1 9 days ago

raw

history blame contribute delete

1.99 kB

	"""
	Model loaders for the AI detection pipeline.

	Uses `desklib/ai-text-detector-v1.01` — a DeBERTa-v3-large classifier that
	currently tops the RAID benchmark for modern LLM detection (ChatGPT, Claude,
	Gemini, Llama, Grok, etc.). The model ships a custom head, so we load it via
	the `DesklibAIDetectionModel` wrapper defined in `utils.desklib_model`.
	"""
	import logging
	from functools import lru_cache

	import torch
	from transformers import AutoTokenizer

	from utils.desklib_model import DesklibAIDetectionModel

	# Module-level logger, named after this module's import path.
	logger = logging.getLogger(__name__)

	# Model identifier handed to `from_pretrained` for both the tokenizer and
	# the detector weights.
	DETECTOR_MODEL_ID = "desklib/ai-text-detector-v1.01"


	@lru_cache(maxsize=1)
	def load_detector_model():
	    """Load the desklib AI detector (DeBERTa-v3-large + custom head).

	    The device is chosen in priority order: CUDA, then Apple MPS, then CPU.
	    The model is moved onto that device and put in eval mode before being
	    returned.

	    Returns:
	        A ``(model, tokenizer, device)`` tuple. The result is memoized by
	        ``lru_cache``, so subsequent calls return the same in-process
	        objects instead of loading the weights again.

	    Note:
	        First call downloads ~1.75 GB and caches it under
	        ``~/.cache/huggingface``.
	    """
	    # `torch.backends.mps` only exists on PyTorch >= 1.12. Look it up
	    # defensively so older builds fall through to CPU instead of raising
	    # AttributeError on hosts without CUDA.
	    mps_backend = getattr(torch.backends, "mps", None)

	    if torch.cuda.is_available():
	        device = torch.device("cuda")
	    elif mps_backend is not None and mps_backend.is_available():
	        device = torch.device("mps")
	    else:
	        device = torch.device("cpu")

	    logger.info("Loading detector %s on %s", DETECTOR_MODEL_ID, device)
	    tokenizer = AutoTokenizer.from_pretrained(DETECTOR_MODEL_ID)
	    model = DesklibAIDetectionModel.from_pretrained(DETECTOR_MODEL_ID)
	    model.to(device)
	    # Inference only: eval mode turns off dropout and similar train-time
	    # behaviour.
	    model.eval()
	    logger.info("Detector ready")
	    return model, tokenizer, device


	@torch.no_grad()
	def predict_ai_probability(text, model, tokenizer, device, max_len=768):
	    """Score `text` with the detector and return its AI-likelihood.

	    The tokenizer is asked to pad and truncate to `max_len` tokens and to
	    return PyTorch tensors. The ids and attention mask are moved to
	    `device`, passed to `model`, and the ``"logits"`` entry of its output
	    is squashed through a sigmoid. Gradient tracking is disabled for the
	    whole call.

	    Returns:
	        A Python float in 0..1. The logits must hold exactly one element
	        (``.item()`` is used), so one text is scored per call.
	    """
	    tokenizer_kwargs = {
	        "padding": "max_length",
	        "truncation": True,
	        "max_length": max_len,
	        "return_tensors": "pt",
	    }
	    batch = tokenizer(text, **tokenizer_kwargs)

	    # Only these two tensors are forwarded to the model.
	    model_inputs = {
	        key: batch[key].to(device)
	        for key in ("input_ids", "attention_mask")
	    }

	    raw_score = model(**model_inputs)["logits"]
	    probability = torch.sigmoid(raw_score)
	    return probability.item()