# NOTE(review): the lines below were a Hugging Face Spaces status banner
# ("Spaces: Sleeping") picked up by the scraper — not part of the module.
| """ | |
| AI text detection powered by the desklib DeBERTa-v3 classifier. | |
| Scores the FULL text and each sentence. Returns the per-sentence bucket | |
| breakdown the frontend expects PLUS the honest raw mean probability. | |
| """ | |
| import nltk | |
| from nltk.tokenize import sent_tokenize | |
| from utils.model_loaders import load_detector_model, predict_ai_probability | |
| nltk.download("punkt", quiet=True) | |
def classify_text_hf(text, threshold_ai=0.75, threshold_mid=0.4, threshold_soft=0.15):
    """Classify *text* as AI- or human-written, overall and per sentence.

    Args:
        text: The passage to score.
        threshold_ai: Minimum probability for the "AI-generated" bucket.
        threshold_mid: Minimum probability for "AI-generated & AI-refined".
        threshold_soft: Minimum probability for "Human-written & AI-refined".

    Returns:
        classification_map: dict mapping each sentence to its bucket label.
        percentages: dict mapping each bucket to its share of sentences (%).
        mean_ai_probability: float 0..1 (full-text score).

    The full-text probability doubles as the headline AI score because
    detectors are more reliable on whole paragraphs than on isolated
    sentences.
    """
    model, tokenizer, device = load_detector_model()

    # Headline score: one pass of the model over the complete text.
    overall_probability = predict_ai_probability(text, model, tokenizer, device)

    # Bucket boundaries, highest first — the first threshold met wins.
    buckets = [
        (threshold_ai, "AI-generated"),
        (threshold_mid, "AI-generated & AI-refined"),
        (threshold_soft, "Human-written & AI-refined"),
    ]
    fallback_label = "Human-written"

    tallies = {
        "AI-generated": 0,
        "AI-generated & AI-refined": 0,
        "Human-written": 0,
        "Human-written & AI-refined": 0,
    }
    classification_map = {}

    # Fall back to the whole text when the tokenizer yields nothing.
    for sentence in sent_tokenize(text) or [text]:
        if not sentence.strip():
            continue  # skip whitespace-only fragments
        score = predict_ai_probability(sentence, model, tokenizer, device)
        label = next(
            (name for bound, name in buckets if score >= bound),
            fallback_label,
        )
        classification_map[sentence] = label
        tallies[label] += 1

    scored = sum(tallies.values())
    percentages = {
        bucket: round((hits / scored) * 100, 2) if scored > 0 else 0
        for bucket, hits in tallies.items()
    }

    return classification_map, percentages, overall_probability