"""
Phase 3 - AI code detector.

After empirical testing on CodeMirage Python (200 samples), the LLMSniffer
checkpoints were dropped:
  - Java head: confirmed broken (logit spread <0.07 across diverse samples)
  - Python head: AUC 0.523 on CodeMirage (essentially random)

The CodeT5p-based multilingual model from Gurioli et al. ("Is This You, LLM?",
SANER 2025) is now used for all languages. It scored AUC 0.753 on CodeMirage
Python, with a calibrated threshold of 0.77.

Public API:
    phase3_classify(code: str, language: str) -> dict
        returns {
            "p_ai":       float in [0, 1] - probability of AI authorship
            "verdict":    "AI" | "HUMAN"  - based on calibrated threshold
            "confidence": str             - "high" for native langs, "medium" otherwise
            "head_used":  str             - always "multilingual"
            "threshold":  float           - threshold used for the verdict
        }

    phase3_classify_routed(code: str, language: str) -> dict
        Backwards-compatible alias for phase3_classify. Earlier code routed
        Python -> 3a and other langs -> 3b; now everything goes to 3b.
"""
from __future__ import annotations

import json
import logging
import math
from pathlib import Path

from phase3.classifier_b import phase3b_classify

_LOG = logging.getLogger(__name__)

# --------------------------------------------------------------------------- #
# Calibration
# --------------------------------------------------------------------------- #

_THIS_DIR = Path(__file__).resolve().parent
_CALIBRATOR_PATH = _THIS_DIR / "calibrator_3b.json"

# Sensible default if the calibrator file is missing. Empirically derived
# from the 200-sample CodeMirage Python calibration (balanced accuracy max).
_DEFAULT_THRESHOLD = 0.77

# Populated on the first call to _get_threshold(); None means "not loaded yet".
_threshold_cache: float | None = None


def _load_threshold() -> float:
    """Read the decision threshold from the calibrator JSON file.

    The file at ``_CALIBRATOR_PATH`` is expected to hold a JSON object with a
    numeric ``"threshold"`` entry. Loading is best-effort and never raises:

    * file missing -> ``_DEFAULT_THRESHOLD``, silently;
    * file unreadable, malformed, or lacking a usable ``"threshold"`` ->
      ``_DEFAULT_THRESHOLD``, with a logged warning;
    * threshold is NaN or infinite -> ``_DEFAULT_THRESHOLD``, with a logged
      warning (such a value would make every verdict identical).

    Returns:
        The calibrated threshold, or ``_DEFAULT_THRESHOLD`` on any failure.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(_CALIBRATOR_PATH, encoding="utf-8") as fh:
            threshold = float(json.load(fh)["threshold"])
    except FileNotFoundError:
        # A missing calibrator is an expected configuration, not an error.
        return _DEFAULT_THRESHOLD
    except Exception:
        # Deliberately broad: a damaged calibrator must never take the
        # classifier down, but it should not go unnoticed either.
        _LOG.warning(
            "Could not read threshold from %s; falling back to default %s",
            _CALIBRATOR_PATH,
            _DEFAULT_THRESHOLD,
            exc_info=True,
        )
        return _DEFAULT_THRESHOLD

    # json.load accepts NaN/Infinity; comparing p_ai against those is useless.
    if not math.isfinite(threshold):
        _LOG.warning(
            "Non-finite threshold %r in %s; falling back to default %s",
            threshold,
            _CALIBRATOR_PATH,
            _DEFAULT_THRESHOLD,
        )
        return _DEFAULT_THRESHOLD

    return threshold


def _get_threshold() -> float:
    """Return the decision threshold, loading it from disk at most once.

    The first call reads the calibrator file (falling back to
    ``_DEFAULT_THRESHOLD`` when it is missing or unusable) and stores the
    result in the module-level ``_threshold_cache``; later calls return the
    cached value without touching the filesystem.
    """
    global _threshold_cache
    if _threshold_cache is None:
        _threshold_cache = _load_threshold()
    return _threshold_cache


# --------------------------------------------------------------------------- #
# Public API
# --------------------------------------------------------------------------- #

def phase3_classify(code: str, language: str) -> dict:
    """Score code for AI authorship and return a thresholded verdict.

    Delegates scoring to ``phase3b_classify`` (the multilingual CodeT5p
    classifier) and compares its ``"p_ai"`` against the calibrated threshold.

    Args:
        code: Source code to score.
        language: Language of ``code``; passed through to ``phase3b_classify``.

    Returns:
        A dict with keys ``"p_ai"``, ``"verdict"``, ``"confidence"``,
        ``"head_used"`` and ``"threshold"``. ``"verdict"`` is ``"AI"`` only
        when ``p_ai`` is strictly greater than the threshold, otherwise
        ``"HUMAN"``. ``"confidence"`` and ``"head_used"`` are copied from the
        underlying classifier result.
    """
    result = phase3b_classify(code, language)
    threshold = _get_threshold()
    p_ai = result["p_ai"]

    return {
        "p_ai": p_ai,
        "verdict": "AI" if p_ai > threshold else "HUMAN",
        "confidence": result["confidence"],
        "head_used": result["head_used"],
        "threshold": threshold,
    }


# Backwards compatibility for callers using the older "routed" name.
phase3_classify_routed = phase3_classify


# --------------------------------------------------------------------------- #
# Smoke test
# --------------------------------------------------------------------------- #

_HUMAN_PYTHON = """
def fib(n):
    a, b = 0, 1
    for _ in range(n):
        a, b = b, a + b
    return a
"""

_AI_PYTHON = '''
def calculate_fibonacci_number(n: int) -> int:
    """
    Calculate the nth Fibonacci number using an iterative approach.
    """
    if n < 0:
        raise ValueError("Input must be a non-negative integer.")
    previous_value, current_value = 0, 1
    for _ in range(n):
        previous_value, current_value = current_value, previous_value + current_value
    return previous_value
'''


def _smoke_test() -> None:
    """Print the active threshold and the verdict for two sample snippets."""
    print(f"Threshold (from calibrator): {_get_threshold():.4f}")
    print()
    samples = [("HUMAN python", _HUMAN_PYTHON), ("AI python", _AI_PYTHON)]
    for label, code in samples:
        result = phase3_classify(code, "python")
        print(f"{label:15s} p_ai={result['p_ai']:.4f} "
              f"verdict={result['verdict']:5s} conf={result['confidence']}")


if __name__ == "__main__":
    _smoke_test()