# phase3/classifier.py — ESIcodeHub AI detection service
"""
Phase 3 - AI code detector.
After empirical testing on CodeMirage Python (200 samples), the LLMSniffer
checkpoints were dropped:
- Java head: confirmed broken (logit spread <0.07 across diverse samples)
- Python head: AUC 0.523 on CodeMirage (essentially random)
The CodeT5p-based multilingual model from Gurioli et al. ("Is This You, LLM?",
SANER 2025) is now used for all languages. It scored AUC 0.753 on CodeMirage
Python, with a calibrated threshold of 0.77.
Public API:
phase3_classify(code: str, language: str) -> dict
returns {
"p_ai": float in [0, 1] - probability of AI authorship
"verdict": "AI" | "HUMAN" - based on calibrated threshold
"confidence": str - "high" for native langs, "medium" otherwise
"head_used": str - always "multilingual"
"threshold": float - threshold used for the verdict
}
phase3_classify_routed(code: str, language: str) -> dict
Backwards-compatible alias for phase3_classify. Earlier code routed
Python -> 3a and other langs -> 3b; now everything goes to 3b.
"""
from __future__ import annotations
import json
from pathlib import Path
from phase3.classifier_b import phase3b_classify
# --------------------------------------------------------------------------- #
# Calibration
# --------------------------------------------------------------------------- #
_THIS_DIR = Path(__file__).resolve().parent
_CALIBRATOR_PATH = _THIS_DIR / "calibrator_3b.json"

# Sensible default if the calibrator file is missing or unreadable.
# Empirically derived from the 200-sample CodeMirage Python calibration
# (balanced accuracy max).
_DEFAULT_THRESHOLD = 0.77

# Lazily populated by _get_threshold(); None means "not loaded yet".
_threshold_cache: float | None = None


def _get_threshold() -> float:
    """Return the calibrated decision threshold, loading it at most once.

    Reads the ``"threshold"`` key from ``calibrator_3b.json`` located next
    to this module. Falls back to ``_DEFAULT_THRESHOLD`` (0.77) when the
    file is missing, is not valid JSON, lacks the key, or holds a
    non-numeric value. The fallback is deliberate best-effort behaviour:
    classification must not fail just because calibration is absent.
    """
    global _threshold_cache
    if _threshold_cache is None:
        try:
            # EAFP: read directly instead of exists()+open to avoid a
            # check-then-use race; encoding pinned for portability.
            data = json.loads(_CALIBRATOR_PATH.read_text(encoding="utf-8"))
            _threshold_cache = float(data["threshold"])
        except (OSError, ValueError, KeyError, TypeError):
            # OSError: file missing/unreadable; ValueError covers
            # json.JSONDecodeError and bad float(); KeyError: no
            # "threshold" entry; TypeError: non-numeric value.
            _threshold_cache = _DEFAULT_THRESHOLD
    return _threshold_cache
# --------------------------------------------------------------------------- #
# Public API
# --------------------------------------------------------------------------- #
def phase3_classify(code: str, language: str) -> dict:
    """
    Classify *code* as AI- or human-authored.

    Runs the multilingual CodeT5p classifier (phase 3b), compares the
    resulting probability against the calibrated threshold, and packages
    the verdict together with the scorer's metadata.
    """
    threshold = _get_threshold()
    scores = phase3b_classify(code, language)
    probability = scores["p_ai"]
    verdict = "AI" if probability > threshold else "HUMAN"
    return {
        "p_ai": probability,
        "verdict": verdict,
        "confidence": scores["confidence"],
        "head_used": scores["head_used"],
        "threshold": threshold,
    }


# Backwards compatibility for callers using the older "routed" name.
phase3_classify_routed = phase3_classify
# --------------------------------------------------------------------------- #
# Smoke test
# --------------------------------------------------------------------------- #
_HUMAN_PYTHON = """
def fib(n):
a, b = 0, 1
for _ in range(n):
a, b = b, a + b
return a
"""
_AI_PYTHON = '''
def calculate_fibonacci_number(n: int) -> int:
"""
Calculate the nth Fibonacci number using an iterative approach.
"""
if n < 0:
raise ValueError("Input must be a non-negative integer.")
previous_value, current_value = 0, 1
for _ in range(n):
previous_value, current_value = current_value, previous_value + current_value
return previous_value
'''
if __name__ == "__main__":
    # Manual smoke run: show the active threshold, then score both fixtures.
    print(f"Threshold (from calibrator): {_get_threshold():.4f}")
    print()
    samples = (
        ("HUMAN python", _HUMAN_PYTHON),
        ("AI python", _AI_PYTHON),
    )
    for label, code in samples:
        result = phase3_classify(code, "python")
        summary = (f"{label:15s} p_ai={result['p_ai']:.4f} "
                   f"verdict={result['verdict']:5s} conf={result['confidence']}")
        print(summary)