# audio_analyzer/synthetic_detector.py
# Mr7Explorer: "Update synthetic_detector.py" (commit e536308, verified)
import numpy as np
import librosa
def detect_synthetic_voice(y, sr, spectral):
    """
    Compute a heuristic "synthetic voice" probability score.

    This is NOT a classifier — just an informational hint. Three sub-scores,
    each in [0, 1], are averaged:

    - MFCC stability: ``1 - tanh(v / 50)`` where ``v`` is the mean
      per-coefficient variance of 13 MFCCs (synthetic voices are assumed to
      be extremely stable).
    - Pitch jitter: ``1 - tanh(10 * std(f0) / mean(f0))`` from YIN f0
      estimates searched between 80 and 400 Hz (AI voices are assumed to
      have ~0 jitter).
    - HF smoothness: ``1 - tanh(mean(|diff(hf_env)|) / 5)`` (AI vocoders are
      assumed to show a smooth high-frequency rolloff).

    A feature that cannot be measured (extraction raised, too few pitch
    frames, missing/too-short ``hf_env``, non-finite result) contributes a
    sub-score of 0.0, i.e. it is never counted as evidence of synthesis.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.mfcc`` and
            ``librosa.yin``.
        sr: Sampling rate of ``y``, forwarded to both librosa calls.
        spectral: Mapping that may hold an ``"hf_env"`` entry (a sequence of
            numbers; presumably a high-frequency envelope computed by the
            caller). ``None`` is treated like an empty mapping.

    Returns:
        dict with keys ``synthetic_probability`` (float in [0, 1]),
        ``synthetic_label`` (``"AI"`` when probability >= 0.55, otherwise
        ``"Human"``), ``mfcc_variance``, ``pitch_jitter`` and
        ``hf_smoothness`` (floats; 0.0 when the feature was not measured).
    """
    import logging

    log = logging.getLogger(__name__)

    # ============================================================
    # 1. MFCC Stability
    # ============================================================
    # Human speech → high variance; synthetic → overly stable.
    mfcc_var = 0.0
    mfcc_score = 0.0
    try:
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        measured_var = float(np.mean(np.var(mfcc, axis=1)))
    except Exception:
        # Deliberate best-effort: a failed extraction must not abort the
        # analysis, but it must not look like a "perfectly stable" voice.
        log.debug("MFCC extraction failed; feature ignored", exc_info=True)
    else:
        if np.isfinite(measured_var):
            mfcc_var = measured_var
            mfcc_score = float(1.0 - np.tanh(mfcc_var / 50.0))

    # ============================================================
    # 2. Pitch Jitter
    # ============================================================
    # Human → jitter = 1–5%; AI → jitter close to 0.
    jitter = 0.0
    jitter_score = 0.0
    try:
        # sr must be passed explicitly; otherwise librosa assumes 22050 Hz
        # and the 80-400 Hz search band maps to the wrong lag range.
        f0 = librosa.yin(y, fmin=80, fmax=400, sr=sr)
        f0 = f0[np.isfinite(f0)]
        # Require more than 5 frames before trusting the estimate.
        measured_jitter = (
            float(np.std(f0) / np.mean(f0)) if len(f0) > 5 else None
        )
    except Exception:
        log.debug("Pitch tracking failed; feature ignored", exc_info=True)
    else:
        if measured_jitter is not None and np.isfinite(measured_jitter):
            jitter = measured_jitter
            jitter_score = float(1.0 - np.tanh(jitter * 10))

    # ============================================================
    # 3. HF Smoothness / Symmetry
    # ============================================================
    smoothness = 0.0
    hf_env = spectral.get("hf_env", None) if spectral is not None else None
    if hf_env is not None:
        # Measure average change between adjacent bins.
        diffs = np.abs(np.diff(np.atleast_1d(np.asarray(hf_env, dtype=float))))
        # Fewer than two bins gives no differences; averaging an empty
        # array would yield NaN and poison the combined score.
        if diffs.size > 0:
            mean_diff = float(np.mean(diffs))
            if np.isfinite(mean_diff):
                smoothness = float(1.0 - np.tanh(mean_diff / 5.0))

    # ============================================================
    # Combine Scores
    # ============================================================
    combined = (mfcc_score + jitter_score + smoothness) / 3.0
    prob = float(np.clip(combined, 0.0, 1.0))
    label = "AI" if prob >= 0.55 else "Human"

    return {
        "synthetic_probability": prob,
        "synthetic_label": label,
        "mfcc_variance": mfcc_var,
        "pitch_jitter": jitter,
        "hf_smoothness": float(smoothness),
    }