Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import librosa | |
def detect_synthetic_voice(y, sr, spectral):
    """
    Compute a heuristic synthetic-voice probability score.

    This is NOT a classifier — just an informational hint.

    Three sub-scores, each in [0, 1] (higher = more synthetic-looking),
    are averaged with equal weight:

    - MFCC stability: ``1 - tanh(mfcc_variance / 50)``
      (synthetic voices tend to be extremely stable)
    - Pitch jitter: ``1 - tanh(10 * std(f0) / mean(f0))``
      (AI voices tend to have ~0 jitter)
    - HF smoothness: ``1 - tanh(mean(|diff(hf_env)|) / 5)``
      (AI vocoders often show a smooth HF rolloff)

    A feature that cannot be measured (extraction raised, too few pitch
    frames, missing or too-short ``hf_env``) contributes a sub-score of
    0.0, i.e. "no evidence of synthesis", and its reported raw value is
    0.0. A failed measurement therefore never pushes the result toward
    the "AI" label.

    Args:
        y: Audio samples, passed straight to librosa.
        sr: Sampling rate of ``y`` in Hz.
        spectral: Mapping that may hold an ``"hf_env"`` sequence; may be
            ``None`` or lack the key.

    Returns:
        dict with keys ``synthetic_probability`` (float in [0, 1]),
        ``synthetic_label`` (``"AI"`` when the probability is >= 0.55,
        otherwise ``"Human"``), ``mfcc_variance``, ``pitch_jitter`` and
        ``hf_smoothness`` (all floats).
    """
    # ============================================================
    # 1. MFCC Stability
    # ============================================================
    # Human speech -> high variance; synthetic -> overly stable.
    mfcc_var = 0.0
    mfcc_score = 0.0
    try:
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        measured_var = float(np.mean(np.var(mfcc, axis=1)))
    except Exception:
        # Deliberately best-effort: any extraction failure just means
        # this feature is unavailable for the hint.
        measured_var = float("nan")
    if np.isfinite(measured_var):
        mfcc_var = measured_var
        mfcc_score = float(1.0 - np.tanh(mfcc_var / 50.0))

    # ============================================================
    # 2. Pitch Jitter
    # ============================================================
    # Human -> jitter of roughly 1-5%; AI -> jitter close to 0.
    jitter = 0.0
    jitter_score = 0.0
    try:
        # sr must be forwarded: librosa.yin otherwise assumes 22050 Hz,
        # which skews both the fmin/fmax search range and the f0 values.
        f0 = librosa.yin(y, fmin=80, fmax=400, sr=sr)
        f0 = f0[np.isfinite(f0)]
        measured_jitter = float("nan")
        if len(f0) > 5:
            mean_f0 = float(np.mean(f0))
            if mean_f0 > 0:
                measured_jitter = float(np.std(f0) / mean_f0)
    except Exception:
        # Best-effort, same policy as the MFCC feature.
        measured_jitter = float("nan")
    if np.isfinite(measured_jitter):
        jitter = measured_jitter
        jitter_score = float(1.0 - np.tanh(jitter * 10))

    # ============================================================
    # 3. HF Smoothness / Symmetry
    # ============================================================
    smoothness = 0.0
    hf_env = spectral.get("hf_env") if spectral is not None else None
    if hf_env is not None:
        # Average absolute change between neighbouring bins.
        envelope = np.atleast_1d(np.asarray(hf_env, dtype=float))
        diffs = np.abs(np.diff(envelope))
        # Fewer than two bins gives no diffs; np.mean of an empty array
        # would be NaN and poison the combined probability.
        if diffs.size > 0:
            mean_diff = float(np.mean(diffs))
            if np.isfinite(mean_diff):
                smoothness = float(1.0 - np.tanh(mean_diff / 5.0))

    # ============================================================
    # Combine Scores
    # ============================================================
    combined = (mfcc_score + jitter_score + smoothness) / 3.0
    prob = float(np.clip(combined, 0, 1))
    label = "AI" if prob >= 0.55 else "Human"

    return {
        "synthetic_probability": prob,
        "synthetic_label": label,
        "mfcc_variance": mfcc_var,
        "pitch_jitter": jitter,
        "hf_smoothness": float(smoothness),
    }