Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import librosa | |
| import scipy.signal as sps | |
def _detect_spectral_notches(envelope_db, freqs, min_depth_db=15, min_freq_hz=100,
                             kernel_size=9, reach=6):
    """Find narrow dips in a per-bin dB envelope.

    The envelope is median-filtered, local minima are located, and each
    minimum is kept when it lies at least ``min_depth_db`` below the highest
    smoothed value within ``reach`` bins on either side and its frequency is
    above ``min_freq_hz``.

    Args:
        envelope_db: 1-D array with one dB value per frequency bin.
        freqs: 1-D array of bin frequencies, same length as ``envelope_db``.
        min_depth_db: Minimum depth (dB) for a minimum to count as a notch.
        min_freq_hz: Minima at or below this frequency are ignored.
        kernel_size: Odd window length passed to ``scipy.signal.medfilt``.
        reach: Number of bins inspected on each side of the minimum.

    Returns:
        A list of ``{"freq": float, "depth_db": float}`` dicts, ordered by
        increasing frequency.
    """
    smooth = sps.medfilt(envelope_db, kernel_size=kernel_size)

    # Median filtering tends to flatten the bottom of a dip into a run of
    # equal values. A strict "less than both neighbours" test rejects every
    # sample of such a run, so flat-bottomed notches would go undetected.
    # find_peaks on the negated envelope reports plateau minima as well
    # (at the plateau midpoint) together with the plateau edges.
    minima, props = sps.find_peaks(-smooth, plateau_size=1)

    notches = []
    for m, left_edge, right_edge in zip(minima, props["left_edges"], props["right_edges"]):
        # Measure from the plateau edges so the flat bottom itself is never
        # mistaken for the surrounding level. For a single-bin minimum both
        # edges equal m. find_peaks never reports the first or last sample,
        # so both windows contain at least one element.
        left = smooth[max(0, left_edge - reach):left_edge]
        right = smooth[right_edge + 1:right_edge + 1 + reach]
        neighbor_peak = max(left.max(), right.max())
        depth = neighbor_peak - smooth[m]
        if depth >= min_depth_db and freqs[m] > min_freq_hz:
            notches.append({
                "freq": float(freqs[m]),
                "depth_db": float(depth),
            })
    return notches


def compute_spectral_analysis(y, sr, n_fft=4096):
    """Comprehensive spectral analysis tuned for speech QC.

    Computes a Hann-windowed STFT of ``y`` (hop = ``n_fft // 4``) and derives
    roll-off points, band energy percentages, a brick-wall indicator,
    spectral notches and several librosa spectral descriptors from it.

    Args:
        y: Audio samples handed to ``librosa.stft``.
            NOTE(review): the per-bin reductions use ``axis=1``, which
            presumably expects a mono 1-D signal - confirm with callers.
        sr: Sample rate in Hz; ``sr / 2`` is used as the top of the
            frequency axis.
        n_fft: FFT size for the STFT.

    Returns:
        dict with keys:
            ``S_db``, ``freqs``, ``hop_length``, ``n_fft``: the dB
                spectrogram, its bin frequencies and the STFT parameters.
            ``rolloff_85pct``, ``rolloff_95pct``: frequencies below which
                85 % / 95 % of the 90th-percentile power envelope lies.
            ``highest_freq_minus60db``: highest bin frequency whose
                90th-percentile dB level is within 60 dB of the spectrogram
                peak (0.0 if no bin qualifies).
            ``energy_distribution``: percentage of envelope energy per band.
            ``brick_wall_detected``, ``brick_wall_freq``: whether adjacent
                bins drop by more than 20 dB, and the frequency of the first
                such drop (``None`` if there is none).
            ``spectral_notches``: list of ``{"freq", "depth_db"}`` dicts.
            ``spectral_centroid``, ``spectral_bandwidth``,
            ``spectral_flatness``, ``spectral_rolloff``: frame-averaged
                librosa descriptors.
            ``hf_env``: per-bin 90th-percentile dB envelope.
            ``lf_env``: the first 200 bins of that envelope.
    """
    hop_length = n_fft // 4

    # ------------------------------------------------------------
    # STFT -> magnitude + dB conversion
    # ------------------------------------------------------------
    S = np.abs(librosa.stft(
        y,
        n_fft=n_fft,
        hop_length=hop_length,
        window="hann",
    ))
    freqs = np.linspace(0, sr / 2, S.shape[0])
    # dB values are relative to the spectrogram maximum (ref=np.max).
    S_db = librosa.amplitude_to_db(S, ref=np.max)

    # ------------------------------------------------------------
    # 90th-percentile power envelope and roll-off points
    # ------------------------------------------------------------
    S_power = S ** 2
    # The tiny offset keeps total_energy non-zero for all-silent input.
    energy = np.percentile(S_power, 90, axis=1) + 1e-20
    total_energy = float(np.sum(energy))
    cum_energy = np.cumsum(energy)
    last_bin = len(freqs) - 1
    roll85_idx = np.searchsorted(cum_energy, 0.85 * total_energy)
    roll95_idx = np.searchsorted(cum_energy, 0.95 * total_energy)
    # searchsorted may return len(cum_energy); clamp to the last valid bin.
    freq_at_85 = float(freqs[min(roll85_idx, last_bin)])
    freq_at_95 = float(freqs[min(roll95_idx, last_bin)])

    # ------------------------------------------------------------
    # HF envelope: 90th percentile of dB per frequency bin
    # ------------------------------------------------------------
    p90_db_per_bin = np.percentile(S_db, 90, axis=1)
    peak_db = float(np.max(S_db))
    threshold_db = peak_db - 60
    non_silent_bins = np.where(p90_db_per_bin > threshold_db)[0]
    highest_freq = float(freqs[non_silent_bins[-1]]) if non_silent_bins.size else 0.0

    # ------------------------------------------------------------
    # Speech-centric band energy distribution
    # ------------------------------------------------------------
    def band_energy(low, high=None):
        """Percentage of envelope energy in [low, high); open-ended if high is None."""
        start = np.searchsorted(freqs, low)
        stop = len(freqs) if high is None else np.searchsorted(freqs, high)
        return float(100 * np.sum(energy[start:stop]) / total_energy)

    energy_stats = {
        "below_100hz": band_energy(0, 100),
        "100_500hz": band_energy(100, 500),
        "500_2khz": band_energy(500, 2000),
        "2k_8khz": band_energy(2000, 8000),
        "8k_12khz": band_energy(8000, 12000),
        "12k_16khz": band_energy(12000, 16000),
        "above_16khz": band_energy(16000),
    }

    # ------------------------------------------------------------
    # Brick-wall detection: > 20 dB drop between adjacent bins
    # ------------------------------------------------------------
    diffs = np.diff(p90_db_per_bin)
    big_drop_idx = np.where(diffs < -20)[0]
    brick_wall = bool(big_drop_idx.size)
    # Reported frequency is the bin just before the first large drop.
    brick_freq = float(freqs[big_drop_idx[0]]) if big_drop_idx.size else None

    # ------------------------------------------------------------
    # Spectral notch detection (median-filtered envelope)
    # ------------------------------------------------------------
    notches = _detect_spectral_notches(p90_db_per_bin, freqs)

    # ------------------------------------------------------------
    # Additional spectral descriptors (averaged over frames)
    # ------------------------------------------------------------
    centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
    bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
    flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
    rolloff = float(np.mean(librosa.feature.spectral_rolloff(S=S, sr=sr)))

    return {
        "S_db": S_db,
        "freqs": freqs,
        "hop_length": hop_length,
        "n_fft": n_fft,
        "rolloff_85pct": freq_at_85,
        "rolloff_95pct": freq_at_95,
        "highest_freq_minus60db": highest_freq,
        "energy_distribution": energy_stats,
        "brick_wall_detected": brick_wall,
        "brick_wall_freq": brick_freq,
        "spectral_notches": notches,
        "spectral_centroid": centroid,
        "spectral_bandwidth": bandwidth,
        "spectral_flatness": flatness,
        "spectral_rolloff": rolloff,
        "hf_env": p90_db_per_bin,
        # Slicing already yields the whole envelope when it has <= 200 bins.
        "lf_env": p90_db_per_bin[:200],
    }