Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from pathlib import Path | |
| from datetime import datetime | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
| import numpy as np | |
| import librosa | |
| import soundfile as sf | |
| import matplotlib.pyplot as plt | |
| import scipy.signal as sps | |
| # Local Modules (must exist in repo root) | |
| from io_utils import read_audio_info | |
| from time_domain import compute_time_domain_stats | |
| from spectral import compute_spectral_analysis | |
| from loudness_calc import compute_loudness, LOUDNESS_AVAILABLE | |
| from issue_detection import detect_audio_issues | |
| from synthetic_detector import detect_synthetic_voice | |
| from report_generator import create_report | |
# NOTE(review): the emoji, dash and "±" glyphs in the user-facing strings below
# were restored from mis-encoded text; where the damaged bytes were ambiguous
# the glyph is a best guess — confirm against the intended design.

# Severity label -> bullet icon used in the Markdown issue list.
_SEVERITY_ICONS = {"CRITICAL": "🔴", "HIGH": "🟠", "MEDIUM": "🟡", "LOW": "🟢"}

# (minimum score, grade, quality, report color, recommendation), best first.
# The colors are plain hex strings so they can be passed to Matplotlib as-is.
_GRADE_BANDS = (
    (90, "A", "EXCELLENT", "#b3ffb3", "Excellent for TTS dataset"),
    (75, "B", "GOOD", "#ccffcc", "Good quality; suitable for TTS"),
    (60, "C", "FAIR", "#fff6b3",
     "Fair; contains noticeable processing artifacts"),
    (40, "D", "POOR", "#ffd9b3", "Poor quality; not recommended for TTS"),
)
# Used when the score is below every threshold in _GRADE_BANDS.
_FAILING_BAND = (
    "F", "CRITICAL", "#ffb3b3", "Severely degraded or heavily processed audio",
)


def _score_issues(issues):
    """Turn the detected issues into the score dictionary used by the reports.

    Args:
        issues: iterable of ``(name, severity, description)`` triples.

    Returns:
        dict with the keys ``score``, ``grade``, ``quality``,
        ``recommendation``, ``cleanliness_score``, ``processing_severity``,
        ``critical``, ``high``, ``medium``, ``low`` and ``color``.
    """
    tally = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0}
    for _, severity, _ in issues:
        # Severities outside the four known labels are not counted.
        if severity in tally:
            tally[severity] += 1
    critical, high = tally["CRITICAL"], tally["HIGH"]
    medium, low = tally["MEDIUM"], tally["LOW"]

    # LOW issues do not reduce the headline score; they only affect the
    # cleanliness score further down.
    score_value = max(0, 100 - critical * 30 - high * 15 - medium * 5)

    grade, quality, color, recommendation = _FAILING_BAND
    for minimum, band_grade, band_quality, band_color, band_text in _GRADE_BANDS:
        if score_value >= minimum:
            grade, quality = band_grade, band_quality
            color, recommendation = band_color, band_text
            break

    return {
        "score": score_value,
        "grade": grade,
        "quality": quality,
        "recommendation": recommendation,
        "cleanliness_score": max(0, 100 - (medium * 5 + low * 3)),
        "processing_severity": critical * 3 + high * 2 + medium,
        "critical": critical,
        "high": high,
        "medium": medium,
        "low": low,
        "color": color,
    }


def _loudness_lines(lufs):
    """Return the Markdown lines of the loudness section.

    Args:
        lufs: integrated loudness value, or ``None`` when it was not computed.
    """
    if lufs is None:
        # The trailing space is part of the original output text.
        return ["- **Integrated LUFS:** Not available (pyloudnorm missing) "]
    if -25 <= lufs <= -21:
        status = " - **Status:** PASS ✅ (Compliant −23 LUFS ±2)"
    else:
        status = " - **Status:** FAIL ❌ (Not compliant with −23 LUFS ±2)"
    # The trailing space after "LUFS" is part of the original output text.
    return [f"- **Integrated LUFS:** {lufs:.2f} LUFS ", status]


def _build_summary_markdown(audio_data, report_name):
    """Render the Markdown summary shown next to the PNG report.

    Args:
        audio_data: payload assembled by ``analyze_audio`` (keys ``filename``,
            ``info``, ``time_stats``, ``spectral``, ``lufs``, ``issues``,
            ``score``, ``synthetic`` and ``timestamp``).
        report_name: file name of the saved PNG report.

    Returns:
        The summary as a single Markdown string.

    Raises:
        KeyError: if a metric read below is missing from ``audio_data``.
    """
    info = audio_data["info"]
    time_stats = audio_data["time_stats"]
    spectral = audio_data["spectral"]
    score = audio_data["score"]
    synthetic = audio_data["synthetic"]
    issues = audio_data["issues"]
    energy = spectral["energy_distribution"]

    # The leading empty string reproduces the newline the text starts with.
    lines = [
        "",
        "# 🎵 Audio Forensic Summary Report",
        "## 📁 File Information",
        f"- **Name:** `{audio_data['filename']}`",
        f"- **Duration:** {info['duration']:.2f}s",
        f"- **Sample Rate:** {info['samplerate']} Hz",
        f"- **Channels:** {info['channels']}",
        "---",
        "## 🔊 Loudness (ITU-R BS.1770-3)",
    ]
    lines.extend(_loudness_lines(audio_data["lufs"]))
    lines.extend([
        "",
        "---",
        "## 🧪 Audio Quality Score",
        f"- **Score:** {score['score']}/100",
        f"- **Grade:** {score['grade']}",
        f"- **Quality:** {score['quality']}",
        f"- **Recommendation:** {score['recommendation']}",
        "---",
        "## 🎧 Time-Domain Characteristics",
        "| Metric | Value |",
        "|--------|--------|",
        f"| Peak Level | {time_stats['peak_db']:.2f} dBFS |",
        f"| RMS Level | {time_stats['rms_db']:.2f} dBFS |",
        f"| Crest Factor | {time_stats['crest_factor_db']:.2f} dB |",
        f"| Noise Floor | {time_stats['noise_floor']:.6f} |",
        f"| SNR | {time_stats['snr_db']:.1f} dB |",
        f"| ZCR | {time_stats['zero_crossing_rate']:.4f} |",
        "---",
        "## 📊 Spectral Analysis",
        "| Parameter | Value |",
        "|----------|--------|",
        f"| Spectral Centroid | {spectral['spectral_centroid']:.1f} Hz |",
        f"| 85% Rolloff | {spectral['rolloff_85pct']:.1f} Hz |",
        f"| 95% Rolloff | {spectral['rolloff_95pct']:.1f} Hz |",
        f"| Highest Frequency (−60 dB) | "
        f"{spectral['highest_freq_minus60db']:.1f} Hz |",
        "---",
        "## 📈 Energy Distribution (Speech Frequency Bands)",
        "| Band | Energy % |",
        "|------|----------|",
        f"| <100 Hz | {energy['below_100hz']:.2f}% |",
        f"| 100–500 Hz | {energy['100_500hz']:.2f}% |",
        f"| 500–2k Hz | {energy['500_2khz']:.2f}% |",
        f"| 2k–8k Hz | {energy['2k_8khz']:.2f}% |",
        f"| 8k–12k Hz | {energy['8k_12khz']:.2f}% |",
        f"| 12k–16k Hz | {energy['12k_16khz']:.2f}% |",
        f"| >16k Hz | {energy['above_16khz']:.2f}% |",
        "---",
        "## 🤖 Synthetic Voice Estimate (Informational Only)",
        f"- **Probability:** {synthetic.get('synthetic_probability', 0.0):.3f}",
        f"- **Label:** {synthetic.get('synthetic_label', 'unknown')}",
        "---",
        f"## ⚠️ Issues Detected: {len(issues)}",
    ])

    if issues:
        for issue, severity, description in issues:
            icon = _SEVERITY_ICONS.get(severity, "⚪")
            lines.append(f"- {icon} **[{severity}] {issue}** — {description}")
    else:
        lines.append("- ✅ No issues detected.")

    lines.extend([
        "",
        "---",
        f"📄 **PNG Forensic Report Saved:** `{report_name}`",
        f"🕒 Generated: {audio_data['timestamp']}",
    ])
    return "\n".join(lines) + "\n"


def analyze_audio(audio_file, progress=gr.Progress()):
    """Analyze one audio file and produce a PNG report plus a Markdown summary.

    Args:
        audio_file: path of the uploaded audio file, or ``None`` when nothing
            was uploaded.
        progress: Gradio progress tracker; called with a fraction and a
            description before each stage.

    Returns:
        ``(png_path, markdown)``. ``png_path`` is ``None`` when no file was
        given or the analysis failed; ``markdown`` then carries the message.
    """
    if audio_file is None:
        return None, "⚠️ Please upload an audio file."

    try:
        path = Path(audio_file)

        # File info & load
        progress(0.10, desc="Reading file...")
        info = read_audio_info(str(path))
        progress(0.25, desc="Loading waveform...")
        # sr=None keeps the file's own sample rate; mono=True downmixes.
        y, sr = librosa.load(str(path), sr=None, mono=True)

        # Time-domain
        progress(0.35, desc="Time-domain analysis...")
        time_stats = compute_time_domain_stats(y)

        # Spectral
        progress(0.50, desc="Spectral analysis...")
        spectral = compute_spectral_analysis(y, sr)

        # Loudness (skipped when the loudness backend is unavailable)
        progress(0.60, desc="Computing LUFS...")
        lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None

        # Issue detection
        progress(0.70, desc="Detecting issues...")
        issues = detect_audio_issues(spectral, time_stats)

        # Synthetic detection (informational)
        progress(0.78, desc="Synthetic voice estimation...")
        synthetic = detect_synthetic_voice(y, sr, spectral)

        # Scoring
        progress(0.82, desc="Scoring...")
        score_dict = _score_issues(issues)

        # Payload shared by the PNG renderer and the Markdown summary
        audio_data = {
            "filename": path.name,
            "info": info,
            "time_stats": time_stats,
            "spectral": spectral,
            "lufs": lufs,
            "issues": issues,
            "score": score_dict,
            "synthetic": synthetic,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

        # Render the PNG into ./reports (created on first use)
        progress(0.92, desc="Rendering PNG report...")
        report_dir = Path("reports")
        report_dir.mkdir(exist_ok=True)
        output_file = report_dir / f"{path.stem}_report.png"
        create_report(audio_data, str(output_file))

        summary = _build_summary_markdown(audio_data, output_file.name)
        return str(output_file), summary
    except Exception as exc:
        # UI boundary: print the traceback to the console and show the error
        # message to the user instead of raising out of the callback.
        import traceback
        traceback.print_exc()
        return None, f"# ❌ Analysis Failed\n{exc}"
# Gradio UI
# NOTE(review): the nesting of the layout below was reconstructed from the
# statement order (indentation was not available) — confirm it matches the
# intended layout. Emoji in labels were restored from mis-encoded text and are
# best guesses where the damaged bytes were ambiguous.
with gr.Blocks(title="Audio Forensic Analyzer", theme="soft") as demo:
    # Intro text; kept flush-left so the Markdown is not affected by the
    # indentation of the surrounding code.
    gr.Markdown("""
# 🎧 AUDIO FORENSIC ANALYZER
Upload an audio file to generate a forensic-quality report:
- HF/LF rolloff detection
- LPF/HPF / Brickwall detection
- Noise-reduction artifacts
- Compression and clipping indicators
- Spectral notches
- LUFS (ITU-R BS.1770-3) check
- Synthetic voice estimation (informational)

Outputs a PNG report + Markdown summary
""")

    with gr.Row():
        # Left column: input and trigger.
        with gr.Column(scale=1):
            # type="filepath" hands analyze_audio a path string (or None).
            audio_in = gr.Audio(label="📁 Upload Audio", type="filepath")
            analyze_btn = gr.Button("🔍 Analyze Audio", variant="primary")
        # Right column: the two outputs returned by analyze_audio.
        with gr.Column(scale=2):
            png_out = gr.Image(
                label="📊 Forensic PNG Report", type="filepath", height=600
            )
            summary_out = gr.Markdown(label="📋 Summary Report")

    # analyze_audio returns (png_path, markdown), matching the outputs order.
    analyze_btn.click(
        fn=analyze_audio,
        inputs=[audio_in],
        outputs=[png_out, summary_out],
    )

if __name__ == "__main__":
    # Listen on all interfaces, port 7860; show_error surfaces errors in the UI.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)