import gradio as gr
from pathlib import Path
from datetime import datetime
import warnings

warnings.filterwarnings('ignore')

import numpy as np
import librosa
import soundfile as sf
import matplotlib.pyplot as plt
import scipy.signal as sps

# Local Modules (must exist in repo root)
from io_utils import read_audio_info
from time_domain import compute_time_domain_stats
from spectral import compute_spectral_analysis
from loudness_calc import compute_loudness, LOUDNESS_AVAILABLE
from issue_detection import detect_audio_issues
from synthetic_detector import detect_synthetic_voice
from report_generator import create_report


# Grade bands: (minimum score, (grade, quality, matplotlib-safe color, recommendation)).
# Checked in descending order; anything below the last threshold falls through to F.
_GRADE_BANDS = (
    (90, ("A", "EXCELLENT", "#b3ffb3", "Excellent for TTS dataset")),
    (75, ("B", "GOOD", "#ccffcc", "Good quality; suitable for TTS")),
    (60, ("C", "FAIR", "#fff6b3", "Fair; contains noticeable processing artifacts")),
    (40, ("D", "POOR", "#ffd9b3", "Poor quality; not recommended for TTS")),
)
_GRADE_FALLBACK = ("F", "CRITICAL", "#ffb3b3", "Severely degraded or heavily processed audio")


def _score_issues(issues):
    """Derive the quality score dict from the detected-issues list.

    Args:
        issues: iterable of (issue_name, severity, description) triples, where
            severity is one of "CRITICAL" / "HIGH" / "MEDIUM" / "LOW".

    Returns:
        dict with score, grade, quality, recommendation, cleanliness_score,
        processing_severity, per-severity counts, and a display color.
    """
    counts = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0}
    for _, sev, _ in issues:
        if sev in counts:
            counts[sev] += 1
    critical, high = counts["CRITICAL"], counts["HIGH"]
    medium, low = counts["MEDIUM"], counts["LOW"]

    # LOW issues do not reduce the headline score, only the cleanliness score.
    score_value = max(0, 100 - (critical * 30) - (high * 15) - (medium * 5))

    for threshold, band in _GRADE_BANDS:
        if score_value >= threshold:
            grade, quality, color, recommendation = band
            break
    else:
        grade, quality, color, recommendation = _GRADE_FALLBACK

    return {
        "score": score_value,
        "grade": grade,
        "quality": quality,
        "recommendation": recommendation,
        "cleanliness_score": max(0, 100 - (medium * 5 + low * 3)),
        "processing_severity": (critical * 3) + (high * 2) + medium,
        "critical": critical,
        "high": high,
        "medium": medium,
        "low": low,
        "color": color,
    }


def _build_summary_md(audio_data, output_file):
    """Render the Markdown summary report for one analyzed file.

    Args:
        audio_data: the payload dict assembled by analyze_audio (info,
            time_stats, spectral, lufs, issues, score, synthetic, timestamp).
        output_file: Path of the PNG report that was written.

    Returns:
        Markdown string for the Gradio summary pane.
    """
    info = audio_data["info"]
    time_stats = audio_data["time_stats"]
    spectral = audio_data["spectral"]
    lufs = audio_data["lufs"]
    issues = audio_data["issues"]
    synthetic = audio_data["synthetic"]
    s = audio_data["score"]
    e = spectral["energy_distribution"]

    md = f"""
# 🎵 Audio Forensic Summary Report

## 📁 File Information
- **Name:** `{audio_data['filename']}`
- **Duration:** {info['duration']:.2f}s
- **Sample Rate:** {info['samplerate']} Hz
- **Channels:** {info['channels']}

---

## 🎚 Loudness (ITU-R BS.1770-3)
"""
    if lufs is not None:
        md += f"- **Integrated LUFS:** {lufs:.2f} LUFS \n"
        # Broadcast target is −23 LUFS with a ±2 LU tolerance window.
        if -25 <= lufs <= -21:
            md += "  - **Status:** PASS ✅ (Compliant −23 LUFS ±2)\n"
        else:
            md += "  - **Status:** FAIL ❌ (Not compliant with −23 LUFS ±2)\n"
    else:
        md += "- **Integrated LUFS:** Not available (pyloudnorm missing) \n"

    md += f"""
---

## 🧪 Audio Quality Score
- **Score:** {s['score']}/100
- **Grade:** {s['grade']}
- **Quality:** {s['quality']}
- **Recommendation:** {s['recommendation']}

---

## 🔧 Time-Domain Characteristics
| Metric | Value |
|--------|--------|
| Peak Level | {time_stats['peak_db']:.2f} dBFS |
| RMS Level | {time_stats['rms_db']:.2f} dBFS |
| Crest Factor | {time_stats['crest_factor_db']:.2f} dB |
| Noise Floor | {time_stats['noise_floor']:.6f} |
| SNR | {time_stats['snr_db']:.1f} dB |
| ZCR | {time_stats['zero_crossing_rate']:.4f} |

---

## 🎛 Spectral Analysis
| Parameter | Value |
|----------|--------|
| Spectral Centroid | {spectral['spectral_centroid']:.1f} Hz |
| 85% Rolloff | {spectral['rolloff_85pct']:.1f} Hz |
| 95% Rolloff | {spectral['rolloff_95pct']:.1f} Hz |
| Highest Frequency (−60 dB) | {spectral['highest_freq_minus60db']:.1f} Hz |

---

## 🔊 Energy Distribution (Speech Frequency Bands)
| Band | Energy % |
|------|----------|
| <100 Hz | {e['below_100hz']:.2f}% |
| 100–500 Hz | {e['100_500hz']:.2f}% |
| 500–2k Hz | {e['500_2khz']:.2f}% |
| 2k–8k Hz | {e['2k_8khz']:.2f}% |
| 8k–12k Hz | {e['8k_12khz']:.2f}% |
| 12k–16k Hz | {e['12k_16khz']:.2f}% |
| >16k Hz | {e['above_16khz']:.2f}% |

---

## 🤖 Synthetic Voice Estimate (Informational Only)
- **Probability:** {synthetic.get('synthetic_probability', 0.0):.3f}
- **Label:** {synthetic.get('synthetic_label', 'unknown')}

---

## ⚠️ Issues Detected: {len(issues)}
"""
    if issues:
        icons = {"CRITICAL": "🔴", "HIGH": "🟠", "MEDIUM": "🟡", "LOW": "🟢"}
        for issue, sev, desc in issues:
            md += f"- {icons.get(sev, '⚪')} **[{sev}] {issue}** — {desc}\n"
    else:
        md += "- ✅ No issues detected.\n"

    md += f"""
---
📊 **PNG Forensic Report Saved:** `{output_file.name}`
🕒 Generated: {audio_data['timestamp']}
"""
    return md


def analyze_audio(audio_file, progress=gr.Progress()):
    """Run the full forensic pipeline on one uploaded audio file.

    Args:
        audio_file: filesystem path supplied by the gr.Audio component
            (None when nothing was uploaded).
        progress: Gradio progress tracker (injected by Gradio at call time).

    Returns:
        (png_report_path_or_None, markdown_summary) for the two output
        components. On failure returns (None, error markdown) — the broad
        except is deliberate: this is the top-level UI boundary.
    """
    if audio_file is None:
        return None, "⚠️ Please upload an audio file."

    try:
        path = Path(audio_file)

        # File info & load
        progress(0.10, desc="Reading file...")
        info = read_audio_info(str(path))
        progress(0.25, desc="Loading waveform...")
        # sr=None keeps the native sample rate; mono mixdown for analysis.
        y, sr = librosa.load(str(path), sr=None, mono=True)

        # Time-domain
        progress(0.35, desc="Time-domain analysis...")
        time_stats = compute_time_domain_stats(y)

        # Spectral
        progress(0.50, desc="Spectral analysis...")
        spectral = compute_spectral_analysis(y, sr)

        # Loudness
        progress(0.60, desc="Computing LUFS...")
        lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None

        # Issue detection
        progress(0.70, desc="Detecting issues...")
        issues = detect_audio_issues(spectral, time_stats)

        # Synthetic detection (informational)
        progress(0.78, desc="Synthetic voice estimation...")
        synthetic = detect_synthetic_voice(y, sr, spectral)

        # Scoring
        progress(0.82, desc="Scoring...")
        score_dict = _score_issues(issues)

        # Build audio_data payload consumed by the PNG report generator.
        audio_data = {
            "filename": path.name,
            "info": info,
            "time_stats": time_stats,
            "spectral": spectral,
            "lufs": lufs,
            "issues": issues,
            "score": score_dict,
            "synthetic": synthetic,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

        # Render the PNG report into ./reports/
        progress(0.92, desc="Rendering PNG report...")
        report_dir = Path("reports")
        report_dir.mkdir(parents=True, exist_ok=True)
        output_file = report_dir / f"{path.stem}_report.png"
        create_report(audio_data, str(output_file))

        md = _build_summary_md(audio_data, output_file)
        return str(output_file), md

    except Exception as e:
        import traceback
        traceback.print_exc()
        return None, f"# ❌ Analysis Failed\n{str(e)}"


# Gradio UI
with gr.Blocks(title="Audio Forensic Analyzer", theme="soft") as demo:
    gr.Markdown("""
# 🎧 AUDIO FORENSIC ANALYZER
Upload an audio file to generate a forensic-quality report:
- HF/LF rolloff detection
- LPF/HPF / Brickwall detection
- Noise-reduction artifacts
- Compression and clipping indicators
- Spectral notches
- LUFS (ITU-R BS.1770-3) check
- Synthetic voice estimation (informational)

Outputs a PNG report + Markdown summary
""")

    with gr.Row():
        with gr.Column(scale=1):
            audio_in = gr.Audio(label="📁 Upload Audio", type="filepath")
            analyze_btn = gr.Button("🔍 Analyze Audio", variant="primary")
        with gr.Column(scale=2):
            png_out = gr.Image(label="📊 Forensic PNG Report", type="filepath", height=600)
            summary_out = gr.Markdown(label="📋 Summary Report")

    analyze_btn.click(fn=analyze_audio, inputs=[audio_in], outputs=[png_out, summary_out])

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)