# audio_analyzer / app.py — Gradio Space entry point (commit 77bde25)
import gradio as gr
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import librosa
import soundfile as sf
import matplotlib.pyplot as plt
import scipy.signal as sps
# Local Modules (must exist in repo root)
from io_utils import read_audio_info
from time_domain import compute_time_domain_stats
from spectral import compute_spectral_analysis
from loudness_calc import compute_loudness, LOUDNESS_AVAILABLE
from issue_detection import detect_audio_issues
from synthetic_detector import detect_synthetic_voice
from report_generator import create_report
# Grading bands: (min score, grade, quality, matplotlib-safe color, recommendation).
# Scanned top-down; anything below the last threshold falls through to "F".
_GRADE_BANDS = (
    (90, "A", "EXCELLENT", "#b3ffb3", "Excellent for TTS dataset"),
    (75, "B", "GOOD", "#ccffcc", "Good quality; suitable for TTS"),
    (60, "C", "FAIR", "#fff6b3", "Fair; contains noticeable processing artifacts"),
    (40, "D", "POOR", "#ffd9b3", "Poor quality; not recommended for TTS"),
)


def _grade(score_value):
    """Map a 0-100 score to (grade, quality, color, recommendation)."""
    for threshold, grade, quality, color, recommendation in _GRADE_BANDS:
        if score_value >= threshold:
            return grade, quality, color, recommendation
    return "F", "CRITICAL", "#ffb3b3", "Severely degraded or heavily processed audio"


def _score_issues(issues):
    """Fold the detected-issue list into the composite score dict.

    Args:
        issues: list of (issue_name, severity, description) tuples, where
            severity is one of "CRITICAL" / "HIGH" / "MEDIUM" / "LOW".

    Returns:
        dict with score, grade, quality, recommendation, cleanliness_score,
        processing_severity, per-severity counts, and a matplotlib-safe color.
    """
    # Single pass over the issues instead of one generator scan per severity.
    counts = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0}
    for _, severity, _ in issues:
        if severity in counts:
            counts[severity] += 1
    critical, high = counts["CRITICAL"], counts["HIGH"]
    medium, low = counts["MEDIUM"], counts["LOW"]
    # LOW issues deliberately do not reduce the headline score — they only
    # affect the cleanliness sub-score below.
    score_value = max(0, 100 - critical * 30 - high * 15 - medium * 5)
    grade, quality, color, recommendation = _grade(score_value)
    return {
        "score": score_value,
        "grade": grade,
        "quality": quality,
        "recommendation": recommendation,
        "cleanliness_score": max(0, 100 - (medium * 5 + low * 3)),
        "processing_severity": critical * 3 + high * 2 + medium,
        "critical": critical,
        "high": high,
        "medium": medium,
        "low": low,
        "color": color,
    }


def _build_markdown(audio_data, output_file):
    """Render the Markdown summary panel for a completed analysis.

    Args:
        audio_data: payload dict assembled by analyze_audio (keys: filename,
            info, time_stats, spectral, lufs, issues, score, synthetic,
            timestamp).
        output_file: Path of the rendered PNG report; its name is echoed in
            the footer.

    Returns:
        Markdown text for the Gradio summary component.
    """
    info = audio_data["info"]
    time_stats = audio_data["time_stats"]
    spectral = audio_data["spectral"]
    lufs = audio_data["lufs"]
    issues = audio_data["issues"]
    synthetic = audio_data["synthetic"]
    s = audio_data["score"]
    e = spectral["energy_distribution"]
    # Markdown bodies are kept at column 0 inside the f-strings so headings
    # and tables are not indented (indented lines would render as code).
    md = f"""
# 🎡 Audio Forensic Summary Report
## πŸ“ File Information
- **Name:** `{audio_data['filename']}`
- **Duration:** {info['duration']:.2f}s
- **Sample Rate:** {info['samplerate']} Hz
- **Channels:** {info['channels']}
---
## 🎚 Loudness (ITU-R BS.1770-3)
"""
    if lufs is not None:
        md += f"- **Integrated LUFS:** {lufs:.2f} LUFS \n"
        # Broadcast target: -23 LUFS with a +/-2 LU tolerance window.
        if -25 <= lufs <= -21:
            md += " - **Status:** PASS βœ… (Compliant βˆ’23 LUFS Β±2)\n"
        else:
            md += " - **Status:** FAIL ❌ (Not compliant with βˆ’23 LUFS Β±2)\n"
    else:
        md += "- **Integrated LUFS:** Not available (pyloudnorm missing) \n"
    md += f"""
---
## πŸ§ͺ Audio Quality Score
- **Score:** {s['score']}/100
- **Grade:** {s['grade']}
- **Quality:** {s['quality']}
- **Recommendation:** {s['recommendation']}
---
## πŸ”§ Time-Domain Characteristics
| Metric | Value |
|--------|--------|
| Peak Level | {time_stats['peak_db']:.2f} dBFS |
| RMS Level | {time_stats['rms_db']:.2f} dBFS |
| Crest Factor | {time_stats['crest_factor_db']:.2f} dB |
| Noise Floor | {time_stats['noise_floor']:.6f} |
| SNR | {time_stats['snr_db']:.1f} dB |
| ZCR | {time_stats['zero_crossing_rate']:.4f} |
---
## πŸŽ› Spectral Analysis
| Parameter | Value |
|----------|--------|
| Spectral Centroid | {spectral['spectral_centroid']:.1f} Hz |
| 85% Rolloff | {spectral['rolloff_85pct']:.1f} Hz |
| 95% Rolloff | {spectral['rolloff_95pct']:.1f} Hz |
| Highest Frequency (βˆ’60 dB) | {spectral['highest_freq_minus60db']:.1f} Hz |
---
## πŸ”Š Energy Distribution (Speech Frequency Bands)
| Band | Energy % |
|------|----------|
| <100 Hz | {e['below_100hz']:.2f}% |
| 100–500 Hz | {e['100_500hz']:.2f}% |
| 500–2k Hz | {e['500_2khz']:.2f}% |
| 2k–8k Hz | {e['2k_8khz']:.2f}% |
| 8k–12k Hz | {e['8k_12khz']:.2f}% |
| 12k–16k Hz | {e['12k_16khz']:.2f}% |
| >16k Hz | {e['above_16khz']:.2f}% |
---
## πŸ€– Synthetic Voice Estimate (Informational Only)
- **Probability:** {synthetic.get('synthetic_probability', 0.0):.3f}
- **Label:** {synthetic.get('synthetic_label', 'unknown')}
---
## ⚠️ Issues Detected: {len(issues)}
"""
    if issues:
        icons = {"CRITICAL": "πŸ”΄", "HIGH": "🟠", "MEDIUM": "🟑", "LOW": "🟒"}
        for issue, sev, desc in issues:
            md += f"- {icons.get(sev,'βšͺ')} **[{sev}] {issue}** β€” {desc}\n"
    else:
        md += "- βœ… No issues detected.\n"
    md += f"""
---
πŸ“Š **PNG Forensic Report Saved:** `{output_file.name}`
πŸ•’ Generated: {audio_data['timestamp']}
"""
    return md


def analyze_audio(audio_file, progress=gr.Progress()):
    """Run the full forensic pipeline on one uploaded audio file.

    Args:
        audio_file: filesystem path supplied by the gr.Audio component, or
            None when nothing was uploaded.
        progress: Gradio progress tracker (injected via the gr.Progress()
            default — do not change this default).

    Returns:
        (png_path, markdown) on success, or (None, error_markdown) when the
        input is missing or any pipeline stage raises.
    """
    if audio_file is None:
        return None, "⚠️ Please upload an audio file."
    try:
        path = Path(audio_file)
        # File info & waveform load (mono, native sample rate preserved).
        progress(0.10, desc="Reading file...")
        info = read_audio_info(str(path))
        progress(0.25, desc="Loading waveform...")
        y, sr = librosa.load(str(path), sr=None, mono=True)
        # Analysis stages from the local modules.
        progress(0.35, desc="Time-domain analysis...")
        time_stats = compute_time_domain_stats(y)
        progress(0.50, desc="Spectral analysis...")
        spectral = compute_spectral_analysis(y, sr)
        progress(0.60, desc="Computing LUFS...")
        lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None
        progress(0.70, desc="Detecting issues...")
        issues = detect_audio_issues(spectral, time_stats)
        # Synthetic-voice estimate is informational only.
        progress(0.78, desc="Synthetic voice estimation...")
        synthetic = detect_synthetic_voice(y, sr, spectral)
        progress(0.82, desc="Scoring...")
        score_dict = _score_issues(issues)
        # Payload consumed by both the PNG renderer and the Markdown summary.
        audio_data = {
            "filename": path.name,
            "info": info,
            "time_stats": time_stats,
            "spectral": spectral,
            "lufs": lufs,
            "issues": issues,
            "score": score_dict,
            "synthetic": synthetic,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }
        # Render the PNG report next to the app under reports/.
        progress(0.92, desc="Rendering PNG report...")
        report_dir = Path("reports")
        report_dir.mkdir(exist_ok=True)
        output_file = report_dir / f"{path.stem}_report.png"
        create_report(audio_data, str(output_file))
        return str(output_file), _build_markdown(audio_data, output_file)
    except Exception as exc:
        # Broad catch is deliberate: this is the UI boundary — surface the
        # failure in the Markdown panel instead of crashing the Space.
        import traceback
        traceback.print_exc()
        return None, f"# ❌ Analysis Failed\n{str(exc)}"
# ---------------------------------------------------------------------------
# Gradio UI: one upload column on the left, PNG report + Markdown summary on
# the right, wired to analyze_audio.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Audio Forensic Analyzer", theme="soft") as demo:
    gr.Markdown("""
# 🎧 AUDIO FORENSIC ANALYZER
Upload an audio file to generate a forensic-quality report:
- HF/LF rolloff detection
- LPF/HPF / Brickwall detection
- Noise-reduction artifacts
- Compression and clipping indicators
- Spectral notches
- LUFS (ITU-R BS.1770-3) check
- Synthetic voice estimation (informational)
Outputs a PNG report + Markdown summary
""")
    with gr.Row():
        with gr.Column(scale=1):
            uploaded_audio = gr.Audio(label="πŸ“ Upload Audio", type="filepath")
            run_button = gr.Button("πŸ” Analyze Audio", variant="primary")
        with gr.Column(scale=2):
            report_image = gr.Image(label="πŸ“Š Forensic PNG Report", type="filepath", height=600)
            report_markdown = gr.Markdown(label="πŸ“‹ Summary Report")
    run_button.click(
        fn=analyze_audio,
        inputs=[uploaded_audio],
        outputs=[report_image, report_markdown],
    )

if __name__ == "__main__":
    # Bind to all interfaces on the standard Hugging Face Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)