# audio_analyzer / app.py — Gradio Space entry point (commit 77bde25)
import gradio as gr
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import librosa
import soundfile as sf
import matplotlib.pyplot as plt
import scipy.signal as sps
# Local Modules (must exist in repo root)
from io_utils import read_audio_info
from time_domain import compute_time_domain_stats
from spectral import compute_spectral_analysis
from loudness_calc import compute_loudness, LOUDNESS_AVAILABLE
from issue_detection import detect_audio_issues
from synthetic_detector import detect_synthetic_voice
from report_generator import create_report
# Grading bands: (min score, grade, quality, matplotlib-safe color, recommendation).
# Scanned top-down; anything below the last threshold falls through to "F".
_GRADE_BANDS = (
    (90, "A", "EXCELLENT", "#b3ffb3", "Excellent for TTS dataset"),
    (75, "B", "GOOD", "#ccffcc", "Good quality; suitable for TTS"),
    (60, "C", "FAIR", "#fff6b3", "Fair; contains noticeable processing artifacts"),
    (40, "D", "POOR", "#ffd9b3", "Poor quality; not recommended for TTS"),
)


def _grade(score_value):
    """Map a 0-100 score to (grade, quality, color, recommendation)."""
    for threshold, grade, quality, color, recommendation in _GRADE_BANDS:
        if score_value >= threshold:
            return grade, quality, color, recommendation
    return "F", "CRITICAL", "#ffb3b3", "Severely degraded or heavily processed audio"


def _score_issues(issues):
    """Fold the detected-issue list into the composite score dict.

    Args:
        issues: list of (issue_name, severity, description) tuples, where
            severity is one of "CRITICAL" / "HIGH" / "MEDIUM" / "LOW".

    Returns:
        dict with score, grade, quality, recommendation, cleanliness_score,
        processing_severity, per-severity counts, and a matplotlib-safe color.
    """
    # Single pass over the issues instead of one generator scan per severity.
    counts = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0}
    for _, severity, _ in issues:
        if severity in counts:
            counts[severity] += 1
    critical, high = counts["CRITICAL"], counts["HIGH"]
    medium, low = counts["MEDIUM"], counts["LOW"]
    # LOW issues deliberately do not reduce the headline score — they only
    # affect the cleanliness sub-score below.
    score_value = max(0, 100 - critical * 30 - high * 15 - medium * 5)
    grade, quality, color, recommendation = _grade(score_value)
    return {
        "score": score_value,
        "grade": grade,
        "quality": quality,
        "recommendation": recommendation,
        "cleanliness_score": max(0, 100 - (medium * 5 + low * 3)),
        "processing_severity": critical * 3 + high * 2 + medium,
        "critical": critical,
        "high": high,
        "medium": medium,
        "low": low,
        "color": color,
    }


def _build_markdown(audio_data, output_file):
    """Render the Markdown summary panel for a completed analysis.

    Args:
        audio_data: payload dict assembled by analyze_audio (keys: filename,
            info, time_stats, spectral, lufs, issues, score, synthetic,
            timestamp).
        output_file: Path of the rendered PNG report; its name is echoed in
            the footer.

    Returns:
        Markdown text for the Gradio summary component.
    """
    info = audio_data["info"]
    time_stats = audio_data["time_stats"]
    spectral = audio_data["spectral"]
    lufs = audio_data["lufs"]
    issues = audio_data["issues"]
    synthetic = audio_data["synthetic"]
    s = audio_data["score"]
    e = spectral["energy_distribution"]
    # Markdown bodies are kept at column 0 inside the f-strings so headings
    # and tables are not indented (indented lines would render as code).
    md = f"""
# 🎡 Audio Forensic Summary Report
## πŸ“ File Information
- **Name:** `{audio_data['filename']}`
- **Duration:** {info['duration']:.2f}s
- **Sample Rate:** {info['samplerate']} Hz
- **Channels:** {info['channels']}
---
## 🎚 Loudness (ITU-R BS.1770-3)
"""
    if lufs is not None:
        md += f"- **Integrated LUFS:** {lufs:.2f} LUFS \n"
        # Broadcast target: -23 LUFS with a +/-2 LU tolerance window.
        if -25 <= lufs <= -21:
            md += " - **Status:** PASS βœ… (Compliant βˆ’23 LUFS Β±2)\n"
        else:
            md += " - **Status:** FAIL ❌ (Not compliant with βˆ’23 LUFS Β±2)\n"
    else:
        md += "- **Integrated LUFS:** Not available (pyloudnorm missing) \n"
    md += f"""
---
## πŸ§ͺ Audio Quality Score
- **Score:** {s['score']}/100
- **Grade:** {s['grade']}
- **Quality:** {s['quality']}
- **Recommendation:** {s['recommendation']}
---
## πŸ”§ Time-Domain Characteristics
| Metric | Value |
|--------|--------|
| Peak Level | {time_stats['peak_db']:.2f} dBFS |
| RMS Level | {time_stats['rms_db']:.2f} dBFS |
| Crest Factor | {time_stats['crest_factor_db']:.2f} dB |
| Noise Floor | {time_stats['noise_floor']:.6f} |
| SNR | {time_stats['snr_db']:.1f} dB |
| ZCR | {time_stats['zero_crossing_rate']:.4f} |
---
## πŸŽ› Spectral Analysis
| Parameter | Value |
|----------|--------|
| Spectral Centroid | {spectral['spectral_centroid']:.1f} Hz |
| 85% Rolloff | {spectral['rolloff_85pct']:.1f} Hz |
| 95% Rolloff | {spectral['rolloff_95pct']:.1f} Hz |
| Highest Frequency (βˆ’60 dB) | {spectral['highest_freq_minus60db']:.1f} Hz |
---
## πŸ”Š Energy Distribution (Speech Frequency Bands)
| Band | Energy % |
|------|----------|
| <100 Hz | {e['below_100hz']:.2f}% |
| 100–500 Hz | {e['100_500hz']:.2f}% |
| 500–2k Hz | {e['500_2khz']:.2f}% |
| 2k–8k Hz | {e['2k_8khz']:.2f}% |
| 8k–12k Hz | {e['8k_12khz']:.2f}% |
| 12k–16k Hz | {e['12k_16khz']:.2f}% |
| >16k Hz | {e['above_16khz']:.2f}% |
---
## πŸ€– Synthetic Voice Estimate (Informational Only)
- **Probability:** {synthetic.get('synthetic_probability', 0.0):.3f}
- **Label:** {synthetic.get('synthetic_label', 'unknown')}
---
## ⚠️ Issues Detected: {len(issues)}
"""
    if issues:
        icons = {"CRITICAL": "πŸ”΄", "HIGH": "🟠", "MEDIUM": "🟑", "LOW": "🟒"}
        for issue, sev, desc in issues:
            md += f"- {icons.get(sev,'βšͺ')} **[{sev}] {issue}** β€” {desc}\n"
    else:
        md += "- βœ… No issues detected.\n"
    md += f"""
---
πŸ“Š **PNG Forensic Report Saved:** `{output_file.name}`
πŸ•’ Generated: {audio_data['timestamp']}
"""
    return md


def analyze_audio(audio_file, progress=gr.Progress()):
    """Run the full forensic pipeline on one uploaded audio file.

    Args:
        audio_file: filesystem path supplied by the gr.Audio component, or
            None when nothing was uploaded.
        progress: Gradio progress tracker (injected via the gr.Progress()
            default — do not change this default).

    Returns:
        (png_path, markdown) on success, or (None, error_markdown) when the
        input is missing or any pipeline stage raises.
    """
    if audio_file is None:
        return None, "⚠️ Please upload an audio file."
    try:
        path = Path(audio_file)
        # File info & waveform load (mono, native sample rate preserved).
        progress(0.10, desc="Reading file...")
        info = read_audio_info(str(path))
        progress(0.25, desc="Loading waveform...")
        y, sr = librosa.load(str(path), sr=None, mono=True)
        # Analysis stages from the local modules.
        progress(0.35, desc="Time-domain analysis...")
        time_stats = compute_time_domain_stats(y)
        progress(0.50, desc="Spectral analysis...")
        spectral = compute_spectral_analysis(y, sr)
        progress(0.60, desc="Computing LUFS...")
        lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None
        progress(0.70, desc="Detecting issues...")
        issues = detect_audio_issues(spectral, time_stats)
        # Synthetic-voice estimate is informational only.
        progress(0.78, desc="Synthetic voice estimation...")
        synthetic = detect_synthetic_voice(y, sr, spectral)
        progress(0.82, desc="Scoring...")
        score_dict = _score_issues(issues)
        # Payload consumed by both the PNG renderer and the Markdown summary.
        audio_data = {
            "filename": path.name,
            "info": info,
            "time_stats": time_stats,
            "spectral": spectral,
            "lufs": lufs,
            "issues": issues,
            "score": score_dict,
            "synthetic": synthetic,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }
        # Render the PNG report next to the app under reports/.
        progress(0.92, desc="Rendering PNG report...")
        report_dir = Path("reports")
        report_dir.mkdir(exist_ok=True)
        output_file = report_dir / f"{path.stem}_report.png"
        create_report(audio_data, str(output_file))
        return str(output_file), _build_markdown(audio_data, output_file)
    except Exception as exc:
        # Broad catch is deliberate: this is the UI boundary — surface the
        # failure in the Markdown panel instead of crashing the Space.
        import traceback
        traceback.print_exc()
        return None, f"# ❌ Analysis Failed\n{str(exc)}"
# ---------------------------------------------------------------------------
# Gradio UI: one upload column on the left, PNG report + Markdown summary on
# the right, wired to analyze_audio.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Audio Forensic Analyzer", theme="soft") as demo:
    gr.Markdown("""
# 🎧 AUDIO FORENSIC ANALYZER
Upload an audio file to generate a forensic-quality report:
- HF/LF rolloff detection
- LPF/HPF / Brickwall detection
- Noise-reduction artifacts
- Compression and clipping indicators
- Spectral notches
- LUFS (ITU-R BS.1770-3) check
- Synthetic voice estimation (informational)
Outputs a PNG report + Markdown summary
""")
    with gr.Row():
        with gr.Column(scale=1):
            uploaded_audio = gr.Audio(label="πŸ“ Upload Audio", type="filepath")
            run_button = gr.Button("πŸ” Analyze Audio", variant="primary")
        with gr.Column(scale=2):
            report_image = gr.Image(label="πŸ“Š Forensic PNG Report", type="filepath", height=600)
            report_markdown = gr.Markdown(label="πŸ“‹ Summary Report")
    run_button.click(
        fn=analyze_audio,
        inputs=[uploaded_audio],
        outputs=[report_image, report_markdown],
    )

if __name__ == "__main__":
    # Bind to all interfaces on the standard Hugging Face Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)