Spaces:

connaaa
/

mic-id

Sleeping

File size: 3,804 Bytes

#!/usr/bin/env python3
"""Quick CLI to score audio clips with the trained Mic-ID model."""

from __future__ import annotations

import argparse
import io
import os
from pathlib import Path
from typing import Iterable, List

import joblib
import librosa
import numpy as np

# Keep the numba and matplotlib caches in a ".cache" folder beside this file.
# Values already present in the environment win; the folders are created
# unconditionally either way.
# NOTE(review): `librosa` is imported above this block; if that import pulls in
# numba eagerly, the NUMBA_CACHE_DIR default set here may be applied too late —
# confirm against the installed librosa/numba versions.
BASE_DIR = Path(__file__).resolve().parent
CACHE_ROOT = BASE_DIR.joinpath(".cache")
NUMBA_CACHE_DIR = CACHE_ROOT.joinpath("numba")
MPL_CACHE_DIR = CACHE_ROOT.joinpath("matplotlib")
NUMBA_CACHE_DIR.mkdir(parents=True, exist_ok=True)
MPL_CACHE_DIR.mkdir(parents=True, exist_ok=True)
if "NUMBA_CACHE_DIR" not in os.environ:
    os.environ["NUMBA_CACHE_DIR"] = str(NUMBA_CACHE_DIR)
if "MPLCONFIGDIR" not in os.environ:
    os.environ["MPLCONFIGDIR"] = str(MPL_CACHE_DIR)

from features import extract_features
from devices import describe_label

# Trained artefacts; these are relative paths, so they resolve against the
# current working directory at the time they are used.
MODEL_PATH = Path("models") / "model.pkl"
ENCODER_PATH = Path("models") / "label_encoder.pkl"

# Lower-case file suffixes picked up when a directory is scanned for audio.
AUDIO_EXTENSIONS = {
    ".flac",
    ".m4a",
    ".mp3",
    ".ogg",
    ".wav",
}


def load_model():
    """Load the trained classifier and its label encoder from disk.

    Returns:
        A ``(classifier, label_encoder)`` pair read from ``MODEL_PATH`` and
        ``ENCODER_PATH`` respectively.

    Raises:
        SystemExit: If either artefact file does not exist.
    """
    if not (MODEL_PATH.exists() and ENCODER_PATH.exists()):
        raise SystemExit("Trained artefacts not found. Run `python train.py` first.")
    # NOTE(review): joblib.load unpickles the files, so these artefacts should
    # only ever come from a trusted training run.
    return joblib.load(MODEL_PATH), joblib.load(ENCODER_PATH)


def load_audio(path: Path, sr: int = 16000) -> tuple[np.ndarray, int]:
    """Decode an audio file into a mono waveform at the requested sample rate.

    Args:
        path: Location of the audio file on disk.
        sr: Target sample rate in Hz passed to ``librosa.load``.

    Returns:
        A ``(samples, sample_rate)`` tuple exactly as returned by
        ``librosa.load(..., mono=True)``.

    Raises:
        Whatever ``librosa.load`` raises for unreadable or unsupported files;
        the caller is expected to handle these.
    """
    # Always hand librosa the real filesystem path. Wrapping the bytes in a
    # BytesIO (as an upload handler must) limits decoding to the soundfile
    # backend: librosa only retries with audioread when it was given a path,
    # so compressed formats soundfile cannot read (e.g. .m4a) would fail.
    # Passing the path also avoids buffering the whole file in memory.
    samples, rate = librosa.load(path, sr=sr, mono=True)
    return samples, rate


def normalise_audio(y: np.ndarray) -> tuple[np.ndarray, float]:
    """Rescale a waveform to an RMS level of 0.05 and report its original RMS.

    Args:
        y: Audio samples.

    Returns:
        A ``(scaled, rms)`` tuple: ``scaled`` is ``y`` multiplied by
        ``0.05 / rms``, and ``rms`` is the root-mean-square of ``y`` plus a
        ``1e-8`` floor that keeps the division finite for silent input.
    """
    if y.size == 0:
        # The mean of an empty array is NaN (with a RuntimeWarning); treat an
        # empty clip like silence so the reported level stays finite.
        rms = 1e-8
    else:
        rms = float(np.sqrt(np.mean(y**2)) + 1e-8)
    return y * (0.05 / rms), rms


def discover_inputs(paths: Iterable[Path]) -> List[Path]:
    """Flatten a mix of files and directories into a list of paths to score.

    Directories are searched recursively for files whose lower-cased suffix is
    in ``AUDIO_EXTENSIONS``; their matches are added in sorted order. Anything
    that is not a directory is passed through unchanged, in the order given,
    without checking that it exists. A directory with no matches produces a
    warning on stdout and contributes nothing.
    """
    found: list[Path] = []
    for entry in paths:
        if not entry.is_dir():
            # Explicit file argument (or a missing path): keep it as-is.
            found.append(entry)
            continue

        audio_files = [
            candidate
            for candidate in entry.rglob("*")
            if candidate.is_file() and candidate.suffix.lower() in AUDIO_EXTENSIONS
        ]
        audio_files.sort()
        if audio_files:
            found.extend(audio_files)
        else:
            print(f"[!] No audio files found under directory: {entry}")
    return found


def main() -> None:
    """Parse the command line and print ranked predictions for every clip.

    Missing files and clips that fail to load are reported on stdout and
    skipped. Raises SystemExit when the model artefacts are absent or when no
    inputs remain after directory expansion.
    """
    cli = argparse.ArgumentParser(description="Score WAV/MP3/M4A clips with the Mic-ID classifier.")
    cli.add_argument(
        "paths",
        nargs="+",
        type=Path,
        help="Audio files or directories containing audio to score",
    )
    cli.add_argument("--topk", type=int, default=3, help="How many ranked predictions to show per file")
    options = cli.parse_args()

    classifier, encoder = load_model()
    class_names = encoder.classes_
    # Clamp the requested count into [1, number of known classes].
    shown = max(1, min(options.topk, len(class_names)))

    clips = discover_inputs(options.paths)
    if not clips:
        raise SystemExit("No valid audio inputs found. Provide files or directories with supported formats.")

    for clip in clips:
        if not clip.exists():
            print(f"[!] Skipping missing file: {clip}")
            continue

        try:
            waveform, rate = load_audio(clip)
        except Exception as err:  # pragma: no cover - friendly CLI message
            print(f"[!] Failed to load {clip}: {err}")
            continue

        waveform, level = normalise_audio(waveform)
        # The classifier is given a single-row 2-D array.
        features = extract_features(waveform, rate).reshape(1, -1)
        probabilities = classifier.predict_proba(features)[0]
        # Indices of the most probable classes, best first.
        ranking = np.argsort(probabilities)[::-1][:shown]
        level_db = 20 * np.log10(level + 1e-12)

        print(f"\nFile: {clip}")
        print(f"RMS loudness: {level_db:.1f} dBFS")
        for position, class_idx in enumerate(ranking, start=1):
            friendly = describe_label(class_names[class_idx])
            print(f"  {position}. {friendly} — {probabilities[class_idx] * 100:.1f}%")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()