sakhi / scripts /benchmark_whisper.py
Tushar9802's picture
HF Space deploy — initial
745f62a
"""Benchmark different Whisper models on Hindi medical audio.
Usage:
python scripts/benchmark_whisper.py
Tests each model on test_audio/*.mp3, reports:
- Transcription output (first 200 chars)
- Whether key medical values appear as digits
- Time taken
- VRAM usage
"""
import time
import os
import sys
import torch
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.hindi_normalize import normalize_transcript
AUDIO_DIR = "test_audio"
AUDIO_FILES = [f for f in os.listdir(AUDIO_DIR) if f.endswith((".mp3", ".wav"))]
# Expected digit substrings in ANC normal transcript
ANC_NORMAL_EXPECT = ["110", "70", "58", "11.5", "24"]
ANC_DANGER_EXPECT = ["155", "100"]
MODELS = [
("vasista22/whisper-hindi-small", {"chunk_length_s": 30}),
("collabora/whisper-large-v2-hindi", {"chunk_length_s": 30}),
]
def get_vram_mb():
if torch.cuda.is_available():
return torch.cuda.memory_allocated() / 1024 / 1024
return 0
def test_model(model_name, pipe_kwargs):
from transformers import pipeline as hf_pipeline
print(f"\n{'='*60}")
print(f"MODEL: {model_name}")
print(f"{'='*60}")
vram_before = get_vram_mb()
print(f"Loading... (VRAM before: {vram_before:.0f} MB)")
t0 = time.time()
pipe = hf_pipeline(
"automatic-speech-recognition",
model=model_name,
device="cuda",
)
load_time = time.time() - t0
vram_after = get_vram_mb()
print(f"Loaded in {load_time:.1f}s (VRAM: {vram_after:.0f} MB, delta: {vram_after - vram_before:.0f} MB)")
for audio_file in AUDIO_FILES:
audio_path = os.path.join(AUDIO_DIR, audio_file)
print(f"\n--- {audio_file} ---")
t0 = time.time()
result = pipe(audio_path, **pipe_kwargs)
elapsed = time.time() - t0
raw = result["text"].strip()
normalized = normalize_transcript(raw)
print(f"Time: {elapsed:.1f}s")
print(f"Raw ({len(raw)} chars): {raw[:200]}")
print(f"Normalized ({len(normalized)} chars): {normalized[:200]}")
# Check for expected digits
expect = ANC_NORMAL_EXPECT if "normal" in audio_file else ANC_DANGER_EXPECT
for val in expect:
found_raw = val in raw
found_norm = val in normalized
status = "RAW" if found_raw else ("NORM" if found_norm else "MISS")
print(f" {val}: {status}")
# Free VRAM
del pipe
torch.cuda.empty_cache()
import gc; gc.collect()
if __name__ == "__main__":
for model_name, kwargs in MODELS:
try:
test_model(model_name, kwargs)
except Exception as e:
print(f"\nERROR with {model_name}: {e}")