File size: 961 Bytes
745f62a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
"""One-shot transcription of a single audio file via the project's faster-whisper setup."""
import os, sys, time
# Publish UTF-8 as the stdio encoding through the process environment.
# NOTE(review): Python reads PYTHONIOENCODING at interpreter startup, so this
# assignment presumably only influences child processes, not this one --
# confirm it is still needed alongside the reconfigure() call below.
os.environ["PYTHONIOENCODING"] = "utf-8"
# Switch this process's stdout to UTF-8: the transcript printed further down
# is requested with language="hi", so it can contain non-ASCII text.
sys.stdout.reconfigure(encoding="utf-8")
# Put the parent of this script's directory at the front of the import path.
# This runs before the faster_whisper import that follows.
# NOTE(review): presumably so project-local modules resolve; nothing visible
# in this file imports one -- verify whether this is still required.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from faster_whisper import WhisperModel

def format_segment(start: float, end: float, text: str) -> str:
    """Return one transcript line formatted as ``[start-end] text``.

    ``start`` and ``end`` are rendered with one decimal place in a
    5-character field. ``text`` is used exactly as given; callers are
    expected to strip it first.
    """
    return f"[{start:5.1f}-{end:5.1f}] {text}"


def main(argv) -> int:
    """Transcribe the audio file named by ``argv[1]`` and print the result.

    Loads the CTranslate2 model from ``../models/whisper-hindi-ct2``
    (relative to this script), transcribes with ``language="hi"`` and the
    VAD filter enabled, then prints load/ASR timings, one line per segment
    and the joined full text.

    Args:
        argv: Command-line vector; ``argv[0]`` is the program name and
            ``argv[1]`` the audio input. Extra arguments are ignored.

    Returns:
        ``0`` on success, ``2`` when the audio argument is missing.
    """
    if len(argv) < 2:
        # Previously a missing argument surfaced as a bare IndexError.
        prog = os.path.basename(argv[0]) if argv else "<script>"
        print(f"usage: {prog} AUDIO_FILE", file=sys.stderr)
        return 2

    audio = argv[1]
    ct2 = os.path.join(os.path.dirname(__file__), "..", "models", "whisper-hindi-ct2")

    # perf_counter() is monotonic, so the intervals cannot be skewed by
    # wall-clock adjustments.
    t0 = time.perf_counter()
    model = WhisperModel(ct2, device="cuda", compute_type="float16")
    print(f"[load] {time.perf_counter()-t0:.1f}s")

    t0 = time.perf_counter()
    segments, info = model.transcribe(audio, language="hi", task="transcribe", vad_filter=True)
    # Consume `segments` inside the timed region so the reported time covers
    # whatever work happens while iterating it; strip each text only once.
    spans = [(seg.start, seg.end, seg.text.strip()) for seg in segments]
    elapsed = time.perf_counter() - t0

    print(f"[asr] {elapsed:.1f}s, lang={info.language} prob={info.language_probability:.2f}, dur={info.duration:.1f}s")
    print("---")
    for start, end, text in spans:
        print(format_segment(start, end, text))
    print("---")
    print("FULL:", " ".join(text for _, _, text in spans))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))