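# Gradio demo: predict a speaker's gender from a short audio clip
# using the pretrained ECAPA_gender model.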
import torch
import gradio as gr
from model import ECAPA_gender
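
# Bundled demo clips, surfaced below via gr.Examples.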
SAMPLE_AUDIO = [
    ("Sample 1", "samples/00001.wav"),
    ("Sample 2", "samples/00002.wav"),
]
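
# Load the pretrained classifier from the Hub, switch to inference mode,
# and move it to the GPU when one is available.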
model = ECAPA_gender.from_pretrained("Beijuka/voice-gender-classifier")
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def predict_gender_confidence(audio_file):
    """Classify the speaker's gender in an audio file and report model confidence."""
    if audio_file is None:
        return "No audio provided"
    try:
        # Gradio passes a filepath string; fall back to a file-like object's .name.
        path = audio_file if isinstance(audio_file, str) else getattr(audio_file, "name", None)
        if not path:
            return "No audio path provided"
        audio = model.load_audio(path)
        audio = audio.to(device)
        with torch.no_grad():
            logits = model(audio)
        # Softmax over the classes gives a confidence score for the prediction.
        probs = torch.softmax(logits, dim=1).cpu().numpy()[0]
        pred_idx = logits.argmax(dim=1).item()
        gender_pred = model.pred2gender[pred_idx].capitalize()
        confidence = probs[pred_idx] * 100
        return f"{gender_pred} ({confidence:.1f}% confidence)"
    except Exception as e:
        return f"Error: {e}"
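
# Build the Gradio UI: audio input, prediction display, and clickable samples.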
with gr.Blocks(title="Voice Gender Classifier") as demo:
    gr.Markdown("""
    ## Voice Gender Classifier
    Upload or record a short audio clip to predict the speaker's gender. Try the built-in samples if you need test audio.
    """)
    audio_input = gr.Audio(
        sources=["upload", "microphone"],
        type="filepath",
        label="Upload or record audio",
    )
    prediction = gr.Textbox(label="Prediction", interactive=False)
    gr.Examples(
        examples=[path for _, path in SAMPLE_AUDIO],
        inputs=audio_input,
        outputs=prediction,
        fn=predict_gender_confidence,
        label="Try sample audios",
    )
    # Classify automatically whenever the audio input changes.
    audio_input.change(fn=predict_gender_confidence, inputs=audio_input, outputs=prediction)

if __name__ == "__main__":
    demo.launch(share=True)