| import librosa, joblib, numpy as np, gradio as gr |
| from scipy.interpolate import interp1d |
| from pyAudioAnalysis import ShortTermFeatures |
| from pydub.silence import detect_nonsilent |
| from pydub import AudioSegment |
|
|
|
|
def smart_resize(arr, target_size):
    """Resample a (1, N) feature row to shape (1, target_size).

    Uses 1-D linear interpolation over the original sample positions, so
    the first and last values are preserved and intermediate values are
    blended proportionally.

    Parameters
    ----------
    arr : ndarray of shape (1, N)
        Row vector of features to resize.
    target_size : int
        Desired number of columns in the output.

    Returns
    -------
    ndarray of shape (1, target_size)
    """
    source_len = arr.shape[1]

    # Original and resampled coordinate grids over the same [0, N-1] span.
    source_positions = np.linspace(0, source_len - 1, source_len)
    target_positions = np.linspace(0, source_len - 1, target_size)

    interpolator = interp1d(
        source_positions,
        arr.squeeze(),
        kind='linear',
        fill_value="extrapolate",
    )
    return interpolator(target_positions).reshape(1, target_size)
|
|
def remove_silence(wav_file, output_path='audio.wav'):
    """Trim leading and trailing silence from a WAV file and write the result.

    Detects non-silent ranges and keeps everything from the start of the
    first range to the end of the last one (internal silences are kept).
    If the whole file is silent, the audio is exported unchanged.

    Parameters
    ----------
    wav_file : str
        Path to the input WAV file.
    output_path : str, optional
        Where to write the trimmed WAV. Defaults to 'audio.wav' in the
        current working directory for backward compatibility; note this
        is racy if several requests run concurrently.

    Returns
    -------
    str
        The path the trimmed audio was written to.
    """
    audSeg = AudioSegment.from_wav(wav_file)
    # NOTE(review): min_silence_len=5 ms is very aggressive (pydub's
    # default is 1000 ms) — confirm this is intentional for short
    # 'Si'/'No' utterances. Threshold is -30 dBFS.
    non_silence_ranges = detect_nonsilent(audSeg, min_silence_len=5, silence_thresh=-30)

    if not non_silence_ranges:
        # Entire clip is silence: export it as-is rather than an empty file.
        sound = audSeg
    else:
        start = non_silence_ranges[0][0]
        end = non_silence_ranges[-1][1]
        sound = audSeg[start:end]

    sound.export(output_path, format="wav")
    return output_path
|
|
def transform_data(audio):
    """Turn a recorded audio file into the model's 20-column feature row.

    Pipeline: trim silence (written to 'audio.wav'), load the trimmed
    audio, extract short-term features with a 50 ms window / 25 ms step,
    flatten them, then linearly resample to 20 values.

    Parameters
    ----------
    audio : str
        Path to the recorded input audio file.

    Returns
    -------
    ndarray of shape (1, 20)
    """
    remove_silence(audio)
    signal, sample_rate = librosa.load('audio.wav')

    # 0.050 * sr samples per window, 0.025 * sr samples per step.
    features, _feature_names = ShortTermFeatures.feature_extraction(
        signal, sample_rate, 0.050 * sample_rate, 0.025 * sample_rate
    )

    # Flatten the whole feature matrix into one row, then compress to 20.
    return smart_resize(features.reshape(1, -1), 20)
|
|
def predict(newdf, loaded_model):
    """Run the classifier on one feature row.

    Parameters
    ----------
    newdf : array-like of shape (1, n_features)
        Feature row produced by ``transform_data``.
    loaded_model : object
        Fitted estimator exposing ``predict`` and ``predict_proba``.

    Returns
    -------
    tuple
        (predicted labels, probability vector for the single sample)
    """
    return loaded_model.predict(newdf), loaded_model.predict_proba(newdf)[0]
| |
def get_label(newpred):
    """Map the numeric class (0/1) to its Spanish label ('No'/'Si')."""
    return 'No' if newpred == 0 else 'Si'
|
|
def load_model():
    """Deserialize and return the trained SGD classifier from disk."""
    return joblib.load('models/sgd_90.pkl')
| |
def main(audio):
    """Gradio entry point: classify a recording as 'Si' or 'No'.

    Parameters
    ----------
    audio : str
        Filepath of the microphone recording (Gradio ``type="filepath"``).

    Returns
    -------
    tuple
        (label string, dict of class probabilities for the label widget)
    """
    model = load_model()
    features = transform_data(audio)
    prediction, class_proba = predict(features, model)

    probabilities = {'Si probability': class_proba[1],
                     'No probability': class_proba[0]}
    return get_label(prediction), probabilities
|
|
# Gradio UI: records the user's voice via microphone, runs `main`, and
# shows the predicted label plus per-class probabilities.
demo = gr.Interface(
    title = "Autoagent | YES or NO Classification - Layer7",
    description = """<h3>This model is useful to classify if the user says 'Si' or 'No'. 🎙️ </h3>
<img src="https://huggingface.co/spaces/Adrian8as/imagen/resolve/main/output.png" width="350" height="350"/> <br>
<b>Record your voice:</b>""",
    allow_flagging = "never",
    fn = main,
    # Microphone input is handed to `main` as a temp-file path.
    inputs = gr.Audio(
        sources=["microphone"],
        type="filepath",
    ),
    # First output: text label; second: label widget fed by the proba dict.
    outputs = [gr.Textbox(label="Clasification"),"label"]
)

if __name__ == "__main__":
    # show_api=False hides the auto-generated API docs page.
    demo.launch(show_api=False)