import gradio as gr
import librosa
from tensorflow.keras.models import load_model
import numpy as np
import audio2numpy as a2n

# load model
model = load_model("BBNN_model.hdf5")

# basic variables for mel spectrogram
target_sr = 22050
frame_size = 2048
frame_shift_len = 1024
n_mels = 128

# Maps the model's output index to a human-readable genre name.
genre_classes = {
    0: "Blues",
    1: "Classical",
    2: "Country",
    3: "Disco",
    4: "Hiphop",
    5: "Jazz",
    6: "Metal",
    7: "Pop",
    8: "Reggae",
    9: "Rock",
}


def get_melspec_feature(X, target_sr, frame_size, frame_shift_len, n_mels):
    """Compute a dB-scaled mel spectrogram for every waveform in ``X``.

    Args:
        X: Iterable of 1-D floating-point waveforms.
        target_sr: Sample rate (Hz) the waveforms are assumed to be at.
        frame_size: FFT window size (``n_fft``).
        frame_shift_len: Hop length between successive frames.
        n_mels: Number of mel bands to generate.

    Returns:
        A ``float32`` array stacking one transposed spectrogram
        (frames x mel bands) per input waveform.
    """
    melspec_feature = []
    for audio in X:
        audio_melspec = librosa.feature.melspectrogram(
            y=audio,
            sr=target_sr,
            n_fft=frame_size,
            hop_length=frame_shift_len,
            n_mels=n_mels,  # previously accepted but never forwarded
        )
        audio_melspec = librosa.power_to_db(audio_melspec)
        # Transpose so that time frames come first.
        melspec_feature.append(audio_melspec.T)
    return np.array(melspec_feature, dtype=np.float32)


def _to_mono_waveform(audio, target_sr):
    """Turn a ``(sample_rate, samples)`` pair into a mono float32 waveform at ``target_sr``."""
    sample_rate, samples = audio
    samples = np.asarray(samples)

    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM must be scaled to floating point before librosa can use it.
        scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / scale
    else:
        samples = samples.astype(np.float32)

    if samples.ndim > 1:
        # Assumes a (samples, channels) layout for multi-channel input -- TODO confirm.
        samples = samples.mean(axis=1)

    if sample_rate != target_sr:
        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=target_sr)
    return samples


def predict_genre(audio):
    """Predict genre confidences for one audio clip.

    Args:
        audio: Either a ``(sample_rate, samples)`` tuple as delivered by the
            Gradio audio input, an iterable of ready-to-use waveforms, or
            ``None`` when the input has been cleared.

    Returns:
        Dict mapping every genre name to its predicted score. All ten genres
        are returned; the Label output is configured to show the top five.
    """
    if audio is None:
        # A live interface can fire with no audio; report zero confidence everywhere.
        return {name: 0.0 for name in genre_classes.values()}

    print(audio)

    is_rate_and_samples = (
        isinstance(audio, tuple) and len(audio) == 2 and np.isscalar(audio[0])
    )
    if is_rate_and_samples:
        # Wrap the single clip in a list so it is treated as a batch of one.
        batch = [_to_mono_waveform(audio, target_sr)]
    else:
        batch = audio

    # NOTE(review): the model may require a fixed number of frames; the
    # expected input shape is not visible in this file -- confirm.
    melspec = get_melspec_feature(batch, target_sr, frame_size, frame_shift_len, n_mels)
    prediction = model.predict(melspec)[0]

    # Return every class: range(5) hid Jazz/Metal/Pop/Reggae/Rock entirely.
    return {name: float(prediction[idx]) for idx, name in genre_classes.items()}


iface = gr.Interface(
    predict_genre,
    inputs=gr.inputs.Audio(),
    outputs=gr.outputs.Label(num_top_classes=5),
    title="Music Genre Classifier",
    live=True,
)

iface.launch()