shubhamrazzsharma committed on
Commit
479868f
·
verified ·
1 Parent(s): 09fec51

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torch.nn as nn
4
+ import librosa
5
+ import numpy as np
6
+
7
+ # ─────────────────────────────────────────────
8
+ # 1. PASTE YOUR CNN ARCHITECTURE HERE
9
+ # (copy the class definition from your Kaggle notebook)
10
+ # ─────────────────────────────────────────────
11
class CNNModel(nn.Module):
    """Small 2-D CNN that maps a spectrogram image to class logits.

    Placeholder architecture: three Conv -> BatchNorm -> ReLU -> MaxPool
    stages (32, 64 and 128 channels), global average pooling, and a
    two-layer classifier head. The attribute names and the order of the
    layers inside each ``nn.Sequential`` define the ``state_dict`` keys, so
    they have to match the checkpoint that gets loaded into the model.

    Args:
        num_classes: Number of output logits.
        in_channels: Number of channels of the input tensor. Defaults to 1
            (a single-channel spectrogram).
    """

    def __init__(self, num_classes=10, in_channels=1):
        super().__init__()
        # ⬇⬇ REPLACE THIS BLOCK WITH YOUR ACTUAL ARCHITECTURE ⬇⬇
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # Collapsing the spatial axes to 1x1 makes the head independent of
        # the input height and width.
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes),
        )
        # ⬆⬆ REPLACE UP TO HERE ⬆⬆

    def forward(self, x):
        """Compute class logits for a batch of inputs.

        Args:
            x: Tensor of shape ``(N, in_channels, H, W)``. ``H`` and ``W``
                should be at least 8 so that the three 2x poolings leave a
                non-empty feature map.

        Returns:
            Tensor of shape ``(N, num_classes)`` holding unnormalized logits.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.global_avg_pool(x)
        return self.classifier(x)
41
+
42
+ # ─────────────────────────────────────────────
43
+ # 2. CONFIG — change these if needed
44
+ # ─────────────────────────────────────────────
45
# Class names; a name's position in this list is used as its class index when
# predictions are decoded (assumed to match the label order used in training —
# confirm against the training notebook).
GENRES = [
    "blues", "classical", "country", "disco", "hiphop",
    "jazz", "metal", "pop", "reggae", "rock",
]

# Derived from GENRES so the label list and the model's output size cannot
# drift apart.
NUM_CLASSES = len(GENRES)
SAMPLE_RATE = 22050        # sampling rate requested when decoding audio (Hz)
N_MELS = 128               # number of mel bands in the spectrogram
N_FFT = 2048               # FFT window size for the spectrogram
HOP_LENGTH = 512           # hop between successive spectrogram frames (samples)
DURATION = 30              # seconds of audio to use
TARGET_SHAPE = (128, 512)  # must match your training shape
57
+
58
+ # ─────────────────────────────────────────────
59
+ # 3. LOAD MODEL (runs once at startup)
60
+ # ─────────────────────────────────────────────
61
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = CNNModel(num_classes=NUM_CLASSES)
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot run arbitrary code while it is being loaded.
# NOTE(review): requires a PyTorch release that supports `weights_only`.
state_dict = torch.load(
    "best_model (1).pth", map_location=device, weights_only=True
)
model.load_state_dict(state_dict)
model.to(device)
# Switch to inference behaviour (dropout off, BatchNorm uses running stats).
model.eval()
69
+
70
+ # ─────────────────────────────────────────────
71
+ # 4. PREPROCESSING — same pipeline as training
72
+ # ─────────────────────────────────────────────
73
def audio_to_melspectrogram(audio_path):
    """Turn an audio file into a normalized log-mel spectrogram array.

    The file is decoded as mono at ``SAMPLE_RATE`` and limited to the first
    ``DURATION`` seconds; shorter clips are zero-padded at the end. The mel
    power spectrogram is converted to decibels relative to its peak,
    bilinearly resized to ``TARGET_SHAPE`` when its shape differs, and then
    min-max scaled.

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        2-D array of shape ``TARGET_SHAPE`` with values scaled into [0, 1].
    """
    waveform, rate = librosa.load(
        audio_path, sr=SAMPLE_RATE, duration=DURATION, mono=True
    )

    # Zero-pad the tail so every clip covers the full DURATION.
    wanted_samples = SAMPLE_RATE * DURATION
    if len(waveform) < wanted_samples:
        waveform = np.pad(waveform, (0, wanted_samples - len(waveform)))

    power_spec = librosa.feature.melspectrogram(
        y=waveform,
        sr=rate,
        n_mels=N_MELS,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
    )
    mel_db = librosa.power_to_db(power_spec, ref=np.max)

    # Bring the spectrogram to the training shape by treating it as an image.
    if mel_db.shape != TARGET_SHAPE:
        from PIL import Image

        rows, cols = TARGET_SHAPE
        # PIL expects (width, height), i.e. (columns, rows).
        resized = Image.fromarray(mel_db).resize((cols, rows), Image.BILINEAR)
        mel_db = np.array(resized)

    # Min-max scale; the epsilon avoids division by zero on a constant input.
    lowest = mel_db.min()
    highest = mel_db.max()
    mel_db = (mel_db - lowest) / (highest - lowest + 1e-6)
    return mel_db
97
+
98
+ # ─────────────────────────────────────────────
99
+ # 5. INFERENCE
100
+ # ─────────────────────────────────────────────
101
def predict_genre(audio_path):
    """Classify an audio file and return per-genre probabilities.

    Args:
        audio_path: Path of the uploaded audio file, or ``None`` when no file
            has been provided.

    Returns:
        Dict mapping each name in ``GENRES`` to its softmax probability as a
        float, or an empty dict when ``audio_path`` is ``None``.

    Raises:
        gr.Error: If loading, preprocessing or inference fails. The label
            output expects numeric confidences, so failures are reported
            through Gradio's error channel instead of being returned as a
            fake ``{"error": message}`` prediction.
    """
    if audio_path is None:
        return {}

    try:
        mel = audio_to_melspectrogram(audio_path)  # 2-D array, TARGET_SHAPE
        # Add batch and channel axes: (H, W) -> (1, 1, H, W).
        batch = torch.as_tensor(mel, dtype=torch.float32)[None, None].to(device)

        with torch.no_grad():
            logits = model(batch)
        # squeeze(0) drops only the batch axis, so the result stays a list
        # even for a single-class model.
        probs = torch.softmax(logits, dim=1).squeeze(0).cpu().tolist()
    except Exception as exc:
        # UI boundary: surface any failure to the user as a Gradio error.
        raise gr.Error(f"Could not classify this audio file: {exc}") from exc

    return dict(zip(GENRES, probs))
118
+
119
+ # ─────────────────────────────────────────────
120
+ # 6. GRADIO UI
121
+ # ─────────────────────────────────────────────
122
# Page layout: one audio upload, a trigger button, and a label widget that
# shows the five most probable genres. Nesting below is reconstructed from
# statement order — NOTE(review): confirm against the original app.py.
with gr.Blocks(title="Music Genre Classifier") as demo:
    gr.Markdown("## 🎡 Music Genre Classifier")
    gr.Markdown("Upload a song clip and the model will predict its genre.")

    with gr.Row():
        audio_input = gr.Audio(
            type="filepath",
            label="Upload Audio (.wav / .mp3)",
        )

    predict_btn = gr.Button("Predict Genre", variant="primary")
    output = gr.Label(num_top_classes=5, label="Genre Probabilities")

    # Run the classifier on the uploaded file when the button is pressed.
    predict_btn.click(predict_genre, audio_input, output)

    # Optionally add example audio file paths to the list below.
    gr.Examples(examples=[], inputs=audio_input)

demo.launch()