Spaces:

shubhamrazzsharma
/

Music_Classifier

Sleeping

App Files Files Community

shubhamrazzsharma committed 25 days ago

Commit

479868f

verified ·

1 Parent(s): 09fec51

Create app.py

Browse files

Files changed (1) hide show

app.py +140 -0

app.py ADDED Viewed

	@@ -0,0 +1,140 @@

+import gradio as gr
+import torch
+import torch.nn as nn
+import librosa
+import numpy as np
+# ─────────────────────────────────────────────
+# 1. PASTE YOUR CNN ARCHITECTURE HERE
+#    (copy the class definition from your Kaggle notebook)
+# ─────────────────────────────────────────────
+class CNNModel(nn.Module):
+    def __init__(self, num_classes=10):
+        super(CNNModel, self).__init__()
+        # ⬇⬇ REPLACE THIS BLOCK WITH YOUR ACTUAL ARCHITECTURE ⬇⬇
+        self.conv1 = nn.Sequential(
+            nn.Conv2d(1, 32, kernel_size=3, padding=1),
+            nn.BatchNorm2d(32), nn.ReLU(), nn.MaxPool2d(2)
+        )
+        self.conv2 = nn.Sequential(
+            nn.Conv2d(32, 64, kernel_size=3, padding=1),
+            nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2)
+        )
+        self.conv3 = nn.Sequential(
+            nn.Conv2d(64, 128, kernel_size=3, padding=1),
+            nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2)
+        )
+        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
+        self.classifier = nn.Sequential(
+            nn.Flatten(),
+            nn.Linear(128, 256), nn.ReLU(), nn.Dropout(0.3),
+            nn.Linear(256, num_classes)
+        )
+        # ⬆⬆ REPLACE UP TO HERE ⬆⬆
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.conv2(x)
+        x = self.conv3(x)
+        x = self.global_avg_pool(x)
+        return self.classifier(x)
+# ─────────────────────────────────────────────
+# 2. CONFIG — change these if needed
+# ─────────────────────────────────────────────
+NUM_CLASSES  = 10
+SAMPLE_RATE  = 22050
+N_MELS       = 128
+N_FFT        = 2048
+HOP_LENGTH   = 512
+DURATION     = 30          # seconds of audio to use
+TARGET_SHAPE = (128, 512)  # must match your training shape
+GENRES = [
+    "blues", "classical", "country", "disco", "hiphop",
+    "jazz", "metal", "pop", "reggae", "rock"
+]
+# ─────────────────────────────────────────────
+# 3. LOAD MODEL (runs once at startup)
+# ─────────────────────────────────────────────
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = CNNModel(num_classes=NUM_CLASSES)
+model.load_state_dict(
+    torch.load("best_model (1).pth", map_location=device)
+)
+model.to(device)
+model.eval()
+# ─────────────────────────────────────────────
+# 4. PREPROCESSING — same pipeline as training
+# ─────────────────────────────────────────────
+def audio_to_melspectrogram(audio_path):
+    y, sr = librosa.load(audio_path, sr=SAMPLE_RATE, duration=DURATION, mono=True)
+    # Pad if clip is shorter than DURATION
+    target_length = SAMPLE_RATE * DURATION
+    if len(y) < target_length:
+        y = np.pad(y, (0, target_length - len(y)))
+    mel = librosa.feature.melspectrogram(
+        y=y, sr=sr, n_mels=N_MELS, n_fft=N_FFT, hop_length=HOP_LENGTH
+    )
+    mel_db = librosa.power_to_db(mel, ref=np.max)
+    # Resize to training shape (128, 512)
+    if mel_db.shape != TARGET_SHAPE:
+        from PIL import Image
+        import PIL
+        mel_img = Image.fromarray(mel_db)
+        mel_img = mel_img.resize((TARGET_SHAPE[1], TARGET_SHAPE[0]), PIL.Image.BILINEAR)
+        mel_db = np.array(mel_img)
+    # Normalize to [0, 1]
+    mel_db = (mel_db - mel_db.min()) / (mel_db.max() - mel_db.min() + 1e-6)
+    return mel_db
+# ─────────────────────────────────────────────
+# 5. INFERENCE
+# ─────────────────────────────────────────────
+def predict_genre(audio_path):
+    if audio_path is None:
+        return {}
+    try:
+        mel = audio_to_melspectrogram(audio_path)                  # (128, 512)
+        tensor = torch.tensor(mel, dtype=torch.float32)
+        tensor = tensor.unsqueeze(0).unsqueeze(0).to(device)       # (1, 1, 128, 512)
+        with torch.no_grad():
+            logits = model(tensor)
+            probs  = torch.softmax(logits, dim=1).squeeze().cpu().numpy()
+        return {GENRES[i]: float(probs[i]) for i in range(NUM_CLASSES)}
+    except Exception as e:
+        return {"error": str(e)}
+# ─────────────────────────────────────────────
+# 6. GRADIO UI
+# ─────────────────────────────────────────────
+with gr.Blocks(title="Music Genre Classifier") as demo:
+    gr.Markdown("## 🎵 Music Genre Classifier")
+    gr.Markdown("Upload a song clip and the model will predict its genre.")
+    with gr.Row():
+        audio_input = gr.Audio(type="filepath", label="Upload Audio (.wav / .mp3)")
+    predict_btn = gr.Button("Predict Genre", variant="primary")
+    output = gr.Label(num_top_classes=5, label="Genre Probabilities")
+    predict_btn.click(fn=predict_genre, inputs=audio_input, outputs=output)
+    gr.Examples(
+        examples=[],           # optionally add example audio file paths here
+        inputs=audio_input
+    )
+demo.launch()