Spaces:

mippia
/

MPD-demo

Sleeping

App Files Community

slslslrhfem committed on Sep 16, 2025

Commit

f3cb625

1 Parent(s): 773ceaa

change download mechanism

Browse files

Files changed (1) hide show

app.py +34 -24

app.py CHANGED Viewed

@@ -210,7 +210,7 @@ def find_song_file_by_title(song_title):
 @spaces.GPU(duration=300)
 def process_audio_for_matching(audio_file):
     if audio_file is None:
-        return None, """
         <div style='text-align: center; color: #dc2626; padding: 30px; background: #fef2f2; border-radius: 12px; border: 2px dashed #fecaca;'>
             <h3>No Audio File</h3>
             <p>Please upload an audio file to get started!</p>
@@ -220,7 +220,7 @@ def process_audio_for_matching(audio_file):
     result = inference(audio_file)
     if result.get('message') != 'success':
-        return None, f"""
         <div style="text-align: center; padding: 25px; background: #fefce8; border-radius: 12px; border: 1px solid #fde047; margin: 10px 0;">
             <h3 style="color: #a16207; margin-bottom: 15px;">No Matches Found</h3>
             <p style="color: #a16207; font-size: 1.1em;">{result.get('message', 'Unknown error occurred')}</p>
@@ -229,29 +229,35 @@ def process_audio_for_matching(audio_file):
     matches = result.get('matches', [])
     if not matches:
-        return None, """
         <div style="text-align: center; padding: 25px; background: #fefce8; border-radius: 12px; border: 1px solid #fde047; margin: 10px 0;">
             <h3 style="color: #a16207; margin-bottom: 15px;">No Matches Found</h3>
             <p style="color: #a16207; font-size: 1.1em;">No matching vocals found in the dataset.</p>
         </div>
         """
-    # Get the best match for audio playback
-    best_match = matches[0]
-    song_title = best_match.get('song_title', 'Unknown Song')
-    library_time = best_match.get('library_time', 0)
-    # Find song file
-    song_file_path = find_song_file_by_title(song_title)
     # Generate match results HTML
     matches_html = ""
     for match in matches:
         rank = match.get('rank', 0)
-        song_title_display = match.get('song_title', 'Unknown Song')
         confidence = match.get('confidence', '0%')
         test_time = match.get('test_time', 0)
-        library_time_display = match.get('library_time', 0)
         # Ranking colors
         rank_colors = {1: '#dc2626', 2: '#ea580c', 3: '#16a34a'}
@@ -265,7 +271,7 @@ def process_audio_for_matching(audio_file):
                     <span style="background: {rank_color}; color: white; padding: 4px 8px; border-radius: 15px; font-size: 0.8em; margin-right: 10px;">
                         #{rank}
                     </span>
-                    {song_title_display}
                 </h3>
                 <span style="background: #f3f4f6; color: #111827; padding: 6px 12px; border-radius: 20px; font-weight: 600;">
                     {confidence}
@@ -280,7 +286,7 @@ def process_audio_for_matching(audio_file):
                     </div>
                     <div>
                         <strong style="color: #1f2937;">Matched At</strong>
-                        <br><span style="color: #16a34a; font-size: 1.1em;">{library_time_display:.1f}s</span>
                     </div>
                 </div>
             </div>
@@ -299,17 +305,13 @@ def process_audio_for_matching(audio_file):
         <div style="text-align: center; margin-top: 25px; padding: 15px; background: #f3f4f6; border-radius: 8px;">
             <p style="color: #374151; margin: 0; font-size: 0.95em;">
-                <strong>Audio Player:</strong> Playing the best match starting from the matched timestamp ({library_time:.1f}s)
             </p>
         </div>
     </div>
     """
-    # Return audio file with timestamp and results
-    if song_file_path and os.path.exists(song_file_path):
-        return (song_file_path, library_time), results_html
-    else:
-        return None, results_html
 # CSS styles
 custom_css = """
@@ -382,7 +384,7 @@ h1 {
 }
 """
-# Gradio interface - using original Interface with multiple outputs
 demo = gr.Interface(
     fn=process_audio_for_matching,
     inputs=gr.Audio(
@@ -392,7 +394,15 @@ demo = gr.Interface(
     ),
     outputs=[
         gr.Audio(
-            label="Best Match Audio (plays from matched timestamp)",
             elem_classes=["output-container"]
         ),
         gr.HTML(
@@ -410,12 +420,12 @@ demo = gr.Interface(
             Submitted to ICASSP 2026
         </p>
         <hr style="border: none; border-top: 1px solid #e5e7eb; margin: 20px 0;">
-        <p><strong>⚠️ Demo Version Notice:</strong><br>
         This demo differs from the paper version and focuses exclusively on vocal segment transcription.</p>
         <p>Upload any music file to detect vocal similarities in the Covers80 dataset.<br>
         The system analyzes only vocal characteristics, ignoring instrumental parts.</p>
         <p style="font-size: 0.95em; color: #dc2626; font-weight: 600; margin-top: 15px;">
-            ⏱️ Processing can take up to 2 minutes per file
         </p>
         <p style="font-size: 0.95em; color: #6b7280; margin-top: 10px;">
             Supported formats: MP3, WAV, M4A, FLAC

 @spaces.GPU(duration=300)
 def process_audio_for_matching(audio_file):
     if audio_file is None:
+        return None, None, None, """
         <div style='text-align: center; color: #dc2626; padding: 30px; background: #fef2f2; border-radius: 12px; border: 2px dashed #fecaca;'>
             <h3>No Audio File</h3>
             <p>Please upload an audio file to get started!</p>
     result = inference(audio_file)
     if result.get('message') != 'success':
+        return None, None, None, f"""
         <div style="text-align: center; padding: 25px; background: #fefce8; border-radius: 12px; border: 1px solid #fde047; margin: 10px 0;">
             <h3 style="color: #a16207; margin-bottom: 15px;">No Matches Found</h3>
             <p style="color: #a16207; font-size: 1.1em;">{result.get('message', 'Unknown error occurred')}</p>
     matches = result.get('matches', [])
     if not matches:
+        return None, None, None, """
         <div style="text-align: center; padding: 25px; background: #fefce8; border-radius: 12px; border: 1px solid #fde047; margin: 10px 0;">
             <h3 style="color: #a16207; margin-bottom: 15px;">No Matches Found</h3>
             <p style="color: #a16207; font-size: 1.1em;">No matching vocals found in the dataset.</p>
         </div>
         """
+    # Get audio files for top 3 matches
+    audio_files = [None, None, None]
+    for i, match in enumerate(matches[:3]):
+        song_title = match.get('song_title', 'Unknown Song')
+        song_file_path = find_song_file_by_title(song_title)
+        print(f"Match {i+1}: {song_title}")
+        print(f"  File path: {song_file_path}")
+        if song_file_path and os.path.exists(song_file_path):
+            audio_files[i] = song_file_path
+        else:
+            audio_files[i] = None
     # Generate match results HTML
     matches_html = ""
     for match in matches:
         rank = match.get('rank', 0)
+        song_title = match.get('song_title', 'Unknown Song')
         confidence = match.get('confidence', '0%')
         test_time = match.get('test_time', 0)
+        library_time = match.get('library_time', 0)
         # Ranking colors
         rank_colors = {1: '#dc2626', 2: '#ea580c', 3: '#16a34a'}
                     <span style="background: {rank_color}; color: white; padding: 4px 8px; border-radius: 15px; font-size: 0.8em; margin-right: 10px;">
                         #{rank}
                     </span>
+                    {song_title}
                 </h3>
                 <span style="background: #f3f4f6; color: #111827; padding: 6px 12px; border-radius: 20px; font-weight: 600;">
                     {confidence}
                     </div>
                     <div>
                         <strong style="color: #1f2937;">Matched At</strong>
+                        <br><span style="color: #16a34a; font-size: 1.1em;">{library_time:.1f}s</span>
                     </div>
                 </div>
             </div>
         <div style="text-align: center; margin-top: 25px; padding: 15px; background: #f3f4f6; border-radius: 8px;">
             <p style="color: #374151; margin: 0; font-size: 0.95em;">
+                <strong>Audio Players:</strong> Top 3 matched songs are available above. Timestamps show where similar vocals were found.
             </p>
         </div>
     </div>
     """
+    return audio_files[0], audio_files[1], audio_files[2], results_html
 # CSS styles
 custom_css = """
 }
 """
+# Gradio interface with 3 audio outputs
 demo = gr.Interface(
     fn=process_audio_for_matching,
     inputs=gr.Audio(
     ),
     outputs=[
         gr.Audio(
+            label="Match #1 Audio",
+            elem_classes=["output-container"]
+        ),
+        gr.Audio(
+            label="Match #2 Audio",
+            elem_classes=["output-container"]
+        ),
+        gr.Audio(
+            label="Match #3 Audio",
             elem_classes=["output-container"]
         ),
         gr.HTML(
             Submitted to ICASSP 2026
         </p>
         <hr style="border: none; border-top: 1px solid #e5e7eb; margin: 20px 0;">
+        <p><strong>Demo Version Notice:</strong><br>
         This demo differs from the paper version and focuses exclusively on vocal segment transcription.</p>
         <p>Upload any music file to detect vocal similarities in the Covers80 dataset.<br>
         The system analyzes only vocal characteristics, ignoring instrumental parts.</p>
         <p style="font-size: 0.95em; color: #dc2626; font-weight: 600; margin-top: 15px;">
+            Processing can take up to 2 minutes per file
         </p>
         <p style="font-size: 0.95em; color: #6b7280; margin-top: 10px;">
             Supported formats: MP3, WAV, M4A, FLAC