slslslrhfem committed on
Commit
f3cb625
·
1 Parent(s): 773ceaa

change download mechanism

Browse files
Files changed (1) hide show
  1. app.py +34 -24
app.py CHANGED
@@ -210,7 +210,7 @@ def find_song_file_by_title(song_title):
210
  @spaces.GPU(duration=300)
211
  def process_audio_for_matching(audio_file):
212
  if audio_file is None:
213
- return None, """
214
  <div style='text-align: center; color: #dc2626; padding: 30px; background: #fef2f2; border-radius: 12px; border: 2px dashed #fecaca;'>
215
  <h3>No Audio File</h3>
216
  <p>Please upload an audio file to get started!</p>
@@ -220,7 +220,7 @@ def process_audio_for_matching(audio_file):
220
  result = inference(audio_file)
221
 
222
  if result.get('message') != 'success':
223
- return None, f"""
224
  <div style="text-align: center; padding: 25px; background: #fefce8; border-radius: 12px; border: 1px solid #fde047; margin: 10px 0;">
225
  <h3 style="color: #a16207; margin-bottom: 15px;">No Matches Found</h3>
226
  <p style="color: #a16207; font-size: 1.1em;">{result.get('message', 'Unknown error occurred')}</p>
@@ -229,29 +229,35 @@ def process_audio_for_matching(audio_file):
229
 
230
  matches = result.get('matches', [])
231
  if not matches:
232
- return None, """
233
  <div style="text-align: center; padding: 25px; background: #fefce8; border-radius: 12px; border: 1px solid #fde047; margin: 10px 0;">
234
  <h3 style="color: #a16207; margin-bottom: 15px;">No Matches Found</h3>
235
  <p style="color: #a16207; font-size: 1.1em;">No matching vocals found in the dataset.</p>
236
  </div>
237
  """
238
 
239
- # Get the best match for audio playback
240
- best_match = matches[0]
241
- song_title = best_match.get('song_title', 'Unknown Song')
242
- library_time = best_match.get('library_time', 0)
243
-
244
- # Find song file
245
- song_file_path = find_song_file_by_title(song_title)
 
 
 
 
 
 
246
 
247
  # Generate match results HTML
248
  matches_html = ""
249
  for match in matches:
250
  rank = match.get('rank', 0)
251
- song_title_display = match.get('song_title', 'Unknown Song')
252
  confidence = match.get('confidence', '0%')
253
  test_time = match.get('test_time', 0)
254
- library_time_display = match.get('library_time', 0)
255
 
256
  # Ranking colors
257
  rank_colors = {1: '#dc2626', 2: '#ea580c', 3: '#16a34a'}
@@ -265,7 +271,7 @@ def process_audio_for_matching(audio_file):
265
  <span style="background: {rank_color}; color: white; padding: 4px 8px; border-radius: 15px; font-size: 0.8em; margin-right: 10px;">
266
  #{rank}
267
  </span>
268
- {song_title_display}
269
  </h3>
270
  <span style="background: #f3f4f6; color: #111827; padding: 6px 12px; border-radius: 20px; font-weight: 600;">
271
  {confidence}
@@ -280,7 +286,7 @@ def process_audio_for_matching(audio_file):
280
  </div>
281
  <div>
282
  <strong style="color: #1f2937;">Matched At</strong>
283
- <br><span style="color: #16a34a; font-size: 1.1em;">{library_time_display:.1f}s</span>
284
  </div>
285
  </div>
286
  </div>
@@ -299,17 +305,13 @@ def process_audio_for_matching(audio_file):
299
 
300
  <div style="text-align: center; margin-top: 25px; padding: 15px; background: #f3f4f6; border-radius: 8px;">
301
  <p style="color: #374151; margin: 0; font-size: 0.95em;">
302
- <strong>Audio Player:</strong> Playing the best match starting from the matched timestamp ({library_time:.1f}s)
303
  </p>
304
  </div>
305
  </div>
306
  """
307
 
308
- # Return audio file with timestamp and results
309
- if song_file_path and os.path.exists(song_file_path):
310
- return (song_file_path, library_time), results_html
311
- else:
312
- return None, results_html
313
 
314
  # CSS styles
315
  custom_css = """
@@ -382,7 +384,7 @@ h1 {
382
  }
383
  """
384
 
385
- # Gradio interface - using original Interface with multiple outputs
386
  demo = gr.Interface(
387
  fn=process_audio_for_matching,
388
  inputs=gr.Audio(
@@ -392,7 +394,15 @@ demo = gr.Interface(
392
  ),
393
  outputs=[
394
  gr.Audio(
395
- label="Best Match Audio (plays from matched timestamp)",
 
 
 
 
 
 
 
 
396
  elem_classes=["output-container"]
397
  ),
398
  gr.HTML(
@@ -410,12 +420,12 @@ demo = gr.Interface(
410
  Submitted to ICASSP 2026
411
  </p>
412
  <hr style="border: none; border-top: 1px solid #e5e7eb; margin: 20px 0;">
413
- <p><strong>⚠️ Demo Version Notice:</strong><br>
414
  This demo differs from the paper version and focuses exclusively on vocal segment transcription.</p>
415
  <p>Upload any music file to detect vocal similarities in the Covers80 dataset.<br>
416
  The system analyzes only vocal characteristics, ignoring instrumental parts.</p>
417
  <p style="font-size: 0.95em; color: #dc2626; font-weight: 600; margin-top: 15px;">
418
- ⏱️ Processing can take up to 2 minutes per file
419
  </p>
420
  <p style="font-size: 0.95em; color: #6b7280; margin-top: 10px;">
421
  Supported formats: MP3, WAV, M4A, FLAC
 
210
  @spaces.GPU(duration=300)
211
  def process_audio_for_matching(audio_file):
212
  if audio_file is None:
213
+ return None, None, None, """
214
  <div style='text-align: center; color: #dc2626; padding: 30px; background: #fef2f2; border-radius: 12px; border: 2px dashed #fecaca;'>
215
  <h3>No Audio File</h3>
216
  <p>Please upload an audio file to get started!</p>
 
220
  result = inference(audio_file)
221
 
222
  if result.get('message') != 'success':
223
+ return None, None, None, f"""
224
  <div style="text-align: center; padding: 25px; background: #fefce8; border-radius: 12px; border: 1px solid #fde047; margin: 10px 0;">
225
  <h3 style="color: #a16207; margin-bottom: 15px;">No Matches Found</h3>
226
  <p style="color: #a16207; font-size: 1.1em;">{result.get('message', 'Unknown error occurred')}</p>
 
229
 
230
  matches = result.get('matches', [])
231
  if not matches:
232
+ return None, None, None, """
233
  <div style="text-align: center; padding: 25px; background: #fefce8; border-radius: 12px; border: 1px solid #fde047; margin: 10px 0;">
234
  <h3 style="color: #a16207; margin-bottom: 15px;">No Matches Found</h3>
235
  <p style="color: #a16207; font-size: 1.1em;">No matching vocals found in the dataset.</p>
236
  </div>
237
  """
238
 
239
+ # Get audio files for top 3 matches
240
+ audio_files = [None, None, None]
241
+ for i, match in enumerate(matches[:3]):
242
+ song_title = match.get('song_title', 'Unknown Song')
243
+ song_file_path = find_song_file_by_title(song_title)
244
+
245
+ print(f"Match {i+1}: {song_title}")
246
+ print(f" File path: {song_file_path}")
247
+
248
+ if song_file_path and os.path.exists(song_file_path):
249
+ audio_files[i] = song_file_path
250
+ else:
251
+ audio_files[i] = None
252
 
253
  # Generate match results HTML
254
  matches_html = ""
255
  for match in matches:
256
  rank = match.get('rank', 0)
257
+ song_title = match.get('song_title', 'Unknown Song')
258
  confidence = match.get('confidence', '0%')
259
  test_time = match.get('test_time', 0)
260
+ library_time = match.get('library_time', 0)
261
 
262
  # Ranking colors
263
  rank_colors = {1: '#dc2626', 2: '#ea580c', 3: '#16a34a'}
 
271
  <span style="background: {rank_color}; color: white; padding: 4px 8px; border-radius: 15px; font-size: 0.8em; margin-right: 10px;">
272
  #{rank}
273
  </span>
274
+ {song_title}
275
  </h3>
276
  <span style="background: #f3f4f6; color: #111827; padding: 6px 12px; border-radius: 20px; font-weight: 600;">
277
  {confidence}
 
286
  </div>
287
  <div>
288
  <strong style="color: #1f2937;">Matched At</strong>
289
+ <br><span style="color: #16a34a; font-size: 1.1em;">{library_time:.1f}s</span>
290
  </div>
291
  </div>
292
  </div>
 
305
 
306
  <div style="text-align: center; margin-top: 25px; padding: 15px; background: #f3f4f6; border-radius: 8px;">
307
  <p style="color: #374151; margin: 0; font-size: 0.95em;">
308
+ <strong>Audio Players:</strong> Top 3 matched songs are available above. Timestamps show where similar vocals were found.
309
  </p>
310
  </div>
311
  </div>
312
  """
313
 
314
+ return audio_files[0], audio_files[1], audio_files[2], results_html
 
 
 
 
315
 
316
  # CSS styles
317
  custom_css = """
 
384
  }
385
  """
386
 
387
+ # Gradio interface with 3 audio outputs
388
  demo = gr.Interface(
389
  fn=process_audio_for_matching,
390
  inputs=gr.Audio(
 
394
  ),
395
  outputs=[
396
  gr.Audio(
397
+ label="Match #1 Audio",
398
+ elem_classes=["output-container"]
399
+ ),
400
+ gr.Audio(
401
+ label="Match #2 Audio",
402
+ elem_classes=["output-container"]
403
+ ),
404
+ gr.Audio(
405
+ label="Match #3 Audio",
406
  elem_classes=["output-container"]
407
  ),
408
  gr.HTML(
 
420
  Submitted to ICASSP 2026
421
  </p>
422
  <hr style="border: none; border-top: 1px solid #e5e7eb; margin: 20px 0;">
423
+ <p><strong>Demo Version Notice:</strong><br>
424
  This demo differs from the paper version and focuses exclusively on vocal segment transcription.</p>
425
  <p>Upload any music file to detect vocal similarities in the Covers80 dataset.<br>
426
  The system analyzes only vocal characteristics, ignoring instrumental parts.</p>
427
  <p style="font-size: 0.95em; color: #dc2626; font-weight: 600; margin-top: 15px;">
428
+ Processing can take up to 2 minutes per file
429
  </p>
430
  <p style="font-size: 0.95em; color: #6b7280; margin-top: 10px;">
431
  Supported formats: MP3, WAV, M4A, FLAC