Spaces:

mippia
/

MPD-demo

Sleeping

App Files Files Community

slslslrhfem committed on Sep 16, 2025

Commit

884ce27

1 Parent(s): 2978910

change download mechanism

Browse files

Files changed (1) hide show

app.py +205 -122

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import glob
 from pathlib import Path
 from huggingface_hub import snapshot_download
 import shutil
 token = os.getenv("HF_TOKEN")
@@ -175,9 +176,70 @@ if ml_models_path.exists():
     for item in ml_models_path.iterdir():
         print(f"    {item.name}")
-# Import inference
 print("=== IMPORTING INFERENCE ===")
-from inference import inference
 def find_song_file_by_title(song_title):
     covers80_path = Path("covers80")
@@ -207,6 +269,35 @@ def find_song_file_by_title(song_title):
     return None
 def format_time(seconds):
     """Convert seconds to MM:SS format"""
     if seconds is None or seconds < 0:
@@ -219,34 +310,39 @@ def format_time(seconds):
 @spaces.GPU(duration=300)
 def process_audio_for_matching(audio_file):
     if audio_file is None:
-        return None, None, None, """
         <div style='text-align: center; color: #dc2626; padding: 20px; background: #fef2f2; border-radius: 8px;'>
             <h3>No Audio File</h3>
             <p>Please upload an audio file to get started!</p>
         </div>
-        """
     result = inference(audio_file)
     if result.get('message') != 'success':
-        return None, None, None, f"""
         <div style="text-align: center; padding: 20px; background: #fefce8; border-radius: 8px;">
             <h3 style="color: #a16207;">No Matches Found</h3>
             <p style="color: #a16207;">{result.get('message', 'Unknown error occurred')}</p>
         </div>
-        """
     matches = result.get('matches', [])
     if not matches:
-        return None, None, None, """
         <div style="text-align: center; padding: 20px; background: #fefce8; border-radius: 8px;">
             <h3 style="color: #a16207;">No Matches Found</h3>
             <p style="color: #a16207;">No matching vocals found in the dataset.</p>
         </div>
-        """
-    # Get audio files for top 3 matches
-    audio_files = [None, None, None]
     for i, match in enumerate(matches[:3]):
         song_title = match.get('song_title', 'Unknown Song')
         song_file_path = find_song_file_by_title(song_title)
@@ -255,25 +351,36 @@ def process_audio_for_matching(audio_file):
         print(f"  File path: {song_file_path}")
         if song_file_path and os.path.exists(song_file_path):
-            audio_files[i] = song_file_path
-        else:
-            audio_files[i] = None
-    # Generate match results HTML - 클릭 가능한 timestamp 포함
     matches_html = ""
     for i, match in enumerate(matches[:3]):
         rank = match.get('rank', 0)
         song_title = match.get('song_title', 'Unknown Song')
         confidence = match.get('confidence', '0%')
         test_time = match.get('test_time', 0)
         library_time = match.get('library_time', 0)
         # Ranking colors
         rank_colors = {1: '#dc2626', 2: '#ea580c', 3: '#16a34a'}
         rank_color = rank_colors.get(rank, '#6b7280')
-        # 클릭 가능한 timestamp 생성 - 이 변수들은 이제 사용하지 않음
         matches_html += f"""
         <div style="background: #ffffff; border-radius: 8px; padding: 15px; margin: 10px 0;
                     border-left: 4px solid {rank_color}; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
@@ -288,21 +395,15 @@ def process_audio_for_matching(audio_file):
                 </div>
                 <div style="display: flex; gap: 15px; align-items: center;">
                     <div style="text-align: center;">
-                        <small style="color: #6b7280;">Your Audio</small>
                         <div style="color: #dc2626; font-weight: 600;">
-                            <span class="timestamp-link" onclick="window.seekAudio('input', {test_time if test_time else 0})"
-                                  title="Click to play at {format_time(test_time)} in your uploaded audio">
-                                {format_time(test_time)}
-                            </span>
                         </div>
                     </div>
                     <div style="text-align: center;">
-                        <small style="color: #6b7280;">Matched At</small>
                         <div style="color: #16a34a; font-weight: 600;">
-                            <span class="timestamp-link" onclick="window.seekAudio('match{i+1}', {library_time if library_time else 0})"
-                                  title="Click to play at {format_time(library_time)} in matched song">
-                                {format_time(library_time)}
-                            </span>
                         </div>
                     </div>
                     <div style="background: #f3f4f6; color: #111827; padding: 4px 10px; border-radius: 12px; font-weight: 600; font-size: 0.9em;">
@@ -319,15 +420,15 @@ def process_audio_for_matching(audio_file):
         <div style="text-align: center; margin-bottom: 20px;">
             <h3 style="color: #111827; margin: 0;">Vocal Matching Results</h3>
             <p style="color: #6b7280; margin: 5px 0;">Found {len(matches)} similar vocals in Covers80 dataset</p>
-            <p style="color: #2563eb; margin: 5px 0; font-size: 0.9em;">💡 Click on timestamps to jump to that time in the audio</p>
         </div>
         {matches_html}
     </div>
     """
-    return audio_files[0], audio_files[1], audio_files[2], results_html
-# CSS styles with timestamp styling and JavaScript
 custom_css = """
 .gradio-container {
     background: #f9fafb !important;
@@ -340,99 +441,27 @@ custom_css = """
     box-shadow: 0 4px 20px rgba(0,0,0,0.08) !important;
     margin: 0 auto !important;
     padding: 30px !important;
-    max-width: 1200px;
     border: 1px solid #e5e7eb !important;
 }
-.timestamp-link {
-    cursor: pointer !important;
-    color: #2563eb !important;
-    font-weight: 600 !important;
-    text-decoration: underline !important;
-    transition: color 0.2s !important;
 }
-.timestamp-link:hover {
-    color: #1d4ed8 !important;
-    background-color: #eff6ff !important;
-    padding: 2px 4px !important;
-    border-radius: 4px !important;
-}
-"""
-# JavaScript for timestamp functionality
-timestamp_js = """
-<script>
-function seekToTime(audioType, time) {
-    console.log('Seeking to time:', time, 'in audio type:', audioType);
-    // Get all audio elements on page
-    const allAudios = document.querySelectorAll('audio');
-    console.log('Found', allAudios.length, 'audio elements');
-    let audioElement = null;
-    if (audioType === 'input') {
-        // First audio is input
-        audioElement = allAudios[0];
-    } else if (audioType.startsWith('match')) {
-        // match1 = allAudios[1], match2 = allAudios[2], match3 = allAudios[3]
-        const matchNum = parseInt(audioType.replace('match', ''));
-        audioElement = allAudios[matchNum];
-    }
-    if (audioElement) {
-        console.log('Found audio element:', audioElement);
-        console.log('Audio src:', audioElement.src);
-        console.log('Audio readyState:', audioElement.readyState);
-        console.log('Audio duration:', audioElement.duration);
-        // Just set the currentTime directly
-        audioElement.currentTime = time;
-        console.log('Set currentTime to:', time);
-        console.log('Actual currentTime now:', audioElement.currentTime);
-    } else {
-        console.log('Audio element not found for:', audioType);
-        console.log('Available audio elements:', allAudios.length);
-    }
 }
-// Make function globally available
-window.seekToTime = seekToTime;
-</script>
 """
-# Gradio interface with horizontal layout
-with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Music Plagiarism Detection", head="""
-<script>
-// Global function for seeking audio
-window.seekAudio = function(audioType, time) {
-    console.log('Seeking to time:', time, 'in audio type:', audioType);
-    setTimeout(() => {
-        const allAudios = document.querySelectorAll('audio');
-        console.log('Found', allAudios.length, 'audio elements');
-        let audioElement = null;
-        if (audioType === 'input') {
-            audioElement = allAudios[0];
-        } else if (audioType.startsWith('match')) {
-            const matchNum = parseInt(audioType.replace('match', ''));
-            audioElement = allAudios[matchNum];
-        }
-        if (audioElement) {
-            console.log('Found audio element, setting time to:', time);
-            audioElement.currentTime = time;
-            // Try to play
-            audioElement.play().catch(e => console.log('Play blocked:', e.message));
-        } else {
-            console.log('Audio element not found');
-        }
-    }, 100);
-};
-</script>
-""") as demo:
     gr.Markdown("""
     <div style="text-align: center; margin-bottom: 20px;">
@@ -458,20 +487,74 @@ window.seekAudio = function(audioType, time) {
     with gr.Row():
         submit_btn = gr.Button("Analyze Audio", variant="primary", size="lg")
-    # Output section - horizontal layout
     with gr.Row():
-        with gr.Column(scale=1):
-            audio1 = gr.Audio(label="Match #1", show_label=True, elem_id="audio_1")
-            audio2 = gr.Audio(label="Match #2", show_label=True, elem_id="audio_2")
-            audio3 = gr.Audio(label="Match #3", show_label=True, elem_id="audio_3")
         with gr.Column(scale=1):
             results = gr.HTML(label="Analysis Results")
     submit_btn.click(
         fn=process_audio_for_matching,
         inputs=[audio_input],
-        outputs=[audio1, audio2, audio3, results]
     )
 if __name__ == "__main__":

 from pathlib import Path
 from huggingface_hub import snapshot_download
 import shutil
+import tempfile
 token = os.getenv("HF_TOKEN")
     for item in ml_models_path.iterdir():
         print(f"    {item.name}")
+# Import updated inference
 print("=== IMPORTING INFERENCE ===")
+# Updated inference functions
+def inference(audio_path):
+    """Run the matching pipeline on one audio file and return a formatted result.
+
+    Passes ``audio_path`` to ``segment_transcription``, hands its output to
+    ``get_one_result``, and returns whatever ``result_formatting`` builds
+    from that result (a dict with ``'matches'`` and ``'message'`` keys).
+    """
+    # Imported inside the function, so these project modules are only loaded
+    # when inference is actually called, not when this file is imported.
+    from segment_transcription import segment_transcription
+    from compare import get_one_result
+    segment_datas = segment_transcription(audio_path)
+    result = get_one_result(segment_datas)
+    final_result = result_formatting(result)
+    return final_result
+def result_formatting(result):
+    """
+    Format the result returned by get_one_result.
+    result: sorted list of CompareHelper objects
+
+    Returns a dict with two keys:
+      'matches': list of at most 3 per-match dicts (empty on failure)
+      'message': 'success', 'No matches found', or the error string that
+                 was passed in as result[0]
+    """
+    if not result or len(result) == 0:
+        return {
+            'matches': [],
+            'message': 'No matches found'
+        }
+    # Check for an error message: a list whose first element is a string is
+    # treated as an error report rather than a list of matches
+    if isinstance(result, list) and len(result) > 0 and isinstance(result[0], str):
+        return {
+            'matches': [],
+            'message': result[0] # "there is no note for this song"
+        }
+    # Extract the top 3 results
+    top_3_results = []
+    for i, compare_helper in enumerate(result[:3]):
+        score = compare_helper.data[0] # similarity score
+        test_label = compare_helper.data[1] # test song info
+        library_label = compare_helper.data[2] # matched song info
+        # Extract info from the library label
+        song_title = library_label.get('title', 'Unknown Song')
+        library_time = library_label.get('time', 0) # time of the matched segment
+        # NOTE(review): 'time2' is presumably the segment end time (the caller
+        # uses it as the end of the extracted segment) - confirm against compare
+        library_time2 = library_label.get('time2', 0)
+        # Extract info from the test label (which may be empty/None)
+        test_time = test_label.get('time', 0) if test_label else 0 # time in the input song
+        test_time2 = test_label.get('time2', 0) if test_label else 0
+        match_info = {
+            'rank': i + 1,
+            'score': float(score),
+            'song_title': song_title,
+            'test_time': float(test_time), # matched time in the input song
+            'test_time2' : float(test_time2),
+            'library_time': float(library_time), # matched time in the library song
+            'library_time2': float(library_time2),
+            'confidence': f"{score * 100:.1f}%",
+            'time_match': f"Input: {test_time:.1f}s ↔ Library: {library_time:.1f}s"
+        }
+        top_3_results.append(match_info)
+    return {
+        'matches': top_3_results,
+        'message': 'success'
+    }
 def find_song_file_by_title(song_title):
     covers80_path = Path("covers80")
     return None
+def extract_audio_segment(audio_file_path, start_time, end_time):
+    """
+    오디오 파일에서 특정 구간을 추출하여 임시 파일로 저장
+    """
+    try:
+        # Load audio file
+        y, sr = librosa.load(audio_file_path, sr=None)
+        # Convert time to samples
+        start_sample = int(start_time * sr)
+        end_sample = int(end_time * sr)
+        # Extract segment
+        segment = y[start_sample:end_sample]
+        # Create temporary file
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
+        temp_file.close()
+        # Save segment
+        import soundfile as sf
+        sf.write(temp_file.name, segment, sr)
+        return temp_file.name
+    except Exception as e:
+        print(f"Error extracting segment: {e}")
+        return None
 def format_time(seconds):
     """Convert seconds to MM:SS format"""
     if seconds is None or seconds < 0:
 @spaces.GPU(duration=300)
 def process_audio_for_matching(audio_file):
     if audio_file is None:
+        return [None] * 10 + ["""
         <div style='text-align: center; color: #dc2626; padding: 20px; background: #fef2f2; border-radius: 8px;'>
             <h3>No Audio File</h3>
             <p>Please upload an audio file to get started!</p>
         </div>
+        """]
     result = inference(audio_file)
     if result.get('message') != 'success':
+        return [None] * 10 + [f"""
         <div style="text-align: center; padding: 20px; background: #fefce8; border-radius: 8px;">
             <h3 style="color: #a16207;">No Matches Found</h3>
             <p style="color: #a16207;">{result.get('message', 'Unknown error occurred')}</p>
         </div>
+        """]
     matches = result.get('matches', [])
     if not matches:
+        return [None] * 10 + ["""
         <div style="text-align: center; padding: 20px; background: #fefce8; border-radius: 8px;">
             <h3 style="color: #a16207;">No Matches Found</h3>
             <p style="color: #a16207;">No matching vocals found in the dataset.</p>
         </div>
+        """]
+    # Initialize audio outputs
+    audio_outputs = [None] * 10
+    # Original uploaded audio (index 0)
+    audio_outputs[0] = audio_file
+    # Get full songs and segments for top 3 matches
     for i, match in enumerate(matches[:3]):
         song_title = match.get('song_title', 'Unknown Song')
         song_file_path = find_song_file_by_title(song_title)
         print(f"  File path: {song_file_path}")
         if song_file_path and os.path.exists(song_file_path):
+            # Full matched song (indices 1, 2, 3)
+            audio_outputs[1 + i] = song_file_path
+            # Extract segments for input audio (indices 4, 6, 8)
+            input_start = match.get('test_time', 0)
+            input_end = match.get('test_time2', input_start + 10)  # Default 10 seconds if no end time
+            input_segment = extract_audio_segment(audio_file, input_start, input_end)
+            audio_outputs[4 + i * 2] = input_segment
+            # Extract segments for matched song (indices 5, 7, 9)
+            library_start = match.get('library_time', 0)
+            library_end = match.get('library_time2', library_start + 10)  # Default 10 seconds if no end time
+            library_segment = extract_audio_segment(song_file_path, library_start, library_end)
+            audio_outputs[5 + i * 2] = library_segment
+    # Generate results HTML
     matches_html = ""
     for i, match in enumerate(matches[:3]):
         rank = match.get('rank', 0)
         song_title = match.get('song_title', 'Unknown Song')
         confidence = match.get('confidence', '0%')
         test_time = match.get('test_time', 0)
+        test_time2 = match.get('test_time2', 0)
         library_time = match.get('library_time', 0)
+        library_time2 = match.get('library_time2', 0)
         # Ranking colors
         rank_colors = {1: '#dc2626', 2: '#ea580c', 3: '#16a34a'}
         rank_color = rank_colors.get(rank, '#6b7280')
         matches_html += f"""
         <div style="background: #ffffff; border-radius: 8px; padding: 15px; margin: 10px 0;
                     border-left: 4px solid {rank_color}; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
                 </div>
                 <div style="display: flex; gap: 15px; align-items: center;">
                     <div style="text-align: center;">
+                        <small style="color: #6b7280;">Your Segment</small>
                         <div style="color: #dc2626; font-weight: 600;">
+                            {format_time(test_time)} - {format_time(test_time2)}
                         </div>
                     </div>
                     <div style="text-align: center;">
+                        <small style="color: #6b7280;">Matched Segment</small>
                         <div style="color: #16a34a; font-weight: 600;">
+                            {format_time(library_time)} - {format_time(library_time2)}
                         </div>
                     </div>
                     <div style="background: #f3f4f6; color: #111827; padding: 4px 10px; border-radius: 12px; font-weight: 600; font-size: 0.9em;">
         <div style="text-align: center; margin-bottom: 20px;">
             <h3 style="color: #111827; margin: 0;">Vocal Matching Results</h3>
             <p style="color: #6b7280; margin: 5px 0;">Found {len(matches)} similar vocals in Covers80 dataset</p>
+            <p style="color: #2563eb; margin: 5px 0; font-size: 0.9em;">🎵 Listen to original songs and extracted segments</p>
         </div>
         {matches_html}
     </div>
     """
+    return audio_outputs + [results_html]
+# CSS styles
 custom_css = """
 .gradio-container {
     background: #f9fafb !important;
     box-shadow: 0 4px 20px rgba(0,0,0,0.08) !important;
     margin: 0 auto !important;
     padding: 30px !important;
+    max-width: 1400px;
     border: 1px solid #e5e7eb !important;
 }
+.audio-section {
+    background: #f8fafc !important;
+    border-radius: 12px !important;
+    padding: 15px !important;
+    margin: 10px 0 !important;
+    border: 1px solid #e2e8f0 !important;
 }
+.segment-container {
+    background: #fefefe !important;
+    border-radius: 8px !important;
+    padding: 12px !important;
+    border: 1px solid #e5e7eb !important;
+    margin: 5px 0 !important;
 }
 """
+# Gradio interface
+with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Music Plagiarism Detection") as demo:
     gr.Markdown("""
     <div style="text-align: center; margin-bottom: 20px;">
     with gr.Row():
         submit_btn = gr.Button("Analyze Audio", variant="primary", size="lg")
+    # Output section
     with gr.Row():
+        # Left column - Original and Full Songs
+        with gr.Column(scale=2):
+            gr.Markdown("### 🎵 Original & Matched Songs", elem_classes=["audio-section"])
+            original_audio = gr.Audio(label="Your Uploaded Audio", show_label=True, elem_id="original_audio")
+            with gr.Row():
+                match1_full = gr.Audio(label="Match #1 - Full Song", show_label=True, elem_id="match1_full")
+                match2_full = gr.Audio(label="Match #2 - Full Song", show_label=True, elem_id="match2_full")
+                match3_full = gr.Audio(label="Match #3 - Full Song", show_label=True, elem_id="match3_full")
+        # Right column - Results
         with gr.Column(scale=1):
             results = gr.HTML(label="Analysis Results")
+    # Segments section
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### 🎯 Matched Segments Comparison", elem_classes=["audio-section"])
+            # Match 1 segments
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("**Match #1 - Your Segment**", elem_classes=["segment-container"])
+                    match1_input_segment = gr.Audio(label="Your Audio Segment", show_label=False, elem_id="match1_input_seg")
+                with gr.Column():
+                    gr.Markdown("**Match #1 - Matched Segment**", elem_classes=["segment-container"])
+                    match1_library_segment = gr.Audio(label="Library Segment", show_label=False, elem_id="match1_lib_seg")
+            # Match 2 segments
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("**Match #2 - Your Segment**", elem_classes=["segment-container"])
+                    match2_input_segment = gr.Audio(label="Your Audio Segment", show_label=False, elem_id="match2_input_seg")
+                with gr.Column():
+                    gr.Markdown("**Match #2 - Matched Segment**", elem_classes=["segment-container"])
+                    match2_library_segment = gr.Audio(label="Library Segment", show_label=False, elem_id="match2_lib_seg")
+            # Match 3 segments
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("**Match #3 - Your Segment**", elem_classes=["segment-container"])
+                    match3_input_segment = gr.Audio(label="Your Audio Segment", show_label=False, elem_id="match3_input_seg")
+                with gr.Column():
+                    gr.Markdown("**Match #3 - Matched Segment**", elem_classes=["segment-container"])
+                    match3_library_segment = gr.Audio(label="Library Segment", show_label=False, elem_id="match3_lib_seg")
+    # Define outputs list
+    outputs = [
+        original_audio,           # 0
+        match1_full,             # 1
+        match2_full,             # 2
+        match3_full,             # 3
+        match1_input_segment,    # 4
+        match1_library_segment,  # 5
+        match2_input_segment,    # 6
+        match2_library_segment,  # 7
+        match3_input_segment,    # 8
+        match3_library_segment,  # 9
+        results                  # 10
+    ]
     submit_btn.click(
         fn=process_audio_for_matching,
         inputs=[audio_input],
+        outputs=outputs
     )
 if __name__ == "__main__":