Spaces:

mippia
/

MPD-demo

Sleeping

App Files Files Community

slslslrhfem committed on Sep 15, 2025

Commit

e99e064

0 Parent(s):

Clean start: app + 1005_e_4 LFS only

Browse files

Files changed (6) hide show

.gitattributes +1 -0
.gitignore +5 -0
1005_e_4 +3 -0
app.py +347 -0
inference.py +112 -0
requirements.txt +116 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1 @@


1	+ 1005_e_4 filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+covers80/
+ml_models/
+__pycache__/
+*.pyc
+.env

1005_e_4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff9aab3de0a7c00d5946c33027cc6aa5a8c1267f297b212b7d906304b417f360
+size 17285251

app.py ADDED Viewed

	@@ -0,0 +1,347 @@

+import spaces
+import gradio as gr
+import torch
+import librosa
+import numpy as np
+from inference import inference
+from huggingface_hub import snapshot_download
+from pathlib import Path
+import os
+token = os.getenv("HF_TOKEN")
+def download_data_from_hub():
+    """
+    Download covers80 and ml_models folders from Dataset repository
+    """
+    base_dir = Path(".")
+    data_repo_id = "nininigold/music-data"
+    folders_to_check = ["covers80", "ml_models"]
+    downloaded_folders = {}
+    # 폴더들이 이미 존재하는지 확인
+    all_exist = all((base_dir / folder).exists() and any((base_dir / folder).iterdir())
+                   for folder in folders_to_check)
+    if not all_exist:
+        print(f"📥 Downloading data folders from dataset: {data_repo_id}")
+        print(f"   This includes covers80 and ml_models folders (~17k+ files each)")
+        print(f"   This may take several minutes...")
+        try:
+            # Dataset 저장소에서 전체 다운로드
+            downloaded_path = snapshot_download(
+                repo_id=data_repo_id,
+                repo_type="dataset",
+                local_dir=str(base_dir),
+                local_dir_use_symlinks=False,
+                token=token,
+                ignore_patterns=["*.md", "*.txt", ".gitattributes", "README.md"]
+            )
+            print(f"✅ Dataset downloaded successfully")
+            # 각 폴더 확인 및 파일 수 카운트
+            for folder_name in folders_to_check:
+                folder_path = base_dir / folder_name
+                if folder_path.exists():
+                    file_count = len([f for f in folder_path.rglob("*") if f.is_file()])
+                    print(f"   📁 {folder_name}: {file_count:,} files")
+                    downloaded_folders[folder_name] = str(folder_path)
+                else:
+                    print(f"   ⚠️ {folder_name} folder not found in downloaded data")
+                    downloaded_folders[folder_name] = None
+        except Exception as e:
+            print(f"⚠️ Failed to download dataset: {e}")
+            print(f"   Dataset: {data_repo_id}")
+            for folder_name in folders_to_check:
+                downloaded_folders[folder_name] = None
+    else:
+        print(f"✅ Data folders and files already exist locally")
+        for folder_name in folders_to_check:
+            folder_path = base_dir / folder_name
+            if folder_path.exists():
+                file_count = len([f for f in folder_path.rglob("*") if f.is_file()])
+                print(f"   📁 {folder_name}: {file_count:,} files")
+                downloaded_folders[folder_name] = str(folder_path)
+            else:
+                downloaded_folders[folder_name] = None
+        for file_name in files_to_check:
+            file_path = base_dir / file_name
+            if file_path.exists():
+                file_size = file_path.stat().st_size / (1024*1024)  # MB
+                print(f"   📄 {file_name}: {file_size:.1f} MB")
+                downloaded_folders[file_name] = str(file_path)
+            else:
+                downloaded_folders[file_name] = None
+    return downloaded_folders
+@spaces.GPU
+def process_audio_for_matching(audio_file):
+    """
+    Process the uploaded audio file and return matching results
+    """
+    if audio_file is None:
+        return """
+        <div style='text-align: center; color: #ff6b6b; padding: 30px; background: #fff5f5; border-radius: 15px; border: 2px dashed #ff6b6b;'>
+            <h3>🎵 No Audio File</h3>
+            <p>Please upload an audio file to get started!</p>
+        </div>
+        """
+    try:
+        # inference 함수 호출
+        result = inference(audio_file)
+        # result 구조:
+        # {
+        #     'matches': [
+        #         {
+        #             'rank': 1,
+        #             'score': 0.95,
+        #             'song_title': 'Song Name',
+        #             'segment_file': 'path/to/segment.wav',
+        #             'test_time': 23.5,
+        #             'library_time': 45.2,
+        #             'confidence': '95.0%',
+        #             'time_match': 'Input: 23.5s ↔ Library: 45.2s'
+        #         }
+        #     ],
+        #     'message': 'success' or error message
+        # }
+        if result.get('message') != 'success':
+            return f"""
+            <div style="text-align: center; padding: 25px; background: #fff3cd; border-radius: 15px; border: 1px solid #ffeaa7; margin: 10px 0;">
+                <h3 style="color: #856404; margin-bottom: 15px;">⚠️ No Matches Found</h3>
+                <p style="color: #856404; font-size: 1.1em;">{result.get('message', 'Unknown error occurred')}</p>
+                <p style="color: #856404; font-size: 0.9em; margin-top: 10px;">Try uploading a clearer audio sample or a different part of the song.</p>
+            </div>
+            """
+        matches = result.get('matches', [])
+        if not matches:
+            return """
+            <div style="text-align: center; padding: 25px; background: #fff3cd; border-radius: 15px; border: 1px solid #ffeaa7; margin: 10px 0;">
+                <h3 style="color: #856404; margin-bottom: 15px;">🔍 No Matches Found</h3>
+                <p style="color: #856404; font-size: 1.1em;">Sorry, we couldn't find any matching songs in our database.</p>
+                <p style="color: #856404; font-size: 0.9em; margin-top: 10px;">Try uploading a different audio sample.</p>
+            </div>
+            """
+        # 매치 결과 HTML 생성
+        matches_html = ""
+        for match in matches:
+            rank = match.get('rank', 0)
+            song_title = match.get('song_title', 'Unknown Song')
+            confidence = match.get('confidence', '0%')
+            time_match = match.get('time_match', 'Unknown')
+            test_time = match.get('test_time', 0)
+            library_time = match.get('library_time', 0)
+            segment_file = match.get('segment_file', '')
+            # 랭킹에 따른 색상 설정
+            rank_colors = {1: '#e74c3c', 2: '#f39c12', 3: '#27ae60'}
+            rank_color = rank_colors.get(rank, '#7f8c8d')
+            # 세그먼트 파일 정보
+            segment_info = f"Found: {segment_file}" if segment_file else "No segment file found"
+            matches_html += f"""
+            <div style="background: #ffffff; border-radius: 12px; padding: 20px; margin: 15px 0;
+                        border-left: 5px solid {rank_color}; box-shadow: 0 3px 10px rgba(0,0,0,0.1);">
+                <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;">
+                    <h3 style="color: #2c3e50; margin: 0; font-size: 1.2em;">
+                        <span style="background: {rank_color}; color: white; padding: 4px 8px; border-radius: 15px; font-size: 0.8em; margin-right: 10px;">
+                            #{rank}
+                        </span>
+                        {song_title}
+                    </h3>
+                    <span style="background: #ecf0f1; color: #2c3e50; padding: 6px 12px; border-radius: 20px; font-weight: 600;">
+                        {confidence}
+                    </span>
+                </div>
+                <div style="background: #f8f9fa; border-radius: 8px; padding: 12px; margin: 10px 0;">
+                    <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; text-align: center;">
+                        <div>
+                            <strong style="color: #3498db;">Your Audio</strong>
+                            <br><span style="color: #e74c3c; font-size: 1.1em;">{test_time:.1f}s</span>
+                        </div>
+                        <div>
+                            <strong style="color: #3498db;">Matched At</strong>
+                            <br><span style="color: #27ae60; font-size: 1.1em;">{library_time:.1f}s</span>
+                        </div>
+                    </div>
+                </div>
+                <div style="font-size: 0.9em; color: #7f8c8d; text-align: center; margin-top: 10px;">
+                    📁 {segment_info}
+                </div>
+            </div>
+            """
+        formatted_result = f"""
+        <div style="background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); border-radius: 20px; padding: 30px;
+                    box-shadow: 0 8px 25px rgba(0,0,0,0.1); border: 1px solid #dee2e6; margin: 10px 0;">
+            <div style="text-align: center; margin-bottom: 25px;">
+                <h2 style="color: #2c3e50; margin-bottom: 10px; font-size: 1.8em;">🎵 Matching Results</h2>
+                <p style="color: #7f8c8d; font-size: 1.1em;">Found {len(matches)} potential matches in our database</p>
+            </div>
+            {matches_html}
+            <div style="text-align: center; margin-top: 25px; padding: 15px; background: #e8f5e8; border-radius: 10px;">
+                <p style="color: #27ae60; margin: 0; font-size: 0.95em;">
+                    💡 <strong>How to read results:</strong> The times show where similar segments were found.
+                    Higher ranked results have better similarity scores.
+                </p>
+            </div>
+        </div>
+        """
+        return formatted_result
+    except Exception as e:
+        return f"""
+        <div style="text-align: center; padding: 20px; background: #f8d7da; border-radius: 10px; border: 1px solid #f5c6cb; color: #721c24; margin: 10px 0;">
+            <h3>❌ Error Processing Audio</h3>
+            <p>Error details: {str(e)}</p>
+            <p style="font-size: 0.9em; margin-top: 10px;">Please try again with a different audio file.</p>
+        </div>
+        """
+# Clean CSS styling for the app, passed to gr.Interface via css=custom_css.
+# The .upload-container, .output-container and .main-container selectors
+# correspond to the elem_classes given to the components further down.
+custom_css = """
+.gradio-container {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+    min-height: 100vh;
+    padding: 20px;
+}
+.main-container {
+    background: #ffffff !important;
+    border-radius: 20px !important;
+    box-shadow: 0 15px 35px rgba(0,0,0,0.1) !important;
+    margin: 0 auto !important;
+    padding: 40px !important;
+    max-width: 900px;
+}
+h1 {
+    text-align: center !important;
+    font-size: 2.8em !important;
+    font-weight: 800 !important;
+    margin-bottom: 15px !important;
+    background: linear-gradient(135deg, #667eea, #764ba2) !important;
+    -webkit-background-clip: text !important;
+    -webkit-text-fill-color: transparent !important;
+    background-clip: text !important;
+}
+.gradio-markdown p {
+    text-align: center !important;
+    font-size: 1.1em !important;
+    color: #555 !important;
+    margin-bottom: 25px !important;
+    line-height: 1.6;
+}
+.upload-container {
+    background: #f8f9fa !important;
+    border-radius: 15px !important;
+    padding: 25px !important;
+    border: 2px dashed #dee2e6 !important;
+    margin-bottom: 25px !important;
+    transition: all 0.3s ease !important;
+}
+.upload-container:hover {
+    border-color: #667eea !important;
+    background: #f1f3ff !important;
+}
+.output-container {
+    background: #ffffff !important;
+    border-radius: 15px !important;
+    padding: 20px !important;
+    border: 1px solid #e1e5e9 !important;
+    min-height: 200px !important;
+    box-shadow: 0 2px 10px rgba(0,0,0,0.05) !important;
+}
+.gr-button {
+    background: linear-gradient(135deg, #667eea, #764ba2) !important;
+    color: #fff !important;
+    border: none !important;
+    border-radius: 25px !important;
+    padding: 12px 30px !important;
+    font-weight: 600 !important;
+    font-size: 1.1em !important;
+    transition: all 0.3s ease !important;
+    box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
+}
+.gr-button:hover {
+    transform: translateY(-2px) !important;
+    box-shadow: 0 6px 20px rgba(102, 126, 234, 0.6) !important;
+}
+@media (max-width: 768px) {
+    h1 { font-size: 2.2em !important; }
+    .main-container { margin: 10px !important; padding: 25px !important; }
+    .upload-container { padding: 20px !important; }
+}
+"""
+# 앱 초기화
+print("🚀 Starting Music Similarity Detection App...")
+print("📦 Downloading covers80 and ml_models folders...")
+folders = download_data_from_hub()
+if folders.get("covers80") and folders.get("ml_models"):
+    print("✅ All required data folders are ready")
+elif folders.get("covers80") or folders.get("ml_models"):
+    print("⚠️ Some data folders available, app may work with limited functionality")
+else:
+    print("⚠️ Warning: Required data folders not available, app may not work properly")
+# Gradio interface: one audio input wired to process_audio_for_matching,
+# whose returned HTML string is shown in a single HTML output.
+demo = gr.Interface(
+    fn=process_audio_for_matching,
+    inputs=gr.Audio(
+        type="filepath",
+        label="🎵 Upload Your Audio File",
+        elem_classes=["upload-container"]
+    ),
+    outputs=gr.HTML(
+        label="🔍 Similarity Results",
+        elem_classes=["output-container"]
+    ),
+    title="🎵 Music Similarity Detector",
+    description="""
+    <div style="text-align: center; font-size: 1.1em; color: #555; margin: 25px 0; line-height: 1.6;">
+        <p><strong>🎯 Upload any audio clip and find similar segments in our music database!</strong></p>
+        <p>Our AI analyzes your audio and finds the most similar segments from known songs.</p>
+        <p style="font-size: 0.95em; color: #777; margin-top: 15px;">
+            📁 Supported formats: MP3, WAV, M4A, FLAC<br>
+            ⏱️ Processing time: ~15-30 seconds per file<br>
+            🎼 Database: covers80 collection with segmented analysis
+        </p>
+    </div>
+    """,
+    examples=[],
+    css=custom_css,
+    theme=gr.themes.Soft(
+        primary_hue="blue",
+        secondary_hue="purple",
+        neutral_hue="gray",
+        font=[gr.themes.GoogleFont("Inter"), "Arial", "sans-serif"]
+    ),
+    elem_classes=["main-container"],
+    allow_flagging="never"
+)
+# Start the server only when this file is run as a script; it listens on
+# all interfaces (0.0.0.0) at port 7860 without a public share link.
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_api=False,
+        show_error=True,
+        share=False
+    )

inference.py ADDED Viewed

	@@ -0,0 +1,112 @@

+import os
+import glob
+from compare import get_one_result
+from segment_transcription import segment_transcription
+def inference(audio_path):
+    segment_datas = segment_transcription(audio_path)
+    result = get_one_result(segment_datas)
+    final_result = result_formatting(result)
+    return final_result
+def find_closest_segment_file(song_title, target_time):
+    """
+    covers80 폴더에서 해당 곡의 가장 가까운 시간대 세그먼트 파일을 찾음
+    """
+    # 가능한 패턴들로 검색
+    patterns = [
+        f"covers80/{song_title}_segments/*.wav",
+        f"covers80/*{song_title}*_segments/*.wav",
+        f"covers80/{song_title}*/*.wav"
+    ]
+    segment_files = []
+    for pattern in patterns:
+        segment_files.extend(glob.glob(pattern))
+    if not segment_files:
+        return None
+    # 파일명에서 시간 추출하고 target_time과 가장 가까운 것 찾기
+    closest_file = None
+    min_diff = float('inf')
+    for file_path in segment_files:
+        filename = os.path.basename(file_path)
+        try:
+            # 파일명에서 시간 추출 (예: "53.333.wav" -> 53.333)
+            time_str = filename.replace('.wav', '')
+            file_time = float(time_str)
+            diff = abs(file_time - target_time)
+            if diff < min_diff:
+                min_diff = diff
+                closest_file = file_path
+        except ValueError:
+            continue
+    return closest_file
+def result_formatting(result):
+    """
+    get_one_result에서 나온 결과를 포맷팅
+    result: sorted list of CompareHelper objects
+    """
+    if not result or len(result) == 0:
+        return {
+            'matches': [],
+            'message': 'No matches found'
+        }
+    # 에러 메시지 체크
+    if isinstance(result, list) and len(result) > 0 and isinstance(result[0], str):
+        return {
+            'matches': [],
+            'message': result[0]  # "there is no note for this song"
+        }
+    # 상위 3개 결과 추출
+    top_3_results = []
+    for i, compare_helper in enumerate(result[:3]):
+        score = compare_helper.data[0]  # similarity score
+        test_label = compare_helper.data[1]  # test song info
+        library_label = compare_helper.data[2]  # matched song info
+        # 라이브러리 레이블에서 정보 추출
+        song_title = library_label.get('title', 'Unknown Song')
+        library_time = library_label.get('time', 0)  # 매치된 구간의 시간
+        # 테스트 레이블에서 정보 추출
+        test_time = test_label.get('time', 0) if test_label else 0  # 입력 곡의 시간
+        # 가장 가까운 세그먼트 파일 찾기
+        segment_file = find_closest_segment_file(song_title, library_time)
+        match_info = {
+            'rank': i + 1,
+            'score': float(score),
+            'song_title': song_title,
+            'segment_file': segment_file,
+            'test_time': float(test_time),  # 입력 곡에서 매치된 시간
+            'library_time': float(library_time),  # 라이브러리 곡에서 매치된 시간
+            'confidence': f"{score * 100:.1f}%",
+            'time_match': f"Input: {test_time:.1f}s ↔ Library: {library_time:.1f}s"
+        }
+        top_3_results.append(match_info)
+    return {
+        'matches': top_3_results,
+        'message': 'success'
+    }
+if __name__ == "__main__":
+    result = inference("/home/ubuntu/data/coding/icassp-plagiarism-demo/KEON ＜3 - I GASLIGHT MYSELF ｜ Udio [The%20Untitled].mp3")
+    print("Inference Result:")
+    for match in result['matches']:
+        print(f"Rank {match['rank']}: {match['song_title']}")
+        print(f"  Score: {match['confidence']}")
+        print(f"  Time Match: {match['time_match']}")
+        print(f"  Segment File: {match['segment_file']}")
+        print()

requirements.txt ADDED Viewed

	@@ -0,0 +1,116 @@

+absl-py==2.3.1
+antlr4-python3-runtime==4.9.3
+astunparse==1.6.3
+attrs==25.3.0
+audioread==3.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==1.17.1
+charset-normalizer==3.4.3
+cloudpickle==3.1.1
+Cython==3.1.3
+decorator==5.2.1
+demucs==4.0.1
+dora_search==0.1.12
+einops==0.8.1
+filelock==3.16.1
+flatbuffers==25.2.10
+fsspec==2025.3.0
+gast==0.4.0
+google-auth==2.40.3
+google-auth-oauthlib==1.0.0
+google-pasta==0.2.0
+grpcio==1.70.0
+h5py==3.11.0
+hf-xet==1.1.10
+huggingface-hub==0.34.4
+idna==3.10
+importlib_metadata==8.5.0
+importlib_resources==6.4.5
+Jinja2==3.1.6
+joblib==1.4.2
+jsonpickle==4.1.1
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+julius==0.2.7
+keras==2.13.1
+lameenc==1.8.1
+lazy_loader==0.4
+libclang==18.1.1
+librosa==0.11.0
+llvmlite==0.41.1
+madmom==0.16.1
+Markdown==3.7
+MarkupSafe==2.1.5
+mido==1.3.3
+mir_eval==0.8.2
+mpmath==1.3.0
+msgpack==1.1.1
+networkx==3.1
+numba==0.58.1
+numpy==1.23.5
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.9.86
+nvidia-nvtx-cu12==12.1.105
+oauthlib==3.3.1
+omegaconf==2.3.0
+openunmix==1.2.1
+opt_einsum==3.4.0
+packaging==25.0
+pandas==2.0.3
+pillow==10.4.0
+pkgutil_resolve_name==1.3.10
+platformdirs==4.3.6
+pooch==1.8.2
+pretty_midi==0.2.10
+protobuf==4.25.8
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+python-dateutil==2.9.0.post0
+pytz==2025.2
+PyYAML==6.0.2
+referencing==0.35.1
+requests==2.32.4
+requests-oauthlib==2.0.0
+retrying==1.4.2
+rpds-py==0.20.1
+rsa==4.9.1
+safetensors==0.5.3
+scikit-learn==1.3.2
+scipy==1.10.1
+six==1.17.0
+soundfile==0.13.1
+soxr==0.3.7
+submitit==1.5.3
+sympy==1.13.3
+tensorboard==2.13.0
+tensorboard-data-server==0.7.2
+tensorflow==2.13.1
+tensorflow-estimator==2.13.0
+tensorflow-io-gcs-filesystem==0.34.0
+termcolor==2.4.0
+threadpoolctl==3.5.0
+timm==1.0.19
+torch==2.4.1
+torchaudio==2.4.1
+torchvision==0.19.1
+tqdm==4.67.1
+treetable==0.2.6
+triton==3.0.0
+typing_extensions==4.13.2
+tzdata==2025.2
+urllib3==2.2.3
+vamp==1.1.0
+Werkzeug==3.0.6
+wrapt==1.17.3
+zipp==3.20.2