"""Structural segmentation (intro, verse, chorus, etc.)."""

import numpy as np
import librosa

# Analysis parameters shared by the functions below.
_SAMPLE_RATE = 22050          # Hz; audio is resampled to this rate on load.
_N_MFCC = 13                  # MFCC coefficients per frame / per segment.
_SECONDS_PER_SECTION = 25     # Target average section length used to pick k.
_SIMILARITY_THRESHOLD = 0.85  # Cosine similarity above which a section repeats.
_LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"


def detect_sections(
    audio_path: str,
    min_section_duration: float = 8.0,
) -> list[tuple[float, str]]:
    """Detect structural sections in an audio file.

    The file is loaded as mono audio, its MFCC frames are clustered into
    contiguous segments, segments shorter than ``min_section_duration`` are
    merged into their predecessor, and each remaining segment is labelled.

    Args:
        audio_path: Path of the audio file to analyse.
        min_section_duration: Minimum length, in seconds, of a reported
            section.

    Returns:
        ``(start_time_seconds, label)`` pairs in chronological order. The
        first label is ``"Intro"``; later ones are ``"Section <letter>"``,
        where segments judged similar share a letter.
    """
    y, sr = librosa.load(audio_path, sr=_SAMPLE_RATE, mono=True)
    duration = len(y) / sr

    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=_N_MFCC)
    n_frames = mfcc.shape[1]

    if n_frames < 2:
        # Splitting into k >= 2 segments needs at least two frames; a clip
        # this short is reported as a single section.
        bound_times = np.array([0.0])
    else:
        # Aim for roughly one section per _SECONDS_PER_SECTION seconds, but
        # always at least two clusters and never more than n_frames - 1
        # (unless that would drop below two).
        k = max(2, min(n_frames - 1, int(duration / _SECONDS_PER_SECTION)))
        bounds = librosa.segment.agglomerative(mfcc, k=k)
        bound_times = librosa.frames_to_time(bounds, sr=sr)

        # Make sure the first section starts at the beginning of the audio.
        if len(bound_times) == 0 or bound_times[0] > 0.5:
            bound_times = np.concatenate([[0.0], bound_times])

    bound_times = _merge_short_segments(
        bound_times, duration, min_section_duration
    )
    labels = _assign_labels(y, sr, bound_times, duration)
    return list(zip(bound_times.tolist(), labels))


def _merge_short_segments(bounds, duration: float, min_dur: float) -> np.ndarray:
    """Drop boundaries that would create a segment shorter than ``min_dur``.

    Args:
        bounds: Ascending segment start times in seconds.
        duration: Total audio length in seconds; bounds the last segment.
        min_dur: Minimum allowed segment length in seconds.

    Returns:
        Array of the retained start times. The first input boundary is
        always kept; an empty input yields an empty array.
    """
    if len(bounds) == 0:
        return np.array([], dtype=float)

    merged = [bounds[0]]
    for t in bounds[1:]:
        # Keep a boundary only if the segment it closes is long enough.
        if t - merged[-1] >= min_dur:
            merged.append(t)

    # The last segment runs from the final boundary to the end of the audio.
    # If it is too short, fold it into the previous segment. One pop is
    # enough: the previous segment already met min_dur and only gets longer.
    if len(merged) > 1 and duration - merged[-1] < min_dur:
        merged.pop()

    return np.array(merged)


def _assign_labels(y, sr: int, bound_times, duration: float) -> list[str]:
    """Label each segment, reusing a letter for segments that sound alike.

    Each segment is summarised by the mean of its MFCC frames. The first
    segment is always ``"Intro"``. Every later segment is compared with all
    earlier ones by cosine similarity; if its best match exceeds
    ``_SIMILARITY_THRESHOLD`` and that match carries a letter, the letter is
    reused, otherwise the next unused letter is taken (wrapping after ``Z``).

    Args:
        y: Mono audio samples.
        sr: Sample rate of ``y`` in Hz.
        bound_times: Segment start times in seconds.
        duration: Unused; kept so the signature stays unchanged.

    Returns:
        One label per entry of ``bound_times``.
    """
    n = len(bound_times)
    if n == 0:
        return []
    if n == 1:
        return ["Intro"]

    segment_features = []
    for i, start in enumerate(bound_times):
        start_sample = int(start * sr)
        end_sample = int(bound_times[i + 1] * sr) if i + 1 < n else len(y)
        seg = y[start_sample:end_sample]
        if len(seg) < sr // 4:
            # Under a quarter of a second of audio: use a zero vector, which
            # has similarity 0.0 to everything and so never matches.
            segment_features.append(np.zeros(_N_MFCC))
        else:
            seg_mfcc = librosa.feature.mfcc(y=seg, sr=sr, n_mfcc=_N_MFCC)
            segment_features.append(np.mean(seg_mfcc, axis=1))

    labels = ["Intro"]
    letter_of = {}  # segment index -> letter; the intro (index 0) has none
    next_letter = 0
    for i in range(1, n):
        sims = [
            _cosine_sim(segment_features[i], earlier)
            for earlier in segment_features[:i]
        ]
        best_j = int(np.argmax(sims))  # first maximum, i.e. earliest match
        if sims[best_j] > _SIMILARITY_THRESHOLD and best_j in letter_of:
            letter = letter_of[best_j]
        else:
            letter = _LETTERS[next_letter % len(_LETTERS)]
            next_letter += 1
        # Record the letter for every labelled segment, including repeats,
        # so that a later segment matching a repeat inherits its letter.
        letter_of[i] = letter
        labels.append(f"Section {letter}")
    return labels


def _cosine_sim(a, b) -> float:
    """Return the cosine similarity of ``a`` and ``b``.

    Returns 0.0 if either vector has zero norm.
    """
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return float(np.dot(a, b) / (norm_a * norm_b))