# Spaces status: Sleeping
import os
import random
import re
import subprocess
import tempfile

import gradio as gr
import numpy as np
import pretty_midi
from datasets import load_dataset
# ==========================================
# 1. DATA PREPARATION (RANDOM SAMPLED)
# ==========================================
# Upper bound used to decide when a genre has gathered enough progressions.
MAX_PROGRESSIONS = 2000

print(f"Downloading and shuffling dataset... targeting {MAX_PROGRESSIONS} random progressions per genre.")

# The corpus is opened in streaming mode and shuffled through a 10000-row
# buffer; the shuffle seed is drawn afresh each time this module runs.
dataset = load_dataset(
    "ailsntua/Chordonomicon",
    split="train",
    streaming=True,
).shuffle(seed=random.randint(1, 1000), buffer_size=10000)

target_genres = ["pop", "rock", "jazz", "metal", "country", "blues", "r&b", "folk", "electronic"]
corpus_by_genre = {genre: set() for genre in target_genres}

# Captures "<section tag> chord chord ..." pairs from a row's chord string.
pattern = re.compile(r'<([^>]+)>\s*([^<]+)')

for row in dataset:
    # Finished as soon as no genre is still below the cap.
    if not any(len(found) < MAX_PROGRESSIONS for found in corpus_by_genre.values()):
        break

    genre_text = f"{str(row.get('main_genre', '')).lower()} {str(row.get('genres', '')).lower()}"

    # First target genre (in list order) that is mentioned in the row and
    # still has room; rows matching nothing are skipped.
    matched_genre = next(
        (
            name
            for name in target_genres
            if name in genre_text and len(corpus_by_genre[name]) < MAX_PROGRESSIONS
        ),
        None,
    )
    if matched_genre is None:
        continue

    chord_string = row.get('chords', '')
    if not chord_string:
        continue

    bucket = corpus_by_genre[matched_genre]
    for tag, chords in pattern.findall(chord_string):
        tag = tag.lower().strip()
        chords = " ".join(chords.split())
        # Only verse and chorus sections are kept as training progressions.
        if chords and ('verse' in tag or 'chorus' in tag):
            bucket.add(chords)

# The sets de-duplicated progressions while loading; later code works on lists.
corpus_by_genre = {g: list(chords) for g, chords in corpus_by_genre.items()}
print("Randomized dataset loaded successfully!")
| # ========================================== | |
| # 2. MARKOV CHAIN LOGIC | |
| # ========================================== | |
def train_markov_model(corpus, order=1):
    """Count chord transitions over *corpus* for a Markov chain of the given order.

    Args:
        corpus: Iterable of progressions, each a whitespace-separated string
            of chord symbols. Blank progressions are ignored.
        order: Number of preceding tokens that make up one state.

    Returns:
        A dict mapping each state (a tuple of tokens, padded with the
        ``"*S*"`` start marker at the beginning of a progression) to a dict
        of ``{next_token: count}``. The ``"*E*"`` marker is counted as the
        token that follows the last chord of every progression.
    """
    start_token, end_token = "*S*", "*E*"
    transitions = {}

    for progression in corpus:
        tokens = progression.split()
        if not tokens:
            continue

        state = (start_token,) * order
        # Appending the end marker lets the closing transition be counted by
        # the same loop as every other one.
        for token in tokens + [end_token]:
            followers = transitions.setdefault(state, {})
            followers[token] = followers.get(token, 0) + 1
            state = state[1:] + (token,)

    return transitions
def get_next_chord(current_state, markov_model):
    """Randomly pick the token that follows *current_state*.

    Args:
        current_state: State tuple to look up in the model.
        markov_model: Mapping of state -> ``{next_token: count}``.

    Returns:
        ``"*E*"`` when the state is not in the model; otherwise one of the
        recorded follower tokens, drawn with ``np.random.choice`` using
        probabilities proportional to the recorded counts.
    """
    followers = markov_model.get(current_state)
    if followers is None:
        return "*E*"

    candidates = list(followers)
    total = sum(followers.values())
    weights = [followers[token] / total for token in candidates]
    return np.random.choice(candidates, p=weights)
def generate_progression(markov_model, target_length, order=1):
    """Sample a chord progression from a trained transition model.

    Sampling starts from the all-``"*S*"`` state. Whenever the ``"*E*"``
    marker is drawn the walk restarts from that start state instead of
    stopping, so the output can be stitched together from several runs.

    Args:
        markov_model: Mapping of state -> ``{next_token: count}``.
        target_length: Number of chords to aim for.
        order: Number of tokens per state; must match the model's order.

    Returns:
        The sampled chords joined by single spaces. The result may be shorter
        than ``target_length`` (even empty) because at most
        ``target_length * 5`` draws are made.
    """
    start_token, end_token = "*S*", "*E*"
    initial_state = (start_token,) * order

    state = initial_state
    chords = []
    draws_left = target_length * 5

    while draws_left > 0 and len(chords) < target_length:
        draws_left -= 1
        drawn = get_next_chord(state, markov_model)
        if drawn == end_token:
            # End of one sampled progression: begin again from the start state.
            state = initial_state
        else:
            chords.append(drawn)
            state = state[1:] + (drawn,)

    return " ".join(chords)
| # ========================================== | |
| # 3. AUDIO SYNTHESIS & VOICING LOGIC | |
| # ========================================== | |
# Note name -> MIDI pitch within the octave starting at 60. Sharps are spelled
# with a trailing 's' and flats with a trailing 'b'.
NOTE_TO_MIDI = {
    'C': 60, 'Cs': 61, 'Db': 61, 'D': 62, 'Ds': 63, 'Eb': 63, 'E': 64,
    'F': 65, 'Fs': 66, 'Gb': 66, 'G': 67, 'Gs': 68, 'Ab': 68, 'A': 69,
    'As': 70, 'Bb': 70, 'B': 71,
}

# Reverse lookup used for display; accidentals are always spelled as flats.
MIDI_TO_NOTE = {
    60: 'C', 61: 'Db', 62: 'D', 63: 'Eb', 64: 'E', 65: 'F',
    66: 'Gb', 67: 'G', 68: 'Ab', 69: 'A', 70: 'Bb', 71: 'B',
}

# Chord quality suffix -> semitone offsets from the root.
CHORD_INTERVALS = {
    # --- 13ths ---
    'maj13': [0, 4, 7, 11, 14, 21],  # Root, 3rd, 5th, Maj7, 9th, 13th
    'min13': [0, 3, 7, 10, 14, 21],
    '13': [0, 4, 7, 10, 14, 21],     # Dominant 13
    'add13': [0, 4, 7, 21],
    'madd13': [0, 3, 7, 21],
    # --- 11ths ---
    'maj11': [0, 4, 7, 11, 14, 17],  # Root, 3rd, 5th, Maj7, 9th, 11th
    'min11': [0, 3, 7, 10, 14, 17],
    '11': [0, 4, 7, 10, 14, 17],     # Dominant 11
    '7#11': [0, 4, 7, 10, 18],       # Lydian Dominant flavor
    'm711': [0, 3, 7, 10, 17],       # Min7 add 11
    # --- 9ths ---
    'maj9': [0, 4, 7, 11, 14],
    'min9': [0, 3, 7, 10, 14],
    '9': [0, 4, 7, 10, 14],          # Dominant 9
    'add9': [0, 4, 7, 14],
    'madd9': [0, 3, 7, 14],
    '7b9': [0, 4, 7, 10, 13],        # Altered Dominant (flat 9)
    '7#9': [0, 4, 7, 10, 15],        # The "Hendrix" Chord (sharp 9)
    # --- 7ths ---
    'maj7': [0, 4, 7, 11],
    'min7': [0, 3, 7, 10],
    '7': [0, 4, 7, 10],              # Dominant 7
    'dim7': [0, 3, 6, 9],            # Fully diminished 7th
    'm7b5': [0, 3, 6, 10],           # Half-diminished 7th
    'aug7': [0, 4, 8, 10],           # Augmented 7th
    'mmaj7': [0, 3, 7, 11],          # Minor-Major 7th (James Bond chord)
    '7sus4': [0, 5, 7, 10],          # Dominant 7 suspended 4th
    # --- 6ths ---
    '6': [0, 4, 7, 9],               # Major 6th
    'm6': [0, 3, 7, 9],              # Minor 6th
    # --- Sus & Altered Triads ---
    'sus4': [0, 5, 7],               # Suspended 4th (replaces 3rd)
    'sus2': [0, 2, 7],               # Suspended 2nd (replaces 3rd)
    'aug': [0, 4, 8],                # Augmented triad
    'dim': [0, 3, 6],                # Diminished triad
    # --- Standard Triads & Power Chords ---
    'maj': [0, 4, 7],
    'min': [0, 3, 7],
    'no3d': [0, 7],                  # Power chord (spelling used by the dataset)
    '5': [0, 7],                     # Standard power chord notation
}

# Longest suffix first, so that e.g. 'min7' is tried before 'min' when
# matching by prefix.
SORTED_QUALITIES = sorted(CHORD_INTERVALS.keys(), key=len, reverse=True)


def _split_root(symbol):
    """Split a non-empty note/chord symbol into ``(root_name, remainder)``.

    An 's' (sharp) or 'b' (flat) directly after the note letter belongs to
    the root, except when that 's' opens a "sus" quality: "Csus4" is C with
    a sus4 quality, not C-sharp followed by "us4".
    """
    root = symbol[0]
    rest = symbol[1:]
    if rest[:1] in ('s', 'b') and not rest.startswith('sus'):
        root += rest[0]
        rest = rest[1:]
    return root, rest


def parse_chord_to_midi(chord_string):
    """Convert a chord symbol into MIDI pitches plus a display quality.

    Args:
        chord_string: Symbol such as ``"Amin"``, ``"Fsmaj7"``, ``"Csus4"`` or
            a slash chord like ``"Amin/E"``. ``"N"`` and the empty string
            mean "no chord".

    Returns:
        ``(pitches, quality)``. ``pitches`` holds the chord tones in interval
        order starting with the root; for a slash chord the bass note is
        appended last, placed below the root and then one octave lower still.
        ``quality`` is the matched suffix (``"maj"`` when none matches), with
        ``"/<bass>"`` appended for slash chords. ``([], "")`` is returned for
        "no chord" and for symbols whose main chord is empty (e.g. ``"/E"``).

    Unknown root or bass names fall back to C (MIDI 60).
    """
    if not chord_string or chord_string == 'N':
        return [], ""

    # 1. Separate an optional slash-chord bass note from the main chord.
    bass_note_str = None
    if '/' in chord_string:
        parts = chord_string.split('/')
        chord_string = parts[0]   # The main chord (e.g. 'Amin')
        bass_note_str = parts[1]  # The bass note (e.g. 'E')

    # Nothing before the slash: there is no root to read.
    if not chord_string:
        return [], ""

    # 2. Root note, including its accidental.
    root_note, remainder = _split_root(chord_string)
    root_midi = NOTE_TO_MIDI.get(root_note, 60)

    # 3. Chord quality: the longest known suffix that the remainder starts
    #    with, defaulting to a major triad.
    quality = next((q for q in SORTED_QUALITIES if remainder.startswith(q)), 'maj')
    pitches = [root_midi + offset for offset in CHORD_INTERVALS[quality]]

    # 4. Custom bass note for slash chords.
    if bass_note_str:
        bass_name, _ = _split_root(bass_note_str)
        bass_midi = NOTE_TO_MIDI.get(bass_name, 60)
        # Force the bass note to sit below the root ...
        while bass_midi >= root_midi:
            bass_midi -= 12
        # ... then drop it one more octave for a deeper foundation.
        bass_midi -= 12
        pitches.append(bass_midi)
        # Keep the slash in the display name.
        quality += "/" + bass_note_str

    return pitches, quality
# Instrument label -> General MIDI program number (0-indexed).
# The insertion order is the order in which labels are listed to the user.
INSTRUMENT_MAP = {
    # Keyboards
    "Acoustic Grand Piano": 0,
    "Electric Piano (Rhodes)": 4,
    "Drawbar Organ": 16,
    # Guitars
    "Acoustic Guitar (Nylon)": 24,
    "Electric Guitar (Clean)": 27,
    "Electric Guitar (Distortion)": 30,
    # Synth pads
    "Synth Pad 1 (New Age)": 88,
    "Synth Pad 2 (Warm)": 89,
    "Synth Pad 3 (Polysynth)": 90,
    "Synth Pad 4 (Choir)": 91,
    "Synth Pad 7 (Halo)": 94,
    "Synth Pad 8 (Sweep)": 95,
    # Effects
    "Sci-Fi / Atmosphere": 103,
}
def apply_voicing(pitches, voicing_type):
    """Return *pitches* re-voiced according to *voicing_type*.

    The input list is not modified; the work is done on a sorted copy.

    Args:
        pitches: MIDI pitch numbers of one chord.
        voicing_type: One of ``"First Inversion"``, ``"Second Inversion"``,
            ``"Random Voice Leading"``, ``"Open / Spread"``; any other value
            (e.g. ``"Root Position"``) only sorts the notes.

    Returns:
        The voiced pitches. They are sorted ascending for every voicing
        except ``"Open / Spread"``, whose result keeps the positions the
        notes had before they were shifted. An empty input is returned as-is.
    """
    if not pitches:
        return pitches

    voiced = sorted(pitches)
    note_count = len(voiced)

    if voicing_type == "Open / Spread":
        if note_count >= 3:
            voiced[0] -= 12  # lowest note down an octave
            voiced[1] += 12  # second-lowest note up an octave
        return voiced

    # Number of lowest notes that get lifted by an octave.
    if voicing_type == "First Inversion":
        lifted = 1
    elif voicing_type == "Second Inversion":
        lifted = 2
    elif voicing_type == "Random Voice Leading":
        lifted = random.choice([0, 1, 2])
    else:
        lifted = 0

    # An inversion is applied only when the chord has more notes than the
    # number being lifted; otherwise the chord is left in root position.
    if note_count > lifted:
        for index in range(lifted):
            voiced[index] += 12

    return sorted(voiced)
def generate_audio_file(progression_string, instrument_name, transpose_semitones, voicing_type):
    """Render a chord progression to a MIDI file and, via fluidsynth, a WAV file.

    Every chord is held for 0.5 s. Each call writes into its own freshly
    created temporary directory so that concurrent requests cannot overwrite
    each other's files.

    Args:
        progression_string: Whitespace-separated chord symbols.
        instrument_name: Key of ``INSTRUMENT_MAP``; unknown names use program 0.
        transpose_semitones: Semitones added to every pitch.
        voicing_type: Voicing name forwarded to ``apply_voicing``.

    Returns:
        ``(wav_path, midi_path, chord_names)`` where ``chord_names`` is the
        space-joined list of transposed chord names. ``wav_path`` is ``None``
        when fluidsynth could not be run or produced no file. For a blank
        progression ``(None, None, "")`` is returned.
    """
    if not progression_string.strip():
        return None, None, ""

    # Look up the MIDI program number for the chosen instrument.
    prog_num = INSTRUMENT_MAP.get(instrument_name, 0)
    # Programs above 20 (guitars, synths) get a higher velocity so they cut through.
    velocity = 100 if prog_num > 20 else 85

    midi = pretty_midi.PrettyMIDI(initial_tempo=120)
    inst = pretty_midi.Instrument(program=prog_num)
    current_time = 0.0
    transposed_chord_names = []

    for chord in progression_string.split():
        pitches, quality = parse_chord_to_midi(chord)
        if not pitches:
            continue

        # Transpose
        pitches = [p + transpose_semitones for p in pitches]

        # The root is the first pitch; fold it into the 60-71 octave to name it.
        root_name = MIDI_TO_NOTE.get(((pitches[0] - 60) % 12) + 60, "C")
        # For slash chords the parser appends the bass pitch last and puts the
        # untransposed bass name after '/'. Rebuild that name from the
        # transposed pitch so the displayed chord matches what is played.
        base_quality, slash, _ = quality.partition("/")
        if slash:
            bass_name = MIDI_TO_NOTE.get(((pitches[-1] - 60) % 12) + 60, "C")
            quality = f"{base_quality}/{bass_name}"
        transposed_chord_names.append(root_name + quality)

        # Drop the octave if it's a distorted metal guitar
        if instrument_name == "Electric Guitar (Distortion)":
            pitches = [p - 12 for p in pitches]

        pitches = apply_voicing(pitches, voicing_type)

        for pitch in pitches:
            # MIDI note numbers are limited to 0-127; skip anything an extreme
            # transposition pushed outside that range.
            if not 0 <= pitch <= 127:
                continue
            inst.notes.append(
                pretty_midi.Note(
                    velocity=velocity,
                    pitch=pitch,
                    start=current_time,
                    end=current_time + 0.5,
                )
            )
        current_time += 0.5

    midi.instruments.append(inst)
    chord_names = " ".join(transposed_chord_names)

    # A private directory per call: the file names stay stable for downloads
    # while the full paths are unique.
    out_dir = tempfile.mkdtemp(prefix="markhords_")
    midi_path = os.path.join(out_dir, 'generated_progression.mid')
    wav_path = os.path.join(out_dir, 'generated_progression.wav')
    midi.write(midi_path)

    try:
        result = subprocess.run(
            ['fluidsynth', '-ni', '/usr/share/sounds/sf2/FluidR3_GM.sf2', midi_path, '-F', wav_path, '-r', '44100'],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
        )
    except OSError as exc:
        # fluidsynth is missing or not executable: still hand back the MIDI.
        print(f"fluidsynth could not be started: {exc}")
        return None, midi_path, chord_names

    # The directory is new, so an existing WAV can only come from this run.
    if result.returncode != 0 or not os.path.exists(wav_path):
        print(f"fluidsynth failed (exit code {result.returncode}): {result.stderr.strip()}")
        return None, midi_path, chord_names

    return wav_path, midi_path, chord_names
| # ========================================== | |
| # 4. GRADIO INTERFACE | |
| # ========================================== | |
def app_logic(genre, order, length, instrument, transpose, voicing):
    """Train on one genre, sample a progression and render it.

    Args:
        genre: Key into ``corpus_by_genre``.
        order: Markov chain order (converted with ``int``).
        length: Target number of chords (converted with ``int``).
        instrument: Instrument label passed to ``generate_audio_file``.
        transpose: Transposition in semitones (converted with ``int``).
        voicing: Voicing name passed to ``generate_audio_file``.

    Returns:
        ``(raw_chords, transposed_chords, audio_path, midi_path)``. When the
        genre has no data or nothing could be generated, the first element is
        an explanatory message and the remaining ones are ``""``, ``None``,
        ``None``.
    """
    progressions = corpus_by_genre.get(genre, [])
    if not progressions:
        message = f"Error: No chords found for {genre}. Wait for the dataset to finish loading in the console."
        return message, "", None, None

    chain_order = int(order)
    model = train_markov_model(progressions, order=chain_order)
    raw_chords = generate_progression(model, target_length=int(length), order=chain_order)

    if not raw_chords.strip():
        message = "(Generation stopped. The Markov chain hit an early dead end. Try again or lower the Order.)"
        return message, "", None, None

    audio_path, midi_path, final_transposed_chords = generate_audio_file(
        raw_chords, instrument, int(transpose), voicing
    )
    return raw_chords, final_transposed_chords, audio_path, midi_path
def _handle_generate(genre, order, length, instrument, transpose, voicing):
    """Click handler: lower-case the genre label into its corpus key, then delegate."""
    return app_logic(genre.lower(), order, length, instrument, transpose, voicing)


with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# Markhords: Markov Model Chord Progression Generator")

    with gr.Row():
        # Left column: all inputs plus the trigger button.
        with gr.Column(scale=1):
            gr.Markdown(f"### 1. Training Data (Up to {MAX_PROGRESSIONS} songs per genre)")
            genre_dropdown = gr.Dropdown(
                choices=[g.capitalize() for g in target_genres],
                value="Pop",
                label="Dataset Genre",
            )

            gr.Markdown("### 2. Generation Settings")
            order_slider = gr.Slider(
                minimum=1, maximum=3, step=1, value=1,
                label="Markov Chain Order",
            )
            length_slider = gr.Slider(
                minimum=2, maximum=16, step=1, value=8,
                label="Target Length (Chords)",
            )

            gr.Markdown("### 3. Post-Processing")
            transpose_slider = gr.Slider(
                minimum=-12, maximum=12, step=1, value=0,
                label="Transpose (Semitones)",
            )
            voicing_dropdown = gr.Dropdown(
                choices=[
                    "Root Position",
                    "First Inversion",
                    "Second Inversion",
                    "Open / Spread",
                    "Random Voice Leading",
                ],
                value="Open / Spread",  # default pairs with the default synth pad
                label="Chord Voicings",
            )
            # The instrument choices are the keys of INSTRUMENT_MAP.
            instrument_dropdown = gr.Dropdown(
                choices=list(INSTRUMENT_MAP.keys()),
                value="Synth Pad 2 (Warm)",
                label="Instrument",
            )
            generate_btn = gr.Button("Generate Chords", variant="primary")

        # Right column: read-only results.
        with gr.Column(scale=1):
            gr.Markdown("### Output")
            output_raw_text = gr.Textbox(
                label="Original Generated Progression", lines=2, interactive=False
            )
            output_final_text = gr.Textbox(
                label="Final Progression (After Transposition)", lines=2, interactive=False
            )
            output_audio = gr.Audio(label="Playback", type="filepath", autoplay=True)
            output_midi = gr.File(label="Download MIDI", interactive=False)

    # Input order matches the handler's parameters; output order matches the
    # 4-tuple returned by app_logic.
    generate_btn.click(
        fn=_handle_generate,
        inputs=[
            genre_dropdown,
            order_slider,
            length_slider,
            instrument_dropdown,
            transpose_slider,
            voicing_dropdown,
        ],
        outputs=[output_raw_text, output_final_text, output_audio, output_midi],
    )

demo.launch()