| import gradio as gr |
| import numpy as np |
| import librosa |
| import soundfile as sf |
| from TTS.api import TTS |
| import torch |
| import os |
| import tempfile |
|
|
| |
# Load the multilingual YourTTS voice-conversion model once at import time.
# Pre-set to None so a failed download/initialization still lets the UI start;
# per-request code checks `tts is None` and degrades gracefully.
tts = None
try:
    tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False)
except Exception as e:
    print(f"Error initializing TTS model: {e}")
|
|
def load_audio(audio_path):
    """Decode an audio file at its native sample rate.

    Returns a ``(samples, sample_rate)`` pair, or ``(None, None)`` when the
    file cannot be read/decoded (the error is printed, not raised).
    """
    try:
        samples, sample_rate = librosa.load(audio_path, sr=None)
    except Exception as e:
        print(f"Error loading audio: {e}")
        return None, None
    return samples, sample_rate
|
|
def save_audio(audio, sr, path):
    """Write ``audio`` samples to ``path`` at sample rate ``sr``.

    Failures are logged to stdout rather than raised, matching the
    best-effort error style used throughout this module.
    """
    try:
        sf.write(path, audio, sr)
    except Exception as e:
        print(f"Error saving audio: {e}")
|
|
def pitch_shift(audio, sr, n_steps):
    """Shift ``audio`` by ``n_steps`` semitones.

    On failure the error is printed and the original audio is returned
    unchanged, so callers always get a usable signal back.
    """
    try:
        shifted = librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
    except Exception as e:
        print(f"Error in pitch shifting: {e}")
        return audio
    return shifted
|
|
def change_voice(audio_path, pitch_shift_amount, formant_shift_amount):
    """Pitch-shift the input, run voice conversion, then apply a formant-style shift.

    Args:
        audio_path: Path to the source audio file.
        pitch_shift_amount: Semitones to shift before conversion.
        formant_shift_amount: Semitones for the post-conversion shift
            (approximates a formant shift with another pitch shift).

    Returns:
        ``(sample_rate, samples)`` on success, ``(None, None)`` on any failure.
    """
    if tts is None:
        return None, None

    audio, sr = load_audio(audio_path)
    if audio is None or sr is None:
        return None, None

    pitched_audio = pitch_shift(audio, sr, pitch_shift_amount)

    source_path = None
    converted_path = None
    try:
        # mkstemp + close (instead of an open NamedTemporaryFile) so that
        # soundfile can reopen the path for writing on Windows as well.
        fd, source_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        save_audio(pitched_audio, sr, source_path)

        fd, converted_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)

        # NOTE(review): target_wav is a placeholder path — conversion will fail
        # until it points at a real reference recording; confirm the asset exists.
        # Coqui's `voice_conversion` returns a waveform, not a file path, so the
        # file-writing variant `voice_conversion_to_file` is used here.
        tts.voice_conversion_to_file(
            source_wav=source_path,
            target_wav="path/to/female_target_voice.wav",
            file_path=converted_path,
        )

        converted_audio, _ = load_audio(converted_path)
        if converted_audio is None:
            return None, None

        formant_shifted_audio = librosa.effects.pitch_shift(
            converted_audio, sr=sr, n_steps=formant_shift_amount
        )
        return sr, formant_shifted_audio
    except Exception as e:
        print(f"Error in voice conversion: {e}")
        return None, None
    finally:
        # Always remove temp files, even when conversion raises — the original
        # code leaked them on any exception.
        for path in (source_path, converted_path):
            if path is not None and os.path.exists(path):
                os.unlink(path)
|
|
def process_audio(audio_file, pitch_shift_amount, formant_shift_amount):
    """Gradio callback: transform the uploaded voice and return an output file path.

    Args:
        audio_file: Filepath from the gr.Audio input (None when nothing uploaded).
        pitch_shift_amount: Semitone shift from the "Pitch Shift" slider.
        formant_shift_amount: Semitone shift from the "Formant Shift" slider.

    Returns:
        Path to the transformed WAV file, or None on failure / empty input.
    """
    if audio_file is None:
        return None

    sr, audio = change_voice(audio_file, pitch_shift_amount, formant_shift_amount)
    if sr is None or audio is None:
        return None

    # Write to a unique temp file rather than a fixed "output_voice.wav":
    # concurrent Gradio requests would otherwise clobber each other's output.
    fd, output_path = tempfile.mkstemp(prefix="output_voice_", suffix=".wav")
    os.close(fd)
    save_audio(audio, sr, output_path)

    return output_path
|
|
| |
# Custom CSS injected into the Gradio app: light page background, a centered
# card-style container with shadow, and tinted panels for the input/output
# sections (classes referenced via elem_classes in the layout below).
custom_css = """
.gradio-container {
    background-color: #f0f4f8;
}
.container {
    max-width: 900px;
    margin: auto;
    padding: 20px;
    border-radius: 10px;
    background-color: white;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
    color: #2c3e50;
    text-align: center;
    font-size: 2.5em;
    margin-bottom: 20px;
}
.description {
    text-align: center;
    color: #34495e;
    margin-bottom: 30px;
}
.input-section, .output-section {
    background-color: #ecf0f1;
    padding: 20px;
    border-radius: 8px;
    margin-bottom: 20px;
}
.input-section h3, .output-section h3 {
    color: #2980b9;
    margin-bottom: 15px;
}
"""
|
|
| |
# Build the Gradio UI. The slider variables were previously named
# `pitch_shift` / `formant_shift`; `pitch_shift` rebound the module-level
# pitch_shift() function to a gr.Slider at import time, so change_voice()
# crashed with a TypeError at request time. Renamed to *_slider to fix the
# shadowing.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(
        """
        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
            <div style="display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;">
                <svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" fill="currentColor" viewBox="0 0 16 16" style="vertical-align: middle;">
                    <path d="M3.5 6.5A.5.5 0 0 1 4 7v1a4 4 0 0 0 8 0V7a.5.5 0 0 1 1 0v1a5 5 0 0 1-4.5 4.975V15h3a.5.5 0 0 1 0 1h-7a.5.5 0 0 1 0-1h3v-2.025A5 5 0 0 1 3 8V7a.5.5 0 0 1 .5-.5z"/>
                    <path d="M10 8a2 2 0 1 1-4 0V3a2 2 0 1 1 4 0v5zM8 0a3 3 0 0 0-3 3v5a3 3 0 0 0 6 0V3a3 3 0 0 0-3-3z"/>
                </svg>
                <h1 style="font-weight: 900; margin-bottom: 7px;">
                    AI Voice Changer
                </h1>
            </div>
            <p class="description">Transform any voice into a realistic female voice using advanced AI technology</p>
        </div>
        """
    )

    with gr.Row():
        with gr.Column(elem_classes="input-section"):
            gr.Markdown("### Input")
            audio_input = gr.Audio(type="filepath", label="Upload Voice")
            pitch_slider = gr.Slider(-12, 12, step=0.5, label="Pitch Shift", value=0)
            formant_slider = gr.Slider(-5, 5, step=0.1, label="Formant Shift", value=0)
            submit_btn = gr.Button("Transform Voice", variant="primary")

        with gr.Column(elem_classes="output-section"):
            gr.Markdown("### Output")
            audio_output = gr.Audio(label="Transformed Voice")

    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input, pitch_slider, formant_slider],
        outputs=audio_output,
    )

    gr.Markdown(
        """
        ### How to use:
        1. Upload an audio file containing the voice you want to transform.
        2. Adjust the Pitch Shift and Formant Shift sliders to fine-tune the voice (optional).
        3. Click the "Transform Voice" button to process the audio.
        4. Listen to the transformed voice in the output section.
        5. Download the transformed audio file if desired.

        Note: This application uses AI to transform voices. The quality of the output may vary depending on the input audio quality and the chosen settings.
        """
    )
|
|
# Launch the Gradio app only when this file is executed directly (not imported).
if __name__ == "__main__":
    demo.launch()