"""Gradio front-end for Microsoft Edge text-to-speech (edge-tts)."""

import asyncio
import os
import tempfile

import edge_tts
import gradio as gr


async def generate_audio(text, voice, rate, volume, pitch):
    """Synthesize *text* with edge-tts and return the path of the MP3 file.

    Args:
        text: Text to be spoken.
        voice: edge-tts voice short name, e.g. ``"zh-CN-YunxiNeural"``.
        rate: Signed percentage string such as ``"+10%"``.
        volume: Signed percentage string such as ``"-5%"``.
        pitch: Signed frequency string such as ``"+0Hz"``.

    Returns:
        Path to a newly created MP3 file.

    Raises:
        gr.Error: If edge-tts fails to produce or save the audio.
    """
    # A unique file per call: a fixed "output.mp3" would let concurrent
    # requests overwrite each other's audio.
    # NOTE(review): the file is not deleted after being served; it relies on
    # temp-directory cleanup (and on Gradio copying it into its own cache).
    fd, output_path = tempfile.mkstemp(prefix="edge_tts_", suffix=".mp3")
    os.close(fd)  # edge-tts reopens the path itself; only the name is needed.
    try:
        communicate = edge_tts.Communicate(
            text, voice, rate=rate, volume=volume, pitch=pitch
        )
        await communicate.save(output_path)
    except Exception as e:
        # Best-effort removal of the empty/partial file before reporting.
        try:
            os.remove(output_path)
        except OSError:
            pass
        print(f"生成音频失败: {e}")
        raise gr.Error(f"生成音频失败: {str(e)}") from e
    return output_path


def _signed(value, unit):
    """Format *value* as an always-signed integer followed by *unit*.

    edge-tts requires an explicit sign, so 0 becomes ``"+0%"`` and a value
    that truncates to zero (e.g. -0.5) also gets a sign.
    """
    return f"{int(value):+d}{unit}"


def text_to_speech(text, voice, rate, volume, pitch):
    """Gradio callback: convert slider values and synthesize *text*.

    Args:
        text: Text entered by the user.
        voice: Selected edge-tts voice short name.
        rate: Speaking-rate offset in percent (number).
        volume: Volume offset in percent (number).
        pitch: Pitch offset in Hz (number).

    Returns:
        Path of the generated MP3 file.

    Raises:
        gr.Error: If the text is empty or synthesis fails.
    """
    # Reject blank input locally instead of failing with an opaque
    # service error after a network round-trip.
    if not text or not text.strip():
        raise gr.Error("请输入要转换为语音的文本")

    try:
        rate_str = _signed(rate, "%")
        volume_str = _signed(volume, "%")
        pitch_str = _signed(pitch, "Hz")

        # asyncio.run creates a fresh loop and always closes it, including
        # when the coroutine raises.
        return asyncio.run(
            generate_audio(text, voice, rate_str, volume_str, pitch_str)
        )
    except gr.Error:
        # Already a user-facing error; do not wrap it in a second prefix.
        raise
    except Exception as e:
        print(f"文本转语音失败: {e}")
        raise gr.Error(f"文本转语音失败: {str(e)}") from e


def get_voices():
    """Return the available voices as ``(label, short_name)`` tuples.

    The list is fetched from the edge-tts service and sorted by label. If
    fetching fails for any reason, a small built-in list is returned
    instead so the UI can still start.
    """
    try:
        voices = asyncio.run(edge_tts.list_voices())
    except Exception as e:
        # Fall back to a default list when the voice list cannot be fetched.
        print(f"获取语音列表失败: {e}")
        return [
            ("中文女声 (zh-CN)", "zh-CN-XiaoxiaoNeural"),
            ("中文男声 (zh-CN)", "zh-CN-YunxiNeural"),
            ("英文女声 (en-US)", "en-US-JennyNeural"),
            ("英文男声 (en-US)", "en-US-GuyNeural"),
        ]

    options = []
    for voice in voices:
        short_name = voice.get("ShortName", "")
        if not short_name:
            # An entry without an id cannot be passed to edge-tts.
            continue
        friendly_name = voice.get("FriendlyName", short_name)
        locale = voice.get("Locale", "")
        options.append((f"{friendly_name} ({locale})", short_name))

    # Sort by the human-readable label.
    options.sort(key=lambda item: item[0])
    return options


# Fetch the voice list once at start-up; the dropdown below depends on it.
voice_options = get_voices()

# Build the Gradio interface.
with gr.Blocks(title="Edge TTS 配音工具") as demo:
    gr.Markdown("# Edge TTS 配音工具")
    gr.Markdown("输入文本,选择语音,调节语速、音量和语调,然后点击合成按钮生成音频。")

    with gr.Row():
        with gr.Column(scale=2):
            # Text input box.
            text_input = gr.Textbox(
                label="输入文本",
                placeholder="请输入要转换为语音的文本...",
                lines=5,
                max_lines=10,
            )

            # Voice selection dropdown.
            voice_dropdown = gr.Dropdown(
                label="选择语音",
                choices=voice_options,
                value="zh-CN-YunxiNeural",
            )

            # Speaking-rate slider.
            rate_slider = gr.Slider(
                label="语速",
                minimum=-50,
                maximum=50,
                step=5,
                value=0,
                info="-50% 到 +50%",
            )

            # Volume slider.
            volume_slider = gr.Slider(
                label="音量",
                minimum=-50,
                maximum=50,
                step=5,
                value=0,
                info="-50% 到 +50%",
            )

            # Pitch slider.
            pitch_slider = gr.Slider(
                label="语调",
                minimum=-50,
                maximum=50,
                step=5,
                value=0,
                info="-50Hz 到 +50Hz",
            )

            # Synthesize button.
            generate_button = gr.Button("合成语音", variant="primary")

        with gr.Column(scale=1):
            # Audio output player.
            audio_output = gr.Audio(
                label="合成音频",
                type="filepath",
            )

    # Wire the button to the synthesis callback.
    generate_button.click(
        fn=text_to_speech,
        inputs=[text_input, voice_dropdown, rate_slider, volume_slider, pitch_slider],
        outputs=audio_output,
    )

# Launch the Gradio app.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)