| import gradio as gr |
| from pydub import AudioSegment |
| import librosa |
| import torch |
| import soundfile as sf |
| import numpy as np |
| import os |
|
|
| |
| from model import textonly, speechonly |
|
|
|
|
def text_interface(text):
    """Run the text-only model on *text* and return its response string."""
    return textonly(text)
|
|
|
|
def speech_interface(audio_file):
    """Process a speech recording and return the LLM response plus output audio.

    Args:
        audio_file: Gradio numpy-audio tuple ``(sample_rate, samples)``,
            or ``None`` when no audio was provided.

    Returns:
        Tuple of (LLM response text, path to generated audio file or None) —
        matching the two output components wired to this handler.
    """
    if audio_file is None:
        return "Please provide an audio file", None

    sr, audio_data = audio_file

    # Gradio's type="numpy" Audio component delivers integer PCM (e.g. int16);
    # librosa.resample requires float input, so normalize to float32 in [-1, 1].
    if np.issubdtype(audio_data.dtype, np.integer):
        audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max

    # Down-mix multi-channel audio to mono by averaging channels.
    if len(audio_data.shape) > 1:
        audio_data = np.mean(audio_data, axis=1)

    # The speech model expects 16 kHz input — resample if needed.
    if sr != 16000:
        audio_data = librosa.resample(audio_data, orig_sr=sr, target_sr=16000)

    llm_response = speechonly(audio_data, output_wav_path="output.wav")

    # BUG FIX: the click handler declares two outputs (text + audio), but the
    # original returned only the text, so the generated audio was never shown.
    return llm_response, "output.wav"
|
|
|
|
| |
# Build the Gradio UI: one tab for text-only chat, one for speech in/out.
with gr.Blocks(title="Hamid AI Speech API") as app:
    gr.Markdown("# Hamid AI Speech Interface")
    gr.Markdown("Choose between text-only or speech-based interaction")

    with gr.Tab("Text Only"):
        txt_in = gr.Textbox(label="Enter your text", placeholder="Type something...")
        txt_out = gr.Textbox(label="Response", interactive=False)
        gr.Button("Process Text").click(
            fn=text_interface, inputs=txt_in, outputs=txt_out
        )

    with gr.Tab("Speech Only"):
        mic_in = gr.Audio(label="Upload or record audio", type="numpy")
        llm_out = gr.Textbox(label="LLM Response", interactive=False)
        wav_out = gr.Audio(label="Output Audio", type="filepath")
        gr.Button("Process Speech").click(
            fn=speech_interface, inputs=mic_in, outputs=[llm_out, wav_out]
        )
|
|
|
|
if __name__ == "__main__":
    # Serve locally only — no public Gradio share link.
    app.launch(share=False)
|
|