| import io |
| import os |
|
|
| import soundfile as sf |
| from dotenv import load_dotenv |
| from huggingface_hub import InferenceClient |
| from smolagents import CodeAgent, GradioUI, HfApiModel |
|
|
# Load settings from a local .env file (when one exists) before anything
# below reads the environment, e.g. os.getenv("HF_TOKEN").
load_dotenv()
|
|
|
|
def convert_data_to_audio_filelike(your_input_tuple):
    """Encode a ``(sample_rate, samples)`` pair as an in-memory WAV file.

    Args:
        your_input_tuple: Two-item sequence. The first item is handed to
            ``sf.write`` as the sample rate and the second as the audio
            data (expected to be an ``np.ndarray``).

    Returns:
        An ``io.BytesIO`` containing the WAV data, rewound to offset 0 so
        it can be read immediately.
    """
    rate, samples = your_input_tuple
    wav_stream = io.BytesIO()
    sf.write(wav_stream, samples, rate, format="WAV")
    # Writing leaves the cursor at the end; rewind for the consumer.
    wav_stream.seek(0)
    return wav_stream
|
|
|
|
def speech2text_func(data, model: str = "openai/whisper-small.en") -> str:
    """Transcribe audio with ``InferenceClient.automatic_speech_recognition``.

    Args:
        data: Audio to transcribe. A tuple is first encoded to WAV bytes
            through ``convert_data_to_audio_filelike``; any other value is
            forwarded to the client unchanged.
        model: Model identifier passed to the recognition call.

    Returns:
        The ``text`` attribute of the recognition result.
    """
    audio = (
        convert_data_to_audio_filelike(data).read()
        if isinstance(data, tuple)
        else data
    )
    # The token is read from the environment at call time.
    asr_client = InferenceClient(
        provider="hf-inference",
        api_key=os.getenv("HF_TOKEN"),
    )
    result = asr_client.automatic_speech_recognition(audio, model=model)
    return result.text
|
|
|
|
def get_tools():
    """Return the agent's tool configuration.

    Returns:
        A ``(tools_list, add_base_tools)`` pair: a fresh empty list of
        extra tools and ``True`` for the base-tools flag.
    """
    return [], True
|
|
|
|
if __name__ == "__main__":
    # Script entry point: assemble the agent and serve it through GradioUI.
    agent_tools, with_base_tools = get_tools()
    agent = CodeAgent(
        tools=agent_tools,
        model=HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct", provider=None),
        add_base_tools=with_base_tools,
        # NOTE(review): "web_search" looks like a tool name rather than an
        # importable module -- confirm this entry is intended.
        additional_authorized_imports=["web_search"],
    )
    ui = GradioUI(agent)
    # Hand the transcription helper to the UI for audio input.
    ui.launch(speech2text_func=speech2text_func)
|
|