File size: 3,832 Bytes
177a062
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import gradio as gr
import os
from loguru import logger
from zhipuai import ZhipuAI
from zhipuai.api_resource.chat.chat import Chat
import yaml
import json
from iat import IATClient
from tts import TTSClient
import numpy as np
from scipy.signal import resample

logger.debug("Loading config")
config_env = os.environ.get("CONFIG", "")
if config_env:
    logger.debug("Using environment variable for config")
    config = json.loads(config_env)
else:
    logger.debug("Reading config from file")
    with open("config.yaml", "r") as f:
        try:
            config = yaml.safe_load(f)["config"]
        except yaml.YAMLError as e:
            logger.error(e)
            raise e

zhipuai_config = config["zhipuai"]
xfyun_config = config["xfyun"]

zhipuai = ZhipuAI(api_key=zhipuai_config["apikey"])
iat = IATClient(
    xfyun_config["iat"]["appid"],
    xfyun_config["iat"]["apikey"],
    xfyun_config["iat"]["apisecret"],
)
tts = TTSClient(
    xfyun_config["tts"]["appid"],
    xfyun_config["tts"]["apikey"],
    xfyun_config["tts"]["apisecret"],
)


def build_zhipuai_history(history: list[list[str]]):
    result = [{"role": "system", "content": config["zhipuai"]["prompt"]}]
    for history_element in history:
        user_message, assistant_message = history_element
        if user_message != None:
            result += [{"role": "user", "content": user_message}]
        if assistant_message != None:
            result += [{"role": "assistant", "content": assistant_message}]
    return result


def add_text(history, text):
    history = history + [(text, None)]
    return history, gr.Textbox(value="", interactive=False)


def bot(history):
    zhipuai_history = build_zhipuai_history(history)
    res = zhipuai.chat.completions.create(
        model="glm-4", messages=zhipuai_history, stream=True
    )
    history[-1][1] = ""
    for chunk in res:
        history[-1][1] += chunk.choices[0].delta.content
        yield history


async def generate_text(audio: tuple[int, np.ndarray]):
    logger.debug(f"Generating text from audio")
    logger.debug(f"Sampling rate: {audio[0]}, resampling to 16000")
    audio = (16000, resample(audio[1], 16000))
    result_list = []
    async for result in iat.dictate(audio):
        logger.debug(f"Result: {result}")
        result_list.append(result)
    return "".join(result_list)


async def generate_audio(history: list[list[str]]):
    logger.debug(f"Generating audio from text")
    text = history[-1][-1]
    result = await tts.generate(text)
    return result

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False,
    )

    with gr.Row():
        txt = gr.Textbox(
            scale=4,
            show_label=False,
            placeholder="Enter text and press enter",
            container=False,
        )
        submit_button = gr.Button(value="提交", variant="primary")

    with gr.Row():
        with gr.Column():
            user_title = gr.Markdown("## 用户语音识别")
            user_audio = gr.Audio(type="numpy", sources=['microphone'])
            user_audio_submit = gr.Button(value="上传用户语音并转换", variant="primary")
        with gr.Column():
            user_title = gr.Markdown("## 机器人语音合成")
            bot_audio = gr.Audio()
            bot_audio_submit = gr.Button(value="将机器人最后一个回复转换为语音", variant="primary")

    user_audio_submit.click(generate_text, [user_audio], outputs=txt)
    bot_audio_submit.click(generate_audio, [chatbot], outputs=bot_audio)

    txt_msg = submit_button.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        bot, chatbot, chatbot, api_name="bot_response"
    )

    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)

demo.launch()