import os

import gradio as gr
from huggingface_hub import InferenceClient

# Read the API token from the environment (set HF_TOKEN in the deployment,
# e.g. as a Hugging Face Spaces secret). If unset, the client runs
# unauthenticated, which may be rate-limited or rejected by the provider.
HF_TOKEN = os.environ.get("HF_TOKEN")

client = InferenceClient(
    provider="zai-org",
    api_key=HF_TOKEN,
)


def chat_function(message, history):
    """Chat callback for ``gr.ChatInterface``.

    Converts the Gradio chat history into OpenAI-style messages, appends the
    new user message, and returns the model's reply text.

    Handles both history formats Gradio may pass:
    - classic format: a list of ``(user_text, assistant_text)`` pairs;
    - ``type="messages"`` format (Gradio 5 default): a list of
      ``{"role": ..., "content": ...}`` dicts. Naively unpacking such a dict
      into two names would silently yield its keys ("role", "content"),
      corrupting the conversation context.
    """
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Already an OpenAI-style message dict — pass role/content through.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Classic (user, assistant) pair.
            human, assistant = turn
            messages.append({"role": "user", "content": human})
            if assistant is not None:
                messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model="zai-org/GLM-5.1",  # NOTE(review): confirm this model id exists for the "zai-org" provider
        messages=messages,
        max_tokens=512,
    )
    return response.choices[0].message.content


with gr.Blocks() as demo:
    gr.ChatInterface(fn=chat_function)

if __name__ == "__main__":
    # Bind to 0.0.0.0 so the app is reachable from outside the container
    # (required on Hugging Face Spaces, which expects port 7860).
    demo.launch(server_name="0.0.0.0", server_port=7860)