| import gradio as gr |
| from huggingface_hub import InferenceClient |
|
|
# Hub model ids offered in the model dropdown. Order matters: the first
# entry is used as the dropdown's default selection.
AVAILABLE_MODELS: list[str] = [
    # OpenAI open-weight
    "openai/gpt-oss-20b",
    # Meta Llama
    "meta-llama/Llama-3.3-70B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    # Qwen
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-7B-Instruct",
    # Mistral
    "mistralai/Mistral-7B-Instruct-v0.3",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    # Google Gemma
    "google/gemma-2-27b-it",
    "google/gemma-2-9b-it",
    # HOS-OSS
    "hydffgg/HOS-OSS-270M",
    "Hyggshi-AI/HOS-OSS-200M",
]
|
|
# Keys of a chat turn that are forwarded to the inference backend.
_CHAT_MESSAGE_KEYS = ("role", "content")


def _build_messages(system_message, history, message):
    """Assemble the chat-completion payload: system prompt, prior turns, new user turn."""
    messages = []

    # A blank system prompt carries no instruction, so do not send an empty
    # system turn at all.
    if system_message and str(system_message).strip():
        messages.append({"role": "system", "content": system_message})

    for turn in history or []:
        # Forward only role/content. NOTE(review): depending on the Gradio
        # version, history dicts may carry extra UI-only keys that a strict
        # OpenAI-compatible backend could reject -- confirm against the
        # deployed Gradio version.
        if isinstance(turn, dict):
            turn = {key: turn[key] for key in _CHAT_MESSAGE_KEYS if key in turn}
        messages.append(turn)

    messages.append({"role": "user", "content": message})
    return messages


def _chunk_text(chunk):
    """Return the text delta carried by one streamed chunk, or "" if it has none."""
    try:
        choices = chunk.choices
        if choices:
            return choices[0].delta.content or ""
    except (AttributeError, IndexError):
        # Chunks without the expected choices/delta shape are skipped.
        return ""
    return ""


def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    model_name,
    hf_token: gr.OAuthToken,
):
    """Generate the assistant reply for one chat turn.

    The reply is streamed: each yielded value is the full text accumulated so
    far. If the streaming request raises, the same request is retried once
    without streaming; if that also fails, a user-facing error message is
    yielded instead of raising.

    Args:
        message: The new user message.
        history: Previous turns as role/content dicts.
        system_message: System prompt; omitted from the request when blank.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        model_name: Model id passed to ``InferenceClient``.
        hf_token: OAuth token whose ``.token`` authenticates the client. The
            ``gr.OAuthToken`` annotation is kept exactly as-is; presumably
            Gradio relies on it to inject the logged-in user's token.

    Yields:
        The accumulated reply text, or an error message string.
    """
    import logging  # local import: keeps this block self-contained

    client = InferenceClient(token=hf_token.token, model=model_name)
    messages = _build_messages(system_message, history, message)
    sampling = {
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }

    response = ""
    try:
        for chunk in client.chat_completion(messages, stream=True, **sampling):
            text = _chunk_text(chunk)
            if text:
                response += text
                yield response
        return
    except Exception as stream_error:
        # Best-effort boundary: any streaming failure falls through to the
        # non-streaming retry below, but the cause is recorded, not dropped.
        logging.getLogger(__name__).warning(
            "Streaming chat completion failed for %s; retrying without streaming: %s",
            model_name,
            stream_error,
        )

    try:
        result = client.chat_completion(messages, stream=False, **sampling)
        content = result.choices[0].message.content
    except Exception as fallback_error:
        # Message (Vietnamese): "Error: ... the model may not support chat completion."
        yield f"❌ Lỗi: {str(fallback_error)}\n\nModel `{model_name}` có thể không hỗ trợ chat completion."
    else:
        # Never yield None: fall back to whatever was streamed before the
        # failure (possibly the empty string).
        yield content or response
|
|
|
|
# Chat UI bound to `respond`. The additional inputs are passed to `respond`
# positionally after (message, history), so their order here must match the
# function's parameter order: system_message, max_tokens, temperature, top_p,
# model_name.
chatbot = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        # Upper bound capped at 2.0 (was 4.0): the huggingface_hub
        # chat_completion docs give the temperature range as [0, 2].
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        # Model picker; defaults to the first entry of AVAILABLE_MODELS.
        gr.Dropdown(
            choices=AVAILABLE_MODELS,
            value=AVAILABLE_MODELS[0],
            label="🤖 Model",
            info="Chọn model để chat",
        ),
    ],
)
|
|
# Page layout: a sidebar holding the login button and a short settings note,
# with the chat interface rendered in the main area.
demo = gr.Blocks()
with demo:
    with gr.Sidebar():
        gr.LoginButton()
        gr.Markdown("### ⚙️ Cài đặt")
        gr.Markdown("Đăng nhập để sử dụng các model HuggingFace.")
    chatbot.render()


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()