import os
import gradio as gr
from openai import OpenAI
# Endpoint of the vLLM OpenAI-compatible server; overridable via env for deployment.
VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "http://129.212.178.215:8000/v1")
# Model identifier the server was launched with; must match vLLM's --model flag.
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-1.5B-Instruct")
# vLLM does not enforce authentication by default, but the OpenAI client
# requires a non-empty api_key, hence the placeholder value.
client = OpenAI(base_url=VLLM_BASE_URL, api_key="not-required")
def chat(message, history):
    """Stream an assistant reply for *message*, replaying *history* as context.

    History entries may be Gradio "messages"-format dicts ({"role": ...,
    "content": ...}) or legacy (user, assistant) tuples; both are folded
    into the OpenAI-style message list. Yields the accumulated partial
    reply after each streamed chunk so the UI updates incrementally.
    """
    convo = [{"role": "system", "content": "You are a helpful assistant."}]
    for entry in history:
        if isinstance(entry, dict):
            # Messages format: pass role/content straight through.
            convo.append({"role": entry["role"], "content": entry["content"]})
        else:
            # Tuple format: (user_text, assistant_text_or_None).
            user_text, assistant_text = entry[0], entry[1]
            convo.append({"role": "user", "content": user_text})
            if assistant_text:
                convo.append({"role": "assistant", "content": assistant_text})
    convo.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=convo,
        stream=True,
    )
    accumulated = ""
    for piece in response:
        fragment = piece.choices[0].delta.content
        if fragment:
            accumulated += fragment
        yield accumulated
# Gradio chat UI wired to the streaming generator above; `examples` are
# one-click prompt suggestions shown under the input box.
demo = gr.ChatInterface(
    fn=chat,
    title="AMD MI300X AI Demo",
    description="Chat with an LLM running on AMD MI300X GPU via vLLM.",
    examples=["Explain what AMD MI300X is.", "Write a Python hello world."],
)
if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside the
    # container/host; 7860 is Gradio's conventional port.
    demo.launch(server_name="0.0.0.0", server_port=7860)