| import os |
| import gradio as gr |
| from openai import OpenAI |
|
|
# Connection settings for the vLLM OpenAI-compatible endpoint; both may be
# overridden through environment variables at deploy time.
VLLM_BASE_URL = os.getenv("VLLM_BASE_URL", "http://129.212.178.215:8000/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-1.5B-Instruct")

# vLLM does not check API keys, but the OpenAI client insists on a non-empty
# value, hence the placeholder string.
client = OpenAI(base_url=VLLM_BASE_URL, api_key="not-required")
|
|
|
|
def chat(message, history):
    """Stream a chat completion for *message*, given the prior *history*.

    Args:
        message: The user's latest message text.
        history: Prior turns, either as a list of ``{"role", "content"}``
            dicts (Gradio "messages" format) or as ``(user, assistant)``
            pairs (legacy tuple format).

    Yields:
        The assistant reply accumulated so far, so Gradio renders the
        response incrementally as tokens arrive.
    """
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    # Normalize both Gradio history formats into OpenAI-style messages.
    for item in history:
        if isinstance(item, dict):
            messages.append({"role": item["role"], "content": item["content"]})
        else:
            # Legacy tuple format: (user_message, assistant_reply_or_None).
            messages.append({"role": "user", "content": item[0]})
            if item[1]:
                messages.append({"role": "assistant", "content": item[1]})
    messages.append({"role": "user", "content": message})

    stream = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        stream=True,
    )

    partial = ""
    for chunk in stream:
        # Some servers emit stream chunks with an empty `choices` list
        # (e.g. a trailing usage chunk); skip them instead of letting
        # `choices[0]` raise IndexError mid-stream.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta:
            partial += delta
            yield partial
|
|
|
|
# Build the web UI: a chat front-end wired to the streaming handler above.
demo = gr.ChatInterface(
    chat,
    title="AMD MI300X AI Demo",
    description="Chat with an LLM running on AMD MI300X GPU via vLLM.",
    examples=[
        "Explain what AMD MI300X is.",
        "Write a Python hello world.",
    ],
)
|
|
if __name__ == "__main__":
    # Bind to all interfaces so the demo is reachable from outside the host
    # (e.g. when running inside a container or on a remote GPU box).
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
|