Upload app.py with huggingface_hub
app.py
ADDED
@@ -0,0 +1,45 @@
+import os
+import gradio as gr
+from openai import OpenAI
+
+VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "http://129.212.178.215:8000/v1")
+MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-1.5B-Instruct")
+
+client = OpenAI(base_url=VLLM_BASE_URL, api_key="not-required")
+
+
+def chat(message, history):
+    messages = [{"role": "system", "content": "You are a helpful assistant."}]
+    for item in history:
+        if isinstance(item, dict):
+            messages.append({"role": item["role"], "content": item["content"]})
+        else:
+            messages.append({"role": "user", "content": item[0]})
+            if item[1]:
+                messages.append({"role": "assistant", "content": item[1]})
+    messages.append({"role": "user", "content": message})
+
+    stream = client.chat.completions.create(
+        model=MODEL_NAME,
+        messages=messages,
+        stream=True,
+    )
+
+    partial = ""
+    for chunk in stream:
+        delta = chunk.choices[0].delta.content
+        if delta:
+            partial += delta
+            yield partial
+
+
+demo = gr.ChatInterface(
+    fn=chat,
+    title="AMD MI300X AI Demo",
+    description="Chat with an LLM running on AMD MI300X GPU via vLLM.",
+    examples=["Explain what AMD MI300X is.", "Write a Python hello world."],
+    cache_examples=False,
+)
+
+if __name__ == "__main__":
+    demo.launch()
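A note on the branching inside chat(): Gradio's ChatInterface has passed chat history in two shapes over time. Older versions hand the function a list of [user_message, assistant_message] pairs, while newer versions (when configured with type="messages") hand it OpenAI-style role/content dicts. The isinstance check normalizes both into the same messages list. A rough illustration of the two history shapes the loop accepts (the example strings are made up):

# Older ChatInterface history: list of [user, assistant] pairs.
history_pairs = [["Hi", "Hello! How can I help?"]]

# Newer history (type="messages"): OpenAI-style role/content dicts.
history_messages = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help?"},
]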
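The app also assumes an OpenAI-compatible vLLM server is already reachable at VLLM_BASE_URL (the hard-coded IP above is just the default and can be overridden through the environment). Before wiring up the Gradio UI, it can help to smoke-test that endpoint with a one-off, non-streaming request using the same OpenAI client; the sketch below assumes the server is running and enforces no API key, so the placeholder key is accepted.

import os

from openai import OpenAI

# Same defaults as app.py; override via environment variables if the
# vLLM server lives somewhere else.
VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "http://129.212.178.215:8000/v1")
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-1.5B-Instruct")

client = OpenAI(base_url=VLLM_BASE_URL, api_key="not-required")

# Non-streaming smoke test: one short completion confirms the server is up,
# the /v1 route is correct, and the model name matches what vLLM loaded.
response = client.chat.completions.create(
    model=MODEL_NAME,
    messages=[{"role": "user", "content": "Reply with the single word: ok"}],
    max_tokens=8,
)
print(response.choices[0].message.content)

If that request succeeds, the streaming path in chat() should work as well, since it hits the same /v1/chat/completions route with stream=True. Note that the Space also needs the gradio and openai packages available at runtime, typically listed in a separate requirements.txt.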