stevekimoi and Claude Sonnet 4.6 committed
Commit 1f14b87 · 1 Parent(s): 6f28b5c

Add AMD Gradio demo app with vLLM chat interface


Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (4):
  1. .gitignore +5 -0
  2. README.md +22 -8
  3. app.py +48 -0
  4. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
+venv/
+.env
+__pycache__/
+*.pyc
+.DS_Store
README.md CHANGED
@@ -1,15 +1,29 @@
 ---
-title: Amd Gradio Workshop Demo
-emoji: 🌖
-colorFrom: indigo
-colorTo: blue
+title: AMD HuggingFace Workshop Demo
+emoji: 🚀
+colorFrom: red
+colorTo: yellow
 sdk: gradio
-sdk_version: 6.14.0
-python_version: '3.13'
+sdk_version: 4.44.1
 app_file: app.py
 pinned: false
 license: mit
-short_description: This is just a workshop demo
+tags:
+- amd
+- amd-hackathon-2026
+- vllm
+- gradio
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# AMD MI300X AI Demo
+
+A Gradio chat interface connected to a vLLM endpoint running on AMD MI300X GPU.
+
+## Setup
+
+Add these as Space secrets (Settings → Variables and secrets):
+
+| Secret | Value |
+|--------|-------|
+| `VLLM_BASE_URL` | Your AMD vLLM endpoint, e.g. `http://your-ip:8000/v1` |
+| `MODEL_NAME` | Model ID loaded by vLLM, e.g. `Qwen/Qwen2.5-1.5B-Instruct` |
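Before pointing the Space at the endpoint, it is worth confirming that the vLLM server is reachable and that `MODEL_NAME` matches what the server actually loaded. A minimal check, sketched here with the same OpenAI-compatible client the app uses (the URL below is a placeholder, not a value from this repo):

```python
import os

from openai import OpenAI

# Placeholder endpoint; use the same value you stored in the VLLM_BASE_URL secret.
base_url = os.environ.get("VLLM_BASE_URL", "http://your-ip:8000/v1")
client = OpenAI(base_url=base_url, api_key="not-required")

# vLLM exposes the OpenAI-compatible /v1/models route, so this prints the
# model ID(s) the server loaded; MODEL_NAME should match one of them.
for model in client.models.list():
    print(model.id)
```

If this lists the expected model ID, the chat app should be able to stream from the same endpoint.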
app.py ADDED
@@ -0,0 +1,48 @@
+import os
+import gradio as gr
+from openai import OpenAI
+from dotenv import load_dotenv
+
+load_dotenv()
+
+VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
+MODEL_NAME = os.environ.get("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
+
+client = OpenAI(base_url=VLLM_BASE_URL, api_key="not-required")
+
+
+def chat(message, history):
+    messages = [{"role": "system", "content": "You are a helpful assistant."}]
+    for item in history:
+        if isinstance(item, dict):
+            messages.append({"role": item["role"], "content": item["content"]})
+        else:
+            messages.append({"role": "user", "content": item[0]})
+            if item[1]:
+                messages.append({"role": "assistant", "content": item[1]})
+    messages.append({"role": "user", "content": message})
+
+    stream = client.chat.completions.create(
+        model=MODEL_NAME,
+        messages=messages,
+        stream=True,
+    )
+
+    partial = ""
+    for chunk in stream:
+        delta = chunk.choices[0].delta.content
+        if delta:
+            partial += delta
+            yield partial
+
+
+demo = gr.ChatInterface(
+    fn=chat,
+    title="AMD MI300X AI Demo",
+    description="Chat with an LLM running on AMD MI300X GPU via vLLM.",
+    examples=["Explain what AMD MI300X is.", "Write a Python hello world."],
+    cache_examples=False,
+)
+
+if __name__ == "__main__":
+    demo.launch()
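For a quick local smoke test of the streaming logic without launching the Gradio UI, a sketch like the following can be run from the same directory as `app.py` (it assumes `VLLM_BASE_URL` and `MODEL_NAME` are set, for example via a local `.env` file, and the prompt is only an example):

```python
# Exercise the chat() generator directly; it yields the reply accumulated so far
# after each streamed chunk, which is what gr.ChatInterface renders incrementally.
from app import chat

reply = ""
for reply in chat("Explain what AMD MI300X is.", history=[]):
    pass  # each yielded value replaces the previous partial reply

print(reply)  # full assistant reply once the stream is exhausted
```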
requirements.txt ADDED
@@ -0,0 +1,2 @@
+openai>=1.0.0
+python-dotenv