pramodthe committed on
Commit
c8fa53c
·
verified ·
1 Parent(s): 906722e

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from openai import OpenAI
4
+
5
# Runtime configuration; each value may be overridden through the environment.
VLLM_BASE_URL = os.getenv("VLLM_BASE_URL", "http://129.212.178.215:8000/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-1.5B-Instruct")

# Shared client for the configured endpoint; a placeholder API key is passed.
client = OpenAI(api_key="not-required", base_url=VLLM_BASE_URL)
9
+
10
+
11
def chat(message, history):
    """Stream an assistant reply to *message*, given the prior conversation.

    Args:
        message: Text of the newest user message.
        history: Earlier turns. Each item is either a dict with ``"role"``
            and ``"content"`` keys, or an indexable pair whose first element
            is the user text and whose second element is the assistant text
            (skipped when empty).

    Yields:
        The reply accumulated so far, extended each time a non-empty piece
        of streamed content arrives.
    """
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for item in history:
        if isinstance(item, dict):
            messages.append({"role": item["role"], "content": item["content"]})
            continue
        # Pair-style history entry: user text first, assistant text second.
        messages.append({"role": "user", "content": item[0]})
        if item[1]:
            messages.append({"role": "assistant", "content": item[1]})
    messages.append({"role": "user", "content": message})

    stream = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        stream=True,
    )

    partial = ""
    for chunk in stream:
        # A streamed chunk may carry an empty `choices` list (for example a
        # usage-only chunk); indexing it would raise IndexError mid-stream.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta:
            partial += delta
            yield partial
34
+
35
+
36
# Chat UI: Gradio calls `chat` to produce the response for each user message.
demo = gr.ChatInterface(
    chat,
    examples=["Explain what AMD MI300X is.", "Write a Python hello world."],
    cache_examples=False,
    title="AMD MI300X AI Demo",
    description="Chat with an LLM running on AMD MI300X GPU via vLLM.",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()