LLM: vLLM on AMD MI300X (OpenAI-compat) with HF fallback
app.py
@@ -135,9 +135,10 @@ def preprocess(bold):
     bw = _windows(bold)
     return torch.FloatTensor(bw).unsqueeze(0), torch.FloatTensor(adj).unsqueeze(0)
 
-# ── LLM (Qwen2.5-7B fine-tuned on AMD MI300X, served via …
+# ── LLM (Qwen2.5-7B fine-tuned on AMD MI300X, served via vLLM on MI300X) ───
 
-…
+_VLLM_URL = os.environ.get("VLLM_BASE_URL", "")
+_LLM_MODEL = "Yatsuiii/asd-interpreter-merged"
 _HF_TOKEN = os.environ.get("HF_TOKEN", "")
 
 _SYSTEM_PROMPT = (
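The two new settings drive the routing in _llm_report below: when VLLM_BASE_URL is set, the app talks to a vLLM server through its OpenAI-compatible API; when it is empty, it falls back to the Hugging Face Inference API. vLLM's OpenAI-compatible server is typically started with "vllm serve Yatsuiii/asd-interpreter-merged" and exposes the API under a /v1 root, so the variable would point at something like http://HOST:8000/v1. A minimal smoke test of the vLLM path, as a sketch: the localhost URL is a hypothetical placeholder, and a server is assumed to already be running.

# Smoke test for the vLLM path (sketch; the base URL below is hypothetical).
# Any non-empty api_key is accepted unless the server was started with --api-key.
import os
from openai import OpenAI

base_url = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
client = OpenAI(base_url=base_url, api_key="not-required")

response = client.chat.completions.create(
    model="Yatsuiii/asd-interpreter-merged",
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=8,
)
print(response.choices[0].message.content)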
@@ -191,13 +192,30 @@ def _llm_report(p_mean: float, per_model: list, net_saliency: dict | None = None
         f"and values listed above. Do not mention any network not in this report."
     )
     try:
-        from …
-        …
+        from openai import OpenAI
+        if _VLLM_URL:
+            # Live AMD MI300X inference via vLLM
+            client = OpenAI(base_url=_VLLM_URL, api_key="not-required")
+            model_id = _LLM_MODEL
+        else:
+            # Fallback: HF Inference API
+            from huggingface_hub import InferenceClient as _HFClient
+            client = _HFClient(model=_LLM_MODEL, token=_HF_TOKEN or None)
+            response = client.chat_completion(
+                messages=[
+                    {"role": "system", "content": _SYSTEM_PROMPT},
+                    {"role": "user", "content": user_msg},
+                ],
+                max_tokens=512, temperature=0.1,
+            )
+            return response.choices[0].message.content.strip()
         messages = [
             {"role": "system", "content": _SYSTEM_PROMPT},
             {"role": "user", "content": user_msg},
         ]
-        response = client.…
+        response = client.chat.completions.create(
+            model=model_id, messages=messages, max_tokens=512, temperature=0.1
+        )
         return response.choices[0].message.content.strip()
     except Exception as e:
         return f"[LLM unavailable: {e}]"
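The fallback branch returns early because the two clients expose different call shapes: huggingface_hub's InferenceClient binds the model at construction and is called via chat_completion(...), while the OpenAI-compatible client takes the model per call via chat.completions.create(model=...). Both paths send the same system/user pair with max_tokens=512 and temperature=0.1, so report wording stays comparable across backends, and any failure in either path is caught and surfaced as the "[LLM unavailable: ...]" string. The fallback can also be exercised on its own, with no vLLM server running; a sketch, assuming the model is reachable through the Inference API:

# HF Inference API fallback in isolation (sketch mirroring the else-branch).
# Assumption: the model is served by the Inference API; set HF_TOKEN for
# private or gated repos, otherwise anonymous access is attempted.
import os
from huggingface_hub import InferenceClient

client = InferenceClient(model="Yatsuiii/asd-interpreter-merged",
                         token=os.environ.get("HF_TOKEN") or None)
response = client.chat_completion(
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=8,
    temperature=0.1,
)
print(response.choices[0].message.content)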
|