Yatsuiii committed on
Commit e83206b · verified · 1 Parent(s): d84066a

LLM: vLLM on AMD MI300X (OpenAI-compat) with HF fallback
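For context: the OpenAI-compatible path added here assumes a vLLM server is already serving the merged model on the MI300X host, and that its base URL (including the /v1 suffix) is exposed to the Space as VLLM_BASE_URL. The sketch below is a connectivity check under those assumptions; the endpoint URL and the launch command are placeholders, only the environment variable name and model id come from this commit.

# Sketch only. Assumed launch of vLLM's OpenAI-compatible server on the MI300X host:
#   vllm serve Yatsuiii/asd-interpreter-merged --port 8000
import os
from openai import OpenAI

base_url = os.environ.get("VLLM_BASE_URL", "http://mi300x-host:8000/v1")  # placeholder default
client = OpenAI(base_url=base_url, api_key="not-required")  # same placeholder key the app passes
print([m.id for m in client.models.list().data])  # smoke check: lists the model(s) being served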

Files changed (1)
  1. app.py  +23 -5
app.py CHANGED
@@ -135,9 +135,10 @@ def preprocess(bold):
     bw = _windows(bold)
     return torch.FloatTensor(bw).unsqueeze(0), torch.FloatTensor(adj).unsqueeze(0)
 
-# ── LLM (Qwen2.5-7B fine-tuned on AMD MI300X, served via HF Inference API) ─
+# ── LLM (Qwen2.5-7B fine-tuned on AMD MI300X, served via vLLM on MI300X) ───
 
-_LLM_MODEL = "Yatsuiii/asd-interpreter-merged"
+_VLLM_URL = os.environ.get("VLLM_BASE_URL", "")
+_LLM_MODEL = "Yatsuiii/asd-interpreter-merged"
 _HF_TOKEN = os.environ.get("HF_TOKEN", "")
 
 _SYSTEM_PROMPT = (
@@ -191,13 +192,30 @@ def _llm_report(p_mean: float, per_model: list, net_saliency: dict | None = None
         f"and values listed above. Do not mention any network not in this report."
     )
     try:
-        from huggingface_hub import InferenceClient
-        client = InferenceClient(model=_LLM_MODEL, token=_HF_TOKEN or None)
+        from openai import OpenAI
+        if _VLLM_URL:
+            # Live AMD MI300X inference via vLLM
+            client = OpenAI(base_url=_VLLM_URL, api_key="not-required")
+            model_id = _LLM_MODEL
+        else:
+            # Fallback: HF Inference API
+            from huggingface_hub import InferenceClient as _HFClient
+            client = _HFClient(model=_LLM_MODEL, token=_HF_TOKEN or None)
+            response = client.chat_completion(
+                messages=[
+                    {"role": "system", "content": _SYSTEM_PROMPT},
+                    {"role": "user", "content": user_msg},
+                ],
+                max_tokens=512, temperature=0.1,
+            )
+            return response.choices[0].message.content.strip()
         messages = [
             {"role": "system", "content": _SYSTEM_PROMPT},
             {"role": "user", "content": user_msg},
         ]
-        response = client.chat_completion(messages=messages, max_tokens=512, temperature=0.1)
+        response = client.chat.completions.create(
+            model=model_id, messages=messages, max_tokens=512, temperature=0.1
+        )
         return response.choices[0].message.content.strip()
     except Exception as e:
         return f"[LLM unavailable: {e}]"