Speed: cap saliency at 2 models, 60s LLM timeout
app.py
--- a/app.py
+++ b/app.py
@@ -195,7 +195,7 @@ def _llm_report(p_mean: float, per_model: list, net_saliency: dict | None = None
     from openai import OpenAI
     if _VLLM_URL:
         # Live AMD MI300X inference via vLLM
-        client = OpenAI(base_url=_VLLM_URL, api_key="not-required")
+        client = OpenAI(base_url=_VLLM_URL, api_key="not-required", timeout=60.0)
         model_id = _LLM_MODEL
     else:
         # Fallback: HF Inference API
@@ -243,8 +243,8 @@ def get_models(atlas: str = "cc200"):
 # ── gradient saliency ──────────────────────────────────────────────────────
 
 def _compute_saliency(bw_t, adj_t, models):
-    # Cap at
-    sample = models[:
+    # Cap at 2 models — backward pass on CPU is slow
+    sample = models[:2] if len(models) > 2 else models
     maps = []
     for _, task in sample:
         try:
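The first hunk passes a client-wide timeout to the OpenAI SDK constructor, which accepts seconds as a float and applies it to every request made through that client, so a stalled vLLM endpoint now errors out after 60 s instead of hanging the report. A minimal sketch of the behavior, assuming a local vLLM server; the URL, model id, and prompt below are placeholders, not values from app.py:

```python
# Minimal sketch of the new timeout behavior. The URL, model id, and
# prompt are placeholders; in app.py they come from _VLLM_URL / _LLM_MODEL.
from openai import APITimeoutError, OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",  # stand-in for _VLLM_URL
    api_key="not-required",               # vLLM does not check the key
    timeout=60.0,                         # seconds, applied to every request
)

try:
    resp = client.chat.completions.create(
        model="llm-model-id",  # stand-in for _LLM_MODEL
        messages=[{"role": "user", "content": "Summarize these results."}],
    )
    print(resp.choices[0].message.content)
except APITimeoutError:
    print("vLLM endpoint did not respond within 60 s")
```

Setting the timeout on the client rather than per call keeps every request issued inside _llm_report bounded without touching each call site.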
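The second hunk bounds the saliency loop to the first two models, since each saliency map costs a full backward pass and that is slow on CPU. The body of _compute_saliency beyond the lines shown is not in this diff; the sketch below fills it with one plausible gradient-saliency implementation, assuming PyTorch tensors and that the second tuple element is a callable model taking (features, adjacency):

```python
# Hedged sketch of the capped loop. Only the cap and the loop skeleton are
# from the commit; the gradient computation, the model call signature, and
# the per-feature reduction are assumptions for illustration.
import torch

def _compute_saliency(bw_t, adj_t, models):
    # Cap at 2 models — backward pass on CPU is slow
    sample = models[:2] if len(models) > 2 else models
    maps = []
    for _, task in sample:  # assumed: `task` is a callable torch model
        try:
            x = bw_t.clone().detach().requires_grad_(True)
            score = task(x, adj_t).sum()   # assumed signature: model(features, adjacency)
            score.backward()               # one full backward pass per model
            maps.append(x.grad.abs().mean(dim=0))  # gradient magnitude per feature
        except Exception:
            continue  # a failing model skips its map rather than aborting
    return maps
```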