Yatsuiii committed on
Commit
88a856e
·
verified ·
1 Parent(s): ee9799c

Speed: cap saliency at 2 models, 60s LLM timeout

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -195,7 +195,7 @@ def _llm_report(p_mean: float, per_model: list, net_saliency: dict | None = None
195
  from openai import OpenAI
196
  if _VLLM_URL:
197
  # Live AMD MI300X inference via vLLM
198
- client = OpenAI(base_url=_VLLM_URL, api_key="not-required")
199
  model_id = _LLM_MODEL
200
  else:
201
  # Fallback: HF Inference API
@@ -243,8 +243,8 @@ def get_models(atlas: str = "cc200"):
243
  # ── gradient saliency ──────────────────────────────────────────────────────
244
 
245
  def _compute_saliency(bw_t, adj_t, models):
246
- # Cap at 5 models to keep CPU inference fast
247
- sample = models[:5] if len(models) > 5 else models
248
  maps = []
249
  for _, task in sample:
250
  try:
 
195
  from openai import OpenAI
196
  if _VLLM_URL:
197
  # Live AMD MI300X inference via vLLM
198
+ client = OpenAI(base_url=_VLLM_URL, api_key="not-required", timeout=60.0)
199
  model_id = _LLM_MODEL
200
  else:
201
  # Fallback: HF Inference API
 
243
  # ── gradient saliency ──────────────────────────────────────────────────────
244
 
245
  def _compute_saliency(bw_t, adj_t, models):
246
+ # Cap at 2 models; backward pass on CPU is slow
247
+ sample = models[:2] if len(models) > 2 else models
248
  maps = []
249
  for _, task in sample:
250
  try: