test(llm): add network-gated end-to-end OpenRouter integration test
Browse files- tests/llm/test_explainer.py +43 -0
tests/llm/test_explainer.py
CHANGED
|
@@ -250,3 +250,46 @@ class TestAuthFailureShortCircuits:
|
|
| 250 |
assert attempts == ["model-a:free", "model-b:free", "model-c:free"], (
|
| 251 |
f"400 must advance to next model; got attempts={attempts}"
|
| 252 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
assert attempts == ["model-a:free", "model-b:free", "model-c:free"], (
|
| 251 |
f"400 must advance to next model; got attempts={attempts}"
|
| 252 |
)
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
import os as _os
|
| 256 |
+
|
| 257 |
+
import pytest as _pytest
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
@_pytest.mark.skipif(
    not _os.environ.get("OPENROUTER_API_KEY"),
    reason="OPENROUTER_API_KEY not set — skipping live LLM integration test",
)
@_pytest.mark.skipif(
    _os.environ.get("NEUROBRIDGE_DISABLE_LLM") == "1",
    reason="NEUROBRIDGE_DISABLE_LLM=1 — skipping live LLM integration test",
)
class TestLiveOpenRouterLLM:
    """End-to-end check of `explain()` against a real OpenRouter free-tier model.

    Asserts that `explain()` returns source='llm' with a ':free' model name
    and a non-empty rationale. The whole class is skipped when
    OPENROUTER_API_KEY is unset or empty, or when NEUROBRIDGE_DISABLE_LLM
    is "1" (the kill-switch).
    """

    def test_bbb_explain_returns_llm_source_with_real_key(self):
        """Call `explain()` for the 'bbb' modality and validate the LLM result.

        The test is skipped (not failed) when the result comes back with
        source='template'; otherwise it checks the source, the model suffix,
        and that the rationale is neither blank nor an "I cannot..." refusal.
        """
        # Imported inside the test rather than at module top.
        # NOTE(review): presumably so collection does not import the
        # explainer when the class is skipped — confirm.
        from src.llm import explainer as ex

        result = ex.explain(_payload(), modality="bbb")

        # If every model in the chain is rate-limited or unreachable RIGHT NOW
        # the result will fall back to template — that's a flaky-network
        # condition, not a code bug. Report it as a skip with an actionable
        # message instead of a hard failure.
        if result["source"] == "template":
            _pytest.skip(
                "All free models in the chain were rate-limited or unreachable "
                "at test time. Re-run later or run scripts/diagnose_openrouter.py."
            )

        assert result["source"] == "llm"
        assert result["model"] is not None and result["model"].endswith(":free")

        # Strip once and reuse, so the emptiness check and the refusal check
        # both look at the same normalized text.
        rationale = result["rationale"].strip()
        assert rationale, "LLM returned empty rationale"

        # Sanity: we do not assert on exact model wording (non-deterministic),
        # but we do assert it isn't a generic refusal/safety-filter response.
        # Checking the stripped text means a refusal preceded by whitespace
        # or a newline is still caught.
        lowered = rationale.lower()
        assert not lowered.startswith("i cannot"), f"LLM refused: {result['rationale']!r}"
|