"""Tests for LLM error classification helpers in agent.core.agent_loop.
Covers two regressions on 2026-04-25:
1. Non-Anthropic context overflow (Kimi 365k > 262k) was not classified as
``_is_context_overflow_error``, so the recovery path didn't fire and
session 62ccfdcb died with 68 wasted compaction events.
2. Bedrock TPM rate limit (`Too many tokens, please wait before trying
again.`) needs the longer rate-limit retry schedule. The old schedule
([5, 15, 30] = 50s) burned through 6 sessions costing >$2,400 combined
on the same day.
"""
from agent.core.agent_loop import (
_MAX_LLM_RETRIES,
_LLM_RATE_LIMIT_RETRY_DELAYS,
_LLM_RETRY_DELAYS,
_is_context_overflow_error,
_is_rate_limit_error,
_is_transient_error,
_retry_delay_for,
)
# ── context overflow ────────────────────────────────────────────────────
def test_kimi_prompt_too_long_is_context_overflow():
    """A non-Anthropic (Kimi) overflow message must trip the overflow check."""
    # Verbatim error text from session 62ccfdcb (2026-04-25, Kimi K2.6).
    overflow = Exception(
        "litellm.BadRequestError: OpenAIException - The prompt is too long: "
        "365407, model maximum context length: 262143"
    )
    assert _is_context_overflow_error(overflow)
def test_openai_context_length_exceeded_is_context_overflow():
    """Classic OpenAI 'maximum context length' wording is also an overflow."""
    msg = "Error: This model's maximum context length is 8192 tokens."
    assert _is_context_overflow_error(Exception(msg))
def test_random_error_is_not_context_overflow():
    """An unrelated network error must not be classified as an overflow."""
    assert not _is_context_overflow_error(Exception("connection reset by peer"))
# ── rate limit ──────────────────────────────────────────────────────────
def test_bedrock_too_many_tokens_is_rate_limit():
    """Bedrock's TPM throttle message must be classified as a rate limit."""
    # Verbatim from sessions b37a3823, c4d7a831, b63c4933 (2026-04-25).
    throttle = Exception(
        'litellm.RateLimitError: BedrockException - {"message":"Too many '
        'tokens, please wait before trying again."}'
    )
    assert _is_rate_limit_error(throttle)
    # Rate-limit errors are also classified as transient.
    assert _is_transient_error(throttle)
def test_429_is_rate_limit():
    """A bare HTTP 429 status line counts as a rate limit."""
    assert _is_rate_limit_error(Exception("HTTP 429 Too Many Requests"))
def test_timeout_is_transient_but_not_rate_limit():
    """Timeouts are retryable (transient) but must not get the long
    rate-limit schedule."""
    timeout = Exception("Request timed out after 600s")
    assert _is_transient_error(timeout)
    assert not _is_rate_limit_error(timeout)
# ── retry schedule selection ────────────────────────────────────────────
def test_rate_limit_uses_longer_schedule():
    """Rate-limit errors walk the dedicated (longer) delay schedule, then stop."""
    throttle = Exception("Too many tokens, please wait before trying again.")
    schedule_len = len(_LLM_RATE_LIMIT_RETRY_DELAYS)
    observed = [_retry_delay_for(throttle, attempt) for attempt in range(schedule_len)]
    assert observed == _LLM_RATE_LIMIT_RETRY_DELAYS
    # Just past the schedule → None (stop retrying).
    assert _retry_delay_for(throttle, schedule_len) is None
def test_other_transient_uses_short_schedule():
    """Transient-but-not-rate-limited errors use the short default schedule."""
    flaky = Exception("503 service unavailable")
    n = len(_LLM_RETRY_DELAYS)
    assert [_retry_delay_for(flaky, attempt) for attempt in range(n)] == _LLM_RETRY_DELAYS
    # Exhausting the schedule stops retries.
    assert _retry_delay_for(flaky, n) is None
def test_non_transient_returns_none():
    """Permanent errors are never retried, even on the first attempt."""
    permanent = Exception("invalid request: bad parameter")
    assert _retry_delay_for(permanent, 0) is None
def test_rate_limit_total_budget_covers_bedrock_bucket_recovery():
    """Total rate-limit wait must outlast the ~60s Bedrock TPM bucket window.

    This is the whole point of having a separate, longer schedule.
    """
    assert sum(_LLM_RATE_LIMIT_RETRY_DELAYS) > 60
    # One delay between each pair of attempts: len(delays) == retries - 1.
    assert len(_LLM_RATE_LIMIT_RETRY_DELAYS) == _MAX_LLM_RETRIES - 1