"""Tests for LLM error classification helpers in agent.core.agent_loop.

Covers two regressions on 2026-04-25:

1. Non-Anthropic context overflow (Kimi 365k > 262k) was not recognized by
   ``_is_context_overflow_error``, so the recovery path didn't fire and
   session 62ccfdcb died with 68 wasted compaction events.
2. Bedrock TPM rate limit (``Too many tokens, please wait before trying
   again.``) needs the longer rate-limit retry schedule. The old schedule
   ([5, 15, 30] = 50s total) burned through 6 sessions costing >$2,400
   combined on the same day.
"""
from agent.core.agent_loop import (
    _MAX_LLM_RETRIES,
    _LLM_RATE_LIMIT_RETRY_DELAYS,
    _LLM_RETRY_DELAYS,
    _is_context_overflow_error,
    _is_rate_limit_error,
    _is_transient_error,
    _retry_delay_for,
)
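

# A minimal sketch of the schedule-selection behaviour these tests encode.
# This mirrors, but is NOT, the real _retry_delay_for in agent.core.agent_loop;
# the name _retry_delay_for_sketch is introduced here for illustration only:
# rate-limit errors get the longer schedule, other transient errors get the
# short one, and non-transient errors (or attempts past the end of the chosen
# schedule) get None.
def _retry_delay_for_sketch(err: Exception, attempt: int):
    if _is_rate_limit_error(err):
        schedule = _LLM_RATE_LIMIT_RETRY_DELAYS
    elif _is_transient_error(err):
        schedule = _LLM_RETRY_DELAYS
    else:
        return None  # non-transient: fail immediately, no retry
    return schedule[attempt] if attempt < len(schedule) else None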


# ── context overflow ────────────────────────────────────────────────────
def test_kimi_prompt_too_long_is_context_overflow():
    # Verbatim error text from session 62ccfdcb (2026-04-25, Kimi K2.6).
    err = Exception(
        "litellm.BadRequestError: OpenAIException - The prompt is too long: "
        "365407, model maximum context length: 262143"
    )
    assert _is_context_overflow_error(err)


def test_openai_context_length_exceeded_is_context_overflow():
    err = Exception("Error: This model's maximum context length is 8192 tokens.")
    assert _is_context_overflow_error(err)


def test_random_error_is_not_context_overflow():
    err = Exception("connection reset by peer")
    assert not _is_context_overflow_error(err)


# ── rate limit ──────────────────────────────────────────────────────────
def test_bedrock_too_many_tokens_is_rate_limit():
    # Verbatim from sessions b37a3823, c4d7a831, b63c4933 (2026-04-25).
    err = Exception(
        'litellm.RateLimitError: BedrockException - {"message":"Too many '
        'tokens, please wait before trying again."}'
    )
    assert _is_rate_limit_error(err)
    # Rate-limit errors are also classified as transient.
    assert _is_transient_error(err)


def test_429_is_rate_limit():
    err = Exception("HTTP 429 Too Many Requests")
    assert _is_rate_limit_error(err)


def test_timeout_is_transient_but_not_rate_limit():
    err = Exception("Request timed out after 600s")
    assert _is_transient_error(err)
    assert not _is_rate_limit_error(err)


# ── retry schedule selection ────────────────────────────────────────────
def test_rate_limit_uses_longer_schedule():
    err = Exception("Too many tokens, please wait before trying again.")
    delays = [
        _retry_delay_for(err, i) for i in range(len(_LLM_RATE_LIMIT_RETRY_DELAYS))
    ]
    assert delays == _LLM_RATE_LIMIT_RETRY_DELAYS
    # Just past the schedule → None (stop retrying).
    assert _retry_delay_for(err, len(_LLM_RATE_LIMIT_RETRY_DELAYS)) is None


def test_other_transient_uses_short_schedule():
    err = Exception("503 service unavailable")
    delays = [_retry_delay_for(err, i) for i in range(len(_LLM_RETRY_DELAYS))]
    assert delays == _LLM_RETRY_DELAYS
    assert _retry_delay_for(err, len(_LLM_RETRY_DELAYS)) is None


def test_non_transient_returns_none():
    err = Exception("invalid request: bad parameter")
    assert _retry_delay_for(err, 0) is None


def test_rate_limit_total_budget_covers_bedrock_bucket_recovery():
    """The whole point of the rate-limit schedule: total wait time should
    exceed the ~60s Bedrock TPM bucket recovery window."""
    assert len(_LLM_RATE_LIMIT_RETRY_DELAYS) == _MAX_LLM_RETRIES - 1
    assert sum(_LLM_RATE_LIMIT_RETRY_DELAYS) > 60