"""Tests for LLM error classification helpers in agent.core.agent_loop.
Covers two regressions on 2026-04-25:
1. Non-Anthropic context overflow (Kimi 365k > 262k) was not classified as
``_is_context_overflow_error``, so the recovery path didn't fire and
session 62ccfdcb died with 68 wasted compaction events.
2. Bedrock TPM rate limit (`Too many tokens, please wait before trying
again.`) needs the longer rate-limit retry schedule. The old schedule
([5, 15, 30] = 50s) burned through 6 sessions costing >$2,400 combined
on the same day.
"""
from agent.core.agent_loop import (
_MAX_LLM_RETRIES,
_LLM_RATE_LIMIT_RETRY_DELAYS,
_LLM_RETRY_DELAYS,
_is_context_overflow_error,
_is_rate_limit_error,
_is_transient_error,
_retry_delay_for,
)
# ── context overflow ────────────────────────────────────────────────────
def test_kimi_prompt_too_long_is_context_overflow():
    """A non-Anthropic (Kimi) overflow message must trip the overflow check."""
    # Verbatim error text from session 62ccfdcb (2026-04-25, Kimi K2.6).
    overflow = Exception(
        "litellm.BadRequestError: OpenAIException - The prompt is too long: "
        "365407, model maximum context length: 262143"
    )
    assert _is_context_overflow_error(overflow)
def test_openai_context_length_exceeded_is_context_overflow():
    """Classic OpenAI 'maximum context length' wording is also an overflow."""
    msg = "Error: This model's maximum context length is 8192 tokens."
    assert _is_context_overflow_error(Exception(msg))
def test_random_error_is_not_context_overflow():
    """An unrelated network error must not be classified as an overflow."""
    assert not _is_context_overflow_error(Exception("connection reset by peer"))
# ── rate limit ──────────────────────────────────────────────────────────
def test_bedrock_too_many_tokens_is_rate_limit():
    """Bedrock's TPM throttle message must be classified as a rate limit."""
    # Verbatim from sessions b37a3823, c4d7a831, b63c4933 (2026-04-25).
    throttle = Exception(
        'litellm.RateLimitError: BedrockException - {"message":"Too many '
        'tokens, please wait before trying again."}'
    )
    assert _is_rate_limit_error(throttle)
    # Rate-limit errors are also classified as transient.
    assert _is_transient_error(throttle)
def test_429_is_rate_limit():
    """A bare HTTP 429 status line counts as a rate limit."""
    assert _is_rate_limit_error(Exception("HTTP 429 Too Many Requests"))
def test_timeout_is_transient_but_not_rate_limit():
    """Timeouts are retryable (transient) but must not get the long
    rate-limit schedule."""
    timeout = Exception("Request timed out after 600s")
    assert _is_transient_error(timeout)
    assert not _is_rate_limit_error(timeout)
# ── retry schedule selection ────────────────────────────────────────────
def test_rate_limit_uses_longer_schedule():
    """Rate-limit errors walk the dedicated (longer) delay schedule, then stop."""
    throttle = Exception("Too many tokens, please wait before trying again.")
    schedule_len = len(_LLM_RATE_LIMIT_RETRY_DELAYS)
    observed = [_retry_delay_for(throttle, attempt) for attempt in range(schedule_len)]
    assert observed == _LLM_RATE_LIMIT_RETRY_DELAYS
    # Just past the schedule → None (stop retrying).
    assert _retry_delay_for(throttle, schedule_len) is None
def test_other_transient_uses_short_schedule():
    """Transient-but-not-rate-limited errors use the short default schedule."""
    flaky = Exception("503 service unavailable")
    n = len(_LLM_RETRY_DELAYS)
    assert [_retry_delay_for(flaky, attempt) for attempt in range(n)] == _LLM_RETRY_DELAYS
    # Exhausting the schedule stops retries.
    assert _retry_delay_for(flaky, n) is None
def test_non_transient_returns_none():
    """Permanent errors are never retried, even on the first attempt."""
    permanent = Exception("invalid request: bad parameter")
    assert _retry_delay_for(permanent, 0) is None
def test_rate_limit_total_budget_covers_bedrock_bucket_recovery():
    """Total rate-limit wait must outlast the ~60s Bedrock TPM bucket window.

    This is the whole point of having a separate, longer schedule.
    """
    assert sum(_LLM_RATE_LIMIT_RETRY_DELAYS) > 60
    # One delay between each pair of attempts: len(delays) == retries - 1.
    assert len(_LLM_RATE_LIMIT_RETRY_DELAYS) == _MAX_LLM_RETRIES - 1