Spaces:

qa1145
/

astrbbbb

Paused

App Files Files Community

astrbbbb / tests /agent /test_context_manager.py

qa1145

Upload 1245 files

8ede856 verified 2 months ago

raw

history blame

29.2 kB

	"""Comprehensive tests for ContextManager."""

	import sys
	from pathlib import Path
	from typing import Literal
	from unittest.mock import AsyncMock, MagicMock, patch

	import pytest

	# Add parent directory to path to avoid circular import issues
	sys.path.insert(0, str(Path(__file__).parent.parent.parent))

	from astrbot.core.agent.context.config import ContextConfig
	from astrbot.core.agent.context.manager import ContextManager
	from astrbot.core.agent.message import Message, TextPart
	from astrbot.core.provider.entities import LLMResponse


	class MockProvider:
	"""模拟 Provider"""

	def __init__(self):
	self.provider_config = {
	"id": "test_provider",
	"model": "gpt-4",
	"modalities": ["text", "image", "tool_use"],
	}

	async def text_chat(self, **kwargs):
	"""模拟 LLM 调用，返回摘要"""
	messages = kwargs.get("messages", [])
	# 简单的摘要逻辑：返回消息数量统计
	return LLMResponse(
	role="assistant",
	completion_text=f"历史对话包含 {len(messages) - 1} 条消息，主要讨论了技术话题。",
	)

	def get_model(self):
	return "gpt-4"

	def meta(self):
	return MagicMock(id="test_provider", type="openai")


	class TestContextManager:
	"""Test suite for ContextManager."""

	def create_message(
	self, role: Literal["system", "user", "assistant", "tool"], content: str
	) -> Message:
	"""Helper to create a simple text message."""
	return Message(role=role, content=content)

	def create_messages(self, count: int) -> list[Message]:
	"""Helper to create alternating user/assistant messages."""
	messages = []
	for i in range(count):
	role = "user" if i % 2 == 0 else "assistant"
	messages.append(self.create_message(role, f"Message {i}"))
	return messages

	# ==================== Basic Initialization Tests ====================

	def test_init_with_minimal_config(self):
	"""Test initialization with minimal configuration."""
	config = ContextConfig()
	manager = ContextManager(config)

	assert manager.config == config
	assert manager.token_counter is not None
	assert manager.truncator is not None
	assert manager.compressor is not None

	def test_init_with_llm_compressor(self):
	"""Test initialization with LLM-based compression."""
	mock_provider = MockProvider()
	config = ContextConfig(
	llm_compress_provider=mock_provider, # type: ignore
	llm_compress_keep_recent=5,
	llm_compress_instruction="Summarize the conversation",
	)
	manager = ContextManager(config)

	from astrbot.core.agent.context.compressor import LLMSummaryCompressor

	assert isinstance(manager.compressor, LLMSummaryCompressor)

	def test_init_with_truncate_compressor(self):
	"""Test initialization with truncate-based compression (default)."""
	config = ContextConfig(truncate_turns=3)
	manager = ContextManager(config)

	from astrbot.core.agent.context.compressor import TruncateByTurnsCompressor

	assert isinstance(manager.compressor, TruncateByTurnsCompressor)

	# ==================== Empty and Edge Cases ====================

	@pytest.mark.asyncio
	async def test_process_empty_messages(self):
	"""Test processing an empty message list."""
	config = ContextConfig()
	manager = ContextManager(config)

	result = await manager.process([])

	assert result == []

	@pytest.mark.asyncio
	async def test_process_single_message(self):
	"""Test processing a single message."""
	config = ContextConfig()
	manager = ContextManager(config)

	messages = [self.create_message("user", "Hello")]
	result = await manager.process(messages)

	assert len(result) == 1
	assert result[0].content == "Hello"

	@pytest.mark.asyncio
	async def test_process_with_no_limits(self):
	"""Test processing when no limits are set (no truncation or compression)."""
	config = ContextConfig(max_context_tokens=0, enforce_max_turns=-1)
	manager = ContextManager(config)

	messages = self.create_messages(20)
	result = await manager.process(messages)

	assert len(result) == 20
	assert result == messages

	# ==================== Enforce Max Turns Tests ====================

	@pytest.mark.asyncio
	async def test_enforce_max_turns_basic(self):
	"""Test basic enforce_max_turns functionality."""
	config = ContextConfig(enforce_max_turns=3, truncate_turns=1)
	manager = ContextManager(config)

	# Create 10 turns (20 messages)
	messages = self.create_messages(20)
	result = await manager.process(messages)

	# Should keep only 3 most recent turns (6 messages)
	assert len(result) <= 8 # May vary due to truncation logic

	@pytest.mark.asyncio
	async def test_enforce_max_turns_zero(self):
	"""Test enforce_max_turns with value 0 (should keep nothing)."""
	config = ContextConfig(enforce_max_turns=0, truncate_turns=1)
	manager = ContextManager(config)

	messages = self.create_messages(10)
	result = await manager.process(messages)

	# Should result in empty or minimal message list
	assert len(result) <= 2

	@pytest.mark.asyncio
	async def test_enforce_max_turns_negative(self):
	"""Test enforce_max_turns with -1 (no limit)."""
	config = ContextConfig(enforce_max_turns=-1)
	manager = ContextManager(config)

	messages = self.create_messages(20)
	result = await manager.process(messages)

	assert len(result) == 20

	@pytest.mark.asyncio
	async def test_enforce_max_turns_with_system_messages(self):
	"""Test enforce_max_turns preserves system messages."""
	config = ContextConfig(enforce_max_turns=2, truncate_turns=1)
	manager = ContextManager(config)

	messages = [
	self.create_message("system", "System instruction"),
	*self.create_messages(10),
	]
	result = await manager.process(messages)

	# System message should be preserved
	system_msgs = [m for m in result if m.role == "system"]
	assert len(system_msgs) >= 1
	assert system_msgs[0].content == "System instruction"

	# ==================== Token-based Compression Tests ====================

	@pytest.mark.asyncio
	async def test_token_compression_not_triggered_below_threshold(self):
	"""Test that compression is not triggered below threshold."""
	config = ContextConfig(max_context_tokens=1000)
	manager = ContextManager(config)

	# Create messages that total less than threshold
	messages = [self.create_message("user", "Hi" * 50)] # ~100 tokens

	with patch.object(
	manager.compressor, "should_compress", return_value=False
	) as mock_should_compress:
	with patch.object(
	manager.compressor, "__call__", new_callable=AsyncMock
	) as mock_compress:
	result = await manager.process(messages)

	# should_compress should be called
	mock_should_compress.assert_called_once()
	# Compressor should not be called
	mock_compress.assert_not_called()
	assert result == messages

	@pytest.mark.asyncio
	async def test_token_compression_triggered_above_threshold(self):
	"""Test that compression is triggered above threshold."""
	config = ContextConfig(max_context_tokens=100, truncate_turns=1)
	manager = ContextManager(config)

	# Create messages that exceed threshold (0.82 * 100 = 82 tokens)
	# 300 chars * 0.3 = 90 tokens > 82 threshold
	long_text = "x" * 300 # ~90 tokens, above threshold
	messages = [self.create_message("user", long_text)]

	# Mock compressor to return smaller result
	compressed = [self.create_message("user", "short")]

	# Create a mock compressor
	mock_compressor = AsyncMock()
	mock_compressor.compression_threshold = 0.82
	mock_compressor.return_value = compressed

	# Mock should_compress to return True first time, False after
	call_count = 0

	def mock_should_compress(args, *kwargs):
	nonlocal call_count
	call_count += 1
	return call_count == 1

	mock_compressor.should_compress = mock_should_compress
	manager.compressor = mock_compressor

	result = await manager.process(messages)

	# Compressor should be called
	mock_compressor.assert_called_once()
	# Result should be the compressed version
	assert len(result) <= len(messages)

	@pytest.mark.asyncio
	async def test_token_compression_with_zero_max_tokens(self):
	"""Test that compression is skipped when max_context_tokens is 0."""
	config = ContextConfig(max_context_tokens=0)
	manager = ContextManager(config)

	messages = [self.create_message("user", "x" * 10000)]

	with patch.object(
	manager.compressor, "__call__", new_callable=AsyncMock
	) as mock_compress:
	result = await manager.process(messages)

	# Compressor should not be called when max_context_tokens is 0
	mock_compress.assert_not_called()
	assert result == messages

	@pytest.mark.asyncio
	async def test_token_compression_with_negative_max_tokens(self):
	"""Test that compression is skipped when max_context_tokens is negative."""
	config = ContextConfig(max_context_tokens=-100)
	manager = ContextManager(config)

	messages = [self.create_message("user", "x" * 10000)]

	with patch.object(
	manager.compressor, "__call__", new_callable=AsyncMock
	) as mock_compress:
	result = await manager.process(messages)

	# Compressor should not be called
	mock_compress.assert_not_called()
	assert result == messages

	@pytest.mark.asyncio
	async def test_double_check_after_compression(self):
	"""Test that halving is applied if still over threshold after compression."""
	config = ContextConfig(max_context_tokens=100)
	manager = ContextManager(config)

	# Create messages that would still be over threshold after compression
	long_messages = [self.create_message("user", "x" * 200) for _ in range(10)]

	# Mock compressor to return messages still over threshold
	async def mock_compress(msgs):
	return msgs # Return same messages (still over limit)

	# Mock should_compress to return True twice (before and after compression)
	with patch.object(manager.compressor, "should_compress", return_value=True):
	with patch.object(manager.compressor, "__call__", new=mock_compress):
	with patch.object(
	manager.truncator,
	"truncate_by_halving",
	return_value=long_messages[:5],
	) as mock_halving:
	_ = await manager.process(long_messages)

	# Halving should be called
	mock_halving.assert_called_once()

	# ==================== Combined Truncation and Compression Tests ====================

	@pytest.mark.asyncio
	async def test_combined_enforce_turns_and_token_limit(self):
	"""Test combining enforce_max_turns and token limit."""
	config = ContextConfig(
	enforce_max_turns=5, max_context_tokens=500, truncate_turns=1
	)
	manager = ContextManager(config)

	# Create many messages
	messages = self.create_messages(30)

	result = await manager.process(messages)

	# Should be truncated by both mechanisms
	assert len(result) < 30

	@pytest.mark.asyncio
	async def test_sequential_processing_order(self):
	"""Test that enforce_max_turns happens before token compression."""
	config = ContextConfig(enforce_max_turns=5, max_context_tokens=1000)
	manager = ContextManager(config)

	messages = self.create_messages(20)

	# Mock the truncator to track calls
	with patch.object(
	manager.truncator,
	"truncate_by_turns",
	wraps=manager.truncator.truncate_by_turns,
	) as mock_truncate:
	await manager.process(messages)

	# Truncator should be called first
	mock_truncate.assert_called_once()

	# ==================== Error Handling Tests ====================

	@pytest.mark.asyncio
	async def test_error_handling_returns_original_messages(self):
	"""Test that errors during processing return original messages."""
	config = ContextConfig(max_context_tokens=100)
	manager = ContextManager(config)

	messages = self.create_messages(5)

	# Make compressor raise an exception
	with patch.object(
	manager.compressor, "__call__", side_effect=Exception("Test error")
	):
	result = await manager.process(messages)

	# Should return original messages despite error
	assert result == messages

	@pytest.mark.asyncio
	async def test_error_handling_logs_exception(self):
	"""Test that errors are logged."""
	config = ContextConfig(max_context_tokens=100)
	manager = ContextManager(config)

	# Create messages that will trigger compression (> 82 tokens)
	messages = [self.create_message("user", "x" * 300)] # ~90 tokens

	# Replace compressor with one that raises an exception
	mock_compressor = AsyncMock(side_effect=Exception("Test error"))
	mock_compressor.compression_threshold = 0.82
	mock_compressor.should_compress = MagicMock(return_value=True)
	manager.compressor = mock_compressor

	with patch("astrbot.core.agent.context.manager.logger") as mock_logger:
	result = await manager.process(messages)

	# Logger error method should be called
	assert mock_logger.error.called
	# Should return original messages on error
	assert result == messages

	# ==================== Multi-modal Content Tests ====================

	@pytest.mark.asyncio
	async def test_process_messages_with_textpart_content(self):
	"""Test processing messages with TextPart content."""
	config = ContextConfig()
	manager = ContextManager(config)

	messages = [
	Message(role="user", content=[TextPart(text="Hello")]),
	Message(role="assistant", content=[TextPart(text="Hi there")]),
	]

	result = await manager.process(messages)

	assert len(result) == 2
	assert result == messages

	@pytest.mark.asyncio
	async def test_token_counting_with_multimodal_content(self):
	"""Test token counting works with multi-modal content."""
	config = ContextConfig(max_context_tokens=50)
	manager = ContextManager(config)

	# Need enough tokens to exceed threshold: 50 * 0.82 = 41 tokens
	# 150 chars * 0.3 = 45 tokens > 41
	messages = [
	Message(role="user", content=[TextPart(text="x" * 150)]),
	]

	# Should trigger compression due to token count
	tokens = manager.token_counter.count_tokens(messages)
	needs_compression = manager.compressor.should_compress(messages, tokens, 50)

	assert tokens > 0 # Tokens should be counted
	assert needs_compression # Should trigger compression

	# ==================== Tool Calls Tests ====================

	@pytest.mark.asyncio
	async def test_process_messages_with_tool_calls(self):
	"""Test processing messages with tool calls."""
	config = ContextConfig()
	manager = ContextManager(config)

	messages = [
	Message(
	role="assistant",
	content="Let me search for that",
	tool_calls=[
	{
	"id": "call_1",
	"type": "function",
	"function": {"name": "search", "arguments": "{}"},
	}
	],
	),
	Message(role="tool", content="Search result", tool_call_id="call_1"),
	]

	result = await manager.process(messages)

	assert len(result) == 2

	# ==================== Compressor should_compress Tests ====================

	@pytest.mark.asyncio
	async def test_should_compress_empty_messages(self):
	"""Test should_compress with empty messages."""
	config = ContextConfig(max_context_tokens=100)
	manager = ContextManager(config)

	# Compressor's should_compress should handle empty gracefully
	needs_compression = manager.compressor.should_compress([], 0, 100)
	assert not needs_compression

	@pytest.mark.asyncio
	async def test_should_compress_below_threshold(self):
	"""Test should_compress when below compression threshold."""
	config = ContextConfig(max_context_tokens=1000)
	manager = ContextManager(config)

	messages = [self.create_message("user", "Hello")]
	tokens = manager.token_counter.count_tokens(messages)

	needs_compression = manager.compressor.should_compress(messages, tokens, 1000)
	assert not needs_compression

	@pytest.mark.asyncio
	async def test_should_compress_above_threshold(self):
	"""Test should_compress when above compression threshold."""
	config = ContextConfig(max_context_tokens=100)
	manager = ContextManager(config)

	# Create message with many tokens
	messages = [self.create_message("user", "这是测试" * 50)]
	tokens = manager.token_counter.count_tokens(messages)

	needs_compression = manager.compressor.should_compress(messages, tokens, 100)
	# Should need compression if tokens > 82 (0.82 * 100)
	assert needs_compression == (tokens > 82)

	# ==================== Truncator Halving Tests ====================

	def test_truncate_by_halving_basic(self):
	"""Test truncate_by_halving removes middle 50%."""
	config = ContextConfig()
	manager = ContextManager(config)

	messages = self.create_messages(10)
	result = manager.truncator.truncate_by_halving(messages)

	# Should keep roughly half
	assert len(result) < len(messages)

	def test_truncate_by_halving_empty_list(self):
	"""Test truncate_by_halving with empty list."""
	config = ContextConfig()
	manager = ContextManager(config)

	result = manager.truncator.truncate_by_halving([])

	assert result == []

	def test_truncate_by_halving_single_message(self):
	"""Test truncate_by_halving with single message."""
	config = ContextConfig()
	manager = ContextManager(config)

	messages = [self.create_message("user", "Hello")]
	result = manager.truncator.truncate_by_halving(messages)

	assert len(result) <= 1

	# ==================== Complex Scenarios ====================

	@pytest.mark.asyncio
	async def test_multiple_compression_cycles(self):
	"""Test that compression can be triggered multiple times in sequence."""
	config = ContextConfig(max_context_tokens=50, truncate_turns=1)
	manager = ContextManager(config)

	# Process messages multiple times
	messages = self.create_messages(10)

	result1 = await manager.process(messages)
	result2 = await manager.process(result1)
	result3 = await manager.process(result2)

	# Each cycle should maintain or reduce message count
	assert len(result3) <= len(result2) <= len(result1)

	@pytest.mark.asyncio
	async def test_alternating_roles_preserved(self):
	"""Test that user/assistant alternation is preserved after processing."""
	config = ContextConfig(enforce_max_turns=3, truncate_turns=1)
	manager = ContextManager(config)

	messages = self.create_messages(20)
	result = await manager.process(messages)

	# Check that roles still alternate (excluding system messages)
	non_system = [m for m in result if m.role != "system"]
	if len(non_system) >= 2:
	# Should start with user
	assert non_system[0].role == "user"

	@pytest.mark.asyncio
	async def test_compression_threshold_default(self):
	"""Test that compression threshold is used correctly."""
	config = ContextConfig(max_context_tokens=100)
	manager = ContextManager(config)

	# Verify the default threshold is 0.82
	assert manager.compressor.compression_threshold == 0.82

	# Test threshold logic
	messages = [self.create_message("user", "x" * 81)] # ~24 tokens
	tokens = manager.token_counter.count_tokens(messages)

	needs_compression = manager.compressor.should_compress(messages, tokens, 100)
	# Should not compress if below threshold
	assert needs_compression == (tokens > 82)

	@pytest.mark.asyncio
	async def test_large_batch_processing(self):
	"""Test processing a large batch of messages."""
	config = ContextConfig(
	enforce_max_turns=10, max_context_tokens=1000, truncate_turns=2
	)
	manager = ContextManager(config)

	# Create 100 messages (50 turns)
	messages = self.create_messages(100)

	result = await manager.process(messages)

	# Should be significantly reduced
	assert len(result) < 100
	assert len(result) > 0

	@pytest.mark.asyncio
	async def test_config_persistence(self):
	"""Test that config settings are respected throughout processing."""
	config = ContextConfig(
	max_context_tokens=500,
	enforce_max_turns=5,
	truncate_turns=2,
	llm_compress_keep_recent=3,
	)
	manager = ContextManager(config)

	# Verify config is stored
	assert manager.config.max_context_tokens == 500
	assert manager.config.enforce_max_turns == 5
	assert manager.config.truncate_turns == 2
	assert manager.config.llm_compress_keep_recent == 3

	# ==================== Run Compression Tests ====================

	@pytest.mark.asyncio
	async def test_run_compression_calls_compressor(self):
	"""Test _run_compression calls compressor."""
	config = ContextConfig(max_context_tokens=100)
	manager = ContextManager(config)

	messages = self.create_messages(5)
	compressed = self.create_messages(3)

	# Create a mock compressor
	mock_compressor = AsyncMock()
	mock_compressor.compression_threshold = 0.82
	mock_compressor.return_value = compressed
	mock_compressor.should_compress = MagicMock(return_value=False)
	manager.compressor = mock_compressor

	result = await manager._run_compression(messages, prev_tokens=100)

	# Compressor __call__ should be invoked
	mock_compressor.assert_called_once_with(messages)
	assert result == compressed

	@pytest.mark.asyncio
	async def test_run_compression_applies_compressor_through_process(self):
	"""Test _run_compression calls compressor when needed through process()."""
	config = ContextConfig(max_context_tokens=100, truncate_turns=1)
	manager = ContextManager(config)

	# Create messages that will trigger compression
	messages = [self.create_message("user", "x" * 300)] # ~90 tokens > 82 threshold
	compressed = [self.create_message("user", "short")] # Much smaller

	# Create a mock compressor
	mock_compressor = AsyncMock()
	mock_compressor.compression_threshold = 0.82
	mock_compressor.return_value = compressed

	# Mock should_compress to return True first time, False after
	call_count = 0

	def mock_should_compress(args, *kwargs):
	nonlocal call_count
	call_count += 1
	return call_count == 1

	mock_compressor.should_compress = mock_should_compress
	manager.compressor = mock_compressor

	result = await manager.process(messages)

	# Compressor should have been called
	mock_compressor.assert_called_once()
	assert len(result) <= len(messages)

	@pytest.mark.asyncio
	async def test_llm_compression_with_mock_provider(self):
	"""Test LLM compression using MockProvider."""
	mock_provider = MockProvider()
	config = ContextConfig(
	llm_compress_provider=mock_provider, # type: ignore
	llm_compress_keep_recent=3,
	llm_compress_instruction="请总结对话内容",
	max_context_tokens=100,
	)
	manager = ContextManager(config)

	# Create messages that will trigger compression
	messages = [
	self.create_message("user", "x" * 100),
	self.create_message("assistant", "y" * 100),
	self.create_message("user", "z" * 100),
	]

	result = await manager.process(messages)

	# Should have been compressed
	assert len(result) <= len(messages)

	# ==================== split_history Tests ====================

	def test_split_history_ensures_user_start(self):
	"""Test split_history ensures recent_messages starts with user message."""
	from astrbot.core.agent.context.compressor import split_history

	# Create alternating messages: user, assistant, user, assistant, user, assistant
	messages = [
	self.create_message("system", "System prompt"),
	self.create_message("user", "msg1"),
	self.create_message("assistant", "msg2"),
	self.create_message("user", "msg3"),
	self.create_message("assistant", "msg4"),
	self.create_message("user", "msg5"),
	self.create_message("assistant", "msg6"),
	]

	# Keep recent 3 messages - should adjust to start with user
	system, to_summarize, recent = split_history(messages, keep_recent=3)

	# recent_messages should start with user message
	assert len(recent) > 0
	assert recent[0].role == "user"

	# messages_to_summarize should end with assistant (complete turn)
	if len(to_summarize) > 0:
	assert to_summarize[-1].role == "assistant"

	def test_split_history_handles_assistant_at_split_point(self):
	"""Test split_history when assistant message is at the intended split point."""
	from astrbot.core.agent.context.compressor import split_history

	messages = [
	self.create_message("user", "msg1"),
	self.create_message("assistant", "msg2"),
	self.create_message("user", "msg3"),
	self.create_message("assistant", "msg4"), # <- intended split here
	self.create_message("user", "msg5"),
	self.create_message("assistant", "msg6"),
	]

	# keep_recent=2 would normally split at index 4 (assistant msg4)
	# Should move back to include from msg5 (user)
	system, to_summarize, recent = split_history(messages, keep_recent=2)

	# recent should start with user message
	assert recent[0].role == "user"
	assert recent[0].content == "msg5"

	def test_split_history_all_assistant_messages(self):
	"""Test split_history when there are consecutive assistant messages."""
	from astrbot.core.agent.context.compressor import split_history

	messages = [
	self.create_message("user", "msg1"),
	self.create_message("assistant", "msg2"),
	self.create_message("assistant", "msg3"),
	self.create_message("assistant", "msg4"),
	]

	system, to_summarize, recent = split_history(messages, keep_recent=2)

	# Should find the user message and keep from there
	if len(recent) > 0:
	# Find first user message backwards
	assert any(m.role == "user" for m in messages)

	def test_split_history_with_system_messages(self):
	"""Test split_history preserves system messages separately."""
	from astrbot.core.agent.context.compressor import split_history

	messages = [
	self.create_message("system", "System 1"),
	self.create_message("system", "System 2"),
	self.create_message("user", "msg1"),
	self.create_message("assistant", "msg2"),
	self.create_message("user", "msg3"),
	]

	system, to_summarize, recent = split_history(messages, keep_recent=2)

	# System messages should be separate
	assert len(system) == 2
	assert all(m.role == "system" for m in system)

	# Recent should start with user
	if len(recent) > 0:
	assert recent[0].role == "user"