contextforge-demo / tests /test_normalization.py
Pablo
feat: APOHARA: Context Forge V5 — synthesis + rebrand complete
cf0a8ed
"""Tests for PrefixNormalizer."""
import pytest
from apohara_context_forge.normalization.prefix_normalizer import (
PrefixNormalizer,
create_prefix_normalizer,
SEPARATOR,
)
class TestPrefixNormalizerBasic:
    """Basic PrefixNormalizer tests: shared prefix, validation, separator, hashing."""

    def test_byte_identical_output_for_same_canonical_prompt(self):
        """Prompts built for different agents share the same system prefix."""
        normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")
        prompt1 = normalizer.normalize("agent1", "What is AI?", "retriever role")
        prompt2 = normalizer.normalize("agent2", "What is AI?", "summarizer role")

        # The system prompt prefix is everything before the first separator.
        system_prefix_1 = prompt1.split(SEPARATOR)[0]
        system_prefix_2 = prompt2.split(SEPARATOR)[0]

        # Role and user text differ between the agents, the prefix must not.
        assert system_prefix_1 == system_prefix_2
        assert system_prefix_1 == "You are a helpful AI."

    def test_sha256_validation_catches_mismatched_canonical_prompts(self):
        """validate_system_prompt() accepts the canonical prompt and rejects others."""
        normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")

        # The exact canonical prompt validates.
        assert normalizer.validate_system_prompt("You are a helpful AI.") is True

        # A prompt with different wording does not.
        assert normalizer.validate_system_prompt("You are a different AI.") is False

        # Surrounding whitespace is tolerated: the padded prompt is expected to
        # validate (the original note states that validation strips its input).
        assert normalizer.validate_system_prompt(" You are a helpful AI. ") is True

    def test_separator_enforcement(self):
        """normalize() joins exactly three segments with the "\\n\\n" separator."""
        normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")

        # The literal is spelled out on purpose: this test pins the exact value.
        assert normalizer.separator == "\n\n"

        prompt = normalizer.normalize("agent1", "What is AI?", "retriever role")

        # Three segments means exactly two separators.
        assert prompt.count("\n\n") == 2

        # Expected layout: system\n\nrole\n\nuser
        parts = prompt.split("\n\n")
        assert len(parts) == 3
        assert parts[0] == "You are a helpful AI."
        assert parts[1] == "retriever role"
        assert parts[2] == "What is AI?"

    def test_whitespace_stripping(self):
        """Leading and trailing whitespace is stripped from user and role prompts."""
        normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")

        # Trailing whitespace on both inputs.
        prompt = normalizer.normalize(
            "agent1",
            "What is AI? ",
            "retriever role ",
        )
        lines = prompt.split("\n\n")
        # Stripping must neither add segments nor disturb the system prefix.
        assert len(lines) == 3
        assert lines[0] == "You are a helpful AI."
        assert lines[1] == "retriever role"
        assert lines[2] == "What is AI?"

        # Leading whitespace on both inputs.
        prompt2 = normalizer.normalize(
            "agent2",
            " What is AI?",
            " summarizer role",
        )
        lines2 = prompt2.split("\n\n")
        assert len(lines2) == 3
        assert lines2[0] == "You are a helpful AI."
        assert lines2[1] == "summarizer role"
        assert lines2[2] == "What is AI?"

    def test_get_canonical_hash(self):
        """get_canonical_hash() returns a stable 64-character lowercase hex digest."""
        normalizer1 = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")
        normalizer2 = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")
        hash1 = normalizer1.get_canonical_hash()
        hash2 = normalizer2.get_canonical_hash()

        # Same prompt, same hash.
        assert hash1 == hash2

        # A SHA256 hex digest is 64 lowercase hexadecimal characters.
        assert len(hash1) == 64
        assert all(c in "0123456789abcdef" for c in hash1)

        # A different prompt gives a different hash.
        normalizer3 = PrefixNormalizer(canonical_system_prompt="You are a different AI.")
        hash3 = normalizer3.get_canonical_hash()
        assert hash1 != hash3

    def test_separator_property(self):
        """The separator property equals the module-level SEPARATOR constant."""
        normalizer = PrefixNormalizer(canonical_system_prompt="Test prompt.")
        assert normalizer.separator == SEPARATOR
        assert normalizer.separator == "\n\n"

    def test_canonical_hash_consistency(self):
        """Two instances built from the same prompt report the same hash."""
        normalizer_a = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")
        normalizer_b = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")
        assert normalizer_a.get_canonical_hash() == normalizer_b.get_canonical_hash()
class TestCreatePrefixNormalizer:
    """Exercise the create_prefix_normalizer() factory."""

    def test_create_with_custom_prompt(self):
        """An explicitly supplied canonical prompt is reported back unchanged."""
        built = create_prefix_normalizer(
            canonical_system_prompt="Custom system prompt."
        )
        canonical = built.get_canonical_prompt()
        assert canonical == "Custom system prompt."

    def test_create_with_default_prompt(self):
        """Calling the factory without arguments yields the default prompt."""
        # Adjacent literals concatenate into the single expected default string.
        default_prompt = (
            "You are a helpful AI assistant. "
            "Provide accurate, detailed, and thoughtful responses. "
            "Use chain-of-thought reasoning when appropriate."
        )
        assert create_prefix_normalizer().get_canonical_prompt() == default_prompt

    def test_create_prefix_normalizer_has_correct_separator(self):
        """A factory-built normalizer exposes the double-newline separator."""
        built = create_prefix_normalizer(canonical_system_prompt="Test prompt.")
        assert built.separator == "\n\n"
class TestNormalize:
    """Tests for the normalize() method."""

    def test_normalize_assembles_in_fixed_order(self):
        """normalize() emits the segments in the order system, role, user."""
        normalizer = PrefixNormalizer(canonical_system_prompt="System prompt.")
        prompt = normalizer.normalize(
            agent_id="test_agent",
            user_prompt="User question?",
            agent_role_prompt="Role description.",
        )

        # Presence checks first, so a missing segment fails with a clear message.
        assert prompt.startswith("System prompt.")
        assert "Role description." in prompt
        assert "User question?" in prompt

        # Presence alone would still pass if role and user were swapped, so
        # compare the positions of the three segments as well.
        system_at = prompt.find("System prompt.")
        role_at = prompt.find("Role description.")
        user_at = prompt.find("User question?")
        assert 0 == system_at < role_at < user_at

    def test_normalize_with_empty_role_prompt(self):
        """An empty role prompt still occupies its slot between the separators."""
        normalizer = PrefixNormalizer(canonical_system_prompt="System.")
        prompt = normalizer.normalize(
            agent_id="agent",
            user_prompt="Question",
            agent_role_prompt="",
        )
        parts = prompt.split("\n\n")
        assert parts[0] == "System."
        # The role slot is kept as an empty string rather than dropped.
        assert parts[1] == ""
        assert parts[2] == "Question"

    def test_normalize_registered_agents(self):
        """normalize() records each distinct agent id it is called with."""
        normalizer = PrefixNormalizer(canonical_system_prompt="System.")
        normalizer.normalize("agent1", "Q1", "Role1")
        normalizer.normalize("agent2", "Q2", "Role2")

        # NOTE: this reads the private `_registered_agents` attribute because
        # no public accessor is used here.
        assert len(normalizer._registered_agents) == 2