Spaces:

TheLinconX
/

contextforge-demo

Sleeping

contextforge-demo / tests /test_normalization.py

Pablo

feat: APOHARA: Context Forge V5 — synthesis + rebrand complete

cf0a8ed 3 days ago

7.49 kB

	"""Tests for PrefixNormalizer."""
	import pytest
	from apohara_context_forge.normalization.prefix_normalizer import (
	PrefixNormalizer,
	create_prefix_normalizer,
	SEPARATOR,
	)


	class TestPrefixNormalizerBasic:
	    """Basic PrefixNormalizer tests.

	    Covers the shared system-prompt prefix, system-prompt validation,
	    the segment separator, whitespace stripping, and the canonical hash.
	    """

	    def test_byte_identical_output_for_same_canonical_prompt(self):
	        """Test normalize() produces byte-identical output for same canonical prompt."""
	        normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")

	        # Same user prompt, different agents and roles.
	        prompt1 = normalizer.normalize("agent1", "What is AI?", "retriever role")
	        prompt2 = normalizer.normalize("agent2", "What is AI?", "summarizer role")

	        # Extract system prompt prefix (everything before first separator)
	        system_prefix_1 = prompt1.split(SEPARATOR)[0]
	        system_prefix_2 = prompt2.split(SEPARATOR)[0]

	        # Both should have the same system prompt prefix
	        assert system_prefix_1 == system_prefix_2
	        assert system_prefix_1 == "You are a helpful AI."

	    def test_sha256_validation_catches_mismatched_canonical_prompts(self):
	        """Test SHA256 validation catches mismatched canonical prompts."""
	        normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")

	        # Valid matching prompt
	        assert normalizer.validate_system_prompt("You are a helpful AI.") is True

	        # Different prompt should not match
	        assert normalizer.validate_system_prompt("You are a different AI.") is False

	        # Surrounding whitespace alone must NOT cause a mismatch: validation
	        # is expected to strip its input before comparing.
	        assert normalizer.validate_system_prompt(" You are a helpful AI. ") is True

	    def test_separator_enforcement(self):
	        """Test separator enforcement."""
	        normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")

	        # Default separator should be exactly "\n\n". The literal is used on
	        # purpose here: this test pins the concrete value, not the constant.
	        assert normalizer.separator == "\n\n"

	        # Output should contain exactly two newlines between segments
	        prompt = normalizer.normalize("agent1", "What is AI?", "retriever role")

	        # Three segments means exactly two separators
	        assert prompt.count("\n\n") == 2

	        # Should have pattern: system\n\nrole\n\nuser
	        parts = prompt.split("\n\n")
	        assert len(parts) == 3
	        assert parts[0] == "You are a helpful AI."
	        assert parts[1] == "retriever role"
	        assert parts[2] == "What is AI?"

	    def test_whitespace_stripping(self):
	        """Test whitespace stripping from user_prompt and role_prompt."""
	        normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")

	        # Trailing whitespace should be stripped
	        prompt = normalizer.normalize(
	            "agent1",
	            "What is AI? ",
	            "retriever role ",
	        )

	        # Verify no trailing whitespace survives in the role/user segments
	        segments = prompt.split(SEPARATOR)
	        assert segments[1] == "retriever role"
	        assert segments[2] == "What is AI?"

	        # Leading whitespace should also be stripped
	        prompt2 = normalizer.normalize(
	            "agent2",
	            " What is AI?",
	            " summarizer role",
	        )
	        segments2 = prompt2.split(SEPARATOR)
	        assert segments2[1] == "summarizer role"
	        assert segments2[2] == "What is AI?"

	    def test_get_canonical_hash(self):
	        """Test get_canonical_hash() returns consistent SHA256 hex string."""
	        normalizer1 = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")
	        normalizer2 = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")

	        hash1 = normalizer1.get_canonical_hash()
	        hash2 = normalizer2.get_canonical_hash()

	        # Same prompt should produce same hash
	        assert hash1 == hash2

	        # Should be a valid SHA256 hex string (64 lowercase hex characters)
	        assert len(hash1) == 64
	        assert all(c in "0123456789abcdef" for c in hash1)

	        # Different prompt should produce different hash
	        normalizer3 = PrefixNormalizer(canonical_system_prompt="You are a different AI.")
	        hash3 = normalizer3.get_canonical_hash()

	        assert hash1 != hash3

	    def test_separator_property(self):
	        """Test separator property returns the correct string."""
	        normalizer = PrefixNormalizer(canonical_system_prompt="Test prompt.")
	        # Checked against both the exported constant and its literal value.
	        assert normalizer.separator == SEPARATOR
	        assert normalizer.separator == "\n\n"

	    def test_canonical_hash_consistency(self):
	        """Test two instances with same prompt have same hash."""
	        normalizer_a = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")
	        normalizer_b = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")

	        assert normalizer_a.get_canonical_hash() == normalizer_b.get_canonical_hash()


	class TestCreatePrefixNormalizer:
	    """Exercise the create_prefix_normalizer() factory function."""

	    def test_create_with_custom_prompt(self):
	        """A caller-supplied canonical prompt is reported back unchanged."""
	        custom_prompt = "Custom system prompt."

	        built = create_prefix_normalizer(canonical_system_prompt=custom_prompt)

	        assert built.get_canonical_prompt() == custom_prompt

	    def test_create_with_default_prompt(self):
	        """Calling the factory with no arguments yields the default prompt."""
	        # The expected default, kept as fragments so each sentence stays on
	        # its own line; joined with no extra characters in between.
	        default_fragments = (
	            "You are a helpful AI assistant. ",
	            "Provide accurate, detailed, and thoughtful responses. ",
	            "Use chain-of-thought reasoning when appropriate.",
	        )

	        reported_prompt = create_prefix_normalizer().get_canonical_prompt()

	        assert reported_prompt == "".join(default_fragments)

	    def test_create_prefix_normalizer_has_correct_separator(self):
	        """A factory-built normalizer exposes the double-newline separator."""
	        built = create_prefix_normalizer(canonical_system_prompt="Test prompt.")
	        observed_separator = built.separator
	        assert observed_separator == "\n\n"


	class TestNormalize:
	    """Tests for normalize() method.

	    Covers segment ordering, an empty role prompt, and agent tracking.
	    """

	    def test_normalize_assembles_in_fixed_order(self):
	        """Test normalize() assembles segments in fixed order."""
	        normalizer = PrefixNormalizer(canonical_system_prompt="System prompt.")

	        prompt = normalizer.normalize(
	            agent_id="test_agent",
	            user_prompt="User question?",
	            agent_role_prompt="Role description.",
	        )

	        # Order should be: system, role, user
	        assert prompt.startswith("System prompt.")
	        # Substring membership alone would also pass if role and user were
	        # swapped, so compare the full segment sequence instead.
	        assert prompt.split(SEPARATOR) == [
	            "System prompt.",
	            "Role description.",
	            "User question?",
	        ]

	    def test_normalize_with_empty_role_prompt(self):
	        """Test normalize() with empty role prompt."""
	        normalizer = PrefixNormalizer(canonical_system_prompt="System.")

	        prompt = normalizer.normalize(
	            agent_id="agent",
	            user_prompt="Question",
	            agent_role_prompt="",
	        )

	        # The empty role segment is still emitted, so the user prompt stays
	        # in the third position.
	        parts = prompt.split("\n\n")
	        assert parts[0] == "System."
	        assert parts[1] == ""
	        assert parts[2] == "Question"

	    def test_normalize_registered_agents(self):
	        """Test normalize() tracks registered agents."""
	        normalizer = PrefixNormalizer(canonical_system_prompt="System.")

	        normalizer.normalize("agent1", "Q1", "Role1")
	        normalizer.normalize("agent2", "Q2", "Role2")

	        # Agents should be tracked (internal state).
	        # NOTE(review): this reads the private _registered_agents attribute,
	        # so the test is coupled to the implementation; switch to a public
	        # accessor if one exists.
	        assert len(normalizer._registered_agents) == 2