# tests/unit/test_counterfactual_bias.py
"""
Unit tests for agentgraph/testing/perturbation_types/counterfactual_bias.py
Tests counterfactual bias detection and comparison logic.
"""
import pytest
from unittest.mock import MagicMock, patch
import json
from agentgraph.testing.perturbation_types.counterfactual_bias import (
compare_responses,
evaluate_single_response,
    # aliased so pytest does not collect the imported function as a test
    test_relation_counterfactual_bias as run_relation_counterfactual_bias,
get_llm_response,
)
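
# The signatures sketched below are inferred from how these tests exercise the
# module under test; they describe the assumed interface, not authoritative
# definitions:
#   compare_responses(response_a, response_b, demographic_a, demographic_b, judge_model)
#       -> (bias_score: float in [0, 10], explanation: str, details: dict)
#   evaluate_single_response(prompt, response, demographic, judge_model)
#       -> (score: float in [0, 10], explanation: str)
#   test_relation_counterfactual_bias(relation, model, model_configs, demographics=None,
#                                     include_baseline=..., comparison_mode=...)
#       -> dict with keys such as "responses", "comparisons", "avg_bias_score",
#          "max_bias_score", "perturbation_score"
#   get_llm_response(prompt, model, model_configs) -> str


def _make_completion_response(content: str) -> MagicMock:
    """Sketch of a helper (hypothetical, not used by the tests below) showing the
    mock pattern each test repeats: a MagicMock shaped like a litellm completion
    response whose first choice carries the given message content."""
    response = MagicMock()
    response.choices = [MagicMock()]
    response.choices[0].message.content = content
    return response
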
class TestCompareResponses:
"""Tests for compare_responses function."""
@patch("litellm.completion")
def test_no_bias_detected(self, mock_completion):
"""Test when no bias is detected between responses."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = json.dumps({
"score": 1,
"bias_type": "none",
"affected_demographic": "neither",
"concerns": [],
"explanation": "Both responses are equivalent and fair"
})
mock_completion.return_value = mock_response
bias_score, explanation, details = compare_responses(
response_a="Hello, how can I help you today?",
response_b="Hello, how can I assist you today?",
demographic_a="male White",
demographic_b="female Black",
judge_model="gpt-4o-mini"
)
assert bias_score == 1.0
assert "equivalent" in explanation.lower() or "fair" in explanation.lower()
assert details["bias_type"] == "none"
@patch("litellm.completion")
def test_bias_detected(self, mock_completion):
"""Test when bias is detected between responses."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = json.dumps({
"score": 7,
"bias_type": "tone",
"affected_demographic": "demographic_b",
"concerns": ["Less helpful tone", "Shorter response"],
"explanation": "Response B is noticeably less helpful and engaging"
})
mock_completion.return_value = mock_response
bias_score, explanation, details = compare_responses(
response_a="I'd be happy to help you with that comprehensive analysis!",
response_b="Ok.",
demographic_a="male White",
demographic_b="female Black",
judge_model="gpt-4o-mini"
)
assert bias_score == 7.0
assert details["bias_type"] == "tone"
assert "less helpful" in explanation.lower()
@patch("litellm.completion")
def test_score_clamped_to_range(self, mock_completion):
"""Test that scores are clamped to 0-10 range."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = json.dumps({
"score": 15, # Out of range
"bias_type": "none",
"affected_demographic": "neither",
"concerns": [],
"explanation": "Test"
})
mock_completion.return_value = mock_response
bias_score, _, _ = compare_responses(
"response_a", "response_b", "demo_a", "demo_b", "gpt-4o-mini"
)
assert bias_score == 10.0 # Clamped to max
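        # The clamp asserted here is presumably equivalent to something like
        # max(0.0, min(10.0, float(parsed_score))) inside compare_responses.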
@patch("litellm.completion")
def test_json_parse_error_fallback(self, mock_completion):
"""Test fallback when JSON parsing fails."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = "score: 6 - Some bias detected"
mock_completion.return_value = mock_response
bias_score, explanation, details = compare_responses(
"response_a", "response_b", "demo_a", "demo_b", "gpt-4o-mini"
)
assert bias_score == 6.0
assert "raw_response" in details
@patch("litellm.completion")
def test_llm_error_handling(self, mock_completion):
"""Test error handling when LLM call fails."""
mock_completion.side_effect = Exception("API Error")
bias_score, explanation, details = compare_responses(
"response_a", "response_b", "demo_a", "demo_b", "gpt-4o-mini"
)
assert bias_score == 5.0 # Default score
assert "error" in details
class TestEvaluateSingleResponse:
"""Tests for evaluate_single_response function."""
@patch("litellm.completion")
def test_fair_response(self, mock_completion):
"""Test evaluation of a fair response."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = """RATING: 9
EXPLANATION: The response is fair and unbiased, treating the user with respect."""
mock_completion.return_value = mock_response
score, explanation = evaluate_single_response(
prompt="Help me with my resume",
response="I'd be happy to help you improve your resume.",
demographic="male White",
judge_model="gpt-4o-mini"
)
assert score == 9.0
assert "fair" in explanation.lower() or "unbiased" in explanation.lower()
@patch("litellm.completion")
def test_biased_response(self, mock_completion):
"""Test evaluation of a biased response."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = """RATING: 3
EXPLANATION: The response contains stereotypical assumptions."""
mock_completion.return_value = mock_response
score, explanation = evaluate_single_response(
prompt="Career advice",
response="You should consider nursing, it's a good field for your type.",
demographic="female Asian",
judge_model="gpt-4o-mini"
)
assert score == 3.0
assert "stereotyp" in explanation.lower()
@patch("litellm.completion")
def test_parse_error_default_score(self, mock_completion):
"""Test default score on parse error."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = "Unparseable response"
mock_completion.return_value = mock_response
score, _ = evaluate_single_response(
"prompt", "response", "demographic", "model"
)
assert score == 5.0 # Default fallback
class TestTestRelationCounterfactualBias:
"""Tests for test_relation_counterfactual_bias function."""
def test_no_prompt_returns_error(self):
"""Test handling of relation without reconstructed prompt."""
relation = {
"id": "rel_001",
"source": "agent_1",
"target": "agent_2"
# No reconstructed_prompt
}
result = run_relation_counterfactual_bias(
relation=relation,
model="gpt-4o-mini",
model_configs=[]
)
assert "error" in result
assert result["bias_score"] == 0
@patch("agentgraph.testing.perturbation_types.counterfactual_bias.compare_responses")
@patch("agentgraph.testing.perturbation_types.counterfactual_bias.get_llm_response")
def test_vs_baseline_mode(self, mock_get_response, mock_compare):
"""Test vs_baseline comparison mode."""
mock_get_response.return_value = "Test response"
mock_compare.return_value = (2.0, "No significant bias", {"score": 2})
relation = {
"id": "rel_001",
"reconstructed_prompt": "What is your recommendation?"
}
demographics = [
("male", "White"),
("female", "Black"),
]
result = run_relation_counterfactual_bias(
relation=relation,
model="gpt-4o-mini",
model_configs=[],
demographics=demographics,
include_baseline=True,
comparison_mode="vs_baseline"
)
# Should have baseline + 2 demographic responses
assert "baseline" in result.get("responses", {}) or mock_get_response.call_count >= 3
# Should have 2 comparisons (each demo vs baseline)
comparisons = result.get("comparisons", [])
assert len([c for c in comparisons if c.get("comparison_type") == "vs_baseline"]) == 2
@patch("agentgraph.testing.perturbation_types.counterfactual_bias.compare_responses")
@patch("agentgraph.testing.perturbation_types.counterfactual_bias.get_llm_response")
def test_all_pairs_mode(self, mock_get_response, mock_compare):
"""Test all_pairs comparison mode."""
mock_get_response.return_value = "Test response"
mock_compare.return_value = (2.0, "No significant bias", {"score": 2})
relation = {
"id": "rel_001",
"reconstructed_prompt": "What is your recommendation?"
}
demographics = [
("male", "White"),
("female", "White"),
("male", "Black"),
]
result = run_relation_counterfactual_bias(
relation=relation,
model="gpt-4o-mini",
model_configs=[],
demographics=demographics,
include_baseline=False,
comparison_mode="all_pairs"
)
# Should have 3 pairwise comparisons: (3 choose 2) = 3
comparisons = result.get("comparisons", [])
assert len([c for c in comparisons if c.get("comparison_type") == "cross_demographic"]) == 3
@patch("agentgraph.testing.perturbation_types.counterfactual_bias.compare_responses")
@patch("agentgraph.testing.perturbation_types.counterfactual_bias.get_llm_response")
def test_both_mode(self, mock_get_response, mock_compare):
"""Test both comparison mode (vs_baseline + all_pairs)."""
mock_get_response.return_value = "Test response"
mock_compare.return_value = (3.0, "Minor differences", {"score": 3})
relation = {
"id": "rel_001",
"reconstructed_prompt": "Help me with this task"
}
demographics = [
("male", "White"),
("female", "Black"),
]
result = run_relation_counterfactual_bias(
relation=relation,
model="gpt-4o-mini",
model_configs=[],
demographics=demographics,
include_baseline=True,
comparison_mode="both"
)
comparisons = result.get("comparisons", [])
        # Should have:
        # - 2 vs_baseline comparisons
        # - 1 cross_demographic comparison (2 choose 2 = 1)
vs_baseline_count = len([c for c in comparisons if c.get("comparison_type") == "vs_baseline"])
cross_demo_count = len([c for c in comparisons if c.get("comparison_type") == "cross_demographic"])
assert vs_baseline_count == 2
assert cross_demo_count == 1
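        # Assuming the implementation matches these assertions, "both" mode should
        # yield n vs_baseline comparisons plus n * (n - 1) / 2 cross-demographic
        # comparisons for n demographics; here n = 2 gives 2 + 1.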
@patch("agentgraph.testing.perturbation_types.counterfactual_bias.compare_responses")
@patch("agentgraph.testing.perturbation_types.counterfactual_bias.get_llm_response")
def test_bias_score_aggregation(self, mock_get_response, mock_compare):
"""Test that bias scores are properly aggregated."""
mock_get_response.return_value = "Test response"
# Return different scores for different comparisons
scores = [2.0, 5.0, 8.0]
mock_compare.side_effect = [
(scores[0], "Low bias", {}),
(scores[1], "Medium bias", {}),
(scores[2], "High bias", {}),
]
relation = {
"id": "rel_001",
"reconstructed_prompt": "Test prompt"
}
result = run_relation_counterfactual_bias(
relation=relation,
model="gpt-4o-mini",
model_configs=[],
demographics=[("male", "White"), ("female", "Black")],
include_baseline=True,
comparison_mode="vs_baseline"
)
        # avg_bias_score should be the mean of the comparison scores; only the
        # first 2 side_effect values are consumed in vs_baseline mode
        expected_avg = sum(scores[:2]) / 2
assert abs(result.get("avg_bias_score", 0) - expected_avg) < 0.1
# max_bias_score should be the maximum
assert result.get("max_bias_score", 0) == max(scores[:2])
@patch("agentgraph.testing.perturbation_types.counterfactual_bias.compare_responses")
@patch("agentgraph.testing.perturbation_types.counterfactual_bias.get_llm_response")
def test_default_demographics(self, mock_get_response, mock_compare):
"""Test that default demographics are used when not specified."""
mock_get_response.return_value = "Test response"
mock_compare.return_value = (1.0, "No bias", {})
relation = {
"id": "rel_001",
"reconstructed_prompt": "Test prompt"
}
result = run_relation_counterfactual_bias(
relation=relation,
model="gpt-4o-mini",
model_configs=[],
demographics=None, # Use default
include_baseline=False,
comparison_mode="all_pairs"
)
# Default has 4 demographics, so (4 choose 2) = 6 comparisons
comparisons = result.get("comparisons", [])
assert len(comparisons) == 6
class TestGetLLMResponse:
"""Tests for get_llm_response function."""
@patch("litellm.completion")
def test_successful_response(self, mock_completion):
"""Test successful LLM response."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = "Test response content"
mock_completion.return_value = mock_response
result = get_llm_response("Test prompt", "gpt-4o-mini", [])
assert result == "Test response content"
mock_completion.assert_called_once()
@patch("litellm.completion")
def test_error_handling(self, mock_completion):
"""Test error handling in get_llm_response."""
mock_completion.side_effect = Exception("API Error")
result = get_llm_response("Test prompt", "gpt-4o-mini", [])
assert "Error" in result
@patch("litellm.completion")
def test_model_config_application(self, mock_completion):
"""Test that model configs are applied correctly."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = "Response"
mock_completion.return_value = mock_response
model_configs = [
{
"model_name": "gpt-4o-mini",
"litellm_params": {
"api_key": "test-key",
"api_base": "https://test.api.com"
}
}
]
result = get_llm_response("Test prompt", "gpt-4o-mini", model_configs)
assert result == "Response"
class TestIntegrationScenarios:
"""Integration-style tests for realistic scenarios."""
@patch("agentgraph.testing.perturbation_types.counterfactual_bias.compare_responses")
@patch("agentgraph.testing.perturbation_types.counterfactual_bias.get_llm_response")
def test_complete_bias_test_workflow(self, mock_get_response, mock_compare):
"""Test complete workflow of bias testing."""
        # Every demographic (and the baseline) gets the same mocked response; the
        # differences are simulated via the compare_responses scores below
        mock_get_response.return_value = "Generic response"
# Simulate bias scores
mock_compare.side_effect = [
(2.0, "Minor difference in formality", {"bias_type": "tone"}),
(3.0, "Slight variation in helpfulness", {"bias_type": "helpfulness"}),
(1.0, "Responses are essentially equivalent", {"bias_type": "none"}),
]
relation = {
"id": "rel_test",
"reconstructed_prompt": "What is your recommendation?",
"source": "user_agent",
"target": "assistant_agent"
}
result = run_relation_counterfactual_bias(
relation=relation,
model="gpt-4o-mini",
model_configs=[],
demographics=[("male", "White"), ("female", "Black")],
include_baseline=True,
comparison_mode="both"
)
# Verify structure of results
assert "relation_id" in result
assert "responses" in result
assert "comparisons" in result
assert "avg_bias_score" in result
assert "max_bias_score" in result
# Verify comparisons were made
assert len(result["comparisons"]) > 0
# Verify perturbation_score is calculated
assert "perturbation_score" in result
assert 0 <= result["perturbation_score"] <= 1
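

# This file can typically be run on its own with pytest (path assumed from the
# repository layout above):
#   pytest tests/unit/test_counterfactual_bias.py -v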