""" Unit tests for agentgraph/testing/perturbation_types/counterfactual_bias.py Tests counterfactual bias detection and comparison logic. """ import pytest from unittest.mock import MagicMock, patch import json from agentgraph.testing.perturbation_types.counterfactual_bias import ( compare_responses, evaluate_single_response, test_relation_counterfactual_bias as run_relation_counterfactual_bias, get_llm_response, ) class TestCompareResponses: """Tests for compare_responses function.""" @patch("litellm.completion") def test_no_bias_detected(self, mock_completion): """Test when no bias is detected between responses.""" mock_response = MagicMock() mock_response.choices = [MagicMock()] mock_response.choices[0].message.content = json.dumps({ "score": 1, "bias_type": "none", "affected_demographic": "neither", "concerns": [], "explanation": "Both responses are equivalent and fair" }) mock_completion.return_value = mock_response bias_score, explanation, details = compare_responses( response_a="Hello, how can I help you today?", response_b="Hello, how can I assist you today?", demographic_a="male White", demographic_b="female Black", judge_model="gpt-4o-mini" ) assert bias_score == 1.0 assert "equivalent" in explanation.lower() or "fair" in explanation.lower() assert details["bias_type"] == "none" @patch("litellm.completion") def test_bias_detected(self, mock_completion): """Test when bias is detected between responses.""" mock_response = MagicMock() mock_response.choices = [MagicMock()] mock_response.choices[0].message.content = json.dumps({ "score": 7, "bias_type": "tone", "affected_demographic": "demographic_b", "concerns": ["Less helpful tone", "Shorter response"], "explanation": "Response B is noticeably less helpful and engaging" }) mock_completion.return_value = mock_response bias_score, explanation, details = compare_responses( response_a="I'd be happy to help you with that comprehensive analysis!", response_b="Ok.", demographic_a="male White", demographic_b="female Black", judge_model="gpt-4o-mini" ) assert bias_score == 7.0 assert details["bias_type"] == "tone" assert "less helpful" in explanation.lower() @patch("litellm.completion") def test_score_clamped_to_range(self, mock_completion): """Test that scores are clamped to 0-10 range.""" mock_response = MagicMock() mock_response.choices = [MagicMock()] mock_response.choices[0].message.content = json.dumps({ "score": 15, # Out of range "bias_type": "none", "affected_demographic": "neither", "concerns": [], "explanation": "Test" }) mock_completion.return_value = mock_response bias_score, _, _ = compare_responses( "response_a", "response_b", "demo_a", "demo_b", "gpt-4o-mini" ) assert bias_score == 10.0 # Clamped to max @patch("litellm.completion") def test_json_parse_error_fallback(self, mock_completion): """Test fallback when JSON parsing fails.""" mock_response = MagicMock() mock_response.choices = [MagicMock()] mock_response.choices[0].message.content = "score: 6 - Some bias detected" mock_completion.return_value = mock_response bias_score, explanation, details = compare_responses( "response_a", "response_b", "demo_a", "demo_b", "gpt-4o-mini" ) assert bias_score == 6.0 assert "raw_response" in details @patch("litellm.completion") def test_llm_error_handling(self, mock_completion): """Test error handling when LLM call fails.""" mock_completion.side_effect = Exception("API Error") bias_score, explanation, details = compare_responses( "response_a", "response_b", "demo_a", "demo_b", "gpt-4o-mini" ) assert bias_score == 5.0 # Default score assert "error" in details class TestEvaluateSingleResponse: """Tests for evaluate_single_response function.""" @patch("litellm.completion") def test_fair_response(self, mock_completion): """Test evaluation of a fair response.""" mock_response = MagicMock() mock_response.choices = [MagicMock()] mock_response.choices[0].message.content = """RATING: 9 EXPLANATION: The response is fair and unbiased, treating the user with respect.""" mock_completion.return_value = mock_response score, explanation = evaluate_single_response( prompt="Help me with my resume", response="I'd be happy to help you improve your resume.", demographic="male White", judge_model="gpt-4o-mini" ) assert score == 9.0 assert "fair" in explanation.lower() or "unbiased" in explanation.lower() @patch("litellm.completion") def test_biased_response(self, mock_completion): """Test evaluation of a biased response.""" mock_response = MagicMock() mock_response.choices = [MagicMock()] mock_response.choices[0].message.content = """RATING: 3 EXPLANATION: The response contains stereotypical assumptions.""" mock_completion.return_value = mock_response score, explanation = evaluate_single_response( prompt="Career advice", response="You should consider nursing, it's a good field for your type.", demographic="female Asian", judge_model="gpt-4o-mini" ) assert score == 3.0 assert "stereotyp" in explanation.lower() @patch("litellm.completion") def test_parse_error_default_score(self, mock_completion): """Test default score on parse error.""" mock_response = MagicMock() mock_response.choices = [MagicMock()] mock_response.choices[0].message.content = "Unparseable response" mock_completion.return_value = mock_response score, _ = evaluate_single_response( "prompt", "response", "demographic", "model" ) assert score == 5.0 # Default fallback class TestTestRelationCounterfactualBias: """Tests for test_relation_counterfactual_bias function.""" def test_no_prompt_returns_error(self): """Test handling of relation without reconstructed prompt.""" relation = { "id": "rel_001", "source": "agent_1", "target": "agent_2" # No reconstructed_prompt } result = run_relation_counterfactual_bias( relation=relation, model="gpt-4o-mini", model_configs=[] ) assert "error" in result assert result["bias_score"] == 0 @patch("agentgraph.testing.perturbation_types.counterfactual_bias.compare_responses") @patch("agentgraph.testing.perturbation_types.counterfactual_bias.get_llm_response") def test_vs_baseline_mode(self, mock_get_response, mock_compare): """Test vs_baseline comparison mode.""" mock_get_response.return_value = "Test response" mock_compare.return_value = (2.0, "No significant bias", {"score": 2}) relation = { "id": "rel_001", "reconstructed_prompt": "What is your recommendation?" } demographics = [ ("male", "White"), ("female", "Black"), ] result = run_relation_counterfactual_bias( relation=relation, model="gpt-4o-mini", model_configs=[], demographics=demographics, include_baseline=True, comparison_mode="vs_baseline" ) # Should have baseline + 2 demographic responses assert "baseline" in result.get("responses", {}) or mock_get_response.call_count >= 3 # Should have 2 comparisons (each demo vs baseline) comparisons = result.get("comparisons", []) assert len([c for c in comparisons if c.get("comparison_type") == "vs_baseline"]) == 2 @patch("agentgraph.testing.perturbation_types.counterfactual_bias.compare_responses") @patch("agentgraph.testing.perturbation_types.counterfactual_bias.get_llm_response") def test_all_pairs_mode(self, mock_get_response, mock_compare): """Test all_pairs comparison mode.""" mock_get_response.return_value = "Test response" mock_compare.return_value = (2.0, "No significant bias", {"score": 2}) relation = { "id": "rel_001", "reconstructed_prompt": "What is your recommendation?" } demographics = [ ("male", "White"), ("female", "White"), ("male", "Black"), ] result = run_relation_counterfactual_bias( relation=relation, model="gpt-4o-mini", model_configs=[], demographics=demographics, include_baseline=False, comparison_mode="all_pairs" ) # Should have 3 pairwise comparisons: (3 choose 2) = 3 comparisons = result.get("comparisons", []) assert len([c for c in comparisons if c.get("comparison_type") == "cross_demographic"]) == 3 @patch("agentgraph.testing.perturbation_types.counterfactual_bias.compare_responses") @patch("agentgraph.testing.perturbation_types.counterfactual_bias.get_llm_response") def test_both_mode(self, mock_get_response, mock_compare): """Test both comparison mode (vs_baseline + all_pairs).""" mock_get_response.return_value = "Test response" mock_compare.return_value = (3.0, "Minor differences", {"score": 3}) relation = { "id": "rel_001", "reconstructed_prompt": "Help me with this task" } demographics = [ ("male", "White"), ("female", "Black"), ] result = run_relation_counterfactual_bias( relation=relation, model="gpt-4o-mini", model_configs=[], demographics=demographics, include_baseline=True, comparison_mode="both" ) comparisons = result.get("comparisons", []) # Should have: # - 2 vs_baseline comparisons # - 1 cross_demographic comparison (2 choose 2) vs_baseline_count = len([c for c in comparisons if c.get("comparison_type") == "vs_baseline"]) cross_demo_count = len([c for c in comparisons if c.get("comparison_type") == "cross_demographic"]) assert vs_baseline_count == 2 assert cross_demo_count == 1 @patch("agentgraph.testing.perturbation_types.counterfactual_bias.compare_responses") @patch("agentgraph.testing.perturbation_types.counterfactual_bias.get_llm_response") def test_bias_score_aggregation(self, mock_get_response, mock_compare): """Test that bias scores are properly aggregated.""" mock_get_response.return_value = "Test response" # Return different scores for different comparisons scores = [2.0, 5.0, 8.0] mock_compare.side_effect = [ (scores[0], "Low bias", {}), (scores[1], "Medium bias", {}), (scores[2], "High bias", {}), ] relation = { "id": "rel_001", "reconstructed_prompt": "Test prompt" } result = run_relation_counterfactual_bias( relation=relation, model="gpt-4o-mini", model_configs=[], demographics=[("male", "White"), ("female", "Black")], include_baseline=True, comparison_mode="vs_baseline" ) # avg_bias_score should be calculated expected_avg = sum(scores[:2]) / 2 # Only 2 comparisons assert abs(result.get("avg_bias_score", 0) - expected_avg) < 0.1 # max_bias_score should be the maximum assert result.get("max_bias_score", 0) == max(scores[:2]) @patch("agentgraph.testing.perturbation_types.counterfactual_bias.compare_responses") @patch("agentgraph.testing.perturbation_types.counterfactual_bias.get_llm_response") def test_default_demographics(self, mock_get_response, mock_compare): """Test that default demographics are used when not specified.""" mock_get_response.return_value = "Test response" mock_compare.return_value = (1.0, "No bias", {}) relation = { "id": "rel_001", "reconstructed_prompt": "Test prompt" } result = run_relation_counterfactual_bias( relation=relation, model="gpt-4o-mini", model_configs=[], demographics=None, # Use default include_baseline=False, comparison_mode="all_pairs" ) # Default has 4 demographics, so (4 choose 2) = 6 comparisons comparisons = result.get("comparisons", []) assert len(comparisons) == 6 class TestGetLLMResponse: """Tests for get_llm_response function.""" @patch("litellm.completion") def test_successful_response(self, mock_completion): """Test successful LLM response.""" mock_response = MagicMock() mock_response.choices = [MagicMock()] mock_response.choices[0].message.content = "Test response content" mock_completion.return_value = mock_response result = get_llm_response("Test prompt", "gpt-4o-mini", []) assert result == "Test response content" mock_completion.assert_called_once() @patch("litellm.completion") def test_error_handling(self, mock_completion): """Test error handling in get_llm_response.""" mock_completion.side_effect = Exception("API Error") result = get_llm_response("Test prompt", "gpt-4o-mini", []) assert "Error" in result @patch("litellm.completion") def test_model_config_application(self, mock_completion): """Test that model configs are applied correctly.""" mock_response = MagicMock() mock_response.choices = [MagicMock()] mock_response.choices[0].message.content = "Response" mock_completion.return_value = mock_response model_configs = [ { "model_name": "gpt-4o-mini", "litellm_params": { "api_key": "test-key", "api_base": "https://test.api.com" } } ] result = get_llm_response("Test prompt", "gpt-4o-mini", model_configs) assert result == "Response" class TestIntegrationScenarios: """Integration-style tests for realistic scenarios.""" @patch("agentgraph.testing.perturbation_types.counterfactual_bias.compare_responses") @patch("agentgraph.testing.perturbation_types.counterfactual_bias.get_llm_response") def test_complete_bias_test_workflow(self, mock_get_response, mock_compare): """Test complete workflow of bias testing.""" # Simulate different responses for different demographics mock_get_response.return_value = "Generic response" # Simulate bias scores mock_compare.side_effect = [ (2.0, "Minor difference in formality", {"bias_type": "tone"}), (3.0, "Slight variation in helpfulness", {"bias_type": "helpfulness"}), (1.0, "Responses are essentially equivalent", {"bias_type": "none"}), ] relation = { "id": "rel_test", "reconstructed_prompt": "What is your recommendation?", "source": "user_agent", "target": "assistant_agent" } result = run_relation_counterfactual_bias( relation=relation, model="gpt-4o-mini", model_configs=[], demographics=[("male", "White"), ("female", "Black")], include_baseline=True, comparison_mode="both" ) # Verify structure of results assert "relation_id" in result assert "responses" in result assert "comparisons" in result assert "avg_bias_score" in result assert "max_bias_score" in result # Verify comparisons were made assert len(result["comparisons"]) > 0 # Verify perturbation_score is calculated assert "perturbation_score" in result assert 0 <= result["perturbation_score"] <= 1