"""
Test script to verify language detection case sensitivity and confidence score fixes.
"""
|
|
import json
import sys

import requests
|
|
def test_case_sensitivity_fix(api_url="https://sematech-sema-api.hf.space"):
    """Check that language detection is stable across letter-case variations.

    Posts every case variation of a few fixed phrases to
    ``{api_url}/detect-language`` and prints one result line per request,
    followed by a summary.

    A request counts as successful when the detected language equals the
    expected code, or when it differs but the reported confidence is above
    0.5. Non-200 responses and exceptions count as failures.

    Args:
        api_url: Base URL of the API; a trailing slash is tolerated.

    Returns:
        True when at least 80% of the requests were successful.
    """
    print("🔧 Testing Case Sensitivity Fix")
    print("=" * 50)

    test_cases = [
        {
            "variations": [
                "Habari ya asubuhi",
                "habari ya asubuhi",
                "HABARI YA ASUBUHI",
                "HaBaRi Ya AsUbUhI",
            ],
            "expected_language": "swh_Latn",
            "language_name": "Swahili",
        },
        {
            "variations": [
                "Good morning everyone",
                "good morning everyone",
                "GOOD MORNING EVERYONE",
                "GoOd MoRnInG eVeRyOnE",
            ],
            "expected_language": "eng_Latn",
            "language_name": "English",
        },
        {
            "variations": [
                "Bonjour tout le monde",
                "bonjour tout le monde",
                "BONJOUR TOUT LE MONDE",
            ],
            "expected_language": "fra_Latn",
            "language_name": "French",
        },
    ]

    # Built once: the endpoint does not change between requests.
    endpoint = f"{api_url.rstrip('/')}/detect-language"

    total_tests = 0
    successful_tests = 0

    for test_group in test_cases:
        expected = test_group["expected_language"]
        print(f"\n🧪 Testing {test_group['language_name']} variations:")

        for variation in test_group["variations"]:
            total_tests += 1

            # Broad catch is deliberate: one failing request (network error,
            # malformed payload) must not abort the remaining cases.
            try:
                response = requests.post(
                    endpoint,
                    headers={"Content-Type": "application/json"},
                    json={"text": variation},
                    timeout=10,
                )

                if response.status_code != 200:
                    print(f" ❌ '{variation}' → HTTP {response.status_code}")
                    try:
                        detail = response.json().get("detail", "Unknown error")
                    except (ValueError, AttributeError):
                        # Body is not JSON, or is JSON but not an object:
                        # fall back to the raw response text.
                        detail = response.text
                    print(f" Error: {detail}")
                    continue

                data = response.json()
                detected = data["detected_language"]
                confidence = data["confidence"]

                if detected == expected:
                    print(
                        f" ✅ '{variation}' → {detected} "
                        f"(confidence: {confidence:.3f})"
                    )
                    successful_tests += 1
                else:
                    print(
                        f" ⚠️ '{variation}' → {detected} "
                        f"(expected: {expected}, confidence: {confidence:.3f})"
                    )
                    # A mismatch is still counted as a success when the
                    # reported confidence is above 0.5.
                    if confidence > 0.5:
                        successful_tests += 1

            except Exception as e:
                print(f" 💥 '{variation}' → Exception: {e}")

    print("\n📊 Case Sensitivity Test Results:")
    print(f" ✅ Successful: {successful_tests}/{total_tests}")
    print(f" 📈 Success Rate: {(successful_tests / total_tests) * 100:.1f}%")

    # Pass threshold: 80% of all requests.
    return successful_tests >= (total_tests * 0.8)
|
|
def test_confidence_score_fix(api_url="https://sematech-sema-api.hf.space"):
    """Check that reported confidence scores lie within the range [0.0, 1.0].

    Posts a fixed list of short texts to ``{api_url}/detect-language`` and
    prints one result line per request, followed by a summary.

    An issue is recorded when the confidence is above 1.0 or below 0.0, when
    the response status is not 200, or when the request or response handling
    raises an exception.

    Args:
        api_url: Base URL of the API; a trailing slash is tolerated.

    Returns:
        True when no issue was recorded for any text.
    """
    print("\n🔧 Testing Confidence Score Normalization")
    print("=" * 50)

    test_cases = [
        "hello",
        "the",
        "habari",
        "bonjour",
        "hola",
        "a",
        "I am fine thank you",
        "je suis bien merci",
    ]

    # Built once: the endpoint does not change between requests.
    endpoint = f"{api_url.rstrip('/')}/detect-language"

    confidence_issues = 0
    total_tests = len(test_cases)

    for text in test_cases:
        # Broad catch is deliberate: any failure for one text is recorded as
        # an issue and the remaining texts are still checked.
        try:
            response = requests.post(
                endpoint,
                headers={"Content-Type": "application/json"},
                json={"text": text},
                timeout=10,
            )

            if response.status_code != 200:
                print(f" ❌ '{text}' → HTTP {response.status_code}")
                confidence_issues += 1
                continue

            data = response.json()
            confidence = data["confidence"]
            detected = data["detected_language"]

            if confidence > 1.0:
                print(
                    f" ⚠️ '{text}' → confidence {confidence:.6f} > 1.0 "
                    "(not normalized)"
                )
                confidence_issues += 1
            elif confidence < 0.0:
                print(f" ⚠️ '{text}' → confidence {confidence:.6f} < 0.0 (invalid)")
                confidence_issues += 1
            else:
                print(f" ✅ '{text}' → {detected} (confidence: {confidence:.3f})")

        except Exception as e:
            print(f" 💥 '{text}' → Exception: {e}")
            confidence_issues += 1

    print("\n📊 Confidence Score Test Results:")
    print(
        f" ✅ Valid confidence scores: "
        f"{total_tests - confidence_issues}/{total_tests}"
    )
    print(f" ⚠️ Issues found: {confidence_issues}")

    return confidence_issues == 0
|
|
def test_multilingual_chatbot_scenario(api_url="https://sematech-sema-api.hf.space"):
    """Check the English / non-English routing decision for sample messages.

    Posts each sample message to ``{api_url}/detect-language`` and compares
    the response's ``is_english`` flag with the flow expected for that
    message: ``direct_english`` when the flag is true, and
    ``translate_to_english`` otherwise. Prints the details of each scenario
    and a summary.

    Non-200 responses and exceptions are reported and do not count as a
    correct flow.

    Args:
        api_url: Base URL of the API; a trailing slash is tolerated.

    Returns:
        True when at least 80% of the scenarios took the expected flow.
    """
    print("\n🤖 Testing Multilingual Chatbot Scenario")
    print("=" * 50)

    user_inputs = [
        {"text": "Hello, how are you?", "expected_flow": "direct_english"},
        {"text": "Habari, hujambo?", "expected_flow": "translate_to_english"},
        {"text": "Bonjour, comment ça va?", "expected_flow": "translate_to_english"},
        {"text": "Hola, ¿cómo estás?", "expected_flow": "translate_to_english"},
        {"text": "What's the weather like?", "expected_flow": "direct_english"},
        {"text": "Hali ya hewa ni vipi?", "expected_flow": "translate_to_english"},
    ]

    # Built once: the endpoint does not change between requests.
    endpoint = f"{api_url.rstrip('/')}/detect-language"

    # England flag emoji (black flag plus tag sequence "gbeng"), written with
    # escapes so the invisible tag characters cannot be lost in editing.
    england_flag = (
        "\U0001F3F4\U000E0067\U000E0062\U000E0065"
        "\U000E006E\U000E0067\U000E007F"
    )

    successful_scenarios = 0

    for i, user_input in enumerate(user_inputs, 1):
        print(f"\n🎯 Scenario {i}: '{user_input['text']}'")

        # Broad catch is deliberate: a failing scenario is reported and the
        # remaining scenarios still run.
        try:
            response = requests.post(
                endpoint,
                headers={"Content-Type": "application/json"},
                json={"text": user_input["text"]},
                timeout=10,
            )

            if response.status_code != 200:
                print(f" ❌ Detection failed: HTTP {response.status_code}")
                continue

            detection = response.json()
            is_english = detection["is_english"]
            detected_lang = detection["detected_language"]
            confidence = detection["confidence"]

            print(f" 🔍 Detected: {detected_lang} (confidence: {confidence:.3f})")
            print(f" {england_flag} Is English: {is_english}")

            if is_english:
                print(" ✅ Flow: Process directly in English")
                if user_input["expected_flow"] == "direct_english":
                    successful_scenarios += 1
                    print(" 🎉 Expected flow matched!")
                else:
                    print(" ⚠️ Expected translation flow, got direct English")
            else:
                print(
                    " 🔄 Flow: Translate to English → Process → "
                    f"Translate back to {detected_lang}"
                )
                if user_input["expected_flow"] == "translate_to_english":
                    successful_scenarios += 1
                    print(" 🎉 Expected flow matched!")
                else:
                    print(" ⚠️ Expected direct English, got translation flow")

        except Exception as e:
            print(f" 💥 Scenario failed: {e}")

    print("\n📊 Chatbot Scenario Results:")
    print(f" ✅ Correct flows: {successful_scenarios}/{len(user_inputs)}")
    print(f" 📈 Accuracy: {(successful_scenarios / len(user_inputs)) * 100:.1f}%")

    # Pass threshold: 80% of all scenarios.
    return successful_scenarios >= len(user_inputs) * 0.8
|
|
def main(argv=None):
    """Run all three checks against the API and return a process exit code.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. The first argument, when present, replaces the
            default API base URL.

    Returns:
        0 when every check passed, 1 otherwise.
    """
    args = sys.argv[1:] if argv is None else argv
    api_url = args[0] if args else "https://sematech-sema-api.hf.space"

    print(f"🎯 Testing Language Detection Fixes at: {api_url}")

    # Run every check before reporting so one failure does not hide another.
    case_test = test_case_sensitivity_fix(api_url)
    confidence_test = test_confidence_score_fix(api_url)
    chatbot_test = test_multilingual_chatbot_scenario(api_url)

    outcomes = [
        (" 🔤 Case Sensitivity Fix", case_test),
        (" 📊 Confidence Score Fix", confidence_test),
        (" 🤖 Chatbot Scenario", chatbot_test),
    ]

    print("\n🏁 FINAL RESULTS:")
    for label, passed in outcomes:
        verdict = "✅ PASSED" if passed else "❌ FAILED"
        print(f"{label}: {verdict}")

    if all(passed for _, passed in outcomes):
        print("\n🎉 ALL FIXES WORKING PERFECTLY!")
        return 0

    print("\n⚠️ SOME ISSUES REMAIN")
    return 1


if __name__ == "__main__":
    sys.exit(main())
|
|