| """ |
| Test script to verify the updated African languages support |
| """ |
|
|
| import requests |
| import json |
|
|
def _get_or_report(url, *, timeout, params=None):
    """Issue a GET request, returning the response or ``None`` on a transport error.

    Connection failures and timeouts are printed as a ``[FAIL]`` line instead
    of being raised, so one unreachable endpoint does not abort later checks.
    """
    try:
        return requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"[FAIL] Request to {url} failed: {exc}")
        return None


def _check_african_listing(api_url, timeout):
    """Print the African language count and a sample of the returned entries."""
    print("\n[TEST] African Languages Count")
    print("-" * 40)

    response = _get_or_report(f"{api_url}/api/v1/languages/african", timeout=timeout)
    if response is None:
        return
    if response.status_code != 200:
        print(f"[FAIL] Failed to get African languages: HTTP {response.status_code}")
        return

    data = response.json()
    african_count = data['total_count']

    print(f"[RESULT] Found {african_count} African languages")

    if african_count >= 50:
        print(f"[PASS] African language count meets FLORES-200 expectations ({african_count} >= 50)")
    else:
        print(f"[FAIL] African language count below expectations ({african_count} < 50)")

    # Only the first few entries are shown to keep the report short.
    sample_size = 10
    print("\n[EXAMPLES] Sample African Languages:")
    for code, info in list(data['languages'].items())[:sample_size]:
        print(f" {code}: {info['name']} ({info['native_name']}) - {info['script']}")

    if african_count > sample_size:
        print(f" ... and {african_count - sample_size} more languages")


def _check_new_language_lookups(api_url, timeout):
    """Look up each expected language code individually and print a summary."""
    print("\n[TEST] Specific New African Languages")
    print("-" * 40)

    new_languages_to_test = [
        "aka_Latn",
        "bam_Latn",
        "bem_Latn",
        "dik_Latn",
        "dyu_Latn",
        "ewe_Latn",
        "fon_Latn",
        "fuv_Latn",
        "kab_Latn",
        "kam_Latn",
        "kbp_Latn",
        "kea_Latn",
        "kin_Latn",
        "kmb_Latn",
        "knc_Latn",
        "kon_Latn",
        "lua_Latn",
        "lus_Latn",
        "mos_Latn",
        "nso_Latn",
        "nus_Latn",
        "run_Latn",
        "sag_Latn",
        "taq_Latn",
        "taq_Tfng",
        "tum_Latn",
        "twi_Latn",
        "tzm_Tfng",
    ]

    found_count = 0
    missing_languages = []

    for lang_code in new_languages_to_test:
        response = _get_or_report(f"{api_url}/api/v1/languages/{lang_code}", timeout=timeout)

        if response is None:
            # The transport error was already reported; count the code as missing.
            missing_languages.append(lang_code)
        elif response.status_code == 200:
            data = response.json()
            print(f"[PASS] {lang_code}: {data['name']} ({data['native_name']})")
            found_count += 1
        else:
            print(f"[FAIL] {lang_code}: Not found")
            missing_languages.append(lang_code)

    print("\n[SUMMARY] New Languages Test Results:")
    print(f" Found: {found_count}/{len(new_languages_to_test)} languages")
    print(f" Success rate: {(found_count/len(new_languages_to_test))*100:.1f}%")

    if missing_languages:
        print(f" Missing languages: {', '.join(missing_languages)}")


def _check_language_search(api_url, timeout):
    """Search for several language names and print the matches for each."""
    print("\n[TEST] African Language Search")
    print("-" * 40)

    search_terms = ["Akan", "Bambara", "Fulfulde", "Tamasheq", "Kanuri"]

    for term in search_terms:
        # Pass the term via ``params`` so requests URL-encodes it.
        response = _get_or_report(
            f"{api_url}/api/v1/languages/search",
            timeout=timeout,
            params={"q": term},
        )
        if response is None:
            continue

        if response.status_code != 200:
            print(f"[FAIL] Search '{term}': HTTP {response.status_code}")
            continue

        data = response.json()
        if data['total_count'] > 0:
            print(f"[PASS] Search '{term}': Found {data['total_count']} result(s)")
            for code, info in data['languages'].items():
                print(f" {code}: {info['name']} ({info['native_name']})")
        else:
            print(f"[FAIL] Search '{term}': No results found")


def _check_language_stats(api_url, timeout):
    """Print the statistics endpoint's totals and check the Africa region count."""
    print("\n[TEST] Updated Language Statistics")
    print("-" * 40)

    response = _get_or_report(f"{api_url}/api/v1/languages/stats", timeout=timeout)
    if response is None:
        return
    if response.status_code != 200:
        print(f"[FAIL] Failed to get language statistics: HTTP {response.status_code}")
        return

    data = response.json()

    print("[RESULT] Language Statistics:")
    print(f" Total languages: {data['total_languages']}")
    print(f" Total regions: {data['regions']}")
    print(f" Total scripts: {data['scripts']}")

    by_region = data.get('by_region', {})
    if 'by_region' in data:
        print(" Languages by region:")
        for region, count in by_region.items():
            print(f" {region}: {count} languages")

    africa_count = by_region.get('Africa', 0)
    if africa_count >= 50:
        print(f"[PASS] Africa region has {africa_count} languages (>= 50)")
    else:
        print(f"[FAIL] Africa region has only {africa_count} languages (< 50)")


def test_african_languages_count(api_url="https://sematech-sema-api.hf.space", timeout=30):
    """Test that we now have 50+ African languages.

    Runs four checks against the API and prints ``[PASS]``/``[FAIL]`` lines:
    the African language listing, per-code lookups of a fixed list of
    language codes, name search, and the statistics endpoint. Results are
    printed only; nothing is returned or asserted.

    Args:
        api_url: Base URL of the API under test.
        timeout: Seconds to wait on each HTTP request before it is reported
            as failed, so a stalled server cannot hang the script.
    """
    print("[INFO] Testing Updated African Languages Support")
    print("=" * 60)

    _check_african_listing(api_url, timeout)
    _check_new_language_lookups(api_url, timeout)
    _check_language_search(api_url, timeout)
    _check_language_stats(api_url, timeout)
|
|
def test_translation_with_new_languages(api_url="https://sematech-sema-api.hf.space"):
    """Test translation with some of the newly added African languages.

    Posts a short English phrase to the translate endpoint for each target
    language and prints the outcome, followed by a success-rate summary.
    Results are printed only; nothing is returned or asserted.

    Args:
        api_url: Base URL of the API under test.
    """
    print("\n[TEST] Translation with New African Languages")
    print("-" * 50)

    test_cases = [
        {
            "text": "Hello world",
            "target_language": "aka_Latn",
            "expected_lang": "Akan",
        },
        {
            "text": "Good morning",
            "target_language": "bam_Latn",
            "expected_lang": "Bambara",
        },
        {
            "text": "How are you?",
            "target_language": "fon_Latn",
            "expected_lang": "Fon",
        },
        {
            "text": "Thank you",
            "target_language": "twi_Latn",
            "expected_lang": "Twi",
        },
    ]

    successful_translations = 0

    for i, test_case in enumerate(test_cases, 1):
        print(f"\n{i}. Testing translation to {test_case['expected_lang']} ({test_case['target_language']})")

        try:
            response = requests.post(
                f"{api_url}/api/v1/translate",
                headers={"Content-Type": "application/json"},
                json={
                    "text": test_case["text"],
                    "target_language": test_case["target_language"],
                },
                timeout=30,
            )
        except requests.RequestException as exc:
            # A timeout or connection error counts as one failed case, so the
            # remaining cases and the summary below still run.
            print(f" [FAIL] Translation request failed: {exc}")
            continue

        if response.status_code == 200:
            data = response.json()
            print(f" Original: '{test_case['text']}'")
            print(f" Translation: '{data['translated_text']}'")
            print(f" Source: {data['source_language']}")
            print(f" Target: {data['target_language']}")
            print(" [PASS] Translation successful")
            successful_translations += 1
        else:
            print(f" [FAIL] Translation failed: HTTP {response.status_code}")
            try:
                detail = response.json().get('detail', 'Unknown error')
            except (ValueError, AttributeError):
                # The body is not JSON (ValueError) or is JSON but not an
                # object (AttributeError): fall back to the raw text.
                print(f" Error: {response.text}")
            else:
                print(f" Error: {detail}")

    print("\n[SUMMARY] Translation Test Results:")
    print(f" Successful: {successful_translations}/{len(test_cases)} translations")
    print(f" Success rate: {(successful_translations/len(test_cases))*100:.1f}%")
|
|
if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default API URL.
    api_url = sys.argv[1] if len(sys.argv) > 1 else "https://sematech-sema-api.hf.space"

    print(f"[INFO] Testing updated African languages at: {api_url}")

    # Run the language-metadata checks first, then the translation checks.
    for run_check in (test_african_languages_count, test_translation_with_new_languages):
        run_check(api_url)

    print("\n[SUCCESS] African languages update tests completed!")
    print(
        "\n".join(
            (
                "\nExpected results:",
                "- African languages count should be 50+ (was 23)",
                "- New languages like Akan, Bambara, Fon, Twi should be available",
                "- Search should find new African languages",
                "- Translation should work with new language codes",
            )
        )
    )
|
|