customeragent-api / server /tests /test_real_users.py
anasraza526's picture
Clean deploy to Hugging Face
ac90985
"""
Real-World User Simulation Test
Simulates multiple user personas asking various types of questions:
- Different languages (English, Urdu, Roman Urdu)
- Different industries (Healthcare, Education)
- Different intent types (FAQ, Industry Knowledge, Business Specific, Creative)
Run with: python test_real_users.py
"""
import sys
import os
from typing import Dict, List
import asyncio
# Add parent directory to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app.services.language_detector import detect_language
from app.services.translation_service import get_translation_service
from app.services.intent_classifier import classify_intent
from app.services.query_router import QueryRouter
from app.services.enhanced_healthcare import get_enhanced_healthcare_module
from app.services.enhanced_education import get_enhanced_education_module
class UserPersona:
    """A simulated end user together with the questions they will ask.

    Attributes:
        name: Display label printed in the test output.
        language: Language code the persona is expected to write in
            (the personas in this file use "en", "ur" and "ur-roman").
        industry: Industry the persona's queries belong to
            (the personas in this file use "healthcare" and "education").
        description: Free-text summary of who the persona is.
        questions: One dict per question; the test loop reads the keys
            "query", "expected_intent" and "description" from each.
    """

    def __init__(
        self,
        name: str,
        language: str,
        industry: str,
        description: str,
        questions: List[Dict]
    ):
        # Identity of the persona.
        self.name, self.description = name, description
        # Expectations the simulation compares detected values against.
        self.language, self.industry = language, industry
        # Stored by reference; the list is not copied.
        self.questions = questions
# Define user personas
def _question(query: str, expected_intent: str, description: str) -> Dict:
    """Build one question entry in the dict shape the test loop reads."""
    return {
        "query": query,
        "expected_intent": expected_intent,
        "description": description,
    }


# Each persona pairs a language/industry profile with the queries to replay
# and the intent category each query is expected to be classified as.
USER_PERSONAS = [
    UserPersona(
        name="Ali (Patient)",
        language="ur-roman",
        industry="healthcare",
        description="Pakistani patient seeking medical information",
        questions=[
            _question("mujhe bukhar aur dard hai", "INDUSTRY_KNOWLEDGE", "Symptom inquiry in Roman Urdu"),
            _question("diabetes kya hai?", "INDUSTRY_KNOWLEDGE", "Medical definition query"),
            _question("aapke clinic ke timings kya hain?", "FAQ", "Business hours question"),
        ],
    ),
    UserPersona(
        name="Sarah (Student)",
        language="en",
        industry="education",
        description="University student seeking academic help",
        questions=[
            _question("What is the GPA requirement for admission?", "INDUSTRY_KNOWLEDGE", "Academic policy question"),
            _question("How do I register for courses?", "FAQ", "Process-oriented question"),
            _question("Write an email to my professor about missing class", "CREATIVE", "Content generation request"),
        ],
    ),
    UserPersona(
        name="Fatima (Urdu Student)",
        language="ur",
        industry="education",
        description="Urdu-speaking student",
        questions=[
            _question("داخلے کے لیے کیا ضرورت ہے؟", "INDUSTRY_KNOWLEDGE", "Admission requirements in Urdu"),
            _question("نتیجہ کب آئے گا؟", "FAQ", "Result timing question"),
        ],
    ),
    UserPersona(
        name="Ahmed (Emergency Patient)",
        language="en",
        industry="healthcare",
        description="Patient with urgent medical concern",
        questions=[
            _question("I have severe chest pain and shortness of breath", "INDUSTRY_KNOWLEDGE", "Emergency symptoms"),
            _question("Who is the cardiologist on duty?", "BUSINESS_SPECIFIC", "Staff information"),
        ],
    ),
    UserPersona(
        name="Zainab (Mixed Language User)",
        language="ur-roman",
        industry="education",
        description="Uses mix of English and Roman Urdu",
        questions=[
            _question("scholarship ke liye apply kaise karun?", "FAQ", "Scholarship application process"),
            _question("GPA calculate karne ka tarika batao", "INDUSTRY_KNOWLEDGE", "GPA calculation method"),
        ],
    ),
    UserPersona(
        name="Dr. Khan (Healthcare Professional)",
        language="en",
        industry="healthcare",
        description="Medical professional seeking information",
        questions=[
            _question("What is the latest treatment for hypertension?", "INDUSTRY_KNOWLEDGE", "Medical treatment inquiry"),
            _question("Explain the pathophysiology of diabetes", "INDUSTRY_KNOWLEDGE", "Medical explanation request"),
        ],
    ),
]
class TestResults:
    """Accumulate per-query outcomes and print an accuracy report.

    Attributes:
        total_queries: Number of results recorded so far.
        successful_detections: How many of them had the language marked correct.
        successful_intents: How many of them had the intent marked correct.
        by_language: Maps a language label to {"total": int, "correct": int},
            where "correct" counts language-correct results.
        by_intent: Maps an intent label to {"total": int, "correct": int},
            where "correct" counts intent-correct results.
        by_industry: Maps an industry label to {"total": int, "correct": int}.
    """

    # The name matches pytest's default ``Test*`` class pattern; opt out of
    # collection so this helper is not mistaken for a test class.
    __test__ = False

    def __init__(self):
        self.total_queries = 0
        self.successful_detections = 0
        self.successful_intents = 0
        self.by_language = {}
        self.by_intent = {}
        self.by_industry = {}

    @staticmethod
    def _percent(correct, total):
        """Return ``correct`` as a percentage of ``total``; 0 when ``total`` is 0."""
        return 100 * correct / total if total > 0 else 0

    def record_result(
        self,
        language_correct: bool,
        intent_correct: bool,
        language: str,
        intent: str,
        industry: str
    ):
        """Record the outcome of one query.

        Args:
            language_correct: Whether the detected language was accepted.
            intent_correct: Whether the classified intent matched the expectation.
            language: Label used as the key in ``by_language``.
            intent: Label used as the key in ``by_intent``.
            industry: Label used as the key in ``by_industry``.
        """
        self.total_queries += 1
        if language_correct:
            self.successful_detections += 1
        if intent_correct:
            self.successful_intents += 1

        # Track by category; setdefault creates the bucket on first sight.
        language_stats = self.by_language.setdefault(language, {"total": 0, "correct": 0})
        language_stats["total"] += 1
        if language_correct:
            language_stats["correct"] += 1

        intent_stats = self.by_intent.setdefault(intent, {"total": 0, "correct": 0})
        intent_stats["total"] += 1
        if intent_correct:
            intent_stats["correct"] += 1

        # NOTE(review): the industry bucket's "correct" counter is never
        # incremented and never printed; only "total" is tracked. Left as-is
        # because it is unclear which notion of correctness was intended.
        industry_stats = self.by_industry.setdefault(industry, {"total": 0, "correct": 0})
        industry_stats["total"] += 1

    def print_summary(self):
        """Print overall and per-category statistics to stdout.

        Safe to call before any result has been recorded: percentages are
        reported as 0.0% instead of raising ZeroDivisionError.
        """
        total = self.total_queries
        detections = self.successful_detections
        intents = self.successful_intents

        print("\n" + "=" * 70)
        print("📊 TEST RESULTS SUMMARY")
        print("=" * 70)
        print("\n🔢 Overall Statistics:")
        print(f" Total Queries: {total}")
        print(f" Language Detection Accuracy: {detections}/{total} "
              f"({self._percent(detections, total):.1f}%)")
        print(f" Intent Classification Accuracy: {intents}/{total} "
              f"({self._percent(intents, total):.1f}%)")
        print("\n🌐 By Language:")
        for lang, stats in self.by_language.items():
            accuracy = self._percent(stats['correct'], stats['total'])
            print(f" {lang}: {stats['correct']}/{stats['total']} ({accuracy:.1f}%)")
        print("\n🎯 By Intent:")
        for intent, stats in self.by_intent.items():
            accuracy = self._percent(stats['correct'], stats['total'])
            print(f" {intent}: {stats['correct']}/{stats['total']} ({accuracy:.1f}%)")
        print("\n🏥 By Industry:")
        for industry, stats in self.by_industry.items():
            print(f" {industry}: {stats['total']} queries")
def _extract_symptoms(query: str, processed_query: str) -> List[str]:
    """Map keywords in the raw and English-processed query to symptom names.

    Unqualified pain ("pain" / "dard") falls back to "headache" only when no
    chest keyword was recognised, so a chest-pain query is not additionally
    reported as a headache.
    """
    raw = query.lower()
    english = processed_query.lower()
    symptoms = []
    if "fever" in english or "bukhar" in raw:
        symptoms.append("fever")
    if "chest" in english:
        symptoms.append("chest pain")
    elif "pain" in english or "dard" in raw:
        symptoms.append("headache")
    if "breath" in english:
        symptoms.append("shortness of breath")
    return symptoms


def _print_healthcare_response(healthcare, query, processed_query, description):
    """Print the healthcare bot response and return a label for its source.

    Tries, in order: a symptom check (when the question looks like a symptom
    report and at least one symptom keyword was extracted), a medical
    definition lookup ("what is" / "kya hai"), then a general guidance message.
    """
    raw = query.lower()
    english = processed_query.lower()

    looks_like_symptoms = (
        "symptom" in description.lower()
        or "pain" in raw
        or "bukhar" in raw
        or "dard" in raw
    )
    symptoms = _extract_symptoms(query, processed_query) if looks_like_symptoms else []

    if symptoms:
        result = healthcare.check_symptoms_enhanced(symptoms)
        # Format complete response
        print(f" 📋 SYMPTOM CHECK RESULT:")
        print(f" Symptoms Analyzed: {', '.join(symptoms)}")
        print(f" Severity: {result['severity'].upper()}")
        print(f" Urgency: {result['urgency']}")
        print(f"\n Possible Conditions:")
        for i, condition in enumerate(result['possible_conditions'][:5], 1):
            print(f" {i}. {condition}")
        print(f"\n 💊 Advice:")
        for line in result['advice'].split('\n')[:5]:  # Show first 5 lines
            if line.strip():
                print(f" {line}")
        if result.get('red_flags'):
            print(f"\n ⚠️ RED FLAGS:")
            for flag in result['red_flags'][:3]:
                print(f" • {flag}")
        print(f"\n {result['disclaimer']}")
        return "Symptom Checker + SymCAT Dataset"

    # Reached for non-symptom questions, and also when a symptom-like question
    # yielded no recognised symptom (previously that case printed nothing).
    if "what is" in english or "kya hai" in raw:
        answer = healthcare.answer_medical_question(processed_query)
        if answer:
            print(f" 📖 MEDICAL INFORMATION:")
            print(f" Q: {answer['question']}")
            print(f"\n A: {answer['answer']}")
            print(f"\n Source: {answer['source']}")
            print(f" Confidence: {answer['confidence']:.0%}")
            return f"MedQuAD Dataset (conf: {answer['confidence']:.0%})"
        print(f" 📖 I don't have specific information about that in my knowledge base.")
        print(f" 💡 I recommend consulting with a healthcare professional.")
        return "Fallback response"

    print(f" 📖 I can help you with:")
    print(f" • Symptom checking (informational only)")
    print(f" • Medical terminology definitions")
    print(f" • General health information")
    print(f"\n 💡 For specific medical advice, please consult a healthcare professional.")
    return "General healthcare guidance"


def _print_education_response(education, processed_query):
    """Print the education bot response and return a label for its source."""
    answer = education.answer_academic_question(processed_query)
    if answer:
        print(f" 📚 ACADEMIC INFORMATION:")
        print(f" Q: {processed_query}")
        print(f"\n A: {answer['answer']}")
        print(f"\n Source: {answer['source']}")
        print(f" Category: {answer.get('category', 'N/A')}")
        print(f" Confidence: {answer['confidence']:.0%}")
        return f"{answer['source']} Dataset (conf: {answer['confidence']:.0%})"
    print(f" 📚 I can help you with:")
    print(f" • Admission requirements and process")
    print(f" • Course registration and enrollment")
    print(f" • Results and grading information")
    print(f" • Fee structure and payment")
    print(f" • Scholarship applications")
    print(f"\n 💡 Please provide more specific details so I can assist you better.")
    return "General education guidance"


async def test_user_persona(persona: UserPersona, results: TestResults):
    """Replay every question of one persona and record the outcomes.

    For each question this detects the language, translates Urdu / Roman Urdu
    to English (best effort), classifies the intent, prints an
    industry-specific bot response, and records language/intent correctness
    in ``results``.

    Args:
        persona: The persona whose ``questions`` are replayed.
        results: Accumulator that receives one ``record_result`` call per question.
    """
    print("\n" + "=" * 70)
    print(f"👤 USER: {persona.name}")
    print("=" * 70)
    print(f"Description: {persona.description}")
    print(f"Preferred Language: {persona.language}")
    print(f"Industry: {persona.industry}")

    # NOTE(review): the router is constructed but never used below. The call
    # is kept so any constructor side effects or failures still happen here;
    # drop it if that was not the intent.
    router = QueryRouter()  # noqa: F841
    healthcare = get_enhanced_healthcare_module()
    education = get_enhanced_education_module()
    translator = get_translation_service()
    urdu_codes = ["ur", "ur-roman"]

    for idx, question_data in enumerate(persona.questions, 1):
        query = question_data["query"]
        expected_intent = question_data["expected_intent"]
        description = question_data["description"]
        print(f"\n📝 Question {idx}: {description}")
        print(f" Query: \"{query}\"")

        # Step 1: Language Detection
        language, lang_conf = detect_language(query)
        print(f" 🌐 Detected Language: {language.value} (confidence: {lang_conf:.2f})")
        # Urdu script and Roman Urdu are treated as interchangeable here.
        language_correct = language.value == persona.language or (
            language.value in urdu_codes and persona.language in urdu_codes
        )

        # Step 2: Translation if needed
        processed_query = query
        if language.value in urdu_codes:
            # Best effort: on failure the untranslated query is used downstream.
            try:
                if language.value == "ur-roman":
                    normalized = translator.normalize_roman_urdu(query)
                    processed_query = translator.translate_to_english(normalized, source="ur")
                else:
                    processed_query = translator.translate_to_english(query, source="ur")
                print(f" 🔄 Translated: \"{processed_query}\"")
            except Exception as e:
                print(f" ⚠️ Translation error: {e}")

        # Step 3: Intent Classification
        intent_result = classify_intent(
            processed_query,
            industry=persona.industry
        )
        print(f" 🎯 Intent: {intent_result.category.value} (confidence: {intent_result.confidence:.2f})")
        print(f" Expected: {expected_intent}")
        intent_correct = intent_result.category.value == expected_intent
        status = "✅" if intent_correct else "❌"
        print(f" {status} Intent Match: {intent_correct}")

        # Step 4: Generate Response based on industry and intent
        response_source = None
        print("\n 🤖 BOT RESPONSE:")
        print(" " + "─" * 65)
        if persona.industry == "healthcare":
            response_source = _print_healthcare_response(
                healthcare, query, processed_query, description
            )
        elif persona.industry == "education":
            response_source = _print_education_response(education, processed_query)
        print(" " + "─" * 65)
        if response_source:
            print(f" 📊 Source: {response_source}")

        # Record results
        results.record_result(
            language_correct=language_correct,
            intent_correct=intent_correct,
            language=language.value,
            intent=intent_result.category.value,
            industry=persona.industry
        )
async def run_all_tests():
    """Run the simulation for every persona, then print summary and insights.

    Personas are processed sequentially and share one ``TestResults``
    accumulator. The accuracy ratios are guarded so that an empty run
    (no queries recorded) reports "needs improvement" rather than raising
    ZeroDivisionError.
    """
    results = TestResults()
    print("\n" + "🚀" * 35)
    print("REAL-WORLD USER SIMULATION TEST")
    print(f"Testing {len(USER_PERSONAS)} user personas")
    print("🚀" * 35)

    for persona in USER_PERSONAS:
        await test_user_persona(persona, results)

    results.print_summary()

    # Additional insights
    print("\n" + "=" * 70)
    print("💡 KEY INSIGHTS")
    print("=" * 70)

    total = results.total_queries
    detection_rate = results.successful_detections / total if total else 0.0
    intent_rate = results.successful_intents / total if total else 0.0

    if detection_rate >= 0.9:
        print("✅ Language detection is highly accurate (>90%)")
    else:
        print("⚠️ Language detection needs improvement")
    if intent_rate >= 0.8:
        print("✅ Intent classification is performing well (>80%)")
    else:
        print("⚠️ Intent classification could be improved")

    print("\n📋 System Capabilities Demonstrated:")
    print(" ✓ Multilingual support (English, Urdu, Roman Urdu)")
    print(" ✓ Multi-industry support (Healthcare, Education)")
    print(" ✓ Intent classification (FAQ, Industry Knowledge, Creative, Business)")
    print(" ✓ Symptom checking with urgency detection")
    print(" ✓ Academic Q&A with dataset integration")
    print(" ✓ Real-time language detection and translation")
if __name__ == "__main__":
    # Script entry point: run the whole simulation once and report the outcome.
    print("\n" + "=" * 70)
    print("🌟 STARTING REAL-WORLD USER SIMULATION")
    print("=" * 70)
    try:
        asyncio.run(run_all_tests())
    except Exception as e:
        print(f"\n❌ TEST FAILED: {e}")
        import traceback
        traceback.print_exc()
        # Exit non-zero so shells and CI see the failure; previously the
        # error was printed but the process still exited with status 0.
        sys.exit(1)
    else:
        print("\n" + "=" * 70)
        print("✅ ALL USER TESTS COMPLETED SUCCESSFULLY")
        print("=" * 70 + "\n")