Spaces:

anasraza526
/

customeragent-api

Runtime error

App Files Files Community

customeragent-api / server /tests /test_real_users.py

anasraza526

Clean deploy to Hugging Face

ac90985 22 days ago

raw

history blame contribute delete

17.6 kB

	"""
	Real-World User Simulation Test

	Simulates multiple user personas asking various types of questions:
	- Different languages (English, Urdu, Roman Urdu)
	- Different industries (Healthcare, Education)
	- Different intent types (FAQ, Industry Knowledge, Business Specific, Creative)

	Run with: python test_real_users.py
	"""

	import sys
	import os
	from typing import Dict, List
	import asyncio

	# Add parent directory to path
	sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	from app.services.language_detector import detect_language
	from app.services.translation_service import get_translation_service
	from app.services.intent_classifier import classify_intent
	from app.services.query_router import QueryRouter
	from app.services.enhanced_healthcare import get_enhanced_healthcare_module
	from app.services.enhanced_education import get_enhanced_education_module


	class UserPersona:
	    """A simulated end user plus the scripted questions they will ask.

	    The constructor stores every argument unchanged on the instance:

	    * ``name`` - display label printed in the report.
	    * ``language`` - the persona's preferred language code
	      (the personas in this file use ``"en"``, ``"ur"`` and ``"ur-roman"``).
	    * ``industry`` - domain the persona belongs to
	      (``"healthcare"`` or ``"education"`` in this file).
	    * ``description`` - one-line human readable summary.
	    * ``questions`` - list of dicts; the runner in this file reads the
	      ``"query"``, ``"expected_intent"`` and ``"description"`` keys.
	    """

	    def __init__(self, name: str, language: str, industry: str,
	                 description: str, questions: List[Dict]):
	        # Identity and routing information.
	        self.name, self.language, self.industry = name, language, industry
	        # Free-text summary and the scripted conversation.
	        self.description, self.questions = description, questions


	# Scripted personas exercised by the simulation. Each question dict carries
	# the raw "query", the "expected_intent" label the classifier should return,
	# and a short "description" used in the printed report.
	USER_PERSONAS = [
	    # Roman-Urdu speaking patient.
	    UserPersona(
	        name="Ali (Patient)",
	        language="ur-roman",
	        industry="healthcare",
	        description="Pakistani patient seeking medical information",
	        questions=[
	            {"query": "mujhe bukhar aur dard hai",
	             "expected_intent": "INDUSTRY_KNOWLEDGE",
	             "description": "Symptom inquiry in Roman Urdu"},
	            {"query": "diabetes kya hai?",
	             "expected_intent": "INDUSTRY_KNOWLEDGE",
	             "description": "Medical definition query"},
	            {"query": "aapke clinic ke timings kya hain?",
	             "expected_intent": "FAQ",
	             "description": "Business hours question"},
	        ],
	    ),
	    # English speaking university student.
	    UserPersona(
	        name="Sarah (Student)",
	        language="en",
	        industry="education",
	        description="University student seeking academic help",
	        questions=[
	            {"query": "What is the GPA requirement for admission?",
	             "expected_intent": "INDUSTRY_KNOWLEDGE",
	             "description": "Academic policy question"},
	            {"query": "How do I register for courses?",
	             "expected_intent": "FAQ",
	             "description": "Process-oriented question"},
	            {"query": "Write an email to my professor about missing class",
	             "expected_intent": "CREATIVE",
	             "description": "Content generation request"},
	        ],
	    ),
	    # Student writing in Urdu script.
	    UserPersona(
	        name="Fatima (Urdu Student)",
	        language="ur",
	        industry="education",
	        description="Urdu-speaking student",
	        questions=[
	            {"query": "داخلے کے لیے کیا ضرورت ہے؟",
	             "expected_intent": "INDUSTRY_KNOWLEDGE",
	             "description": "Admission requirements in Urdu"},
	            {"query": "نتیجہ کب آئے گا؟",
	             "expected_intent": "FAQ",
	             "description": "Result timing question"},
	        ],
	    ),
	    # English speaking patient describing urgent symptoms.
	    UserPersona(
	        name="Ahmed (Emergency Patient)",
	        language="en",
	        industry="healthcare",
	        description="Patient with urgent medical concern",
	        questions=[
	            {"query": "I have severe chest pain and shortness of breath",
	             "expected_intent": "INDUSTRY_KNOWLEDGE",
	             "description": "Emergency symptoms"},
	            {"query": "Who is the cardiologist on duty?",
	             "expected_intent": "BUSINESS_SPECIFIC",
	             "description": "Staff information"},
	        ],
	    ),
	    # Student mixing English terms into Roman Urdu.
	    UserPersona(
	        name="Zainab (Mixed Language User)",
	        language="ur-roman",
	        industry="education",
	        description="Uses mix of English and Roman Urdu",
	        questions=[
	            {"query": "scholarship ke liye apply kaise karun?",
	             "expected_intent": "FAQ",
	             "description": "Scholarship application process"},
	            {"query": "GPA calculate karne ka tarika batao",
	             "expected_intent": "INDUSTRY_KNOWLEDGE",
	             "description": "GPA calculation method"},
	        ],
	    ),
	    # Clinician asking professional-level questions.
	    UserPersona(
	        name="Dr. Khan (Healthcare Professional)",
	        language="en",
	        industry="healthcare",
	        description="Medical professional seeking information",
	        questions=[
	            {"query": "What is the latest treatment for hypertension?",
	             "expected_intent": "INDUSTRY_KNOWLEDGE",
	             "description": "Medical treatment inquiry"},
	            {"query": "Explain the pathophysiology of diabetes",
	             "expected_intent": "INDUSTRY_KNOWLEDGE",
	             "description": "Medical explanation request"},
	        ],
	    ),
	]


	class TestResults:
	    """Accumulate per-query outcomes and print an accuracy report.

	    Counters are updated through :meth:`record_result`; :meth:`print_summary`
	    writes the report to stdout and is safe to call before any result has
	    been recorded.
	    """

	    # The class name starts with "Test", which matches pytest's default class
	    # collection pattern. Opt out explicitly so pytest does not try to
	    # collect this helper (it has an __init__ and contains no tests).
	    __test__ = False

	    def __init__(self):
	        # Overall counters.
	        self.total_queries = 0
	        self.successful_detections = 0  # queries whose language matched
	        self.successful_intents = 0     # queries whose intent matched
	        # Per-category buckets, each value is {"total": int, "correct": int}.
	        self.by_language = {}
	        self.by_intent = {}
	        # NOTE: the "correct" field of by_industry buckets is created but
	        # never incremented; only "total" is reported.
	        self.by_industry = {}

	    @staticmethod
	    def _percent(correct, total):
	        """Return ``correct / total`` as a percentage, or 0 when ``total`` is 0."""
	        return 100 * correct / total if total > 0 else 0

	    def record_result(
	        self,
	        language_correct: bool,
	        intent_correct: bool,
	        language: str,
	        intent: str,
	        industry: str
	    ):
	        """Record the outcome of a single query.

	        Args:
	            language_correct: Whether language detection was judged correct.
	            intent_correct: Whether the classified intent matched expectations.
	            language: Bucket key for the per-language breakdown.
	            intent: Bucket key for the per-intent breakdown.
	            industry: Bucket key for the per-industry breakdown.
	        """
	        self.total_queries += 1
	        if language_correct:
	            self.successful_detections += 1
	        if intent_correct:
	            self.successful_intents += 1

	        # setdefault creates the bucket on first sight of a key.
	        lang_stats = self.by_language.setdefault(language, {"total": 0, "correct": 0})
	        lang_stats["total"] += 1
	        if language_correct:
	            lang_stats["correct"] += 1

	        intent_stats = self.by_intent.setdefault(intent, {"total": 0, "correct": 0})
	        intent_stats["total"] += 1
	        if intent_correct:
	            intent_stats["correct"] += 1

	        industry_stats = self.by_industry.setdefault(industry, {"total": 0, "correct": 0})
	        industry_stats["total"] += 1

	    def print_summary(self):
	        """Print overall and per-category statistics to stdout.

	        Percentages are reported as 0.0% when no queries were recorded
	        instead of raising ``ZeroDivisionError``.
	        """
	        total = self.total_queries
	        detection_pct = self._percent(self.successful_detections, total)
	        intent_pct = self._percent(self.successful_intents, total)

	        print("\n" + "=" * 70)
	        print("📊 TEST RESULTS SUMMARY")
	        print("=" * 70)

	        print("\n🔢 Overall Statistics:")
	        print(f" Total Queries: {total}")
	        print(f" Language Detection Accuracy: {self.successful_detections}/{total} "
	              f"({detection_pct:.1f}%)")
	        print(f" Intent Classification Accuracy: {self.successful_intents}/{total} "
	              f"({intent_pct:.1f}%)")

	        print("\n🌐 By Language:")
	        for lang, stats in self.by_language.items():
	            accuracy = self._percent(stats['correct'], stats['total'])
	            print(f" {lang}: {stats['correct']}/{stats['total']} ({accuracy:.1f}%)")

	        print("\n🎯 By Intent:")
	        for intent, stats in self.by_intent.items():
	            accuracy = self._percent(stats['correct'], stats['total'])
	            print(f" {intent}: {stats['correct']}/{stats['total']} ({accuracy:.1f}%)")

	        print("\n🏥 By Industry:")
	        for industry, stats in self.by_industry.items():
	            print(f" {industry}: {stats['total']} queries")


	def _extract_symptoms(query: str, processed_query: str) -> List[str]:
	    """Map keywords in the raw and translated query to symptom names.

	    English keywords are looked up in ``processed_query``; the Roman Urdu
	    keywords "bukhar" and "dard" are looked up in the raw ``query``.
	    """
	    raw = query.lower()
	    english = processed_query.lower()
	    mentions_chest = "chest" in english

	    symptoms = []
	    if "fever" in english or "bukhar" in raw:
	        symptoms.append("fever")
	    # Unspecified pain is approximated as "headache". When the query names
	    # the chest, only "chest pain" is reported so the symptom check is not
	    # skewed by a headache the user never mentioned.
	    if ("pain" in english or "dard" in raw) and not mentions_chest:
	        symptoms.append("headache")
	    if mentions_chest:
	        symptoms.append("chest pain")
	    if "breath" in english:
	        symptoms.append("shortness of breath")
	    return symptoms


	def _print_symptom_report(symptoms, result):
	    """Print the symptom-check ``result`` mapping in a truncated, readable form."""
	    print(" 📋 SYMPTOM CHECK RESULT:")
	    print(f" Symptoms Analyzed: {', '.join(symptoms)}")
	    print(f" Severity: {result['severity'].upper()}")
	    print(f" Urgency: {result['urgency']}")

	    print("\n Possible Conditions:")
	    for i, condition in enumerate(result['possible_conditions'][:5], 1):
	        print(f" {i}. {condition}")

	    print("\n 💊 Advice:")
	    # Only the first five lines of advice are shown; blank lines are skipped.
	    for line in result['advice'].split('\n')[:5]:
	        if line.strip():
	            print(f" {line}")

	    if result.get('red_flags'):
	        print("\n ⚠️ RED FLAGS:")
	        for flag in result['red_flags'][:3]:
	            print(f" • {flag}")

	    print(f"\n {result['disclaimer']}")


	def _respond_healthcare(healthcare, query, processed_query, description):
	    """Print the healthcare bot response and return its source label.

	    Returns ``None`` when the query looks like a symptom report but no known
	    symptom keyword was found; nothing is printed in that case.
	    """
	    raw = query.lower()
	    english = processed_query.lower()

	    looks_like_symptoms = (
	        "symptom" in description.lower()
	        or "pain" in raw
	        or "bukhar" in raw
	        or "dard" in raw
	    )
	    if looks_like_symptoms:
	        symptoms = _extract_symptoms(query, processed_query)
	        if not symptoms:
	            return None
	        result = healthcare.check_symptoms_enhanced(symptoms)
	        _print_symptom_report(symptoms, result)
	        return "Symptom Checker + SymCAT Dataset"

	    if "what is" in english or "kya hai" in raw:
	        answer = healthcare.answer_medical_question(processed_query)
	        if answer:
	            print(" 📖 MEDICAL INFORMATION:")
	            print(f" Q: {answer['question']}")
	            print(f"\n A: {answer['answer']}")
	            print(f"\n Source: {answer['source']}")
	            print(f" Confidence: {answer['confidence']:.0%}")
	            return f"MedQuAD Dataset (conf: {answer['confidence']:.0%})"
	        print(" 📖 I don't have specific information about that in my knowledge base.")
	        print(" 💡 I recommend consulting with a healthcare professional.")
	        return "Fallback response"

	    print(" 📖 I can help you with:")
	    print(" • Symptom checking (informational only)")
	    print(" • Medical terminology definitions")
	    print(" • General health information")
	    print("\n 💡 For specific medical advice, please consult a healthcare professional.")
	    return "General healthcare guidance"


	def _respond_education(education, processed_query):
	    """Print the education bot response and return its source label."""
	    answer = education.answer_academic_question(processed_query)
	    if answer:
	        print(" 📚 ACADEMIC INFORMATION:")
	        print(f" Q: {processed_query}")
	        print(f"\n A: {answer['answer']}")
	        print(f"\n Source: {answer['source']}")
	        print(f" Category: {answer.get('category', 'N/A')}")
	        print(f" Confidence: {answer['confidence']:.0%}")
	        return f"{answer['source']} Dataset (conf: {answer['confidence']:.0%})"

	    print(" 📚 I can help you with:")
	    print(" • Admission requirements and process")
	    print(" • Course registration and enrollment")
	    print(" • Results and grading information")
	    print(" • Fee structure and payment")
	    print(" • Scholarship applications")
	    print("\n 💡 Please provide more specific details so I can assist you better.")
	    return "General education guidance"


	async def test_user_persona(persona: UserPersona, results: TestResults):
	    """Run every scripted question of ``persona`` through the pipeline.

	    For each question this detects the language, translates Urdu / Roman
	    Urdu to English (best effort), classifies the intent, prints a simulated
	    bot response for the persona's industry, and records the outcome.

	    Args:
	        persona: The persona whose ``questions`` are replayed.
	        results: Accumulator updated once per question via ``record_result``.

	    NOTE(review): despite the ``test_`` prefix this coroutine takes plain
	    arguments and is driven by ``run_all_tests``; it is presumably not meant
	    to be collected by pytest directly - confirm.
	    """
	    urdu_codes = ("ur", "ur-roman")

	    print("\n" + "=" * 70)
	    print(f"👤 USER: {persona.name}")
	    print("=" * 70)
	    print(f"Description: {persona.description}")
	    print(f"Preferred Language: {persona.language}")
	    print(f"Industry: {persona.industry}")

	    # NOTE(review): the router is constructed but not used below; the call
	    # is kept because its constructor may have side effects - confirm.
	    router = QueryRouter()
	    healthcare = get_enhanced_healthcare_module()
	    education = get_enhanced_education_module()
	    translator = get_translation_service()

	    for idx, question_data in enumerate(persona.questions, 1):
	        query = question_data["query"]
	        expected_intent = question_data["expected_intent"]
	        description = question_data["description"]

	        print(f"\n📝 Question {idx}: {description}")
	        print(f" Query: \"{query}\"")

	        # Step 1: language detection.
	        language, lang_conf = detect_language(query)
	        detected = language.value
	        print(f" 🌐 Detected Language: {detected} (confidence: {lang_conf:.2f})")

	        # Urdu script and Roman Urdu are treated as interchangeable when
	        # judging whether detection matched the persona's language.
	        language_correct = detected == persona.language or (
	            detected in urdu_codes and persona.language in urdu_codes
	        )

	        # Step 2: translate to English when needed. Failures are reported and
	        # the untranslated query is used for the remaining steps.
	        processed_query = query
	        if detected in urdu_codes:
	            try:
	                if detected == "ur-roman":
	                    normalized = translator.normalize_roman_urdu(query)
	                    processed_query = translator.translate_to_english(normalized, source="ur")
	                else:
	                    processed_query = translator.translate_to_english(query, source="ur")
	                print(f" 🔄 Translated: \"{processed_query}\"")
	            except Exception as e:
	                print(f" ⚠️ Translation error: {e}")

	        # Step 3: intent classification.
	        intent_result = classify_intent(
	            processed_query,
	            industry=persona.industry
	        )
	        actual_intent = intent_result.category.value
	        print(f" 🎯 Intent: {actual_intent} (confidence: {intent_result.confidence:.2f})")
	        print(f" Expected: {expected_intent}")

	        intent_correct = actual_intent == expected_intent
	        status = "✅" if intent_correct else "❌"
	        print(f" {status} Intent Match: {intent_correct}")

	        # Step 4: simulated response for the persona's industry. Industries
	        # other than healthcare/education print an empty response body.
	        print("\n 🤖 BOT RESPONSE:")
	        print(" " + "─" * 65)

	        response_source = None
	        if persona.industry == "healthcare":
	            response_source = _respond_healthcare(
	                healthcare, query, processed_query, description
	            )
	        elif persona.industry == "education":
	            response_source = _respond_education(education, processed_query)

	        print(" " + "─" * 65)

	        if response_source:
	            print(f" 📊 Source: {response_source}")

	        # Record the outcome for the final summary.
	        results.record_result(
	            language_correct=language_correct,
	            intent_correct=intent_correct,
	            language=detected,
	            intent=actual_intent,
	            industry=persona.industry
	        )


	async def run_all_tests():
	    """Replay every persona in ``USER_PERSONAS`` and print the final report.

	    Personas are processed one after another into a single ``TestResults``
	    accumulator. After the summary, two pass/warn insight lines are printed:
	    language detection passes at a ratio of at least 0.9 and intent
	    classification at a ratio of at least 0.8.
	    """
	    results = TestResults()

	    print("\n" + "🚀" * 35)
	    print("REAL-WORLD USER SIMULATION TEST")
	    print(f"Testing {len(USER_PERSONAS)} user personas")
	    print("🚀" * 35)

	    for persona in USER_PERSONAS:
	        await test_user_persona(persona, results)

	    results.print_summary()

	    # Additional insights
	    print("\n" + "=" * 70)
	    print("💡 KEY INSIGHTS")
	    print("=" * 70)

	    # Guard the ratios: with no recorded queries both rates count as 0.0
	    # instead of raising ZeroDivisionError.
	    total = results.total_queries
	    detection_rate = results.successful_detections / total if total else 0.0
	    intent_rate = results.successful_intents / total if total else 0.0

	    if detection_rate >= 0.9:
	        print("✅ Language detection is highly accurate (>90%)")
	    else:
	        print("⚠️ Language detection needs improvement")

	    if intent_rate >= 0.8:
	        print("✅ Intent classification is performing well (>80%)")
	    else:
	        print("⚠️ Intent classification could be improved")

	    print("\n📋 System Capabilities Demonstrated:")
	    capabilities = (
	        " ✓ Multilingual support (English, Urdu, Roman Urdu)",
	        " ✓ Multi-industry support (Healthcare, Education)",
	        " ✓ Intent classification (FAQ, Industry Knowledge, Creative, Business)",
	        " ✓ Symptom checking with urgency detection",
	        " ✓ Academic Q&A with dataset integration",
	        " ✓ Real-time language detection and translation",
	    )
	    for capability in capabilities:
	        print(capability)


	if __name__ == "__main__":
	    # Script entry point: run the whole simulation and report the outcome.
	    print("\n" + "=" * 70)
	    print("🌟 STARTING REAL-WORLD USER SIMULATION")
	    print("=" * 70)

	    try:
	        asyncio.run(run_all_tests())
	    except Exception as e:
	        print(f"\n❌ TEST FAILED: {e}")
	        import traceback
	        traceback.print_exc()
	        # Exit non-zero so shells and CI can see that the run failed; without
	        # this the process would finish with status 0 after printing the error.
	        sys.exit(1)
	    else:
	        print("\n" + "=" * 70)
	        print("✅ ALL USER TESTS COMPLETED SUCCESSFULLY")
	        print("=" * 70 + "\n")