Spaces:
Runtime error
Runtime error
"""
Test Enhanced Modules with Datasets
Tests healthcare, education, and intent modules with dataset integration.
Run with: python test_enhanced_modules.py
"""
| import sys | |
| import os | |
| # Add parent directory to path | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from app.services.enhanced_healthcare import get_enhanced_healthcare_module | |
| from app.services.enhanced_education import get_enhanced_education_module | |
| from app.services.intent_trainer import get_intent_trainer | |
| from app.services.dataset_manager import get_dataset_manager | |
def test_enhanced_healthcare():
    """Exercise the enhanced healthcare module and print what it returns.

    Prints the module's knowledge-base statistics, the answers to three
    sample medical questions, and the result of one enhanced symptom check.
    """
    rule = "=" * 60
    print(rule)
    print("TESTING ENHANCED HEALTHCARE MODULE")
    print(rule)

    module = get_enhanced_healthcare_module()

    # Knowledge-base statistics: one "name: value" line per entry.
    knowledge_stats = module.get_knowledge_stats()
    print("\nπ Knowledge Base Stats:")
    for stat_name, stat_value in knowledge_stats.items():
        print(f" {stat_name}: {stat_value}")

    # Medical Q&A over a fixed set of sample questions.
    print("\nπ₯ Testing Medical Q&A:")
    sample_questions = (
        "What is diabetes?",
        "What are the symptoms of flu?",
        "How is pneumonia treated?",
    )
    for text in sample_questions:
        print(f"\n Q: {text}")
        reply = module.answer_medical_question(text)
        if not reply:
            print(" No answer found")
            continue
        # Only the first 100 characters of the answer are shown.
        print(f" A: {reply['answer'][:100]}...")
        print(f" Source: {reply['source']}, Confidence: {reply['confidence']:.2f}")

    # Enhanced symptom checker on a two-symptom sample.
    print("\nπ©Ί Testing Enhanced Symptom Checker:")
    reported = ["fever", "cough"]
    outcome = module.check_symptoms_enhanced(reported)
    print(f" Symptoms: {', '.join(reported)}")
    # At most five candidate conditions are listed.
    top_conditions = outcome['possible_conditions'][:5]
    print(f" Possible Conditions: {', '.join(top_conditions)}")
    print(f" Severity: {outcome['severity']}")
    print(f" Data Source: {outcome.get('data_source', 'Base')}")
def test_enhanced_education():
    """Exercise the enhanced education module and print what it returns.

    Prints the module's knowledge-base statistics, the answers to three
    sample academic questions, and a summary of its admission information.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TESTING ENHANCED EDUCATION MODULE")
    print(rule)

    module = get_enhanced_education_module()

    # Knowledge-base statistics: one "name: value" line per entry.
    knowledge_stats = module.get_knowledge_stats()
    print("\nπ Knowledge Base Stats:")
    for stat_name, stat_value in knowledge_stats.items():
        print(f" {stat_name}: {stat_value}")

    # Academic Q&A over a fixed set of sample questions.
    print("\nπ Testing Academic Q&A:")
    sample_questions = (
        "What is the GPA requirement for admission?",
        "How do I register for courses?",
        "What is a prerequisite course?",
    )
    for text in sample_questions:
        print(f"\n Q: {text}")
        reply = module.answer_academic_question(text)
        if not reply:
            print(" No answer found")
            continue
        # Only the first 100 characters of the answer are shown.
        print(f" A: {reply['answer'][:100]}...")
        print(f" Source: {reply['source']}, Confidence: {reply['confidence']:.2f}")

    # Admission information summary.
    print("\nπ Testing Admission Information:")
    admission = module.get_admission_information()
    print(f" Total admission resources: {admission['total_resources']}")
    checklist = admission['admission_checklist']
    print(f" Checklist steps: {len(checklist)}")
def test_intent_trainer():
    """Evaluate the intent classifier and print a report.

    Runs the trainer's evaluation with ``dataset="clinc150"`` and
    ``split="test"``, then prints accuracy, the category distribution, the
    first three predictions, a misclassification analysis, and the trainer's
    improvement suggestions.
    """
    print("\n" + "=" * 60)
    print("TESTING INTENT CLASSIFIER TRAINER")
    print("=" * 60)

    trainer = get_intent_trainer()

    # Evaluate on the CLINC150 test set.
    print("\nπ Evaluating on CLINC150 Test Set:")
    results = trainer.evaluate_classifier(dataset="clinc150", split="test")
    print(f"\n Dataset: {results['dataset']}")
    print(f" Accuracy: {results['accuracy']:.2%} ({results['correct']}/{results['total']})")

    # Category distribution, predicted versus expected.
    print("\n Category Distribution:")
    dist = results['category_distribution']
    print(f" Predicted: {dist['predicted']}")
    print(f" Expected: {dist['expected']}")

    # Show only the first three predictions as a sample.
    print("\n Sample Predictions:")
    for pred in results['predictions'][:3]:
        # The two markers must differ, otherwise a correct prediction cannot
        # be told apart from a wrong one in the printed report.
        status = "✓" if pred['correct'] else "✗"
        print(f"\n {status} '{pred['text']}'")
        print(f" Expected: {pred['expected_category']}")
        print(f" Predicted: {pred['predicted_category']} (conf: {pred['confidence']:.2f})")

    # Misclassification analysis.
    print("\nπ Misclassification Analysis:")
    analysis = trainer.analyze_misclassifications(results)
    print(f" Total misclassified: {analysis['total_misclassified']}")
    print(f" Low confidence count: {analysis.get('low_confidence_count', 0)}")
    if analysis.get('confusion_matrix'):
        print("\n Confusion Matrix:")
        # Only the first five entries are printed to keep the report short.
        for key, count in list(analysis['confusion_matrix'].items())[:5]:
            print(f" {key}: {count}")

    # Improvement suggestions, one per line.
    print("\nπ‘ Improvement Suggestions:")
    suggestions = trainer.suggest_improvements(results)
    for suggestion in suggestions:
        print(f" {suggestion}")
def test_dataset_manager():
    """Print every dataset the manager reports.

    For each entry the name, type, download status, and description are
    printed.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TESTING DATASET MANAGER")
    print(rule)

    dataset_manager = get_dataset_manager()

    print("\nπ¦ Available Datasets:")
    catalog = dataset_manager.get_dataset_info()
    for dataset_name, meta in catalog.items():
        # The status line depends only on the 'downloaded' flag.
        if meta['downloaded']:
            download_state = "β Downloaded"
        else:
            download_state = "β³ Not downloaded"
        print(f"\n {dataset_name} ({meta['type']})")
        print(f" {download_state}")
        print(f" {meta['description']}")
def test_full_workflow():
    """Route sample queries to the healthcare or education Q&A and print the result.

    Each sample query carries an ``industry`` field that selects which
    module answers it. A query for which the module returns nothing is
    reported as "No answer found"; a query whose industry has no handler is
    skipped after its header lines are printed.
    """
    print("\n" + "=" * 60)
    print("TESTING COMPLETE WORKFLOW")
    print("=" * 60)

    healthcare = get_enhanced_healthcare_module()
    education = get_enhanced_education_module()

    # One Q&A entry point per industry; replaces two duplicated if/elif branches.
    answerers = {
        "healthcare": healthcare.answer_medical_question,
        "education": education.answer_academic_question,
    }

    # Simulated user queries. 'expected_module' is carried in the data but is
    # not checked by this function.
    test_queries = [
        {
            "query": "What is diabetes?",
            "industry": "healthcare",
            "expected_module": "healthcare",
        },
        {
            "query": "How to check my GPA?",
            "industry": "education",
            "expected_module": "education",
        },
        {
            "query": "I have fever and headache",
            "industry": "healthcare",
            "expected_module": "healthcare",
        },
    ]

    for test in test_queries:
        print(f"\nπ© Query: '{test['query']}'")
        print(f" Industry: {test['industry']}")

        answer_question = answerers.get(test['industry'])
        if answer_question is None:
            continue

        answer = answer_question(test['query'])
        if answer:
            # Only the first 80 characters of the answer are shown.
            print(f" π Answer: {answer['answer'][:80]}...")
            print(f" Source: {answer['source']}")
        else:
            # Make an unanswered query visible, as the per-module tests do.
            print(" No answer found")
if __name__ == "__main__":
    # Script entry point: run every test function in order and report the
    # outcome. Any exception aborts the run, prints a traceback, and makes
    # the process exit with a non-zero status.
    print("\nπ Starting Enhanced Module Tests with Datasets\n")
    try:
        # Test dataset manager
        test_dataset_manager()
        # Test enhanced modules
        test_enhanced_healthcare()
        test_enhanced_education()
        # Test intent trainer
        test_intent_trainer()
        # Test full workflow
        test_full_workflow()
    except Exception as e:
        print(f"\nβ TEST FAILED: {e}")
        import traceback
        traceback.print_exc()
        # Without this the script exits 0 after a failure, so a calling
        # shell or CI job would treat the run as successful.
        sys.exit(1)
    else:
        print("\n" + "=" * 60)
        print("β ALL ENHANCED MODULE TESTS COMPLETED")
        print("=" * 60 + "\n")