|
|
| """
|
| Quick test for the enhanced quality scoring system
|
| """
|
|
|
| import sys
|
| import os
|
|
|
|
|
| sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
| from app import (
|
| calculate_quality_score,
|
| generate_comprehensive_quality_report,
|
| suggest_quality_improvements
|
| )
|
|
|
def test_quality_scoring():
    """Run the quality helpers on one hard-coded conversion sample.

    Builds fixed DOCX-analysis, PDF-validation and post-processing
    dictionaries, passes them to ``calculate_quality_score``,
    ``generate_comprehensive_quality_report`` and
    ``suggest_quality_improvements`` (in that order), and prints each result.

    Returns:
        The value returned by ``calculate_quality_score`` for the sample.
    """
    divider = "=" * 50
    print("🧪 Testing Enhanced Quality Scoring System")
    print(divider)

    # Characteristics of the source DOCX used as scorer input.
    source_profile = dict(
        text_content_length=1573,
        font_families={'Arial'},
        has_tables=True,
        has_images=True,
        rtl_content_detected=True,
        placeholder_count=9,
        has_textboxes=False,
        has_smartart=False,
        has_complex_shapes=False,
        table_structure_issues=['Complex cell merging detected'],
    )

    # Validation facts about the produced PDF.
    pdf_checks = dict(
        file_size_mb=0.12,
        file_exists=True,
        size_reasonable=True,
        warnings=[],
        success_metrics=[
            'PDF file size is reasonable',
            'Document contains tables - formatting preservation critical',
            'Document contains images - quality preservation applied',
            'Font substitution applied for 1 font families',
        ],
    )

    # Counters and messages from the post-processing verification pass.
    post_checks = dict(
        pages_processed=1,
        placeholders_verified=9,
        tables_verified=1,
        arabic_text_verified=150,
        layout_issues_fixed=0,
        warnings=[],
        success_metrics=[
            'All 9 placeholders preserved',
            'Arabic RTL text verified: 150 characters',
            'Table structure preserved',
        ],
    )

    # 1) Numeric score.
    score = calculate_quality_score(source_profile, pdf_checks, post_checks)
    print(f"🏆 Enhanced Quality Score: {score:.1f}%")

    # 2) Full report.
    report = generate_comprehensive_quality_report(
        source_profile, pdf_checks, post_checks
    )
    print("\n📋 Enhanced Quality Report:")
    print(report)

    # 3) Suggestions, printed one per line.
    tips = suggest_quality_improvements(
        source_profile, pdf_checks, post_checks, score
    )
    print("\n💡 Improvement Suggestions:")
    for tip in tips:
        print(tip)

    return score
|
|
|
def test_different_scenarios():
    """Score two canned scenarios and print a rating band for each.

    Each scenario supplies a ``docx_info``, ``pdf_validation`` and
    ``post_process_results`` dictionary that is passed to
    ``calculate_quality_score``. The resulting score is printed together
    with the label of the first band (checked from 95 down to 65) whose
    threshold it reaches; scores below every threshold get the
    "NEEDS IMPROVEMENT" label.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("🔬 Testing Different Quality Scenarios")
    print(rule)

    # Simple document, no warnings, every placeholder verified.
    perfect_case = {
        'name': 'Perfect Conversion',
        'docx_info': {
            'text_content_length': 1000,
            'font_families': {'Arial'},
            'has_tables': True,
            'has_images': False,
            'rtl_content_detected': True,
            'placeholder_count': 5,
            'has_textboxes': False,
            'has_smartart': False,
            'has_complex_shapes': False,
            'table_structure_issues': [],
        },
        'pdf_validation': {
            'file_size_mb': 0.5,
            'warnings': [],
            'success_metrics': ['Perfect conversion', 'All elements preserved'],
        },
        'post_process_results': {
            'pages_processed': 1,
            'placeholders_verified': 5,
            'tables_verified': 1,
            'arabic_text_verified': 200,
            'warnings': [],
            'success_metrics': ['All placeholders preserved', 'Arabic text verified'],
        },
    }

    # Feature-heavy document with warnings and two unverified placeholders.
    troubled_case = {
        'name': 'Complex Document with Issues',
        'docx_info': {
            'text_content_length': 5000,
            'font_families': {'Arial', 'Traditional Arabic'},
            'has_tables': True,
            'has_images': True,
            'rtl_content_detected': True,
            'placeholder_count': 10,
            'has_textboxes': True,
            'has_smartart': True,
            'has_complex_shapes': True,
            'table_structure_issues': ['Nested tables', 'Complex merging'],
        },
        'pdf_validation': {
            'file_size_mb': 2.5,
            'warnings': ['Large file size'],
            'success_metrics': ['Basic conversion completed'],
        },
        'post_process_results': {
            'pages_processed': 3,
            'placeholders_verified': 8,
            'tables_verified': 2,
            'arabic_text_verified': 500,
            'warnings': ['Some layout issues detected'],
            'success_metrics': ['Most elements preserved'],
        },
    }

    # Ordered highest threshold first: the first band the score reaches wins.
    rating_bands = (
        (95, " Result: 🌟 EXCELLENT"),
        (85, " Result: ✅ VERY GOOD"),
        (75, " Result: 👍 GOOD"),
        (65, " Result: ⚠️ FAIR"),
    )
    below_all_bands = " Result: ❌ NEEDS IMPROVEMENT"

    for case in (perfect_case, troubled_case):
        print(f"\n📊 Scenario: {case['name']}")
        score = calculate_quality_score(
            case['docx_info'],
            case['pdf_validation'],
            case['post_process_results'],
        )
        print(f" Quality Score: {score:.1f}%")

        verdict = next(
            (label for floor, label in rating_bands if score >= floor),
            below_all_bands,
        )
        print(verdict)
|
|
|
if __name__ == "__main__":
    # Score the reference sample first; its result drives the summary below.
    actual_score = test_quality_scoring()

    # Then print the ratings for the canned scenarios.
    test_different_scenarios()

    # Summary banner.
    print("\n" + "=" * 50)
    print("🎯 SUMMARY")
    print("=" * 50)
    print(f"Your document achieved: {actual_score:.1f}%")

    # Thresholds are checked from highest to lowest; the first match wins.
    if actual_score >= 90:
        print("🌟 Excellent quality! The enhanced system is working perfectly.")
    elif actual_score >= 80:
        print("✅ Good quality! Minor improvements applied successfully.")
    elif actual_score >= 70:
        print("👍 Acceptable quality. The system detected and addressed issues.")
    else:
        print("⚠️ Quality needs improvement. The system provided detailed suggestions.")

    # Closing feature list, emitted as a single multi-line write.
    print(
        "\n💡 The enhanced quality scoring system now provides:\n"
        " • More accurate quality assessment\n"
        " • Detailed improvement suggestions\n"
        " • Better handling of complex documents\n"
        " • Comprehensive quality reports"
    )
|
|
|