# OrgAI / rag_anything_smaranika / examples / enhanced_markdown_example.py
# Phonex
# TheTruthSchool_RAG
# 167596f
#!/usr/bin/env python
"""
Enhanced Markdown Conversion Example for RAG-Anything
This example demonstrates the enhanced markdown to PDF conversion capabilities
with multiple backends, advanced styling, and professional formatting.
Features demonstrated:
- Basic markdown to PDF conversion
- Multiple conversion backends (WeasyPrint, Pandoc)
- Custom CSS styling and configuration
- Backend detection and selection
- Error handling and fallback mechanisms
- Command-line interface usage
"""
import logging
from pathlib import Path
import tempfile
# Add project root directory to Python path
import sys
sys.path.append(str(Path(__file__).parent.parent))
from raganything.enhanced_markdown import EnhancedMarkdownConverter, MarkdownConfig
def create_sample_markdown_content():
    """Assemble the sample markdown documents used throughout the demos.

    Returns:
        dict: Markdown text keyed by sample name, inserted in the order
        ``"basic"``, ``"technical"``, ``"academic"``.
    """
    documents = {}

    # Short document exercising everyday markdown formatting
    documents["basic"] = """# Basic Markdown Sample
## Introduction
This is a simple markdown document demonstrating basic formatting.
### Text Formatting
- **Bold text** and *italic text*
- `Inline code` examples
- [Links to external sites](https://github.com)
### Lists
1. First ordered item
2. Second ordered item
3. Third ordered item
- Unordered item
- Another unordered item
- Nested item
- Another nested item
### Blockquotes
> This is a blockquote with important information.
> It can span multiple lines.
### Code Block
```python
def hello_world():
print("Hello, World!")
return "Success"
```
"""

    # Longer technical document: fenced code in several languages, tables, an image
    documents["technical"] = """# Technical Documentation
## Table of Contents
- [Overview](#overview)
- [Architecture](#architecture)
- [Implementation](#implementation)
- [Performance](#performance)
## Overview
This document provides comprehensive technical specifications for the enhanced markdown conversion system.
## Architecture
### Core Components
1. **Markdown Parser**: Processes markdown syntax
2. **CSS Engine**: Applies styling and layout
3. **PDF Generator**: Creates final PDF output
4. **Backend Manager**: Handles multiple conversion engines
### Data Flow
```mermaid
graph LR
A[Markdown Input] --> B[Parser]
B --> C[CSS Processor]
C --> D[PDF Generator]
D --> E[PDF Output]
```
## Implementation
### Python Code Example
```python
from raganything.enhanced_markdown import EnhancedMarkdownConverter, MarkdownConfig
# Configure converter
config = MarkdownConfig(
page_size="A4",
margin="1in",
include_toc=True,
syntax_highlighting=True
)
# Create converter
converter = EnhancedMarkdownConverter(config)
# Convert to PDF
success = converter.convert_file_to_pdf(
input_path="document.md",
output_path="output.pdf",
method="weasyprint"
)
```
### Configuration Options
```yaml
converter:
page_size: A4
margin: 1in
font_size: 12pt
include_toc: true
syntax_highlighting: true
backend: weasyprint
```
## Performance
### Benchmark Results
| Backend | Speed | Quality | Features |
|---------|-------|---------|----------|
| WeasyPrint | ⭐⭐⭐⭐ | ⭐⭐⭐⭐ | ⭐⭐⭐⭐ |
| Pandoc | ⭐⭐⭐ | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ |
### Processing Times
- **Small documents** (< 10 pages): 1-3 seconds
- **Medium documents** (10-50 pages): 3-10 seconds
- **Large documents** (> 50 pages): 10-30 seconds
## Advanced Features
### Custom CSS Styling
The system supports advanced CSS customization:
```css
body {
font-family: 'Georgia', serif;
line-height: 1.6;
color: #333;
}
h1 {
color: #2c3e50;
border-bottom: 2px solid #3498db;
padding-bottom: 0.3em;
}
code {
background-color: #f8f9fa;
padding: 2px 4px;
border-radius: 3px;
font-family: 'Courier New', monospace;
}
pre {
background-color: #f8f9fa;
border-left: 4px solid #3498db;
padding: 15px;
border-radius: 5px;
overflow-x: auto;
}
table {
border-collapse: collapse;
width: 100%;
margin: 1em 0;
}
th, td {
border: 1px solid #ddd;
padding: 8px 12px;
text-align: left;
}
th {
background-color: #f2f2f2;
font-weight: bold;
}
```
### Image Support
![Sample Image](https://via.placeholder.com/400x200/3498db/ffffff?text=Sample+Image)
Images are automatically scaled and positioned appropriately in the PDF output.
## Conclusion
The enhanced markdown conversion system provides professional-quality PDF generation with extensive customization options and multiple backend support.
---
*Generated on: 2024-01-15*
*Version: 1.0.0*
"""

    # Paper-style document: numbered sections, result tables, references
    documents["academic"] = """# Research Paper: Advanced Document Processing
**Authors:** Alice Johnson¹, Bob Smith², Carol Williams¹
**Affiliations:**
¹ University of Technology
² Research Institute
## Abstract
This paper presents a comprehensive analysis of advanced document processing techniques using enhanced markdown conversion. Our research demonstrates significant improvements in processing speed and output quality through optimized backend selection and custom styling approaches.
**Keywords:** document processing, markdown conversion, PDF generation, performance optimization
## 1. Introduction
Document processing has become increasingly important in modern information systems. The ability to convert markdown documents to high-quality PDF outputs with professional formatting is crucial for academic, technical, and business applications.
### 1.1 Research Objectives
1. Evaluate different markdown conversion backends
2. Analyze performance characteristics of each approach
3. Develop optimization strategies for large-scale processing
4. Design flexible configuration systems for diverse use cases
### 1.2 Contributions
This work makes the following contributions:
- Comprehensive comparison of markdown conversion backends
- Performance optimization techniques for large documents
- Flexible configuration framework for customization
- Integration patterns for document processing pipelines
## 2. Methodology
### 2.1 Experimental Setup
We conducted experiments using the following configuration:
```python
# Experimental configuration
config = MarkdownConfig(
page_size="A4",
margin="1in",
font_size="11pt",
line_height="1.4",
include_toc=True,
syntax_highlighting=True
)
```
### 2.2 Test Documents
| Category | Count | Avg Size | Complexity |
|----------|-------|----------|------------|
| Simple | 100 | 2 pages | Low |
| Medium | 50 | 10 pages | Medium |
| Complex | 25 | 25 pages | High |
### 2.3 Metrics
We evaluated performance using the following metrics:
- **Conversion Speed**: Time to generate PDF (seconds)
- **Memory Usage**: Peak memory consumption (MB)
- **Output Quality**: Visual assessment score (1-10)
- **Feature Support**: Number of supported markdown features
## 3. Results
### 3.1 Performance Comparison
The following table summarizes our performance results:
| Backend | Speed (s) | Memory (MB) | Quality | Features |
|---------|-----------|-------------|---------|----------|
| WeasyPrint | 2.3 ± 0.5 | 85 ± 15 | 8.5 | 85% |
| Pandoc | 4.7 ± 1.2 | 120 ± 25 | 9.2 | 95% |
### 3.2 Quality Analysis
#### 3.2.1 Typography
WeasyPrint excels in web-style typography with excellent CSS support, while Pandoc provides superior academic formatting with LaTeX-quality output.
#### 3.2.2 Code Highlighting
Both backends support syntax highlighting through Pygments:
```python
def analyze_performance(backend, documents):
'''Analyze conversion performance for given backend'''
results = []
for doc in documents:
start_time = time.time()
success = backend.convert(doc)
end_time = time.time()
results.append({
'document': doc,
'time': end_time - start_time,
'success': success
})
return results
```
### 3.3 Scalability
Our scalability analysis shows:
- Linear scaling with document size for both backends
- Memory usage proportional to content complexity
- Optimal batch sizes of 10-20 documents for parallel processing
## 4. Discussion
### 4.1 Backend Selection Guidelines
Choose **WeasyPrint** for:
- Web-style documents with custom CSS
- Fast conversion requirements
- Simple to medium complexity documents
Choose **Pandoc** for:
- Academic papers and publications
- Complex document structures
- Maximum feature support requirements
### 4.2 Optimization Strategies
1. **Image Optimization**: Compress images before embedding
2. **CSS Minimization**: Use efficient CSS selectors
3. **Content Chunking**: Process large documents in sections
4. **Caching**: Cache converted content for repeated use
## 5. Conclusion
This research demonstrates that enhanced markdown conversion provides significant benefits for document processing workflows. The choice between WeasyPrint and Pandoc depends on specific requirements for speed, quality, and features.
### 5.1 Future Work
- Integration with cloud processing services
- Real-time collaborative editing support
- Advanced template systems
- Performance optimization for very large documents
## References
1. Johnson, A. et al. (2024). "Advanced Document Processing Techniques." *Journal of Information Systems*, 15(3), 45-62.
2. Smith, B. (2023). "PDF Generation Optimization." *Technical Computing Review*, 8(2), 12-28.
3. Williams, C. (2024). "Markdown Processing Frameworks." *Software Engineering Quarterly*, 22(1), 78-95.
---
**Manuscript received:** January 10, 2024
**Accepted for publication:** January 15, 2024
**Published online:** January 20, 2024
"""

    return documents
def demonstrate_basic_conversion():
    """Run the simplest markdown-to-PDF conversion with default settings.

    Lists the backends reported by the converter, writes the basic sample
    into a fresh temporary directory and converts it with ``method="auto"``.

    Returns:
        tuple: ``(success, temp_dir)`` where ``success`` is the converter's
        result and ``temp_dir`` is the directory holding the generated files,
        or ``(False, None)`` if any step raised an exception.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("BASIC MARKDOWN CONVERSION DEMONSTRATION")
    print(rule)

    try:
        # Converter with default settings
        md_converter = EnhancedMarkdownConverter()

        # Report which backends the converter says it can use
        info = md_converter.get_backend_info()
        print("Available conversion backends:")
        for name, is_available in info["available_backends"].items():
            marker = "✅" if is_available else "❌"
            print(f" {marker} {name}")
        print(f"Recommended backend: {info['recommended_backend']}")

        sample_text = create_sample_markdown_content()["basic"]
        work_dir = Path(tempfile.mkdtemp())
        md_file = work_dir / "basic_sample.md"
        pdf_file = work_dir / "basic_sample.pdf"
        md_file.write_text(sample_text, encoding="utf-8")

        print(f"\nConverting basic sample: {md_file}")
        converted = md_converter.convert_file_to_pdf(
            input_path=str(md_file),
            output_path=str(pdf_file),
            method="auto",  # Let the system choose the best backend
        )

        if not converted:
            print("❌ Basic conversion failed")
        else:
            print("✅ Basic conversion successful!")
            print(f" Output: {pdf_file}")
        return converted, work_dir
    except Exception as exc:
        print(f"❌ Basic conversion demonstration failed: {exc}")
        return False, None
def demonstrate_backend_comparison():
    """Convert the technical sample with each backend and compare the outcomes.

    The technical sample is written to a fresh temporary directory and then
    converted once per backend (``"auto"``, ``"weasyprint"``, ``"pandoc"``).
    A failure of one backend is recorded and does not stop the others.

    Returns:
        tuple: ``(results, temp_dir)``. ``results`` maps each backend name to
        a dict that always has ``"success"`` and, depending on the outcome,
        ``"time"``, ``"size"`` and ``"output"`` or ``"error"``. Returns
        ``(None, None)`` if the demonstration fails outside the per-backend
        handling.
    """
    # Local import, done once before the timing loop rather than per backend.
    import time

    print("\n" + "=" * 60)
    print("BACKEND COMPARISON DEMONSTRATION")
    print("=" * 60)
    try:
        samples = create_sample_markdown_content()
        temp_dir = Path(tempfile.mkdtemp())

        # Create technical document
        tech_md_path = temp_dir / "technical.md"
        with open(tech_md_path, "w", encoding="utf-8") as f:
            f.write(samples["technical"])
        print("Testing different backends with technical document...")

        # Test different backends
        backends = ["auto", "weasyprint", "pandoc"]
        results = {}
        for backend in backends:
            try:
                print(f"\nTesting {backend} backend...")
                converter = EnhancedMarkdownConverter()
                output_path = temp_dir / f"technical_{backend}.pdf"

                # perf_counter is monotonic, so the measured duration cannot
                # be skewed by system clock adjustments during the conversion.
                start_time = time.perf_counter()
                success = converter.convert_file_to_pdf(
                    input_path=str(tech_md_path),
                    output_path=str(output_path),
                    method=backend,
                )
                conversion_time = time.perf_counter() - start_time

                if success:
                    file_size = (
                        output_path.stat().st_size if output_path.exists() else 0
                    )
                    print(
                        f" ✅ {backend}: Success in {conversion_time:.2f}s, {file_size} bytes"
                    )
                    results[backend] = {
                        "success": True,
                        "time": conversion_time,
                        "size": file_size,
                        "output": str(output_path),
                    }
                else:
                    print(f" ❌ {backend}: Failed")
                    results[backend] = {"success": False, "time": conversion_time}
            except Exception as e:
                print(f" ❌ {backend}: Error - {str(e)}")
                results[backend] = {"success": False, "error": str(e)}

        # Summary
        print("\n" + "-" * 40)
        print("BACKEND COMPARISON SUMMARY")
        print("-" * 40)
        successful_backends = [b for b, r in results.items() if r.get("success", False)]
        print(f"Successful backends: {successful_backends}")
        if successful_backends:
            # Every successful entry carries a "time" key, so this lookup is safe.
            fastest = min(successful_backends, key=lambda b: results[b]["time"])
            print(f"Fastest backend: {fastest} ({results[fastest]['time']:.2f}s)")
        return results, temp_dir
    except Exception as e:
        print(f"❌ Backend comparison demonstration failed: {str(e)}")
        return None, None
def demonstrate_custom_styling():
    """Convert the academic sample with a custom stylesheet and configuration.

    The academic sample is rendered through WeasyPrint twice: once with the
    stylesheet defined here and, if that succeeds, once more with a default
    converter so both PDFs can be compared side by side.

    Returns:
        tuple: ``(success, temp_dir)`` for the styled conversion, or
        ``(False, None)`` if any step raised an exception.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("CUSTOM STYLING DEMONSTRATION")
    print(banner)

    try:
        sample_docs = create_sample_markdown_content()
        work_dir = Path(tempfile.mkdtemp())

        # Stylesheet handed to the converter through MarkdownConfig
        stylesheet = """
body {
font-family: 'Times New Roman', serif;
font-size: 11pt;
line-height: 1.4;
color: #2c3e50;
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
h1 {
color: #c0392b;
font-size: 2.2em;
border-bottom: 3px solid #e74c3c;
padding-bottom: 0.5em;
margin-top: 2em;
}
h2 {
color: #8e44ad;
font-size: 1.6em;
border-bottom: 2px solid #9b59b6;
padding-bottom: 0.3em;
margin-top: 1.5em;
}
h3 {
color: #2980b9;
font-size: 1.3em;
margin-top: 1.2em;
}
code {
background-color: #ecf0f1;
color: #e74c3c;
padding: 3px 6px;
border-radius: 4px;
font-family: 'Courier New', monospace;
font-size: 0.9em;
}
pre {
background-color: #2c3e50;
color: #ecf0f1;
padding: 20px;
border-radius: 8px;
border-left: 5px solid #3498db;
overflow-x: auto;
font-size: 0.9em;
}
pre code {
background-color: transparent;
color: inherit;
padding: 0;
}
blockquote {
background-color: #f8f9fa;
border-left: 5px solid #3498db;
margin: 1em 0;
padding: 15px 20px;
font-style: italic;
color: #555;
}
table {
border-collapse: collapse;
width: 100%;
margin: 1.5em 0;
background-color: white;
border-radius: 8px;
overflow: hidden;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
th {
background-color: #3498db;
color: white;
padding: 12px 15px;
text-align: left;
font-weight: bold;
}
td {
padding: 10px 15px;
border-bottom: 1px solid #ecf0f1;
}
tr:nth-child(even) {
background-color: #f8f9fa;
}
tr:hover {
background-color: #e8f4fd;
}
ul, ol {
margin-bottom: 1em;
padding-left: 2em;
}
li {
margin-bottom: 0.5em;
line-height: 1.6;
}
a {
color: #3498db;
text-decoration: none;
border-bottom: 1px dotted #3498db;
}
a:hover {
color: #2980b9;
border-bottom: 1px solid #2980b9;
}
.toc {
background-color: #f8f9fa;
border: 2px solid #e9ecef;
border-radius: 8px;
padding: 20px;
margin: 2em 0;
}
.toc h2 {
color: #2c3e50;
margin-top: 0;
border-bottom: none;
}
.toc ul {
list-style-type: none;
padding-left: 0;
}
.toc li {
margin-bottom: 0.8em;
}
.toc a {
color: #2c3e50;
font-weight: 500;
border-bottom: none;
}
"""

        # Page setup plus the stylesheet above
        styled_config = MarkdownConfig(
            page_size="A4",
            margin="0.8in",
            font_size="11pt",
            line_height="1.4",
            include_toc=True,
            syntax_highlighting=True,
            custom_css=stylesheet,
        )
        styled_converter = EnhancedMarkdownConverter(styled_config)

        # Write the academic sample that both conversions read
        md_file = work_dir / "academic_styled.md"
        styled_pdf = work_dir / "academic_styled.pdf"
        plain_pdf = work_dir / "academic_default.pdf"
        md_file.write_text(sample_docs["academic"], encoding="utf-8")

        print("Converting academic paper with custom styling...")
        print("Custom styling features:")
        for feature_line in (
            " - Custom color scheme (reds, purples, blues)",
            " - Times New Roman serif font",
            " - Enhanced table styling with hover effects",
            " - Styled code blocks with dark theme",
            " - Custom blockquote styling",
            " - Professional header styling",
        ):
            print(feature_line)

        styled_ok = styled_converter.convert_file_to_pdf(
            input_path=str(md_file),
            output_path=str(styled_pdf),
            method="weasyprint",  # WeasyPrint is best for custom CSS
        )

        if not styled_ok:
            print("❌ Custom styling conversion failed")
            return styled_ok, work_dir

        print("✅ Custom styling conversion successful!")
        print(f" Output: {styled_pdf}")

        # Render the same document without custom styling for comparison
        plain_ok = EnhancedMarkdownConverter().convert_file_to_pdf(
            input_path=str(md_file),
            output_path=str(plain_pdf),
            method="weasyprint",
        )
        if plain_ok:
            print(f" Comparison (default): {plain_pdf}")
        return styled_ok, work_dir
    except Exception as exc:
        print(f"❌ Custom styling demonstration failed: {exc}")
        return False, None
def demonstrate_content_conversion():
    """Demonstrate converting in-memory markdown content directly to PDF.

    A markdown document is built with an f-string from runtime values
    (generation timestamp, interpreter path, script location, working
    directory, Python version, platform) and passed to
    ``convert_markdown_to_pdf`` without first being written to a file.

    Returns:
        tuple: ``(success, temp_dir)`` where ``temp_dir`` holds the generated
        PDF, or ``(False, None)`` if any step raised an exception.
    """
    # Local import: only this demonstration needs a timestamp.
    from datetime import datetime

    print("\n" + "=" * 60)
    print("CONTENT CONVERSION DEMONSTRATION")
    print("=" * 60)
    try:
        # The document states when it was generated; the console summary
        # below advertises "Generated timestamps", so produce a real one.
        generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Create markdown content programmatically
        dynamic_content = f"""# Dynamic Content Example
## Generated Information
This document was generated programmatically on {generated_at} by {Path(__file__).name}.
## System Information
- **Python Path**: {sys.executable}
- **Script Location**: {Path(__file__).absolute()}
- **Working Directory**: {Path.cwd()}
## Dynamic Table
| Property | Value |
|----------|-------|
| Script Name | {Path(__file__).name} |
| Python Version | {sys.version.split()[0]} |
| Platform | {sys.platform} |
## Code Example
```python
# This content was generated dynamically
import sys
from pathlib import Path
def generate_report():
return f"Report generated from {{Path(__file__).name}}"
print(generate_report())
```
## Features Demonstrated
This example shows how to:
1. Generate markdown content programmatically
2. Convert content directly without saving to file first
3. Include dynamic information in documents
4. Use different conversion methods
> **Note**: This content was created in memory and converted directly to PDF
> without intermediate file storage.
## Conclusion
Direct content conversion is useful for:
- Dynamic report generation
- Programmatic document creation
- API-based document services
- Real-time content processing
"""
        temp_dir = Path(tempfile.mkdtemp())
        converter = EnhancedMarkdownConverter()
        print("Converting dynamically generated markdown content...")
        print("Content includes:")
        print(" - System information")
        print(" - Dynamic tables with current values")
        print(" - Generated timestamps")
        print(" - Programmatic examples")

        # Convert content directly to PDF
        output_path = temp_dir / "dynamic_content.pdf"
        success = converter.convert_markdown_to_pdf(
            markdown_content=dynamic_content,
            output_path=str(output_path),
            method="auto",
        )
        if success:
            print("✅ Content conversion successful!")
            print(f" Output: {output_path}")
            # Guard the lookup like the other demos do, so a missing file
            # does not turn a reported success into an overall failure.
            file_size = output_path.stat().st_size if output_path.exists() else 0
            print(f" Generated PDF size: {file_size} bytes")
        else:
            print("❌ Content conversion failed")
        return success, temp_dir
    except Exception as e:
        print(f"❌ Content conversion demonstration failed: {str(e)}")
        return False, None
def demonstrate_error_handling():
    """Demonstrate error handling and backend fallback on awkward inputs.

    Four markdown inputs (questionable syntax, complex content, empty
    content and a bare title) are each converted with every backend. Each
    attempt is isolated, so one failure never aborts the others. Afterwards
    a small fallback helper shows how to try backends in order until one
    succeeds.

    Returns:
        tuple: ``(results, temp_dir)``. ``results`` maps
        ``"<test_name>_<backend>"`` to a dict with ``"success"`` plus
        ``"size"`` or ``"error"`` where applicable. Returns ``(None, None)``
        if the demonstration itself fails.
    """
    print("\n" + "=" * 60)
    print("ERROR HANDLING DEMONSTRATION")
    print("=" * 60)
    try:
        temp_dir = Path(tempfile.mkdtemp())

        # Test cases with various issues
        test_cases = {
            "invalid_markdown": """# Invalid Markdown
This markdown has some {{invalid}} syntax and [broken links](http://nonexistent.invalid).
```unknown_language
This code block uses an unknown language
```
![Missing Image](nonexistent_image.png)
""",
            "complex_content": """# Complex Content Test
## Mathematical Expressions
This tests content that might be challenging for some backends:
$$ E = mc^2 $$
$$\\sum_{i=1}^{n} x_i = \\frac{n(n+1)}{2}$$
## Complex Tables
| A | B | C | D | E | F | G |
|---|---|---|---|---|---|---|
| Very long content that might wrap | Short | Medium length content | X | Y | Z | End |
| Another row with different lengths | A | B | C | D | E | F |
## Special Characters
Unicode: α, β, γ, δ, ε, ζ, η, θ, ι, κ, λ, μ, ν, ξ, ο, π, ρ, σ, τ, υ, φ, χ, ψ, ω
Symbols: ♠ ♣ ♥ ♦ ☀ ☁ ☂ ☃ ☄ ★ ☆ ☉ ☊ ☋ ☌ ☍ ☎ ☏
Arrows: ← ↑ → ↓ ↔ ↕ ↖ ↗ ↘ ↙
""",
            "empty_content": "",
            "minimal_content": "# Just a title",
        }
        print("Testing error handling with various content types...")
        results = {}
        for test_name, content in test_cases.items():
            print(f"\nTesting: {test_name}")
            # Try multiple backends for each test case. Every attempt has its
            # own handler, so no additional outer try/except is needed here.
            for backend in ["auto", "weasyprint", "pandoc"]:
                try:
                    converter = EnhancedMarkdownConverter()
                    output_path = temp_dir / f"{test_name}_{backend}.pdf"
                    success = converter.convert_markdown_to_pdf(
                        markdown_content=content,
                        output_path=str(output_path),
                        method=backend,
                    )
                    if success:
                        file_size = (
                            output_path.stat().st_size
                            if output_path.exists()
                            else 0
                        )
                        print(f" ✅ {backend}: Success ({file_size} bytes)")
                        results[f"{test_name}_{backend}"] = {
                            "success": True,
                            "size": file_size,
                        }
                    else:
                        print(f" ❌ {backend}: Failed")
                        results[f"{test_name}_{backend}"] = {"success": False}
                except Exception as e:
                    # Truncate long messages for the console; keep the full
                    # text in the results.
                    print(f" ❌ {backend}: Error - {str(e)[:60]}...")
                    results[f"{test_name}_{backend}"] = {
                        "success": False,
                        "error": str(e),
                    }

        # Demonstrate robust conversion with fallbacks
        print("\nDemonstrating robust conversion with fallback logic...")

        def robust_convert(content, output_path):
            """Try each backend in order; return ``(backend, True)`` on the
            first success or ``(None, False)`` if all of them fail."""
            backends = ["weasyprint", "pandoc", "auto"]
            for backend in backends:
                try:
                    converter = EnhancedMarkdownConverter()
                    success = converter.convert_markdown_to_pdf(
                        markdown_content=content,
                        output_path=output_path,
                        method=backend,
                    )
                    if success:
                        return backend, True
                except Exception as exc:
                    # Falling back is intended, but record why this backend
                    # was skipped instead of discarding the error silently.
                    logging.getLogger(__name__).info(
                        "Backend %s failed, trying next: %s", backend, exc
                    )
                    continue
            return None, False

        # Test robust conversion
        test_content = test_cases["complex_content"]
        robust_output = temp_dir / "robust_conversion.pdf"
        successful_backend, success = robust_convert(test_content, str(robust_output))
        if success:
            print(f"✅ Robust conversion successful using {successful_backend}")
            print(f" Output: {robust_output}")
        else:
            print("❌ All backends failed for robust conversion")

        # Summary
        print("\n" + "-" * 40)
        print("ERROR HANDLING SUMMARY")
        print("-" * 40)
        successful_conversions = sum(
            1 for r in results.values() if r.get("success", False)
        )
        total_attempts = len(results)
        success_rate = (
            (successful_conversions / total_attempts * 100) if total_attempts > 0 else 0
        )
        print(f"Total conversion attempts: {total_attempts}")
        print(f"Successful conversions: {successful_conversions}")
        print(f"Success rate: {success_rate:.1f}%")
        return results, temp_dir
    except Exception as e:
        print(f"❌ Error handling demonstration failed: {str(e)}")
        return None, None
def main():
    """Run every demonstration in sequence and print an overall summary.

    Configures INFO-level logging, runs the basic, backend-comparison,
    custom-styling, content-conversion and error-handling demonstrations,
    then lists which features succeeded followed by general guidance.
    """
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    print("RAG-Anything Enhanced Markdown Conversion Demonstration")
    print("=" * 70)
    print(
        "This example demonstrates various enhanced markdown conversion capabilities:"
    )
    print(" - Basic markdown to PDF conversion")
    print(" - Multiple backend comparison (WeasyPrint vs Pandoc)")
    print(" - Custom CSS styling and professional formatting")
    print(" - Direct content conversion without file I/O")
    print(" - Comprehensive error handling and fallback mechanisms")
    results = {}

    # Run demonstrations. Each one also returns its temporary output
    # directory, which is not needed for the summary and is discarded.
    print("\n🚀 Starting demonstrations...")

    # Basic conversion
    results["basic"], _ = demonstrate_basic_conversion()

    # Backend comparison
    results["backends"], _ = demonstrate_backend_comparison()

    # Custom styling
    results["styling"], _ = demonstrate_custom_styling()

    # Content conversion
    results["content"], _ = demonstrate_content_conversion()

    # Error handling
    results["error_handling"], _ = demonstrate_error_handling()

    # Summary
    print("\n" + "=" * 70)
    print("DEMONSTRATION SUMMARY")
    print("=" * 70)
    print("✅ Features Successfully Demonstrated:")
    if results["basic"]:
        print(" - Basic markdown to PDF conversion")
    if results["backends"]:
        successful_backends = [
            b for b, r in results["backends"].items() if r.get("success", False)
        ]
        # Only claim the feature when at least one backend actually worked.
        if successful_backends:
            print(f" - Multiple backends: {successful_backends}")
    if results["styling"]:
        print(" - Custom CSS styling and professional formatting")
    if results["content"]:
        print(" - Direct content conversion without file I/O")
    if results["error_handling"]:
        # A non-empty dict is guaranteed here, so the division is safe.
        success_rate = (
            sum(
                1 for r in results["error_handling"].values() if r.get("success", False)
            )
            / len(results["error_handling"])
            * 100
        )
        print(f" - Error handling with {success_rate:.1f}% overall success rate")

    print("\n📊 Key Capabilities Highlighted:")
    print(" - Professional PDF generation with high-quality typography")
    print(" - Multiple conversion backends with automatic selection")
    print(" - Extensive CSS customization for branded documents")
    print(" - Syntax highlighting for code blocks using Pygments")
    print(" - Table formatting with professional styling")
    print(" - Image embedding with proper scaling")
    print(" - Table of contents generation with navigation")
    print(" - Comprehensive error handling and fallback mechanisms")

    print("\n💡 Best Practices Demonstrated:")
    print(" - Choose WeasyPrint for web-style documents and custom CSS")
    print(" - Choose Pandoc for academic papers and complex formatting")
    print(" - Use 'auto' method for general-purpose conversion")
    print(" - Implement fallback logic for robust conversion")
    print(" - Optimize images before embedding in documents")
    print(" - Test custom CSS with simple content first")
    print(" - Handle errors gracefully with multiple backend attempts")
    print(" - Use appropriate page sizes and margins for target use case")

    print("\n🎯 Integration Patterns:")
    print(" - Standalone conversion for document generation")
    print(" - Integration with RAG-Anything document pipeline")
    print(" - API-based document services")
    print(" - Batch processing for multiple documents")
    print(" - Dynamic content generation from templates")
# Run the full demonstration only when executed as a script, not on import.
if __name__ == "__main__":
    main()