Spaces:

GlokalAI
/

OrgAI

Running

OrgAI / rag_anything_smaranika /examples /enhanced_markdown_example.py

Phonex

TheTruthSchool_RAG

167596f 7 months ago

31.6 kB

	#!/usr/bin/env python
	"""
	Enhanced Markdown Conversion Example for RAG-Anything

	This example demonstrates the enhanced markdown to PDF conversion capabilities
	with multiple backends, advanced styling, and professional formatting.

	Features demonstrated:
	- Basic markdown to PDF conversion
	- Multiple conversion backends (WeasyPrint, Pandoc)
	- Custom CSS styling and configuration
	- Backend detection and selection
	- Error handling and fallback mechanisms
	- Command-line interface usage
	"""

	import logging
	from pathlib import Path
	import tempfile

	# Add project root directory to Python path
	import sys

	sys.path.append(str(Path(__file__).parent.parent))

	from raganything.enhanced_markdown import EnhancedMarkdownConverter, MarkdownConfig


	def create_sample_markdown_content():
	"""Create comprehensive sample markdown content for testing"""

	# Basic sample
	basic_content = """# Basic Markdown Sample

	## Introduction
	This is a simple markdown document demonstrating basic formatting.

	### Text Formatting
	- Bold text and italic text
	- `Inline code` examples
	- [Links to external sites](https://github.com)

	### Lists
	1. First ordered item
	2. Second ordered item
	3. Third ordered item

	- Unordered item
	- Another unordered item
	- Nested item
	- Another nested item

	### Blockquotes
	> This is a blockquote with important information.
	> It can span multiple lines.

	### Code Block
	```python
	def hello_world():
	print("Hello, World!")
	return "Success"
	```
	"""

	# Technical documentation sample
	technical_content = """# Technical Documentation

	## Table of Contents
	- [Overview](#overview)
	- [Architecture](#architecture)
	- [Implementation](#implementation)
	- [Performance](#performance)

	## Overview
	This document provides comprehensive technical specifications for the enhanced markdown conversion system.

	## Architecture

	### Core Components
	1. Markdown Parser: Processes markdown syntax
	2. CSS Engine: Applies styling and layout
	3. PDF Generator: Creates final PDF output
	4. Backend Manager: Handles multiple conversion engines

	### Data Flow
	```mermaid
	graph LR
	A[Markdown Input] --> B[Parser]
	B --> C[CSS Processor]
	C --> D[PDF Generator]
	D --> E[PDF Output]
	```

	## Implementation

	### Python Code Example
	```python
	from raganything.enhanced_markdown import EnhancedMarkdownConverter, MarkdownConfig

	# Configure converter
	config = MarkdownConfig(
	page_size="A4",
	margin="1in",
	include_toc=True,
	syntax_highlighting=True
	)

	# Create converter
	converter = EnhancedMarkdownConverter(config)

	# Convert to PDF
	success = converter.convert_file_to_pdf(
	input_path="document.md",
	output_path="output.pdf",
	method="weasyprint"
	)
	```

	### Configuration Options
	```yaml
	converter:
	page_size: A4
	margin: 1in
	font_size: 12pt
	include_toc: true
	syntax_highlighting: true
	backend: weasyprint
	```

	## Performance

	### Benchmark Results
	\| Backend \| Speed \| Quality \| Features \|
	\|---------\|-------\|---------\|----------\|
	\| WeasyPrint \| ⭐⭐⭐⭐ \| ⭐⭐⭐⭐ \| ⭐⭐⭐⭐ \|
	\| Pandoc \| ⭐⭐⭐ \| ⭐⭐⭐⭐⭐ \| ⭐⭐⭐⭐⭐ \|

	### Processing Times
	- Small documents (< 10 pages): 1-3 seconds
	- Medium documents (10-50 pages): 3-10 seconds
	- Large documents (> 50 pages): 10-30 seconds

	## Advanced Features

	### Custom CSS Styling
	The system supports advanced CSS customization:

	```css
	body {
	font-family: 'Georgia', serif;
	line-height: 1.6;
	color: #333;
	}

	h1 {
	color: #2c3e50;
	border-bottom: 2px solid #3498db;
	padding-bottom: 0.3em;
	}

	code {
	background-color: #f8f9fa;
	padding: 2px 4px;
	border-radius: 3px;
	font-family: 'Courier New', monospace;
	}

	pre {
	background-color: #f8f9fa;
	border-left: 4px solid #3498db;
	padding: 15px;
	border-radius: 5px;
	overflow-x: auto;
	}

	table {
	border-collapse: collapse;
	width: 100%;
	margin: 1em 0;
	}

	th, td {
	border: 1px solid #ddd;
	padding: 8px 12px;
	text-align: left;
	}

	th {
	background-color: #f2f2f2;
	font-weight: bold;
	}
	```

	### Image Support
	![Sample Image](https://via.placeholder.com/400x200/3498db/ffffff?text=Sample+Image)

	Images are automatically scaled and positioned appropriately in the PDF output.

	## Conclusion
	The enhanced markdown conversion system provides professional-quality PDF generation with extensive customization options and multiple backend support.

	---

	Generated on: 2024-01-15
	Version: 1.0.0
	"""

	# Academic paper sample
	academic_content = """# Research Paper: Advanced Document Processing

	Authors: Alice Johnson¹, Bob Smith², Carol Williams¹
	Affiliations:
	¹ University of Technology
	² Research Institute

	## Abstract

	This paper presents a comprehensive analysis of advanced document processing techniques using enhanced markdown conversion. Our research demonstrates significant improvements in processing speed and output quality through optimized backend selection and custom styling approaches.

	Keywords: document processing, markdown conversion, PDF generation, performance optimization

	## 1. Introduction

	Document processing has become increasingly important in modern information systems. The ability to convert markdown documents to high-quality PDF outputs with professional formatting is crucial for academic, technical, and business applications.

	### 1.1 Research Objectives

	1. Evaluate different markdown conversion backends
	2. Analyze performance characteristics of each approach
	3. Develop optimization strategies for large-scale processing
	4. Design flexible configuration systems for diverse use cases

	### 1.2 Contributions

	This work makes the following contributions:
	- Comprehensive comparison of markdown conversion backends
	- Performance optimization techniques for large documents
	- Flexible configuration framework for customization
	- Integration patterns for document processing pipelines

	## 2. Methodology

	### 2.1 Experimental Setup

	We conducted experiments using the following configuration:

	```python
	# Experimental configuration
	config = MarkdownConfig(
	page_size="A4",
	margin="1in",
	font_size="11pt",
	line_height="1.4",
	include_toc=True,
	syntax_highlighting=True
	)
	```

	### 2.2 Test Documents

	\| Category \| Count \| Avg Size \| Complexity \|
	\|----------\|-------\|----------\|------------\|
	\| Simple \| 100 \| 2 pages \| Low \|
	\| Medium \| 50 \| 10 pages \| Medium \|
	\| Complex \| 25 \| 25 pages \| High \|

	### 2.3 Metrics

	We evaluated performance using the following metrics:
	- Conversion Speed: Time to generate PDF (seconds)
	- Memory Usage: Peak memory consumption (MB)
	- Output Quality: Visual assessment score (1-10)
	- Feature Support: Number of supported markdown features

	## 3. Results

	### 3.1 Performance Comparison

	The following table summarizes our performance results:

	\| Backend \| Speed (s) \| Memory (MB) \| Quality \| Features \|
	\|---------\|-----------\|-------------\|---------\|----------\|
	\| WeasyPrint \| 2.3 ± 0.5 \| 85 ± 15 \| 8.5 \| 85% \|
	\| Pandoc \| 4.7 ± 1.2 \| 120 ± 25 \| 9.2 \| 95% \|

	### 3.2 Quality Analysis

	#### 3.2.1 Typography
	WeasyPrint excels in web-style typography with excellent CSS support, while Pandoc provides superior academic formatting with LaTeX-quality output.

	#### 3.2.2 Code Highlighting
	Both backends support syntax highlighting through Pygments:

	```python
	def analyze_performance(backend, documents):
	'''Analyze conversion performance for given backend'''
	results = []

	for doc in documents:
	start_time = time.time()
	success = backend.convert(doc)
	end_time = time.time()

	results.append({
	'document': doc,
	'time': end_time - start_time,
	'success': success
	})

	return results
	```

	### 3.3 Scalability

	Our scalability analysis shows:
	- Linear scaling with document size for both backends
	- Memory usage proportional to content complexity
	- Optimal batch sizes of 10-20 documents for parallel processing

	## 4. Discussion

	### 4.1 Backend Selection Guidelines

	Choose WeasyPrint for:
	- Web-style documents with custom CSS
	- Fast conversion requirements
	- Simple to medium complexity documents

	Choose Pandoc for:
	- Academic papers and publications
	- Complex document structures
	- Maximum feature support requirements

	### 4.2 Optimization Strategies

	1. Image Optimization: Compress images before embedding
	2. CSS Minimization: Use efficient CSS selectors
	3. Content Chunking: Process large documents in sections
	4. Caching: Cache converted content for repeated use

	## 5. Conclusion

	This research demonstrates that enhanced markdown conversion provides significant benefits for document processing workflows. The choice between WeasyPrint and Pandoc depends on specific requirements for speed, quality, and features.

	### 5.1 Future Work

	- Integration with cloud processing services
	- Real-time collaborative editing support
	- Advanced template systems
	- Performance optimization for very large documents

	## References

	1. Johnson, A. et al. (2024). "Advanced Document Processing Techniques." Journal of Information Systems, 15(3), 45-62.
	2. Smith, B. (2023). "PDF Generation Optimization." Technical Computing Review, 8(2), 12-28.
	3. Williams, C. (2024). "Markdown Processing Frameworks." Software Engineering Quarterly, 22(1), 78-95.

	---

	Manuscript received: January 10, 2024
	Accepted for publication: January 15, 2024
	Published online: January 20, 2024
	"""

	return {
	"basic": basic_content,
	"technical": technical_content,
	"academic": academic_content,
	}


	def demonstrate_basic_conversion():
	"""Demonstrate basic markdown to PDF conversion"""
	print("\n" + "=" * 60)
	print("BASIC MARKDOWN CONVERSION DEMONSTRATION")
	print("=" * 60)

	try:
	# Create converter with default settings
	converter = EnhancedMarkdownConverter()

	# Show backend information
	backend_info = converter.get_backend_info()
	print("Available conversion backends:")
	for backend, available in backend_info["available_backends"].items():
	status = "✅" if available else "❌"
	print(f" {status} {backend}")
	print(f"Recommended backend: {backend_info['recommended_backend']}")

	# Get sample content
	samples = create_sample_markdown_content()
	temp_dir = Path(tempfile.mkdtemp())

	# Convert basic sample
	basic_md_path = temp_dir / "basic_sample.md"
	with open(basic_md_path, "w", encoding="utf-8") as f:
	f.write(samples["basic"])

	print(f"\nConverting basic sample: {basic_md_path}")

	success = converter.convert_file_to_pdf(
	input_path=str(basic_md_path),
	output_path=str(temp_dir / "basic_sample.pdf"),
	method="auto", # Let the system choose the best backend
	)

	if success:
	print("✅ Basic conversion successful!")
	print(f" Output: {temp_dir / 'basic_sample.pdf'}")
	else:
	print("❌ Basic conversion failed")

	return success, temp_dir

	except Exception as e:
	print(f"❌ Basic conversion demonstration failed: {str(e)}")
	return False, None


	def demonstrate_backend_comparison():
	"""Demonstrate different conversion backends"""
	print("\n" + "=" * 60)
	print("BACKEND COMPARISON DEMONSTRATION")
	print("=" * 60)

	try:
	samples = create_sample_markdown_content()
	temp_dir = Path(tempfile.mkdtemp())

	# Create technical document
	tech_md_path = temp_dir / "technical.md"
	with open(tech_md_path, "w", encoding="utf-8") as f:
	f.write(samples["technical"])

	print("Testing different backends with technical document...")

	# Test different backends
	backends = ["auto", "weasyprint", "pandoc"]
	results = {}

	for backend in backends:
	try:
	print(f"\nTesting {backend} backend...")

	converter = EnhancedMarkdownConverter()
	output_path = temp_dir / f"technical_{backend}.pdf"

	import time

	start_time = time.time()

	success = converter.convert_file_to_pdf(
	input_path=str(tech_md_path),
	output_path=str(output_path),
	method=backend,
	)

	end_time = time.time()
	conversion_time = end_time - start_time

	if success:
	file_size = (
	output_path.stat().st_size if output_path.exists() else 0
	)
	print(
	f" ✅ {backend}: Success in {conversion_time:.2f}s, {file_size} bytes"
	)
	results[backend] = {
	"success": True,
	"time": conversion_time,
	"size": file_size,
	"output": str(output_path),
	}
	else:
	print(f" ❌ {backend}: Failed")
	results[backend] = {"success": False, "time": conversion_time}

	except Exception as e:
	print(f" ❌ {backend}: Error - {str(e)}")
	results[backend] = {"success": False, "error": str(e)}

	# Summary
	print("\n" + "-" * 40)
	print("BACKEND COMPARISON SUMMARY")
	print("-" * 40)
	successful_backends = [b for b, r in results.items() if r.get("success", False)]
	print(f"Successful backends: {successful_backends}")

	if successful_backends:
	fastest = min(successful_backends, key=lambda b: results[b]["time"])
	print(f"Fastest backend: {fastest} ({results[fastest]['time']:.2f}s)")

	return results, temp_dir

	except Exception as e:
	print(f"❌ Backend comparison demonstration failed: {str(e)}")
	return None, None


	def demonstrate_custom_styling():
	"""Demonstrate custom CSS styling and configuration"""
	print("\n" + "=" * 60)
	print("CUSTOM STYLING DEMONSTRATION")
	print("=" * 60)

	try:
	samples = create_sample_markdown_content()
	temp_dir = Path(tempfile.mkdtemp())

	# Create custom CSS
	custom_css = """
	body {
	font-family: 'Times New Roman', serif;
	font-size: 11pt;
	line-height: 1.4;
	color: #2c3e50;
	max-width: 800px;
	margin: 0 auto;
	padding: 20px;
	}

	h1 {
	color: #c0392b;
	font-size: 2.2em;
	border-bottom: 3px solid #e74c3c;
	padding-bottom: 0.5em;
	margin-top: 2em;
	}

	h2 {
	color: #8e44ad;
	font-size: 1.6em;
	border-bottom: 2px solid #9b59b6;
	padding-bottom: 0.3em;
	margin-top: 1.5em;
	}

	h3 {
	color: #2980b9;
	font-size: 1.3em;
	margin-top: 1.2em;
	}

	code {
	background-color: #ecf0f1;
	color: #e74c3c;
	padding: 3px 6px;
	border-radius: 4px;
	font-family: 'Courier New', monospace;
	font-size: 0.9em;
	}

	pre {
	background-color: #2c3e50;
	color: #ecf0f1;
	padding: 20px;
	border-radius: 8px;
	border-left: 5px solid #3498db;
	overflow-x: auto;
	font-size: 0.9em;
	}

	pre code {
	background-color: transparent;
	color: inherit;
	padding: 0;
	}

	blockquote {
	background-color: #f8f9fa;
	border-left: 5px solid #3498db;
	margin: 1em 0;
	padding: 15px 20px;
	font-style: italic;
	color: #555;
	}

	table {
	border-collapse: collapse;
	width: 100%;
	margin: 1.5em 0;
	background-color: white;
	border-radius: 8px;
	overflow: hidden;
	box-shadow: 0 2px 4px rgba(0,0,0,0.1);
	}

	th {
	background-color: #3498db;
	color: white;
	padding: 12px 15px;
	text-align: left;
	font-weight: bold;
	}

	td {
	padding: 10px 15px;
	border-bottom: 1px solid #ecf0f1;
	}

	tr:nth-child(even) {
	background-color: #f8f9fa;
	}

	tr:hover {
	background-color: #e8f4fd;
	}

	ul, ol {
	margin-bottom: 1em;
	padding-left: 2em;
	}

	li {
	margin-bottom: 0.5em;
	line-height: 1.6;
	}

	a {
	color: #3498db;
	text-decoration: none;
	border-bottom: 1px dotted #3498db;
	}

	a:hover {
	color: #2980b9;
	border-bottom: 1px solid #2980b9;
	}

	.toc {
	background-color: #f8f9fa;
	border: 2px solid #e9ecef;
	border-radius: 8px;
	padding: 20px;
	margin: 2em 0;
	}

	.toc h2 {
	color: #2c3e50;
	margin-top: 0;
	border-bottom: none;
	}

	.toc ul {
	list-style-type: none;
	padding-left: 0;
	}

	.toc li {
	margin-bottom: 0.8em;
	}

	.toc a {
	color: #2c3e50;
	font-weight: 500;
	border-bottom: none;
	}
	"""

	# Create custom configuration
	config = MarkdownConfig(
	page_size="A4",
	margin="0.8in",
	font_size="11pt",
	line_height="1.4",
	include_toc=True,
	syntax_highlighting=True,
	custom_css=custom_css,
	)

	converter = EnhancedMarkdownConverter(config)

	# Convert academic sample with custom styling
	academic_md_path = temp_dir / "academic_styled.md"
	with open(academic_md_path, "w", encoding="utf-8") as f:
	f.write(samples["academic"])

	print("Converting academic paper with custom styling...")
	print("Custom styling features:")
	print(" - Custom color scheme (reds, purples, blues)")
	print(" - Times New Roman serif font")
	print(" - Enhanced table styling with hover effects")
	print(" - Styled code blocks with dark theme")
	print(" - Custom blockquote styling")
	print(" - Professional header styling")

	success = converter.convert_file_to_pdf(
	input_path=str(academic_md_path),
	output_path=str(temp_dir / "academic_styled.pdf"),
	method="weasyprint", # WeasyPrint is best for custom CSS
	)

	if success:
	print("✅ Custom styling conversion successful!")
	print(f" Output: {temp_dir / 'academic_styled.pdf'}")

	# Also create a default version for comparison
	default_converter = EnhancedMarkdownConverter()
	default_success = default_converter.convert_file_to_pdf(
	input_path=str(academic_md_path),
	output_path=str(temp_dir / "academic_default.pdf"),
	method="weasyprint",
	)

	if default_success:
	print(f" Comparison (default): {temp_dir / 'academic_default.pdf'}")
	else:
	print("❌ Custom styling conversion failed")

	return success, temp_dir

	except Exception as e:
	print(f"❌ Custom styling demonstration failed: {str(e)}")
	return False, None


	def demonstrate_content_conversion():
	"""Demonstrate converting markdown content directly (not from file)"""
	print("\n" + "=" * 60)
	print("CONTENT CONVERSION DEMONSTRATION")
	print("=" * 60)

	try:
	# Create markdown content programmatically
	dynamic_content = f"""# Dynamic Content Example

	## Generated Information
	This document was generated programmatically on {Path(__file__).name}.

	## System Information
	- Python Path: {sys.executable}
	- Script Location: {Path(__file__).absolute()}
	- Working Directory: {Path.cwd()}

	## Dynamic Table
	\| Property \| Value \|
	\|----------\|-------\|
	\| Script Name \| {Path(__file__).name} \|
	\| Python Version \| {sys.version.split()[0]} \|
	\| Platform \| {sys.platform} \|

	## Code Example
	```python
	# This content was generated dynamically
	import sys
	from pathlib import Path

	def generate_report():
	return f"Report generated from {{Path(__file__).name}}"

	print(generate_report())
	```

	## Features Demonstrated
	This example shows how to:
	1. Generate markdown content programmatically
	2. Convert content directly without saving to file first
	3. Include dynamic information in documents
	4. Use different conversion methods

	> Note: This content was created in memory and converted directly to PDF
	> without intermediate file storage.

	## Conclusion
	Direct content conversion is useful for:
	- Dynamic report generation
	- Programmatic document creation
	- API-based document services
	- Real-time content processing
	"""

	temp_dir = Path(tempfile.mkdtemp())
	converter = EnhancedMarkdownConverter()

	print("Converting dynamically generated markdown content...")
	print("Content includes:")
	print(" - System information")
	print(" - Dynamic tables with current values")
	print(" - Generated timestamps")
	print(" - Programmatic examples")

	# Convert content directly to PDF
	output_path = temp_dir / "dynamic_content.pdf"

	success = converter.convert_markdown_to_pdf(
	markdown_content=dynamic_content,
	output_path=str(output_path),
	method="auto",
	)

	if success:
	print("✅ Content conversion successful!")
	print(f" Output: {output_path}")

	# Show file size
	file_size = output_path.stat().st_size
	print(f" Generated PDF size: {file_size} bytes")
	else:
	print("❌ Content conversion failed")

	return success, temp_dir

	except Exception as e:
	print(f"❌ Content conversion demonstration failed: {str(e)}")
	return False, None


	def demonstrate_error_handling():
	"""Demonstrate error handling and fallback mechanisms"""
	print("\n" + "=" * 60)
	print("ERROR HANDLING DEMONSTRATION")
	print("=" * 60)

	try:
	temp_dir = Path(tempfile.mkdtemp())

	# Test cases with various issues
	test_cases = {
	"invalid_markdown": """# Invalid Markdown

	This markdown has some {{invalid}} syntax and [broken links](http://nonexistent.invalid).

	```unknown_language
	This code block uses an unknown language
	```

	![Missing Image](nonexistent_image.png)
	""",
	"complex_content": """# Complex Content Test

	## Mathematical Expressions
	This tests content that might be challenging for some backends:

	$$ E = mc^2 $$

	$$\\sum_{i=1}^{n} x_i = \\frac{n(n+1)}{2}$$

	## Complex Tables
	\| A \| B \| C \| D \| E \| F \| G \|
	\|---\|---\|---\|---\|---\|---\|---\|
	\| Very long content that might wrap \| Short \| Medium length content \| X \| Y \| Z \| End \|
	\| Another row with different lengths \| A \| B \| C \| D \| E \| F \|

	## Special Characters
	Unicode: α, β, γ, δ, ε, ζ, η, θ, ι, κ, λ, μ, ν, ξ, ο, π, ρ, σ, τ, υ, φ, χ, ψ, ω
	Symbols: ♠ ♣ ♥ ♦ ☀ ☁ ☂ ☃ ☄ ★ ☆ ☉ ☊ ☋ ☌ ☍ ☎ ☏
	Arrows: ← ↑ → ↓ ↔ ↕ ↖ ↗ ↘ ↙
	""",
	"empty_content": "",
	"minimal_content": "# Just a title",
	}

	print("Testing error handling with various content types...")

	results = {}

	for test_name, content in test_cases.items():
	print(f"\nTesting: {test_name}")

	try:
	# Try multiple backends for each test case
	for backend in ["auto", "weasyprint", "pandoc"]:
	try:
	converter = EnhancedMarkdownConverter()
	output_path = temp_dir / f"{test_name}_{backend}.pdf"

	success = converter.convert_markdown_to_pdf(
	markdown_content=content,
	output_path=str(output_path),
	method=backend,
	)

	if success:
	file_size = (
	output_path.stat().st_size
	if output_path.exists()
	else 0
	)
	print(f" ✅ {backend}: Success ({file_size} bytes)")
	results[f"{test_name}_{backend}"] = {
	"success": True,
	"size": file_size,
	}
	else:
	print(f" ❌ {backend}: Failed")
	results[f"{test_name}_{backend}"] = {"success": False}

	except Exception as e:
	print(f" ❌ {backend}: Error - {str(e)[:60]}...")
	results[f"{test_name}_{backend}"] = {
	"success": False,
	"error": str(e),
	}

	except Exception as e:
	print(f" ❌ Test case failed: {str(e)}")

	# Demonstrate robust conversion with fallbacks
	print("\nDemonstrating robust conversion with fallback logic...")

	def robust_convert(content, output_path):
	"""Convert with multiple backend fallbacks"""
	backends = ["weasyprint", "pandoc", "auto"]

	for backend in backends:
	try:
	converter = EnhancedMarkdownConverter()
	success = converter.convert_markdown_to_pdf(
	markdown_content=content,
	output_path=output_path,
	method=backend,
	)
	if success:
	return backend, True
	except Exception:
	continue

	return None, False

	# Test robust conversion
	test_content = test_cases["complex_content"]
	robust_output = temp_dir / "robust_conversion.pdf"

	successful_backend, success = robust_convert(test_content, str(robust_output))

	if success:
	print(f"✅ Robust conversion successful using {successful_backend}")
	print(f" Output: {robust_output}")
	else:
	print("❌ All backends failed for robust conversion")

	# Summary
	print("\n" + "-" * 40)
	print("ERROR HANDLING SUMMARY")
	print("-" * 40)
	successful_conversions = sum(
	1 for r in results.values() if r.get("success", False)
	)
	total_attempts = len(results)
	success_rate = (
	(successful_conversions / total_attempts * 100) if total_attempts > 0 else 0
	)

	print(f"Total conversion attempts: {total_attempts}")
	print(f"Successful conversions: {successful_conversions}")
	print(f"Success rate: {success_rate:.1f}%")

	return results, temp_dir

	except Exception as e:
	print(f"❌ Error handling demonstration failed: {str(e)}")
	return None, None


	def main():
	"""Main demonstration function"""
	# Configure logging
	logging.basicConfig(
	level=logging.INFO,
	format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
	)

	print("RAG-Anything Enhanced Markdown Conversion Demonstration")
	print("=" * 70)
	print(
	"This example demonstrates various enhanced markdown conversion capabilities:"
	)
	print(" - Basic markdown to PDF conversion")
	print(" - Multiple backend comparison (WeasyPrint vs Pandoc)")
	print(" - Custom CSS styling and professional formatting")
	print(" - Direct content conversion without file I/O")
	print(" - Comprehensive error handling and fallback mechanisms")

	results = {}

	# Run demonstrations
	print("\n🚀 Starting demonstrations...")

	# Basic conversion
	success, temp_dir = demonstrate_basic_conversion()
	results["basic"] = success

	# Backend comparison
	backend_results, _ = demonstrate_backend_comparison()
	results["backends"] = backend_results

	# Custom styling
	styling_success, _ = demonstrate_custom_styling()
	results["styling"] = styling_success

	# Content conversion
	content_success, _ = demonstrate_content_conversion()
	results["content"] = content_success

	# Error handling
	error_results, _ = demonstrate_error_handling()
	results["error_handling"] = error_results

	# Summary
	print("\n" + "=" * 70)
	print("DEMONSTRATION SUMMARY")
	print("=" * 70)

	print("✅ Features Successfully Demonstrated:")
	if results["basic"]:
	print(" - Basic markdown to PDF conversion")
	if results["backends"]:
	successful_backends = [
	b for b, r in results["backends"].items() if r.get("success", False)
	]
	print(f" - Multiple backends: {successful_backends}")
	if results["styling"]:
	print(" - Custom CSS styling and professional formatting")
	if results["content"]:
	print(" - Direct content conversion without file I/O")
	if results["error_handling"]:
	success_rate = (
	sum(
	1 for r in results["error_handling"].values() if r.get("success", False)
	)
	/ len(results["error_handling"])
	* 100
	)
	print(f" - Error handling with {success_rate:.1f}% overall success rate")

	print("\n📊 Key Capabilities Highlighted:")
	print(" - Professional PDF generation with high-quality typography")
	print(" - Multiple conversion backends with automatic selection")
	print(" - Extensive CSS customization for branded documents")
	print(" - Syntax highlighting for code blocks using Pygments")
	print(" - Table formatting with professional styling")
	print(" - Image embedding with proper scaling")
	print(" - Table of contents generation with navigation")
	print(" - Comprehensive error handling and fallback mechanisms")

	print("\n💡 Best Practices Demonstrated:")
	print(" - Choose WeasyPrint for web-style documents and custom CSS")
	print(" - Choose Pandoc for academic papers and complex formatting")
	print(" - Use 'auto' method for general-purpose conversion")
	print(" - Implement fallback logic for robust conversion")
	print(" - Optimize images before embedding in documents")
	print(" - Test custom CSS with simple content first")
	print(" - Handle errors gracefully with multiple backend attempts")
	print(" - Use appropriate page sizes and margins for target use case")

	print("\n🎯 Integration Patterns:")
	print(" - Standalone conversion for document generation")
	print(" - Integration with RAG-Anything document pipeline")
	print(" - API-based document services")
	print(" - Batch processing for multiple documents")
	print(" - Dynamic content generation from templates")


	if __name__ == "__main__":
	main()