| |
| """ |
| Example script demonstrating the integration of MinerU parser with RAGAnything |
| |
| This example shows how to: |
| 1. Process parsed documents with RAGAnything |
| 2. Perform multimodal queries on the processed documents |
| 3. Handle different types of content (text, images, tables) |
| """ |
|
|
| import os |
| import argparse |
| import asyncio |
| import logging |
| import logging.config |
| from pathlib import Path |
|
|
| |
| import sys |
|
|
| sys.path.append(str(Path(__file__).parent.parent)) |
|
|
| from lightrag.llm.openai import openai_complete_if_cache, openai_embed |
| from lightrag.utils import EmbeddingFunc, logger, set_verbose_debug |
| from raganything import RAGAnything, RAGAnythingConfig |
|
|
|
|
def configure_logging():
    """Configure console and rotating-file logging for the example.

    Reads optional environment variables:
        LOG_DIR: directory for the log file (defaults to the current working dir)
        LOG_MAX_BYTES: max bytes per log file before rotation (default 10 MiB)
        LOG_BACKUP_COUNT: number of rotated backups to keep (default 5)
        VERBOSE: "true" enables verbose debug output in lightrag
    """
    # Resolve the absolute path of the log file up front so the banner and
    # the handler configuration agree on the same location.
    log_dir = os.getenv("LOG_DIR", os.getcwd())
    log_file_path = os.path.abspath(os.path.join(log_dir, "raganything_example.log"))

    print(f"\nRAGAnything example log file: {log_file_path}\n")
    # Ensure the directory that will contain the log file exists.
    # Bug fix: the original called os.path.dirname(log_dir), which created the
    # *parent* of LOG_DIR and left the actual target directory missing, so the
    # RotatingFileHandler could fail to open its file.
    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

    # Rotation parameters, overridable via environment variables.
    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # 10 MiB default
    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))

    logging.config.dictConfig(
        {
            "version": 1,
            # Keep loggers created before this call (e.g. lightrag's) alive.
            "disable_existing_loggers": False,
            "formatters": {
                "default": {
                    "format": "%(levelname)s: %(message)s",
                },
                "detailed": {
                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                },
            },
            "handlers": {
                "console": {
                    "formatter": "default",
                    "class": "logging.StreamHandler",
                    "stream": "ext://sys.stderr",
                },
                "file": {
                    "formatter": "detailed",
                    "class": "logging.handlers.RotatingFileHandler",
                    "filename": log_file_path,
                    "maxBytes": log_max_bytes,
                    "backupCount": log_backup_count,
                    "encoding": "utf-8",
                },
            },
            "loggers": {
                "lightrag": {
                    "handlers": ["console", "file"],
                    "level": "INFO",
                    "propagate": False,
                },
            },
        }
    )

    # Align the shared lightrag logger object with the dictConfig level.
    logger.setLevel(logging.INFO)
    # Verbose debug is opt-in via the VERBOSE environment variable.
    set_verbose_debug(os.getenv("VERBOSE", "false").lower() == "true")
|
|
|
|
async def process_with_rag(
    file_path: str,
    output_dir: str,
    api_key: str,
    base_url: str = None,
    working_dir: str = None,
):
    """
    Process document with RAGAnything

    Args:
        file_path: Path to the document
        output_dir: Output directory for RAG results
        api_key: OpenAI API key
        base_url: Optional base URL for API
        working_dir: Working directory for RAG storage
    """
    try:
        # Create RAGAnything configuration; all multimodal processors enabled.
        config = RAGAnythingConfig(
            working_dir=working_dir or "./rag_storage",
            mineru_parse_method="auto",
            enable_image_processing=True,
            enable_table_processing=True,
            enable_equation_processing=True,
        )

        # Text-only LLM wrapper used for ordinary queries.
        # Bug fix: history_messages previously used a mutable default ([]),
        # which is shared across calls in Python.
        def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
            return openai_complete_if_cache(
                "gpt-4o-mini",
                prompt,
                system_prompt=system_prompt,
                history_messages=history_messages if history_messages is not None else [],
                api_key=api_key,
                base_url=base_url,
                **kwargs,
            )

        # Vision-capable wrapper: falls back to the text model when no image
        # data is supplied.
        def vision_model_func(
            prompt, system_prompt=None, history_messages=None, image_data=None, **kwargs
        ):
            if not image_data:
                return llm_model_func(
                    prompt,
                    system_prompt,
                    history_messages if history_messages is not None else [],
                    **kwargs,
                )
            # Build the messages list explicitly so it never contains a None
            # entry. Bug fix: the original inserted a literal None when
            # system_prompt was falsy ({...} if system_prompt else None),
            # which is not a valid chat message.
            messages = []
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})
            messages.append(
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{image_data}"
                            },
                        },
                    ],
                }
            )
            return openai_complete_if_cache(
                "gpt-4o",
                "",
                system_prompt=None,
                history_messages=[],
                messages=messages,
                api_key=api_key,
                base_url=base_url,
                **kwargs,
            )

        # Embedding function backed by OpenAI's large embedding model.
        embedding_func = EmbeddingFunc(
            embedding_dim=3072,
            max_token_size=8192,
            func=lambda texts: openai_embed(
                texts,
                model="text-embedding-3-large",
                api_key=api_key,
                base_url=base_url,
            ),
        )

        # Initialize RAGAnything with the model and embedding callbacks.
        rag = RAGAnything(
            config=config,
            llm_model_func=llm_model_func,
            vision_model_func=vision_model_func,
            embedding_func=embedding_func,
        )

        # Parse and ingest the document (MinerU parser, auto method).
        await rag.process_document_complete(
            file_path=file_path, output_dir=output_dir, parse_method="auto"
        )

        logger.info("\nQuerying processed document:")

        # Plain text queries over the ingested document.
        text_queries = [
            "What is the main content of the document?",
            "What are the key topics discussed?",
        ]
        for query in text_queries:
            logger.info(f"\n[Text Query]: {query}")
            result = await rag.aquery(query, mode="hybrid")
            logger.info(f"Answer: {result}")

        # Multimodal query: supply an ad-hoc table alongside the question.
        logger.info(
            "\n[Multimodal Query]: Analyzing performance data in context of document"
        )
        multimodal_result = await rag.aquery_with_multimodal(
            "Compare this performance data with any similar results mentioned in the document",
            multimodal_content=[
                {
                    "type": "table",
                    "table_data": """Method,Accuracy,Processing_Time
                                RAGAnything,95.2%,120ms
                                Traditional_RAG,87.3%,180ms
                                Baseline,82.1%,200ms""",
                    "table_caption": "Performance comparison results",
                }
            ],
            mode="hybrid",
        )
        logger.info(f"Answer: {multimodal_result}")

        # Multimodal query: supply a LaTeX equation alongside the question.
        logger.info("\n[Multimodal Query]: Mathematical formula analysis")
        equation_result = await rag.aquery_with_multimodal(
            "Explain this formula and relate it to any mathematical concepts in the document",
            multimodal_content=[
                {
                    "type": "equation",
                    "latex": "F1 = 2 \\cdot \\frac{precision \\cdot recall}{precision + recall}",
                    "equation_caption": "F1-score calculation formula",
                }
            ],
            mode="hybrid",
        )
        logger.info(f"Answer: {equation_result}")

    except Exception as e:
        # Top-level boundary for the example: log the error with traceback
        # instead of crashing the script.
        logger.error(f"Error processing with RAG: {str(e)}")
        import traceback

        logger.error(traceback.format_exc())
|
|
|
|
def main():
    """Parse command-line arguments and run the RAG example pipeline."""
    arg_parser = argparse.ArgumentParser(description="MinerU RAG Example")
    arg_parser.add_argument("file_path", help="Path to the document to process")
    arg_parser.add_argument(
        "--working_dir", "-w", default="./rag_storage", help="Working directory path"
    )
    arg_parser.add_argument(
        "--output", "-o", default="./output", help="Output directory path"
    )
    arg_parser.add_argument(
        "--api-key",
        default=os.getenv("OPENAI_API_KEY"),
        help="OpenAI API key (defaults to OPENAI_API_KEY env var)",
    )
    arg_parser.add_argument("--base-url", help="Optional base URL for API")

    opts = arg_parser.parse_args()

    # Bail out early when no API key is available from flag or environment.
    if not opts.api_key:
        logger.error("Error: OpenAI API key is required")
        logger.error("Set OPENAI_API_KEY environment variable or use --api-key option")
        return

    # Make sure the output directory exists before processing begins.
    if opts.output:
        os.makedirs(opts.output, exist_ok=True)

    # Drive the async pipeline to completion.
    asyncio.run(
        process_with_rag(
            opts.file_path, opts.output, opts.api_key, opts.base_url, opts.working_dir
        )
    )
|
|
|
|
if __name__ == "__main__":
    # Set up logging before any other work so all output is captured.
    configure_logging()

    # Print a small startup banner, then hand off to the CLI entry point.
    banner = "=" * 30
    print("RAGAnything Example")
    print(banner)
    print("Processing document with multimodal RAG pipeline")
    print(banner)

    main()
|
|