Spaces:

pkgprateek
/

ai-rag-document

Sleeping

File size: 1,384 Bytes

bb58af7
 
 
a864c4e
bb58af7
 
 
 
a864c4e
 
 
bb58af7
 
 
 
a864c4e
bb58af7
 
a864c4e
 
bb58af7
a864c4e
bb58af7
a864c4e
bb58af7
 
 
 
 
a864c4e

# Test rag pipeline
import sys
import os

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from app.rag_pipeline import RAGPipeline
from app.document_processor import DocumentProcessor
from dotenv import load_dotenv

load_dotenv()


# Smoke test: chunk a small in-memory document, index it, and ask one question.
processor = DocumentProcessor()
# To run against a PDF on disk instead, swap in:
# chunks = processor.process_pdf("./data/test.pdf")

# Sample document text. It is built from adjacent string literals so that
# neither the source indentation nor a stray file name ("test_python.txt")
# ends up inside the text that gets chunked and indexed.
test_doc = (
    "Python is a high-level programming language.\n"
    "It was created by Guido van Rossum in 1991.\n"
    "Python is known for its simple syntax."
)
# NOTE(review): _chunk_text is underscore-prefixed (non-public by convention)
# and the meaning of its first positional argument ("user") is not visible
# from this script — confirm against app/document_processor.py.
chunks = processor._chunk_text("user", test_doc, doc_type="txt")

# Build the RAG pipeline and index the chunks produced above.
rag_pipeline = RAGPipeline()
rag_pipeline.add_documents(chunks)

# Ask a question whose answer is stated verbatim in the sample document.
question = "What is python known for?"
result = rag_pipeline.query(question)
print(f"Answer: {result['answer']}")


# Format sources as a numbered list: file name plus a 100-character content preview
# sources = result["sources_formatted"]
# source_info = []
# for i, doc in enumerate(sources, 1):
#     source_file = doc.metadata.get("source", "Unknown")
#     # Extract just filename
#     source_name = source_file.split("/")[-1] if "/" in source_file else source_file
#     page_preview = doc.page_content[:100].replace("\n", " ")
#     source_info.append(f"**[{i}]** {source_name}\n> {page_preview}...")

# sources_text = "\n\n".join(source_info) if source_info else "No sources found"
# print(f"Sources: {sources_text}")