"""final_app

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1pG3uDsJzglvQecdTcY76aXa5ObFadRux
"""
|
|
| |
|
|
|
|
|
|
| import gradio as gr |
| import os |
| import tempfile |
| from langchain_community.document_loaders import PyPDFLoader |
| from langchain_community.vectorstores import FAISS |
| from langchain_huggingface import HuggingFaceEmbeddings |
| from langchain_text_splitters import RecursiveCharacterTextSplitter |
| from langchain_groq import ChatGroq |
| from langchain.chains import RetrievalQA |
| from langchain.prompts import PromptTemplate |
|
|
| |
# SECURITY: an API key was hard-coded here. Prefer supplying it via the
# GROQ_API_KEY environment variable; the embedded literal is kept only as a
# backward-compatible fallback — rotate this key and delete the literal
# before sharing or deploying this file.
GROQ_API_KEY = os.environ.get(
    "GROQ_API_KEY",
    "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",
)
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Module-level state shared between the Gradio callbacks below.
vectorstore = None        # FAISS index built by process_pdfs(); None until then
processed_files_list = []  # basenames of the PDFs currently indexed
|
|
def process_pdfs(files):
    """Load uploaded PDFs, split them into chunks, and build the FAISS store.

    Args:
        files: List of Gradio file objects (each exposing a ``.name`` temp
            path) or None when nothing was uploaded.

    Returns:
        tuple[str, str]: (detailed processing status for the status box,
        short message for the status bar).
    """
    global vectorstore, processed_files_list

    if not files:
        return "⚠️ Please upload at least one PDF file", ""

    try:
        all_documents = []
        processed_names = []

        for file in files:
            # Gradio file objects expose the uploaded temp-file path via .name
            loader = PyPDFLoader(file.name)
            documents = loader.load()
            all_documents.extend(documents)
            processed_names.append(os.path.basename(file.name))

        if not all_documents:
            return "❌ No content extracted from PDFs", ""

        # Overlapping chunks so an answer that straddles a chunk boundary is
        # still retrievable from at least one chunk.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        splits = text_splitter.split_documents(all_documents)

        # Small CPU-only embedding model keeps this usable without a GPU.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': 'cpu'},
        )

        vectorstore = FAISS.from_documents(splits, embeddings)
        processed_files_list = processed_names

        # NOTE: the original file had this literal broken across two physical
        # lines (invalid syntax) and mojibake emoji; both repaired here.
        success_msg = f"✅ Successfully processed {len(files)} document(s)!\n"
        success_msg += f"📄 Created {len(splits)} text chunks for retrieval\n\n"
        success_msg += "📚 Processed files:\n" + "\n".join(
            f"  • {name}" for name in processed_names
        )

        return success_msg, "✅ Documents processed! You can now ask questions."

    except Exception as e:
        # Surface the failure in the UI instead of crashing the Gradio worker.
        return f"❌ Error processing documents: {str(e)}", ""
|
|
def answer_question(question, chat_history):
    """Answer a question strictly from the processed documents via RAG.

    Args:
        question: The user's question string.
        chat_history: Gradio chat history as a list of [user, bot] pairs.

    Returns:
        The chat history with a new [question, answer] pair appended; on
        failure the "answer" slot carries an error message instead.
    """
    global vectorstore

    # Guard clauses: no index built yet, or an empty/blank question.
    if not vectorstore:
        return chat_history + [[question, "⚠️ Please upload and process PDF documents first!"]]

    if not question or question.strip() == "":
        return chat_history + [[question, "⚠️ Please enter a valid question."]]

    try:
        # temperature=0 keeps answers deterministic and grounded in context.
        llm = ChatGroq(
            model="llama-3.1-8b-instant",
            temperature=0,
            max_tokens=1024,
            api_key=GROQ_API_KEY,
        )

        # Strict grounding prompt: the model must refuse when the retrieved
        # context does not contain the answer.
        prompt_template = """You are a helpful assistant that answers questions ONLY based on the provided context from uploaded PDF documents.
CRITICAL INSTRUCTIONS:
- Answer ONLY if the information is present in the context below
- If the context does not contain relevant information to answer the question, you MUST respond with: "I don't know the answer. This information is not available in the uploaded documents."
- DO NOT use any external knowledge or information not present in the context
- DO NOT make assumptions or inferences beyond what is explicitly stated in the context
- If you're unsure whether the context contains the answer, say you don't know
Context from uploaded documents:
{context}
Question: {question}
Answer (only from the context above):"""

        prompt = PromptTemplate(
            template=prompt_template,
            input_variables=["context", "question"],
        )

        # "stuff" chain: all retrieved chunks are stuffed into one prompt.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(
                search_type="similarity",
                search_kwargs={
                    "k": 5,
                    "fetch_k": 20,
                },
            ),
            chain_type_kwargs={"prompt": prompt},
            return_source_documents=True,
        )

        # .invoke() replaces the deprecated direct-call chain invocation.
        result = qa_chain.invoke({"query": question})
        answer = result['result']
        source_docs = result.get('source_documents', [])

        # Append source citations unless the model declined to answer.
        if source_docs and "don't know" not in answer.lower():
            answer += "\n\n📚 **Sources found in documents:**"
            # dict.fromkeys de-duplicates while preserving retrieval order
            # (the original set made citation order nondeterministic).
            unique_sources = dict.fromkeys(
                f"{doc.metadata.get('source', 'Unknown')} (Page {doc.metadata.get('page', 'Unknown')})"
                for doc in source_docs[:3]
            )
            for source in unique_sources:
                answer += f"\n  • {source}"

        return chat_history + [[question, answer]]

    except Exception as e:
        return chat_history + [[question, f"❌ Error generating answer: {str(e)}"]]
|
|
| def clear_data(): |
| """Clear all processed data""" |
| global vectorstore, processed_files_list |
| vectorstore = None |
| processed_files_list = [] |
| return "ποΈ All data cleared. Please upload new documents.", "", [] |
|
|
| |
# CSS injected into the Gradio Blocks app below: gradient title text, a muted
# subtitle, and a centred max-width container. #title / #subtitle match the
# element ids used in the gr.HTML headers.
custom_css = """
#title {
    text-align: center;
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 10px;
}
#subtitle {
    text-align: center;
    color: #666;
    font-size: 1.2em;
    margin-bottom: 20px;
}
.gradio-container {
    max-width: 1200px !important;
    margin: auto !important;
}
"""
|
|
| |
# Build the Gradio UI: two-column layout (upload/controls left, chat right)
# wired to the callbacks defined above.
# NOTE(review): many UI labels contain mojibake (e.g. "π", "β οΈ") — these
# look like emoji mangled by an encoding round-trip; confirm against the
# original notebook. Left byte-identical here since they are runtime strings.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    # Page header (styled by #title / #subtitle rules in custom_css).
    gr.HTML("<h1 id='title'>π Slashbyte RAG</h1>")
    gr.HTML("<p id='subtitle'>Upload PDFs and ask questions using AI-powered retrieval</p>")

    with gr.Row():
        # Left column: document upload and processing controls.
        with gr.Column(scale=1):
            gr.Markdown("### π Document Upload")
            file_upload = gr.File(
                label="Upload PDF Documents",
                file_types=[".pdf"],
                file_count="multiple"
            )
            process_btn = gr.Button("π Process Documents", variant="primary", size="lg")
            process_output = gr.Textbox(
                label="Processing Status",
                lines=8,
                interactive=False
            )
            clear_btn = gr.Button("ποΈ Clear All Data", variant="stop")

            # Static usage instructions.
            gr.Markdown("""
---
### βΉοΈ How to Use
1. **Upload PDFs** using the file uploader
2. Click **Process Documents**
3. **Ask questions** in the chat
4. Get **AI-powered answers**
**Features:**
- π Multiple PDF support
- π€ Powered by Groq LLM
- π Semantic search
- πΎ Chat history
""")

        # Right column: status bar, chat window, and question input.
        with gr.Column(scale=2):
            gr.Markdown("### π¬ Ask Questions")
            status_text = gr.Textbox(
                label="Status",
                value="β οΈ Upload and process documents to start",
                interactive=False
            )
            chatbot = gr.Chatbot(
                label="Chat History",
                height=400,
                show_label=True
            )
            with gr.Row():
                question_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask anything about your documents...",
                    scale=4
                )
                submit_btn = gr.Button("π Ask", variant="primary", scale=1)

            clear_chat_btn = gr.Button("π§Ή Clear Chat")

    # Footer.
    gr.HTML("""
<div style='text-align: center; color: #666; padding: 20px; margin-top: 20px; border-top: 1px solid #ddd;'>
<p>Powered by Langchain, Groq, and HuggingFace | Built with β€οΈ using Gradio</p>
</div>
""")

    # Event wiring: process uploads, then enable Q&A.
    process_btn.click(
        fn=process_pdfs,
        inputs=[file_upload],
        outputs=[process_output, status_text]
    )

    # Both the button click and pressing Enter submit a question, then clear
    # the input box via the chained .then() callback.
    submit_btn.click(
        fn=answer_question,
        inputs=[question_input, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        outputs=[question_input]
    )

    question_input.submit(
        fn=answer_question,
        inputs=[question_input, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        outputs=[question_input]
    )

    # Clear only the visible chat (vector store stays loaded).
    clear_chat_btn.click(
        fn=lambda: [],
        outputs=[chatbot]
    )

    # Clear everything: vector store, status boxes, and chat.
    clear_btn.click(
        fn=clear_data,
        outputs=[process_output, status_text, chatbot]
    )
|
|
| |
if __name__ == "__main__":
    # share=True publishes a public Gradio link and 0.0.0.0 binds all
    # interfaces — convenient for Colab, but it exposes the app (and usage of
    # the configured API key) to anyone with the URL.
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860
    )