| import streamlit as st |
| import PyPDF2 |
| from groq import Groq |
| import os |
|
|
| |
# SECURITY(review): an API key was hardcoded here and unconditionally
# overwrote any real GROQ_API_KEY already present in the environment.
# Prefer the environment value; the literal fallback is kept only for
# backward compatibility — remove it and supply the key via env/secrets.
os.environ.setdefault("GROQ_API_KEY", "myKey")

# Single shared Groq client for the whole app.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
|
| |
def extract_pdf_content(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    Args:
        pdf_file: A file-like object (e.g. a Streamlit upload) readable
            by ``PyPDF2.PdfReader``.

    Returns:
        str: All extracted page text joined together. Pages with no
        extractable text (e.g. image-only pages, where ``extract_text()``
        returns ``None``) contribute an empty string instead of raising
        a ``TypeError``.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # "".join is linear; repeated += on a string is potentially quadratic.
    return "".join(page.extract_text() or "" for page in reader.pages)
|
|
| |
def chunk_text(text, chunk_size=1000, overlap=200):
    """Split *text* into overlapping chunks.

    Args:
        text: The string to split.
        chunk_size: Maximum length of each chunk. Must be positive.
        overlap: Number of characters shared between consecutive chunks.
            Must be smaller than ``chunk_size``.

    Returns:
        list[str]: Consecutive slices of *text*; empty input yields [].

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is
            not smaller than ``chunk_size`` (the step would be <= 0 and
            the loop would never terminate).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(text), step):
        chunks.append(text[start:start + chunk_size])
    return chunks
|
|
| |
def find_relevant_chunks(chunks, query, num_chunks=3):
    """Return up to *num_chunks* chunks most relevant to *query*.

    The original implementation was a placeholder that ignored *query*
    and returned the first chunks. This version ranks chunks by
    case-insensitive word overlap with the query, keeping the original
    first-N behavior as a fallback when nothing matches (so a query with
    no overlapping words still gets some context).

    Args:
        chunks: List of text chunks to search.
        query: The user's question.
        num_chunks: Maximum number of chunks to return.

    Returns:
        list[str]: At most *num_chunks* chunks, best matches first;
        ties broken by original chunk order.
    """
    query_words = set(query.lower().split())
    if not chunks or not query_words:
        return chunks[:num_chunks]
    scored = []
    for idx, chunk in enumerate(chunks):
        score = len(query_words & set(chunk.lower().split()))
        scored.append((score, idx))
    # No chunk shares any word with the query: fall back to first N.
    if all(score == 0 for score, _ in scored):
        return chunks[:num_chunks]
    scored.sort(key=lambda pair: (-pair[0], pair[1]))
    return [chunks[idx] for _, idx in scored[:num_chunks]]
|
|
| |
def chatbot_response(user_query, chunks):
    """Answer *user_query* using the most relevant PDF chunks as context.

    Builds a prompt from the top chunks returned by
    ``find_relevant_chunks`` and sends it to the Groq chat model.

    Args:
        user_query: The user's question.
        chunks: All text chunks extracted from the PDF.

    Returns:
        str: The model's reply text.
    """
    selected = find_relevant_chunks(chunks, user_query)
    prompt = "PDF Content:\n{}\n\nUser Query: {}".format(
        "\n\n".join(selected), user_query
    )
    completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.3-70b-versatile",
    )
    return completion.choices[0].message.content
|
|
| |
st.title("PDF Query Chatbot")
st.write("Upload a PDF and ask questions based on its content.")

pdf_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if pdf_file:
    # Streamlit reruns this whole script on every widget interaction
    # (including each submitted query). Cache the extracted chunks in
    # session state so the PDF is only parsed when a new file arrives.
    if st.session_state.get("pdf_name") != pdf_file.name:
        with st.spinner("Extracting content..."):
            pdf_content = extract_pdf_content(pdf_file)
            st.session_state["chunks"] = chunk_text(pdf_content)
            st.session_state["pdf_name"] = pdf_file.name
        st.success("PDF content loaded successfully!")
    chunks = st.session_state["chunks"]

    user_query = st.text_input("Ask a question about the PDF:")

    if user_query:
        with st.spinner("Fetching response..."):
            response = chatbot_response(user_query, chunks)
        st.write(f"**Chatbot Response:** {response}")
|
|