Spaces:

maaz21
/

chatbotqa

Sleeping

App Files Files Community

maaz21 committed on May 12, 2025

Commit

75574d6

verified ·

1 Parent(s): bd41f85

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +68 -38

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,70 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+from langchain.vectorstores import FAISS
+from langchain.document_loaders.csv_loader import CSVLoader
+from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain.prompts import PromptTemplate
+from langchain.chains import RetrievalQA
+from langchain.llms import OpenAI
+import os
+from dotenv import load_dotenv
+load_dotenv()
+# Create Groq LLaMA LLM
+llm = OpenAI(
+    base_url="https://api.groq.com/openai/v1",
+    api_key="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",
+    model_name="llama3-8b-8192",
+    temperature=0.1
+)
+embedding_model = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
+vectordb_file_path = "faiss_index"
+def create_vector_db():
+    loader = CSVLoader(file_path='codebasics_faqs.csv', source_column="prompt")
+    data = loader.load()
+    vectordb = FAISS.from_documents(documents=data, embedding=embedding_model)
+    vectordb.save_local(vectordb_file_path)
+def get_qa_chain():
+    vectordb = FAISS.load_local(vectordb_file_path, embedding_model)
+    retriever = vectordb.as_retriever(score_threshold=0.7)
+    prompt_template = """Given the following context and a question, generate an answer based on this context only.
+    In the answer try to provide as much text as possible from "response" section in the source document context without making much changes.
+    If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer.
+    CONTEXT: {context}
+    QUESTION: {question}"""
+    PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
+    chain = RetrievalQA.from_chain_type(
+        llm=llm,
+        chain_type="stuff",
+        retriever=retriever,
+        return_source_documents=True,
+        input_key="query",
+        chain_type_kwargs={"prompt": PROMPT}
+    )
+    return chain
+# Streamlit UI
+st.title("📊 Ask Questions About Your CSV")
+if not os.path.exists(f"{vectordb_file_path}/index.faiss"):
+    with st.spinner("Creating vector DB..."):
+        create_vector_db()
+user_input = st.text_input("Enter your question:")
+if user_input:
+    qa_chain = get_qa_chain()
+    result = qa_chain({"query": user_input})
+    st.write("### Answer:")
+    st.write(result["result"])
+    with st.expander("Show Source Document(s)"):
+        for doc in result["source_documents"]:
+            st.markdown(f"**Source:** {doc.metadata}")
+            st.text(doc.page_content)