avimittal30 committed on
Commit e86ca01 · verified · 1 Parent(s): 7be26d3

Create app.py

Files changed (1)
  1. app.py +130 -0
app.py ADDED
@@ -0,0 +1,130 @@
+ import os
+ import gradio as gr
+ import numpy as np
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.document_loaders import DirectoryLoader, TextLoader
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.memory import ConversationBufferMemory
+ from langchain.llms import HuggingFaceHub
+
+ # Set up the HuggingFace Hub token (expected as an environment variable / Space secret)
+ os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACE_API_TOKEN", "")
+
+ # Create a directory for document storage if it doesn't exist
+ os.makedirs("documents", exist_ok=True)
+
+ # Function to load documents
+ def load_documents(directory="documents"):
+     loader = DirectoryLoader(directory, glob="**/*.txt", loader_cls=TextLoader)
+     documents = loader.load()
+     return documents
+
+ # Function to process documents and create vector store
+ def process_documents():
+     documents = load_documents()
+
+     # Split documents into chunks
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=1000,
+         chunk_overlap=200
+     )
+     chunks = text_splitter.split_documents(documents)
+
+     # Create embeddings
+     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+
+     # Create vector store
+     vector_store = FAISS.from_documents(chunks, embeddings)
+
+     return vector_store
+
+ # Create RAG chain
+ def create_chain(vector_store):
+     # Initialize the LLM
+     llm = HuggingFaceHub(
+         repo_id="google/flan-t5-large",
+         model_kwargs={"temperature": 0.5, "max_length": 512}
+     )
+
+     # Create memory for the conversation
+     memory = ConversationBufferMemory(
+         memory_key="chat_history",
+         return_messages=True
+     )
+
+     # Create the conversational chain
+     chain = ConversationalRetrievalChain.from_llm(
+         llm=llm,
+         retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
+         memory=memory
+     )
+
+     return chain
+
+ # Initialize variables for handling chat state
+ vector_store = None
+ chain = None
+ chat_history = []
+
+ # Function to handle file uploads
+ def upload_file(files):
+     for file in files:
+         file_path = os.path.join("documents", os.path.basename(file.name))
+         # Copy from the uploaded temp file's path; the temp handle itself may already be closed
+         with open(file.name, "rb") as src, open(file_path, "wb") as f:
+             f.write(src.read())
+
+     global vector_store, chain
+     vector_store = process_documents()
+     chain = create_chain(vector_store)
+
+     return "Files uploaded and processed successfully!"
+
+ # Function to handle user queries
+ def chat(message, history):
+     global chain, chat_history, vector_store
+
+     history = history or []
+
+     # Initialize vector store and chain if they don't exist
+     if vector_store is None:
+         if os.path.exists("documents") and any(os.path.isfile(os.path.join("documents", f)) for f in os.listdir("documents")):
+             vector_store = process_documents()
+             chain = create_chain(vector_store)
+         else:
+             return history + [(message, "Please upload documents first to initialize the chatbot.")]
+
+     # Keep a copy of the Gradio history as (question, answer) pairs
+     if history:
+         chat_history = [(turn[0], turn[1]) for turn in history]
+
+     # Get response from chain (its memory tracks the running conversation)
+     response = chain({"question": message})
+
+     # Return the updated history so the Chatbot component displays the new turn
+     return history + [(message, response["answer"])]
+
+ # Create Gradio interface
+ with gr.Blocks(title="RAG Chatbot") as demo:
+     gr.Markdown("# RAG-based Conversational Chatbot")
+     gr.Markdown("Upload text documents and chat with an AI that can answer questions based on their content.")
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             file_output = gr.Textbox(label="Upload Status")
+             file_input = gr.File(
+                 file_count="multiple",
+                 label="Upload Documents (.txt files)"
+             )
+             upload_button = gr.Button("Process Documents")
+             upload_button.click(upload_file, inputs=[file_input], outputs=[file_output])
+
+         with gr.Column(scale=2):
+             chatbot = gr.Chatbot(height=400)
+             msg = gr.Textbox(label="Ask a question about your documents")
+
+             msg.submit(chat, inputs=[msg, chatbot], outputs=[chatbot])
+             clear = gr.Button("Clear")
+             clear.click(lambda: [], outputs=[chatbot])
+
+ # Launch the app
+ if __name__ == "__main__":
+     demo.launch()
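
For a quick local check of the retrieval pipeline this file defines, something like the sketch below could be run from the same directory, outside the Gradio UI. It only reuses app.py's own process_documents and create_chain; the sample file name, its contents, and the question are illustrative assumptions, and a valid HUGGINGFACE_API_TOKEN is assumed to be present in the environment.

# Minimal local smoke test for the pipeline in app.py (sketch, not part of the commit).
# Assumes HUGGINGFACE_API_TOKEN is set; the sample document and question are made up.
import os

from app import process_documents, create_chain

os.makedirs("documents", exist_ok=True)
with open(os.path.join("documents", "sample.txt"), "w") as f:
    f.write("FAISS is a library for efficient similarity search over dense vectors.")

vector_store = process_documents()   # chunk, embed, and index the .txt files
chain = create_chain(vector_store)   # retriever + flan-t5-large + conversation memory

result = chain({"question": "What is FAISS used for?"})
print(result["answer"])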