binqiangliu committed on
Commit
a69d668
·
1 Parent(s): fe6eeea

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import random
import shutil
import string
import tempfile
from pathlib import Path

import gradio as gr
from dotenv import load_dotenv
from langchain import HuggingFaceHub
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LangchainEmbedding, ServiceContext
from llama_index import LLMPredictor
from llama_index import StorageContext, load_index_from_storage
from llama_index import VectorStoreIndex, SimpleDirectoryReader
13
# Load variables from a local .env file (python-dotenv) before the settings
# below are read from the process environment.
load_dotenv()

# Read from the environment here; not referenced again in the visible code.
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

# Passed as ``repo_id`` to ``HuggingFaceHub`` when the LLM is built.
repo_id = os.environ.get("repo_id")

# Passed as ``model_name`` to ``HuggingFaceEmbeddings``.
model_name = os.environ.get("model_name")
18
+
19
def generate_random_string(length: int) -> str:
    """Return a random string of lowercase ASCII letters.

    Uses the ``random`` module, so the result is suitable for throwaway
    names but not for anything security-sensitive.

    Args:
        length: Number of characters to generate. ``0`` yields ``""``.

    Returns:
        A string of ``length`` characters drawn from ``a``-``z``.
    """
    # random.choices draws all k characters in one call, replacing the
    # manual per-character loop and its unused index variable.
    return "".join(random.choices(string.ascii_lowercase, k=length))
22
+
23
def process_documents_and_query(pdf_files, question):
    """Answer ``question`` from the contents of the uploaded PDF files.

    The uploads are copied into a private temporary directory, loaded with
    ``SimpleDirectoryReader``, indexed into a ``VectorStoreIndex`` and then
    queried. The temporary directory is removed before the function returns.

    Args:
        pdf_files: Uploaded file(s) from the UI. Each item may be a path
            string or a file-like object whose ``name`` attribute is the
            path of the upload on disk. A single item is accepted as well
            as a list/tuple of items.
        question: The user's question about the documents.

    Returns:
        The query response converted to ``str``, or a short message asking
        for the missing input when no question or no file was supplied.
    """
    # Validate the cheap inputs first so nothing is copied, embedded or
    # indexed for a request that cannot be answered anyway.
    if not question or not question.strip():
        return "Please enter a question."
    if not pdf_files:
        return "Please upload at least one PDF file."
    if not isinstance(pdf_files, (list, tuple)):
        pdf_files = [pdf_files]

    embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name=model_name))

    llm = HuggingFaceHub(
        repo_id=repo_id,
        model_kwargs={
            "min_length": 1024,
            "max_new_tokens": 5632,
            "do_sample": True,
            "temperature": 0.1,
            "top_k": 50,
            "top_p": 0.95,
            "eos_token_id": 49155,
        },
    )
    llm_predictor = LLMPredictor(llm)

    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor, embed_model=embed_model
    )

    # Stage the uploads in a directory that is unique to this request and is
    # always cleaned up, even if loading raises.
    with tempfile.TemporaryDirectory() as directory_path:
        for position, pdf_file in enumerate(pdf_files):
            source_path = getattr(pdf_file, "name", pdf_file)
            # Use only the base name: joining an absolute upload path would
            # discard ``directory_path`` and point back at the source file.
            # The index prefix keeps same-named uploads from overwriting
            # each other.
            staged_name = f"{position}_{os.path.basename(source_path)}"
            shutil.copyfile(source_path, os.path.join(directory_path, staged_name))

        # The index must be built from loaded documents, not from path strings.
        documents = SimpleDirectoryReader(directory_path).load_data()

    index = VectorStoreIndex.from_documents(documents, service_context=service_context)

    # The index is used once and then discarded, so it is queried in memory
    # instead of being persisted to disk and immediately reloaded.
    response = index.as_query_engine().query(question)
    return str(response)
57
+
58
# Web UI: a multi-file PDF upload plus a question box, wired to
# process_documents_and_query, with the answer shown in a text box.
#
# The components use the top-level gr.File / gr.Textbox classes. The legacy
# gr.inputs.File class does not accept ``accept`` or ``multiple`` keyword
# arguments; multi-file upload and extension filtering are expressed with
# ``file_count`` and ``file_types`` instead. ``type`` is left at the library
# default.
gr_interface = gr.Interface(
    fn=process_documents_and_query,
    inputs=[
        gr.File(
            label="Upload PDF files",
            file_count="multiple",
            file_types=[".pdf"],
        ),
        gr.Textbox(label="Enter your query here:"),
    ],
    outputs=gr.Textbox(label="AI Response"),
    title="AI Doc-Chat",
    description="Upload PDF files and ask questions!",
    allow_flagging="never",
)

gr_interface.launch()