| import gradio as gr |
| import os |
| from langchain.chains import RetrievalQA |
| from langchain.llms import OpenAI |
| from langchain.document_loaders import PyPDFLoader |
| from langchain.document_loaders import DirectoryLoader |
| from langchain.text_splitter import CharacterTextSplitter |
| from langchain.embeddings import OpenAIEmbeddings |
| from langchain.vectorstores import Chroma |
|
|
| from gpt_index import SimpleDirectoryReader, GPTListIndex, GPTSimpleVectorIndex, LLMPredictor, PromptHelper, ServiceContext |
| |
| |
| import sys |
|
|
|
|
| |
# Directory holding the documents to index and query.
dir_path = "./docs"

# Fail fast if the OpenAI key is missing.  The original code used the bare
# expression `os.environ["OPENAI_API_KEY"]`, which raised KeyError only as a
# side effect and read as a no-op; this makes the intent explicit while
# preserving the exception type for anything that relied on it.
if "OPENAI_API_KEY" not in os.environ:
    raise KeyError("OPENAI_API_KEY environment variable must be set")

# Ensure the docs directory exists before any loader touches it.
os.makedirs(dir_path, exist_ok=True)
|
|
def construct_index(directory_path):
    """Build a GPTSimpleVectorIndex over the documents in *directory_path*.

    The index is persisted to 'index.json' (so chatbot() can reload it
    later) and also returned.
    """
    # Prompt/chunking parameters for the index build.
    ctx_window = 4096
    out_tokens = 512
    overlap = 20
    chunk_limit = 600

    helper = PromptHelper(
        ctx_window, out_tokens, overlap, chunk_size_limit=chunk_limit)
    predictor = LLMPredictor(
        llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo",
                   max_tokens=out_tokens))
    context = ServiceContext.from_defaults(
        llm_predictor=predictor, prompt_helper=helper)

    docs = SimpleDirectoryReader(directory_path).load_data()
    built = GPTSimpleVectorIndex.from_documents(docs, service_context=context)

    # Persist so later queries don't have to rebuild the index.
    built.save_to_disk('index.json')
    return built
|
|
def chatbot(input_text):
    """Answer *input_text* against the vector index persisted at 'index.json'."""
    loaded = GPTSimpleVectorIndex.load_from_disk('index.json')
    answer = loaded.query(input_text, response_mode="compact")
    return answer.response
|
|
def qa_system(pdf_file, openai_key, prompt, chain_type, k):
    """Answer *prompt* with a RetrievalQA chain over the PDFs in `dir_path`.

    Parameters:
        pdf_file: unused — NOTE(review): despite the name, documents are
            loaded from the module-level `dir_path`, not from this argument;
            kept for interface compatibility with existing callers.
        openai_key: OpenAI API key; exported to the environment so the
            langchain OpenAI/embedding clients pick it up.
        prompt: the question to answer.
        chain_type: one of 'stuff', 'map_reduce', 'refine', 'map_rerank'.
        k: number of relevant chunks to retrieve; coerced to int because
            Gradio sliders deliver floats.

    Returns:
        (answer_text, list of source-chunk strings).
    """
    os.environ["OPENAI_API_KEY"] = openai_key

    # Load every PDF under dir_path, recursively.
    loader = DirectoryLoader(dir_path, glob="**/*.pdf")
    documents = loader.load()

    # Split into ~1000-character chunks with no overlap.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    # Embed the chunks and index them in an in-memory Chroma store.
    embeddings = OpenAIEmbeddings()
    db = Chroma.from_documents(texts, embeddings)

    # int(k): the Gradio slider yields a float, but similarity search
    # expects an integer result count.
    retriever = db.as_retriever(search_type="similarity",
                                search_kwargs={"k": int(k)})

    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(), chain_type=chain_type, retriever=retriever,
        return_source_documents=True)

    result = qa({"query": prompt})
    return result['result'], [doc.page_content for doc in result["source_documents"]]
|
|
|
|
| |
| index = construct_index(dir_path) |
|
|
| |
| |
| |
| |
|
|
| |
| |
# Input components (legacy Gradio 2.x `gr.inputs` API).
# NOTE(review): only `prompt` is wired into the Interface launched below;
# openai_key / chain_type / k and both outputs appear unused in this file —
# presumably leftovers from a qa_system UI. Verify before removing.
openai_key = gr.inputs.Textbox(label="OpenAI API Key", type="password")
prompt = gr.inputs.Textbox(label="Question Prompt")
chain_type = gr.inputs.Radio(['stuff', 'map_reduce', "refine", "map_rerank"], label="Chain Type")
k = gr.inputs.Slider(minimum=1, maximum=5, default=1, label="Number of Relevant Chunks")

# Output components (legacy `gr.outputs` API).
output_text = gr.outputs.Textbox(label="Answer")
output_docs = gr.outputs.Textbox(label="Relevant Source Text")
|
|
| |
| |
| |
| |
|
|
# Wire chatbot() into a simple text-in / text-out web UI and start it.
demo = gr.Interface(
    fn=chatbot,
    inputs=prompt,
    outputs="text",
    title="TKO GPT for URDs - experimental",
    description="Tikehau URDs.",
)
demo.launch(debug=True)
|
|