Sid Sarasvati committed on
Commit
3fee523
·
1 Parent(s): a0be185

init hugging

Browse files
Files changed (4) hide show
  1. LICENSE +21 -0
  2. app.py +48 -0
  3. memory.py +88 -0
  4. requirements.txt +105 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 TrialAndErrorAI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_chat import message
3
+ from memory import get_memory
4
+
5
# Debug trace written to the server's stdout each time this script runs.
print('this is app.py')

# Page heading plus a short description of the demo. The two links point at
# the projects the description names (they previously had empty targets).
st.markdown("""
# How My ZSH
This is a demo of a chatbot that can answer questions about the Oh My Zsh
wiki. It uses the [Streamlit Chat](https://pypi.org/project/streamlit-chat/)
component and the [Chroma](https://www.trychroma.com/) library to search the
wiki and find answers to your questions.
""")
14
+
15
def get_text():
    """Render the question input box and return its current contents.

    Returns:
        The value of the text input widget registered under the key
        ``"input"``.
    """
    # Streamlit discourages an empty label; supply a real one and hide it
    # so the page looks the same while the widget stays labelled.
    return st.text_input("Your question", key="input",
                         label_visibility="collapsed")
18
+
19
# Seed the chat history the first time a session runs so the page opens with
# one greeting exchange.
if 'generated' not in st.session_state:
    st.session_state['generated'] = ['i am ready to help you ser']

if 'past' not in st.session_state:
    st.session_state['past'] = ['hello']

# Get the user's input from the text input field
user_input = get_text()

if st.button('Clear', key='clear'):
    # Reset the transcript. This branch is exclusive with the query branch
    # below: the text box still holds the previous question when Clear is
    # clicked, and answering it again here would immediately re-fill the
    # history that was just emptied.
    st.session_state['generated'] = []
    st.session_state['past'] = []
elif user_input:
    # Look up an answer for the question and record both sides of the turn.
    with st.spinner(text='Searching for response...'):
        output = get_memory().query(user_input)
    st.session_state['past'].append(user_input)
    st.session_state['generated'].append(output)

# Render the conversation, one user bubble followed by one bot bubble per
# turn. zip() pairs the two histories, so a length mismatch cannot raise an
# IndexError; an empty history simply renders nothing.
turns = zip(st.session_state['past'], st.session_state['generated'])
for i, (question, answer) in enumerate(turns):
    message(question,
            avatar_style="personas",
            is_user=True, key=str(i) + '_user')
    message(answer, key=str(i))
48
+
memory.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from langchain import OpenAI
4
+ from langchain.document_loaders import GitLoader
5
+ from langchain.vectorstores import VectorStore, Chroma
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.embeddings.openai import OpenAIEmbeddings
8
+ from langchain.chains.question_answering import load_qa_chain
9
+
10
# Local checkout directory for the Oh My Zsh wiki, and the remote it is
# cloned from when that directory does not exist yet.
REPO_PATH: str = "out/ohmyzsh-wiki"
OHMYZSH_WIKI_URL: str = "https://github.com/ohmyzsh/wiki"

# Directory passed to Chroma as ``persist_directory``. The name is a
# misspelling of "persistent"; it is kept because other code refers to it.
PRESISTENT_PATH: str = ".vdb"

# When true, progress details are printed to stdout.
DEBUG: bool = True

# Module-level slot read by get_memory().
memory = None

# Debug trace emitted when this module is imported or run.
print('this is memory.py')

# Pull settings from a .env file into the environment (presumably the
# OpenAI API key -- confirm against the deployment's .env).
load_dotenv()
19
+
20
class Memory:
    """Singleton wrapper around the Chroma vector store built from the wiki.

    Obtain the instance through ``Memory.get_instance()``; constructing a
    second instance directly raises.
    """

    # The one registered instance, or None until the first construction.
    _instance = None
    # The Chroma store; set by __init__, or lazily by query() if still None.
    vectordb = None

    def __init__(self):
        """Register this object as the singleton and build the vector store.

        Raises:
            Exception: if an instance has already been registered.
        """
        if Memory._instance is not None:
            raise Exception("Memory is a singleton")
        # NOTE: registration happens before the index build. If
        # update_memory() raises, the instance stays registered with
        # ``vectordb`` still None, and query() then opens the store persisted
        # on disk instead.
        Memory._instance = self
        self.vectordb = self.update_memory()

    @staticmethod
    def get_instance():
        """Return the singleton, constructing it on first use."""
        if Memory._instance is None:
            Memory()
        return Memory._instance

    def query(self, query):
        """Answer ``query`` from documents retrieved out of the vector store.

        Args:
            query: The question text; it is used both for retrieval and as
                the question given to the QA chain.

        Returns:
            Whatever the QA chain's ``run`` call returns for the question.
        """
        if self.vectordb is None:
            # No store was built in this process; open the persisted one.
            self.vectordb = Chroma(persist_directory=PRESISTENT_PATH,
                                   embedding_function=OpenAIEmbeddings())

        context_docs = self.vectordb.as_retriever().get_relevant_documents(query)
        chain = load_qa_chain(llm=OpenAI(), chain_type="stuff")
        answer = chain.run(input_documents=context_docs, question=query)

        if DEBUG:
            print(f"Found {len(context_docs)} relevant documents")
            print(f"Answer: {answer}")

        return answer

    def update_memory(self):
        """Load the wiki's markdown files, chunk them, and index them.

        The wiki is cloned from ``OHMYZSH_WIKI_URL`` only when ``REPO_PATH``
        does not exist yet; otherwise the existing checkout is read.

        Returns:
            The Chroma store created from the document chunks.
        """
        # NOTE(review): this runs on every construction and writes into the
        # same persist directory each time -- check whether repeated runs
        # accumulate duplicate vectors.
        clone_url = None if os.path.exists(REPO_PATH) else OHMYZSH_WIKI_URL
        docs = self._sniff_github(clone_url=clone_url, branch="main",
                                  file_ext="md")
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000,
                                                       chunk_overlap=100)
        doc_chunks = text_splitter.split_documents(docs)
        vectordb = Chroma.from_documents(documents=doc_chunks,
                                         embedding=OpenAIEmbeddings(),
                                         persist_directory=PRESISTENT_PATH)
        vectordb.persist()
        if DEBUG:
            print(f"[[Memory Update]]. Added {len(doc_chunks)} vectors to the vector store.")
        return vectordb

    def _sniff_github(self, clone_url=None, branch="main", file_ext=None):
        """Load documents from the git checkout at ``REPO_PATH``.

        Args:
            clone_url: Remote URL handed to the loader, or None.
            branch: Branch name handed to the loader.
            file_ext: Extension without the leading dot (e.g. ``"md"``).
                When falsy, no file filter is applied.

        Returns:
            The list of documents produced by the loader.
        """
        # The conditional must select the *filter*, not the loader: the
        # previous form made the whole loader None when file_ext was falsy,
        # and the load() call below then failed.
        if file_ext:
            suffix = f".{file_ext}"

            def file_filter(file_path):
                return file_path.endswith(suffix)
        else:
            file_filter = None

        loader = GitLoader(repo_path=REPO_PATH,
                           clone_url=clone_url,
                           branch=branch,
                           file_filter=file_filter)

        documents = loader.load()
        if DEBUG:
            print(f"Found {len(documents)} documents in repo\n")
        return documents
75
+
76
def get_memory():
    """Return the shared ``Memory`` instance, creating it on first use.

    The instance is stored in the module-level ``memory`` variable so that
    later calls return it directly. Previously the variable was declared
    global but never assigned.
    """
    global memory
    if memory is None:
        memory = Memory.get_instance()
    return memory
82
+
83
def main():
    """Ask the wiki memory a single sample question and print the answer."""
    store = Memory.get_instance()
    answer = store.query("how to install ohmyzsh")
    print(answer)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.4
2
+ aiosignal==1.3.1
3
+ altair==4.2.2
4
+ anyio==3.6.2
5
+ async-timeout==4.0.2
6
+ attrs==23.1.0
7
+ backoff==2.2.1
8
+ blinker==1.6.2
9
+ cachetools==5.3.0
10
+ certifi==2022.12.7
11
+ charset-normalizer==3.1.0
12
+ chromadb==0.3.21
13
+ click==8.1.3
14
+ clickhouse-connect==0.5.21
15
+ dataclasses-json==0.5.7
16
+ decorator==5.1.1
17
+ duckdb==0.7.1
18
+ entrypoints==0.4
19
+ fastapi==0.95.1
20
+ filelock==3.12.0
21
+ frozenlist==1.3.3
22
+ fsspec==2023.4.0
23
+ gitdb==4.0.10
24
+ GitPython==3.1.31
25
+ h11==0.14.0
26
+ hnswlib==0.7.0
27
+ httptools==0.5.0
28
+ huggingface-hub==0.14.1
29
+ idna==3.4
30
+ importlib-metadata==6.6.0
31
+ Jinja2==3.1.2
32
+ joblib==1.2.0
33
+ jsonschema==4.17.3
34
+ langchain==0.0.150
35
+ lz4==4.3.2
36
+ markdown-it-py==2.2.0
37
+ MarkupSafe==2.1.2
38
+ marshmallow==3.19.0
39
+ marshmallow-enum==1.5.1
40
+ mdurl==0.1.2
41
+ monotonic==1.6
42
+ mpmath==1.3.0
43
+ multidict==6.0.4
44
+ mypy-extensions==1.0.0
45
+ networkx==3.1
46
+ nltk==3.8.1
47
+ numexpr==2.8.4
48
+ numpy==1.24.3
49
+ openai==0.27.5
50
+ openapi-schema-pydantic==1.2.4
51
+ packaging==23.1
52
+ pandas==2.0.1
53
+ Pillow==9.5.0
54
+ posthog==3.0.1
55
+ protobuf==3.20.3
56
+ pyarrow==11.0.0
57
+ pydantic==1.10.7
58
+ pydeck==0.8.1b0
59
+ Pygments==2.15.1
60
+ Pympler==1.0.1
61
+ pyrsistent==0.19.3
62
+ python-dateutil==2.8.2
63
+ python-dotenv==1.0.0
64
+ pytz==2023.3
65
+ pytz-deprecation-shim==0.1.0.post0
66
+ PyYAML==6.0
67
+ regex==2023.3.23
68
+ requests==2.29.0
69
+ rich==13.3.5
70
+ scikit-learn==1.2.2
71
+ scipy==1.10.1
72
+ sentence-transformers==2.2.2
73
+ sentencepiece==0.1.98
74
+ six==1.16.0
75
+ smmap==5.0.0
76
+ sniffio==1.3.0
77
+ SQLAlchemy==2.0.11
78
+ starlette==0.26.1
79
+ streamlit==1.22.0
80
+ streamlit-chat==0.0.2.2
81
+ sympy==1.11.1
82
+ tenacity==8.2.2
83
+ threadpoolctl==3.1.0
84
+ tiktoken==0.3.3
85
+ tokenizers==0.13.3
86
+ toml==0.10.2
87
+ toolz==0.12.0
88
+ torch==2.0.0
89
+ torchvision==0.15.1
90
+ tornado==6.3.1
91
+ tqdm==4.65.0
92
+ transformers==4.28.1
93
+ typing-inspect==0.8.0
94
+ typing_extensions==4.5.0
95
+ tzdata==2023.3
96
+ tzlocal==4.3
97
+ urllib3==1.26.15
98
+ uvicorn==0.21.1
99
+ uvloop==0.17.0
100
+ validators==0.20.0
101
+ watchfiles==0.19.0
102
+ websockets==11.0.2
103
+ yarl==1.9.2
104
+ zipp==3.15.0
105
+ zstandard==0.21.0