| |
| |
| |
| |
| |
| |
|
|
| import os |
| import pandas as pd |
|
|
| from langchain import SQLDatabase, SQLDatabaseChain, HuggingFaceHub |
| from langchain.agents import AgentType, load_tools, initialize_agent,\ |
| create_pandas_dataframe_agent |
| from langchain.chat_models import ChatOpenAI |
| from langchain.chains import RetrievalQA |
| from langchain.document_loaders import DirectoryLoader, TextLoader |
| from langchain.embeddings.openai import OpenAIEmbeddings |
| from langchain.llms import OpenAI |
| from langchain.text_splitter import CharacterTextSplitter |
| from langchain.tools import DuckDuckGoSearchRun |
| from langchain.vectorstores import Chroma |
|
|
| import chromadb |
| from chromadb.config import Settings |
# Directory where the Chroma vector store is persisted between runs.
DB_DIR = "./db"


# OpenAI completion-style models usable with the `OpenAI` wrapper.
OPENAI_LLMS = [
    'text-davinci-003',
    'text-babbage-001',
    'text-curie-001',
    'text-ada-001'
]


# OpenAI chat models usable with the `ChatOpenAI` wrapper.
OPENAI_CHAT_LLMS = [
    'gpt-3.5-turbo',
    'gpt-4',
]


# Hosted Hugging Face Hub repo ids usable with the `HuggingFaceHub` wrapper.
HUGGINGFACE_LLMS = [
    'google/flan-t5-xl',
    'databricks/dolly-v2-3b',
    'bigscience/bloom-1b7'
]


# Read at import time; None when the env var is unset (HF models then fail).
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
|
def createLLM(model_name="text-davinci-003", temperature=0):
    """Instantiate the appropriate langchain LLM wrapper for a model name.

    Args:
        model_name: one of OPENAI_LLMS, OPENAI_CHAT_LLMS or HUGGINGFACE_LLMS.
        temperature: sampling temperature forwarded to the model.

    Returns:
        An `OpenAI`, `ChatOpenAI` or `HuggingFaceHub` instance, or ``None``
        when ``model_name`` is not in any of the known model lists.
    """
    if model_name in OPENAI_LLMS:
        return OpenAI(model_name=model_name, temperature=temperature)
    if model_name in OPENAI_CHAT_LLMS:
        return ChatOpenAI(model_name=model_name, temperature=temperature)
    if model_name in HUGGINGFACE_LLMS:
        # Bug fix: `temperature` used to be ignored here and hard-coded to
        # 1e-10. Clamp instead — the HF inference API rejects exactly 0, and
        # the default call (temperature=0) still yields 1e-10 as before.
        return HuggingFaceHub(repo_id=model_name,
                              model_kwargs={"temperature": max(temperature, 1e-10)})
    return None
|
|
def load_chat_agent(verbose=True):
    """Return a chat LLM built from the first OPENAI_CHAT_LLMS entry.

    NOTE(review): ``verbose`` is accepted for interface symmetry with the
    other loaders but is currently unused.
    """
    chat_model_name = OPENAI_CHAT_LLMS[0]
    return createLLM(chat_model_name, temperature=0.5)
|
|
def load_earnings_agent(verbose=True):
    """Build a RetrievalQA chain over the text documents under ./data/.

    On first run (no DB_DIR on disk) the ./data/**/*.txt files are loaded,
    chunked, embedded with OpenAI embeddings and persisted to DB_DIR; on
    subsequent runs the persisted Chroma store is reopened directly.

    NOTE(review): ``verbose`` is accepted for interface symmetry with the
    other loaders but is currently unused.

    Returns:
        A RetrievalQA chain (temperature 0.0, "stuff" chain type) that also
        returns its source documents.
    """
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])

    if not os.path.exists(DB_DIR):
        loader = DirectoryLoader(path="./data/", glob="**/*.txt")
        docs = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        text_chunks = text_splitter.split_documents(documents=docs)
        # Consistency fix: persist to DB_DIR (previously a hard-coded "./db"
        # that could drift from the constant checked above).
        docsearch = Chroma.from_documents(text_chunks, embeddings,
                                          persist_directory=DB_DIR)
        retriever = docsearch.as_retriever()
    else:
        vectordb = Chroma(persist_directory=DB_DIR,
                          embedding_function=embeddings)
        retriever = vectordb.as_retriever()

    # Temperature 0.0: deterministic answers for factual Q&A.
    qa = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0.0),
                                     chain_type="stuff",
                                     retriever=retriever,
                                     return_source_documents=True)
    return qa
|
|
def load_book_agent(verbose=True):
    """Build a RetrievalQA chain over the text documents under ./data/.

    On first run (no DB_DIR on disk) the ./data/**/*.txt files are loaded,
    chunked, embedded with OpenAI embeddings and persisted to DB_DIR; on
    subsequent runs the persisted Chroma store is reopened directly.

    NOTE(review): ``verbose`` is accepted for interface symmetry with the
    other loaders but is currently unused.

    Returns:
        A RetrievalQA chain (temperature 0.7, "stuff" chain type) that also
        returns its source documents.
    """
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])

    if not os.path.exists(DB_DIR):
        loader = DirectoryLoader(path="./data/", glob="**/*.txt")
        docs = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        text_chunks = text_splitter.split_documents(documents=docs)
        # Consistency fix: persist to DB_DIR (previously a hard-coded "./db"
        # that could drift from the constant checked above).
        docsearch = Chroma.from_documents(text_chunks, embeddings,
                                          persist_directory=DB_DIR)
        retriever = docsearch.as_retriever()
    else:
        vectordb = Chroma(persist_directory=DB_DIR,
                          embedding_function=embeddings)
        retriever = vectordb.as_retriever()

    # Temperature 0.7: more creative phrasing for book-style content.
    qa = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0.7),
                                     chain_type="stuff",
                                     retriever=retriever,
                                     return_source_documents=True)
    return qa
|
|
def load_sales_agent(verbose=True):
    """Demo agent gating a hard-coded internal sales CSV file.

    Reads data/sales_data.csv into a DataFrame and wraps it in a pandas
    dataframe agent driven by a completion-style LLM.
    """
    llm = createLLM(model_name='text-davinci-003')
    sales_df = pd.read_csv("data/sales_data.csv")
    return create_pandas_dataframe_agent(llm, sales_df, verbose=verbose)
|
|
def load_sqlite_agent(model_name="text-davinci-003"):
    """Demo chain gating a sqlite DB of digital media (Chinook sample DB).

    Args:
        model_name: LLM to drive the SQL chain. Bug fix: this parameter was
            previously ignored and OPENAI_LLMS[0] was always used; it is now
            honored (the default equals OPENAI_LLMS[0], so existing callers
            see no change).

    Returns:
        A verbose SQLDatabaseChain over ./data/Chinook_Sqlite.sqlite.
    """
    llm = createLLM(model_name)
    sqlite_db_path = "./data/Chinook_Sqlite.sqlite"
    db = SQLDatabase.from_uri(f"sqlite:///{sqlite_db_path}")
    db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)
    return db_chain
|
|
def load_chained_agent(verbose=True, model_name="text-davinci-003"):
    """Build a zero-shot ReAct agent with a toolbelt of search/API tools.

    Tool API keys are pulled from the environment (SERPAPI_API_KEY,
    NEWS_API_KEY, TMDB_BEARER_TOKEN); DuckDuckGo search is added on top of
    the standard tools.

    Args:
        verbose: forwarded to the agent for step-by-step logging.
        model_name: model identifier resolved through createLLM().

    Returns:
        An initialized agent that also returns its intermediate steps.
    """
    llm = createLLM(model_name)
    tools = load_tools(
        ["serpapi", "open-meteo-api", "news-api",
         "python_repl", "wolfram-alpha"],
        llm=llm,
        serpapi_api_key=os.getenv('SERPAPI_API_KEY'),
        news_api_key=os.getenv('NEWS_API_KEY'),
        tmdb_bearer_token=os.getenv('TMDB_BEARER_TOKEN'),
    )
    tools.append(DuckDuckGoSearchRun())

    return initialize_agent(
        tools,
        llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=verbose,
        return_intermediate_steps=True,
    )