import tempfile
import os

import tiktoken
import streamlit as st

from llama_index.core import (
    VectorStoreIndex,
    Settings,
)

from llama_parse import LlamaParse
from streamlit_pdf_viewer import pdf_viewer
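
# LlamaIndex counts tokens by calling len() on the output of Settings.tokenizer,
# so each helper class below returns the full token list rather than a count.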

class MistralTokens:
    """
    Returns tokens for MistralAI models.

    See: https://docs.mistral.ai/guides/tokenization/
    """
    def __init__(self, llm_name):
        from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

        if 'open-mistral-nemo' in llm_name:
            # Nemo models use the Tekken tokenizer rather than the SentencePiece one.
            self.tokenizer = MistralTokenizer.v3(is_tekken=True)
        else:
            self.tokenizer = MistralTokenizer.from_model(llm_name)

    def __call__(self, input):
        """Return the token indices as a list, since LlamaIndex counts tokens by calling `len()` on this function's output."""
        from mistral_common.protocol.instruct.messages import UserMessage
        from mistral_common.protocol.instruct.request import ChatCompletionRequest

        return self.tokenizer.encode_chat_completion(
            ChatCompletionRequest(
                tools=[],
                messages=[
                    UserMessage(content=input)
                ]
            )
        ).tokens

class GeminiTokens:
    """
    Returns tokens for Gemini models.

    See: https://medium.com/google-cloud/counting-gemini-text-tokens-locally-with-the-vertex-ai-sdk-78979fea6244
    """
    def __init__(self, llm_name):
        from vertexai.preview import tokenization
        self.tokenizer = tokenization.get_tokenizer_for_model(llm_name)

    def __call__(self, input):
        """Return the tokens as a list, since LlamaIndex counts tokens by calling `len()` on this function's output."""
        tokens = []
        # Flatten the tokens across all token-info entries.
        for token_info in self.tokenizer.compute_tokens(input).token_info_list:
            tokens += token_info.tokens
        return tokens
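
# Either tokenizer class is a drop-in value for Settings.tokenizer, e.g. (illustrative):
#   Settings.tokenizer = GeminiTokens("gemini-1.5-flash")
#   num_tokens = len(Settings.tokenizer("How many tokens is this?"))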


def main():
    submit_button = False

    with st.sidebar:
        st.title('Document Summarization and QA System')

        with st.form(key="model_settings"):
            provider = st.selectbox(
                label="Select LLM Provider",
                options=['google', 'huggingface', 'mistralai', 'openai'],
                index=3
            )

            if provider == 'google':
                llm_list = ['gemini-1.0-pro', 'gemini-1.5-flash', 'gemini-1.5-pro']
            elif provider == 'huggingface':
                llm_list = []
            elif provider == 'mistralai':
                llm_list = ["mistral-large-latest", "open-mistral-nemo-latest"]
            elif provider == 'openai':
                llm_list = ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo', 'gpt-4o', 'gpt-4o-mini']
            else:
                llm_list = []

            if provider == 'huggingface':
                llm_name = st.text_input(
                    "Enter LLM namespace/model-name",
                    value="HuggingFaceH4/zephyr-7b-alpha",
                )
                embed_name = st.text_input(
                    label="Enter embedding namespace/model-name",
                    value="BAAI/bge-small-en-v1.5",
                )
            else:
                llm_name = st.selectbox(
                    label="Select LLM Model",
                    options=llm_list,
                    index=0
                )

            temperature = st.slider(
                "Temperature",
                min_value=0.0,
                max_value=1.0,
                value=0.0,
                step=0.05,
            )

            similarity_top_k = st.number_input(
                "Top k nodes to retrieve (similarity_top_k)",
                min_value=1,
                max_value=100,
                value=5,
                step=1
            )
            similarity_cutoff = st.slider(
                "Select node similarity cutoff",
                min_value=0.0,
                max_value=1.0,
                value=0.7
            )
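
            # Retrieval knobs: similarity_top_k bounds how many chunks are retrieved,
            # while nodes scoring below similarity_cutoff are filtered out of the context.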

            parse_key = st.text_input(
                "Enter your LlamaParse API Key",
                value=None
            )

            llm_key = st.text_input(
                "Enter your LLM provider API Key",
                value=None,
            )

            if llm_key is not None:
                if provider == 'google':
                    from llama_index.llms.gemini import Gemini
                    from llama_index.embeddings.gemini import GeminiEmbedding

                    max_output_tokens = 8192

                    os.environ['GOOGLE_API_KEY'] = str(llm_key)
                    Settings.llm = Gemini(
                        model=f"models/{llm_name}",
                        api_key=os.environ.get("GOOGLE_API_KEY"),
                        temperature=temperature,
                        max_tokens=max_output_tokens
                    )
                    Settings.tokenizer = GeminiTokens(llm_name)
                    Settings.num_output = max_output_tokens
                    Settings.embed_model = GeminiEmbedding(
                        model_name="models/text-embedding-004",
                        api_key=os.environ.get("GOOGLE_API_KEY")
                    )
                    if llm_name == 'gemini-1.0-pro':
                        total_token_limit = 32760
                    else:
                        total_token_limit = 1_000_000
                    # Reserve room for the response within the model's total token budget.
                    Settings.context_window = total_token_limit - max_output_tokens
                elif provider == 'huggingface':
                    if llm_name is not None and embed_name is not None:
                        from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
                        from llama_index.embeddings.huggingface import HuggingFaceInferenceAPIEmbedding
                        from transformers import AutoTokenizer

                        max_output_tokens = 2048

                        os.environ['HF_TOKEN'] = str(llm_key)
                        Settings.llm = HuggingFaceInferenceAPI(
                            model_name=llm_name,
                            token=os.environ.get("HF_TOKEN"),
                            temperature=temperature,
                            max_tokens=max_output_tokens
                        )
                        Settings.tokenizer = AutoTokenizer.from_pretrained(
                            llm_name,
                            token=os.environ.get("HF_TOKEN"),
                        )
                        Settings.num_output = max_output_tokens
                        Settings.embed_model = HuggingFaceInferenceAPIEmbedding(
                            model_name=embed_name
                        )
                        Settings.context_window = 4096
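                        # (4096 is a conservative default; the actual context length
                        # varies by model on the Inference API.)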
                elif provider == 'mistralai':
                    from llama_index.llms.mistralai import MistralAI
                    from llama_index.embeddings.mistralai import MistralAIEmbedding

                    max_output_tokens = 8192

                    os.environ['MISTRAL_API_KEY'] = str(llm_key)
                    Settings.llm = MistralAI(
                        model=llm_name,
                        temperature=temperature,
                        max_tokens=max_output_tokens,
                        random_seed=42,
                        safe_mode=True
                    )
                    Settings.tokenizer = MistralTokens(llm_name)
                    Settings.num_output = max_output_tokens
                    Settings.embed_model = MistralAIEmbedding(
                        model_name="mistral-embed",
                        api_key=os.environ.get("MISTRAL_API_KEY")
                    )
                    Settings.context_window = 128000
                elif provider == 'openai':
                    from llama_index.llms.openai import OpenAI
                    from llama_index.embeddings.openai import OpenAIEmbedding

                    # Per-model context-window and max-output-token limits.
                    if llm_name == 'gpt-3.5-turbo':
                        max_output_tokens = 4096
                        context_window = 16385
                    elif llm_name == 'gpt-4':
                        max_output_tokens = 8192
                        context_window = 8192
                    elif llm_name == 'gpt-4-turbo':
                        max_output_tokens = 4096
                        context_window = 128000
                    elif llm_name == 'gpt-4o':
                        max_output_tokens = 4096
                        context_window = 128000
                    elif llm_name == 'gpt-4o-mini':
                        max_output_tokens = 16384
                        context_window = 128000

                    os.environ["OPENAI_API_KEY"] = str(llm_key)
                    Settings.llm = OpenAI(
                        model=llm_name,
                        temperature=temperature,
                        max_tokens=max_output_tokens
                    )
                    Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
                    Settings.num_output = max_output_tokens
                    Settings.embed_model = OpenAIEmbedding()
                    Settings.context_window = context_window
                else:
                    raise NotImplementedError(f"{provider} is not supported yet")
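
            # LlamaIndex's Settings object is a process-wide default, so the index and
            # query engine built below pick up the LLM, tokenizer, and embedding model
            # configured above without passing them around explicitly.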

            uploaded_file = st.file_uploader(
                "Choose a PDF file to upload",
                type=['pdf'],
                accept_multiple_files=False
            )

            parsed_document = None
            if uploaded_file is not None:
                parser = LlamaParse(
                    api_key=parse_key,
                    result_type="text"
                )

                # LlamaParse reads from a file path, so persist the upload to a temporary file.
                temp_dir = tempfile.TemporaryDirectory()
                temp_filename = os.path.join(temp_dir.name, uploaded_file.name)
                with open(temp_filename, "wb") as f:
                    f.write(uploaded_file.getvalue())
                parsed_document = parser.load_data(temp_filename)
                temp_dir.cleanup()
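
            # `parsed_document` is a list of LlamaIndex Document objects, which is the
            # shape VectorStoreIndex.from_documents() expects further down.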

            submit_button = st.form_submit_button(
                "Construct RAG"
            )

    col1, col2 = st.columns(2)

    with col2:
        tab1, tab2 = st.tabs(["Uploaded File", "Parsed File"])

        with tab1:
            if uploaded_file is not None:
                bytes_data = uploaded_file.getvalue()
                pdf_viewer(input=bytes_data, width=700)

        with tab2:
            if parsed_document is not None:
                st.write(parsed_document)

    with col1:
        st.markdown(
            """
            # Introduction

            This app builds a [retrieval-augmented generation](https://en.wikipedia.org/wiki/Retrieval-augmented_generation) model that lets you "talk" to your document: ask questions, summarize it, and extract data.

            :clap: The workflow relies on:
            * An LLM provider, e.g. [OpenAI](https://platform.openai.com/apps)
            * [LlamaParse](https://cloud.llamaindex.ai/)
            * [LlamaIndex](https://cloud.llamaindex.ai/)

            :warning: This tool is provided "as-is" without warranty.

            # Instructions

            1. Obtain an [API Key](https://cloud.llamaindex.ai/api-key) from LlamaParse to parse your document.
            2. Obtain a similar API key from your preferred LLM provider. Note: if you are using [Hugging Face](https://huggingface.co/models), you may need to request access to a model if it is gated.
            3. Make selections at the left and upload a document to use as context.
            4. Begin asking questions below!
            """
        )

    st.divider()

    prompt_txt = 'You are a trusted scientific expert that only responds truthfully to inquiries. Summarize this document in 3-5 sentences.'
    prompt = st.text_area(
        label="Enter your query.",
        key="prompt_widget",
        value=prompt_txt
    )

    run = st.button("Answer", type="primary")

    if parsed_document is not None and run:
        from llama_index.core.postprocessor import SimilarityPostprocessor

        index = VectorStoreIndex.from_documents(parsed_document)
        query_engine = index.as_query_engine(
            similarity_top_k=similarity_top_k,
            # Drop retrieved nodes whose similarity score falls below the cutoff.
            node_postprocessors=[
                SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)
            ],
            response_mode='compact',
        )
        response = query_engine.query(prompt)
        st.write(response.response)
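
        # Illustrative: for debugging retrieval, response.source_nodes holds the
        # retrieved chunks with their similarity scores, e.g.
        #   for node_with_score in response.source_nodes:
        #       st.write(node_with_score.score, node_with_score.node.get_content()[:200])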


if __name__ == '__main__':
    st.set_page_config(layout="wide")
    main()