| import streamlit as st |
| import os |
| from llama_index.core.indices.vector_store.base import VectorStoreIndex |
| from llama_index.vector_stores.qdrant import QdrantVectorStore |
| from llama_index.embeddings.fastembed import FastEmbedEmbedding |
| from llama_index.core import Settings |
| from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext |
| import qdrant_client |
| from llama_index.core.indices.query.schema import QueryBundle |
| from llama_index.llms.gemini import Gemini |
| from llama_index.embeddings.gemini import GeminiEmbedding |
| from llama_index.core.memory import ChatMemoryBuffer |
| from llama_index.readers.web import FireCrawlWebReader |
| from llama_index.core import SummaryIndex |
| |
| import time |
| import dotenv |
|
|
# Pull variables from a .env file (if any) into the process environment.
dotenv.load_dotenv()

# Default value for every session key the app reads later. Each key is seeded
# only when it is absent, so values already held in the session are untouched.
_SESSION_DEFAULTS = {
    'setup_complete': False,
    'documents': None,
    'chat_history': [],
    'index': None,
    'url': "",
    'collection_name': "",
    'query': "",
}

for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
|
|
# Re-export GOOGLE_API_KEY into the process environment, once, and only when
# it is actually set. os.environ accepts only str values, so assigning the
# None that os.getenv() returns for a missing variable would raise TypeError
# while the script is still starting up. A missing key is left for the code
# that needs it (the Gemini client built in llm_setup) to deal with.
_google_api_key = os.getenv("GOOGLE_API_KEY")
if _google_api_key is not None:
    os.environ["GOOGLE_API_KEY"] = _google_api_key
|
|
| |
def embed_setup():
    """Configure the global llama_index ``Settings`` used by the app.

    Sets ``Settings.embed_model`` to a FastEmbed ``BAAI/bge-small-en-v1.5``
    embedding model and ``Settings.llm`` to a Gemini ``models/gemini-pro``
    LLM with temperature 0.1. Returns nothing.
    """
    embedding_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
    Settings.embed_model = embedding_model

    gemini_llm = Gemini(temperature=0.1, model_name="models/gemini-pro")
    Settings.llm = gemini_llm
|
|
def qdrant_setup():
    """Create a Qdrant client from environment configuration.

    ``QDRANT_URL`` is passed as the client's first positional argument and
    ``QDRANT_API_KEY`` as ``api_key``. A variable that is not set is passed
    through as ``None``.

    Returns:
        The constructed ``qdrant_client.QdrantClient``.
    """
    qdrant_url = os.getenv("QDRANT_URL")
    qdrant_api_key = os.getenv("QDRANT_API_KEY")
    return qdrant_client.QdrantClient(qdrant_url, api_key=qdrant_api_key)
|
|
def llm_setup():
    """Build and return a Gemini LLM instance.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable and
    passed explicitly; the model is ``models/gemini-pro`` with temperature 0.1.

    Returns:
        The constructed ``Gemini`` object.
    """
    return Gemini(
        api_key=os.getenv("GOOGLE_API_KEY"),
        temperature=0.1,
        model_name="models/gemini-pro",
    )
|
|
def query_index(index, streaming=True):
    """Create a context-mode chat engine on top of ``index``.

    A new ``ChatMemoryBuffer`` (token limit 4000) is created on every call, so
    the returned engine starts with empty memory.

    Args:
        index: Object exposing ``as_chat_engine`` (the app passes the
            ``VectorStoreIndex`` stored in session state).
        streaming: Accepted for interface compatibility; this function does
            not read it.

    Returns:
        The chat engine returned by ``index.as_chat_engine``.
    """
    # System prompt handed to the chat engine verbatim.
    developer_prompt = (
        """You are an AI assistant for developers, specializing in technical documentation. Your task is to provide accurate, detailed, and helpful responses based on the given documentation context.
Context information is below:
{context_str}
Always answer based on the information in the context and general knowledge and be precise
Given this context, please respond to the following user query:
{query_str}
Your response should:
Directly address the query using information from the context
Include relevant code examples or direct quotes if applicable
Mention specific sections or pages of the documentation
Highlight any best practices or potential pitfalls related to the query
After your response, suggest 3 follow-up questions based on the context that the user might find helpful for deeper understanding.
ALWAYS SUGGEST FOLLOW UP QUESTIONS
Your response:"""
    )
    chat_memory = ChatMemoryBuffer.from_defaults(token_limit=4000)
    return index.as_chat_engine(
        chat_mode="context",
        memory=chat_memory,
        system_prompt=developer_prompt,
    )
|
|
| |
def ingest_documents(url):
    """Scrape ``url`` with FireCrawl and return the loaded documents.

    The reader is created in ``"scrape"`` mode with the API key taken from the
    ``FIRECRAWL_API_KEY`` environment variable.

    Args:
        url: Address of the page to scrape.

    Returns:
        Whatever ``FireCrawlWebReader.load_data`` returns for ``url``
        (presumably a list of documents; it may be empty).
    """
    firecrawl_reader = FireCrawlWebReader(
        api_key=os.getenv("FIRECRAWL_API_KEY"),
        mode="scrape",
    )
    documents = firecrawl_reader.load_data(url=url)
    # Diagnostic output of the element type. Guarded because indexing an empty
    # result would raise IndexError and abort ingestion.
    if documents:
        print(type(documents[0]))
    return documents
|
|
| |
# Page heading and short usage instructions.
st.title("Talk to Software Documentation")

_INTRO_MARKDOWN = """
Be the programmer you've always wanted to be.
1. Paste doc link
2. Enter a Collection name
3. Ask any question you want
"""
st.markdown(_INTRO_MARKDOWN)

# Each text box is pre-filled from session state and its current content is
# written back to the same session key.
_entered_url = st.text_input(
    "Enter URL to crawl and ingest documents (optional):",
    value=st.session_state['url'],
)
st.session_state['url'] = _entered_url

_entered_collection = st.text_input(
    "Enter collection name for vector store (compulsory):",
    value=st.session_state['collection_name'],
)
st.session_state['collection_name'] = _entered_collection
|
|
| |
# "Ingest and Setup": configure models, connect to Qdrant and build the index,
# either from freshly scraped documents (URL given) or from an existing
# collection (no URL).
if st.button("Ingest and Setup"):
    collection_name = st.session_state['collection_name']
    source_url = st.session_state['url']

    if not collection_name.strip():
        # The input is labelled compulsory; reject a blank name here instead
        # of handing it to the vector store.
        st.error("Please enter a collection name before running setup")
    else:
        with st.spinner("Setting up query engine..."):
            embed_setup()
            client = qdrant_setup()
            # The returned LLM is not used in this block; the call is kept so
            # setup performs the same steps as before.
            llm_setup()
            vector_store = QdrantVectorStore(client=client, collection_name=collection_name)
            storage_context = StorageContext.from_defaults(vector_store=vector_store)

            if source_url:
                # Scrape the page and index its documents into the collection.
                st.session_state['documents'] = ingest_documents(source_url)
                st.session_state['index'] = VectorStoreIndex.from_documents(
                    st.session_state['documents'],
                    vector_store=vector_store,
                    storage_context=storage_context,
                )
                st.success(f"Documents ingested from {st.session_state['url']} and query engine setup completed successfully!")
            else:
                # No URL: attach to whatever the named collection already holds.
                st.session_state['index'] = VectorStoreIndex.from_vector_store(
                    vector_store=vector_store,
                    storage_context=storage_context,
                )
                st.success(f"Query engine setup completed successfully using existing collection: {st.session_state['collection_name']}")

            st.session_state['setup_complete'] = True
|
|
| |
# Query box: pre-filled from session state, current content written back.
_entered_query = st.text_input("Enter your query:", value=st.session_state['query'])
st.session_state['query'] = _entered_query


def _ask_chat_engine():
    """Build a chat engine on the session's index and send it the session's query."""
    engine = query_index(st.session_state['index'])
    return engine.chat(st.session_state['query'])


# "Search": requires a completed setup and a non-empty query. A failed request
# is retried once after a 120-second wait; a second failure stops the script run.
if st.button("Search"):
    if not st.session_state['setup_complete']:
        st.error("Please complete the setup first")
    elif not st.session_state['query']:
        st.error("Please enter a query")
    else:
        with st.spinner("Searching..."):
            try:
                response = _ask_chat_engine()
            except Exception as first_error:
                st.error(f"An error occurred: {first_error!s}")
                st.info("Retrying in 120 seconds...")
                time.sleep(120)
                try:
                    response = _ask_chat_engine()
                except Exception as retry_error:
                    st.error(f"Retry failed. Error: {retry_error!s}")
                    st.stop()

            # Record the exchange for the sidebar, then show the answer.
            history = st.session_state['chat_history']
            history.append(("User", st.session_state['query']))
            history.append(("Assistant", str(response.response)))

            st.subheader("Assistant's Response:")
            st.write(response.response)
|
|
| |
# Sidebar: list every recorded (role, message) pair, then offer a reset button.
# The list is rendered before the button is evaluated, so entries drawn in this
# run remain visible in the same run in which the history is cleared.
sidebar = st.sidebar
sidebar.title("Chat History")
for speaker, text in st.session_state['chat_history']:
    sidebar.text(f"{speaker}: {text}")

if sidebar.button("Clear Chat History"):
    st.session_state['chat_history'] = []
    sidebar.success("Chat history cleared!")