Isshi14 committed
Commit e84cd12 · verified
1 Parent(s): 3e4a391

Upload 2 files

Files changed (2)
  1. app.py +51 -35
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,6 +1,5 @@
 import os
 import gradio as gr
-from sentence_transformers import SentenceTransformer
 import chromadb
 from huggingface_hub import InferenceClient
 
@@ -8,7 +7,7 @@ from huggingface_hub import InferenceClient
 KNOWLEDGE_BASE_DIR = "knowledge_base"
 COLLECTION_NAME = "ai_twin_kb"
 
-# --- Step 1: Load documents from knowledge_base/ ---
+# --- Step 1: Load documents ---
 def load_documents():
     """Loads all .txt files from the knowledge base directory."""
     documents = []
@@ -34,18 +33,35 @@ def chunk_text(text, chunk_size=500, overlap=100):
         start += chunk_size - overlap
     return chunks
 
-# --- Step 3: Build vector store ---
-def build_vector_store(documents, filenames):
-    """Creates embeddings and stores them in ChromaDB."""
-    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-
-    client = chromadb.Client()
-    # Delete existing collection if it exists
+# --- Step 3: Get embeddings via HF API (no local model!) ---
+def get_embeddings(texts, client):
+    """Gets embeddings from Hugging Face Inference API."""
+    embeddings = client.feature_extraction(
+        texts,
+        model="sentence-transformers/all-MiniLM-L6-v2"
+    )
+    # The API returns nested lists, convert to list of lists
+    result = []
+    for emb in embeddings:
+        if isinstance(emb[0], list):
+            # Mean pooling if token-level embeddings returned
+            import numpy as np
+            arr = np.array(emb)
+            pooled = arr.mean(axis=0).tolist()
+            result.append(pooled)
+        else:
+            result.append(emb)
+    return result
+
+# --- Step 4: Build vector store ---
+def build_vector_store(documents, filenames, client):
+    """Creates embeddings via API and stores them in ChromaDB."""
+    chroma_client = chromadb.Client()
     try:
-        client.delete_collection(COLLECTION_NAME)
+        chroma_client.delete_collection(COLLECTION_NAME)
     except:
         pass
-    collection = client.create_collection(name=COLLECTION_NAME)
+    collection = chroma_client.create_collection(name=COLLECTION_NAME)
 
     all_chunks = []
     all_ids = []
@@ -60,32 +76,33 @@ def build_vector_store(documents, filenames):
             all_metadata.append({"source": fname})
             chunk_id += 1
 
-    # Generate embeddings
-    embeddings = model.encode(all_chunks).tolist()
+    print(f"Generating embeddings for {len(all_chunks)} chunks via API...")
+    # Process in batches to avoid API limits
+    batch_size = 16
+    all_embeddings = []
+    for i in range(0, len(all_chunks), batch_size):
+        batch = all_chunks[i:i+batch_size]
+        batch_embeddings = get_embeddings(batch, client)
+        all_embeddings.extend(batch_embeddings)
+        print(f" Processed {min(i+batch_size, len(all_chunks))}/{len(all_chunks)} chunks")
 
-    # Add to ChromaDB
     collection.add(
         documents=all_chunks,
-        embeddings=embeddings,
+        embeddings=all_embeddings,
        ids=all_ids,
         metadatas=all_metadata
     )
 
-    return collection, model
+    return collection
 
-# --- Step 4: RAG query function ---
-def query_rag(question, collection, embed_model, llm_client):
+# --- Step 5: RAG query function ---
+def query_rag(question, collection, client):
     """Retrieves relevant chunks and generates an answer."""
-    # Embed the question
-    q_embedding = embed_model.encode([question]).tolist()
+    q_embedding = get_embeddings([question], client)
 
-    # Retrieve top 3 relevant chunks
     results = collection.query(query_embeddings=q_embedding, n_results=3)
-
-    # Build context from retrieved documents
     context = "\n\n".join(results["documents"][0])
 
-    # Create prompt
     prompt = f"""You are an AI Twin that represents a person. Use ONLY the following context to answer the question.
 If you don't know the answer from the context, say "I don't have that information in my profile."
 
@@ -96,10 +113,10 @@ Question: {question}
 
 Answer:"""
 
-    # Generate response using Hugging Face Inference API
     try:
-        response = llm_client.text_generation(
+        response = client.text_generation(
             prompt,
+            model="mistralai/Mistral-7B-Instruct-v0.2",
             max_new_tokens=512,
             temperature=0.3,
             repetition_penalty=1.1
@@ -109,18 +126,17 @@ Answer:"""
         return f"Error generating response: {str(e)}"
 
 # --- Global Initialization ---
+print("Initializing HF client...")
+hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN", None)
+hf_client = InferenceClient(token=hf_token)
+
 print("Loading documents...")
 docs, fnames = load_documents()
 print(f"Loaded {len(docs)} documents: {fnames}")
 
-print("Building vector store...")
-kb_collection, embedding_model = build_vector_store(docs, fnames)
-print("Vector store ready.")
-
-print("Initializing LLM client...")
-hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN", None)
-llm = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.2", token=hf_token)
-print("LLM client ready.")
+print("Building vector store (using API for embeddings)...")
+kb_collection = build_vector_store(docs, fnames, hf_client)
+print("Vector store ready!")
 
 # --- Gradio UI ---
 def load_profile_summary():
@@ -131,7 +147,7 @@ def load_profile_summary():
     return "Profile not found."
 
 def ask_ai_twin(message, chat_history):
-    answer = query_rag(message, kb_collection, embedding_model, llm)
+    answer = query_rag(message, kb_collection, hf_client)
     chat_history.append((message, answer))
     return "", chat_history
 
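The substantive change in app.py is swapping the local SentenceTransformer for embeddings fetched through InferenceClient.feature_extraction. Below is a minimal sketch for smoke-testing that path outside the Space; it assumes a valid HUGGINGFACEHUB_API_TOKEN in the environment, and the single-string call plus the 384-dimension expectation come from the all-MiniLM-L6-v2 model, not from this commit.

import os
import numpy as np
from huggingface_hub import InferenceClient

# Sketch only: assumes HUGGINGFACEHUB_API_TOKEN is set in the environment.
client = InferenceClient(token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"))

# all-MiniLM-L6-v2 produces 384-dimensional sentence embeddings.
emb = client.feature_extraction(
    "What does this person do for work?",
    model="sentence-transformers/all-MiniLM-L6-v2",
)

arr = np.array(emb)
# Some hub/model combinations return token-level vectors; mean-pool them,
# mirroring the fallback branch in get_embeddings().
if arr.ndim > 1:
    arr = arr.mean(axis=0)
print(arr.shape)  # expected: (384,)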
requirements.txt CHANGED
@@ -1,4 +1,4 @@
 chromadb
-sentence-transformers
 gradio
 huggingface-hub
+numpy
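numpy enters requirements.txt only to support the mean-pooling fallback in get_embeddings(). A tiny illustration of that pooling step, using made-up token vectors (real ones from all-MiniLM-L6-v2 are 384-dimensional):

import numpy as np

# Hypothetical token-level output: 4 tokens x 3 dimensions (illustration only).
token_embeddings = [
    [0.1, 0.2, 0.3],
    [0.3, 0.2, 0.1],
    [0.0, 0.4, 0.2],
    [0.2, 0.0, 0.2],
]

# Mean pooling collapses the token axis into a single sentence vector,
# exactly what the fallback branch in get_embeddings() does.
sentence_vector = np.array(token_embeddings).mean(axis=0).tolist()
print(sentence_vector)  # [0.15, 0.2, 0.2]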