Isshi14 committed
Commit e84cd12 · verified
1 Parent(s): 3e4a391

Upload 2 files

Files changed (2)
  1. app.py +51 -35
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,6 +1,5 @@
 import os
 import gradio as gr
-from sentence_transformers import SentenceTransformer
 import chromadb
 from huggingface_hub import InferenceClient
 
@@ -8,7 +7,7 @@ from huggingface_hub import InferenceClient
 KNOWLEDGE_BASE_DIR = "knowledge_base"
 COLLECTION_NAME = "ai_twin_kb"
 
-# --- Step 1: Load documents from knowledge_base/ ---
+# --- Step 1: Load documents ---
 def load_documents():
     """Loads all .txt files from the knowledge base directory."""
     documents = []
@@ -34,18 +33,35 @@ def chunk_text(text, chunk_size=500, overlap=100):
         start += chunk_size - overlap
     return chunks
 
-# --- Step 3: Build vector store ---
-def build_vector_store(documents, filenames):
-    """Creates embeddings and stores them in ChromaDB."""
-    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-
-    client = chromadb.Client()
-    # Delete existing collection if it exists
+# --- Step 3: Get embeddings via HF API (no local model!) ---
+def get_embeddings(texts, client):
+    """Gets embeddings from Hugging Face Inference API."""
+    embeddings = client.feature_extraction(
+        texts,
+        model="sentence-transformers/all-MiniLM-L6-v2"
+    )
+    # The API returns nested lists, convert to list of lists
+    result = []
+    for emb in embeddings:
+        if isinstance(emb[0], list):
+            # Mean pooling if token-level embeddings returned
+            import numpy as np
+            arr = np.array(emb)
+            pooled = arr.mean(axis=0).tolist()
+            result.append(pooled)
+        else:
+            result.append(emb)
+    return result
+
+# --- Step 4: Build vector store ---
+def build_vector_store(documents, filenames, client):
+    """Creates embeddings via API and stores them in ChromaDB."""
+    chroma_client = chromadb.Client()
     try:
-        client.delete_collection(COLLECTION_NAME)
+        chroma_client.delete_collection(COLLECTION_NAME)
     except:
         pass
-    collection = client.create_collection(name=COLLECTION_NAME)
+    collection = chroma_client.create_collection(name=COLLECTION_NAME)
 
     all_chunks = []
     all_ids = []
@@ -60,32 +76,33 @@ def build_vector_store(documents, filenames):
             all_metadata.append({"source": fname})
             chunk_id += 1
 
-    # Generate embeddings
-    embeddings = model.encode(all_chunks).tolist()
+    print(f"Generating embeddings for {len(all_chunks)} chunks via API...")
+    # Process in batches to avoid API limits
+    batch_size = 16
+    all_embeddings = []
+    for i in range(0, len(all_chunks), batch_size):
+        batch = all_chunks[i:i+batch_size]
+        batch_embeddings = get_embeddings(batch, client)
+        all_embeddings.extend(batch_embeddings)
+        print(f" Processed {min(i+batch_size, len(all_chunks))}/{len(all_chunks)} chunks")
 
-    # Add to ChromaDB
     collection.add(
         documents=all_chunks,
-        embeddings=embeddings,
+        embeddings=all_embeddings,
        ids=all_ids,
         metadatas=all_metadata
     )
 
-    return collection, model
+    return collection
 
-# --- Step 4: RAG query function ---
-def query_rag(question, collection, embed_model, llm_client):
+# --- Step 5: RAG query function ---
+def query_rag(question, collection, client):
     """Retrieves relevant chunks and generates an answer."""
-    # Embed the question
-    q_embedding = embed_model.encode([question]).tolist()
+    q_embedding = get_embeddings([question], client)
 
-    # Retrieve top 3 relevant chunks
     results = collection.query(query_embeddings=q_embedding, n_results=3)
-
-    # Build context from retrieved documents
     context = "\n\n".join(results["documents"][0])
 
-    # Create prompt
     prompt = f"""You are an AI Twin that represents a person. Use ONLY the following context to answer the question.
 If you don't know the answer from the context, say "I don't have that information in my profile."
 
@@ -96,10 +113,10 @@ Question: {question}
 
 Answer:"""
 
-    # Generate response using Hugging Face Inference API
     try:
-        response = llm_client.text_generation(
+        response = client.text_generation(
             prompt,
+            model="mistralai/Mistral-7B-Instruct-v0.2",
             max_new_tokens=512,
             temperature=0.3,
             repetition_penalty=1.1
@@ -109,18 +126,17 @@ Answer:"""
         return f"Error generating response: {str(e)}"
 
 # --- Global Initialization ---
+print("Initializing HF client...")
+hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN", None)
+hf_client = InferenceClient(token=hf_token)
+
 print("Loading documents...")
 docs, fnames = load_documents()
 print(f"Loaded {len(docs)} documents: {fnames}")
 
-print("Building vector store...")
-kb_collection, embedding_model = build_vector_store(docs, fnames)
-print("Vector store ready.")
-
-print("Initializing LLM client...")
-hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN", None)
-llm = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.2", token=hf_token)
-print("LLM client ready.")
+print("Building vector store (using API for embeddings)...")
+kb_collection = build_vector_store(docs, fnames, hf_client)
+print("Vector store ready!")
 
 # --- Gradio UI ---
 def load_profile_summary():
@@ -131,7 +147,7 @@ def load_profile_summary():
     return "Profile not found."
 
 def ask_ai_twin(message, chat_history):
-    answer = query_rag(message, kb_collection, embedding_model, llm)
+    answer = query_rag(message, kb_collection, hf_client)
     chat_history.append((message, answer))
     return "", chat_history
 
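The substantive change in app.py is swapping the local SentenceTransformer for embeddings fetched through InferenceClient.feature_extraction. Below is a minimal sketch for smoke-testing that path outside the Space; it assumes a valid HUGGINGFACEHUB_API_TOKEN in the environment, and the single-string call plus the 384-dimension expectation come from the all-MiniLM-L6-v2 model, not from this commit.

import os
import numpy as np
from huggingface_hub import InferenceClient

# Sketch only: assumes HUGGINGFACEHUB_API_TOKEN is set in the environment.
client = InferenceClient(token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"))

# all-MiniLM-L6-v2 produces 384-dimensional sentence embeddings.
emb = client.feature_extraction(
    "What does this person do for work?",
    model="sentence-transformers/all-MiniLM-L6-v2",
)

arr = np.array(emb)
# Some hub/model combinations return token-level vectors; mean-pool them,
# mirroring the fallback branch in get_embeddings().
if arr.ndim > 1:
    arr = arr.mean(axis=0)
print(arr.shape)  # expected: (384,)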
requirements.txt CHANGED
@@ -1,4 +1,4 @@
 chromadb
-sentence-transformers
 gradio
 huggingface-hub
+numpy
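numpy enters requirements.txt only to support the mean-pooling fallback in get_embeddings(). A tiny illustration of that pooling step, using made-up token vectors (real ones from all-MiniLM-L6-v2 are 384-dimensional):

import numpy as np

# Hypothetical token-level output: 4 tokens x 3 dimensions (illustration only).
token_embeddings = [
    [0.1, 0.2, 0.3],
    [0.3, 0.2, 0.1],
    [0.0, 0.4, 0.2],
    [0.2, 0.0, 0.2],
]

# Mean pooling collapses the token axis into a single sentence vector,
# exactly what the fallback branch in get_embeddings() does.
sentence_vector = np.array(token_embeddings).mean(axis=0).tolist()
print(sentence_vector)  # [0.15, 0.2, 0.2]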