# Hugging Face Space app. (The original page header "Spaces: Runtime error"
# was web-scrape residue, not part of the program.)
| import os | |
| import json | |
| import numpy as np | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
# --- Configuration ---
# Directory (relative to the working directory) scanned for *.txt knowledge files.
KNOWLEDGE_BASE_DIR = "knowledge_base"
# --- Step 1: Load documents ---
def load_documents(base_dir=None):
    """Read every non-empty ``*.txt`` file from the knowledge base directory.

    Args:
        base_dir: Directory to scan; defaults to ``KNOWLEDGE_BASE_DIR``.

    Returns:
        Tuple ``(documents, filenames)``: parallel lists of stripped file
        contents and the corresponding file names.
    """
    directory = KNOWLEDGE_BASE_DIR if base_dir is None else base_dir
    documents = []
    filenames = []
    # sorted() makes chunk/source ordering deterministic across runs
    # (os.listdir order is filesystem-dependent).
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".txt"):
            continue
        filepath = os.path.join(directory, filename)
        # errors="ignore" drops undecodable bytes rather than failing startup.
        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
            content = f.read().strip()
        if content:
            documents.append(content)
            filenames.append(filename)
    return documents, filenames
| # --- Step 2: Chunk documents --- | |
| def chunk_text(text, chunk_size=500, overlap=100): | |
| chunks = [] | |
| start = 0 | |
| while start < len(text): | |
| end = start + chunk_size | |
| chunks.append(text[start:end]) | |
| start += chunk_size - overlap | |
| return chunks | |
# --- Step 3: Get embeddings via HF API ---
def get_embeddings(texts, client):
    """Embed each text via the HF Inference API and stack into one 2-D array."""

    def _embed_one(text):
        raw = client.feature_extraction(text, model="sentence-transformers/all-MiniLM-L6-v2")
        vec = np.array(raw)
        # Token-level responses come back as (tokens, dim); mean-pool to one vector.
        return vec.mean(axis=0) if vec.ndim == 2 else vec

    return np.array([_embed_one(t) for t in texts])
# --- Step 4: Simple vector search with numpy ---
def cosine_similarity(a, b):
    """Row-wise cosine similarity matrix between ``a`` and ``b``.

    The small epsilon keeps the division finite for zero-norm rows.
    """
    eps = 1e-10
    unit_a = a / (np.linalg.norm(a, axis=-1, keepdims=True) + eps)
    unit_b = b / (np.linalg.norm(b, axis=-1, keepdims=True) + eps)
    return unit_a @ unit_b.T
class SimpleVectorStore:
    """In-memory store of text chunks with brute-force cosine search."""

    def __init__(self):
        self.chunks = []
        self.sources = []
        self.embeddings = None

    def add(self, chunks, sources, embeddings):
        """Replace (not append to) any previously stored data."""
        self.chunks = chunks
        self.sources = sources
        self.embeddings = embeddings

    def search(self, query_embedding, top_k=3):
        """Return up to ``top_k`` ``(chunk, source, score)`` triples, best first."""
        sims = cosine_similarity(query_embedding.reshape(1, -1), self.embeddings)[0]
        # Descending-score order; slicing a reversed argsort is equivalent to
        # the original take-last-k-then-reverse.
        best = np.argsort(sims)[::-1][:top_k]
        return [(self.chunks[i], self.sources[i], float(sims[i])) for i in best]
# --- Step 5: Build the knowledge store ---
def build_store(documents, filenames, client):
    """Chunk every document, embed all chunks, and return a populated store."""
    all_chunks = []
    all_sources = []
    for doc, fname in zip(documents, filenames):
        pieces = chunk_text(doc)
        all_chunks.extend(pieces)
        # Remember which file each chunk came from, for attribution in results.
        all_sources.extend([fname] * len(pieces))
    print(f"Embedding {len(all_chunks)} chunks via API...")
    embeddings = get_embeddings(all_chunks, client)
    print("Embeddings complete.")
    store = SimpleVectorStore()
    store.add(all_chunks, all_sources, embeddings)
    return store
# --- Step 6: RAG query ---
def query_rag(question, store, client):
    """Answer ``question`` by retrieving top chunks and prompting the LLM.

    Returns the model's reply, or an ``"Error: ..."`` string on API failure.
    """
    question_embedding = get_embeddings([question], client)[0]
    hits = store.search(question_embedding, top_k=3)
    context = "\n\n".join(chunk for chunk, _src, _score in hits)
    system_prompt = f"""You are an AI Twin that represents a person. Use ONLY the following context to answer the question.
If you don't know the answer from the context, say "I don't have that information in my profile."
Context:
{context}"""
    try:
        reply = client.chat_completion(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": question},
            ],
            model="meta-llama/Meta-Llama-3-8B-Instruct",
            max_tokens=512,
            temperature=0.3,
        )
        return reply.choices[0].message.content.strip()
    except Exception as e:
        return f"Error: {str(e)}"
# --- Initialization ---
# Runs at import time: this module is a script-style Gradio Space entry point,
# so the embedding of the whole corpus happens during startup.
print("Starting AI Twin...")
# Token is optional; with None the InferenceClient falls back to
# unauthenticated access. NOTE(review): assumes the Space sets
# HUGGINGFACEHUB_API_TOKEN — confirm in the Space's secrets.
hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN", None)
hf_client = InferenceClient(token=hf_token)
docs, fnames = load_documents()
print(f"Loaded {len(docs)} documents: {fnames}")
# One embedding API call per chunk, so startup cost scales with corpus size.
vector_store = build_store(docs, fnames, hf_client)
print("Ready!")
# --- Helpers ---
def load_profile(base_dir=None):
    """Return the raw contents of ``profile.txt``, or a fallback message.

    Args:
        base_dir: Directory containing ``profile.txt``; defaults to
            ``KNOWLEDGE_BASE_DIR``.

    Returns:
        The file contents, or ``"Profile not found."`` when the file is
        missing or unreadable.
    """
    directory = KNOWLEDGE_BASE_DIR if base_dir is None else base_dir
    try:
        with open(os.path.join(directory, "profile.txt"), "r", encoding="utf-8") as f:
            return f.read()
    except OSError:
        # Narrowed from a bare `except:` so genuine bugs (NameError, etc.)
        # are no longer silently swallowed.
        return "Profile not found."
def respond(message, chat_history):
    """Handle one chat turn for Gradio: append the user message, generate an
    answer via RAG, and clear the textbox.

    Returns ``("", updated_history)`` so the input box empties after submit.
    """
    if not message:
        return "", chat_history
    history = chat_history if chat_history is not None else []
    history.append({"role": "user", "content": message})
    try:
        answer = query_rag(message, vector_store, hf_client)
    except Exception as e:
        answer = f"Error: {str(e)}"
    history.append({"role": "assistant", "content": answer})
    return "", history
# When a suggestion chip is clicked, fill the textbox with that prompt
def use_suggestion(prompt_text):
    """Echo the clicked suggestion so it can populate the question textbox."""
    chosen = prompt_text
    return chosen
# --- Default prompt suggestions ---
# Texts shown on the clickable chips below the chat.
# NOTE(review): the leading characters look like mojibake (UTF-8 emoji decoded
# with the wrong codec) — confirm the intended emoji against the original
# file. Kept byte-identical here because they are runtime UI strings.
SUGGESTIONS = [
    "πΌ What are my skills?",
    "π What projects have I done?",
    "π― What roles am I eligible for?",
    "π What is my educational background?",
    "π What languages do I speak?",
    "π How can someone contact me?",
]
# --- Custom CSS for suggestion chips ---
# Styles the suggestion buttons as gradient "pill" chips with a hover lift,
# and lays the chip row out as a wrapping flexbox. The !important flags
# override Gradio's built-in button styling.
custom_css = """
#suggestion-row {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-bottom: 10px;
}
.suggestion-chip {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
color: white !important;
border: none !important;
border-radius: 20px !important;
padding: 6px 14px !important;
font-size: 13px !important;
cursor: pointer !important;
transition: transform 0.15s ease, box-shadow 0.15s ease !important;
white-space: nowrap !important;
}
.suggestion-chip:hover {
transform: translateY(-2px) !important;
box-shadow: 0 4px 12px rgba(102, 126, 234, 0.45) !important;
}
.suggestion-chip:active {
transform: translateY(0px) !important;
}
#chatbot-col {
display: flex;
flex-direction: column;
}
"""
# --- UI ---
# Two-column layout: static profile summary on the left, chat with suggestion
# chips on the right. NOTE(review): several literals below contain suspected
# mojibake (e.g. "π€", "β") — verify against the original source encoding;
# they are runtime strings and are kept byte-identical here.
with gr.Blocks(title="My AI Twin", theme=gr.themes.Soft(), css=custom_css) as demo:
    gr.Markdown("# π€ My AI Twin")
    gr.Markdown("Ask me anything about my professional background, skills, and projects β or pick a suggestion below!")
    with gr.Row():
        # Left: Profile summary (read-only, loaded once at startup)
        with gr.Column(scale=1):
            gr.Markdown("### π Profile Summary")
            gr.Textbox(
                value=load_profile(),
                label="About Me",
                interactive=False,
                lines=15,
            )
        # Right: Chat + suggestions
        with gr.Column(scale=2, elem_id="chatbot-col"):
            # type="messages" means history is a list of {"role", "content"}
            # dicts — matching what respond() appends.
            chatbot = gr.Chatbot(label="Conversation", height=380, type="messages")
            # --- Suggestion chips ---
            gr.Markdown("**π‘ Suggested questions β click to use:**")
            with gr.Row(elem_id="suggestion-row"):
                chip_btns = [
                    gr.Button(s, elem_classes=["suggestion-chip"], size="sm")
                    for s in SUGGESTIONS
                ]
            # --- Input area ---
            msg = gr.Textbox(
                label="Ask a question",
                placeholder="Type your own question, or click a suggestion aboveβ¦",
                lines=1,
            )
            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
                clear_btn = gr.Button("Clear")
    # Wire up suggestion chips -> fill textbox.
    # gr.State(suggestion) binds each chip's own text at loop time, which
    # avoids the classic late-binding-closure bug a lambda would have.
    for chip, suggestion in zip(chip_btns, SUGGESTIONS):
        chip.click(
            fn=use_suggestion,
            inputs=[gr.State(suggestion)],
            outputs=[msg],
        )
    # Wire up submit / enter (both routes go through respond()).
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
    # Clear resets both the chat history and the textbox.
    clear_btn.click(lambda: (None, ""), None, [chatbot, msg], queue=False)
if __name__ == "__main__":
    demo.launch()