Spaces:

NeerajCodz
/

rag-slack

Sleeping

App Files Files Community

NeerajCodz committed on Dec 5, 2025

Commit

c18dee2

verified ·

1 Parent(s): 1cb9d55

Initial commit of RAG Slack bot

Browse files

Files changed (4) hide show

Dockerfile +20 -0
README.md +29 -10
app.py +181 -0
requirements.txt +11 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,20 @@

+# Use an official Python runtime as a parent image
+FROM python:3.12-slim
+# Set the working directory in the container
+WORKDIR /app
+# Copy the requirements file into the container on its own, before the
+# application source, so the dependency layer can be cached separately
+COPY requirements.txt .
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the current directory contents into the container
+COPY . .
+# Expose port 7860 (default for HF Spaces, or adjust as needed)
+EXPOSE 7860
+# Run the application with uvicorn; "app:api" refers to the FastAPI
+# instance named `api` in app.py, bound to the port exposed above
+CMD ["uvicorn", "app:api", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,29 @@
----
-title: Rag Slack
-emoji: 🚀
-colorFrom: yellow
-colorTo: pink
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Slack RAG Bot
+emoji: 🤖
+colorFrom: blue
+colorTo: purple
+sdk: docker
+pinned: false
+---
+# Slack RAG Bot
+A production-ready Slack bot that processes documents (PDF/DOCX) and answers questions using RAG (Retrieval-Augmented Generation).
+## Features
+- 📄 Process PDF and DOCX files
+- 🔍 Vector search using Supabase
+- 💬 Answer questions based on uploaded documents
+- 🤖 Powered by sentence-transformers and RoBERTa
+## Environment Variables
+Set these in your HuggingFace Space settings:
+- `HF_TOKEN`: Your HuggingFace token (optional for public models)
+- `SUPABASE_URL`: Your Supabase project URL
+- `SUPABASE_KEY`: Your Supabase anon key
+- `SLACK_BOT_TOKEN`: Your Slack bot token (xoxb-...)
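+- `SLACK_SIGNING_SECRET`: Your Slack signing secret
+- `SLACK_CLIENT_ID`: Your Slack app client ID (used by the OAuth callback)
+- `SLACK_CLIENT_SECRET`: Your Slack app client secret (used by the OAuth callback)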

app.py ADDED Viewed

	@@ -0,0 +1,181 @@

+import os
+import io
+import re
+from typing import List, Dict, Any
+from fastapi import FastAPI, Request
+from slack_bolt import App
+from slack_bolt.adapter.fastapi import SlackRequestHandler
+from sentence_transformers import SentenceTransformer
+from transformers import pipeline
+from supabase import create_client, Client
+import pypdf
+from docx import Document
+import requests
+import uvicorn
+# Load secrets from environment variables.
+# os.environ.get returns None for any variable that is not set; nothing here
+# validates the values before they are passed to the clients below.
+SUPABASE_URL = os.environ.get("SUPABASE_URL")
+SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
+SLACK_BOT_TOKEN = os.environ.get("SLACK_BOT_TOKEN")
+SLACK_SIGNING_SECRET = os.environ.get("SLACK_SIGNING_SECRET")
+# Client ID/secret are only read by the OAuth callback route.
+SLACK_CLIENT_ID = os.environ.get("SLACK_CLIENT_ID")
+SLACK_CLIENT_SECRET = os.environ.get("SLACK_CLIENT_SECRET")
+HF_TOKEN = os.environ.get("HF_TOKEN")  # Optional for public models, but suppresses warnings
+# Set HF_TOKEN if provided (helps with authentication for Hub access)
+# This happens before the models below are loaded.
+if HF_TOKEN:
+    from huggingface_hub import login
+    login(token=HF_TOKEN)
+# Shared clients: Supabase for storage/search, Slack Bolt for event handling,
+# and the FastAPI application that uvicorn serves.
+supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
+app = App(token=SLACK_BOT_TOKEN, signing_secret=SLACK_SIGNING_SECRET)
+api = FastAPI()
+# Both models are loaded once, at import time, and reused by every request.
+print("Loading embedding model...")
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+print("Loading QA model...")
+qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
+print("Models loaded successfully!")
+def download_slack_file(url: str, token: str) -> bytes:
+    headers = {"Authorization": f"Bearer {token}"}
+    response = requests.get(url, headers=headers)
+    response.raise_for_status()
+    return response.content
+def extract_text_from_pdf(file_content: bytes) -> str:
+    pdf_reader = pypdf.PdfReader(io.BytesIO(file_content))
+    text = ""
+    for page in pdf_reader.pages:
+        text += page.extract_text() + "\n"
+    return text
+def extract_text_from_docx(file_content: bytes) -> str:
+    doc = Document(io.BytesIO(file_content))
+    text = ""
+    for paragraph in doc.paragraphs:
+        text += paragraph.text + "\n"
+    return text
+def chunk_text(text: str, chunk_size: int = 300) -> List[str]:
+    words = text.split()
+    chunks = []
+    for i in range(0, len(words), chunk_size):
+        chunk = " ".join(words[i:i + chunk_size])
+        if chunk.strip():
+            chunks.append(chunk)
+    return chunks
+def embed_text(text: str) -> List[float]:
+    embedding = embedding_model.encode(text)
+    return embedding.tolist()
+def store_embeddings(chunks: List[str]):
+    for chunk in chunks:
+        embedding = embed_text(chunk)
+        supabase.table("documents").insert({
+            "content": chunk,
+            "embedding": embedding
+        }).execute()
+def search_documents(query: str, match_count: int = 5) -> List[Dict[str, Any]]:
+    query_embedding = embed_text(query)
+    result = supabase.rpc("match_documents", {
+        "query_embedding": query_embedding,
+        "match_count": match_count
+    }).execute()
+    return result.data
+def answer_question(question: str, context: str) -> str:
+    if not context.strip():
+        return "No relevant documents found."
+    result = qa_pipeline(question=question, context=context[:4096])
+    return result['answer']
+@app.event("file_shared")
+def handle_file_shared(event, say, client):
+    file_id = event["file_id"]
+    file_info = client.files_info(file=file_id)
+    file_data = file_info["file"]
+    file_type = file_data.get("mimetype", "")
+    file_url = file_data.get("url_private_download")
+    if not file_url:
+        return
+    try:
+        file_content = download_slack_file(file_url, SLACK_BOT_TOKEN)
+        text = ""
+        if "pdf" in file_type:
+            text = extract_text_from_pdf(file_content)
+        elif "wordprocessingml" in file_type or "msword" in file_type:
+            text = extract_text_from_docx(file_content)
+        else:
+            say("Unsupported file type. Please upload PDF or DOCX files.")
+            return
+        chunks = chunk_text(text)
+        store_embeddings(chunks)
+        say(f"✅ File processed successfully! Added {len(chunks)} chunks to knowledge base.")
+    except Exception as e:
+        say(f"❌ Error processing file: {str(e)}")
+@app.event("app_mention")
+def handle_mention(event, say):
+    text = event["text"]
+    user_query = re.sub(r'<@[A-Z0-9]+>', '', text).strip()
+    if not user_query:
+        say("Please ask me a question!")
+        return
+    try:
+        results = search_documents(user_query, match_count=5)
+        if not results:
+            say("I couldn't find any relevant information in my knowledge base.")
+            return
+        context = " ".join([doc["content"] for doc in results])
+        answer = answer_question(user_query, context)
+        say(f"💡 *Answer:* {answer}")
+    except Exception as e:
+        say(f"❌ Error answering question: {str(e)}")
+handler = SlackRequestHandler(app)
+@api.post("/slack/events")
+async def slack_events(request: Request):
+    return await handler.handle(request)
+@api.get("/")
+async def root():
+    return {"status": "Slack RAG Bot is running!", "message": "Use /slack/events endpoint for Slack events"}
+@api.get("/health")
+async def health():
+    return {"status": "ok"}
+@api.get("/slack/oauth/callback")
+async def oauth_callback(code: str, state: str = None):
+    # Handle OAuth installation
+    from slack_sdk.oauth import AuthorizeUrlGenerator
+    from slack_sdk.web import WebClient
+    client = WebClient()
+    oauth_response = client.oauth_v2_access(
+        client_id=SLACK_CLIENT_ID,
+        client_secret=SLACK_CLIENT_SECRET,
+        code=code
+    )
+    # Save the token for this workspace
+    return {"status": "success", "team_id": oauth_response["team"]["id"]}
+if __name__ == "__main__":
+    uvicorn.run(api, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+fastapi==0.104.1
+uvicorn==0.24.0
+slack-bolt==1.18.0
+sentence-transformers==2.2.2
+transformers==4.35.2
+supabase==2.0.3
+pypdf==3.17.1
+python-docx==1.1.0
+requests==2.31.0
+torch==2.1.0
+huggingface-hub==0.17.3