rottg commited on
Commit
85ff768
·
verified ·
1 Parent(s): 171ae15

Update code

Browse files
Dockerfile CHANGED
@@ -14,6 +14,8 @@ COPY data_structures.py .
14
  COPY indexer.py .
15
  COPY search.py .
16
  COPY semantic_search.py .
 
 
17
  COPY schema.sql .
18
  COPY static/ static/
19
  COPY templates/ templates/
 
14
  COPY indexer.py .
15
  COPY search.py .
16
  COPY semantic_search.py .
17
+ COPY hybrid_search.py .
18
+ COPY gemini_client.py .
19
  COPY schema.sql .
20
  COPY static/ static/
21
  COPY templates/ templates/
dashboard.py CHANGED
@@ -27,59 +27,96 @@ from collections import defaultdict
27
  # DATABASE DOWNLOAD FROM HF DATASET
28
  # ==========================================
29
  HF_DATASET_REPO = "rottg/telegram-db"
30
- DB_FILENAME = "telegram.db"
31
  APP_DIR = os.path.dirname(os.path.abspath(__file__))
32
- DB_PATH_FULL = os.path.join(APP_DIR, DB_FILENAME)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
 
35
  def ensure_db_exists():
36
- """Download DB from HF Dataset repo if it doesn't exist locally."""
37
- print(f"[DB] Checking for database at: {DB_PATH_FULL}")
38
  print(f"[DB] Current working directory: {os.getcwd()}")
39
 
 
40
  if os.path.exists(DB_PATH_FULL):
41
  size_mb = os.path.getsize(DB_PATH_FULL) / (1024 * 1024)
42
- print(f"✓ Database found: {DB_PATH_FULL} ({size_mb:.0f} MB)")
43
- return True
 
 
 
 
 
 
 
 
44
 
45
- print(f"[DB] Database not found. Downloading from HF Dataset {HF_DATASET_REPO}...")
46
- try:
47
- from huggingface_hub import hf_hub_download
48
- import shutil
49
-
50
- # Get token from environment
51
- token = os.environ.get("HF_TOKEN")
52
- print(f"[DB] HF_TOKEN from env: {'set' if token else 'NOT SET'}")
53
-
54
- if not token:
55
- token_file = os.path.join(APP_DIR, ".hf_token")
56
- if os.path.exists(token_file):
57
- with open(token_file) as f:
58
- token = f.read().strip()
59
- print(f"[DB] HF_TOKEN from file: set")
60
-
61
- # Download to cache, then copy to app dir
62
- cached_path = hf_hub_download(
63
- repo_id=HF_DATASET_REPO,
64
- filename=DB_FILENAME,
65
- repo_type="dataset",
66
- token=token,
67
- )
68
- print(f"[DB] Downloaded to cache: {cached_path}")
69
 
70
- # Copy to app directory
71
- shutil.copy2(cached_path, DB_PATH_FULL)
72
- size_mb = os.path.getsize(DB_PATH_FULL) / (1024 * 1024)
73
- print(f"✓ Database ready: {DB_PATH_FULL} ({size_mb:.0f} MB)")
74
- return True
75
- except Exception as e:
76
- print(f"✗ Failed to download database: {e}")
77
- import traceback
78
- traceback.print_exc()
79
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
 
82
- # Download DB on module import (for gunicorn)
83
  ensure_db_exists()
84
 
85
  # ==========================================
@@ -256,6 +293,12 @@ def settings_page():
256
  return render_template('settings.html')
257
 
258
 
 
 
 
 
 
 
259
  # ==========================================
260
  # API ENDPOINTS - OVERVIEW STATS
261
  # ==========================================
@@ -1844,6 +1887,130 @@ def api_ai_search():
1844
  return jsonify({'error': str(e), 'query': query})
1845
 
1846
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1847
  def fallback_ai_search(query: str):
1848
  """Fallback search when AI is not available."""
1849
  conn = get_db()
 
27
  # DATABASE DOWNLOAD FROM HF DATASET
28
  # ==========================================
29
  HF_DATASET_REPO = "rottg/telegram-db"
 
30
  APP_DIR = os.path.dirname(os.path.abspath(__file__))
31
+ DB_PATH_FULL = os.path.join(APP_DIR, "telegram.db")
32
+ EMBEDDINGS_PATH_FULL = os.path.join(APP_DIR, "embeddings.db")
33
+ CHUNK_EMBEDDINGS_PATH = os.path.join(APP_DIR, "chunk_embeddings.db")
34
+ BM25_INDEX_PATH = os.path.join(APP_DIR, "bm25_index.pkl")
35
+
36
+
37
def download_from_hf(filename, local_path):
    """Fetch *filename* from the HF dataset repo and copy it to *local_path*.

    The auth token is taken from the HF_TOKEN environment variable, with a
    fallback to a `.hf_token` file sitting next to the app. The file is
    first pulled into the huggingface_hub cache, then copied into place.
    Returns True on success (exceptions propagate to the caller).
    """
    import shutil

    from huggingface_hub import hf_hub_download

    token = os.environ.get("HF_TOKEN")
    if not token:
        token_path = os.path.join(APP_DIR, ".hf_token")
        if os.path.exists(token_path):
            with open(token_path) as fh:
                token = fh.read().strip()

    cached = hf_hub_download(
        repo_id=HF_DATASET_REPO,
        filename=filename,
        repo_type="dataset",
        token=token,
    )
    shutil.copy2(cached, local_path)
    return True
57
 
58
 
59
def ensure_db_exists():
    """Download DBs from HF Dataset repo if they don't exist locally.

    telegram.db is required: if it cannot be obtained the function returns
    False. The search-index artifacts (embeddings, chunk embeddings, BM25
    pickle) are optional — failures there are only logged.

    Returns:
        True when the required database is present, False otherwise.
    """
    print(f"[DB] Current working directory: {os.getcwd()}")

    # (filename in the dataset repo, local target path, required?)
    # The four artifacts previously had four copy-pasted download stanzas;
    # this table drives a single loop instead.
    artifacts = [
        ("telegram.db", DB_PATH_FULL, True),
        ("embeddings.db", EMBEDDINGS_PATH_FULL, False),         # semantic search
        ("chunk_embeddings.db", CHUNK_EMBEDDINGS_PATH, False),  # hybrid search
        ("bm25_index.pkl", BM25_INDEX_PATH, False),             # hybrid search
    ]

    for filename, path, required in artifacts:
        if os.path.exists(path):
            size_mb = os.path.getsize(path) / (1024 * 1024)
            print(f"✓ {filename} found ({size_mb:.0f} MB)")
            continue

        print(f"[DB] Downloading {filename} from HF...")
        try:
            download_from_hf(filename, path)
            size_mb = os.path.getsize(path) / (1024 * 1024)
            print(f"✓ {filename} downloaded ({size_mb:.0f} MB)")
        except Exception as e:
            if required:
                print(f"✗ Failed to download {filename}: {e}")
                return False
            # Optional index: degrade gracefully, the app still works.
            print(f"⚠ {filename} not available: {e}")

    return True
117
 
118
 
119
+ # Download DBs on module import (for gunicorn)
120
  ensure_db_exists()
121
 
122
  # ==========================================
 
293
  return render_template('settings.html')
294
 
295
 
296
@app.route('/ai-search')
def ai_search_page():
    """Serve the Gemini-backed AI search page."""
    return render_template('ai_search.html')
300
+
301
+
302
  # ==========================================
303
  # API ENDPOINTS - OVERVIEW STATS
304
  # ==========================================
 
1887
  return jsonify({'error': str(e), 'query': query})
1888
 
1889
 
1890
@app.route('/api/hybrid/search', methods=['POST'])
def api_hybrid_search():
    """
    Hybrid search combining:
    - Chunk-based vector search (conversation context)
    - BM25 keyword search (exact matches)
    - Query expansion (synonyms, variations)

    JSON body: {"query": str, "limit": int = 20, "include_context": bool = True}
    """
    # silent=True: a missing or non-JSON body yields None instead of a 400;
    # coalesce to {} so the .get() calls below cannot crash on None.
    data = request.get_json(silent=True) or {}
    query = data.get('query', '')
    limit = data.get('limit', 20)
    include_context = data.get('include_context', True)

    if not query:
        return jsonify({'error': 'Query required'})

    try:
        from hybrid_search import get_hybrid_search
        hs = get_hybrid_search()

        # Refuse early when no index has been built yet.
        stats = hs.stats()
        if not stats.get('chunks_available') and not stats.get('single_embeddings_available'):
            return jsonify({
                'error': 'No search indexes available. Run the Colab notebook first.',
                'stats': stats
            })

        # Search with or without surrounding-message context.
        if include_context:
            results = hs.search_with_context(query, limit=limit)
        else:
            results = hs.hybrid_search(query, limit=limit)

        # Expose the expanded query variations for UI display.
        expanded = hs.expand_query(query)

        return jsonify({
            'query': query,
            'expanded_queries': expanded,
            'results': results,
            'count': len(results),
            'stats': stats,
            'mode': 'hybrid'
        })

    except ImportError as e:
        return jsonify({'error': f'Hybrid search not available: {str(e)}'})
    except Exception as e:
        import traceback
        return jsonify({
            'error': str(e),
            'traceback': traceback.format_exc()
        })
1944
+
1945
+
1946
@app.route('/api/gemini/search', methods=['POST'])
def api_gemini_search():
    """
    AI-powered search using Gemini 1.5 Flash.
    Combines hybrid search with Gemini for natural language answers.

    JSON body: {"query": str, "limit": int = 5}
    """
    # silent=True: a missing or non-JSON body yields None instead of a 400;
    # coalesce to {} so the .get() calls below cannot crash on None.
    data = request.get_json(silent=True) or {}
    query = data.get('query', '')
    limit = data.get('limit', 5)

    if not query:
        return jsonify({'error': 'Query required'})

    try:
        from gemini_client import ai_search, get_gemini_client

        # Check if Gemini is available
        client = get_gemini_client()
        if not client.is_available():
            # Fall back to hybrid search without AI
            from hybrid_search import get_hybrid_search
            hs = get_hybrid_search()
            results = hs.search_with_context(query, limit=limit)

            return jsonify({
                'query': query,
                'success': False,
                'error': 'Gemini API not available. Set GEMINI_API_KEY environment variable.',
                'search_results': results,
                'count': len(results),
                'mode': 'hybrid_only'
            })

        # Perform AI search (hybrid retrieval + Gemini summarization)
        return jsonify(ai_search(query, limit=limit))

    except ImportError as e:
        return jsonify({'error': f'AI search not available: {str(e)}'})
    except Exception as e:
        import traceback
        return jsonify({
            'error': str(e),
            'traceback': traceback.format_exc()
        })
1992
+
1993
+
1994
@app.route('/api/gemini/status')
def api_gemini_status():
    """Report whether the Gemini API client is usable from this process."""
    try:
        from gemini_client import get_gemini_client

        client = get_gemini_client()
        key = os.environ.get('GEMINI_API_KEY', '')
        # Show only a short prefix of the key, never the full secret.
        preview = f"{key[:8]}..." if len(key) > 8 else None
        return jsonify({
            'available': client.is_available(),
            'api_key_set': bool(key),
            'api_key_preview': preview,
        })
    except Exception as e:
        return jsonify({
            'available': False,
            'error': str(e),
        })
2012
+
2013
+
2014
  def fallback_ai_search(query: str):
2015
  """Fallback search when AI is not available."""
2016
  conn = get_db()
gemini_client.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gemini AI Client for Chat Search
3
+ Uses Gemini 1.5 Flash to summarize search results and answer questions.
4
+ """
5
+
6
+ import os
7
+ import json
8
+ from typing import List, Dict, Optional
9
+
10
+ # Try importing Google Generative AI
11
+ try:
12
+ import google.generativeai as genai
13
+ HAS_GEMINI = True
14
+ except ImportError:
15
+ HAS_GEMINI = False
16
+
17
+
18
class GeminiClient:
    """Client for Gemini AI API.

    Wraps lazy initialization of the google-generativeai SDK and prompt
    construction for answering questions over chat search results.
    """

    def __init__(self, api_key: Optional[str] = None):
        # Explicit key wins; otherwise fall back to the environment.
        self.api_key = api_key or os.environ.get('GEMINI_API_KEY')
        self.model = None          # GenerativeModel, set by _initialize()
        self._initialized = False  # guards repeated configure() calls

    def _initialize(self) -> bool:
        """Lazily configure the Gemini SDK; return True when usable."""
        if self._initialized:
            return True

        if not HAS_GEMINI:
            print("google-generativeai not installed")
            return False

        if not self.api_key:
            print("GEMINI_API_KEY not set")
            return False

        try:
            genai.configure(api_key=self.api_key)
            self.model = genai.GenerativeModel('gemini-1.5-flash')
            self._initialized = True
            print("Gemini client initialized")
            return True
        except Exception as e:
            print(f"Failed to initialize Gemini: {e}")
            return False

    def answer_from_context(self, query: str, search_results: List[Dict],
                            max_results: int = 5) -> Dict:
        """
        Generate an answer based on search results.

        Args:
            query: User's question
            search_results: List of search results with context
            max_results: Max results to include in context

        Returns:
            Dict with 'answer', 'sources', and 'success'
        """
        if not self._initialize():
            return {
                'success': False,
                'error': 'Gemini not available',
                'answer': None
            }

        # Build the prompt context from the top search results.
        context_parts = []
        sources = []

        for i, result in enumerate(search_results[:max_results]):
            # Two result shapes are supported: search_with_context results
            # carry a 'message' dict (+ optional surrounding context), raw
            # hybrid_search results carry 'chunk_text'.
            if 'message' in result:
                msg = result['message']
                context_parts.append(f"""
--- תוצאה {i+1} (ציון: {result.get('score', 0):.2f}) ---
מאת: {msg.get('from_name', 'לא ידוע')}
תאריך: {msg.get('date', 'לא ידוע')}
הודעה: {msg.get('text', '')}
""")
                sources.append({
                    'from_name': msg.get('from_name'),
                    'date': msg.get('date'),
                    'message_id': result.get('message_id')
                })

                # Include surrounding conversation when provided.
                if result.get('context_before'):
                    context_parts.append("הקשר לפני:")
                    for ctx in result['context_before']:
                        context_parts.append(f" [{ctx.get('from_name', '?')}] {ctx.get('text_plain', '')[:100]}")

                if result.get('context_after'):
                    context_parts.append("הקשר אחרי:")
                    for ctx in result['context_after']:
                        context_parts.append(f" [{ctx.get('from_name', '?')}] {ctx.get('text_plain', '')[:100]}")

            elif 'chunk_text' in result:
                # hybrid_search format
                context_parts.append(f"""
--- תוצאה {i+1} (ציון: {result.get('score', 0):.2f}) ---
{result.get('chunk_text', '')}
""")
                sources.append({
                    'message_id': result.get('message_id'),
                    'score': result.get('score')
                })

        context = "\n".join(context_parts)

        # Build prompt (Hebrew instructions: answer briefly, cite sources,
        # never invent facts not present in the results).
        prompt = f"""אתה עוזר שמנתח שיחות מקבוצת טלגרם ועונה על שאלות.

השאלה: {query}

להלן תוצאות חיפוש רלוונטיות מהשיחות:

{context}

הנחיות:
1. ענה בעברית
2. תן תשובה קצרה וממוקדת (1-3 משפטים)
3. אם המידע לא ברור או לא קיים בתוצאות, אמור "לא מצאתי מידע ברור"
4. ציין את המקור (שם השולח והתאריך) אם רלוונטי
5. אל תמציא מידע שלא מופיע בתוצאות

התשובה:"""

        try:
            response = self.model.generate_content(prompt)
            answer = response.text.strip()

            return {
                'success': True,
                'answer': answer,
                'sources': sources,
                'query': query,
                # Count of search results folded into the prompt. (Was
                # len(context_parts), which also counted context-header
                # lines and so over-reported the number of results.)
                'results_used': len(sources)
            }

        except Exception as e:
            return {
                'success': False,
                'error': str(e),
                'answer': None
            }

    def is_available(self) -> bool:
        """Check if Gemini is available."""
        return self._initialize()
154
+
155
+
156
# Singleton instance
_gemini_client = None


def get_gemini_client() -> GeminiClient:
    """Return the process-wide GeminiClient, creating it on first use."""
    global _gemini_client
    if _gemini_client is None:
        _gemini_client = GeminiClient()
    return _gemini_client
166
+
167
+
168
def ai_search(query: str, limit: int = 5) -> Dict:
    """
    Perform AI-powered search: hybrid search + Gemini summarization.

    Args:
        query: Search query
        limit: Max results to use

    Returns:
        Dict with answer and metadata
    """
    from hybrid_search import get_hybrid_search

    # Retrieve candidate messages (with surrounding context) first.
    hits = get_hybrid_search().search_with_context(query, limit=limit)

    if not hits:
        return {
            'success': False,
            'error': 'No search results found',
            'answer': 'לא נמצאו תוצאות לחיפוש זה',
            'query': query
        }

    # Summarize with Gemini, then attach the raw hits for transparency.
    response = get_gemini_client().answer_from_context(query, hits, max_results=limit)
    response['search_results'] = hits
    return response
201
+
202
+
203
# CLI for testing
if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        # No query: just report whether Gemini can be reached.
        print("Usage: python gemini_client.py 'search query'")
        print("\nChecking Gemini availability...")
        if get_gemini_client().is_available():
            print("Gemini is available!")
        else:
            print("Gemini is NOT available. Set GEMINI_API_KEY environment variable.")
        sys.exit(0)

    query = ' '.join(sys.argv[1:])
    print(f"\n=== AI Search: {query} ===\n")

    result = ai_search(query)

    if result['success']:
        print(f"Answer: {result['answer']}")
        print(f"\nSources: {len(result.get('sources', []))} results used")
    else:
        print(f"Error: {result.get('error')}")
hybrid_search.py ADDED
@@ -0,0 +1,496 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hybrid Search - Combines Vector Search, BM25, and Query Expansion
3
+
4
+ This provides much better search for chat data by:
5
+ 1. Chunk-based vector search (captures context)
6
+ 2. BM25 keyword search (finds exact matches)
7
+ 3. Query expansion (handles variations)
8
+ """
9
+
10
+ import sqlite3
11
+ import numpy as np
12
+ import pickle
13
+ import re
14
+ import os
15
+ from typing import List, Dict, Any, Optional
16
+
17
+ # Try importing sentence-transformers
18
+ try:
19
+ from sentence_transformers import SentenceTransformer
20
+ HAS_TRANSFORMERS = True
21
+ except ImportError:
22
+ HAS_TRANSFORMERS = False
23
+
24
+ # Try importing BM25
25
+ try:
26
+ from rank_bm25 import BM25Okapi
27
+ HAS_BM25 = True
28
+ except ImportError:
29
+ HAS_BM25 = False
30
+
31
+
32
class HybridSearch:
    """
    Hybrid search combining:
    - Chunk-based vector search (conversation context)
    - BM25 keyword search (exact matches)
    - Query expansion (synonyms, variations)

    All indexes and the embedding model are loaded lazily on first use,
    so constructing an instance (and importing this module) stays cheap.
    """

    def __init__(self,
                 messages_db: str = 'telegram.db',
                 chunk_embeddings_db: str = 'chunk_embeddings.db',
                 bm25_index_path: str = 'bm25_index.pkl',
                 single_embeddings_db: str = 'embeddings.db'):
        # Paths to the message store and the prebuilt search artifacts.
        self.messages_db = messages_db
        self.chunk_embeddings_db = chunk_embeddings_db
        self.bm25_index_path = bm25_index_path
        self.single_embeddings_db = single_embeddings_db

        # Lazy-loaded components (populated by the _load_* helpers).
        self.model = None
        self.chunk_embeddings = None
        self.chunk_data = None
        self.bm25 = None
        self.bm25_message_ids = None
        self.single_embeddings = None
        self.single_message_ids = None

    @staticmethod
    def _normalize_rows(matrix):
        """L2-normalize each row of *matrix*; zero rows are left unchanged.

        Shared by both embedding loaders (previously duplicated inline).
        """
        norms = np.linalg.norm(matrix, axis=1, keepdims=True)
        return matrix / np.where(norms == 0, 1, norms)

    def _load_model(self):
        """Load the embedding model (idempotent)."""
        if self.model is not None:
            return

        if not HAS_TRANSFORMERS:
            raise RuntimeError("sentence-transformers not installed")

        print("Loading embedding model...")
        self.model = SentenceTransformer('intfloat/multilingual-e5-large')
        print("Model loaded!")

    def _embed_query(self, query: str):
        """Encode *query* with the e5 'query: ' prefix, L2-normalized.

        Shared by the chunk and single-message searchers (previously
        duplicated inline).
        """
        self._load_model()
        emb = self.model.encode([f"query: {query}"], convert_to_numpy=True)[0]
        return emb / np.linalg.norm(emb)

    def _load_chunk_embeddings(self):
        """Load chunk embeddings. Returns True when the index is usable."""
        if self.chunk_embeddings is not None:
            return True

        if not os.path.exists(self.chunk_embeddings_db):
            print(f"Chunk embeddings not found: {self.chunk_embeddings_db}")
            return False

        print(f"Loading chunk embeddings from {self.chunk_embeddings_db}...")
        conn = sqlite3.connect(self.chunk_embeddings_db)

        # Check if chunk_type column exists (for backwards compatibility)
        cursor = conn.execute("PRAGMA table_info(chunk_embeddings)")
        columns = [col[1] for col in cursor.fetchall()]

        if 'chunk_type' in columns:
            rows = conn.execute("""
                SELECT chunk_id, chunk_type, text, message_ids, anchor_message_id, embedding
                FROM chunk_embeddings
            """).fetchall()
        else:
            # Older indexes have no chunk_type; default everything to 'window'.
            rows = conn.execute("""
                SELECT chunk_id, 'window' as chunk_type, text, message_ids, anchor_message_id, embedding
                FROM chunk_embeddings
            """).fetchall()
        conn.close()

        if not rows:
            return False

        import json
        self.chunk_data = []
        vectors = []

        for chunk_id, chunk_type, text, msg_ids_json, anchor_id, emb_blob in rows:
            vectors.append(np.frombuffer(emb_blob, dtype=np.float32))
            self.chunk_data.append({
                'chunk_id': chunk_id,
                'chunk_type': chunk_type,
                'text': text,
                'message_ids': json.loads(msg_ids_json),
                'anchor_message_id': anchor_id
            })

        self.chunk_embeddings = self._normalize_rows(np.vstack(vectors))

        print(f"Loaded {len(self.chunk_data)} chunk embeddings")
        return True

    def _load_single_embeddings(self):
        """Load single-message embeddings (fallback). Returns True when usable."""
        if self.single_embeddings is not None:
            return True

        if not os.path.exists(self.single_embeddings_db):
            return False

        print(f"Loading single embeddings from {self.single_embeddings_db}...")
        conn = sqlite3.connect(self.single_embeddings_db)
        rows = conn.execute("""
            SELECT message_id, embedding FROM embeddings
        """).fetchall()
        conn.close()

        if not rows:
            return False

        self.single_message_ids = [msg_id for msg_id, _ in rows]
        vectors = [np.frombuffer(blob, dtype=np.float32) for _, blob in rows]
        self.single_embeddings = self._normalize_rows(np.vstack(vectors))

        print(f"Loaded {len(self.single_message_ids)} single embeddings")
        return True

    def _load_bm25(self):
        """Load BM25 index. Returns True when the index is usable."""
        if self.bm25 is not None:
            return True

        if not os.path.exists(self.bm25_index_path):
            print(f"BM25 index not found: {self.bm25_index_path}")
            return False

        print(f"Loading BM25 index from {self.bm25_index_path}...")
        # NOTE(security): pickle.load executes arbitrary code from the file.
        # Only load index files produced by our own pipeline, never uploads.
        with open(self.bm25_index_path, 'rb') as f:
            data = pickle.load(f)

        self.bm25 = data['bm25']
        self.bm25_message_ids = data['message_ids']
        print(f"Loaded BM25 index with {len(self.bm25_message_ids)} documents")
        return True

    def expand_query(self, query: str) -> List[str]:
        """
        Expand query with variations.
        Returns list of query variations to search (original first, max 5).
        """
        queries = [query]

        # Hebrew question word expansions
        expansions = {
            'איפה': ['איפה', 'היכן', 'מיקום', 'כתובת', 'עיר'],
            'מתי': ['מתי', 'באיזה תאריך', 'מועד', 'זמן'],
            'מי': ['מי', 'מיהו', 'מיהי', 'שם'],
            'כמה': ['כמה', 'מספר', 'כמות'],
            'למה': ['למה', 'מדוע', 'סיבה'],
            'גר': ['גר', 'גרה', 'מתגורר', 'מתגוררת', 'גרים'],
            'עובד': ['עובד', 'עובדת', 'עובדים', 'מועסק', 'עבודה'],
        }

        # Add expanded variations
        for word, synonyms in expansions.items():
            if word in query:
                for syn in synonyms:
                    if syn != word:
                        expanded = query.replace(word, syn)
                        if expanded not in queries:
                            queries.append(expanded)

        return queries[:5]  # Limit to 5 variations

    def search_chunks(self, query: str, limit: int = 20) -> List[Dict]:
        """Search using chunk embeddings (context-aware)."""
        if not self._load_chunk_embeddings():
            return []

        query_norm = self._embed_query(query)

        # Cosine similarity against all (normalized) chunk vectors.
        similarities = np.dot(self.chunk_embeddings, query_norm)
        top_indices = np.argsort(similarities)[::-1][:limit]

        results = []
        for idx in top_indices:
            chunk = self.chunk_data[idx]
            results.append({
                'type': 'chunk',
                'chunk_type': chunk.get('chunk_type', 'window'),  # 'thread' or 'window'
                'chunk_id': chunk['chunk_id'],
                'text': chunk['text'],
                'message_ids': chunk['message_ids'],
                'anchor_message_id': chunk['anchor_message_id'],
                'score': float(similarities[idx])
            })

        return results

    def search_bm25(self, query: str, limit: int = 20) -> List[Dict]:
        """Search using BM25 (keyword-based)."""
        if not self._load_bm25():
            return []

        # Tokenize query
        query_tokens = re.findall(r'\w+', query.lower())

        scores = self.bm25.get_scores(query_tokens)
        top_indices = np.argsort(scores)[::-1][:limit]

        results = []
        for idx in top_indices:
            score = float(scores[idx])
            if score > 0:  # zero score = no shared terms; drop it
                results.append({
                    'type': 'bm25',
                    'message_id': self.bm25_message_ids[idx],
                    'score': score
                })

        return results

    def search_single(self, query: str, limit: int = 20) -> List[Dict]:
        """Search using single-message embeddings (fallback)."""
        if not self._load_single_embeddings():
            return []

        query_norm = self._embed_query(query)

        similarities = np.dot(self.single_embeddings, query_norm)
        top_indices = np.argsort(similarities)[::-1][:limit]

        return [{
            'type': 'single',
            'message_id': self.single_message_ids[idx],
            'score': float(similarities[idx])
        } for idx in top_indices]

    def hybrid_search(self, query: str, limit: int = 20,
                      vector_weight: float = 0.6,
                      bm25_weight: float = 0.4,
                      use_expansion: bool = True) -> List[Dict]:
        """
        Hybrid search combining vector and BM25.

        Args:
            query: Search query
            limit: Max results
            vector_weight: Weight for vector search (0-1)
            bm25_weight: Weight for BM25 search (0-1)
            use_expansion: Whether to expand query

        Returns:
            Combined search results, best first.
        """
        all_message_scores = {}

        # Get expanded queries
        queries = self.expand_query(query) if use_expansion else [query]

        # Search with each query variation
        for q in queries:
            # Chunk/Vector search: every message in a matching chunk is credited.
            for r in self.search_chunks(q, limit=limit * 2):
                for msg_id in r['message_ids']:
                    entry = all_message_scores.setdefault(
                        msg_id, {'vector': 0, 'bm25': 0, 'chunk_text': None})
                    # Use max score across message appearances
                    entry['vector'] = max(entry['vector'], r['score'] * vector_weight)
                    if entry['chunk_text'] is None:
                        entry['chunk_text'] = r['text']

            # BM25 search
            for r in self.search_bm25(q, limit=limit * 2):
                entry = all_message_scores.setdefault(
                    r['message_id'], {'vector': 0, 'bm25': 0, 'chunk_text': None})
                entry['bm25'] = max(
                    entry['bm25'],
                    r['score'] * bm25_weight / 10  # Normalize BM25 scores
                )

        # Combine scores
        combined = [{
            'message_id': msg_id,
            'score': scores['vector'] + scores['bm25'],
            'vector_score': scores['vector'],
            'bm25_score': scores['bm25'],
            'chunk_text': scores['chunk_text']
        } for msg_id, scores in all_message_scores.items()]

        # Sort by combined score
        combined.sort(key=lambda x: x['score'], reverse=True)

        return combined[:limit]

    def search_with_context(self, query: str, limit: int = 20,
                            context_window: int = 3) -> List[Dict]:
        """
        Search and return results with surrounding context.

        Args:
            query: Search query
            limit: Max results
            context_window: Messages before/after to include

        Returns:
            Results with full context
        """
        # Get hybrid search results
        results = self.hybrid_search(query, limit=limit)

        if not results:
            return []

        # Get full context from DB
        conn = sqlite3.connect(self.messages_db)
        conn.row_factory = sqlite3.Row

        enriched = []
        for r in results:
            msg_id = r['message_id']

            msg = conn.execute(
                "SELECT * FROM messages WHERE id = ?", (msg_id,)
            ).fetchone()

            if not msg:
                # Index can reference messages since pruned from the DB.
                continue

            # Get surrounding messages (nearest by timestamp on each side).
            context_before = conn.execute("""
                SELECT id, date, from_name, text_plain FROM messages
                WHERE date_unixtime < (SELECT date_unixtime FROM messages WHERE id = ?)
                ORDER BY date_unixtime DESC LIMIT ?
            """, (msg_id, context_window)).fetchall()

            context_after = conn.execute("""
                SELECT id, date, from_name, text_plain FROM messages
                WHERE date_unixtime > (SELECT date_unixtime FROM messages WHERE id = ?)
                ORDER BY date_unixtime ASC LIMIT ?
            """, (msg_id, context_window)).fetchall()

            enriched.append({
                'message_id': msg_id,
                'score': r['score'],
                'message': {
                    'id': msg['id'],
                    'date': msg['date'],
                    'from_name': msg['from_name'],
                    'text': msg['text_plain']
                },
                # reversed() restores chronological order for the "before" rows.
                'context_before': [dict(m) for m in reversed(context_before)],
                'context_after': [dict(m) for m in context_after],
                'chunk_text': r.get('chunk_text')
            })

        conn.close()
        return enriched

    def stats(self) -> Dict[str, Any]:
        """Get search index statistics."""
        stats = {
            'chunks_available': os.path.exists(self.chunk_embeddings_db),
            'bm25_available': os.path.exists(self.bm25_index_path),
            'single_embeddings_available': os.path.exists(self.single_embeddings_db),
        }

        if stats['chunks_available']:
            conn = sqlite3.connect(self.chunk_embeddings_db)
            stats['chunk_count'] = conn.execute(
                "SELECT COUNT(*) FROM chunk_embeddings"
            ).fetchone()[0]
            conn.close()

        if stats['single_embeddings_available']:
            conn = sqlite3.connect(self.single_embeddings_db)
            stats['single_embedding_count'] = conn.execute(
                "SELECT COUNT(*) FROM embeddings"
            ).fetchone()[0]
            conn.close()

        return stats
446
+
447
+
448
# Singleton instance
_hybrid_search = None


def get_hybrid_search() -> HybridSearch:
    """Return the shared HybridSearch instance, building it lazily."""
    global _hybrid_search
    if _hybrid_search is None:
        _hybrid_search = HybridSearch()
    return _hybrid_search
458
+
459
+
460
# CLI for testing
if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        # No query: print usage plus index availability and exit.
        print("Usage: python hybrid_search.py 'search query'")
        print("\nStats:")
        print(get_hybrid_search().stats())
        sys.exit(0)

    query = ' '.join(sys.argv[1:])
    searcher = get_hybrid_search()

    print(f"\n=== Searching: {query} ===\n")

    # Show expanded queries
    print(f"Expanded queries: {searcher.expand_query(query)}\n")

    # Search and dump the top hits with their conversation context.
    for i, hit in enumerate(searcher.search_with_context(query, limit=5), 1):
        print(f"--- Result {i} (score: {hit['score']:.3f}) ---")
        print(f"From: {hit['message']['from_name']}")
        print(f"Date: {hit['message']['date']}")
        print(f"Text: {hit['message']['text'][:200]}...")
        if hit['context_before']:
            print(f"\nContext before:")
            for ctx in hit['context_before']:
                print(f" [{ctx['from_name']}] {ctx['text_plain'][:100]}...")
        if hit['context_after']:
            print(f"\nContext after:")
            for ctx in hit['context_after']:
                print(f" [{ctx['from_name']}] {ctx['text_plain'][:100]}...")
        print()
requirements.txt CHANGED
@@ -3,3 +3,5 @@ gunicorn>=21.2
3
  requests>=2.31
4
  ijson>=3.2
5
  huggingface_hub>=0.20
 
 
 
3
  requests>=2.31
4
  ijson>=3.2
5
  huggingface_hub>=0.20
6
+ rank_bm25>=0.2.2
7
+ google-generativeai>=0.3.0
semantic_search.py CHANGED
@@ -43,7 +43,7 @@ class SemanticSearch:
43
  )
44
  if self.model is None:
45
  print("Loading embedding model...")
46
- self.model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
47
  print("Model loaded!")
48
 
49
  def reload_embeddings(self):
@@ -117,8 +117,8 @@ class SemanticSearch:
117
  if len(self.message_ids) == 0:
118
  return []
119
 
120
- # Encode query
121
- query_emb = self.model.encode([query], convert_to_numpy=True)[0]
122
 
123
  # Compute cosine similarity with all embeddings
124
  # embeddings are already normalized from Colab
@@ -384,7 +384,7 @@ Answer:"""
384
  'available': True,
385
  'count': count,
386
  'size_mb': round(size_mb, 1),
387
- 'model': 'paraphrase-multilingual-MiniLM-L12-v2'
388
  }
389
 
390
 
 
43
  )
44
  if self.model is None:
45
  print("Loading embedding model...")
46
+ self.model = SentenceTransformer('intfloat/multilingual-e5-large')
47
  print("Model loaded!")
48
 
49
  def reload_embeddings(self):
 
117
  if len(self.message_ids) == 0:
118
  return []
119
 
120
+ # Encode query (e5 model requires "query: " prefix)
121
+ query_emb = self.model.encode([f"query: {query}"], convert_to_numpy=True)[0]
122
 
123
  # Compute cosine similarity with all embeddings
124
  # embeddings are already normalized from Colab
 
384
  'available': True,
385
  'count': count,
386
  'size_mb': round(size_mb, 1),
387
+ 'model': 'intfloat/multilingual-e5-large'
388
  }
389
 
390
 
templates/ai_search.html ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="he" dir="rtl">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>AI Search - Telegram Analytics</title>
7
+ <link rel="stylesheet" href="/static/css/style.css">
8
+ <style>
9
+ .ai-container {
10
+ max-width: 900px;
11
+ margin: 0 auto;
12
+ }
13
+ .ai-search-box {
14
+ background: var(--card-bg);
15
+ border-radius: var(--radius-lg);
16
+ padding: var(--spacing-lg);
17
+ margin-bottom: var(--spacing-lg);
18
+ border: 1px solid var(--border-color);
19
+ }
20
+ .ai-search-input {
21
+ width: 100%;
22
+ padding: var(--spacing-md);
23
+ font-size: 1.1rem;
24
+ border: 2px solid var(--border-color);
25
+ border-radius: var(--radius-md);
26
+ background: var(--bg-secondary);
27
+ color: var(--text-primary);
28
+ margin-bottom: var(--spacing-md);
29
+ direction: rtl;
30
+ }
31
+ .ai-search-input:focus {
32
+ outline: none;
33
+ border-color: var(--accent-color);
34
+ }
35
+ .ai-search-btn {
36
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
37
+ color: white;
38
+ border: none;
39
+ padding: var(--spacing-md) var(--spacing-xl);
40
+ font-size: 1rem;
41
+ font-weight: 600;
42
+ border-radius: var(--radius-md);
43
+ cursor: pointer;
44
+ display: flex;
45
+ align-items: center;
46
+ gap: var(--spacing-sm);
47
+ transition: transform 0.2s, box-shadow 0.2s;
48
+ }
49
+ .ai-search-btn:hover {
50
+ transform: translateY(-2px);
51
+ box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
52
+ }
53
+ .ai-search-btn:disabled {
54
+ opacity: 0.6;
55
+ cursor: not-allowed;
56
+ transform: none;
57
+ }
58
+ .ai-answer-box {
59
+ background: var(--card-bg);
60
+ border-radius: var(--radius-lg);
61
+ padding: var(--spacing-lg);
62
+ margin-bottom: var(--spacing-lg);
63
+ border: 1px solid var(--border-color);
64
+ display: none;
65
+ }
66
+ .ai-answer-box.visible {
67
+ display: block;
68
+ }
69
+ .ai-answer-header {
70
+ display: flex;
71
+ align-items: center;
72
+ gap: var(--spacing-sm);
73
+ margin-bottom: var(--spacing-md);
74
+ color: var(--accent-color);
75
+ font-weight: 600;
76
+ }
77
+ .ai-answer-content {
78
+ font-size: 1.1rem;
79
+ line-height: 1.8;
80
+ color: var(--text-primary);
81
+ white-space: pre-wrap;
82
+ }
83
+ .ai-sources {
84
+ margin-top: var(--spacing-lg);
85
+ padding-top: var(--spacing-lg);
86
+ border-top: 1px solid var(--border-color);
87
+ }
88
+ .ai-sources-header {
89
+ font-weight: 600;
90
+ color: var(--text-secondary);
91
+ margin-bottom: var(--spacing-md);
92
+ }
93
+ .source-item {
94
+ background: var(--bg-secondary);
95
+ border-radius: var(--radius-md);
96
+ padding: var(--spacing-md);
97
+ margin-bottom: var(--spacing-sm);
98
+ border-right: 3px solid var(--accent-color);
99
+ }
100
+ .source-meta {
101
+ font-size: 0.85rem;
102
+ color: var(--text-muted);
103
+ margin-bottom: var(--spacing-xs);
104
+ }
105
+ .source-text {
106
+ color: var(--text-secondary);
107
+ font-size: 0.95rem;
108
+ }
109
+ .context-message {
110
+ background: var(--bg-secondary);
111
+ border-radius: var(--radius-md);
112
+ padding: var(--spacing-md);
113
+ margin-bottom: var(--spacing-sm);
114
+ border-right: 3px solid transparent;
115
+ }
116
+ .context-message.main {
117
+ border-right-color: var(--accent-color);
118
+ background: var(--card-bg);
119
+ }
120
+ .status-badge {
121
+ display: inline-flex;
122
+ align-items: center;
123
+ gap: var(--spacing-xs);
124
+ padding: var(--spacing-xs) var(--spacing-sm);
125
+ border-radius: var(--radius-sm);
126
+ font-size: 0.8rem;
127
+ margin-right: var(--spacing-sm);
128
+ }
129
+ .status-badge.available {
130
+ background: rgba(46, 204, 113, 0.2);
131
+ color: #2ecc71;
132
+ }
133
+ .status-badge.unavailable {
134
+ background: rgba(231, 76, 60, 0.2);
135
+ color: #e74c3c;
136
+ }
137
+ .example-queries {
138
+ display: flex;
139
+ flex-wrap: wrap;
140
+ gap: var(--spacing-sm);
141
+ margin-top: var(--spacing-md);
142
+ }
143
+ .example-query {
144
+ background: var(--bg-secondary);
145
+ color: var(--text-secondary);
146
+ border: 1px solid var(--border-color);
147
+ padding: var(--spacing-xs) var(--spacing-sm);
148
+ border-radius: var(--radius-sm);
149
+ font-size: 0.85rem;
150
+ cursor: pointer;
151
+ transition: all 0.2s;
152
+ }
153
+ .example-query:hover {
154
+ background: var(--accent-color);
155
+ color: white;
156
+ border-color: var(--accent-color);
157
+ }
158
+ .loading-animation {
159
+ display: flex;
160
+ align-items: center;
161
+ gap: var(--spacing-sm);
162
+ }
163
+ .loading-dots {
164
+ display: flex;
165
+ gap: 4px;
166
+ }
167
+ .loading-dots span {
168
+ width: 8px;
169
+ height: 8px;
170
+ background: var(--accent-color);
171
+ border-radius: 50%;
172
+ animation: bounce 1.4s infinite ease-in-out both;
173
+ }
174
+ .loading-dots span:nth-child(1) { animation-delay: -0.32s; }
175
+ .loading-dots span:nth-child(2) { animation-delay: -0.16s; }
176
+ @keyframes bounce {
177
+ 0%, 80%, 100% { transform: scale(0); }
178
+ 40% { transform: scale(1); }
179
+ }
180
+ </style>
181
+ </head>
182
+ <body>
183
+ <button class="mobile-menu-btn" onclick="toggleMobileMenu()">&#9776;</button>
184
+ <div class="sidebar-overlay" onclick="toggleMobileMenu()"></div>
185
+ <!-- Sidebar -->
186
+ <nav class="sidebar">
187
+ <div class="logo">
188
+ <span class="logo-icon">&#128202;</span>
189
+ <span class="logo-text">TG Analytics</span>
190
+ </div>
191
+ <ul class="nav-menu">
192
+ <li class="nav-item">
193
+ <a href="/" class="nav-link">
194
+ <span class="icon">&#128200;</span>
195
+ <span>Overview</span>
196
+ </a>
197
+ </li>
198
+ <li class="nav-item">
199
+ <a href="/users" class="nav-link">
200
+ <span class="icon">&#128101;</span>
201
+ <span>Users</span>
202
+ </a>
203
+ </li>
204
+ <li class="nav-item">
205
+ <a href="/chat" class="nav-link">
206
+ <span class="icon">&#128172;</span>
207
+ <span>Chat</span>
208
+ </a>
209
+ </li>
210
+ <li class="nav-item">
211
+ <a href="/search" class="nav-link">
212
+ <span class="icon">&#128269;</span>
213
+ <span>Search</span>
214
+ </a>
215
+ </li>
216
+ <li class="nav-item active">
217
+ <a href="/ai-search" class="nav-link">
218
+ <span class="icon">&#129302;</span>
219
+ <span>AI Search</span>
220
+ </a>
221
+ </li>
222
+ <li class="nav-item">
223
+ <a href="/moderation" class="nav-link">
224
+ <span class="icon">&#128737;</span>
225
+ <span>Moderation</span>
226
+ </a>
227
+ </li>
228
+ <li class="nav-item">
229
+ <a href="/settings" class="nav-link">
230
+ <span class="icon">&#9881;</span>
231
+ <span>Settings</span>
232
+ </a>
233
+ </li>
234
+ </ul>
235
+ </nav>
236
+
237
+ <!-- Main Content -->
238
+ <main class="main-content">
239
+ <!-- Header -->
240
+ <header class="header">
241
+ <h1>&#129302; AI Search</h1>
242
+ <div class="header-controls">
243
+ <span id="gemini-status" class="status-badge unavailable">Checking...</span>
244
+ </div>
245
+ </header>
246
+
247
+ <div class="ai-container">
248
+ <!-- Search Box -->
249
+ <div class="ai-search-box">
250
+ <input type="text" id="ai-query" class="ai-search-input"
251
+ placeholder="&#1513;&#1488;&#1500; &#1513;&#1488;&#1500;&#1492; &#1489;&#1513;&#1508;&#1492; &#1496;&#1489;&#1506;&#1497;&#1514;... (&#1488;&#1497;&#1508;&#1492; &#1491;&#1504;&#1497; &#1490;&#1512;?)"
252
+ onkeypress="if(event.key === 'Enter') performAISearch()">
253
+ <div style="display: flex; justify-content: space-between; align-items: center; flex-wrap: wrap; gap: var(--spacing-md);">
254
+ <button onclick="performAISearch()" class="ai-search-btn" id="search-btn">
255
+ <span>&#129302;</span> Search with AI
256
+ </button>
257
+ <div class="example-queries">
258
+ <span style="color: var(--text-muted); font-size: 0.85rem;">Examples:</span>
259
+ <button class="example-query" onclick="setQuery('&#1488;&#1497;&#1508;&#1492; &#1491;&#1504;&#1497; &#1490;&#1512;?')">&#1488;&#1497;&#1508;&#1492; &#1491;&#1504;&#1497; &#1490;&#1512;?</button>
260
+ <button class="example-query" onclick="setQuery('&#1502;&#1497; &#1492;&#1499;&#1497; &#1508;&#1506;&#1497;&#1500; &#1489;&#1511;&#1489;&#1493;&#1510;&#1492;?')">&#1502;&#1497; &#1492;&#1499;&#1497; &#1508;&#1506;&#1497;&#1500;?</button>
261
+ <button class="example-query" onclick="setQuery('&#1502;&#1492; &#1491;&#1497;&#1489;&#1512;&#1493; &#1506;&#1500; &#1492;&#1489;&#1495;&#1497;&#1512;&#1493;&#1514;?')">&#1502;&#1492; &#1491;&#1497;&#1489;&#1512;&#1493; &#1506;&#1500;...?</button>
262
+ </div>
263
+ </div>
264
+ </div>
265
+
266
+ <!-- Answer Box -->
267
+ <div class="ai-answer-box" id="answer-box">
268
+ <div class="ai-answer-header">
269
+ <span>&#129302;</span> AI Answer
270
+ </div>
271
+ <div class="ai-answer-content" id="answer-content">
272
+ <!-- Answer will be inserted here -->
273
+ </div>
274
+ <div class="ai-sources" id="sources-section" style="display: none;">
275
+ <div class="ai-sources-header">&#128214; Sources Used</div>
276
+ <div id="sources-list">
277
+ <!-- Sources will be inserted here -->
278
+ </div>
279
+ </div>
280
+ </div>
281
+
282
+ <!-- How it works -->
283
+ <div class="chart-card">
284
+ <div class="chart-header">
285
+ <h3>&#128161; How AI Search Works</h3>
286
+ </div>
287
+ <div style="padding: var(--spacing-md); color: var(--text-secondary); font-size: 0.9rem; direction: rtl;">
288
+ <ol style="line-height: 2;">
289
+ <li><strong>Hybrid Search</strong> - &#1502;&#1495;&#1508;&#1513; &#1489;-BM25 (&#1502;&#1497;&#1500;&#1497;&#1501;) + Vector Search (&#1502;&#1513;&#1502;&#1506;&#1493;&#1514;)</li>
290
+ <li><strong>Thread Chunking</strong> - &#1502;&#1511;&#1489;&#1509; &#1513;&#1488;&#1500;&#1493;&#1514; &#1493;&#1514;&#1513;&#1493;&#1489;&#1493;&#1514; &#1497;&#1495;&#1491;</li>
291
+ <li><strong>Query Expansion</strong> - &#1502;&#1512;&#1495;&#1497;&#1489; &#1488;&#1514; &#1492;&#1513;&#1488;&#1497;&#1500;&#1514;&#1492; &#1506;&#1501; &#1502;&#1497;&#1500;&#1497;&#1501; &#1504;&#1512;&#1491;&#1508;&#1493;&#1514;</li>
292
+ <li><strong>Gemini 1.5 Flash</strong> - &#1502;&#1505;&#1499;&#1501; &#1488;&#1514; &#1492;&#1514;&#1493;&#1510;&#1488;&#1493;&#1514; &#1500;&#1514;&#1513;&#1493;&#1489;&#1492; &#1488;&#1495;&#1514;</li>
293
+ </ol>
294
+ </div>
295
+ </div>
296
+ </div>
297
+ </main>
298
+
299
+ <script>
300
+ // Check Gemini status on load
301
+ async function checkGeminiStatus() {
302
+ try {
303
+ const response = await fetch('/api/gemini/status');
304
+ const data = await response.json();
305
+
306
+ const badge = document.getElementById('gemini-status');
307
+ if (data.available) {
308
+ badge.className = 'status-badge available';
309
+ badge.innerHTML = '&#10003; Gemini Ready';
310
+ } else {
311
+ badge.className = 'status-badge unavailable';
312
+ badge.innerHTML = '&#10007; Gemini Unavailable';
313
+ }
314
+ } catch (e) {
315
+ const badge = document.getElementById('gemini-status');
316
+ badge.className = 'status-badge unavailable';
317
+ badge.innerHTML = '&#10007; Error';
318
+ }
319
+ }
320
+
321
+ function setQuery(query) {
322
+ document.getElementById('ai-query').value = query;
323
+ document.getElementById('ai-query').focus();
324
+ }
325
+
326
+ async function performAISearch() {
327
+ const query = document.getElementById('ai-query').value.trim();
328
+ if (!query) return;
329
+
330
+ const btn = document.getElementById('search-btn');
331
+ const answerBox = document.getElementById('answer-box');
332
+ const answerContent = document.getElementById('answer-content');
333
+ const sourcesSection = document.getElementById('sources-section');
334
+ const sourcesList = document.getElementById('sources-list');
335
+
336
+ // Show loading
337
+ btn.disabled = true;
338
+ btn.innerHTML = '<div class="loading-animation"><div class="loading-dots"><span></span><span></span><span></span></div> Searching...</div>';
339
+
340
+ answerBox.classList.add('visible');
341
+ answerContent.innerHTML = '<div class="loading-animation"><div class="loading-dots"><span></span><span></span><span></span></div> <span>Searching and analyzing...</span></div>';
342
+ sourcesSection.style.display = 'none';
343
+
344
+ try {
345
+ const response = await fetch('/api/gemini/search', {
346
+ method: 'POST',
347
+ headers: { 'Content-Type': 'application/json' },
348
+ body: JSON.stringify({ query, limit: 5 })
349
+ });
350
+
351
+ const data = await response.json();
352
+
353
+ if (data.error && !data.answer) {
354
+ answerContent.innerHTML = `<span style="color: #e74c3c;">&#10060; Error: ${escapeHtml(data.error)}</span>`;
355
+ } else if (data.success === false && data.error) {
356
+ // Gemini not available, show hybrid results
357
+ answerContent.innerHTML = `<span style="color: #f39c12;">&#9888; ${escapeHtml(data.error)}</span><br><br>Showing search results without AI summarization:`;
358
+
359
+ if (data.search_results && data.search_results.length > 0) {
360
+ displaySources(data.search_results);
361
+ }
362
+ } else {
363
+ // Success with AI answer
364
+ answerContent.textContent = data.answer || 'No answer available';
365
+
366
+ // Show sources
367
+ if (data.search_results && data.search_results.length > 0) {
368
+ displaySources(data.search_results);
369
+ } else if (data.sources && data.sources.length > 0) {
370
+ displaySourcesMeta(data.sources);
371
+ }
372
+ }
373
+ } catch (error) {
374
+ answerContent.innerHTML = `<span style="color: #e74c3c;">&#10060; Error: ${escapeHtml(error.message)}</span>`;
375
+ }
376
+
377
+ // Reset button
378
+ btn.disabled = false;
379
+ btn.innerHTML = '<span>&#129302;</span> Search with AI';
380
+ }
381
+
382
+ function displaySources(results) {
383
+ const sourcesSection = document.getElementById('sources-section');
384
+ const sourcesList = document.getElementById('sources-list');
385
+
386
+ sourcesSection.style.display = 'block';
387
+
388
+ sourcesList.innerHTML = results.map(result => {
389
+ const msg = result.message || result;
390
+ const score = result.score ? ` (${(result.score * 100).toFixed(0)}%)` : '';
391
+
392
+ let html = `<div class="source-item">
393
+ <div class="source-meta">
394
+ ${escapeHtml(msg.from_name || 'Unknown')} - ${msg.date || ''}${score}
395
+ </div>
396
+ <div class="source-text">${escapeHtml((msg.text || '').substring(0, 200))}${(msg.text || '').length > 200 ? '...' : ''}</div>`;
397
+
398
+ // Show context if available
399
+ if (result.context_before && result.context_before.length > 0) {
400
+ html += '<div style="margin-top: 0.5rem; padding-top: 0.5rem; border-top: 1px dashed var(--border-color);">';
401
+ result.context_before.forEach(ctx => {
402
+ html += `<div class="context-message"><small>${escapeHtml(ctx.from_name || '?')}</small>: ${escapeHtml((ctx.text_plain || '').substring(0, 100))}</div>`;
403
+ });
404
+ html += '</div>';
405
+ }
406
+
407
+ html += '</div>';
408
+ return html;
409
+ }).join('');
410
+ }
411
+
412
+ function displaySourcesMeta(sources) {
413
+ const sourcesSection = document.getElementById('sources-section');
414
+ const sourcesList = document.getElementById('sources-list');
415
+
416
+ if (sources.length === 0) return;
417
+
418
+ sourcesSection.style.display = 'block';
419
+ sourcesList.innerHTML = sources.map(src => `
420
+ <div class="source-item">
421
+ <div class="source-meta">
422
+ ${escapeHtml(src.from_name || 'Unknown')} - ${src.date || ''}
423
+ </div>
424
+ </div>
425
+ `).join('');
426
+ }
427
+
428
+ function escapeHtml(text) {
429
+ if (!text) return '';
430
+ const div = document.createElement('div');
431
+ div.textContent = text;
432
+ return div.innerHTML;
433
+ }
434
+
435
+ function toggleMobileMenu() {
436
+ var s = document.querySelector('.sidebar');
437
+ var o = document.querySelector('.sidebar-overlay');
438
+ s.classList.toggle('open');
439
+ if (o) o.classList.toggle('active');
440
+ }
441
+
442
+ // Initialize
443
+ document.addEventListener('DOMContentLoaded', () => {
444
+ checkGeminiStatus();
445
+ document.getElementById('ai-query').focus();
446
+ });
447
+ </script>
448
+ </body>
449
+ </html>
templates/index.html CHANGED
@@ -41,6 +41,12 @@
41
  <span>Search</span>
42
  </a>
43
  </li>
 
 
 
 
 
 
44
  <li class="nav-item">
45
  <a href="/moderation" class="nav-link">
46
  <span class="icon">🛡️</span>
 
41
  <span>Search</span>
42
  </a>
43
  </li>
44
+ <li class="nav-item">
45
+ <a href="/ai-search" class="nav-link">
46
+ <span class="icon">🤖</span>
47
+ <span>AI Search</span>
48
+ </a>
49
+ </li>
50
  <li class="nav-item">
51
  <a href="/moderation" class="nav-link">
52
  <span class="icon">🛡️</span>
templates/moderation.html CHANGED
@@ -41,6 +41,12 @@
41
  <span>Search</span>
42
  </a>
43
  </li>
 
 
 
 
 
 
44
  <li class="nav-item active">
45
  <a href="/moderation" class="nav-link">
46
  <span class="icon">🛡️</span>
 
41
  <span>Search</span>
42
  </a>
43
  </li>
44
+ <li class="nav-item">
45
+ <a href="/ai-search" class="nav-link">
46
+ <span class="icon">🤖</span>
47
+ <span>AI Search</span>
48
+ </a>
49
+ </li>
50
  <li class="nav-item active">
51
  <a href="/moderation" class="nav-link">
52
  <span class="icon">🛡️</span>
templates/search.html CHANGED
@@ -40,6 +40,12 @@
40
  <span>Search</span>
41
  </a>
42
  </li>
 
 
 
 
 
 
43
  <li class="nav-item">
44
  <a href="/moderation" class="nav-link">
45
  <span class="icon">🛡️</span>
 
40
  <span>Search</span>
41
  </a>
42
  </li>
43
+ <li class="nav-item">
44
+ <a href="/ai-search" class="nav-link">
45
+ <span class="icon">🤖</span>
46
+ <span>AI Search</span>
47
+ </a>
48
+ </li>
49
  <li class="nav-item">
50
  <a href="/moderation" class="nav-link">
51
  <span class="icon">🛡️</span>
templates/settings.html CHANGED
@@ -200,6 +200,12 @@
200
  <span>Search</span>
201
  </a>
202
  </li>
 
 
 
 
 
 
203
  <li class="nav-item">
204
  <a href="/moderation" class="nav-link">
205
  <span class="icon">🛡️</span>
 
200
  <span>Search</span>
201
  </a>
202
  </li>
203
+ <li class="nav-item">
204
+ <a href="/ai-search" class="nav-link">
205
+ <span class="icon">🤖</span>
206
+ <span>AI Search</span>
207
+ </a>
208
+ </li>
209
  <li class="nav-item">
210
  <a href="/moderation" class="nav-link">
211
  <span class="icon">🛡️</span>
templates/users.html CHANGED
@@ -41,6 +41,12 @@
41
  <span>Search</span>
42
  </a>
43
  </li>
 
 
 
 
 
 
44
  <li class="nav-item">
45
  <a href="/moderation" class="nav-link">
46
  <span class="icon">🛡️</span>
 
41
  <span>Search</span>
42
  </a>
43
  </li>
44
+ <li class="nav-item">
45
+ <a href="/ai-search" class="nav-link">
46
+ <span class="icon">🤖</span>
47
+ <span>AI Search</span>
48
+ </a>
49
+ </li>
50
  <li class="nav-item">
51
  <a href="/moderation" class="nav-link">
52
  <span class="icon">🛡️</span>