Spaces:
Sleeping
Sleeping
Update code
Browse files- Dockerfile +2 -0
- dashboard.py +208 -41
- gemini_client.py +226 -0
- hybrid_search.py +496 -0
- requirements.txt +2 -0
- semantic_search.py +4 -4
- templates/ai_search.html +449 -0
- templates/index.html +6 -0
- templates/moderation.html +6 -0
- templates/search.html +6 -0
- templates/settings.html +6 -0
- templates/users.html +6 -0
Dockerfile
CHANGED
|
@@ -14,6 +14,8 @@ COPY data_structures.py .
|
|
| 14 |
COPY indexer.py .
|
| 15 |
COPY search.py .
|
| 16 |
COPY semantic_search.py .
|
|
|
|
|
|
|
| 17 |
COPY schema.sql .
|
| 18 |
COPY static/ static/
|
| 19 |
COPY templates/ templates/
|
|
|
|
| 14 |
COPY indexer.py .
|
| 15 |
COPY search.py .
|
| 16 |
COPY semantic_search.py .
|
| 17 |
+
COPY hybrid_search.py .
|
| 18 |
+
COPY gemini_client.py .
|
| 19 |
COPY schema.sql .
|
| 20 |
COPY static/ static/
|
| 21 |
COPY templates/ templates/
|
dashboard.py
CHANGED
|
@@ -27,59 +27,96 @@ from collections import defaultdict
|
|
| 27 |
# DATABASE DOWNLOAD FROM HF DATASET
|
| 28 |
# ==========================================
|
| 29 |
HF_DATASET_REPO = "rottg/telegram-db"
|
| 30 |
-
DB_FILENAME = "telegram.db"
|
| 31 |
APP_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 32 |
-
DB_PATH_FULL = os.path.join(APP_DIR,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
|
| 35 |
def ensure_db_exists():
|
| 36 |
-
"""Download
|
| 37 |
-
print(f"[DB] Checking for database at: {DB_PATH_FULL}")
|
| 38 |
print(f"[DB] Current working directory: {os.getcwd()}")
|
| 39 |
|
|
|
|
| 40 |
if os.path.exists(DB_PATH_FULL):
|
| 41 |
size_mb = os.path.getsize(DB_PATH_FULL) / (1024 * 1024)
|
| 42 |
-
print(f"✓
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
with open(token_file) as f:
|
| 58 |
-
token = f.read().strip()
|
| 59 |
-
print(f"[DB] HF_TOKEN from file: set")
|
| 60 |
-
|
| 61 |
-
# Download to cache, then copy to app dir
|
| 62 |
-
cached_path = hf_hub_download(
|
| 63 |
-
repo_id=HF_DATASET_REPO,
|
| 64 |
-
filename=DB_FILENAME,
|
| 65 |
-
repo_type="dataset",
|
| 66 |
-
token=token,
|
| 67 |
-
)
|
| 68 |
-
print(f"[DB] Downloaded to cache: {cached_path}")
|
| 69 |
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
size_mb = os.path.getsize(
|
| 73 |
-
print(f"✓
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
|
| 82 |
-
# Download
|
| 83 |
ensure_db_exists()
|
| 84 |
|
| 85 |
# ==========================================
|
|
@@ -256,6 +293,12 @@ def settings_page():
|
|
| 256 |
return render_template('settings.html')
|
| 257 |
|
| 258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
# ==========================================
|
| 260 |
# API ENDPOINTS - OVERVIEW STATS
|
| 261 |
# ==========================================
|
|
@@ -1844,6 +1887,130 @@ def api_ai_search():
|
|
| 1844 |
return jsonify({'error': str(e), 'query': query})
|
| 1845 |
|
| 1846 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1847 |
def fallback_ai_search(query: str):
|
| 1848 |
"""Fallback search when AI is not available."""
|
| 1849 |
conn = get_db()
|
|
|
|
| 27 |
# DATABASE DOWNLOAD FROM HF DATASET
|
| 28 |
# ==========================================
|
| 29 |
HF_DATASET_REPO = "rottg/telegram-db"
|
|
|
|
| 30 |
APP_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 31 |
+
DB_PATH_FULL = os.path.join(APP_DIR, "telegram.db")
|
| 32 |
+
EMBEDDINGS_PATH_FULL = os.path.join(APP_DIR, "embeddings.db")
|
| 33 |
+
CHUNK_EMBEDDINGS_PATH = os.path.join(APP_DIR, "chunk_embeddings.db")
|
| 34 |
+
BM25_INDEX_PATH = os.path.join(APP_DIR, "bm25_index.pkl")
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def download_from_hf(filename, local_path):
|
| 38 |
+
"""Download a file from HF Dataset repo."""
|
| 39 |
+
from huggingface_hub import hf_hub_download
|
| 40 |
+
import shutil
|
| 41 |
+
|
| 42 |
+
token = os.environ.get("HF_TOKEN")
|
| 43 |
+
if not token:
|
| 44 |
+
token_file = os.path.join(APP_DIR, ".hf_token")
|
| 45 |
+
if os.path.exists(token_file):
|
| 46 |
+
with open(token_file) as f:
|
| 47 |
+
token = f.read().strip()
|
| 48 |
+
|
| 49 |
+
cached_path = hf_hub_download(
|
| 50 |
+
repo_id=HF_DATASET_REPO,
|
| 51 |
+
filename=filename,
|
| 52 |
+
repo_type="dataset",
|
| 53 |
+
token=token,
|
| 54 |
+
)
|
| 55 |
+
shutil.copy2(cached_path, local_path)
|
| 56 |
+
return True
|
| 57 |
|
| 58 |
|
| 59 |
def ensure_db_exists():
|
| 60 |
+
"""Download DBs from HF Dataset repo if they don't exist locally."""
|
|
|
|
| 61 |
print(f"[DB] Current working directory: {os.getcwd()}")
|
| 62 |
|
| 63 |
+
# Download telegram.db
|
| 64 |
if os.path.exists(DB_PATH_FULL):
|
| 65 |
size_mb = os.path.getsize(DB_PATH_FULL) / (1024 * 1024)
|
| 66 |
+
print(f"✓ telegram.db found ({size_mb:.0f} MB)")
|
| 67 |
+
else:
|
| 68 |
+
print(f"[DB] Downloading telegram.db from HF...")
|
| 69 |
+
try:
|
| 70 |
+
download_from_hf("telegram.db", DB_PATH_FULL)
|
| 71 |
+
size_mb = os.path.getsize(DB_PATH_FULL) / (1024 * 1024)
|
| 72 |
+
print(f"✓ telegram.db downloaded ({size_mb:.0f} MB)")
|
| 73 |
+
except Exception as e:
|
| 74 |
+
print(f"✗ Failed to download telegram.db: {e}")
|
| 75 |
+
return False
|
| 76 |
|
| 77 |
+
# Download embeddings.db (optional - for semantic search)
|
| 78 |
+
if os.path.exists(EMBEDDINGS_PATH_FULL):
|
| 79 |
+
size_mb = os.path.getsize(EMBEDDINGS_PATH_FULL) / (1024 * 1024)
|
| 80 |
+
print(f"✓ embeddings.db found ({size_mb:.0f} MB)")
|
| 81 |
+
else:
|
| 82 |
+
print(f"[DB] Downloading embeddings.db from HF...")
|
| 83 |
+
try:
|
| 84 |
+
download_from_hf("embeddings.db", EMBEDDINGS_PATH_FULL)
|
| 85 |
+
size_mb = os.path.getsize(EMBEDDINGS_PATH_FULL) / (1024 * 1024)
|
| 86 |
+
print(f"✓ embeddings.db downloaded ({size_mb:.0f} MB)")
|
| 87 |
+
except Exception as e:
|
| 88 |
+
print(f"⚠ embeddings.db not available: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
+
# Download chunk_embeddings.db (for hybrid search)
|
| 91 |
+
if os.path.exists(CHUNK_EMBEDDINGS_PATH):
|
| 92 |
+
size_mb = os.path.getsize(CHUNK_EMBEDDINGS_PATH) / (1024 * 1024)
|
| 93 |
+
print(f"✓ chunk_embeddings.db found ({size_mb:.0f} MB)")
|
| 94 |
+
else:
|
| 95 |
+
print(f"[DB] Downloading chunk_embeddings.db from HF...")
|
| 96 |
+
try:
|
| 97 |
+
download_from_hf("chunk_embeddings.db", CHUNK_EMBEDDINGS_PATH)
|
| 98 |
+
size_mb = os.path.getsize(CHUNK_EMBEDDINGS_PATH) / (1024 * 1024)
|
| 99 |
+
print(f"✓ chunk_embeddings.db downloaded ({size_mb:.0f} MB)")
|
| 100 |
+
except Exception as e:
|
| 101 |
+
print(f"⚠ chunk_embeddings.db not available: {e}")
|
| 102 |
+
|
| 103 |
+
# Download bm25_index.pkl (for hybrid search)
|
| 104 |
+
if os.path.exists(BM25_INDEX_PATH):
|
| 105 |
+
size_mb = os.path.getsize(BM25_INDEX_PATH) / (1024 * 1024)
|
| 106 |
+
print(f"✓ bm25_index.pkl found ({size_mb:.0f} MB)")
|
| 107 |
+
else:
|
| 108 |
+
print(f"[DB] Downloading bm25_index.pkl from HF...")
|
| 109 |
+
try:
|
| 110 |
+
download_from_hf("bm25_index.pkl", BM25_INDEX_PATH)
|
| 111 |
+
size_mb = os.path.getsize(BM25_INDEX_PATH) / (1024 * 1024)
|
| 112 |
+
print(f"✓ bm25_index.pkl downloaded ({size_mb:.0f} MB)")
|
| 113 |
+
except Exception as e:
|
| 114 |
+
print(f"⚠ bm25_index.pkl not available: {e}")
|
| 115 |
+
|
| 116 |
+
return True
|
| 117 |
|
| 118 |
|
| 119 |
+
# Download DBs on module import (for gunicorn)
|
| 120 |
ensure_db_exists()
|
| 121 |
|
| 122 |
# ==========================================
|
|
|
|
| 293 |
return render_template('settings.html')
|
| 294 |
|
| 295 |
|
| 296 |
+
@app.route('/ai-search')
|
| 297 |
+
def ai_search_page():
|
| 298 |
+
"""AI-powered search page with Gemini."""
|
| 299 |
+
return render_template('ai_search.html')
|
| 300 |
+
|
| 301 |
+
|
| 302 |
# ==========================================
|
| 303 |
# API ENDPOINTS - OVERVIEW STATS
|
| 304 |
# ==========================================
|
|
|
|
| 1887 |
return jsonify({'error': str(e), 'query': query})
|
| 1888 |
|
| 1889 |
|
| 1890 |
+
@app.route('/api/hybrid/search', methods=['POST'])
|
| 1891 |
+
def api_hybrid_search():
|
| 1892 |
+
"""
|
| 1893 |
+
Hybrid search combining:
|
| 1894 |
+
- Chunk-based vector search (conversation context)
|
| 1895 |
+
- BM25 keyword search (exact matches)
|
| 1896 |
+
- Query expansion (synonyms, variations)
|
| 1897 |
+
"""
|
| 1898 |
+
data = request.get_json()
|
| 1899 |
+
query = data.get('query', '')
|
| 1900 |
+
limit = data.get('limit', 20)
|
| 1901 |
+
include_context = data.get('include_context', True)
|
| 1902 |
+
|
| 1903 |
+
if not query:
|
| 1904 |
+
return jsonify({'error': 'Query required'})
|
| 1905 |
+
|
| 1906 |
+
try:
|
| 1907 |
+
from hybrid_search import get_hybrid_search
|
| 1908 |
+
hs = get_hybrid_search()
|
| 1909 |
+
|
| 1910 |
+
# Get stats
|
| 1911 |
+
stats = hs.stats()
|
| 1912 |
+
if not stats.get('chunks_available') and not stats.get('single_embeddings_available'):
|
| 1913 |
+
return jsonify({
|
| 1914 |
+
'error': 'No search indexes available. Run the Colab notebook first.',
|
| 1915 |
+
'stats': stats
|
| 1916 |
+
})
|
| 1917 |
+
|
| 1918 |
+
# Search with or without context
|
| 1919 |
+
if include_context:
|
| 1920 |
+
results = hs.search_with_context(query, limit=limit)
|
| 1921 |
+
else:
|
| 1922 |
+
results = hs.hybrid_search(query, limit=limit)
|
| 1923 |
+
|
| 1924 |
+
# Get expanded queries for display
|
| 1925 |
+
expanded = hs.expand_query(query)
|
| 1926 |
+
|
| 1927 |
+
return jsonify({
|
| 1928 |
+
'query': query,
|
| 1929 |
+
'expanded_queries': expanded,
|
| 1930 |
+
'results': results,
|
| 1931 |
+
'count': len(results),
|
| 1932 |
+
'stats': stats,
|
| 1933 |
+
'mode': 'hybrid'
|
| 1934 |
+
})
|
| 1935 |
+
|
| 1936 |
+
except ImportError as e:
|
| 1937 |
+
return jsonify({'error': f'Hybrid search not available: {str(e)}'})
|
| 1938 |
+
except Exception as e:
|
| 1939 |
+
import traceback
|
| 1940 |
+
return jsonify({
|
| 1941 |
+
'error': str(e),
|
| 1942 |
+
'traceback': traceback.format_exc()
|
| 1943 |
+
})
|
| 1944 |
+
|
| 1945 |
+
|
| 1946 |
+
@app.route('/api/gemini/search', methods=['POST'])
|
| 1947 |
+
def api_gemini_search():
|
| 1948 |
+
"""
|
| 1949 |
+
AI-powered search using Gemini 1.5 Flash.
|
| 1950 |
+
Combines hybrid search with Gemini for natural language answers.
|
| 1951 |
+
"""
|
| 1952 |
+
data = request.get_json()
|
| 1953 |
+
query = data.get('query', '')
|
| 1954 |
+
limit = data.get('limit', 5)
|
| 1955 |
+
|
| 1956 |
+
if not query:
|
| 1957 |
+
return jsonify({'error': 'Query required'})
|
| 1958 |
+
|
| 1959 |
+
try:
|
| 1960 |
+
from gemini_client import ai_search, get_gemini_client
|
| 1961 |
+
|
| 1962 |
+
# Check if Gemini is available
|
| 1963 |
+
client = get_gemini_client()
|
| 1964 |
+
if not client.is_available():
|
| 1965 |
+
# Fall back to hybrid search without AI
|
| 1966 |
+
from hybrid_search import get_hybrid_search
|
| 1967 |
+
hs = get_hybrid_search()
|
| 1968 |
+
results = hs.search_with_context(query, limit=limit)
|
| 1969 |
+
|
| 1970 |
+
return jsonify({
|
| 1971 |
+
'query': query,
|
| 1972 |
+
'success': False,
|
| 1973 |
+
'error': 'Gemini API not available. Set GEMINI_API_KEY environment variable.',
|
| 1974 |
+
'search_results': results,
|
| 1975 |
+
'count': len(results),
|
| 1976 |
+
'mode': 'hybrid_only'
|
| 1977 |
+
})
|
| 1978 |
+
|
| 1979 |
+
# Perform AI search
|
| 1980 |
+
result = ai_search(query, limit=limit)
|
| 1981 |
+
|
| 1982 |
+
return jsonify(result)
|
| 1983 |
+
|
| 1984 |
+
except ImportError as e:
|
| 1985 |
+
return jsonify({'error': f'AI search not available: {str(e)}'})
|
| 1986 |
+
except Exception as e:
|
| 1987 |
+
import traceback
|
| 1988 |
+
return jsonify({
|
| 1989 |
+
'error': str(e),
|
| 1990 |
+
'traceback': traceback.format_exc()
|
| 1991 |
+
})
|
| 1992 |
+
|
| 1993 |
+
|
| 1994 |
+
@app.route('/api/gemini/status')
|
| 1995 |
+
def api_gemini_status():
|
| 1996 |
+
"""Check Gemini API status."""
|
| 1997 |
+
try:
|
| 1998 |
+
from gemini_client import get_gemini_client
|
| 1999 |
+
client = get_gemini_client()
|
| 2000 |
+
|
| 2001 |
+
api_key = os.environ.get('GEMINI_API_KEY', '')
|
| 2002 |
+
return jsonify({
|
| 2003 |
+
'available': client.is_available(),
|
| 2004 |
+
'api_key_set': bool(api_key),
|
| 2005 |
+
'api_key_preview': f"{api_key[:8]}..." if len(api_key) > 8 else None
|
| 2006 |
+
})
|
| 2007 |
+
except Exception as e:
|
| 2008 |
+
return jsonify({
|
| 2009 |
+
'available': False,
|
| 2010 |
+
'error': str(e)
|
| 2011 |
+
})
|
| 2012 |
+
|
| 2013 |
+
|
| 2014 |
def fallback_ai_search(query: str):
|
| 2015 |
"""Fallback search when AI is not available."""
|
| 2016 |
conn = get_db()
|
gemini_client.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gemini AI Client for Chat Search
|
| 3 |
+
Uses Gemini 1.5 Flash to summarize search results and answer questions.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import json
|
| 8 |
+
from typing import List, Dict, Optional
|
| 9 |
+
|
| 10 |
+
# Try importing Google Generative AI
|
| 11 |
+
try:
|
| 12 |
+
import google.generativeai as genai
|
| 13 |
+
HAS_GEMINI = True
|
| 14 |
+
except ImportError:
|
| 15 |
+
HAS_GEMINI = False
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class GeminiClient:
|
| 19 |
+
"""Client for Gemini AI API."""
|
| 20 |
+
|
| 21 |
+
def __init__(self, api_key: Optional[str] = None):
|
| 22 |
+
self.api_key = api_key or os.environ.get('GEMINI_API_KEY')
|
| 23 |
+
self.model = None
|
| 24 |
+
self._initialized = False
|
| 25 |
+
|
| 26 |
+
def _initialize(self):
|
| 27 |
+
"""Initialize the Gemini client."""
|
| 28 |
+
if self._initialized:
|
| 29 |
+
return True
|
| 30 |
+
|
| 31 |
+
if not HAS_GEMINI:
|
| 32 |
+
print("google-generativeai not installed")
|
| 33 |
+
return False
|
| 34 |
+
|
| 35 |
+
if not self.api_key:
|
| 36 |
+
print("GEMINI_API_KEY not set")
|
| 37 |
+
return False
|
| 38 |
+
|
| 39 |
+
try:
|
| 40 |
+
genai.configure(api_key=self.api_key)
|
| 41 |
+
self.model = genai.GenerativeModel('gemini-1.5-flash')
|
| 42 |
+
self._initialized = True
|
| 43 |
+
print("Gemini client initialized")
|
| 44 |
+
return True
|
| 45 |
+
except Exception as e:
|
| 46 |
+
print(f"Failed to initialize Gemini: {e}")
|
| 47 |
+
return False
|
| 48 |
+
|
| 49 |
+
def answer_from_context(self, query: str, search_results: List[Dict],
|
| 50 |
+
max_results: int = 5) -> Dict:
|
| 51 |
+
"""
|
| 52 |
+
Generate an answer based on search results.
|
| 53 |
+
|
| 54 |
+
Args:
|
| 55 |
+
query: User's question
|
| 56 |
+
search_results: List of search results with context
|
| 57 |
+
max_results: Max results to include in context
|
| 58 |
+
|
| 59 |
+
Returns:
|
| 60 |
+
Dict with 'answer', 'sources', and 'success'
|
| 61 |
+
"""
|
| 62 |
+
if not self._initialize():
|
| 63 |
+
return {
|
| 64 |
+
'success': False,
|
| 65 |
+
'error': 'Gemini not available',
|
| 66 |
+
'answer': None
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
# Build context from search results
|
| 70 |
+
context_parts = []
|
| 71 |
+
sources = []
|
| 72 |
+
|
| 73 |
+
for i, result in enumerate(search_results[:max_results]):
|
| 74 |
+
# Handle different result formats
|
| 75 |
+
if 'message' in result:
|
| 76 |
+
# search_with_context format
|
| 77 |
+
msg = result['message']
|
| 78 |
+
context_parts.append(f"""
|
| 79 |
+
--- תוצאה {i+1} (ציון: {result.get('score', 0):.2f}) ---
|
| 80 |
+
מאת: {msg.get('from_name', 'לא ידוע')}
|
| 81 |
+
תאריך: {msg.get('date', 'לא ידוע')}
|
| 82 |
+
הודעה: {msg.get('text', '')}
|
| 83 |
+
""")
|
| 84 |
+
sources.append({
|
| 85 |
+
'from_name': msg.get('from_name'),
|
| 86 |
+
'date': msg.get('date'),
|
| 87 |
+
'message_id': result.get('message_id')
|
| 88 |
+
})
|
| 89 |
+
|
| 90 |
+
# Add context if available
|
| 91 |
+
if result.get('context_before'):
|
| 92 |
+
context_parts.append("הקשר לפני:")
|
| 93 |
+
for ctx in result['context_before']:
|
| 94 |
+
context_parts.append(f" [{ctx.get('from_name', '?')}] {ctx.get('text_plain', '')[:100]}")
|
| 95 |
+
|
| 96 |
+
if result.get('context_after'):
|
| 97 |
+
context_parts.append("הקשר אחרי:")
|
| 98 |
+
for ctx in result['context_after']:
|
| 99 |
+
context_parts.append(f" [{ctx.get('from_name', '?')}] {ctx.get('text_plain', '')[:100]}")
|
| 100 |
+
|
| 101 |
+
elif 'chunk_text' in result:
|
| 102 |
+
# hybrid_search format
|
| 103 |
+
context_parts.append(f"""
|
| 104 |
+
--- תוצאה {i+1} (ציון: {result.get('score', 0):.2f}) ---
|
| 105 |
+
{result.get('chunk_text', '')}
|
| 106 |
+
""")
|
| 107 |
+
sources.append({
|
| 108 |
+
'message_id': result.get('message_id'),
|
| 109 |
+
'score': result.get('score')
|
| 110 |
+
})
|
| 111 |
+
|
| 112 |
+
context = "\n".join(context_parts)
|
| 113 |
+
|
| 114 |
+
# Build prompt
|
| 115 |
+
prompt = f"""אתה עוזר שמנתח שיחות מקבוצת טלגרם ועונה על שאלות.
|
| 116 |
+
|
| 117 |
+
השאלה: {query}
|
| 118 |
+
|
| 119 |
+
להלן תוצאות חיפוש רלוונטיות מהשיחות:
|
| 120 |
+
|
| 121 |
+
{context}
|
| 122 |
+
|
| 123 |
+
הנחיות:
|
| 124 |
+
1. ענה בעברית
|
| 125 |
+
2. תן תשובה קצרה וממוקדת (1-3 משפטים)
|
| 126 |
+
3. אם המידע לא ברור או לא קיים בתוצאות, אמור "לא מצאתי מידע ברור"
|
| 127 |
+
4. ציין את המקור (שם השולח והתאריך) אם רלוונטי
|
| 128 |
+
5. אל תמציא מידע שלא מופיע בתוצאות
|
| 129 |
+
|
| 130 |
+
התשובה:"""
|
| 131 |
+
|
| 132 |
+
try:
|
| 133 |
+
response = self.model.generate_content(prompt)
|
| 134 |
+
answer = response.text.strip()
|
| 135 |
+
|
| 136 |
+
return {
|
| 137 |
+
'success': True,
|
| 138 |
+
'answer': answer,
|
| 139 |
+
'sources': sources,
|
| 140 |
+
'query': query,
|
| 141 |
+
'results_used': len(context_parts)
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
except Exception as e:
|
| 145 |
+
return {
|
| 146 |
+
'success': False,
|
| 147 |
+
'error': str(e),
|
| 148 |
+
'answer': None
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
def is_available(self) -> bool:
|
| 152 |
+
"""Check if Gemini is available."""
|
| 153 |
+
return self._initialize()
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
# Singleton instance
|
| 157 |
+
_gemini_client = None
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def get_gemini_client() -> GeminiClient:
|
| 161 |
+
"""Get or create Gemini client instance."""
|
| 162 |
+
global _gemini_client
|
| 163 |
+
if _gemini_client is None:
|
| 164 |
+
_gemini_client = GeminiClient()
|
| 165 |
+
return _gemini_client
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def ai_search(query: str, limit: int = 5) -> Dict:
|
| 169 |
+
"""
|
| 170 |
+
Perform AI-powered search: hybrid search + Gemini summarization.
|
| 171 |
+
|
| 172 |
+
Args:
|
| 173 |
+
query: Search query
|
| 174 |
+
limit: Max results to use
|
| 175 |
+
|
| 176 |
+
Returns:
|
| 177 |
+
Dict with answer and metadata
|
| 178 |
+
"""
|
| 179 |
+
from hybrid_search import get_hybrid_search
|
| 180 |
+
|
| 181 |
+
# Get hybrid search results
|
| 182 |
+
hs = get_hybrid_search()
|
| 183 |
+
results = hs.search_with_context(query, limit=limit)
|
| 184 |
+
|
| 185 |
+
if not results:
|
| 186 |
+
return {
|
| 187 |
+
'success': False,
|
| 188 |
+
'error': 'No search results found',
|
| 189 |
+
'answer': 'לא נמצאו תוצאות לחיפוש זה',
|
| 190 |
+
'query': query
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
# Get AI answer
|
| 194 |
+
client = get_gemini_client()
|
| 195 |
+
response = client.answer_from_context(query, results, max_results=limit)
|
| 196 |
+
|
| 197 |
+
# Add raw results for transparency
|
| 198 |
+
response['search_results'] = results
|
| 199 |
+
|
| 200 |
+
return response
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
# CLI for testing
|
| 204 |
+
if __name__ == '__main__':
|
| 205 |
+
import sys
|
| 206 |
+
|
| 207 |
+
if len(sys.argv) < 2:
|
| 208 |
+
print("Usage: python gemini_client.py 'search query'")
|
| 209 |
+
print("\nChecking Gemini availability...")
|
| 210 |
+
client = get_gemini_client()
|
| 211 |
+
if client.is_available():
|
| 212 |
+
print("Gemini is available!")
|
| 213 |
+
else:
|
| 214 |
+
print("Gemini is NOT available. Set GEMINI_API_KEY environment variable.")
|
| 215 |
+
sys.exit(0)
|
| 216 |
+
|
| 217 |
+
query = ' '.join(sys.argv[1:])
|
| 218 |
+
print(f"\n=== AI Search: {query} ===\n")
|
| 219 |
+
|
| 220 |
+
result = ai_search(query)
|
| 221 |
+
|
| 222 |
+
if result['success']:
|
| 223 |
+
print(f"Answer: {result['answer']}")
|
| 224 |
+
print(f"\nSources: {len(result.get('sources', []))} results used")
|
| 225 |
+
else:
|
| 226 |
+
print(f"Error: {result.get('error')}")
|
hybrid_search.py
ADDED
|
@@ -0,0 +1,496 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Hybrid Search - Combines Vector Search, BM25, and Query Expansion
|
| 3 |
+
|
| 4 |
+
This provides much better search for chat data by:
|
| 5 |
+
1. Chunk-based vector search (captures context)
|
| 6 |
+
2. BM25 keyword search (finds exact matches)
|
| 7 |
+
3. Query expansion (handles variations)
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import sqlite3
|
| 11 |
+
import numpy as np
|
| 12 |
+
import pickle
|
| 13 |
+
import re
|
| 14 |
+
import os
|
| 15 |
+
from typing import List, Dict, Any, Optional
|
| 16 |
+
|
| 17 |
+
# Try importing sentence-transformers
|
| 18 |
+
try:
|
| 19 |
+
from sentence_transformers import SentenceTransformer
|
| 20 |
+
HAS_TRANSFORMERS = True
|
| 21 |
+
except ImportError:
|
| 22 |
+
HAS_TRANSFORMERS = False
|
| 23 |
+
|
| 24 |
+
# Try importing BM25
|
| 25 |
+
try:
|
| 26 |
+
from rank_bm25 import BM25Okapi
|
| 27 |
+
HAS_BM25 = True
|
| 28 |
+
except ImportError:
|
| 29 |
+
HAS_BM25 = False
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class HybridSearch:
|
| 33 |
+
"""
|
| 34 |
+
Hybrid search combining:
|
| 35 |
+
- Chunk-based vector search (conversation context)
|
| 36 |
+
- BM25 keyword search (exact matches)
|
| 37 |
+
- Query expansion (synonyms, variations)
|
| 38 |
+
"""
|
| 39 |
+
|
| 40 |
+
def __init__(self,
|
| 41 |
+
messages_db: str = 'telegram.db',
|
| 42 |
+
chunk_embeddings_db: str = 'chunk_embeddings.db',
|
| 43 |
+
bm25_index_path: str = 'bm25_index.pkl',
|
| 44 |
+
single_embeddings_db: str = 'embeddings.db'):
|
| 45 |
+
self.messages_db = messages_db
|
| 46 |
+
self.chunk_embeddings_db = chunk_embeddings_db
|
| 47 |
+
self.bm25_index_path = bm25_index_path
|
| 48 |
+
self.single_embeddings_db = single_embeddings_db
|
| 49 |
+
|
| 50 |
+
# Lazy-loaded components
|
| 51 |
+
self.model = None
|
| 52 |
+
self.chunk_embeddings = None
|
| 53 |
+
self.chunk_data = None
|
| 54 |
+
self.bm25 = None
|
| 55 |
+
self.bm25_message_ids = None
|
| 56 |
+
self.single_embeddings = None
|
| 57 |
+
self.single_message_ids = None
|
| 58 |
+
|
| 59 |
+
def _load_model(self):
|
| 60 |
+
"""Load the embedding model."""
|
| 61 |
+
if self.model is not None:
|
| 62 |
+
return
|
| 63 |
+
|
| 64 |
+
if not HAS_TRANSFORMERS:
|
| 65 |
+
raise RuntimeError("sentence-transformers not installed")
|
| 66 |
+
|
| 67 |
+
print("Loading embedding model...")
|
| 68 |
+
self.model = SentenceTransformer('intfloat/multilingual-e5-large')
|
| 69 |
+
print("Model loaded!")
|
| 70 |
+
|
| 71 |
+
def _load_chunk_embeddings(self):
|
| 72 |
+
"""Load chunk embeddings."""
|
| 73 |
+
if self.chunk_embeddings is not None:
|
| 74 |
+
return True
|
| 75 |
+
|
| 76 |
+
if not os.path.exists(self.chunk_embeddings_db):
|
| 77 |
+
print(f"Chunk embeddings not found: {self.chunk_embeddings_db}")
|
| 78 |
+
return False
|
| 79 |
+
|
| 80 |
+
print(f"Loading chunk embeddings from {self.chunk_embeddings_db}...")
|
| 81 |
+
conn = sqlite3.connect(self.chunk_embeddings_db)
|
| 82 |
+
|
| 83 |
+
# Check if chunk_type column exists (for backwards compatibility)
|
| 84 |
+
cursor = conn.execute("PRAGMA table_info(chunk_embeddings)")
|
| 85 |
+
columns = [col[1] for col in cursor.fetchall()]
|
| 86 |
+
has_type = 'chunk_type' in columns
|
| 87 |
+
|
| 88 |
+
if has_type:
|
| 89 |
+
rows = conn.execute("""
|
| 90 |
+
SELECT chunk_id, chunk_type, text, message_ids, anchor_message_id, embedding
|
| 91 |
+
FROM chunk_embeddings
|
| 92 |
+
""").fetchall()
|
| 93 |
+
else:
|
| 94 |
+
rows = conn.execute("""
|
| 95 |
+
SELECT chunk_id, 'window' as chunk_type, text, message_ids, anchor_message_id, embedding
|
| 96 |
+
FROM chunk_embeddings
|
| 97 |
+
""").fetchall()
|
| 98 |
+
conn.close()
|
| 99 |
+
|
| 100 |
+
if not rows:
|
| 101 |
+
return False
|
| 102 |
+
|
| 103 |
+
import json
|
| 104 |
+
self.chunk_data = []
|
| 105 |
+
emb_list = []
|
| 106 |
+
|
| 107 |
+
for row in rows:
|
| 108 |
+
chunk_id, chunk_type, text, msg_ids_json, anchor_id, emb_blob = row
|
| 109 |
+
emb = np.frombuffer(emb_blob, dtype=np.float32)
|
| 110 |
+
emb_list.append(emb)
|
| 111 |
+
self.chunk_data.append({
|
| 112 |
+
'chunk_id': chunk_id,
|
| 113 |
+
'chunk_type': chunk_type,
|
| 114 |
+
'text': text,
|
| 115 |
+
'message_ids': json.loads(msg_ids_json),
|
| 116 |
+
'anchor_message_id': anchor_id
|
| 117 |
+
})
|
| 118 |
+
|
| 119 |
+
self.chunk_embeddings = np.vstack(emb_list)
|
| 120 |
+
# Normalize
|
| 121 |
+
norms = np.linalg.norm(self.chunk_embeddings, axis=1, keepdims=True)
|
| 122 |
+
self.chunk_embeddings = self.chunk_embeddings / np.where(norms == 0, 1, norms)
|
| 123 |
+
|
| 124 |
+
print(f"Loaded {len(self.chunk_data)} chunk embeddings")
|
| 125 |
+
return True
|
| 126 |
+
|
| 127 |
+
def _load_single_embeddings(self):
|
| 128 |
+
"""Load single-message embeddings (fallback)."""
|
| 129 |
+
if self.single_embeddings is not None:
|
| 130 |
+
return True
|
| 131 |
+
|
| 132 |
+
if not os.path.exists(self.single_embeddings_db):
|
| 133 |
+
return False
|
| 134 |
+
|
| 135 |
+
print(f"Loading single embeddings from {self.single_embeddings_db}...")
|
| 136 |
+
conn = sqlite3.connect(self.single_embeddings_db)
|
| 137 |
+
|
| 138 |
+
rows = conn.execute("""
|
| 139 |
+
SELECT message_id, embedding FROM embeddings
|
| 140 |
+
""").fetchall()
|
| 141 |
+
conn.close()
|
| 142 |
+
|
| 143 |
+
if not rows:
|
| 144 |
+
return False
|
| 145 |
+
|
| 146 |
+
self.single_message_ids = []
|
| 147 |
+
emb_list = []
|
| 148 |
+
|
| 149 |
+
for row in rows:
|
| 150 |
+
msg_id, emb_blob = row
|
| 151 |
+
emb = np.frombuffer(emb_blob, dtype=np.float32)
|
| 152 |
+
emb_list.append(emb)
|
| 153 |
+
self.single_message_ids.append(msg_id)
|
| 154 |
+
|
| 155 |
+
self.single_embeddings = np.vstack(emb_list)
|
| 156 |
+
norms = np.linalg.norm(self.single_embeddings, axis=1, keepdims=True)
|
| 157 |
+
self.single_embeddings = self.single_embeddings / np.where(norms == 0, 1, norms)
|
| 158 |
+
|
| 159 |
+
print(f"Loaded {len(self.single_message_ids)} single embeddings")
|
| 160 |
+
return True
|
| 161 |
+
|
| 162 |
+
def _load_bm25(self):
|
| 163 |
+
"""Load BM25 index."""
|
| 164 |
+
if self.bm25 is not None:
|
| 165 |
+
return True
|
| 166 |
+
|
| 167 |
+
if not os.path.exists(self.bm25_index_path):
|
| 168 |
+
print(f"BM25 index not found: {self.bm25_index_path}")
|
| 169 |
+
return False
|
| 170 |
+
|
| 171 |
+
print(f"Loading BM25 index from {self.bm25_index_path}...")
|
| 172 |
+
with open(self.bm25_index_path, 'rb') as f:
|
| 173 |
+
data = pickle.load(f)
|
| 174 |
+
|
| 175 |
+
self.bm25 = data['bm25']
|
| 176 |
+
self.bm25_message_ids = data['message_ids']
|
| 177 |
+
print(f"Loaded BM25 index with {len(self.bm25_message_ids)} documents")
|
| 178 |
+
return True
|
| 179 |
+
|
| 180 |
+
def expand_query(self, query: str) -> List[str]:
    """
    Expand a Hebrew query with synonym variations.

    Each known question word / keyword that occurs in the query is
    substituted by its synonyms to produce additional query variants.
    Matching is whole-word (so e.g. 'מי' does not match inside 'מימון',
    which the previous substring matching would have corrupted).

    Returns:
        Up to five query variations; the original query is always first.
    """
    queries = [query]

    # Hebrew question word expansions
    expansions = {
        'איפה': ['איפה', 'היכן', 'מיקום', 'כתובת', 'עיר'],
        'מתי': ['מתי', 'באיזה תאריך', 'מועד', 'זמן'],
        'מי': ['מי', 'מיהו', 'מיהי', 'שם'],
        'כמה': ['כמה', 'מספר', 'כמות'],
        'למה': ['למה', 'מדוע', 'סיבה'],
        'גר': ['גר', 'גרה', 'מתגורר', 'מתגוררת', 'גרים'],
        'עובד': ['עובד', 'עובדת', 'עובדים', 'מועסק', 'עבודה'],
    }

    # Add expanded variations (whole-word matches only).
    for word, synonyms in expansions.items():
        pattern = re.compile(r'\b' + re.escape(word) + r'\b')
        if not pattern.search(query):
            continue
        for syn in synonyms:
            if syn == word:
                continue
            expanded = pattern.sub(syn, query)
            if expanded not in queries:
                queries.append(expanded)

    return queries[:5]  # Limit to 5 variations
|
| 208 |
+
|
| 209 |
+
def search_chunks(self, query: str, limit: int = 20) -> List[Dict]:
    """Context-aware semantic search over chunk embeddings.

    Encodes the query (with the e5 "query: " prefix), scores every chunk
    by dot product against the chunk embedding matrix, and returns the
    top `limit` chunks with their metadata and scores.
    """
    if not self._load_chunk_embeddings():
        return []

    self._load_model()

    # e5 models expect queries to carry the "query: " prefix.
    raw = self.model.encode([f"query: {query}"], convert_to_numpy=True)[0]
    unit_query = raw / np.linalg.norm(raw)

    # Score all chunks at once, then take the best `limit` indices.
    scores = self.chunk_embeddings @ unit_query
    ranked = np.argsort(scores)[::-1][:limit]

    return [
        {
            'type': 'chunk',
            'chunk_type': self.chunk_data[i].get('chunk_type', 'window'),  # 'thread' or 'window'
            'chunk_id': self.chunk_data[i]['chunk_id'],
            'text': self.chunk_data[i]['text'],
            'message_ids': self.chunk_data[i]['message_ids'],
            'anchor_message_id': self.chunk_data[i]['anchor_message_id'],
            'score': float(scores[i]),
        }
        for i in ranked
    ]
|
| 241 |
+
|
| 242 |
+
def search_bm25(self, query: str, limit: int = 20) -> List[Dict]:
    """Keyword search via BM25.

    Tokenizes the query into lowercase word tokens, scores every indexed
    document, and returns up to `limit` results with strictly positive
    scores, best first.
    """
    if not self._load_bm25():
        return []

    # Lowercased word tokens, mirroring how the index was built.
    tokens = re.findall(r'\w+', query.lower())
    scores = self.bm25.get_scores(tokens)

    # Rank descending; zero-score documents did not match at all.
    order = np.argsort(scores)[::-1][:limit]
    return [
        {'type': 'bm25', 'message_id': self.bm25_message_ids[i], 'score': float(scores[i])}
        for i in order
        if scores[i] > 0
    ]
|
| 267 |
+
|
| 268 |
+
def search_single(self, query: str, limit: int = 20) -> List[Dict]:
    """Semantic search over per-message embeddings (fallback path)."""
    if not self._load_single_embeddings():
        return []

    self._load_model()

    # e5 models expect the "query: " prefix on queries.
    encoded = self.model.encode([f"query: {query}"], convert_to_numpy=True)[0]
    unit = encoded / np.linalg.norm(encoded)

    sims = self.single_embeddings.dot(unit)
    best = np.argsort(sims)[::-1][:limit]

    return [
        {'type': 'single', 'message_id': self.single_message_ids[i], 'score': float(sims[i])}
        for i in best
    ]
|
| 291 |
+
|
| 292 |
+
def hybrid_search(self, query: str, limit: int = 20,
                  vector_weight: float = 0.6,
                  bm25_weight: float = 0.4,
                  use_expansion: bool = True) -> List[Dict]:
    """
    Hybrid search combining chunk/vector similarity and BM25 keyword scores.

    Args:
        query: Search query
        limit: Max results
        vector_weight: Weight for vector search (0-1)
        bm25_weight: Weight for BM25 search (0-1)
        use_expansion: Whether to expand query

    Returns:
        Results sorted by combined score, each carrying the per-source
        sub-scores and (if available) the chunk text that matched.
    """
    per_message = {}

    def _entry(msg_id):
        # Lazily create the score record for a message id.
        if msg_id not in per_message:
            per_message[msg_id] = {'vector': 0, 'bm25': 0, 'chunk_text': None}
        return per_message[msg_id]

    variants = self.expand_query(query) if use_expansion else [query]

    for variant in variants:
        # Chunk/vector search: a chunk hit credits every message inside it.
        for hit in self.search_chunks(variant, limit=limit * 2):
            for msg_id in hit['message_ids']:
                rec = _entry(msg_id)
                # Keep the best weighted score across variants and chunks.
                rec['vector'] = max(rec['vector'], hit['score'] * vector_weight)
                if rec['chunk_text'] is None:
                    rec['chunk_text'] = hit['text']

        # BM25 keyword search.
        for hit in self.search_bm25(variant, limit=limit * 2):
            rec = _entry(hit['message_id'])
            # BM25 scores are unbounded; divide by 10 as a rough normalization.
            rec['bm25'] = max(rec['bm25'], hit['score'] * bm25_weight / 10)

    combined = [
        {
            'message_id': msg_id,
            'score': rec['vector'] + rec['bm25'],
            'vector_score': rec['vector'],
            'bm25_score': rec['bm25'],
            'chunk_text': rec['chunk_text'],
        }
        for msg_id, rec in per_message.items()
    ]
    combined.sort(key=lambda item: item['score'], reverse=True)
    return combined[:limit]
|
| 357 |
+
|
| 358 |
+
def search_with_context(self, query: str, limit: int = 20,
                        context_window: int = 3) -> List[Dict]:
    """
    Search and return results with surrounding context.

    Runs hybrid_search, then enriches each hit with the matched message
    row plus the `context_window` messages immediately before and after
    it (ordered by date_unixtime) from the messages database.

    Args:
        query: Search query
        limit: Max results
        context_window: Messages before/after to include

    Returns:
        Results with full context. Hits whose message row no longer
        exists in the database are silently dropped.
    """
    # Get hybrid search results
    results = self.hybrid_search(query, limit=limit)

    if not results:
        return []

    # Get full context from DB
    conn = sqlite3.connect(self.messages_db)
    conn.row_factory = sqlite3.Row  # allow column access by name

    enriched = []
    for r in results:
        msg_id = r['message_id']

        # Get the message
        msg = conn.execute(
            "SELECT * FROM messages WHERE id = ?", (msg_id,)
        ).fetchone()

        if not msg:
            continue

        # Get surrounding messages.
        # NOTE(review): these context queries are not scoped to a chat or
        # thread, so they assume the messages table holds a single
        # conversation — confirm against the schema/importer.
        context_before = conn.execute("""
            SELECT id, date, from_name, text_plain FROM messages
            WHERE date_unixtime < (SELECT date_unixtime FROM messages WHERE id = ?)
            ORDER BY date_unixtime DESC LIMIT ?
        """, (msg_id, context_window)).fetchall()

        context_after = conn.execute("""
            SELECT id, date, from_name, text_plain FROM messages
            WHERE date_unixtime > (SELECT date_unixtime FROM messages WHERE id = ?)
            ORDER BY date_unixtime ASC LIMIT ?
        """, (msg_id, context_window)).fetchall()

        enriched.append({
            'message_id': msg_id,
            'score': r['score'],
            'message': {
                'id': msg['id'],
                'date': msg['date'],
                'from_name': msg['from_name'],
                'text': msg['text_plain']
            },
            # context_before comes back newest-first; reverse to chronological.
            'context_before': [dict(m) for m in reversed(context_before)],
            'context_after': [dict(m) for m in context_after],
            'chunk_text': r.get('chunk_text')
        })

    conn.close()
    return enriched
|
| 422 |
+
|
| 423 |
+
def stats(self) -> Dict[str, Any]:
    """Report which search indexes exist on disk, with row counts where present."""
    report = {
        'chunks_available': os.path.exists(self.chunk_embeddings_db),
        'bm25_available': os.path.exists(self.bm25_index_path),
        'single_embeddings_available': os.path.exists(self.single_embeddings_db),
    }

    if report['chunks_available']:
        conn = sqlite3.connect(self.chunk_embeddings_db)
        report['chunk_count'] = conn.execute(
            "SELECT COUNT(*) FROM chunk_embeddings"
        ).fetchone()[0]
        conn.close()

    if report['single_embeddings_available']:
        conn = sqlite3.connect(self.single_embeddings_db)
        report['single_embedding_count'] = conn.execute(
            "SELECT COUNT(*) FROM embeddings"
        ).fetchone()[0]
        conn.close()

    return report
|
| 446 |
+
|
| 447 |
+
|
| 448 |
+
# Singleton instance
# Module-level cache for the lazily created HybridSearch (see get_hybrid_search).
_hybrid_search = None


def get_hybrid_search() -> HybridSearch:
    """Get or create the process-wide HybridSearch instance.

    The instance is created on first call and reused afterwards. The
    heavy resources (embeddings, BM25 index, model) are loaded lazily by
    the instance's own _load_* helpers, not here.

    NOTE(review): no locking — concurrent first calls could each build an
    instance; confirm this is acceptable for the serving setup.
    """
    global _hybrid_search
    if _hybrid_search is None:
        _hybrid_search = HybridSearch()
    return _hybrid_search
|
| 458 |
+
|
| 459 |
+
|
| 460 |
+
# CLI for testing
if __name__ == '__main__':
    import sys

    # With no arguments: print usage plus index availability stats, then exit.
    if len(sys.argv) < 2:
        print("Usage: python hybrid_search.py 'search query'")
        print("\nStats:")
        hs = get_hybrid_search()
        print(hs.stats())
        sys.exit(0)

    # All remaining CLI arguments form a single search query.
    query = ' '.join(sys.argv[1:])
    hs = get_hybrid_search()

    print(f"\n=== Searching: {query} ===\n")

    # Show expanded queries
    expanded = hs.expand_query(query)
    print(f"Expanded queries: {expanded}\n")

    # Search
    results = hs.search_with_context(query, limit=5)

    # Pretty-print each hit with its surrounding conversation context.
    for i, r in enumerate(results, 1):
        print(f"--- Result {i} (score: {r['score']:.3f}) ---")
        print(f"From: {r['message']['from_name']}")
        print(f"Date: {r['message']['date']}")
        print(f"Text: {r['message']['text'][:200]}...")
        if r['context_before']:
            print(f"\nContext before:")
            for ctx in r['context_before']:
                print(f"  [{ctx['from_name']}] {ctx['text_plain'][:100]}...")
        if r['context_after']:
            print(f"\nContext after:")
            for ctx in r['context_after']:
                print(f"  [{ctx['from_name']}] {ctx['text_plain'][:100]}...")
        print()
|
requirements.txt
CHANGED
|
@@ -3,3 +3,5 @@ gunicorn>=21.2
|
|
| 3 |
requests>=2.31
|
| 4 |
ijson>=3.2
|
| 5 |
huggingface_hub>=0.20
|
|
|
|
|
|
|
|
|
| 3 |
requests>=2.31
|
| 4 |
ijson>=3.2
|
| 5 |
huggingface_hub>=0.20
|
| 6 |
+
rank_bm25>=0.2.2
|
| 7 |
+
google-generativeai>=0.3.0
|
semantic_search.py
CHANGED
|
@@ -43,7 +43,7 @@ class SemanticSearch:
|
|
| 43 |
)
|
| 44 |
if self.model is None:
|
| 45 |
print("Loading embedding model...")
|
| 46 |
-
self.model = SentenceTransformer('
|
| 47 |
print("Model loaded!")
|
| 48 |
|
| 49 |
def reload_embeddings(self):
|
|
@@ -117,8 +117,8 @@ class SemanticSearch:
|
|
| 117 |
if len(self.message_ids) == 0:
|
| 118 |
return []
|
| 119 |
|
| 120 |
-
# Encode query
|
| 121 |
-
query_emb = self.model.encode([query], convert_to_numpy=True)[0]
|
| 122 |
|
| 123 |
# Compute cosine similarity with all embeddings
|
| 124 |
# embeddings are already normalized from Colab
|
|
@@ -384,7 +384,7 @@ Answer:"""
|
|
| 384 |
'available': True,
|
| 385 |
'count': count,
|
| 386 |
'size_mb': round(size_mb, 1),
|
| 387 |
-
'model': '
|
| 388 |
}
|
| 389 |
|
| 390 |
|
|
|
|
| 43 |
)
|
| 44 |
if self.model is None:
|
| 45 |
print("Loading embedding model...")
|
| 46 |
+
self.model = SentenceTransformer('intfloat/multilingual-e5-large')
|
| 47 |
print("Model loaded!")
|
| 48 |
|
| 49 |
def reload_embeddings(self):
|
|
|
|
| 117 |
if len(self.message_ids) == 0:
|
| 118 |
return []
|
| 119 |
|
| 120 |
+
# Encode query (e5 model requires "query: " prefix)
|
| 121 |
+
query_emb = self.model.encode([f"query: {query}"], convert_to_numpy=True)[0]
|
| 122 |
|
| 123 |
# Compute cosine similarity with all embeddings
|
| 124 |
# embeddings are already normalized from Colab
|
|
|
|
| 384 |
'available': True,
|
| 385 |
'count': count,
|
| 386 |
'size_mb': round(size_mb, 1),
|
| 387 |
+
'model': 'intfloat/multilingual-e5-large'
|
| 388 |
}
|
| 389 |
|
| 390 |
|
templates/ai_search.html
ADDED
|
@@ -0,0 +1,449 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="he" dir="rtl">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>AI Search - Telegram Analytics</title>
|
| 7 |
+
<link rel="stylesheet" href="/static/css/style.css">
|
| 8 |
+
<style>
|
| 9 |
+
.ai-container {
|
| 10 |
+
max-width: 900px;
|
| 11 |
+
margin: 0 auto;
|
| 12 |
+
}
|
| 13 |
+
.ai-search-box {
|
| 14 |
+
background: var(--card-bg);
|
| 15 |
+
border-radius: var(--radius-lg);
|
| 16 |
+
padding: var(--spacing-lg);
|
| 17 |
+
margin-bottom: var(--spacing-lg);
|
| 18 |
+
border: 1px solid var(--border-color);
|
| 19 |
+
}
|
| 20 |
+
.ai-search-input {
|
| 21 |
+
width: 100%;
|
| 22 |
+
padding: var(--spacing-md);
|
| 23 |
+
font-size: 1.1rem;
|
| 24 |
+
border: 2px solid var(--border-color);
|
| 25 |
+
border-radius: var(--radius-md);
|
| 26 |
+
background: var(--bg-secondary);
|
| 27 |
+
color: var(--text-primary);
|
| 28 |
+
margin-bottom: var(--spacing-md);
|
| 29 |
+
direction: rtl;
|
| 30 |
+
}
|
| 31 |
+
.ai-search-input:focus {
|
| 32 |
+
outline: none;
|
| 33 |
+
border-color: var(--accent-color);
|
| 34 |
+
}
|
| 35 |
+
.ai-search-btn {
|
| 36 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 37 |
+
color: white;
|
| 38 |
+
border: none;
|
| 39 |
+
padding: var(--spacing-md) var(--spacing-xl);
|
| 40 |
+
font-size: 1rem;
|
| 41 |
+
font-weight: 600;
|
| 42 |
+
border-radius: var(--radius-md);
|
| 43 |
+
cursor: pointer;
|
| 44 |
+
display: flex;
|
| 45 |
+
align-items: center;
|
| 46 |
+
gap: var(--spacing-sm);
|
| 47 |
+
transition: transform 0.2s, box-shadow 0.2s;
|
| 48 |
+
}
|
| 49 |
+
.ai-search-btn:hover {
|
| 50 |
+
transform: translateY(-2px);
|
| 51 |
+
box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
|
| 52 |
+
}
|
| 53 |
+
.ai-search-btn:disabled {
|
| 54 |
+
opacity: 0.6;
|
| 55 |
+
cursor: not-allowed;
|
| 56 |
+
transform: none;
|
| 57 |
+
}
|
| 58 |
+
.ai-answer-box {
|
| 59 |
+
background: var(--card-bg);
|
| 60 |
+
border-radius: var(--radius-lg);
|
| 61 |
+
padding: var(--spacing-lg);
|
| 62 |
+
margin-bottom: var(--spacing-lg);
|
| 63 |
+
border: 1px solid var(--border-color);
|
| 64 |
+
display: none;
|
| 65 |
+
}
|
| 66 |
+
.ai-answer-box.visible {
|
| 67 |
+
display: block;
|
| 68 |
+
}
|
| 69 |
+
.ai-answer-header {
|
| 70 |
+
display: flex;
|
| 71 |
+
align-items: center;
|
| 72 |
+
gap: var(--spacing-sm);
|
| 73 |
+
margin-bottom: var(--spacing-md);
|
| 74 |
+
color: var(--accent-color);
|
| 75 |
+
font-weight: 600;
|
| 76 |
+
}
|
| 77 |
+
.ai-answer-content {
|
| 78 |
+
font-size: 1.1rem;
|
| 79 |
+
line-height: 1.8;
|
| 80 |
+
color: var(--text-primary);
|
| 81 |
+
white-space: pre-wrap;
|
| 82 |
+
}
|
| 83 |
+
.ai-sources {
|
| 84 |
+
margin-top: var(--spacing-lg);
|
| 85 |
+
padding-top: var(--spacing-lg);
|
| 86 |
+
border-top: 1px solid var(--border-color);
|
| 87 |
+
}
|
| 88 |
+
.ai-sources-header {
|
| 89 |
+
font-weight: 600;
|
| 90 |
+
color: var(--text-secondary);
|
| 91 |
+
margin-bottom: var(--spacing-md);
|
| 92 |
+
}
|
| 93 |
+
.source-item {
|
| 94 |
+
background: var(--bg-secondary);
|
| 95 |
+
border-radius: var(--radius-md);
|
| 96 |
+
padding: var(--spacing-md);
|
| 97 |
+
margin-bottom: var(--spacing-sm);
|
| 98 |
+
border-right: 3px solid var(--accent-color);
|
| 99 |
+
}
|
| 100 |
+
.source-meta {
|
| 101 |
+
font-size: 0.85rem;
|
| 102 |
+
color: var(--text-muted);
|
| 103 |
+
margin-bottom: var(--spacing-xs);
|
| 104 |
+
}
|
| 105 |
+
.source-text {
|
| 106 |
+
color: var(--text-secondary);
|
| 107 |
+
font-size: 0.95rem;
|
| 108 |
+
}
|
| 109 |
+
.context-message {
|
| 110 |
+
background: var(--bg-secondary);
|
| 111 |
+
border-radius: var(--radius-md);
|
| 112 |
+
padding: var(--spacing-md);
|
| 113 |
+
margin-bottom: var(--spacing-sm);
|
| 114 |
+
border-right: 3px solid transparent;
|
| 115 |
+
}
|
| 116 |
+
.context-message.main {
|
| 117 |
+
border-right-color: var(--accent-color);
|
| 118 |
+
background: var(--card-bg);
|
| 119 |
+
}
|
| 120 |
+
.status-badge {
|
| 121 |
+
display: inline-flex;
|
| 122 |
+
align-items: center;
|
| 123 |
+
gap: var(--spacing-xs);
|
| 124 |
+
padding: var(--spacing-xs) var(--spacing-sm);
|
| 125 |
+
border-radius: var(--radius-sm);
|
| 126 |
+
font-size: 0.8rem;
|
| 127 |
+
margin-right: var(--spacing-sm);
|
| 128 |
+
}
|
| 129 |
+
.status-badge.available {
|
| 130 |
+
background: rgba(46, 204, 113, 0.2);
|
| 131 |
+
color: #2ecc71;
|
| 132 |
+
}
|
| 133 |
+
.status-badge.unavailable {
|
| 134 |
+
background: rgba(231, 76, 60, 0.2);
|
| 135 |
+
color: #e74c3c;
|
| 136 |
+
}
|
| 137 |
+
.example-queries {
|
| 138 |
+
display: flex;
|
| 139 |
+
flex-wrap: wrap;
|
| 140 |
+
gap: var(--spacing-sm);
|
| 141 |
+
margin-top: var(--spacing-md);
|
| 142 |
+
}
|
| 143 |
+
.example-query {
|
| 144 |
+
background: var(--bg-secondary);
|
| 145 |
+
color: var(--text-secondary);
|
| 146 |
+
border: 1px solid var(--border-color);
|
| 147 |
+
padding: var(--spacing-xs) var(--spacing-sm);
|
| 148 |
+
border-radius: var(--radius-sm);
|
| 149 |
+
font-size: 0.85rem;
|
| 150 |
+
cursor: pointer;
|
| 151 |
+
transition: all 0.2s;
|
| 152 |
+
}
|
| 153 |
+
.example-query:hover {
|
| 154 |
+
background: var(--accent-color);
|
| 155 |
+
color: white;
|
| 156 |
+
border-color: var(--accent-color);
|
| 157 |
+
}
|
| 158 |
+
.loading-animation {
|
| 159 |
+
display: flex;
|
| 160 |
+
align-items: center;
|
| 161 |
+
gap: var(--spacing-sm);
|
| 162 |
+
}
|
| 163 |
+
.loading-dots {
|
| 164 |
+
display: flex;
|
| 165 |
+
gap: 4px;
|
| 166 |
+
}
|
| 167 |
+
.loading-dots span {
|
| 168 |
+
width: 8px;
|
| 169 |
+
height: 8px;
|
| 170 |
+
background: var(--accent-color);
|
| 171 |
+
border-radius: 50%;
|
| 172 |
+
animation: bounce 1.4s infinite ease-in-out both;
|
| 173 |
+
}
|
| 174 |
+
.loading-dots span:nth-child(1) { animation-delay: -0.32s; }
|
| 175 |
+
.loading-dots span:nth-child(2) { animation-delay: -0.16s; }
|
| 176 |
+
@keyframes bounce {
|
| 177 |
+
0%, 80%, 100% { transform: scale(0); }
|
| 178 |
+
40% { transform: scale(1); }
|
| 179 |
+
}
|
| 180 |
+
</style>
|
| 181 |
+
</head>
|
| 182 |
+
<body>
|
| 183 |
+
<button class="mobile-menu-btn" onclick="toggleMobileMenu()">☰</button>
|
| 184 |
+
<div class="sidebar-overlay" onclick="toggleMobileMenu()"></div>
|
| 185 |
+
<!-- Sidebar -->
|
| 186 |
+
<nav class="sidebar">
|
| 187 |
+
<div class="logo">
|
| 188 |
+
<span class="logo-icon">📊</span>
|
| 189 |
+
<span class="logo-text">TG Analytics</span>
|
| 190 |
+
</div>
|
| 191 |
+
<ul class="nav-menu">
|
| 192 |
+
<li class="nav-item">
|
| 193 |
+
<a href="/" class="nav-link">
|
| 194 |
+
<span class="icon">📈</span>
|
| 195 |
+
<span>Overview</span>
|
| 196 |
+
</a>
|
| 197 |
+
</li>
|
| 198 |
+
<li class="nav-item">
|
| 199 |
+
<a href="/users" class="nav-link">
|
| 200 |
+
<span class="icon">👥</span>
|
| 201 |
+
<span>Users</span>
|
| 202 |
+
</a>
|
| 203 |
+
</li>
|
| 204 |
+
<li class="nav-item">
|
| 205 |
+
<a href="/chat" class="nav-link">
|
| 206 |
+
<span class="icon">💬</span>
|
| 207 |
+
<span>Chat</span>
|
| 208 |
+
</a>
|
| 209 |
+
</li>
|
| 210 |
+
<li class="nav-item">
|
| 211 |
+
<a href="/search" class="nav-link">
|
| 212 |
+
<span class="icon">🔍</span>
|
| 213 |
+
<span>Search</span>
|
| 214 |
+
</a>
|
| 215 |
+
</li>
|
| 216 |
+
<li class="nav-item active">
|
| 217 |
+
<a href="/ai-search" class="nav-link">
|
| 218 |
+
<span class="icon">🤖</span>
|
| 219 |
+
<span>AI Search</span>
|
| 220 |
+
</a>
|
| 221 |
+
</li>
|
| 222 |
+
<li class="nav-item">
|
| 223 |
+
<a href="/moderation" class="nav-link">
|
| 224 |
+
<span class="icon">🛡</span>
|
| 225 |
+
<span>Moderation</span>
|
| 226 |
+
</a>
|
| 227 |
+
</li>
|
| 228 |
+
<li class="nav-item">
|
| 229 |
+
<a href="/settings" class="nav-link">
|
| 230 |
+
<span class="icon">⚙</span>
|
| 231 |
+
<span>Settings</span>
|
| 232 |
+
</a>
|
| 233 |
+
</li>
|
| 234 |
+
</ul>
|
| 235 |
+
</nav>
|
| 236 |
+
|
| 237 |
+
<!-- Main Content -->
|
| 238 |
+
<main class="main-content">
|
| 239 |
+
<!-- Header -->
|
| 240 |
+
<header class="header">
|
| 241 |
+
<h1>🤖 AI Search</h1>
|
| 242 |
+
<div class="header-controls">
|
| 243 |
+
<span id="gemini-status" class="status-badge unavailable">Checking...</span>
|
| 244 |
+
</div>
|
| 245 |
+
</header>
|
| 246 |
+
|
| 247 |
+
<div class="ai-container">
|
| 248 |
+
<!-- Search Box -->
|
| 249 |
+
<div class="ai-search-box">
|
| 250 |
+
<input type="text" id="ai-query" class="ai-search-input"
|
| 251 |
+
placeholder="שאל שאלה בשפה טבעית... (איפה דני גר?)"
|
| 252 |
+
onkeypress="if(event.key === 'Enter') performAISearch()">
|
| 253 |
+
<div style="display: flex; justify-content: space-between; align-items: center; flex-wrap: wrap; gap: var(--spacing-md);">
|
| 254 |
+
<button onclick="performAISearch()" class="ai-search-btn" id="search-btn">
|
| 255 |
+
<span>🤖</span> Search with AI
|
| 256 |
+
</button>
|
| 257 |
+
<div class="example-queries">
|
| 258 |
+
<span style="color: var(--text-muted); font-size: 0.85rem;">Examples:</span>
|
| 259 |
+
<button class="example-query" onclick="setQuery('איפה דני גר?')">איפה דני גר?</button>
|
| 260 |
+
<button class="example-query" onclick="setQuery('מי הכי פעיל בקבוצה?')">מי הכי פעיל?</button>
|
| 261 |
+
<button class="example-query" onclick="setQuery('מה דיברו על הבחירות?')">מה דיברו על...?</button>
|
| 262 |
+
</div>
|
| 263 |
+
</div>
|
| 264 |
+
</div>
|
| 265 |
+
|
| 266 |
+
<!-- Answer Box -->
|
| 267 |
+
<div class="ai-answer-box" id="answer-box">
|
| 268 |
+
<div class="ai-answer-header">
|
| 269 |
+
<span>🤖</span> AI Answer
|
| 270 |
+
</div>
|
| 271 |
+
<div class="ai-answer-content" id="answer-content">
|
| 272 |
+
<!-- Answer will be inserted here -->
|
| 273 |
+
</div>
|
| 274 |
+
<div class="ai-sources" id="sources-section" style="display: none;">
|
| 275 |
+
<div class="ai-sources-header">📖 Sources Used</div>
|
| 276 |
+
<div id="sources-list">
|
| 277 |
+
<!-- Sources will be inserted here -->
|
| 278 |
+
</div>
|
| 279 |
+
</div>
|
| 280 |
+
</div>
|
| 281 |
+
|
| 282 |
+
<!-- How it works -->
|
| 283 |
+
<div class="chart-card">
|
| 284 |
+
<div class="chart-header">
|
| 285 |
+
<h3>💡 How AI Search Works</h3>
|
| 286 |
+
</div>
|
| 287 |
+
<div style="padding: var(--spacing-md); color: var(--text-secondary); font-size: 0.9rem; direction: rtl;">
|
| 288 |
+
<ol style="line-height: 2;">
|
| 289 |
+
<li><strong>Hybrid Search</strong> - מחפש ב-BM25 (מילים) + Vector Search (משמעות)</li>
|
| 290 |
+
<li><strong>Thread Chunking</strong> - מקבץ שאלות ותשובות יחד</li>
|
| 291 |
+
<li><strong>Query Expansion</strong> - מרחיב את השאילתה עם מילים נרדפות</li>
|
| 292 |
+
<li><strong>Gemini 1.5 Flash</strong> - מסכם את התוצאות לתשובה אחת</li>
|
| 293 |
+
</ol>
|
| 294 |
+
</div>
|
| 295 |
+
</div>
|
| 296 |
+
</div>
|
| 297 |
+
</main>
|
| 298 |
+
|
| 299 |
+
<script>
|
| 300 |
+
// Check Gemini status on load
|
| 301 |
+
// Check Gemini status on load
// Queries the backend status endpoint and flips the header badge between
// "Ready" / "Unavailable"; any network or parse failure shows "Error".
async function checkGeminiStatus() {
    try {
        const response = await fetch('/api/gemini/status');
        const data = await response.json();

        const badge = document.getElementById('gemini-status');
        if (data.available) {
            badge.className = 'status-badge available';
            badge.innerHTML = '✓ Gemini Ready';
        } else {
            badge.className = 'status-badge unavailable';
            badge.innerHTML = '✗ Gemini Unavailable';
        }
    } catch (e) {
        // Endpoint unreachable or returned invalid JSON.
        const badge = document.getElementById('gemini-status');
        badge.className = 'status-badge unavailable';
        badge.innerHTML = '✗ Error';
    }
}
|
| 320 |
+
|
| 321 |
+
// Prefill the AI query input with an example query and focus it.
function setQuery(query) {
    const input = document.getElementById('ai-query');
    input.value = query;
    input.focus();
}
|
| 325 |
+
|
| 326 |
+
// Run an AI-assisted search: POST the query to the backend, show a loading
// state, then render either the AI answer, a degraded hybrid-only result
// list, or an error message. Always restores the search button at the end.
async function performAISearch() {
    const query = document.getElementById('ai-query').value.trim();
    if (!query) return;

    const btn = document.getElementById('search-btn');
    const answerBox = document.getElementById('answer-box');
    const answerContent = document.getElementById('answer-content');
    const sourcesSection = document.getElementById('sources-section');
    // NOTE(review): sourcesList is unused here — the display helpers look it
    // up themselves.
    const sourcesList = document.getElementById('sources-list');

    // Show loading
    btn.disabled = true;
    btn.innerHTML = '<div class="loading-animation"><div class="loading-dots"><span></span><span></span><span></span></div> Searching...</div>';

    answerBox.classList.add('visible');
    answerContent.innerHTML = '<div class="loading-animation"><div class="loading-dots"><span></span><span></span><span></span></div> <span>Searching and analyzing...</span></div>';
    sourcesSection.style.display = 'none';

    try {
        const response = await fetch('/api/gemini/search', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ query, limit: 5 })
        });

        const data = await response.json();

        if (data.error && !data.answer) {
            // Hard failure: no answer at all.
            answerContent.innerHTML = `<span style="color: #e74c3c;">❌ Error: ${escapeHtml(data.error)}</span>`;
        } else if (data.success === false && data.error) {
            // Gemini not available, show hybrid results
            answerContent.innerHTML = `<span style="color: #f39c12;">⚠ ${escapeHtml(data.error)}</span><br><br>Showing search results without AI summarization:`;

            if (data.search_results && data.search_results.length > 0) {
                displaySources(data.search_results);
            }
        } else {
            // Success with AI answer
            answerContent.textContent = data.answer || 'No answer available';

            // Show sources
            if (data.search_results && data.search_results.length > 0) {
                displaySources(data.search_results);
            } else if (data.sources && data.sources.length > 0) {
                displaySourcesMeta(data.sources);
            }
        }
    } catch (error) {
        answerContent.innerHTML = `<span style="color: #e74c3c;">❌ Error: ${escapeHtml(error.message)}</span>`;
    }

    // Reset button
    btn.disabled = false;
    btn.innerHTML = '<span>🤖</span> Search with AI';
}
|
| 381 |
+
|
| 382 |
+
// Render full search results (author, date, score, text snippet, and any
// preceding-context messages) into the sources panel. All user-provided
// strings go through escapeHtml before insertion.
function displaySources(results) {
    const sourcesSection = document.getElementById('sources-section');
    const sourcesList = document.getElementById('sources-list');

    sourcesSection.style.display = 'block';

    sourcesList.innerHTML = results.map(result => {
        // Accept both {message: {...}} wrappers and flat message objects.
        const msg = result.message || result;
        const score = result.score ? ` (${(result.score * 100).toFixed(0)}%)` : '';

        let html = `<div class="source-item">
            <div class="source-meta">
                ${escapeHtml(msg.from_name || 'Unknown')} - ${msg.date || ''}${score}
            </div>
            <div class="source-text">${escapeHtml((msg.text || '').substring(0, 200))}${(msg.text || '').length > 200 ? '...' : ''}</div>`;

        // Show context if available
        if (result.context_before && result.context_before.length > 0) {
            html += '<div style="margin-top: 0.5rem; padding-top: 0.5rem; border-top: 1px dashed var(--border-color);">';
            result.context_before.forEach(ctx => {
                html += `<div class="context-message"><small>${escapeHtml(ctx.from_name || '?')}</small>: ${escapeHtml((ctx.text_plain || '').substring(0, 100))}</div>`;
            });
            html += '</div>';
        }

        html += '</div>';
        return html;
    }).join('');
}
|
| 411 |
+
|
| 412 |
+
function displaySourcesMeta(sources) {
|
| 413 |
+
const sourcesSection = document.getElementById('sources-section');
|
| 414 |
+
const sourcesList = document.getElementById('sources-list');
|
| 415 |
+
|
| 416 |
+
if (sources.length === 0) return;
|
| 417 |
+
|
| 418 |
+
sourcesSection.style.display = 'block';
|
| 419 |
+
sourcesList.innerHTML = sources.map(src => `
|
| 420 |
+
<div class="source-item">
|
| 421 |
+
<div class="source-meta">
|
| 422 |
+
${escapeHtml(src.from_name || 'Unknown')} - ${src.date || ''}
|
| 423 |
+
</div>
|
| 424 |
+
</div>
|
| 425 |
+
`).join('');
|
| 426 |
+
}
|
| 427 |
+
|
| 428 |
+
function escapeHtml(text) {
|
| 429 |
+
if (!text) return '';
|
| 430 |
+
const div = document.createElement('div');
|
| 431 |
+
div.textContent = text;
|
| 432 |
+
return div.innerHTML;
|
| 433 |
+
}
|
| 434 |
+
|
| 435 |
+
function toggleMobileMenu() {
|
| 436 |
+
var s = document.querySelector('.sidebar');
|
| 437 |
+
var o = document.querySelector('.sidebar-overlay');
|
| 438 |
+
s.classList.toggle('open');
|
| 439 |
+
if (o) o.classList.toggle('active');
|
| 440 |
+
}
|
| 441 |
+
|
| 442 |
+
// Initialize
|
| 443 |
+
document.addEventListener('DOMContentLoaded', () => {
|
| 444 |
+
checkGeminiStatus();
|
| 445 |
+
document.getElementById('ai-query').focus();
|
| 446 |
+
});
|
| 447 |
+
</script>
|
| 448 |
+
</body>
|
| 449 |
+
</html>
|
templates/index.html
CHANGED
|
@@ -41,6 +41,12 @@
|
|
| 41 |
<span>Search</span>
|
| 42 |
</a>
|
| 43 |
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
<li class="nav-item">
|
| 45 |
<a href="/moderation" class="nav-link">
|
| 46 |
<span class="icon">🛡️</span>
|
|
|
|
| 41 |
<span>Search</span>
|
| 42 |
</a>
|
| 43 |
</li>
|
| 44 |
+
<li class="nav-item">
|
| 45 |
+
<a href="/ai-search" class="nav-link">
|
| 46 |
+
<span class="icon">🤖</span>
|
| 47 |
+
<span>AI Search</span>
|
| 48 |
+
</a>
|
| 49 |
+
</li>
|
| 50 |
<li class="nav-item">
|
| 51 |
<a href="/moderation" class="nav-link">
|
| 52 |
<span class="icon">🛡️</span>
|
templates/moderation.html
CHANGED
|
@@ -41,6 +41,12 @@
|
|
| 41 |
<span>Search</span>
|
| 42 |
</a>
|
| 43 |
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
<li class="nav-item active">
|
| 45 |
<a href="/moderation" class="nav-link">
|
| 46 |
<span class="icon">🛡️</span>
|
|
|
|
| 41 |
<span>Search</span>
|
| 42 |
</a>
|
| 43 |
</li>
|
| 44 |
+
<li class="nav-item">
|
| 45 |
+
<a href="/ai-search" class="nav-link">
|
| 46 |
+
<span class="icon">🤖</span>
|
| 47 |
+
<span>AI Search</span>
|
| 48 |
+
</a>
|
| 49 |
+
</li>
|
| 50 |
<li class="nav-item active">
|
| 51 |
<a href="/moderation" class="nav-link">
|
| 52 |
<span class="icon">🛡️</span>
|
templates/search.html
CHANGED
|
@@ -40,6 +40,12 @@
|
|
| 40 |
<span>Search</span>
|
| 41 |
</a>
|
| 42 |
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
<li class="nav-item">
|
| 44 |
<a href="/moderation" class="nav-link">
|
| 45 |
<span class="icon">🛡️</span>
|
|
|
|
| 40 |
<span>Search</span>
|
| 41 |
</a>
|
| 42 |
</li>
|
| 43 |
+
<li class="nav-item">
|
| 44 |
+
<a href="/ai-search" class="nav-link">
|
| 45 |
+
<span class="icon">🤖</span>
|
| 46 |
+
<span>AI Search</span>
|
| 47 |
+
</a>
|
| 48 |
+
</li>
|
| 49 |
<li class="nav-item">
|
| 50 |
<a href="/moderation" class="nav-link">
|
| 51 |
<span class="icon">🛡️</span>
|
templates/settings.html
CHANGED
|
@@ -200,6 +200,12 @@
|
|
| 200 |
<span>Search</span>
|
| 201 |
</a>
|
| 202 |
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
<li class="nav-item">
|
| 204 |
<a href="/moderation" class="nav-link">
|
| 205 |
<span class="icon">🛡️</span>
|
|
|
|
| 200 |
<span>Search</span>
|
| 201 |
</a>
|
| 202 |
</li>
|
| 203 |
+
<li class="nav-item">
|
| 204 |
+
<a href="/ai-search" class="nav-link">
|
| 205 |
+
<span class="icon">🤖</span>
|
| 206 |
+
<span>AI Search</span>
|
| 207 |
+
</a>
|
| 208 |
+
</li>
|
| 209 |
<li class="nav-item">
|
| 210 |
<a href="/moderation" class="nav-link">
|
| 211 |
<span class="icon">🛡️</span>
|
templates/users.html
CHANGED
|
@@ -41,6 +41,12 @@
|
|
| 41 |
<span>Search</span>
|
| 42 |
</a>
|
| 43 |
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
<li class="nav-item">
|
| 45 |
<a href="/moderation" class="nav-link">
|
| 46 |
<span class="icon">🛡️</span>
|
|
|
|
| 41 |
<span>Search</span>
|
| 42 |
</a>
|
| 43 |
</li>
|
| 44 |
+
<li class="nav-item">
|
| 45 |
+
<a href="/ai-search" class="nav-link">
|
| 46 |
+
<span class="icon">🤖</span>
|
| 47 |
+
<span>AI Search</span>
|
| 48 |
+
</a>
|
| 49 |
+
</li>
|
| 50 |
<li class="nav-item">
|
| 51 |
<a href="/moderation" class="nav-link">
|
| 52 |
<span class="icon">🛡️</span>
|