Spaces:
Sleeping
Sleeping
Commit ·
8b656e5
1
Parent(s): a9ab2f8
v2.2.0: ai-forever/ru-en-RoSBERTa + normalize_embeddings + cache fix
Browse files- .env.example +39 -4
- CHANGELOG.md +359 -0
- Dockerfile +30 -3
- README.md +106 -32
- main.py +670 -170
- requirements.txt +11 -1
.env.example
CHANGED
|
@@ -1,6 +1,41 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
| 3 |
-
|
| 4 |
|
| 5 |
-
#
|
|
|
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================
|
| 2 |
+
# Embedding Service v2.1.0 Configuration
|
| 3 |
+
# ============================================
|
| 4 |
|
| 5 |
+
# Model settings
|
| 6 |
+
EMBEDDING_MODEL=ai-forever/ru-en-RoSBERTa
|
| 7 |
|
| 8 |
+
# ============================================
|
| 9 |
+
# Limits (защита от перегрузки)
|
| 10 |
+
# ============================================
|
| 11 |
+
MAX_BATCH_SIZE=128 # Максимум элементов в батче
|
| 12 |
+
MAX_TEXT_LENGTH=10000 # Максимум символов в тексте
|
| 13 |
+
MAX_CONCURRENT_REQUESTS=6 # Параллельные encode операции
|
| 14 |
+
ENCODE_TIMEOUT_SECONDS=15.0 # Таймаут на encode
|
| 15 |
+
|
| 16 |
+
# ============================================
|
| 17 |
+
# Rate Limiting
|
| 18 |
+
# ============================================
|
| 19 |
+
RATE_LIMIT=100/minute # Лимит для /embed, /prepare-and-embed, /reindex
|
| 20 |
+
RATE_LIMIT_BATCH=60/minute # Лимит для /batch, /reindex-batch
|
| 21 |
+
|
| 22 |
+
# ============================================
|
| 23 |
+
# Caching (in-memory)
|
| 24 |
+
# ============================================
|
| 25 |
+
CACHE_ENABLED=true # Включить кэширование
|
| 26 |
+
CACHE_TTL_SECONDS=3600 # TTL = 1 час
|
| 27 |
+
CACHE_MAX_SIZE=10000 # Максимум 10k эмбеддингов в кэше
|
| 28 |
+
|
| 29 |
+
# ============================================
|
| 30 |
+
# Security
|
| 31 |
+
# ============================================
|
| 32 |
+
ALLOWED_ORIGINS=* # CORS origins (разделитель: запятая)
|
| 33 |
+
# API_KEY=your-secret-key # API key для авторизации (опционально)
|
| 34 |
+
|
| 35 |
+
# ============================================
|
| 36 |
+
# Production рекомендации
|
| 37 |
+
# ============================================
|
| 38 |
+
# 1. Измените ALLOWED_ORIGINS на конкретные домены
|
| 39 |
+
# 2. Установите API_KEY для защиты endpoints
|
| 40 |
+
# 3. Настройте Prometheus scraping на /metrics
|
| 41 |
+
# 4. Для Redis кэша добавьте REDIS_URL (будущее улучшение)
|
CHANGELOG.md
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Changelog
|
| 2 |
+
|
| 3 |
+
## [2.2.0] - 2024-12-20 - Model Upgrade & Critical Fixes
|
| 4 |
+
|
| 5 |
+
### 🔥 Критические исправления
|
| 6 |
+
|
| 7 |
+
1. **Новая модель: `ai-forever/ru-en-RoSBERTa`**
|
| 8 |
+
- Оптимизирована для русского языка
|
| 9 |
+
- Размерность: 768 (вместо 384)
|
| 10 |
+
- Лучшее качество для semantic matching
|
| 11 |
+
|
| 12 |
+
2. **Нормализация эмбеддингов**
|
| 13 |
+
```python
|
| 14 |
+
model.encode(
|
| 15 |
+
texts,
|
| 16 |
+
batch_size=32,
|
| 17 |
+
normalize_embeddings=True, # КРИТИЧНО для cosine similarity!
|
| 18 |
+
...
|
| 19 |
+
)
|
| 20 |
+
```
|
| 21 |
+
- pgvector + cosine (`<=>`) ожидает нормализованные векторы
|
| 22 |
+
- Без нормализации similarity "плывёт" и хуже ранжирование
|
| 23 |
+
|
| 24 |
+
3. **Унифицированная кэш-логика**
|
| 25 |
+
- Новая функция `encode_single_async_with_flag()` возвращает `(embedding, cached)`
|
| 26 |
+
- Исправлены двойные `CACHE_MISSES`
|
| 27 |
+
- Корректный флаг `cached` во всех ответах
|
| 28 |
+
|
| 29 |
+
4. **MAX_CONCURRENT_REQUESTS = 6** (было 4)
|
| 30 |
+
- Оптимально для 8-16 vCPU
|
| 31 |
+
|
| 32 |
+
### ⚠️ Breaking Changes
|
| 33 |
+
|
| 34 |
+
- **Размерность эмбеддингов изменилась: 384 → 768**
|
| 35 |
+
- Необходимо переиндексировать все объекты!
|
| 36 |
+
- SQL миграция:
|
| 37 |
+
```sql
|
| 38 |
+
ALTER TABLE leads DROP COLUMN embedding;
|
| 39 |
+
ALTER TABLE leads ADD COLUMN embedding vector(768);
|
| 40 |
+
-- Пересоздать индекс
|
| 41 |
+
CREATE INDEX ON leads USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
---
|
| 45 |
+
|
| 46 |
+
## [2.1.0] - 2024-12-19 - Production-Ready Release
|
| 47 |
+
|
| 48 |
+
### 🚀 Основные улучшения
|
| 49 |
+
|
| 50 |
+
Полная переработка сервиса для production-ready статуса по рекомендациям экспертов.
|
| 51 |
+
|
| 52 |
+
---
|
| 53 |
+
|
| 54 |
+
## Что было (v2.0.0) vs Что стало (v2.1.0)
|
| 55 |
+
|
| 56 |
+
### 1. Асинхронность и CPU/IO разграничение
|
| 57 |
+
|
| 58 |
+
**Было:**
|
| 59 |
+
```python
|
| 60 |
+
# Синхронный вызов, блокирует event loop FastAPI
|
| 61 |
+
embedding = model.encode(request.text, convert_to_numpy=True)
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
**Стало:**
|
| 65 |
+
```python
|
| 66 |
+
# Асинхронный вызов через ThreadPoolExecutor
|
| 67 |
+
async def encode_async(texts: List[str]) -> np.ndarray:
|
| 68 |
+
loop = asyncio.get_event_loop()
|
| 69 |
+
result = await asyncio.wait_for(
|
| 70 |
+
loop.run_in_executor(
|
| 71 |
+
executor,
|
| 72 |
+
lambda: model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
|
| 73 |
+
),
|
| 74 |
+
timeout=ENCODE_TIMEOUT_SECONDS
|
| 75 |
+
)
|
| 76 |
+
return result
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
**Влияние на индексацию:**
|
| 80 |
+
- ✅ Сервис остаётся отзывчивым при параллельных запросах
|
| 81 |
+
- ✅ Таймаут 30 секунд предотвращает "зависание" запросов
|
| 82 |
+
- ✅ До 4 параллельных encode операций (настраивается через `MAX_CONCURRENT_REQUESTS`)
|
| 83 |
+
|
| 84 |
+
---
|
| 85 |
+
|
| 86 |
+
### 2. Валидация входных данных
|
| 87 |
+
|
| 88 |
+
**Было:**
|
| 89 |
+
- Нет ограничений на размер текста
|
| 90 |
+
- Нет ограничений на размер батча
|
| 91 |
+
- Возможность DoS-атаки через огромные запросы
|
| 92 |
+
|
| 93 |
+
**Стало:**
|
| 94 |
+
```python
|
| 95 |
+
MAX_BATCH_SIZE = 128 # Максимум элементов в батче
|
| 96 |
+
MAX_TEXT_LENGTH = 10000 # Максимум символов в тексте
|
| 97 |
+
MAX_CONCURRENT_REQUESTS = 4 # Параллельные encode операции
|
| 98 |
+
ENCODE_TIMEOUT_SECONDS = 30 # Таймаут на encode
|
| 99 |
+
|
| 100 |
+
class EmbedRequest(BaseModel):
|
| 101 |
+
text: str = Field(..., min_length=1, max_length=MAX_TEXT_LENGTH)
|
| 102 |
+
|
| 103 |
+
class BatchRequest(BaseModel):
|
| 104 |
+
items: List[BatchItem] = Field(..., max_length=MAX_BATCH_SIZE)
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
**Влияние на индексацию:**
|
| 108 |
+
- ✅ Защита от перегрузки сервиса большими запросами
|
| 109 |
+
- ✅ Понятные 400 ошибки при превышении лимитов
|
| 110 |
+
- ✅ Предсказуемое время ответа
|
| 111 |
+
|
| 112 |
+
---
|
| 113 |
+
|
| 114 |
+
### 3. Prometheus метрики
|
| 115 |
+
|
| 116 |
+
**Было:**
|
| 117 |
+
- Нет метрик
|
| 118 |
+
- Невозможно отследить производительность
|
| 119 |
+
- "Слепой полёт" в production
|
| 120 |
+
|
| 121 |
+
**Стало:**
|
| 122 |
+
```python
|
| 123 |
+
# Endpoint /metrics возвращает:
|
| 124 |
+
embedding_requests_total{endpoint="/embed", status="success"} 150
|
| 125 |
+
embedding_request_latency_seconds_bucket{endpoint="/embed", le="0.1"} 120
|
| 126 |
+
embedding_batch_size_bucket{le="10"} 45
|
| 127 |
+
embedding_encode_failures_total{reason="timeout"} 2
|
| 128 |
+
embedding_model_loaded 1
|
| 129 |
+
embedding_cache_hits_total 89
|
| 130 |
+
embedding_cache_misses_total 61
|
| 131 |
+
embedding_active_requests 3
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
**Влияние на индексацию:**
|
| 135 |
+
- ✅ Мониторинг в Grafana: requests/s, latency, batch sizes
|
| 136 |
+
- ✅ Алерты на encode_failures и model_loaded
|
| 137 |
+
- ✅ Отслеживание cache hit rate для оптимизации
|
| 138 |
+
|
| 139 |
+
---
|
| 140 |
+
|
| 141 |
+
### 4. Rate Limiting
|
| 142 |
+
|
| 143 |
+
**Было:**
|
| 144 |
+
- Нет ограничений на частоту запросов
|
| 145 |
+
- Возможность перегрузки сервиса одним клиентом
|
| 146 |
+
|
| 147 |
+
**Стало:**
|
| 148 |
+
```python
|
| 149 |
+
RATE_LIMIT = "100/minute" # Для одиночных запросов
|
| 150 |
+
RATE_LIMIT_BATCH = "20/minute" # Для батчей
|
| 151 |
+
|
| 152 |
+
@app.post("/embed")
|
| 153 |
+
@limiter.limit(RATE_LIMIT)
|
| 154 |
+
async def embed_text(request: Request, body: EmbedRequest):
|
| 155 |
+
...
|
| 156 |
+
```
|
| 157 |
+
|
| 158 |
+
**Влияние на индексацию:**
|
| 159 |
+
- ✅ Защита от перегрузки
|
| 160 |
+
- ✅ Справедливое распределение ресурсов между клиентами
|
| 161 |
+
- ✅ HTTP 429 при превышении лимита
|
| 162 |
+
|
| 163 |
+
---
|
| 164 |
+
|
| 165 |
+
### 5. In-Memory кэширование
|
| 166 |
+
|
| 167 |
+
**Было:**
|
| 168 |
+
- Каждый запрос генерирует эмбеддинг заново
|
| 169 |
+
- Повторные запросы тратят CPU
|
| 170 |
+
|
| 171 |
+
**Стало:**
|
| 172 |
+
```python
|
| 173 |
+
CACHE_ENABLED = True
|
| 174 |
+
CACHE_TTL_SECONDS = 3600 # 1 час
|
| 175 |
+
CACHE_MAX_SIZE = 10000 # 10k эмбеддингов
|
| 176 |
+
|
| 177 |
+
# Автоматическое кэширование:
|
| 178 |
+
cache_key = hashlib.sha256(text.encode()).hexdigest()
|
| 179 |
+
if cache_key in embedding_cache:
|
| 180 |
+
return embedding_cache[cache_key] # Мгновенно!
|
| 181 |
+
```
|
| 182 |
+
|
| 183 |
+
**Влияние на индексацию:**
|
| 184 |
+
- ✅ **До 100x ускорение** для повторных запросов (0.1-0.5s → <1ms)
|
| 185 |
+
- ✅ Экономия CPU для часто запрашиваемых объектов
|
| 186 |
+
- ✅ TTL автоматически инвалидирует устаревший кэш
|
| 187 |
+
- ✅ Статистика: `GET /cache/stats`, очистка: `POST /cache/clear`
|
| 188 |
+
|
| 189 |
+
---
|
| 190 |
+
|
| 191 |
+
### 6. Версионирование модели
|
| 192 |
+
|
| 193 |
+
**Было:**
|
| 194 |
+
- Невозможно отследить какая модель использовалась
|
| 195 |
+
- Проблемы при обновлении модели
|
| 196 |
+
|
| 197 |
+
**Стало:**
|
| 198 |
+
```python
|
| 199 |
+
# Каждый ответ содержит:
|
| 200 |
+
{
|
| 201 |
+
"embedding": [...],
|
| 202 |
+
"model_version": "2.1.0",
|
| 203 |
+
"model_checksum": "a1b2c3d4e5f6" # MD5 от model_name:dimensions
|
| 204 |
+
}
|
| 205 |
+
```
|
| 206 |
+
|
| 207 |
+
**Влияние на индексацию:**
|
| 208 |
+
- ✅ Go Backend может хранить model_checksum вместе с эмбеддингом
|
| 209 |
+
- ✅ При обновлении модели можно переиндексировать только устаревшие записи
|
| 210 |
+
- ✅ `/model-info` показывает время загрузки модели
|
| 211 |
+
|
| 212 |
+
---
|
| 213 |
+
|
| 214 |
+
### 7. Structured Logging (JSON)
|
| 215 |
+
|
| 216 |
+
**Было:**
|
| 217 |
+
```
|
| 218 |
+
Loading embedding model: sentence-transformers/...
|
| 219 |
+
Model loaded. Dimensions: 384
|
| 220 |
+
```
|
| 221 |
+
|
| 222 |
+
**Стало:**
|
| 223 |
+
```json
|
| 224 |
+
{"event": "model_loading", "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "timestamp": "2024-12-19T10:30:00Z"}
|
| 225 |
+
{"event": "model_loaded", "dimensions": 384, "checksum": "a1b2c3d4e5f6", "load_time_seconds": 12.5, "timestamp": "2024-12-19T10:30:12Z"}
|
| 226 |
+
{"event": "batch_process", "total": 50, "successful": 50, "cached": 12, "timestamp": "2024-12-19T10:35:00Z"}
|
| 227 |
+
```
|
| 228 |
+
|
| 229 |
+
**Влияние на индексацию:**
|
| 230 |
+
- ✅ Интеграция с ELK/Loki/CloudWatch
|
| 231 |
+
- ✅ Поиск и анализ логов
|
| 232 |
+
- ✅ Трейсинг запросов
|
| 233 |
+
|
| 234 |
+
---
|
| 235 |
+
|
| 236 |
+
### 8. Улучшенная батч-обработка
|
| 237 |
+
|
| 238 |
+
**Было:**
|
| 239 |
+
```python
|
| 240 |
+
# Все тексты генер��руются заново
|
| 241 |
+
embeddings = model.encode(texts, convert_to_numpy=True)
|
| 242 |
+
```
|
| 243 |
+
|
| 244 |
+
**Стало:**
|
| 245 |
+
```python
|
| 246 |
+
# Сначала проверяем кэш
|
| 247 |
+
for item in body.items:
|
| 248 |
+
cache_key = get_cache_key(prepared)
|
| 249 |
+
if cache_key in embedding_cache:
|
| 250 |
+
# Мгновенно из кэша!
|
| 251 |
+
cached_count += 1
|
| 252 |
+
continue
|
| 253 |
+
texts_to_encode.append(prepared)
|
| 254 |
+
|
| 255 |
+
# Только некэшированные идут в model.encode
|
| 256 |
+
if texts_to_encode:
|
| 257 |
+
embeddings = await encode_async(texts_to_encode)
|
| 258 |
+
```
|
| 259 |
+
|
| 260 |
+
**Влияние на индексацию:**
|
| 261 |
+
- ✅ Смешанный батч (кэш + compute) обрабатывается оптимально
|
| 262 |
+
- ✅ Ответ содержит `cached_count` для аналитики
|
| 263 |
+
- ✅ Каждый `BatchResultItem` имеет флаг `cached: true/false`
|
| 264 |
+
|
| 265 |
+
---
|
| 266 |
+
|
| 267 |
+
### 9. Graceful Error Handling
|
| 268 |
+
|
| 269 |
+
**Было:**
|
| 270 |
+
- 500 при любой ошибке
|
| 271 |
+
- Нет информации о причине
|
| 272 |
+
|
| 273 |
+
**Стало:**
|
| 274 |
+
```python
|
| 275 |
+
# Таймаут
|
| 276 |
+
except asyncio.TimeoutError:
|
| 277 |
+
ENCODE_FAILURES.labels(reason="timeout").inc()
|
| 278 |
+
raise HTTPException(status_code=503, detail=f"Encoding timeout after {ENCODE_TIMEOUT_SECONDS}s")
|
| 279 |
+
|
| 280 |
+
# Ошибка модели
|
| 281 |
+
except Exception as e:
|
| 282 |
+
ENCODE_FAILURES.labels(reason="error").inc()
|
| 283 |
+
raise HTTPException(status_code=500, detail=f"Encoding error: {str(e)}")
|
| 284 |
+
```
|
| 285 |
+
|
| 286 |
+
**Влияние на индексацию:**
|
| 287 |
+
- ✅ 503 для временных проблем (клиент может повторить)
|
| 288 |
+
- ✅ 400 для ошибок валидации (клиент должен исправить запрос)
|
| 289 |
+
- ✅ Метрики для алертов на ошибки
|
| 290 |
+
|
| 291 |
+
---
|
| 292 |
+
|
| 293 |
+
### 10. Новые endpoints
|
| 294 |
+
|
| 295 |
+
| Endpoint | Описание |
|
| 296 |
+
|----------|----------|
|
| 297 |
+
| `GET /metrics` | Prometheus метрики |
|
| 298 |
+
| `GET /cache/stats` | Статистика кэша |
|
| 299 |
+
| `POST /cache/clear` | Очистка кэша |
|
| 300 |
+
|
| 301 |
+
---
|
| 302 |
+
|
| 303 |
+
## Конфигурация (переменные окружения)
|
| 304 |
+
|
| 305 |
+
| Переменная | По умолчанию | Описание |
|
| 306 |
+
|------------|--------------|----------|
|
| 307 |
+
| `EMBEDDING_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | Модель эмбеддингов |
|
| 308 |
+
| `MAX_BATCH_SIZE` | `128` | Максимум элементов в батче |
|
| 309 |
+
| `MAX_TEXT_LENGTH` | `10000` | Максимум символов в тексте |
|
| 310 |
+
| `MAX_CONCURRENT_REQUESTS` | `4` | Параллельные encode |
|
| 311 |
+
| `ENCODE_TIMEOUT_SECONDS` | `30` | Таймаут на encode |
|
| 312 |
+
| `RATE_LIMIT` | `100/minute` | Rate limit для одиночных |
|
| 313 |
+
| `RATE_LIMIT_BATCH` | `20/minute` | Rate limit для батчей |
|
| 314 |
+
| `CACHE_ENABLED` | `true` | Включить кэш |
|
| 315 |
+
| `CACHE_TTL_SECONDS` | `3600` | TTL кэша (1 час) |
|
| 316 |
+
| `CACHE_MAX_SIZE` | `10000` | Максимум записей в кэше |
|
| 317 |
+
| `ALLOWED_ORIGINS` | `*` | CORS origins |
|
| 318 |
+
|
| 319 |
+
---
|
| 320 |
+
|
| 321 |
+
## Оценка готовности к production
|
| 322 |
+
|
| 323 |
+
| Критерий | v2.0.0 | v2.1.0 |
|
| 324 |
+
|----------|--------|--------|
|
| 325 |
+
| Асинхронность | ❌ Блокирует event loop | ✅ ThreadPoolExecutor |
|
| 326 |
+
| Валидация | ❌ Нет лимитов | ✅ Batch/text limits |
|
| 327 |
+
| Метрики | ❌ Нет | ✅ Prometheus |
|
| 328 |
+
| Rate limiting | ❌ Нет | ✅ slowapi |
|
| 329 |
+
| Кэширование | ❌ Нет | ✅ TTLCache |
|
| 330 |
+
| Версионирование | ❌ Нет | ✅ checksum в ответах |
|
| 331 |
+
| Логирование | ❌ print() | ✅ structlog JSON |
|
| 332 |
+
| Таймауты | ❌ Нет | ✅ 30s timeout |
|
| 333 |
+
| Error handling | ❌ Базовый | ✅ Graceful 503/400 |
|
| 334 |
+
|
| 335 |
+
**Рейтинг:** 5/10 → **8/10** ✅
|
| 336 |
+
|
| 337 |
+
---
|
| 338 |
+
|
| 339 |
+
## Следующие шаги (roadmap для 9/10)
|
| 340 |
+
|
| 341 |
+
1. **Redis кэширование** — для распределённого кэша
|
| 342 |
+
2. **OpenTelemetry tracing** — trace_id propagation
|
| 343 |
+
3. **API Key авторизация** — уже подготовлено (`API_KEY` env)
|
| 344 |
+
4. **Background workers** — для длинных reindex-batch (Celery/RQ)
|
| 345 |
+
5. **ONNX Runtime** — для ускорения инференса
|
| 346 |
+
6. **Health check с warmup** — pre-load model weights
|
| 347 |
+
|
| 348 |
+
---
|
| 349 |
+
|
| 350 |
+
## Миграция с v2.0.0
|
| 351 |
+
|
| 352 |
+
1. Обновить `requirements.txt`
|
| 353 |
+
2. Обновить `main.py`
|
| 354 |
+
3. Обновить `Dockerfile` (опционально)
|
| 355 |
+
4. Настроить Prometheus scraping на `/metrics`
|
| 356 |
+
5. Добавить переменные окружения (опционально)
|
| 357 |
+
|
| 358 |
+
**Breaking changes:** Нет. Все endpoints совместимы.
|
| 359 |
+
|
Dockerfile
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
| 2 |
# Dockerfile for HuggingFace Spaces
|
|
|
|
| 3 |
|
| 4 |
FROM python:3.11-slim
|
| 5 |
|
|
@@ -13,13 +14,35 @@ ENV PATH="/home/user/.local/bin:$PATH"
|
|
| 13 |
# Set working directory
|
| 14 |
WORKDIR /app
|
| 15 |
|
| 16 |
-
# Environment
|
|
|
|
|
|
|
| 17 |
ENV PYTHONUNBUFFERED=1
|
| 18 |
ENV TRANSFORMERS_CACHE=/home/user/.cache/transformers
|
| 19 |
ENV SENTENCE_TRANSFORMERS_HOME=/home/user/.cache/sentence_transformers
|
| 20 |
ENV HF_HOME=/home/user/.cache/huggingface
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# Copy requirements and install dependencies
|
| 25 |
COPY --chown=user requirements.txt .
|
|
@@ -31,6 +54,10 @@ COPY --chown=user main.py .
|
|
| 31 |
# Expose port 7860 (HuggingFace Spaces standard)
|
| 32 |
EXPOSE 7860
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
# Start the application
|
| 35 |
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
| 36 |
|
|
|
|
| 1 |
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
| 2 |
# Dockerfile for HuggingFace Spaces
|
| 3 |
+
# Version: 2.1.0 (Production-Ready)
|
| 4 |
|
| 5 |
FROM python:3.11-slim
|
| 6 |
|
|
|
|
| 14 |
# Set working directory
|
| 15 |
WORKDIR /app
|
| 16 |
|
| 17 |
+
# ============== Environment Variables ==============
|
| 18 |
+
|
| 19 |
+
# Base settings
|
| 20 |
ENV PYTHONUNBUFFERED=1
|
| 21 |
ENV TRANSFORMERS_CACHE=/home/user/.cache/transformers
|
| 22 |
ENV SENTENCE_TRANSFORMERS_HOME=/home/user/.cache/sentence_transformers
|
| 23 |
ENV HF_HOME=/home/user/.cache/huggingface
|
| 24 |
+
|
| 25 |
+
# Model settings
|
| 26 |
+
ENV EMBEDDING_MODEL=ai-forever/ru-en-RoSBERTa
|
| 27 |
+
ENV EMBEDDING_DIMENSIONS=768
|
| 28 |
+
|
| 29 |
+
# Limits (production-ready)
|
| 30 |
+
ENV MAX_BATCH_SIZE=128
|
| 31 |
+
ENV MAX_TEXT_LENGTH=10000
|
| 32 |
+
ENV MAX_CONCURRENT_REQUESTS=6
|
| 33 |
+
ENV ENCODE_TIMEOUT_SECONDS=30.0
|
| 34 |
+
|
| 35 |
+
# Rate limiting
|
| 36 |
+
ENV RATE_LIMIT=100/minute
|
| 37 |
+
ENV RATE_LIMIT_BATCH=20/minute
|
| 38 |
+
|
| 39 |
+
# Cache settings
|
| 40 |
+
ENV CACHE_ENABLED=true
|
| 41 |
+
ENV CACHE_TTL_SECONDS=3600
|
| 42 |
+
ENV CACHE_MAX_SIZE=10000
|
| 43 |
+
|
| 44 |
+
# Security (переопределите в production!)
|
| 45 |
+
ENV ALLOWED_ORIGINS=*
|
| 46 |
|
| 47 |
# Copy requirements and install dependencies
|
| 48 |
COPY --chown=user requirements.txt .
|
|
|
|
| 54 |
# Expose port 7860 (HuggingFace Spaces standard)
|
| 55 |
EXPOSE 7860
|
| 56 |
|
| 57 |
+
# Health check
|
| 58 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
| 59 |
+
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health')" || exit 1
|
| 60 |
+
|
| 61 |
# Start the application
|
| 62 |
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
| 63 |
|
README.md
CHANGED
|
@@ -9,74 +9,148 @@ license: mit
|
|
| 9 |
app_port: 7860
|
| 10 |
---
|
| 11 |
|
| 12 |
-
# Matching Embedding Service
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
## Возможности
|
| 17 |
|
| 18 |
- 🔢 Генерация эмбеддингов для русского и английского текста
|
| 19 |
- 🔍 Семантический поиск и матчинг
|
| 20 |
-
- 📊 In-memory хранилище векторов
|
| 21 |
- 🚀 FastAPI с автоматической документацией
|
| 22 |
-
- 🌐 CORS-ready для интеграции
|
| 23 |
-
|
| 24 |
-
## API Документация
|
| 25 |
-
|
| 26 |
-
После запуска доступна по адресам:
|
| 27 |
-
- Swagger UI: `/docs`
|
| 28 |
-
- ReDoc: `/redoc`
|
| 29 |
|
| 30 |
## Модель
|
| 31 |
|
| 32 |
-
Используется модель: `
|
| 33 |
-
-
|
| 34 |
-
-
|
| 35 |
-
-
|
| 36 |
-
-
|
| 37 |
|
| 38 |
## Endpoints
|
| 39 |
|
| 40 |
### Основные
|
| 41 |
-
|
| 42 |
-
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
## Использование
|
| 53 |
|
|
|
|
| 54 |
```python
|
| 55 |
import requests
|
| 56 |
|
| 57 |
# Health check
|
| 58 |
-
response = requests.get("https://
|
| 59 |
print(response.json())
|
|
|
|
| 60 |
|
| 61 |
# Генерация эмбеддинга
|
| 62 |
response = requests.post(
|
| 63 |
-
"https://
|
| 64 |
-
json={
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
)
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
```
|
| 68 |
|
| 69 |
## Разработка
|
| 70 |
|
| 71 |
-
Локальный запуск
|
| 72 |
```bash
|
| 73 |
pip install -r requirements.txt
|
| 74 |
uvicorn main:app --host 0.0.0.0 --port 7860
|
| 75 |
```
|
| 76 |
|
| 77 |
-
Docker
|
| 78 |
```bash
|
| 79 |
docker build -t matching-service .
|
| 80 |
-
docker run -p 7860:7860
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
```
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
app_port: 7860
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# Matching Embedding Service v2.2.0
|
| 13 |
|
| 14 |
+
**Production-Ready** сервис для генерации эмбеддингов текста и семантического поиска объектов недвижимости.
|
| 15 |
+
|
| 16 |
+
## 🆕 Что нового в v2.2.0
|
| 17 |
+
|
| 18 |
+
- ✅ **Новая модель** — `ai-forever/ru-en-RoSBERTa` (768 dimensions)
|
| 19 |
+
- ✅ **Нормализация эмбеддингов** — `normalize_embeddings=True` для cosine similarity
|
| 20 |
+
- ✅ **Унифицированная кэш-логика** — корректный флаг `cached`
|
| 21 |
+
- ✅ **Асинхронная обработка** — не блокирует event loop
|
| 22 |
+
- ✅ **Prometheus метрики** — `/metrics` endpoint
|
| 23 |
+
- ✅ **Rate limiting** — защита от перегрузки
|
| 24 |
+
- ✅ **In-memory кэширование** — до 100x ускорение повторных запросов
|
| 25 |
|
| 26 |
## Возможности
|
| 27 |
|
| 28 |
- 🔢 Генерация эмбеддингов для русского и английского текста
|
| 29 |
- 🔍 Семантический поиск и матчинг
|
|
|
|
| 30 |
- 🚀 FastAPI с автоматической документацией
|
| 31 |
+
- 🌐 CORS-ready для интеграции с Go Backend
|
| 32 |
+
- 📊 Prometheus метрики для мониторинга
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
## Модель
|
| 35 |
|
| 36 |
+
Используется модель: `ai-forever/ru-en-RoSBERTa`
|
| 37 |
+
- 🇷🇺 Оптимизирована для русского языка
|
| 38 |
+
- 🇬🇧 Поддержка английского языка
|
| 39 |
+
- Размерность векторов: **768**
|
| 40 |
+
- Нормализованные эмбеддинги для pgvector + cosine similarity
|
| 41 |
|
| 42 |
## Endpoints
|
| 43 |
|
| 44 |
### Основные
|
| 45 |
+
| Метод | Endpoint | Описание |
|
| 46 |
+
|-------|----------|----------|
|
| 47 |
+
| `GET` | `/health` | Проверка здоровья |
|
| 48 |
+
| `GET` | `/metrics` | Prometheus метрики |
|
| 49 |
+
| `GET` | `/model-info` | Информация о модели |
|
| 50 |
+
| `POST` | `/embed` | Эмбеддинг из текста |
|
| 51 |
+
| `POST` | `/prepare-and-embed` | ⭐ Основной endpoint |
|
| 52 |
+
| `POST` | `/batch` | Пакетная обработка |
|
| 53 |
+
|
| 54 |
+
### Переиндексация
|
| 55 |
+
| Метод | Endpoint | Описание |
|
| 56 |
+
|-------|----------|----------|
|
| 57 |
+
| `POST` | `/reindex` | Переиндексация объекта |
|
| 58 |
+
| `POST` | `/reindex-batch` | Пакетная переиндексация |
|
| 59 |
+
|
| 60 |
+
### Кэш
|
| 61 |
+
| Метод | Endpoint | Описание |
|
| 62 |
+
|-------|----------|----------|
|
| 63 |
+
| `GET` | `/cache/stats` | Статистика кэша |
|
| 64 |
+
| `POST` | `/cache/clear` | Очистка кэша |
|
| 65 |
+
|
| 66 |
+
## Конфигурация
|
| 67 |
+
|
| 68 |
+
| Переменная | По умолчанию | Описание |
|
| 69 |
+
|------------|--------------|----------|
|
| 70 |
+
| `EMBEDDING_MODEL` | `paraphrase-multilingual-MiniLM-L12-v2` | Модель |
|
| 71 |
+
| `MAX_BATCH_SIZE` | `128` | Макс. элементов в батче |
|
| 72 |
+
| `MAX_TEXT_LENGTH` | `10000` | Макс. символов |
|
| 73 |
+
| `RATE_LIMIT` | `100/minute` | Rate limit |
|
| 74 |
+
| `CACHE_ENABLED` | `true` | Включить кэш |
|
| 75 |
+
| `CACHE_TTL_SECONDS` | `3600` | TTL кэша |
|
| 76 |
|
| 77 |
## Использование
|
| 78 |
|
| 79 |
+
### Python
|
| 80 |
```python
|
| 81 |
import requests
|
| 82 |
|
| 83 |
# Health check
|
| 84 |
+
response = requests.get("https://your-space.hf.space/health")
|
| 85 |
print(response.json())
|
| 86 |
+
# {"status": "healthy", "model": "...", "version": "2.1.0", "cache_enabled": true}
|
| 87 |
|
| 88 |
# Генерация эмбеддинга
|
| 89 |
response = requests.post(
|
| 90 |
+
"https://your-space.hf.space/prepare-and-embed",
|
| 91 |
+
json={
|
| 92 |
+
"title": "Уютная квартира в центре",
|
| 93 |
+
"description": "Для семьи с детьми",
|
| 94 |
+
"price": 10000000,
|
| 95 |
+
"rooms": 3
|
| 96 |
+
}
|
| 97 |
)
|
| 98 |
+
result = response.json()
|
| 99 |
+
embedding = result["embedding"] # [0.123, -0.456, ...]
|
| 100 |
+
checksum = result["model_checksum"] # "a1b2c3d4e5f6"
|
| 101 |
+
cached = result["cached"] # true/false
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
### Go
|
| 105 |
+
```go
|
| 106 |
+
type EmbedRequest struct {
|
| 107 |
+
Title string `json:"title"`
|
| 108 |
+
Description string `json:"description"`
|
| 109 |
+
Price float64 `json:"price,omitempty"`
|
| 110 |
+
Rooms int `json:"rooms,omitempty"`
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
type EmbedResponse struct {
|
| 114 |
+
Embedding []float64 `json:"embedding"`
|
| 115 |
+
Dimensions int `json:"dimensions"`
|
| 116 |
+
ModelVersion string `json:"model_version"`
|
| 117 |
+
ModelChecksum string `json:"model_checksum"`
|
| 118 |
+
Cached bool `json:"cached"`
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
// Сохраняем в PostgreSQL + pgvector
|
| 122 |
+
// UPDATE leads SET embedding = $1, model_checksum = $2 WHERE id = $3
|
| 123 |
```
|
| 124 |
|
| 125 |
## Разработка
|
| 126 |
|
| 127 |
+
### Локальный запуск
|
| 128 |
```bash
|
| 129 |
pip install -r requirements.txt
|
| 130 |
uvicorn main:app --host 0.0.0.0 --port 7860
|
| 131 |
```
|
| 132 |
|
| 133 |
+
### Docker
|
| 134 |
```bash
|
| 135 |
docker build -t matching-service .
|
| 136 |
+
docker run -p 7860:7860 \
|
| 137 |
+
-e CACHE_ENABLED=true \
|
| 138 |
+
-e RATE_LIMIT=100/minute \
|
| 139 |
+
matching-service
|
| 140 |
+
```
|
| 141 |
+
|
| 142 |
+
### Мониторинг
|
| 143 |
+
|
| 144 |
+
Prometheus scrape config:
|
| 145 |
+
```yaml
|
| 146 |
+
scrape_configs:
|
| 147 |
+
- job_name: 'embedding-service'
|
| 148 |
+
static_configs:
|
| 149 |
+
- targets: ['localhost:7860']
|
| 150 |
+
metrics_path: '/metrics'
|
| 151 |
```
|
| 152 |
|
| 153 |
+
## Changelog
|
| 154 |
+
|
| 155 |
+
См. [CHANGELOG.md](CHANGELOG.md) для полного списка изменений.
|
| 156 |
+
|
main.py
CHANGED
|
@@ -6,74 +6,434 @@ STATELESS сервис - не хранит данные, только генер
|
|
| 6 |
|
| 7 |
Используется для матчинга лидов с объектами недвижимости.
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
Endpoints:
|
| 10 |
- POST /embed - генерация эмбеддинга из текста
|
| 11 |
- POST /prepare-and-embed - подготовка полей + эмбеддинг (ОСНОВНОЙ)
|
| 12 |
- POST /batch - пакетная обработка
|
|
|
|
|
|
|
| 13 |
- GET /health - проверка здоровья
|
| 14 |
- GET /model-info - информация о модели
|
|
|
|
| 15 |
"""
|
| 16 |
|
| 17 |
import os
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
from contextlib import asynccontextmanager
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
-
from fastapi import FastAPI, HTTPException
|
| 22 |
from fastapi.middleware.cors import CORSMiddleware
|
| 23 |
-
from
|
|
|
|
| 24 |
from sentence_transformers import SentenceTransformer
|
| 25 |
import numpy as np
|
| 26 |
from dotenv import load_dotenv
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
load_dotenv()
|
| 29 |
|
| 30 |
-
#
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
# Глобальная модель
|
| 35 |
model: Optional[SentenceTransformer] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
@asynccontextmanager
|
| 39 |
async def lifespan(app: FastAPI):
|
| 40 |
-
"""Загрузка модели при старте."""
|
| 41 |
-
global model
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
yield
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
model = None
|
|
|
|
| 48 |
|
| 49 |
|
|
|
|
|
|
|
| 50 |
app = FastAPI(
|
| 51 |
title="Embedding Service",
|
| 52 |
-
description="
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
lifespan=lifespan
|
| 55 |
)
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
app.add_middleware(
|
| 58 |
CORSMiddleware,
|
| 59 |
-
allow_origins=
|
| 60 |
allow_credentials=True,
|
| 61 |
allow_methods=["*"],
|
| 62 |
allow_headers=["*"],
|
| 63 |
)
|
| 64 |
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
# ============== Pydantic Models ==============
|
| 67 |
|
| 68 |
class EmbedRequest(BaseModel):
|
| 69 |
"""Запрос на генерацию эмбеддинга из готового текста."""
|
| 70 |
-
text: str = Field(..., min_length=1, description="Текст для эмбеддинга")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
|
| 73 |
class EmbedResponse(BaseModel):
|
| 74 |
"""Ответ с эмбеддингом."""
|
| 75 |
embedding: List[float]
|
| 76 |
dimensions: int
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
class PrepareAndEmbedRequest(BaseModel):
|
|
@@ -82,14 +442,14 @@ class PrepareAndEmbedRequest(BaseModel):
|
|
| 82 |
|
| 83 |
Это ОСНОВНОЙ endpoint для интеграции с Go Backend.
|
| 84 |
"""
|
| 85 |
-
title: str = Field(default="", description="Название")
|
| 86 |
-
description: str = Field(default="", description="Описание")
|
| 87 |
requirement: Optional[Dict[str, Any]] = Field(default=None, description="Требования (JSON)")
|
| 88 |
-
price: Optional[float] = Field(default=None, description="Цена")
|
| 89 |
-
district: Optional[str] = Field(default=None, description="Район")
|
| 90 |
-
rooms: Optional[int] = Field(default=None, description="Количество комнат")
|
| 91 |
-
area: Optional[float] = Field(default=None, description="Площадь")
|
| 92 |
-
address: Optional[str] = Field(default=None, description="Адрес")
|
| 93 |
|
| 94 |
|
| 95 |
class PrepareAndEmbedResponse(BaseModel):
|
|
@@ -97,24 +457,36 @@ class PrepareAndEmbedResponse(BaseModel):
|
|
| 97 |
embedding: List[float]
|
| 98 |
dimensions: int
|
| 99 |
prepared_text: str = Field(description="Подготовленный текст (для отладки)")
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
|
| 102 |
class BatchItem(BaseModel):
|
| 103 |
"""Один элемент для пакетной обработки."""
|
| 104 |
entity_id: str = Field(..., description="ID объекта")
|
| 105 |
-
title: str = Field(default="")
|
| 106 |
-
description: str = Field(default="")
|
| 107 |
requirement: Optional[Dict[str, Any]] = None
|
| 108 |
-
price: Optional[float] = None
|
| 109 |
-
district: Optional[str] = None
|
| 110 |
-
rooms: Optional[int] = None
|
| 111 |
-
area: Optional[float] = None
|
| 112 |
-
address: Optional[str] = None
|
| 113 |
|
| 114 |
|
| 115 |
class BatchRequest(BaseModel):
|
| 116 |
"""Запрос на пакетную обработку."""
|
| 117 |
-
items: List[BatchItem]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
|
| 120 |
class BatchResultItem(BaseModel):
|
|
@@ -123,6 +495,7 @@ class BatchResultItem(BaseModel):
|
|
| 123 |
embedding: List[float]
|
| 124 |
success: bool = True
|
| 125 |
error: Optional[str] = None
|
|
|
|
| 126 |
|
| 127 |
|
| 128 |
class BatchResponse(BaseModel):
|
|
@@ -131,6 +504,9 @@ class BatchResponse(BaseModel):
|
|
| 131 |
dimensions: int
|
| 132 |
total: int
|
| 133 |
successful: int
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
|
| 136 |
class HealthResponse(BaseModel):
|
|
@@ -138,49 +514,38 @@ class HealthResponse(BaseModel):
|
|
| 138 |
status: str
|
| 139 |
model: str
|
| 140 |
dimensions: int
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
if title:
|
| 159 |
-
parts.append(f"Название: {title}")
|
| 160 |
-
if description:
|
| 161 |
-
parts.append(f"Описание: {description}")
|
| 162 |
-
|
| 163 |
-
if requirement:
|
| 164 |
-
req_parts = [f"{k}: {v}" for k, v in requirement.items() if v is not None]
|
| 165 |
-
if req_parts:
|
| 166 |
-
parts.append(f"Требования: {', '.join(req_parts)}")
|
| 167 |
-
|
| 168 |
-
params = []
|
| 169 |
-
if price is not None:
|
| 170 |
-
params.append(f"цена {price:,.0f}₽")
|
| 171 |
-
if district:
|
| 172 |
-
params.append(f"район {district}")
|
| 173 |
-
if rooms is not None:
|
| 174 |
-
params.append(f"{rooms}-комнатная")
|
| 175 |
-
if area is not None:
|
| 176 |
-
params.append(f"площадь {area}м²")
|
| 177 |
-
if address:
|
| 178 |
-
params.append(f"адрес: {address}")
|
| 179 |
|
| 180 |
-
if params:
|
| 181 |
-
parts.append(f"Параметры: {', '.join(params)}")
|
| 182 |
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
|
| 186 |
# ============== Endpoints ==============
|
|
@@ -190,9 +555,22 @@ async def root():
|
|
| 190 |
"""Информация о сервисе."""
|
| 191 |
return {
|
| 192 |
"service": "Embedding Service",
|
| 193 |
-
"version":
|
| 194 |
"type": "STATELESS",
|
| 195 |
"description": "Генерирует эмбеддинги. Хранение на стороне Go Backend + pgvector.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
"endpoints": {
|
| 197 |
"POST /embed": "Эмбеддинг из готового текста",
|
| 198 |
"POST /prepare-and-embed": "Подготовка полей + эмбеддинг (создание)",
|
|
@@ -200,7 +578,8 @@ async def root():
|
|
| 200 |
"POST /batch": "Пакетная обработка (создание)",
|
| 201 |
"POST /reindex-batch": "Пакетная переиндексация (обновление)",
|
| 202 |
"GET /health": "Проверка здоровья",
|
| 203 |
-
"GET /model-info": "Информация о модели для pgvector"
|
|
|
|
| 204 |
},
|
| 205 |
"docs": "/docs"
|
| 206 |
}
|
|
@@ -211,105 +590,147 @@ async def health_check():
|
|
| 211 |
"""Проверка здоровья сервиса."""
|
| 212 |
if model is None:
|
| 213 |
raise HTTPException(status_code=503, detail="Model not loaded")
|
|
|
|
|
|
|
|
|
|
| 214 |
return HealthResponse(
|
| 215 |
status="healthy",
|
| 216 |
model=MODEL_NAME,
|
| 217 |
-
dimensions=model.get_sentence_embedding_dimension()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
)
|
| 219 |
|
| 220 |
|
| 221 |
@app.post("/embed", response_model=EmbedResponse)
|
| 222 |
-
|
|
|
|
| 223 |
"""
|
| 224 |
Генерация эмбеддинга из готового текста.
|
| 225 |
|
| 226 |
Используйте если текст уже подготовлен на стороне бэкенда.
|
| 227 |
-
|
|
|
|
|
|
|
| 228 |
if model is None:
|
| 229 |
raise HTTPException(status_code=503, detail="Model not loaded")
|
| 230 |
|
| 231 |
-
embedding =
|
|
|
|
| 232 |
return EmbedResponse(
|
| 233 |
embedding=embedding.tolist(),
|
| 234 |
-
dimensions=len(embedding)
|
|
|
|
|
|
|
|
|
|
| 235 |
)
|
| 236 |
|
| 237 |
|
| 238 |
@app.post("/prepare-and-embed", response_model=PrepareAndEmbedResponse)
|
| 239 |
-
|
|
|
|
| 240 |
"""
|
| 241 |
Подготовка текста из полей и генерация эмбеддинга.
|
| 242 |
|
| 243 |
⭐ ОСНОВНОЙ ENDPOINT для интеграции с Go Backend.
|
| 244 |
|
| 245 |
-
|
|
|
|
|
|
|
| 246 |
```json
|
| 247 |
-
{
|
| 248 |
"title": "Ищу квартиру в центре",
|
| 249 |
"description": "Для семьи с детьми",
|
| 250 |
"price": 10000000,
|
| 251 |
"district": "Центральный",
|
| 252 |
"rooms": 3
|
| 253 |
-
}
|
| 254 |
```
|
| 255 |
|
| 256 |
Go Backend сохраняет embedding в PostgreSQL:
|
| 257 |
```sql
|
| 258 |
UPDATE leads SET embedding = $1 WHERE lead_id = $2
|
| 259 |
```
|
| 260 |
-
"""
|
| 261 |
if model is None:
|
| 262 |
raise HTTPException(status_code=503, detail="Model not loaded")
|
| 263 |
|
| 264 |
prepared = prepare_text(
|
| 265 |
-
title=
|
| 266 |
-
description=
|
| 267 |
-
requirement=
|
| 268 |
-
price=
|
| 269 |
-
district=
|
| 270 |
-
rooms=
|
| 271 |
-
area=
|
| 272 |
-
address=
|
| 273 |
)
|
| 274 |
|
| 275 |
if not prepared:
|
| 276 |
raise HTTPException(status_code=400, detail="All fields are empty")
|
| 277 |
|
| 278 |
-
embedding =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
return PrepareAndEmbedResponse(
|
| 281 |
embedding=embedding.tolist(),
|
| 282 |
dimensions=len(embedding),
|
| 283 |
-
prepared_text=prepared
|
|
|
|
|
|
|
|
|
|
| 284 |
)
|
| 285 |
|
| 286 |
|
| 287 |
@app.post("/batch", response_model=BatchResponse)
|
| 288 |
-
|
|
|
|
| 289 |
"""
|
| 290 |
Пакетная обработка нескольких объектов.
|
| 291 |
|
|
|
|
|
|
|
|
|
|
| 292 |
Используйте для массовой индексации при первоначальной загрузке.
|
| 293 |
|
| 294 |
-
Пример:
|
| 295 |
```json
|
| 296 |
-
{
|
| 297 |
"items": [
|
| 298 |
-
{"entity_id": "lead-1", "title": "Ищу квартиру", "rooms": 3},
|
| 299 |
-
{"entity_id": "lead-2", "title": "Нужен офис", "area": 100}
|
| 300 |
]
|
| 301 |
-
}
|
| 302 |
```
|
| 303 |
-
"""
|
| 304 |
if model is None:
|
| 305 |
raise HTTPException(status_code=503, detail="Model not loaded")
|
| 306 |
|
|
|
|
|
|
|
| 307 |
results = []
|
| 308 |
-
|
| 309 |
-
|
|
|
|
| 310 |
|
| 311 |
-
# Подготовка текстов
|
| 312 |
-
for item in
|
| 313 |
prepared = prepare_text(
|
| 314 |
title=item.title,
|
| 315 |
description=item.description,
|
|
@@ -320,37 +741,73 @@ async def batch_process(request: BatchRequest):
|
|
| 320 |
area=item.area,
|
| 321 |
address=item.address
|
| 322 |
)
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
valid_items.append(item)
|
| 326 |
-
else:
|
| 327 |
results.append(BatchResultItem(
|
| 328 |
entity_id=item.entity_id,
|
| 329 |
embedding=[],
|
| 330 |
success=False,
|
| 331 |
error="All fields are empty"
|
| 332 |
))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
|
| 334 |
-
# Генерация эмбеддингов батчем
|
| 335 |
-
if texts:
|
| 336 |
-
embeddings = model.encode(texts, convert_to_numpy=True)
|
| 337 |
-
for i, item in enumerate(valid_items):
|
| 338 |
results.append(BatchResultItem(
|
| 339 |
entity_id=item.entity_id,
|
| 340 |
-
embedding=
|
| 341 |
-
success=True
|
|
|
|
| 342 |
))
|
| 343 |
|
| 344 |
# Сортировка по порядку входных items
|
| 345 |
results_map = {r.entity_id: r for r in results}
|
| 346 |
-
sorted_results = [results_map[item.entity_id] for item in
|
| 347 |
successful = sum(1 for r in sorted_results if r.success)
|
| 348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
return BatchResponse(
|
| 350 |
results=sorted_results,
|
| 351 |
-
dimensions=
|
| 352 |
-
total=len(
|
| 353 |
-
successful=successful
|
|
|
|
|
|
|
|
|
|
| 354 |
)
|
| 355 |
|
| 356 |
|
|
@@ -368,7 +825,21 @@ async def get_model_info():
|
|
| 368 |
|
| 369 |
return {
|
| 370 |
"model_name": MODEL_NAME,
|
|
|
|
|
|
|
| 371 |
"dimensions": dims,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
"sql_examples": {
|
| 373 |
"extension": "CREATE EXTENSION IF NOT EXISTS vector;",
|
| 374 |
"column": f"ALTER TABLE leads ADD COLUMN embedding vector({dims});",
|
|
@@ -384,52 +855,24 @@ LIMIT 10;
|
|
| 384 |
}
|
| 385 |
|
| 386 |
|
| 387 |
-
# ============== Reindex Endpoint ==============
|
| 388 |
-
|
| 389 |
-
class ReindexRequest(BaseModel):
|
| 390 |
-
"""
|
| 391 |
-
Запрос на переиндексацию объекта.
|
| 392 |
-
|
| 393 |
-
Используется когда пользователь обновил лида/объект и нужно
|
| 394 |
-
пересоздать эмбеддинг.
|
| 395 |
-
"""
|
| 396 |
-
entity_id: str = Field(..., description="ID объекта для переиндексации")
|
| 397 |
-
entity_type: str = Field(default="lead", description="Тип: 'lead' или 'property'")
|
| 398 |
-
title: str = Field(default="", description="Название")
|
| 399 |
-
description: str = Field(default="", description="Описание")
|
| 400 |
-
requirement: Optional[Dict[str, Any]] = Field(default=None, description="Требования (JSON)")
|
| 401 |
-
price: Optional[float] = Field(default=None, description="Цена")
|
| 402 |
-
district: Optional[str] = Field(default=None, description="Район")
|
| 403 |
-
rooms: Optional[int] = Field(default=None, description="Количество комнат")
|
| 404 |
-
area: Optional[float] = Field(default=None, description="Площадь")
|
| 405 |
-
address: Optional[str] = Field(default=None, description="Адрес")
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
class ReindexResponse(BaseModel):
|
| 409 |
-
"""Ответ на переиндексацию."""
|
| 410 |
-
entity_id: str
|
| 411 |
-
entity_type: str
|
| 412 |
-
embedding: List[float]
|
| 413 |
-
dimensions: int
|
| 414 |
-
prepared_text: str
|
| 415 |
-
message: str = Field(default="Reindex successful. Update embedding in your database.")
|
| 416 |
-
|
| 417 |
-
|
| 418 |
@app.post("/reindex", response_model=ReindexResponse)
|
| 419 |
-
|
|
|
|
| 420 |
"""
|
| 421 |
Переиндексация объекта (лида или недвижимости).
|
| 422 |
|
| 423 |
⭐ Используйте когда пользователь ОБНОВИЛ данные объекта.
|
| 424 |
|
| 425 |
-
|
|
|
|
|
|
|
| 426 |
1. Пользователь создал лида → POST /prepare-and-embed → сохранили embedding
|
| 427 |
2. Пользователь ИЗМЕНИЛ лида → POST /reindex → получили новый embedding
|
| 428 |
3. Go Backend обновляет embedding в PostgreSQL
|
| 429 |
|
| 430 |
-
Пример запроса:
|
| 431 |
```json
|
| 432 |
-
{
|
| 433 |
"entity_id": "lead-123",
|
| 434 |
"entity_type": "lead",
|
| 435 |
"title": "Обновлённый заголовок",
|
|
@@ -437,52 +880,109 @@ async def reindex_entity(request: ReindexRequest):
|
|
| 437 |
"price": 12000000,
|
| 438 |
"district": "Арбат",
|
| 439 |
"rooms": 4
|
| 440 |
-
}
|
| 441 |
```
|
| 442 |
|
| 443 |
Go Backend должен выполнить:
|
| 444 |
```sql
|
| 445 |
UPDATE leads SET embedding = $1, updated_at = NOW() WHERE lead_id = $2
|
| 446 |
```
|
| 447 |
-
"""
|
| 448 |
if model is None:
|
| 449 |
raise HTTPException(status_code=503, detail="Model not loaded")
|
| 450 |
|
| 451 |
prepared = prepare_text(
|
| 452 |
-
title=
|
| 453 |
-
description=
|
| 454 |
-
requirement=
|
| 455 |
-
price=
|
| 456 |
-
district=
|
| 457 |
-
rooms=
|
| 458 |
-
area=
|
| 459 |
-
address=
|
| 460 |
)
|
| 461 |
|
| 462 |
if not prepared:
|
| 463 |
raise HTTPException(status_code=400, detail="All fields are empty - nothing to reindex")
|
| 464 |
|
| 465 |
-
embedding =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
|
| 467 |
return ReindexResponse(
|
| 468 |
-
entity_id=
|
| 469 |
-
entity_type=
|
| 470 |
embedding=embedding.tolist(),
|
| 471 |
dimensions=len(embedding),
|
| 472 |
prepared_text=prepared,
|
| 473 |
-
|
|
|
|
|
|
|
| 474 |
)
|
| 475 |
|
| 476 |
|
| 477 |
@app.post("/reindex-batch", response_model=BatchResponse)
|
| 478 |
-
|
|
|
|
| 479 |
"""
|
| 480 |
Пакетная переиндексация нескольких объектов.
|
| 481 |
|
|
|
|
|
|
|
| 482 |
Используйте когда нужно переиндексировать много объектов после
|
| 483 |
массового обновления или изменения модели.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 484 |
|
| 485 |
-
|
|
|
|
| 486 |
"""
|
| 487 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
Используется для матчинга лидов с объектами недвижимости.
|
| 8 |
|
| 9 |
+
Version: 2.1.0 (Production-Ready)
|
| 10 |
+
|
| 11 |
+
Улучшения v2.1.0:
|
| 12 |
+
- Асинхронность через ThreadPoolExecutor (не блокирует event loop)
|
| 13 |
+
- Валидация лимитов (batch size, text length)
|
| 14 |
+
- Prometheus метрики (/metrics)
|
| 15 |
+
- Rate limiting
|
| 16 |
+
- Structured logging (JSON)
|
| 17 |
+
- In-memory кэширование эмбеддингов
|
| 18 |
+
- Версионирование модели
|
| 19 |
+
- Таймауты и graceful error handling
|
| 20 |
+
|
| 21 |
Endpoints:
|
| 22 |
- POST /embed - генерация эмбеддинга из текста
|
| 23 |
- POST /prepare-and-embed - подготовка полей + эмбеддинг (ОСНОВНОЙ)
|
| 24 |
- POST /batch - пакетная обработка
|
| 25 |
+
- POST /reindex - переиндексация объекта
|
| 26 |
+
- POST /reindex-batch - пакетная переиндексация
|
| 27 |
- GET /health - проверка здоровья
|
| 28 |
- GET /model-info - информация о модели
|
| 29 |
+
- GET /metrics - Prometheus метрики
|
| 30 |
"""
|
| 31 |
|
| 32 |
import os
|
| 33 |
+
import sys
|
| 34 |
+
import time
|
| 35 |
+
import hashlib
|
| 36 |
+
import asyncio
|
| 37 |
+
from typing import List, Optional, Dict, Any, Tuple
|
| 38 |
from contextlib import asynccontextmanager
|
| 39 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 40 |
+
from functools import lru_cache
|
| 41 |
+
import logging
|
| 42 |
|
| 43 |
+
from fastapi import FastAPI, HTTPException, Request, Response
|
| 44 |
from fastapi.middleware.cors import CORSMiddleware
|
| 45 |
+
from fastapi.responses import PlainTextResponse
|
| 46 |
+
from pydantic import BaseModel, Field, field_validator
|
| 47 |
from sentence_transformers import SentenceTransformer
|
| 48 |
import numpy as np
|
| 49 |
from dotenv import load_dotenv
|
| 50 |
|
| 51 |
+
# Prometheus метрики
|
| 52 |
+
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
|
| 53 |
+
|
| 54 |
+
# Rate limiting
|
| 55 |
+
from slowapi import Limiter, _rate_limit_exceeded_handler
|
| 56 |
+
from slowapi.util import get_remote_address
|
| 57 |
+
from slowapi.errors import RateLimitExceeded
|
| 58 |
+
|
| 59 |
+
# Кэширование
|
| 60 |
+
from cachetools import TTLCache
|
| 61 |
+
|
| 62 |
+
# Structured logging
|
| 63 |
+
import structlog
|
| 64 |
+
|
| 65 |
load_dotenv()
|
| 66 |
|
| 67 |
+
# ============== Configuration ==============
|
| 68 |
+
|
| 69 |
+
# Model settings
|
| 70 |
+
MODEL_NAME = os.getenv("EMBEDDING_MODEL", "ai-forever/ru-en-RoSBERTa")
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# Limits
|
| 74 |
+
MAX_BATCH_SIZE = int(os.getenv("MAX_BATCH_SIZE", "128"))
|
| 75 |
+
MAX_TEXT_LENGTH = int(os.getenv("MAX_TEXT_LENGTH", "10000")) # символов
|
| 76 |
+
MAX_CONCURRENT_REQUESTS = int(os.getenv("MAX_CONCURRENT_REQUESTS", "6"))
|
| 77 |
+
ENCODE_TIMEOUT_SECONDS = float(os.getenv("ENCODE_TIMEOUT_SECONDS", "30.0"))
|
| 78 |
+
|
| 79 |
+
# Rate limiting
|
| 80 |
+
RATE_LIMIT = os.getenv("RATE_LIMIT", "100/minute")
|
| 81 |
+
RATE_LIMIT_BATCH = os.getenv("RATE_LIMIT_BATCH", "20/minute")
|
| 82 |
+
|
| 83 |
+
# Cache settings
|
| 84 |
+
CACHE_ENABLED = os.getenv("CACHE_ENABLED", "true").lower() == "true"
|
| 85 |
+
CACHE_TTL_SECONDS = int(os.getenv("CACHE_TTL_SECONDS", "3600")) # 1 час
|
| 86 |
+
CACHE_MAX_SIZE = int(os.getenv("CACHE_MAX_SIZE", "10000"))
|
| 87 |
+
|
| 88 |
+
# Security
|
| 89 |
+
ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "*").split(",")
|
| 90 |
+
API_KEY = os.getenv("API_KEY", None) # Опционально: API key для авторизации
|
| 91 |
+
|
| 92 |
+
# Version info
|
| 93 |
+
SERVICE_VERSION = "2.2.0"
|
| 94 |
+
|
| 95 |
+
# ============== Structured Logging ==============
|
| 96 |
+
|
| 97 |
+
structlog.configure(
|
| 98 |
+
processors=[
|
| 99 |
+
structlog.stdlib.filter_by_level,
|
| 100 |
+
structlog.stdlib.add_logger_name,
|
| 101 |
+
structlog.stdlib.add_log_level,
|
| 102 |
+
structlog.stdlib.PositionalArgumentsFormatter(),
|
| 103 |
+
structlog.processors.TimeStamper(fmt="iso"),
|
| 104 |
+
structlog.processors.StackInfoRenderer(),
|
| 105 |
+
structlog.processors.format_exc_info,
|
| 106 |
+
structlog.processors.UnicodeDecoder(),
|
| 107 |
+
structlog.processors.JSONRenderer()
|
| 108 |
+
],
|
| 109 |
+
wrapper_class=structlog.stdlib.BoundLogger,
|
| 110 |
+
context_class=dict,
|
| 111 |
+
logger_factory=structlog.stdlib.LoggerFactory(),
|
| 112 |
+
cache_logger_on_first_use=True,
|
| 113 |
+
)
|
| 114 |
+
|
| 115 |
+
logging.basicConfig(
|
| 116 |
+
format="%(message)s",
|
| 117 |
+
stream=sys.stdout,
|
| 118 |
+
level=logging.INFO,
|
| 119 |
+
)
|
| 120 |
+
|
| 121 |
+
logger = structlog.get_logger()
|
| 122 |
+
|
| 123 |
+
# ============== Prometheus Metrics ==============
|
| 124 |
+
|
| 125 |
+
REQUESTS_TOTAL = Counter(
|
| 126 |
+
'embedding_requests_total',
|
| 127 |
+
'Total number of embedding requests',
|
| 128 |
+
['endpoint', 'status']
|
| 129 |
+
)
|
| 130 |
+
|
| 131 |
+
REQUEST_LATENCY = Histogram(
|
| 132 |
+
'embedding_request_latency_seconds',
|
| 133 |
+
'Request latency in seconds',
|
| 134 |
+
['endpoint'],
|
| 135 |
+
buckets=[0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
|
| 136 |
+
)
|
| 137 |
+
|
| 138 |
+
BATCH_SIZE_HISTOGRAM = Histogram(
|
| 139 |
+
'embedding_batch_size',
|
| 140 |
+
'Batch sizes for batch requests',
|
| 141 |
+
buckets=[1, 5, 10, 25, 50, 100, 128, 256]
|
| 142 |
+
)
|
| 143 |
+
|
| 144 |
+
ENCODE_FAILURES = Counter(
|
| 145 |
+
'embedding_encode_failures_total',
|
| 146 |
+
'Total number of encoding failures',
|
| 147 |
+
['reason']
|
| 148 |
+
)
|
| 149 |
+
|
| 150 |
+
MODEL_LOADED = Gauge(
|
| 151 |
+
'embedding_model_loaded',
|
| 152 |
+
'Whether the model is loaded (1) or not (0)'
|
| 153 |
+
)
|
| 154 |
+
|
| 155 |
+
CACHE_HITS = Counter(
|
| 156 |
+
'embedding_cache_hits_total',
|
| 157 |
+
'Total number of cache hits'
|
| 158 |
+
)
|
| 159 |
+
|
| 160 |
+
CACHE_MISSES = Counter(
|
| 161 |
+
'embedding_cache_misses_total',
|
| 162 |
+
'Total number of cache misses'
|
| 163 |
+
)
|
| 164 |
+
|
| 165 |
+
ACTIVE_REQUESTS = Gauge(
|
| 166 |
+
'embedding_active_requests',
|
| 167 |
+
'Number of currently active requests'
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
# ============== Global State ==============
|
| 171 |
|
|
|
|
| 172 |
model: Optional[SentenceTransformer] = None
|
| 173 |
+
model_checksum: Optional[str] = None
|
| 174 |
+
model_load_time: Optional[float] = None
|
| 175 |
+
executor: Optional[ThreadPoolExecutor] = None
|
| 176 |
+
embedding_cache: Optional[TTLCache] = None
|
| 177 |
+
|
| 178 |
+
# Rate limiter
|
| 179 |
+
limiter = Limiter(key_func=get_remote_address)
|
| 180 |
+
|
| 181 |
+
# ============== Helper Functions ==============
|
| 182 |
+
|
| 183 |
+
def compute_model_checksum() -> str:
|
| 184 |
+
"""Вычисляет контрольную сумму модели для версионирования."""
|
| 185 |
+
if model is None:
|
| 186 |
+
return "unknown"
|
| 187 |
+
# Используем хэш от имени модели и параметров
|
| 188 |
+
model_info = f"{MODEL_NAME}:{model.get_sentence_embedding_dimension()}"
|
| 189 |
+
return hashlib.md5(model_info.encode()).hexdigest()[:12]
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def get_cache_key(text: str) -> str:
|
| 193 |
+
"""Генерирует ключ кэша для текста."""
|
| 194 |
+
return hashlib.sha256(text.encode()).hexdigest()
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def prepare_text(
|
| 198 |
+
title: str = "",
|
| 199 |
+
description: str = "",
|
| 200 |
+
requirement: Optional[Dict[str, Any]] = None,
|
| 201 |
+
price: Optional[float] = None,
|
| 202 |
+
district: Optional[str] = None,
|
| 203 |
+
rooms: Optional[int] = None,
|
| 204 |
+
area: Optional[float] = None,
|
| 205 |
+
address: Optional[str] = None
|
| 206 |
+
) -> str:
|
| 207 |
+
"""Объединяет поля в текст для эмбеддинга."""
|
| 208 |
+
parts = []
|
| 209 |
+
|
| 210 |
+
if title:
|
| 211 |
+
parts.append(f"Название: {title}")
|
| 212 |
+
if description:
|
| 213 |
+
parts.append(f"Описание: {description}")
|
| 214 |
+
|
| 215 |
+
if requirement:
|
| 216 |
+
req_parts = [f"{k}: {v}" for k, v in requirement.items() if v is not None]
|
| 217 |
+
if req_parts:
|
| 218 |
+
parts.append(f"Требования: {', '.join(req_parts)}")
|
| 219 |
+
|
| 220 |
+
params = []
|
| 221 |
+
if price is not None:
|
| 222 |
+
params.append(f"цена {price:,.0f}₽")
|
| 223 |
+
if district:
|
| 224 |
+
params.append(f"район {district}")
|
| 225 |
+
if rooms is not None:
|
| 226 |
+
params.append(f"{rooms}-комнатная")
|
| 227 |
+
if area is not None:
|
| 228 |
+
params.append(f"площадь {area}м²")
|
| 229 |
+
if address:
|
| 230 |
+
params.append(f"адрес: {address}")
|
| 231 |
+
|
| 232 |
+
if params:
|
| 233 |
+
parts.append(f"Параметры: {', '.join(params)}")
|
| 234 |
+
|
| 235 |
+
return ". ".join(parts)
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
async def encode_async(texts: List[str]) -> np.ndarray:
|
| 239 |
+
"""
|
| 240 |
+
Асинхронно кодирует тексты через ThreadPoolExecutor.
|
| 241 |
+
Не блокирует event loop FastAPI.
|
| 242 |
+
|
| 243 |
+
Важно: normalize_embeddings=True для корректной работы с pgvector + cosine similarity
|
| 244 |
+
"""
|
| 245 |
+
if model is None:
|
| 246 |
+
raise HTTPException(status_code=503, detail="Model not loaded")
|
| 247 |
+
|
| 248 |
+
loop = asyncio.get_event_loop()
|
| 249 |
+
|
| 250 |
+
try:
|
| 251 |
+
result = await asyncio.wait_for(
|
| 252 |
+
loop.run_in_executor(
|
| 253 |
+
executor,
|
| 254 |
+
lambda: model.encode(
|
| 255 |
+
texts,
|
| 256 |
+
batch_size=32,
|
| 257 |
+
convert_to_numpy=True,
|
| 258 |
+
normalize_embeddings=True, # Критично для cosine similarity!
|
| 259 |
+
show_progress_bar=False
|
| 260 |
+
)
|
| 261 |
+
),
|
| 262 |
+
timeout=ENCODE_TIMEOUT_SECONDS
|
| 263 |
+
)
|
| 264 |
+
return result
|
| 265 |
+
except asyncio.TimeoutError:
|
| 266 |
+
ENCODE_FAILURES.labels(reason="timeout").inc()
|
| 267 |
+
logger.error("encode_timeout", texts_count=len(texts), timeout=ENCODE_TIMEOUT_SECONDS)
|
| 268 |
+
raise HTTPException(status_code=503, detail=f"Encoding timeout after {ENCODE_TIMEOUT_SECONDS}s")
|
| 269 |
+
except Exception as e:
|
| 270 |
+
ENCODE_FAILURES.labels(reason="error").inc()
|
| 271 |
+
logger.error("encode_error", error=str(e), texts_count=len(texts))
|
| 272 |
+
raise HTTPException(status_code=500, detail=f"Encoding error: {str(e)}")
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
async def encode_single_async_with_flag(text: str) -> Tuple[np.ndarray, bool]:
|
| 276 |
+
"""
|
| 277 |
+
Кодирует один текст с кэшированием.
|
| 278 |
+
Возвращает (embedding, cached_flag) для корректного отслеживания.
|
| 279 |
+
"""
|
| 280 |
+
if CACHE_ENABLED and embedding_cache is not None:
|
| 281 |
+
cache_key = get_cache_key(text)
|
| 282 |
+
if cache_key in embedding_cache:
|
| 283 |
+
CACHE_HITS.inc()
|
| 284 |
+
return embedding_cache[cache_key], True
|
| 285 |
+
CACHE_MISSES.inc()
|
| 286 |
+
else:
|
| 287 |
+
cache_key = None
|
| 288 |
|
| 289 |
+
# Генерируем эмбеддинг
|
| 290 |
+
embedding = await encode_async([text])
|
| 291 |
+
result = embedding[0]
|
| 292 |
+
|
| 293 |
+
# Сохраняем в кэш
|
| 294 |
+
if CACHE_ENABLED and embedding_cache is not None and cache_key is not None:
|
| 295 |
+
embedding_cache[cache_key] = result
|
| 296 |
+
|
| 297 |
+
return result, False
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
# ============== Lifespan ==============
|
| 301 |
|
| 302 |
@asynccontextmanager
|
| 303 |
async def lifespan(app: FastAPI):
|
| 304 |
+
"""Загрузка модели и инициализация ресурсов при старте."""
|
| 305 |
+
global model, model_checksum, model_load_time, executor, embedding_cache
|
| 306 |
+
|
| 307 |
+
start_time = time.time()
|
| 308 |
+
logger.info("service_starting", version=SERVICE_VERSION, model=MODEL_NAME)
|
| 309 |
+
|
| 310 |
+
# Инициализация ThreadPoolExecutor
|
| 311 |
+
executor = ThreadPoolExecutor(max_workers=MAX_CONCURRENT_REQUESTS)
|
| 312 |
+
|
| 313 |
+
# Инициализация кэша
|
| 314 |
+
if CACHE_ENABLED:
|
| 315 |
+
embedding_cache = TTLCache(maxsize=CACHE_MAX_SIZE, ttl=CACHE_TTL_SECONDS)
|
| 316 |
+
logger.info("cache_initialized", max_size=CACHE_MAX_SIZE, ttl=CACHE_TTL_SECONDS)
|
| 317 |
+
|
| 318 |
+
# Загрузка модели
|
| 319 |
+
logger.info("model_loading", model=MODEL_NAME)
|
| 320 |
+
try:
|
| 321 |
+
model = SentenceTransformer(MODEL_NAME, device='cpu')
|
| 322 |
+
model_checksum = compute_model_checksum()
|
| 323 |
+
model_load_time = time.time() - start_time
|
| 324 |
+
MODEL_LOADED.set(1)
|
| 325 |
+
logger.info(
|
| 326 |
+
"model_loaded",
|
| 327 |
+
model=MODEL_NAME,
|
| 328 |
+
dimensions=model.get_sentence_embedding_dimension(),
|
| 329 |
+
checksum=model_checksum,
|
| 330 |
+
load_time_seconds=round(model_load_time, 2)
|
| 331 |
+
)
|
| 332 |
+
except Exception as e:
|
| 333 |
+
MODEL_LOADED.set(0)
|
| 334 |
+
logger.error("model_load_failed", error=str(e))
|
| 335 |
+
raise
|
| 336 |
+
|
| 337 |
yield
|
| 338 |
+
|
| 339 |
+
# Cleanup
|
| 340 |
+
logger.info("service_stopping")
|
| 341 |
+
MODEL_LOADED.set(0)
|
| 342 |
+
if executor:
|
| 343 |
+
executor.shutdown(wait=True)
|
| 344 |
model = None
|
| 345 |
+
embedding_cache = None
|
| 346 |
|
| 347 |
|
| 348 |
+
# ============== FastAPI App ==============
|
| 349 |
+
|
| 350 |
app = FastAPI(
|
| 351 |
title="Embedding Service",
|
| 352 |
+
description="""
|
| 353 |
+
## Stateless сервис генерации эмбеддингов для матчинга недвижимости
|
| 354 |
+
|
| 355 |
+
### Версия 2.1.0 (Production-Ready)
|
| 356 |
+
|
| 357 |
+
**Улучшения:**
|
| 358 |
+
- ✅ Асинхронная обработка (не блокирует event loop)
|
| 359 |
+
- ✅ Валидация лимитов (batch size, text length)
|
| 360 |
+
- ✅ Prometheus метрики (`/metrics`)
|
| 361 |
+
- ✅ Rate limiting
|
| 362 |
+
- ✅ In-memory кэширование эмбеддингов
|
| 363 |
+
- ✅ Версионирование модели
|
| 364 |
+
|
| 365 |
+
**Лимиты:**
|
| 366 |
+
- Максимальный размер батча: {max_batch}
|
| 367 |
+
- Максимальная длина текста: {max_text} символов
|
| 368 |
+
- Rate limit: {rate_limit}
|
| 369 |
+
|
| 370 |
+
**Интеграция с Go Backend:**
|
| 371 |
+
```go
|
| 372 |
+
resp, _ := http.Post(embeddingURL+"/prepare-and-embed", "application/json", body)
|
| 373 |
+
// Сохранить embedding в PostgreSQL + pgvector
|
| 374 |
+
```
|
| 375 |
+
""".format(max_batch=MAX_BATCH_SIZE, max_text=MAX_TEXT_LENGTH, rate_limit=RATE_LIMIT),
|
| 376 |
+
version=SERVICE_VERSION,
|
| 377 |
lifespan=lifespan
|
| 378 |
)
|
| 379 |
|
| 380 |
+
# Rate limiting exception handler
|
| 381 |
+
app.state.limiter = limiter
|
| 382 |
+
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
|
| 383 |
+
|
| 384 |
app.add_middleware(
|
| 385 |
CORSMiddleware,
|
| 386 |
+
allow_origins=ALLOWED_ORIGINS,
|
| 387 |
allow_credentials=True,
|
| 388 |
allow_methods=["*"],
|
| 389 |
allow_headers=["*"],
|
| 390 |
)
|
| 391 |
|
| 392 |
|
| 393 |
+
# ============== Middleware ==============
|
| 394 |
+
|
| 395 |
+
@app.middleware("http")
|
| 396 |
+
async def metrics_middleware(request: Request, call_next):
|
| 397 |
+
"""Middleware для сбора метрик."""
|
| 398 |
+
start_time = time.time()
|
| 399 |
+
endpoint = request.url.path
|
| 400 |
+
|
| 401 |
+
ACTIVE_REQUESTS.inc()
|
| 402 |
+
|
| 403 |
+
try:
|
| 404 |
+
response = await call_next(request)
|
| 405 |
+
status = "success" if response.status_code < 400 else "error"
|
| 406 |
+
REQUESTS_TOTAL.labels(endpoint=endpoint, status=status).inc()
|
| 407 |
+
return response
|
| 408 |
+
except Exception as e:
|
| 409 |
+
REQUESTS_TOTAL.labels(endpoint=endpoint, status="error").inc()
|
| 410 |
+
raise
|
| 411 |
+
finally:
|
| 412 |
+
ACTIVE_REQUESTS.dec()
|
| 413 |
+
REQUEST_LATENCY.labels(endpoint=endpoint).observe(time.time() - start_time)
|
| 414 |
+
|
| 415 |
+
|
| 416 |
# ============== Pydantic Models ==============
|
| 417 |
|
| 418 |
class EmbedRequest(BaseModel):
|
| 419 |
"""Запрос на генерацию эмбеддинга из готового текста."""
|
| 420 |
+
text: str = Field(..., min_length=1, max_length=MAX_TEXT_LENGTH, description="Текст для эмбеддинга")
|
| 421 |
+
|
| 422 |
+
@field_validator('text')
|
| 423 |
+
@classmethod
|
| 424 |
+
def validate_text_length(cls, v: str) -> str:
|
| 425 |
+
if len(v) > MAX_TEXT_LENGTH:
|
| 426 |
+
raise ValueError(f"Text length exceeds maximum of {MAX_TEXT_LENGTH} characters")
|
| 427 |
+
return v
|
| 428 |
|
| 429 |
|
| 430 |
class EmbedResponse(BaseModel):
|
| 431 |
"""Ответ с эмбеддингом."""
|
| 432 |
embedding: List[float]
|
| 433 |
dimensions: int
|
| 434 |
+
model_version: str = Field(description="Версия модели")
|
| 435 |
+
model_checksum: str = Field(description="Контрольная сумма модели")
|
| 436 |
+
cached: bool = Field(default=False, description="Результат из кэша")
|
| 437 |
|
| 438 |
|
| 439 |
class PrepareAndEmbedRequest(BaseModel):
|
|
|
|
| 442 |
|
| 443 |
Это ОСНОВНОЙ endpoint для интеграции с Go Backend.
|
| 444 |
"""
|
| 445 |
+
title: str = Field(default="", max_length=500, description="Название")
|
| 446 |
+
description: str = Field(default="", max_length=5000, description="Описание")
|
| 447 |
requirement: Optional[Dict[str, Any]] = Field(default=None, description="Требования (JSON)")
|
| 448 |
+
price: Optional[float] = Field(default=None, ge=0, description="Цена")
|
| 449 |
+
district: Optional[str] = Field(default=None, max_length=200, description="Район")
|
| 450 |
+
rooms: Optional[int] = Field(default=None, ge=0, le=100, description="Количество комнат")
|
| 451 |
+
area: Optional[float] = Field(default=None, ge=0, description="Площадь")
|
| 452 |
+
address: Optional[str] = Field(default=None, max_length=500, description="Адрес")
|
| 453 |
|
| 454 |
|
| 455 |
class PrepareAndEmbedResponse(BaseModel):
|
|
|
|
| 457 |
embedding: List[float]
|
| 458 |
dimensions: int
|
| 459 |
prepared_text: str = Field(description="Подготовленный текст (для отладки)")
|
| 460 |
+
model_version: str = Field(description="Версия модели")
|
| 461 |
+
model_checksum: str = Field(description="Контрольная сумма модели")
|
| 462 |
+
cached: bool = Field(default=False, description="Результат из кэша")
|
| 463 |
|
| 464 |
|
| 465 |
class BatchItem(BaseModel):
|
| 466 |
"""Один элемент для пакетной обработки."""
|
| 467 |
entity_id: str = Field(..., description="ID объекта")
|
| 468 |
+
title: str = Field(default="", max_length=500)
|
| 469 |
+
description: str = Field(default="", max_length=5000)
|
| 470 |
requirement: Optional[Dict[str, Any]] = None
|
| 471 |
+
price: Optional[float] = Field(default=None, ge=0)
|
| 472 |
+
district: Optional[str] = Field(default=None, max_length=200)
|
| 473 |
+
rooms: Optional[int] = Field(default=None, ge=0, le=100)
|
| 474 |
+
area: Optional[float] = Field(default=None, ge=0)
|
| 475 |
+
address: Optional[str] = Field(default=None, max_length=500)
|
| 476 |
|
| 477 |
|
| 478 |
class BatchRequest(BaseModel):
|
| 479 |
"""Запрос на пакетную обработку."""
|
| 480 |
+
items: List[BatchItem] = Field(..., max_length=MAX_BATCH_SIZE)
|
| 481 |
+
|
| 482 |
+
@field_validator('items')
|
| 483 |
+
@classmethod
|
| 484 |
+
def validate_batch_size(cls, v: List[BatchItem]) -> List[BatchItem]:
|
| 485 |
+
if len(v) > MAX_BATCH_SIZE:
|
| 486 |
+
raise ValueError(f"Batch size exceeds maximum of {MAX_BATCH_SIZE} items")
|
| 487 |
+
if len(v) == 0:
|
| 488 |
+
raise ValueError("Batch cannot be empty")
|
| 489 |
+
return v
|
| 490 |
|
| 491 |
|
| 492 |
class BatchResultItem(BaseModel):
|
|
|
|
| 495 |
embedding: List[float]
|
| 496 |
success: bool = True
|
| 497 |
error: Optional[str] = None
|
| 498 |
+
cached: bool = Field(default=False, description="Результат из кэша")
|
| 499 |
|
| 500 |
|
| 501 |
class BatchResponse(BaseModel):
|
|
|
|
| 504 |
dimensions: int
|
| 505 |
total: int
|
| 506 |
successful: int
|
| 507 |
+
cached_count: int = Field(default=0, description="Количество результатов из кэша")
|
| 508 |
+
model_version: str
|
| 509 |
+
model_checksum: str
|
| 510 |
|
| 511 |
|
| 512 |
class HealthResponse(BaseModel):
|
|
|
|
| 514 |
status: str
|
| 515 |
model: str
|
| 516 |
dimensions: int
|
| 517 |
+
version: str
|
| 518 |
+
model_checksum: str
|
| 519 |
+
cache_enabled: bool
|
| 520 |
+
cache_size: int = Field(default=0)
|
| 521 |
|
| 522 |
|
| 523 |
+
class ReindexRequest(BaseModel):
|
| 524 |
+
"""
|
| 525 |
+
Запрос на переиндексацию объекта.
|
| 526 |
+
"""
|
| 527 |
+
entity_id: str = Field(..., description="ID объекта для переиндексации")
|
| 528 |
+
entity_type: str = Field(default="lead", description="Тип: 'lead' или 'property'")
|
| 529 |
+
title: str = Field(default="", max_length=500, description="Название")
|
| 530 |
+
description: str = Field(default="", max_length=5000, description="Описание")
|
| 531 |
+
requirement: Optional[Dict[str, Any]] = Field(default=None, description="Требования (JSON)")
|
| 532 |
+
price: Optional[float] = Field(default=None, ge=0, description="Цена")
|
| 533 |
+
district: Optional[str] = Field(default=None, max_length=200, description="Район")
|
| 534 |
+
rooms: Optional[int] = Field(default=None, ge=0, le=100, description="Количество комнат")
|
| 535 |
+
area: Optional[float] = Field(default=None, ge=0, description="Площадь")
|
| 536 |
+
address: Optional[str] = Field(default=None, max_length=500, description="Адрес")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
|
|
|
|
|
|
|
| 538 |
|
| 539 |
+
class ReindexResponse(BaseModel):
|
| 540 |
+
"""Ответ на переиндексацию."""
|
| 541 |
+
entity_id: str
|
| 542 |
+
entity_type: str
|
| 543 |
+
embedding: List[float]
|
| 544 |
+
dimensions: int
|
| 545 |
+
prepared_text: str
|
| 546 |
+
model_version: str
|
| 547 |
+
model_checksum: str
|
| 548 |
+
message: str = Field(default="Reindex successful. Update embedding in your database.")
|
| 549 |
|
| 550 |
|
| 551 |
# ============== Endpoints ==============
|
|
|
|
| 555 |
"""Информация о сервисе."""
|
| 556 |
return {
|
| 557 |
"service": "Embedding Service",
|
| 558 |
+
"version": SERVICE_VERSION,
|
| 559 |
"type": "STATELESS",
|
| 560 |
"description": "Генерирует эмбеддинги. Хранение на стороне Go Backend + pgvector.",
|
| 561 |
+
"model": MODEL_NAME,
|
| 562 |
+
"model_checksum": model_checksum,
|
| 563 |
+
"limits": {
|
| 564 |
+
"max_batch_size": MAX_BATCH_SIZE,
|
| 565 |
+
"max_text_length": MAX_TEXT_LENGTH,
|
| 566 |
+
"rate_limit": RATE_LIMIT,
|
| 567 |
+
"rate_limit_batch": RATE_LIMIT_BATCH
|
| 568 |
+
},
|
| 569 |
+
"cache": {
|
| 570 |
+
"enabled": CACHE_ENABLED,
|
| 571 |
+
"ttl_seconds": CACHE_TTL_SECONDS,
|
| 572 |
+
"max_size": CACHE_MAX_SIZE
|
| 573 |
+
},
|
| 574 |
"endpoints": {
|
| 575 |
"POST /embed": "Эмбеддинг из готового текста",
|
| 576 |
"POST /prepare-and-embed": "Подготовка полей + эмбеддинг (создание)",
|
|
|
|
| 578 |
"POST /batch": "Пакетная обработка (создание)",
|
| 579 |
"POST /reindex-batch": "Пакетная переиндексация (обновление)",
|
| 580 |
"GET /health": "Проверка здоровья",
|
| 581 |
+
"GET /model-info": "Информация о модели для pgvector",
|
| 582 |
+
"GET /metrics": "Prometheus метрики"
|
| 583 |
},
|
| 584 |
"docs": "/docs"
|
| 585 |
}
|
|
|
|
| 590 |
"""Проверка здоровья сервиса."""
|
| 591 |
if model is None:
|
| 592 |
raise HTTPException(status_code=503, detail="Model not loaded")
|
| 593 |
+
|
| 594 |
+
cache_size = len(embedding_cache) if embedding_cache else 0
|
| 595 |
+
|
| 596 |
return HealthResponse(
|
| 597 |
status="healthy",
|
| 598 |
model=MODEL_NAME,
|
| 599 |
+
dimensions=model.get_sentence_embedding_dimension(),
|
| 600 |
+
version=SERVICE_VERSION,
|
| 601 |
+
model_checksum=model_checksum or "unknown",
|
| 602 |
+
cache_enabled=CACHE_ENABLED,
|
| 603 |
+
cache_size=cache_size
|
| 604 |
+
)
|
| 605 |
+
|
| 606 |
+
|
| 607 |
+
@app.get("/metrics", response_class=PlainTextResponse)
|
| 608 |
+
async def metrics():
|
| 609 |
+
"""Prometheus метрики."""
|
| 610 |
+
return Response(
|
| 611 |
+
content=generate_latest(),
|
| 612 |
+
media_type=CONTENT_TYPE_LATEST
|
| 613 |
)
|
| 614 |
|
| 615 |
|
| 616 |
@app.post("/embed", response_model=EmbedResponse)
|
| 617 |
+
@limiter.limit(RATE_LIMIT)
|
| 618 |
+
async def embed_text(request: Request, body: EmbedRequest):
|
| 619 |
"""
|
| 620 |
Генерация эмбеддинга из готового текста.
|
| 621 |
|
| 622 |
Используйте если текст уже подготовлен на стороне бэкенда.
|
| 623 |
+
|
| 624 |
+
**Rate limit:** {rate_limit}
|
| 625 |
+
""".format(rate_limit=RATE_LIMIT)
|
| 626 |
if model is None:
|
| 627 |
raise HTTPException(status_code=503, detail="Model not loaded")
|
| 628 |
|
| 629 |
+
embedding, cached = await encode_single_async_with_flag(body.text)
|
| 630 |
+
|
| 631 |
return EmbedResponse(
|
| 632 |
embedding=embedding.tolist(),
|
| 633 |
+
dimensions=len(embedding),
|
| 634 |
+
model_version=SERVICE_VERSION,
|
| 635 |
+
model_checksum=model_checksum or "unknown",
|
| 636 |
+
cached=cached
|
| 637 |
)
|
| 638 |
|
| 639 |
|
| 640 |
@app.post("/prepare-and-embed", response_model=PrepareAndEmbedResponse)
|
| 641 |
+
@limiter.limit(RATE_LIMIT)
|
| 642 |
+
async def prepare_and_embed(request: Request, body: PrepareAndEmbedRequest):
|
| 643 |
"""
|
| 644 |
Подготовка текста из полей и генерация эмбеддинга.
|
| 645 |
|
| 646 |
⭐ ОСНОВНОЙ ENDPOINT для интеграции с Go Backend.
|
| 647 |
|
| 648 |
+
**Rate limit:** {rate_limit}
|
| 649 |
+
|
| 650 |
+
**Пример запроса:**
|
| 651 |
```json
|
| 652 |
+
{{
|
| 653 |
"title": "Ищу квартиру в центре",
|
| 654 |
"description": "Для семьи с детьми",
|
| 655 |
"price": 10000000,
|
| 656 |
"district": "Центральный",
|
| 657 |
"rooms": 3
|
| 658 |
+
}}
|
| 659 |
```
|
| 660 |
|
| 661 |
Go Backend сохраняет embedding в PostgreSQL:
|
| 662 |
```sql
|
| 663 |
UPDATE leads SET embedding = $1 WHERE lead_id = $2
|
| 664 |
```
|
| 665 |
+
""".format(rate_limit=RATE_LIMIT)
|
| 666 |
if model is None:
|
| 667 |
raise HTTPException(status_code=503, detail="Model not loaded")
|
| 668 |
|
| 669 |
prepared = prepare_text(
|
| 670 |
+
title=body.title,
|
| 671 |
+
description=body.description,
|
| 672 |
+
requirement=body.requirement,
|
| 673 |
+
price=body.price,
|
| 674 |
+
district=body.district,
|
| 675 |
+
rooms=body.rooms,
|
| 676 |
+
area=body.area,
|
| 677 |
+
address=body.address
|
| 678 |
)
|
| 679 |
|
| 680 |
if not prepared:
|
| 681 |
raise HTTPException(status_code=400, detail="All fields are empty")
|
| 682 |
|
| 683 |
+
embedding, cached = await encode_single_async_with_flag(prepared)
|
| 684 |
+
|
| 685 |
+
logger.info(
|
| 686 |
+
"prepare_and_embed",
|
| 687 |
+
text_length=len(prepared),
|
| 688 |
+
cached=cached
|
| 689 |
+
)
|
| 690 |
|
| 691 |
return PrepareAndEmbedResponse(
|
| 692 |
embedding=embedding.tolist(),
|
| 693 |
dimensions=len(embedding),
|
| 694 |
+
prepared_text=prepared,
|
| 695 |
+
model_version=SERVICE_VERSION,
|
| 696 |
+
model_checksum=model_checksum or "unknown",
|
| 697 |
+
cached=cached
|
| 698 |
)
|
| 699 |
|
| 700 |
|
| 701 |
@app.post("/batch", response_model=BatchResponse)
|
| 702 |
+
@limiter.limit(RATE_LIMIT_BATCH)
|
| 703 |
+
async def batch_process(request: Request, body: BatchRequest):
|
| 704 |
"""
|
| 705 |
Пакетная обработка нескольких объектов.
|
| 706 |
|
| 707 |
+
**Rate limit:** {rate_limit}
|
| 708 |
+
**Max batch size:** {max_batch}
|
| 709 |
+
|
| 710 |
Используйте для массовой индексации при первоначальной загрузке.
|
| 711 |
|
| 712 |
+
**Пример:**
|
| 713 |
```json
|
| 714 |
+
{{
|
| 715 |
"items": [
|
| 716 |
+
{{"entity_id": "lead-1", "title": "Ищу квартиру", "rooms": 3}},
|
| 717 |
+
{{"entity_id": "lead-2", "title": "Нужен офис", "area": 100}}
|
| 718 |
]
|
| 719 |
+
}}
|
| 720 |
```
|
| 721 |
+
""".format(rate_limit=RATE_LIMIT_BATCH, max_batch=MAX_BATCH_SIZE)
|
| 722 |
if model is None:
|
| 723 |
raise HTTPException(status_code=503, detail="Model not loaded")
|
| 724 |
|
| 725 |
+
BATCH_SIZE_HISTOGRAM.observe(len(body.items))
|
| 726 |
+
|
| 727 |
results = []
|
| 728 |
+
texts_to_encode = []
|
| 729 |
+
items_to_encode = []
|
| 730 |
+
cached_count = 0
|
| 731 |
|
| 732 |
+
# Подготовка текстов и проверка кэша
|
| 733 |
+
for item in body.items:
|
| 734 |
prepared = prepare_text(
|
| 735 |
title=item.title,
|
| 736 |
description=item.description,
|
|
|
|
| 741 |
area=item.area,
|
| 742 |
address=item.address
|
| 743 |
)
|
| 744 |
+
|
| 745 |
+
if not prepared:
|
|
|
|
|
|
|
| 746 |
results.append(BatchResultItem(
|
| 747 |
entity_id=item.entity_id,
|
| 748 |
embedding=[],
|
| 749 |
success=False,
|
| 750 |
error="All fields are empty"
|
| 751 |
))
|
| 752 |
+
continue
|
| 753 |
+
|
| 754 |
+
# Проверяем кэш
|
| 755 |
+
if CACHE_ENABLED and embedding_cache is not None:
|
| 756 |
+
cache_key = get_cache_key(prepared)
|
| 757 |
+
if cache_key in embedding_cache:
|
| 758 |
+
CACHE_HITS.inc()
|
| 759 |
+
results.append(BatchResultItem(
|
| 760 |
+
entity_id=item.entity_id,
|
| 761 |
+
embedding=embedding_cache[cache_key].tolist(),
|
| 762 |
+
success=True,
|
| 763 |
+
cached=True
|
| 764 |
+
))
|
| 765 |
+
cached_count += 1
|
| 766 |
+
continue
|
| 767 |
+
CACHE_MISSES.inc()
|
| 768 |
+
|
| 769 |
+
texts_to_encode.append(prepared)
|
| 770 |
+
items_to_encode.append(item)
|
| 771 |
+
|
| 772 |
+
# Генерация эмбеддингов батчем для некэшированных
|
| 773 |
+
if texts_to_encode:
|
| 774 |
+
embeddings = await encode_async(texts_to_encode)
|
| 775 |
+
|
| 776 |
+
for i, item in enumerate(items_to_encode):
|
| 777 |
+
embedding = embeddings[i]
|
| 778 |
+
|
| 779 |
+
# Сохраняем в кэш
|
| 780 |
+
if CACHE_ENABLED and embedding_cache is not None:
|
| 781 |
+
cache_key = get_cache_key(texts_to_encode[i])
|
| 782 |
+
embedding_cache[cache_key] = embedding
|
| 783 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 784 |
results.append(BatchResultItem(
|
| 785 |
entity_id=item.entity_id,
|
| 786 |
+
embedding=embedding.tolist(),
|
| 787 |
+
success=True,
|
| 788 |
+
cached=False
|
| 789 |
))
|
| 790 |
|
| 791 |
# Сортировка по порядку входных items
|
| 792 |
results_map = {r.entity_id: r for r in results}
|
| 793 |
+
sorted_results = [results_map[item.entity_id] for item in body.items]
|
| 794 |
successful = sum(1 for r in sorted_results if r.success)
|
| 795 |
|
| 796 |
+
logger.info(
|
| 797 |
+
"batch_process",
|
| 798 |
+
total=len(body.items),
|
| 799 |
+
successful=successful,
|
| 800 |
+
cached=cached_count
|
| 801 |
+
)
|
| 802 |
+
|
| 803 |
return BatchResponse(
|
| 804 |
results=sorted_results,
|
| 805 |
+
dimensions=model.get_sentence_embedding_dimension(),
|
| 806 |
+
total=len(body.items),
|
| 807 |
+
successful=successful,
|
| 808 |
+
cached_count=cached_count,
|
| 809 |
+
model_version=SERVICE_VERSION,
|
| 810 |
+
model_checksum=model_checksum or "unknown"
|
| 811 |
)
|
| 812 |
|
| 813 |
|
|
|
|
| 825 |
|
| 826 |
return {
|
| 827 |
"model_name": MODEL_NAME,
|
| 828 |
+
"model_version": SERVICE_VERSION,
|
| 829 |
+
"model_checksum": model_checksum,
|
| 830 |
"dimensions": dims,
|
| 831 |
+
"model_load_time_seconds": round(model_load_time, 2) if model_load_time else None,
|
| 832 |
+
"limits": {
|
| 833 |
+
"max_batch_size": MAX_BATCH_SIZE,
|
| 834 |
+
"max_text_length": MAX_TEXT_LENGTH,
|
| 835 |
+
"encode_timeout_seconds": ENCODE_TIMEOUT_SECONDS
|
| 836 |
+
},
|
| 837 |
+
"cache": {
|
| 838 |
+
"enabled": CACHE_ENABLED,
|
| 839 |
+
"ttl_seconds": CACHE_TTL_SECONDS,
|
| 840 |
+
"current_size": len(embedding_cache) if embedding_cache else 0,
|
| 841 |
+
"max_size": CACHE_MAX_SIZE
|
| 842 |
+
},
|
| 843 |
"sql_examples": {
|
| 844 |
"extension": "CREATE EXTENSION IF NOT EXISTS vector;",
|
| 845 |
"column": f"ALTER TABLE leads ADD COLUMN embedding vector({dims});",
|
|
|
|
| 855 |
}
|
| 856 |
|
| 857 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 858 |
@app.post("/reindex", response_model=ReindexResponse)
|
| 859 |
+
@limiter.limit(RATE_LIMIT)
|
| 860 |
+
async def reindex_entity(request: Request, body: ReindexRequest):
|
| 861 |
"""
|
| 862 |
Переиндексация объекта (лида или недвижимости).
|
| 863 |
|
| 864 |
⭐ Используйте когда пользователь ОБНОВИЛ данные объекта.
|
| 865 |
|
| 866 |
+
**Rate limit:** {rate_limit}
|
| 867 |
+
|
| 868 |
+
**Сценарий:**
|
| 869 |
1. Пользователь создал лида → POST /prepare-and-embed → сохранили embedding
|
| 870 |
2. Пользователь ИЗМЕНИЛ лида → POST /reindex → получили новый embedding
|
| 871 |
3. Go Backend обновляет embedding в PostgreSQL
|
| 872 |
|
| 873 |
+
**Пример запроса:**
|
| 874 |
```json
|
| 875 |
+
{{
|
| 876 |
"entity_id": "lead-123",
|
| 877 |
"entity_type": "lead",
|
| 878 |
"title": "Обновлённый заголовок",
|
|
|
|
| 880 |
"price": 12000000,
|
| 881 |
"district": "Арбат",
|
| 882 |
"rooms": 4
|
| 883 |
+
}}
|
| 884 |
```
|
| 885 |
|
| 886 |
Go Backend должен выполнить:
|
| 887 |
```sql
|
| 888 |
UPDATE leads SET embedding = $1, updated_at = NOW() WHERE lead_id = $2
|
| 889 |
```
|
| 890 |
+
""".format(rate_limit=RATE_LIMIT)
|
| 891 |
if model is None:
|
| 892 |
raise HTTPException(status_code=503, detail="Model not loaded")
|
| 893 |
|
| 894 |
prepared = prepare_text(
|
| 895 |
+
title=body.title,
|
| 896 |
+
description=body.description,
|
| 897 |
+
requirement=body.requirement,
|
| 898 |
+
price=body.price,
|
| 899 |
+
district=body.district,
|
| 900 |
+
rooms=body.rooms,
|
| 901 |
+
area=body.area,
|
| 902 |
+
address=body.address
|
| 903 |
)
|
| 904 |
|
| 905 |
if not prepared:
|
| 906 |
raise HTTPException(status_code=400, detail="All fields are empty - nothing to reindex")
|
| 907 |
|
| 908 |
+
embedding, _ = await encode_single_async_with_flag(prepared)
|
| 909 |
+
|
| 910 |
+
logger.info(
|
| 911 |
+
"reindex",
|
| 912 |
+
entity_id=body.entity_id,
|
| 913 |
+
entity_type=body.entity_type,
|
| 914 |
+
text_length=len(prepared)
|
| 915 |
+
)
|
| 916 |
|
| 917 |
return ReindexResponse(
|
| 918 |
+
entity_id=body.entity_id,
|
| 919 |
+
entity_type=body.entity_type,
|
| 920 |
embedding=embedding.tolist(),
|
| 921 |
dimensions=len(embedding),
|
| 922 |
prepared_text=prepared,
|
| 923 |
+
model_version=SERVICE_VERSION,
|
| 924 |
+
model_checksum=model_checksum or "unknown",
|
| 925 |
+
message=f"Reindex successful for {body.entity_type} '{body.entity_id}'. Update embedding in your database."
|
| 926 |
)
|
| 927 |
|
| 928 |
|
| 929 |
@app.post("/reindex-batch", response_model=BatchResponse)
|
| 930 |
+
@limiter.limit(RATE_LIMIT_BATCH)
|
| 931 |
+
async def reindex_batch(request: Request, body: BatchRequest):
|
| 932 |
"""
|
| 933 |
Пакетная переиндексация нескольких объектов.
|
| 934 |
|
| 935 |
+
**Rate limit:** {rate_limit}
|
| 936 |
+
|
| 937 |
Используйте когда нужно переиндексировать много объектов после
|
| 938 |
массового обновления или изменения модели.
|
| 939 |
+
""".format(rate_limit=RATE_LIMIT_BATCH)
|
| 940 |
+
return await batch_process(request, body)
|
| 941 |
+
|
| 942 |
+
|
| 943 |
+
@app.post("/cache/clear")
|
| 944 |
+
async def clear_cache():
|
| 945 |
+
"""
|
| 946 |
+
Очистка кэша эмбеддингов.
|
| 947 |
+
|
| 948 |
+
Используйте при обновлении модели или для принудительного пересчёта.
|
| 949 |
+
"""
|
| 950 |
+
global embedding_cache
|
| 951 |
+
|
| 952 |
+
if not CACHE_ENABLED:
|
| 953 |
+
return {"message": "Cache is disabled", "cleared": 0}
|
| 954 |
+
|
| 955 |
+
if embedding_cache is None:
|
| 956 |
+
return {"message": "Cache not initialized", "cleared": 0}
|
| 957 |
+
|
| 958 |
+
size_before = len(embedding_cache)
|
| 959 |
+
embedding_cache.clear()
|
| 960 |
+
|
| 961 |
+
logger.info("cache_cleared", size_before=size_before)
|
| 962 |
+
|
| 963 |
+
return {
|
| 964 |
+
"message": "Cache cleared successfully",
|
| 965 |
+
"cleared": size_before
|
| 966 |
+
}
|
| 967 |
+
|
| 968 |
|
| 969 |
+
@app.get("/cache/stats")
|
| 970 |
+
async def cache_stats():
|
| 971 |
"""
|
| 972 |
+
Статистика кэша эмбеддингов.
|
| 973 |
+
"""
|
| 974 |
+
if not CACHE_ENABLED:
|
| 975 |
+
return {
|
| 976 |
+
"enabled": False,
|
| 977 |
+
"message": "Cache is disabled"
|
| 978 |
+
}
|
| 979 |
|
| 980 |
+
return {
|
| 981 |
+
"enabled": True,
|
| 982 |
+
"current_size": len(embedding_cache) if embedding_cache else 0,
|
| 983 |
+
"max_size": CACHE_MAX_SIZE,
|
| 984 |
+
"ttl_seconds": CACHE_TTL_SECONDS,
|
| 985 |
+
"utilization_percent": round(
|
| 986 |
+
(len(embedding_cache) / CACHE_MAX_SIZE * 100) if embedding_cache else 0, 2
|
| 987 |
+
)
|
| 988 |
+
}
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# Requirements for HuggingFace Space
|
| 2 |
-
# Оптимизировано для стабильной работы
|
| 3 |
|
| 4 |
fastapi==0.104.1
|
| 5 |
uvicorn[standard]==0.24.0
|
|
@@ -15,3 +15,13 @@ transformers==4.36.2
|
|
| 15 |
sentence-transformers==2.3.1
|
| 16 |
huggingface_hub>=0.19.0,<0.20.0
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Requirements for HuggingFace Space
|
| 2 |
+
# Оптимизировано для стабильной работы и production-ready
|
| 3 |
|
| 4 |
fastapi==0.104.1
|
| 5 |
uvicorn[standard]==0.24.0
|
|
|
|
| 15 |
sentence-transformers==2.3.1
|
| 16 |
huggingface_hub>=0.19.0,<0.20.0
|
| 17 |
|
| 18 |
+
# Production-ready улучшения (v2.1.0)
|
| 19 |
+
prometheus-client>=0.19.0 # Метрики для мониторинга
|
| 20 |
+
slowapi>=0.1.9 # Rate limiting
|
| 21 |
+
structlog>=23.2.0 # Structured logging (JSON)
|
| 22 |
+
cachetools>=5.3.0 # In-memory кэширование
|
| 23 |
+
redis>=5.0.0 # Redis кэширование (опционально)
|
| 24 |
+
opentelemetry-api>=1.21.0 # Tracing (опционально)
|
| 25 |
+
opentelemetry-sdk>=1.21.0
|
| 26 |
+
opentelemetry-instrumentation-fastapi>=0.42b0
|
| 27 |
+
|