test / app.py
u7un's picture
Create app.py
3d458c2 verified
import os

import faiss
import numpy as np
import requests
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
app = FastAPI()

# Allow cross-origin access so the Vercel-hosted frontend can call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with your own Vercel domain.
    allow_methods=["*"],
    allow_headers=["*"],
)

# Embedding model; initialised once, when the Space starts.
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

# Module-level state shared by the endpoints below:
#   index  - vector index, created lazily by the first upload (None until then)
#   chunks - text fragments of every uploaded PDF, in insertion order
index = None
chunks = []
@app.post("/upload")
async def upload_pdf(file: UploadFile = File(...)):
    """Extract the text of an uploaded PDF and add it to the search index.

    The text of all pages is concatenated and cut into fixed-size
    800-character chunks. The chunks are embedded with the module-level
    ``model`` and added to the module-level FAISS ``index`` (created on the
    first successful upload). The chunk texts are appended to the
    module-level ``chunks`` list in the same order, so the position of a
    vector in ``index`` matches the position of its text in ``chunks``.

    Args:
        file: The uploaded PDF file.

    Returns:
        A dict with a single ``"message"`` key reporting how many chunks
        were added (zero when no text could be extracted).
    """
    global index, chunks

    pdf_reader = PdfReader(file.file)
    # ``or ""`` keeps the join safe if a page yields no text at all.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Simple fixed-width chunking.
    chunk_size = 800
    new_chunks = [
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]

    # A PDF without extractable text produces no chunks; skip embedding in
    # that case, because the result of encoding an empty list is not
    # guaranteed to be 2-D and ``shape[1]`` below would fail.
    if new_chunks:
        embeddings = np.asarray(model.encode(new_chunks), dtype="float32")
        if index is None:
            index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)
        # Record the texts only after the vectors are stored, so ``chunks``
        # and ``index`` cannot get out of step if embedding/indexing raises.
        chunks.extend(new_chunks)

    return {"message": f"Загружено {len(new_chunks)} фрагментов"}
@app.post("/ask")
async def ask(question: str = Form(...)):
    """Answer a question using the most relevant indexed PDF chunks.

    The question is embedded with the module-level ``model``, the nearest
    chunks (at most three) are looked up in the module-level ``index``, and
    their text is sent as context to the DeepSeek chat-completions API
    together with the question.

    Args:
        question: The user's question, submitted as a form field.

    Returns:
        A dict with a single ``"answer"`` key: either the model's reply or,
        when nothing has been uploaded yet, a prompt to upload a PDF first.

    Raises:
        HTTPException: 500 if ``DEEPSEEK_API_KEY`` is not set; 502 if the
            DeepSeek request fails or returns an unexpected payload.
    """
    if index is None or not chunks:
        return {"answer": "Сначала загрузи PDF!"}

    api_key = os.getenv("DEEPSEEK_API_KEY")
    if not api_key:
        # Without this check the request would be sent as "Bearer None".
        raise HTTPException(
            status_code=500, detail="DEEPSEEK_API_KEY is not configured"
        )

    # Nearest-neighbour search.
    q_emb = np.asarray(model.encode([question]), dtype="float32")
    top_k = min(3, len(chunks))
    _, neighbour_ids = index.search(q_emb, k=top_k)
    # FAISS pads missing results with -1, which as a list index would
    # silently select the last chunk; keep only valid positions.
    context = "\n".join(
        chunks[i] for i in neighbour_ids[0] if 0 <= i < len(chunks)
    )

    # Query DeepSeek (or any other compatible API).
    try:
        response = requests.post(
            "https://api.deepseek.com/v1/chat/completions",
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "model": "deepseek-chat",
                "messages": [
                    {"role": "system", "content": f"Отвечай кратко на основе текста:\n{context}"},
                    {"role": "user", "content": question},
                ],
                "temperature": 0.1,
            },
            timeout=60,  # never wait on the upstream API indefinitely
        )
        response.raise_for_status()
        answer = response.json()["choices"][0]["message"]["content"]
    except requests.RequestException as err:
        raise HTTPException(
            status_code=502, detail="Request to the DeepSeek API failed"
        ) from err
    except (KeyError, IndexError, TypeError, ValueError) as err:
        raise HTTPException(
            status_code=502, detail="Unexpected response from the DeepSeek API"
        ) from err

    return {"answer": answer}