import os

import faiss
import numpy as np
import requests
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
|
|
# Name of the sentence-transformers model used to embed both the document
# chunks and the incoming questions.
_EMBEDDING_MODEL_NAME = "paraphrase-multilingual-MiniLM-L12-v2"

# CORS settings: every origin, HTTP method and header is allowed.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}

app = FastAPI()
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)

# Process-wide state shared by the request handlers:
#   model  - embedding model, loaded once at import time
#   index  - FAISS index; stays None until the upload handler creates it
#   chunks - text fragments; position i is looked up by the label i that the
#            FAISS search returns
model = SentenceTransformer(_EMBEDDING_MODEL_NAME)
index = None
chunks = []
|
|
@app.post("/upload")
async def upload_pdf(file: UploadFile = File(...)):
    """Extract the text of an uploaded PDF, embed it in chunks and index it.

    The text of all pages is concatenated, cut into fixed-size character
    fragments, encoded with the module-level ``model`` and appended to the
    module-level FAISS ``index`` (created on first use). The fragments are
    appended to ``chunks`` in the same order as their vectors.

    Args:
        file: The uploaded PDF file.

    Returns:
        A dict with a ``message`` stating how many fragments were stored.
        A PDF without extractable text stores nothing and reports 0.
    """
    global index, chunks

    chunk_size = 800  # characters per fragment

    pdf_reader = PdfReader(file.file)
    # Guard against a page yielding None/empty text (e.g. no text layer);
    # join() also avoids building the string by repeated concatenation.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    pieces = (
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    )
    # Whitespace-only fragments carry no content worth retrieving.
    new_chunks = [piece for piece in pieces if piece.strip()]

    if new_chunks:
        # NOTE: encoding is CPU-bound and runs inside the event loop.
        embeddings = np.asarray(model.encode(new_chunks), dtype="float32")

        if index is None:
            index = faiss.IndexFlatL2(embeddings.shape[1])

        # Extend `chunks` only after the vectors are in the index, so a
        # failure above cannot leave the two structures out of step.
        index.add(embeddings)
        chunks.extend(new_chunks)

    return {"message": f"Загружено {len(new_chunks)} фрагментов"}
|
|
@app.post("/ask")
async def ask(question: str = Form(...)):
    """Answer a question using the most similar indexed text fragments.

    The question is embedded with the module-level ``model``, up to three
    nearest fragments are fetched from the FAISS ``index`` and sent as
    context to the DeepSeek chat-completions API.

    Args:
        question: The user's question (form field).

    Returns:
        A dict with an ``answer`` key: the model's reply, or a hint to upload
        a PDF first when nothing has been indexed yet.

    Raises:
        HTTPException: 500 if ``DEEPSEEK_API_KEY`` is not set; 502 if the
            DeepSeek request fails or its response has an unexpected shape.
    """
    # Test for None explicitly instead of relying on the truthiness of the
    # FAISS index object.
    if index is None or index.ntotal == 0 or not chunks:
        return {"answer": "Сначала загрузи PDF!"}

    api_key = os.getenv("DEEPSEEK_API_KEY")
    if not api_key:
        # Without this check the request would be sent as "Bearer None".
        raise HTTPException(
            status_code=500,
            detail="Не задан ключ DEEPSEEK_API_KEY",
        )

    q_emb = np.asarray(model.encode([question]), dtype="float32")

    # Never ask for more neighbours than there are vectors: FAISS pads
    # missing results with label -1, which would silently select chunks[-1].
    top_k = min(3, index.ntotal)
    _, labels = index.search(q_emb, k=top_k)
    context = "\n".join(
        chunks[i] for i in labels[0] if 0 <= i < len(chunks)
    )

    try:
        # NOTE: requests is blocking; the timeout bounds how long this
        # handler can stall while waiting for the upstream API.
        response = requests.post(
            "https://api.deepseek.com/v1/chat/completions",
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "model": "deepseek-chat",
                "messages": [
                    {
                        "role": "system",
                        "content": f"Отвечай кратко на основе текста:\n{context}",
                    },
                    {"role": "user", "content": question},
                ],
                "temperature": 0.1,
            },
            timeout=60,
        )
        response.raise_for_status()
        ans_data = response.json()
        answer = ans_data["choices"][0]["message"]["content"]
    except requests.RequestException as exc:
        raise HTTPException(
            status_code=502,
            detail="Ошибка запроса к DeepSeek API",
        ) from exc
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # Body was not JSON or lacked choices[0].message.content.
        raise HTTPException(
            status_code=502,
            detail="Некорректный ответ от DeepSeek API",
        ) from exc

    return {"answer": answer}