u7un committed on
Commit
3d458c2
·
verified ·
1 Parent(s): 663e9e8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, Form
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ import os
4
+ from PyPDF2 import PdfReader
5
+ from sentence_transformers import SentenceTransformer
6
+ import faiss
7
+ import numpy as np
8
+ import requests
9
+
10
# Shared in-memory retrieval state: `index` is created lazily by the first
# successful upload, and `chunks` holds the text fragments in insertion order.
chunks = []
index = None

# Embedding model initialisation (loaded once, when the Space starts).
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

app = FastAPI()

# Allow cross-origin access for the Vercel-hosted frontend.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],  # In production, replace with your own Vercel domain
)
24
+
25
@app.post("/upload")
async def upload_pdf(file: UploadFile = File(...)):
    """Extract the text of an uploaded PDF, chunk it, and add it to the index.

    The text of all pages is concatenated, split into consecutive 800-character
    chunks, embedded with the module-level ``model`` and appended to the
    module-level FAISS ``index`` (created on first use). The chunk texts are
    appended to the module-level ``chunks`` list.

    Args:
        file: The uploaded PDF.

    Returns:
        A dict with a ``"message"`` key reporting how many chunks were added.
    """
    global index, chunks
    pdf_reader = PdfReader(file.file)
    # Guard against pages that yield no text (e.g. scanned images); joining
    # once also avoids repeated string concatenation.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Simple fixed-size chunking
    new_chunks = [text[i:i + 800] for i in range(0, len(text), 800)]
    if not new_chunks:
        # Nothing to embed: skip the encoder, whose output for an empty batch
        # would have no second dimension to read below.
        return {"message": f"Загружено {len(new_chunks)} фрагментов"}

    # Embeddings and FAISS
    embeddings = np.asarray(model.encode(new_chunks), dtype='float32')

    if index is None:
        index = faiss.IndexFlatL2(embeddings.shape[1])

    index.add(embeddings)
    # Record the chunk texts only after the vectors were added, so that
    # position i in `chunks` always corresponds to vector i in `index`.
    chunks.extend(new_chunks)
    return {"message": f"Загружено {len(new_chunks)} фрагментов"}
46
+
47
@app.post("/ask")
async def ask(question: str = Form(...)):
    """Answer a question using the indexed PDF chunks as context.

    The question is embedded with the module-level ``model``, the nearest
    chunks (at most 3) are fetched from the FAISS ``index``, and the joined
    chunk text is sent as the system prompt to the DeepSeek chat-completions
    API together with the question.

    Args:
        question: The user's question, sent as a form field.

    Returns:
        A dict with an ``"answer"`` key: either the model's reply, or a hint
        to upload a PDF first when nothing has been indexed yet.

    Raises:
        HTTPException: 502 when the DeepSeek request fails or its response
            does not have the expected shape.
    """
    # Local import: HTTPException is not part of the file-level fastapi import.
    from fastapi import HTTPException

    if index is None or index.ntotal == 0 or not chunks:
        return {"answer": "Сначала загрузи PDF!"}

    # Search
    q_emb = np.asarray(model.encode([question]), dtype='float32')
    # Never ask for more neighbours than there are vectors: FAISS pads the
    # missing results with -1, which would silently select chunks[-1].
    top_k = min(3, index.ntotal)
    _, neighbours = index.search(q_emb, k=top_k)

    context = "\n".join(
        chunks[i] for i in neighbours[0] if 0 <= i < len(chunks)
    )

    # Request to DeepSeek (or any other compatible API)
    try:
        response = requests.post(
            "https://api.deepseek.com/v1/chat/completions",
            headers={"Authorization": f"Bearer {os.getenv('DEEPSEEK_API_KEY')}"},
            json={
                "model": "deepseek-chat",
                "messages": [
                    {"role": "system", "content": f"Отвечай кратко на основе текста:\n{context}"},
                    {"role": "user", "content": question}
                ],
                "temperature": 0.1
            },
            # Without a timeout a stalled upstream would hang the request forever.
            timeout=60,
        )
        response.raise_for_status()
        ans_data = response.json()
        answer = ans_data['choices'][0]['message']['content']
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as exc:
        # Report upstream failures as a gateway error instead of an opaque 500.
        raise HTTPException(
            status_code=502,
            detail="Upstream language-model request failed",
        ) from exc

    return {"answer": answer}