Spaces:
Running
Running
Commit ·
3394ee5
1
Parent(s): 9b7c6ff
fix(api): improve stream token chunks robustness and sync year filter
Browse files
- frontend-next/app/page.tsx +6 -2
- src/rag/llm_client.py +8 -2
- test_stream.py +16 -0
frontend-next/app/page.tsx
CHANGED
|
@@ -296,11 +296,15 @@ export default function App() {
296     body: JSON.stringify({
297       question: originalQuery,
298       top_k: topK,
299 -     filter_category: category === "All" ? undefined : category
300     })
301   });
302
303 - if (!res.ok || !res.body)
304
305   const reader = res.body.getReader();
306   const decoder = new TextDecoder();

296     body: JSON.stringify({
297       question: originalQuery,
298       top_k: topK,
299 +     filter_category: category === "All" ? undefined : category,
300 +     filter_year_gte: yearFilter ? yearFrom : undefined
301     })
302   });
303
304 + if (!res.ok || !res.body) {
305 +   const errText = await res.text();
306 +   throw new Error(`API error: ${res.status} ${errText}`);
307 + }
308
309   const reader = res.body.getReader();
310   const decoder = new TextDecoder();
src/rag/llm_client.py
CHANGED
|
@@ -72,7 +72,10 @@ class MultiModelClient:
 72       break
 73   try:
 74       data = json.loads(data_str)
 75 -
 76       if token:
 77           yield token
 78   except:
@@ -95,7 +98,10 @@ class MultiModelClient:
 95   if stream:
 96       def generator():
 97           for chunk in response:
 98 -
 99               if token:
100                   yield token
101       return generator()

 72       break
 73   try:
 74       data = json.loads(data_str)
 75 +     choices = data.get("choices", [])
 76 +     if not choices:
 77 +         continue
 78 +     token = choices[0].get("delta", {}).get("content", "")
 79       if token:
 80           yield token
 81   except:

 98   if stream:
 99       def generator():
100           for chunk in response:
101 +             choices = chunk.choices
102 +             if not choices:
103 +                 continue
104 +             token = choices[0].delta.content
105               if token:
106                   yield token
107       return generator()
test_stream.py
ADDED
|
@@ -0,0 +1,16 @@
 1 + import os
 2 + import sys
 3 + from dotenv import load_dotenv
 4 +
 5 + load_dotenv()
 6 + sys.path.append(os.getcwd())
 7 +
 8 + from src.rag.pipeline import RAGPipeline
 9 +
10 + try:
11 +     pipeline = RAGPipeline()
12 +     gen = pipeline.stream_query("What is LoRA?", top_k=2)
13 +     for x in gen:
14 +         print(x)
15 + except Exception as e:
16 +     print(f"Error: {e}")