Spaces:
Running
Running
Commit ·
99e045a
1
Parent(s): 88b90e7
fix(backend): bust docker cache + fix async streaming generator to not block event loop
Browse files
- Dockerfile +4 -0
- src/api/main.py +40 -7
Dockerfile
CHANGED
|
@@ -13,6 +13,10 @@ RUN apt-get update && apt-get install -y \
|
|
| 13 |
COPY requirements.txt .
|
| 14 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# Copy source code
|
| 17 |
COPY src/ ./src/
|
| 18 |
COPY config/ ./config/
|
|
|
|
| 13 |
COPY requirements.txt .
|
| 14 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 15 |
|
| 16 |
+
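# Cache-bust: intended to force Docker to re-copy the source code.
# NOTE(review): the default below is a fixed string, so it does not change on
# every build -- it only differs when edited or overridden via --build-arg.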
|
| 17 |
+
# This ensures HuggingFace always gets the latest code from git
|
| 18 |
+
ARG CACHEBUST=20260411_1
|
| 19 |
+
|
| 20 |
# Copy source code
|
| 21 |
COPY src/ ./src/
|
| 22 |
COPY config/ ./config/
|
src/api/main.py
CHANGED
|
@@ -170,15 +170,48 @@ async def stream_query_papers(
|
|
| 170 |
request: Request,
|
| 171 |
query_input: QueryRequest,
|
| 172 |
):
|
|
|
|
| 173 |
pipeline = request.app.state.rag_pipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
return StreamingResponse(
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
media_type="text/event-stream"
|
| 182 |
)
|
| 183 |
|
| 184 |
@app.post(
|
|
|
|
| 170 |
request: Request,
|
| 171 |
query_input: QueryRequest,
|
| 172 |
):
|
| 173 |
+
import asyncio
|
| 174 |
pipeline = request.app.state.rag_pipeline
|
| 175 |
+
|
| 176 |
+
async def async_generator():
    """
    Bridge the synchronous pipeline.stream_query() generator to async.

    The blocking generator is drained on a worker thread; every chunk is
    handed back to the event loop through an asyncio.Queue, so waiting for
    the next chunk never blocks the FastAPI event loop.

    Yields:
        Each chunk produced by pipeline.stream_query(), in order.

    Raises:
        Exception: whatever pipeline.stream_query() raised on the worker
            thread, re-raised here once the already-queued chunks have
            been yielded, so a failure is not disguised as a normal end
            of stream.
    """
    import threading

    # Inside a coroutine a loop is always running; get_running_loop() is
    # the documented way to obtain it.
    loop = asyncio.get_running_loop()
    queue: asyncio.Queue = asyncio.Queue()
    SENTINEL = object()
    # Set when the consumer goes away so the worker stops pulling chunks.
    stop_requested = threading.Event()
    # Holds at most one exception captured on the worker thread.
    failure = []

    def publish(item):
        """Hand *item* to the event loop from the worker thread."""
        try:
            loop.call_soon_threadsafe(queue.put_nowait, item)
        except RuntimeError:
            # The loop has been closed (e.g. shutdown): nobody is
            # listening any more, so tell the worker to stop.
            stop_requested.set()

    def run_sync():
        """Drain the blocking generator; always finish with SENTINEL."""
        try:
            for chunk in pipeline.stream_query(
                question=query_input.question,
                top_k=query_input.top_k,
                filter_category=query_input.filter_category,
                filter_year_gte=query_input.filter_year_gte,
            ):
                if stop_requested.is_set():
                    break
                publish(chunk)
        except Exception as exc:
            # Thread boundary: keep the error so the consumer can
            # re-raise it instead of losing it with the thread.
            failure.append(exc)
        finally:
            # Queued after any failure is recorded, so the consumer sees
            # the failure as soon as it reads the sentinel.
            publish(SENTINEL)

    thread = threading.Thread(target=run_sync, daemon=True)
    thread.start()

    try:
        while True:
            item = await queue.get()
            if item is SENTINEL:
                break
            yield item
    finally:
        # Runs on normal completion and when the generator is closed
        # early (client disconnect); lets the worker wind down.
        stop_requested.set()

    if failure:
        raise failure[0]
|
| 207 |
+
|
| 208 |
return StreamingResponse(
|
| 209 |
+
async_generator(),
|
| 210 |
+
media_type="text/event-stream",
|
| 211 |
+
headers={
|
| 212 |
+
"Cache-Control": "no-cache",
|
| 213 |
+
"X-Accel-Buffering": "no",
|
| 214 |
+
}
|
|
|
|
| 215 |
)
|
| 216 |
|
| 217 |
@app.post(
|