Subhadip007 committed on
Commit
99e045a
·
1 Parent(s): 88b90e7

fix(backend): bust docker cache + fix async streaming generator to not block event loop

Browse files
Files changed (2) hide show
  1. Dockerfile +4 -0
  2. src/api/main.py +40 -7
Dockerfile CHANGED
@@ -13,6 +13,10 @@ RUN apt-get update && apt-get install -y \
13
  COPY requirements.txt .
14
  RUN pip install --no-cache-dir -r requirements.txt
15
 
 
 
 
 
16
  # Copy source code
17
  COPY src/ ./src/
18
  COPY config/ ./config/
 
13
  COPY requirements.txt .
14
  RUN pip install --no-cache-dir -r requirements.txt
15
 
16
+ # Cache-bust: bump the CACHEBUST value below whenever cached layers must be invalidated (it is a fixed literal, so it does not change by itself on every build)
17
+ # This ensures HuggingFace always gets the latest code from git
18
+ ARG CACHEBUST=20260411_1
19
+
20
  # Copy source code
21
  COPY src/ ./src/
22
  COPY config/ ./config/
src/api/main.py CHANGED
@@ -170,15 +170,48 @@ async def stream_query_papers(
170
  request: Request,
171
  query_input: QueryRequest,
172
  ):
 
173
  pipeline = request.app.state.rag_pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  return StreamingResponse(
175
- pipeline.stream_query(
176
- question = query_input.question,
177
- top_k = query_input.top_k,
178
- filter_category = query_input.filter_category,
179
- filter_year_gte = query_input.filter_year_gte,
180
- ),
181
- media_type="text/event-stream"
182
  )
183
 
184
  @app.post(
 
170
  request: Request,
171
  query_input: QueryRequest,
172
  ):
173
+ import asyncio
174
  pipeline = request.app.state.rag_pipeline
175
+
176
+ async def async_generator():
177
+ """
178
+ Wraps the synchronous pipeline.stream_query() generator in an
179
+ async-friendly way using a thread + asyncio.Queue so we never
180
+ block the FastAPI event loop.
181
+ """
182
+ loop = asyncio.get_event_loop()
183
+ queue: asyncio.Queue = asyncio.Queue()
184
+ SENTINEL = object()
185
+
186
+ def run_sync():
187
+ try:
188
+ for chunk in pipeline.stream_query(
189
+ question = query_input.question,
190
+ top_k = query_input.top_k,
191
+ filter_category = query_input.filter_category,
192
+ filter_year_gte = query_input.filter_year_gte,
193
+ ):
194
+ loop.call_soon_threadsafe(queue.put_nowait, chunk)
195
+ finally:
196
+ loop.call_soon_threadsafe(queue.put_nowait, SENTINEL)
197
+
198
+ import threading
199
+ thread = threading.Thread(target=run_sync, daemon=True)
200
+ thread.start()
201
+
202
+ while True:
203
+ item = await queue.get()
204
+ if item is SENTINEL:
205
+ break
206
+ yield item
207
+
208
  return StreamingResponse(
209
+ async_generator(),
210
+ media_type="text/event-stream",
211
+ headers={
212
+ "Cache-Control": "no-cache",
213
+ "X-Accel-Buffering": "no",
214
+ }
 
215
  )
216
 
217
  @app.post(