Spaces:

ub-aac-chatbot
/

aac-chatbot

Sleeping

App Files Files Community

shwetangisingh committed on Apr 20

Commit

c09a7e7

1 Parent(s): df78c68

Stop blocking /chat on evals; let the UI poll for them

Browse files

Files changed (4) hide show

backend/api/main.py +100 -40
frontend/src/components/ChatPanel.tsx +41 -3
frontend/src/lib/api.ts +51 -0
frontend/vite.config.ts +3 -0

backend/api/main.py CHANGED Viewed

@@ -4,10 +4,12 @@ from __future__ import annotations
 import json
 import logging
 import re
 import time
 from pathlib import Path
-from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel, Field
@@ -65,6 +67,33 @@ def _warmup():
 # ── In-memory session store (replace with Redis for multi-worker deployments) ──
 _sessions: dict[str, dict] = {}
 # ── Request / response schemas ─────────────────────────────────────────────────
@@ -324,8 +353,11 @@ def _compute_and_persist_evals(
         )
     except Exception:
         _log.exception("evals scoring failed for run %s", run_id)
         return None
     try:
         entry = {
             "run_id": run_id,
@@ -345,7 +377,7 @@ def _compute_and_persist_evals(
 @app.post("/chat", response_model=ChatResponse)
-def chat(req: ChatRequest):
     guard = check_input(req.query)
     if not guard["allowed"]:
         return ChatResponse(
@@ -375,17 +407,21 @@ def chat(req: ChatRequest):
     affect_emotion = (result.get("affect") or {}).get("emotion", "NEUTRAL")
     run_id = result.get("run_id")
-    eval_scores = _compute_and_persist_evals(
-        run_id=run_id,
-        user_id=req.user_id,
-        turn_id=result["turn_id"],
-        response=result["selected_response"] or "",
-        chunks=list(result.get("retrieved_chunks") or []),
-        latency_log=dict(result.get("latency_log") or {}),
-        affect=affect_emotion,
-        gesture_tag=req.gesture_tag,
-        gaze_bucket=req.gaze_bucket,
-    )
     return ChatResponse(
         user_id=req.user_id,
@@ -400,7 +436,7 @@ def chat(req: ChatRequest):
         guardrail_passed=result.get("guardrail_passed", True),
         run_id=run_id,
         turn_id=result["turn_id"],
-        eval_scores=eval_scores,
     )
@@ -412,7 +448,6 @@ def chat_stream(req: ChatRequest):
     """
     guard = check_input(req.query)
     if not guard["allowed"]:
-        # Mirror the non-stream /chat early-exit.
         payload = {
             "user_id": req.user_id,
             "query": req.query,
@@ -463,17 +498,24 @@ def chat_stream(req: ChatRequest):
         affect_emotion = (state.get("affect") or {}).get("emotion", "NEUTRAL")
         run_id = state.get("run_id")
-        eval_scores = _compute_and_persist_evals(
-            run_id=run_id,
-            user_id=req.user_id,
-            turn_id=state["turn_id"],
-            response=state["selected_response"] or "",
-            chunks=list(state.get("retrieved_chunks") or []),
-            latency_log=dict(state.get("latency_log") or {}),
-            affect=affect_emotion,
-            gesture_tag=req.gesture_tag,
-            gaze_bucket=req.gaze_bucket,
-        )
         final = {
             "user_id": req.user_id,
@@ -488,7 +530,7 @@ def chat_stream(req: ChatRequest):
             "guardrail_passed": state.get("guardrail_passed", True),
             "run_id": run_id,
             "turn_id": state["turn_id"],
-            "eval_scores": eval_scores,
         }
         yield _sse({"type": "complete", "response": final})
@@ -503,8 +545,23 @@ def _sse(data: dict) -> str:
     return f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
 @app.post("/chat/turnaround", response_model=ChatResponse)
-def chat_turnaround(req: TurnaroundRequest):
     if req.user_id not in _sessions:
         raise HTTPException(status_code=404, detail="no active session")
@@ -577,17 +634,20 @@ def chat_turnaround(req: TurnaroundRequest):
     affect_emotion = (replan_state.get("affect") or {}).get("emotion", "NEUTRAL")
     run_id = replan_state.get("run_id")
-    eval_scores = _compute_and_persist_evals(
-        run_id=run_id,
-        user_id=req.user_id,
-        turn_id=replan_state["turn_id"],
-        response=replan_state["selected_response"] or "",
-        chunks=list(replan_state.get("retrieved_chunks") or []),
-        latency_log=dict(replan_state.get("latency_log") or {}),
-        affect=affect_emotion,
-        gesture_tag=replan_state.get("gesture_tag"),
-        gaze_bucket=replan_state.get("gaze_bucket"),
-    )
     return ChatResponse(
         user_id=req.user_id,
@@ -884,7 +944,7 @@ def chat_regenerate(req: RegenerateRequest):
         guardrail_passed=replan_state.get("guardrail_passed", True),
         run_id=run_id,
         turn_id=replan_state["turn_id"],
-        eval_scores=eval_scores,
     )

 import json
 import logging
 import re
+import threading
 import time
+from collections import OrderedDict
 from pathlib import Path
+from fastapi import BackgroundTasks, FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel, Field
 # ── In-memory session store (replace with Redis for multi-worker deployments) ──
 _sessions: dict[str, dict] = {}
+# Eval scores keyed by run_id, filled by a BackgroundTask after /chat returns
+# so the UI can render the response immediately and poll GET /evals/{run_id}.
+# Multi-worker deploys should swap this (and _sessions) for Redis.
+_EVAL_FAILED: dict = {"_failed": True}
+_eval_results: OrderedDict[str, dict] = OrderedDict()
+_eval_lock = threading.Lock()
+_EVAL_RESULTS_MAX = 200
+def _remember_eval(run_id: str, scores: dict | None) -> None:
+    value = scores if scores else _EVAL_FAILED
+    with _eval_lock:
+        _eval_results[run_id] = value
+        _eval_results.move_to_end(run_id)
+        while len(_eval_results) > _EVAL_RESULTS_MAX:
+            _eval_results.popitem(last=False)
+def _reserve_eval_slot(run_id: str) -> None:
+    """Mark a run_id as in-flight so /evals can report 'pending' vs 'unknown'."""
+    with _eval_lock:
+        if run_id not in _eval_results:
+            _eval_results[run_id] = {}  # empty dict = pending
+            _eval_results.move_to_end(run_id)
+            while len(_eval_results) > _EVAL_RESULTS_MAX:
+                _eval_results.popitem(last=False)
 # ── Request / response schemas ─────────────────────────────────────────────────
         )
     except Exception:
         _log.exception("evals scoring failed for run %s", run_id)
+        _remember_eval(run_id, None)
         return None
+    _remember_eval(run_id, scores)
     try:
         entry = {
             "run_id": run_id,
 @app.post("/chat", response_model=ChatResponse)
+def chat(req: ChatRequest, background_tasks: BackgroundTasks):
     guard = check_input(req.query)
     if not guard["allowed"]:
         return ChatResponse(
     affect_emotion = (result.get("affect") or {}).get("emotion", "NEUTRAL")
     run_id = result.get("run_id")
+    # Evals (NLI cross-encoder) run off the response path; UI polls /evals.
+    if run_id and settings.evals_enabled:
+        _reserve_eval_slot(run_id)
+        background_tasks.add_task(
+            _compute_and_persist_evals,
+            run_id=run_id,
+            user_id=req.user_id,
+            turn_id=result["turn_id"],
+            response=result["selected_response"] or "",
+            chunks=list(result.get("retrieved_chunks") or []),
+            latency_log=dict(result.get("latency_log") or {}),
+            affect=affect_emotion,
+            gesture_tag=req.gesture_tag,
+            gaze_bucket=req.gaze_bucket,
+        )
     return ChatResponse(
         user_id=req.user_id,
         guardrail_passed=result.get("guardrail_passed", True),
         run_id=run_id,
         turn_id=result["turn_id"],
+        eval_scores=None,
     )
     """
     guard = check_input(req.query)
     if not guard["allowed"]:
         payload = {
             "user_id": req.user_id,
             "query": req.query,
         affect_emotion = (state.get("affect") or {}).get("emotion", "NEUTRAL")
         run_id = state.get("run_id")
+        # Evals run off the response path; UI polls GET /evals/{run_id}.
+        if run_id and settings.evals_enabled:
+            _reserve_eval_slot(run_id)
+            threading.Thread(
+                target=_compute_and_persist_evals,
+                kwargs=dict(
+                    run_id=run_id,
+                    user_id=req.user_id,
+                    turn_id=state["turn_id"],
+                    response=state["selected_response"] or "",
+                    chunks=list(state.get("retrieved_chunks") or []),
+                    latency_log=dict(state.get("latency_log") or {}),
+                    affect=affect_emotion,
+                    gesture_tag=req.gesture_tag,
+                    gaze_bucket=req.gaze_bucket,
+                ),
+                daemon=True,
+            ).start()
         final = {
             "user_id": req.user_id,
             "guardrail_passed": state.get("guardrail_passed", True),
             "run_id": run_id,
             "turn_id": state["turn_id"],
+            "eval_scores": None,
         }
         yield _sse({"type": "complete", "response": final})
     return f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
+@app.get("/evals/{run_id}")
+def get_evals(run_id: str):
+    if not _RUN_ID_RE.match(run_id):
+        raise HTTPException(status_code=400, detail="invalid run_id")
+    with _eval_lock:
+        entry = _eval_results.get(run_id)
+    if entry is None:
+        return {"status": "unknown", "run_id": run_id, "eval_scores": None}
+    if entry is _EVAL_FAILED:
+        return {"status": "failed", "run_id": run_id, "eval_scores": None}
+    if not entry:
+        return {"status": "pending", "run_id": run_id, "eval_scores": None}
+    return {"status": "ready", "run_id": run_id, "eval_scores": entry}
 @app.post("/chat/turnaround", response_model=ChatResponse)
+def chat_turnaround(req: TurnaroundRequest, background_tasks: BackgroundTasks):
     if req.user_id not in _sessions:
         raise HTTPException(status_code=404, detail="no active session")
     affect_emotion = (replan_state.get("affect") or {}).get("emotion", "NEUTRAL")
     run_id = replan_state.get("run_id")
+    if run_id and settings.evals_enabled:
+        _reserve_eval_slot(run_id)
+        background_tasks.add_task(
+            _compute_and_persist_evals,
+            run_id=run_id,
+            user_id=req.user_id,
+            turn_id=replan_state["turn_id"],
+            response=replan_state["selected_response"] or "",
+            chunks=list(replan_state.get("retrieved_chunks") or []),
+            latency_log=dict(replan_state.get("latency_log") or {}),
+            affect=affect_emotion,
+            gesture_tag=replan_state.get("gesture_tag"),
+            gaze_bucket=replan_state.get("gaze_bucket"),
+        )
     return ChatResponse(
         user_id=req.user_id,
         guardrail_passed=replan_state.get("guardrail_passed", True),
         run_id=run_id,
         turn_id=replan_state["turn_id"],
+        eval_scores=None,
     )

frontend/src/components/ChatPanel.tsx CHANGED Viewed

@@ -7,6 +7,7 @@ import type {
   SensingState,
 } from "../types";
 import {
   sendPick,
   sendTurnaround,
   streamChat,
@@ -141,6 +142,7 @@ export function ChatPanel({
   // against the new turnaround bubble's own head-signal re-firing turnaround
   // on itself.
   const turnaroundConsumedTurnRef = useRef<number | null>(null);
   useEffect(() => {
     bottomRef.current?.scrollIntoView({ behavior: "smooth" });
@@ -152,8 +154,39 @@ export function ChatPanel({
     lastTurnIdRef.current = null;
     turnaroundConsumedTurnRef.current = null;
     lastResponseTsRef.current = 0;
   }, [userId]);
   const handleTurnaround = useCallback(
     async (reason: "head" | "manual") => {
       if (!userId || !backendReady || turnaroundLoading || loading) return;
@@ -188,7 +221,7 @@ export function ChatPanel({
             affect: res.affect,
             runId: res.run_id,
             turnId: res.turn_id,
-            evalScores: res.eval_scores ?? null,
             isTurnaround: true,
             candidates: res.candidates ?? [],
             picked: true,
@@ -196,6 +229,7 @@ export function ChatPanel({
           return next;
         });
         onLatency(res.latency);
         // Do NOT advance lastResponseTsRef — keep the original turn's window so
         // the user can't head-shake the turnaround itself into another loop.
       } catch (e) {
@@ -223,6 +257,7 @@ export function ChatPanel({
       setMessages,
       onLatency,
       onHeadSignalConsumed,
     ]
   );
@@ -312,11 +347,12 @@ export function ChatPanel({
                 affect: res.affect,
                 runId: res.run_id,
                 turnId: res.turn_id,
-                evalScores: res.eval_scores ?? null,
                 candidates: res.candidates ?? m.candidates ?? [],
                 picked: false,
               }));
               onLatency(res.latency);
             }
           },
         );
@@ -337,6 +373,7 @@ export function ChatPanel({
       queueToken,
       flushNow,
       onLatency,
     ]
   );
@@ -476,12 +513,13 @@ export function ChatPanel({
               affect: res.affect,
               runId: res.run_id,
               turnId: res.turn_id,
-              evalScores: res.eval_scores ?? null,
               candidates: res.candidates ?? m.candidates ?? [],
               picked: (res.candidates ?? []).length <= 1,
             }));
             onLatency(res.latency);
             lastResponseTsRef.current = performance.now();
           }
         },
       );

   SensingState,
 } from "../types";
 import {
+  pollEvals,
   sendPick,
   sendTurnaround,
   streamChat,
   // against the new turnaround bubble's own head-signal re-firing turnaround
   // on itself.
   const turnaroundConsumedTurnRef = useRef<number | null>(null);
+  const evalPollAbortsRef = useRef<Set<AbortController>>(new Set());
   useEffect(() => {
     bottomRef.current?.scrollIntoView({ behavior: "smooth" });
     lastTurnIdRef.current = null;
     turnaroundConsumedTurnRef.current = null;
     lastResponseTsRef.current = 0;
+    evalPollAbortsRef.current.forEach((ac) => ac.abort());
+    evalPollAbortsRef.current.clear();
   }, [userId]);
+  useEffect(() => {
+    const active = evalPollAbortsRef.current;
+    return () => {
+      active.forEach((ac) => ac.abort());
+      active.clear();
+    };
+  }, []);
+  const startEvalPolling = useCallback(
+    (runId: string | null | undefined) => {
+      // Nothing to poll for when the turn carries no run id.
+      if (!runId) return;
+      const controller = new AbortController();
+      // Track the controller so the reset/unmount effects can cancel it.
+      evalPollAbortsRef.current.add(controller);
+      const run = async () => {
+        try {
+          const scores = await pollEvals(runId, { signal: controller.signal });
+          // Drop results for cancelled polls or polls that produced no scores.
+          if (controller.signal.aborted || !scores) return;
+          setMessages((prev) =>
+            prev.map((msg) =>
+              msg.runId === runId ? { ...msg, evalScores: scores } : msg
+            )
+          );
+        } finally {
+          evalPollAbortsRef.current.delete(controller);
+        }
+      };
+      void run();
+    },
+    [setMessages]
+  );
   const handleTurnaround = useCallback(
     async (reason: "head" | "manual") => {
       if (!userId || !backendReady || turnaroundLoading || loading) return;
             affect: res.affect,
             runId: res.run_id,
             turnId: res.turn_id,
+            evalScores: null,
             isTurnaround: true,
             candidates: res.candidates ?? [],
             picked: true,
           return next;
         });
         onLatency(res.latency);
+        startEvalPolling(res.run_id);
         // Do NOT advance lastResponseTsRef — keep the original turn's window so
         // the user can't head-shake the turnaround itself into another loop.
       } catch (e) {
       setMessages,
       onLatency,
       onHeadSignalConsumed,
+      startEvalPolling,
     ]
   );
                 affect: res.affect,
                 runId: res.run_id,
                 turnId: res.turn_id,
+                evalScores: null,
                 candidates: res.candidates ?? m.candidates ?? [],
                 picked: false,
               }));
               onLatency(res.latency);
+              startEvalPolling(res.run_id);
             }
           },
         );
       queueToken,
       flushNow,
       onLatency,
+      startEvalPolling,
     ]
   );
               affect: res.affect,
               runId: res.run_id,
               turnId: res.turn_id,
+              evalScores: null,
               candidates: res.candidates ?? m.candidates ?? [],
               picked: (res.candidates ?? []).length <= 1,
             }));
             onLatency(res.latency);
             lastResponseTsRef.current = performance.now();
+            startEvalPolling(res.run_id);
           }
         },
       );

frontend/src/lib/api.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import type {
   ChatRequest,
   ChatResponse,
   Persona,
   TurnaroundRequest,
 } from "../types";
@@ -140,6 +141,56 @@ export async function sendPick(args: {
   if (!res.ok) throw new Error(`API error: ${res.status}`);
 }
 export async function submitRating(args: {
   run_id: string;
   user_id: string;

 import type {
   ChatRequest,
   ChatResponse,
+  EvalScores,
   Persona,
   TurnaroundRequest,
 } from "../types";
   if (!res.ok) throw new Error(`API error: ${res.status}`);
 }
+export type EvalsStatus = "pending" | "ready" | "failed" | "unknown"; // status values returned by GET /evals/{run_id}
+export interface EvalsFetchResult { // JSON body returned by GET /evals/{run_id}
+  status: EvalsStatus; // "unknown" = no entry for this run id, "pending" = still being computed
+  run_id: string; // echoes the run id that was requested
+  eval_scores: EvalScores | null; // populated only when status is "ready"; null otherwise
+}
+/**
+ * Fetch the current eval status for a run from GET /evals/{run_id}.
+ *
+ * @param runId run id returned by a chat response
+ * @param opts.signal optional AbortSignal that cancels the in-flight request
+ * @throws Error on a non-2xx response; rejects if the request is aborted
+ */
+export async function fetchEvals(
+  runId: string,
+  opts: { signal?: AbortSignal } = {}
+): Promise<EvalsFetchResult> {
+  const res = await fetch(`${API_BASE}/evals/${encodeURIComponent(runId)}`, {
+    signal: opts.signal,
+  });
+  if (!res.ok) throw new Error(`API error: ${res.status}`);
+  return res.json();
+}
+/**
+ * Poll GET /evals/{run_id} with exponential backoff until scores are ready.
+ *
+ * Resolves with the scores on "ready". Resolves with null when the server
+ * reports "failed", when three consecutive polls return "unknown", when
+ * `timeoutMs` elapses, or when `signal` is aborted. Network/HTTP errors are
+ * treated as transient and retried until the timeout.
+ *
+ * @param runId run id to poll for
+ * @param opts.initialDelayMs first backoff delay (default 300)
+ * @param opts.maxDelayMs cap for the doubled backoff delay (default 2000)
+ * @param opts.timeoutMs overall polling budget (default 20000)
+ * @param opts.signal aborts the in-flight request and the backoff sleep
+ */
+export async function pollEvals(
+  runId: string,
+  opts: {
+    initialDelayMs?: number;
+    maxDelayMs?: number;
+    timeoutMs?: number;
+    signal?: AbortSignal;
+  } = {}
+): Promise<EvalScores | null> {
+  const { signal } = opts;
+  const maxDelayMs = opts.maxDelayMs ?? 2000;
+  const timeoutMs = opts.timeoutMs ?? 20000;
+  let delay = opts.initialDelayMs ?? 300;
+  const start = performance.now();
+  // Backoff sleep that wakes up early on abort, so a cancelled poll does not
+  // keep a timer (and this promise) alive for up to maxDelayMs.
+  const sleep = (ms: number) =>
+    new Promise<void>((resolve) => {
+      if (signal?.aborted) {
+        resolve();
+        return;
+      }
+      const onAbort = () => {
+        clearTimeout(timer);
+        resolve();
+      };
+      const timer = setTimeout(() => {
+        signal?.removeEventListener("abort", onAbort);
+        resolve();
+      }, ms);
+      signal?.addEventListener("abort", onAbort, { once: true });
+    });
+  // Track consecutive "unknown" responses so transient race conditions (poll
+  // racing the server picking up the new run_id) don't immediately give up.
+  let unknownStreak = 0;
+  while (performance.now() - start < timeoutMs) {
+    if (signal?.aborted) return null;
+    try {
+      const r = await fetchEvals(runId, { signal });
+      if (r.status === "ready") return r.eval_scores;
+      if (r.status === "failed") return null;
+      if (r.status === "unknown") {
+        unknownStreak += 1;
+        if (unknownStreak >= 3) return null;
+      } else {
+        unknownStreak = 0;
+      }
+    } catch (e) {
+      // An aborted fetch rejects; that is a cancellation, not a transient error.
+      if (signal?.aborted) return null;
+      console.warn("pollEvals: transient error", e);
+    }
+    await sleep(delay);
+    delay = Math.min(delay * 2, maxDelayMs);
+  }
+  return null;
+}
 export async function submitRating(args: {
   run_id: string;
   user_id: string;

frontend/vite.config.ts CHANGED Viewed

@@ -11,6 +11,9 @@ export default defineConfig({
       "/users": "http://localhost:8000",
       "/session": "http://localhost:8000",
       "/health": "http://localhost:8000",
     },
   },
 })

       "/users": "http://localhost:8000",
       "/session": "http://localhost:8000",
       "/health": "http://localhost:8000",
+      "/evals": "http://localhost:8000",
+      "/feedback": "http://localhost:8000",
+      "/debug": "http://localhost:8000",
     },
   },
 })