Aksel Joonas Reedi committed on
Commit
962191f
·
unverified ·
1 Parent(s): 28b8f2b

Restore expired sessions via summary-based catch-up (#52)

Browse files

When the backend loses a session (typically: HF Space restarted), the
chat now shows a small inline banner instead of silently erroring:

Where were we?
Let me skim the conversation so far and pick up right where we left
off — or we can start something new.
[ Catch me up ] [ Start fresh ]

Flow, in one pass:
* Frontend stashes the raw backend messages in localStorage on every
mount-hydrate and turn_complete. When the backend 404s for the
session id, the SSE transport and the mount effect both fire
onSessionDead → the session is flagged `expired` in sessionStore;
the sidebar marks it "needs a catch-up".
* Catch me up → POST /api/session/restore-summary with the cached
messages. Backend creates a fresh session, runs the existing
summarizer (factored into summarize_messages() and shared with
in-session compaction) with a restore-specific prompt that
preserves the tool-call trail, and seeds the new session with that
summary wrapped in a [SYSTEM: ...] user turn. New id is swapped
back via renameSession; UIMessages + backend-cache move with it.
* Start fresh → delete the session + its caches.
* The design is lazy per session: users with 5 stale tabs only pay
for a summary on the ones they actually reopen.

Frontend filters [SYSTEM: ...] user turns from rendering so the seed
message (plus existing doom-loop / compact nudges) stays invisible.

For sessions that predate the raw-message cache, fall back to
reconstructing the backend message list from the longstanding
UIMessage cache (tool calls + paired results, text preserved).

Also sets litellm.modify_params = True globally in agent/__init__.py
(moved out of agent/main.py) so the backend entry also picks it up —
required for Anthropic to accept a history containing tool_calls
without a `tools=` kwarg, which is exactly the summarization shape.

agent/__init__.py CHANGED
@@ -2,6 +2,20 @@
2
  HF Agent - Main agent module
3
  """
4
 
5
- from agent.core.agent_loop import submission_loop
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  __all__ = ["submission_loop"]
 
2
  HF Agent - Main agent module
3
  """
4
 
5
+ import litellm
6
+
7
+ # Global LiteLLM behavior — set once at package import so both CLI and
8
+ # backend entries share the same config.
9
+ # drop_params: quietly drop unsupported params rather than raising
10
+ # suppress_debug_info: hide the noisy "Give Feedback" banner on errors
11
+ # modify_params: let LiteLLM patch Anthropic's tool-call requirements
12
+ # (synthesize a dummy tool spec when we call completion on a history
13
+ # that contains tool_calls but aren't passing `tools=` — happens
14
+ # during summarization / session seeding).
15
+ litellm.drop_params = True
16
+ litellm.suppress_debug_info = True
17
+ litellm.modify_params = True
18
+
19
+ from agent.core.agent_loop import submission_loop # noqa: E402
20
 
21
  __all__ = ["submission_loop"]
agent/context_manager/manager.py CHANGED
@@ -68,6 +68,63 @@ def _get_hf_username(hf_token: str | None = None) -> str:
68
  return "unknown"
69
 
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  class ContextManager:
72
  """Manages conversation context and message history for the agent"""
73
 
@@ -318,25 +375,15 @@ class ContextManager:
318
  if not messages_to_summarize:
319
  return
320
 
321
- messages_to_summarize.append(
322
- Message(
323
- role="user",
324
- content="Please provide a concise summary of the conversation above, focusing on key decisions, the 'why' behind the decisions, problems solved, and important context needed for developing further. Your summary will be given to someone who has never worked on this project before and they will be have to be filled in.",
325
- )
326
- )
327
-
328
- from agent.core.llm_params import _resolve_llm_params
329
-
330
- llm_params = _resolve_llm_params(model_name, hf_token, reasoning_effort="high")
331
- response = await acompletion(
332
- messages=messages_to_summarize,
333
- max_completion_tokens=self.compact_size,
334
- tools=tool_specs,
335
- **llm_params,
336
- )
337
- summarized_message = Message(
338
- role="assistant", content=response.choices[0].message.content
339
  )
 
340
 
341
  # Reconstruct: system + first user msg + summary + recent messages
342
  head = [system_msg] if system_msg else []
@@ -344,6 +391,16 @@ class ContextManager:
344
  head.append(first_user_msg)
345
  self.items = head + [summarized_message] + recent_messages
346
 
347
- self.running_context_usage = (
348
- len(self.system_prompt) // 4 + response.usage.completion_tokens
349
- )
 
 
 
 
 
 
 
 
 
 
 
68
  return "unknown"
69
 
70
 
71
+ _COMPACT_PROMPT = (
72
+ "Please provide a concise summary of the conversation above, focusing on "
73
+ "key decisions, the 'why' behind the decisions, problems solved, and "
74
+ "important context needed for developing further. Your summary will be "
75
+ "given to someone who has never worked on this project before and they "
76
+ "will be have to be filled in."
77
+ )
78
+
79
+ # Used when seeding a brand-new session from prior browser-cached messages.
80
+ # Here we're writing a note to *ourselves* — so preserve the tool-call trail,
81
+ # files produced, and planned next steps in first person. Optimized for
82
+ # continuity, not brevity.
83
+ _RESTORE_PROMPT = (
84
+ "You're about to be restored into a fresh session with no memory of the "
85
+ "conversation above. Write a first-person note to your future self so "
86
+ "you can continue right where you left off. Include:\n"
87
+ " • What the user originally asked for and what progress you've made.\n"
88
+ " • Every tool you called, with arguments and a one-line result summary.\n"
89
+ " • Any code, files, scripts, or artifacts you produced (with paths).\n"
90
+ " • Key decisions and the reasoning behind them.\n"
91
+ " • What you were planning to do next.\n\n"
92
+ "Don't be cute. Be specific. This is the only context you'll have."
93
+ )
94
+
95
+
96
+ async def summarize_messages(
97
+ messages: list[Message],
98
+ model_name: str,
99
+ hf_token: str | None = None,
100
+ max_tokens: int = 2000,
101
+ tool_specs: list[dict] | None = None,
102
+ prompt: str = _COMPACT_PROMPT,
103
+ ) -> tuple[str, int]:
104
+ """Run a summarization prompt against a list of messages.
105
+
106
+ ``prompt`` defaults to the compaction prompt (terse, decision-focused).
107
+ Callers seeding a new session after a restart should pass ``_RESTORE_PROMPT``
108
+ instead — it preserves the tool-call trail so the agent can answer
109
+ follow-up questions about what it did.
110
+
111
+ Returns ``(summary_text, completion_tokens)``.
112
+ """
113
+ from agent.core.llm_params import _resolve_llm_params
114
+
115
+ prompt_messages = list(messages) + [Message(role="user", content=prompt)]
116
+ llm_params = _resolve_llm_params(model_name, hf_token, reasoning_effort="high")
117
+ response = await acompletion(
118
+ messages=prompt_messages,
119
+ max_completion_tokens=max_tokens,
120
+ tools=tool_specs,
121
+ **llm_params,
122
+ )
123
+ summary = response.choices[0].message.content or ""
124
+ completion_tokens = response.usage.completion_tokens if response.usage else 0
125
+ return summary, completion_tokens
126
+
127
+
128
  class ContextManager:
129
  """Manages conversation context and message history for the agent"""
130
 
 
375
  if not messages_to_summarize:
376
  return
377
 
378
+ summary, completion_tokens = await summarize_messages(
379
+ messages_to_summarize,
380
+ model_name=model_name,
381
+ hf_token=hf_token,
382
+ max_tokens=self.compact_size,
383
+ tool_specs=tool_specs,
384
+ prompt=_COMPACT_PROMPT,
 
 
 
 
 
 
 
 
 
 
 
385
  )
386
+ summarized_message = Message(role="assistant", content=summary)
387
 
388
  # Reconstruct: system + first user msg + summary + recent messages
389
  head = [system_msg] if system_msg else []
 
391
  head.append(first_user_msg)
392
  self.items = head + [summarized_message] + recent_messages
393
 
394
+ # Count the actual post-compact context — system prompt + first user
395
+ # turn + summary + the preserved tail all contribute, not just the
396
+ # summary. litellm.token_counter uses the model's real tokenizer.
397
+ from litellm import token_counter
398
+
399
+ try:
400
+ self.running_context_usage = token_counter(
401
+ model=model_name,
402
+ messages=[m.model_dump() for m in self.items],
403
+ )
404
+ except Exception as e:
405
+ logger.warning("token_counter failed post-compact (%s); falling back to rough estimate", e)
406
+ self.running_context_usage = len(self.system_prompt) // 4 + completion_tokens
backend/routes/agent.py CHANGED
@@ -227,6 +227,50 @@ async def create_session(
227
  return SessionResponse(session_id=session_id, ready=True)
228
 
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  @router.get("/session/{session_id}", response_model=SessionInfo)
231
  async def get_session(
232
  session_id: str, user: dict = Depends(get_current_user)
 
227
  return SessionResponse(session_id=session_id, ready=True)
228
 
229
 
230
+ @router.post("/session/restore-summary", response_model=SessionResponse)
231
+ async def restore_session_summary(
232
+ request: Request, body: dict, user: dict = Depends(get_current_user)
233
+ ) -> SessionResponse:
234
+ """Create a new session seeded with a summary of the caller's prior
235
+ conversation. The client sends its cached messages; we run the standard
236
+ summarization prompt on them and drop the result into the new
237
+ session's context as a user-role system note.
238
+ """
239
+ messages = body.get("messages")
240
+ if not isinstance(messages, list) or not messages:
241
+ raise HTTPException(status_code=400, detail="Missing 'messages' array")
242
+
243
+ hf_token = None
244
+ auth_header = request.headers.get("Authorization", "")
245
+ if auth_header.startswith("Bearer "):
246
+ hf_token = auth_header[7:]
247
+ if not hf_token:
248
+ hf_token = request.cookies.get("hf_access_token")
249
+ if not hf_token:
250
+ hf_token = os.environ.get("HF_TOKEN")
251
+
252
+ try:
253
+ session_id = await session_manager.create_session(
254
+ user_id=user["user_id"], hf_token=hf_token
255
+ )
256
+ except SessionCapacityError as e:
257
+ raise HTTPException(status_code=503, detail=str(e))
258
+
259
+ try:
260
+ summarized = await session_manager.seed_from_summary(session_id, messages)
261
+ except ValueError as e:
262
+ raise HTTPException(status_code=500, detail=str(e))
263
+ except Exception as e:
264
+ logger.exception("seed_from_summary failed")
265
+ raise HTTPException(status_code=500, detail=f"Summary failed: {e}")
266
+
267
+ logger.info(
268
+ f"Seeded session {session_id} for {user.get('username', 'unknown')} "
269
+ f"(summary of {summarized} messages)"
270
+ )
271
+ return SessionResponse(session_id=session_id, ready=True)
272
+
273
+
274
  @router.get("/session/{session_id}", response_model=SessionInfo)
275
  async def get_session(
276
  session_id: str, user: dict = Depends(get_current_user)
backend/session_manager.py CHANGED
@@ -207,6 +207,69 @@ class SessionManager:
207
  logger.info(f"Created session {session_id} for user {user_id}")
208
  return session_id
209
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  @staticmethod
211
  async def _cleanup_sandbox(session: Session) -> None:
212
  """Delete the sandbox Space if one was created for this session."""
 
207
  logger.info(f"Created session {session_id} for user {user_id}")
208
  return session_id
209
 
210
+ async def seed_from_summary(self, session_id: str, messages: list[dict]) -> int:
211
+ """Rehydrate a session from cached prior messages via summarization.
212
+
213
+ Runs the standard summarization prompt (same one compaction uses)
214
+ over the provided messages, then seeds the new session's context
215
+ with that summary. Tool-call pairing concerns disappear because the
216
+ output is plain text. Returns the number of messages summarized.
217
+ """
218
+ from litellm import Message
219
+
220
+ from agent.context_manager.manager import _RESTORE_PROMPT, summarize_messages
221
+
222
+ agent_session = self.sessions.get(session_id)
223
+ if not agent_session:
224
+ raise ValueError(f"Session {session_id} not found")
225
+
226
+ # Parse into Message objects, tolerating malformed entries.
227
+ parsed: list[Message] = []
228
+ for raw in messages:
229
+ if raw.get("role") == "system":
230
+ continue # the new session has its own system prompt
231
+ try:
232
+ parsed.append(Message.model_validate(raw))
233
+ except Exception as e:
234
+ logger.warning("Dropping malformed message during seed: %s", e)
235
+
236
+ if not parsed:
237
+ return 0
238
+
239
+ session = agent_session.session
240
+ # Pass the real tool specs so the summarizer sees what the agent
241
+ # actually has — otherwise Anthropic's modify_params injects a
242
+ # dummy tool and the summarizer editorializes that the original
243
+ # tool calls were fabricated.
244
+ tool_specs = None
245
+ try:
246
+ tool_specs = agent_session.tool_router.get_tool_specs_for_llm()
247
+ except Exception:
248
+ pass
249
+ try:
250
+ summary, _ = await summarize_messages(
251
+ parsed,
252
+ model_name=session.config.model_name,
253
+ hf_token=session.hf_token,
254
+ max_tokens=4000,
255
+ prompt=_RESTORE_PROMPT,
256
+ tool_specs=tool_specs,
257
+ )
258
+ except Exception as e:
259
+ logger.error("Summary call failed during seed: %s", e)
260
+ raise
261
+
262
+ seed = Message(
263
+ role="user",
264
+ content=(
265
+ "[SYSTEM: Your prior memory of this conversation — written "
266
+ "in your own voice right before restart. Continue from here.]\n\n"
267
+ + (summary or "(no summary returned)")
268
+ ),
269
+ )
270
+ session.context_manager.items.append(seed)
271
+ return len(parsed)
272
+
273
  @staticmethod
274
  async def _cleanup_sandbox(session: Session) -> None:
275
  """Delete the sandbox Space if one was created for this session."""
frontend/src/components/Chat/ExpiredBanner.tsx ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Shown inline in a chat when the backend no longer recognizes the
3
+ * session id (typically: Space was restarted). Lets the user catch the
4
+ * agent up with a summary of the prior conversation, or start over.
5
+ */
6
+ import { useState, useCallback } from 'react';
7
+ import { Box, Button, CircularProgress, Typography } from '@mui/material';
8
+ import { apiFetch } from '@/utils/api';
9
+ import { useSessionStore } from '@/store/sessionStore';
10
+ import { useAgentStore } from '@/store/agentStore';
11
+ import { loadBackendMessages } from '@/lib/backend-message-store';
12
+ import { loadMessages } from '@/lib/chat-message-store';
13
+ import { uiMessagesToLLMMessages } from '@/lib/convert-llm-messages';
14
+ import { logger } from '@/utils/logger';
15
+
16
+ interface Props {
17
+ sessionId: string;
18
+ }
19
+
20
+ export default function ExpiredBanner({ sessionId }: Props) {
21
+ const { renameSession, deleteSession } = useSessionStore();
22
+ const [busy, setBusy] = useState<'catch-up' | 'start-over' | null>(null);
23
+ const [error, setError] = useState<string | null>(null);
24
+
25
+ const handleCatchUp = useCallback(async () => {
26
+ setBusy('catch-up');
27
+ setError(null);
28
+ try {
29
+ // Prefer the raw backend-message cache; fall back to reconstructing
30
+ // from UIMessages (for sessions that predate the backend cache).
31
+ let messages = loadBackendMessages(sessionId);
32
+ if (!messages || messages.length === 0) {
33
+ const uiMsgs = loadMessages(sessionId);
34
+ if (uiMsgs.length > 0) messages = uiMessagesToLLMMessages(uiMsgs);
35
+ }
36
+ if (!messages || messages.length === 0) {
37
+ setError('Nothing to summarize from this chat.');
38
+ setBusy(null);
39
+ return;
40
+ }
41
+
42
+ const res = await apiFetch('/api/session/restore-summary', {
43
+ method: 'POST',
44
+ body: JSON.stringify({ messages }),
45
+ });
46
+ if (!res.ok) throw new Error(`restore-summary failed: ${res.status}`);
47
+ const data = await res.json();
48
+ const newId = data.session_id as string | undefined;
49
+ if (!newId) throw new Error('no session_id in response');
50
+
51
+ useAgentStore.getState().clearSessionState(sessionId);
52
+ renameSession(sessionId, newId);
53
+ } catch (e) {
54
+ logger.warn('Catch-up failed:', e);
55
+ setError("Couldn't catch up — try starting over.");
56
+ setBusy(null);
57
+ }
58
+ }, [sessionId, renameSession]);
59
+
60
+ const handleStartOver = useCallback(() => {
61
+ setBusy('start-over');
62
+ useAgentStore.getState().clearSessionState(sessionId);
63
+ deleteSession(sessionId);
64
+ }, [sessionId, deleteSession]);
65
+
66
+ return (
67
+ <Box
68
+ sx={{
69
+ mx: { xs: 2, md: 'auto' },
70
+ my: 2,
71
+ maxWidth: 720,
72
+ p: 2.5,
73
+ borderRadius: 2,
74
+ border: '1px solid',
75
+ borderColor: 'divider',
76
+ bgcolor: 'background.paper',
77
+ boxShadow: '0 1px 3px rgba(0,0,0,0.06)',
78
+ }}
79
+ >
80
+ <Typography variant="body1" sx={{ fontWeight: 600, mb: 0.5 }}>
81
+ Where were we?
82
+ </Typography>
83
+ <Typography variant="body2" sx={{ color: 'text.secondary', mb: 2 }}>
84
+ Let me skim the conversation so far and pick up right where we left
85
+ off — or we can start something new.
86
+ </Typography>
87
+ <Box sx={{ display: 'flex', gap: 1, flexWrap: 'wrap' }}>
88
+ <Button
89
+ variant="contained"
90
+ onClick={handleCatchUp}
91
+ disabled={busy !== null}
92
+ startIcon={busy === 'catch-up' ? <CircularProgress size={16} color="inherit" /> : null}
93
+ sx={{ textTransform: 'none' }}
94
+ >
95
+ {busy === 'catch-up' ? 'Catching up…' : 'Catch me up'}
96
+ </Button>
97
+ <Button
98
+ variant="outlined"
99
+ onClick={handleStartOver}
100
+ disabled={busy !== null}
101
+ sx={{ textTransform: 'none' }}
102
+ >
103
+ Start fresh
104
+ </Button>
105
+ </Box>
106
+ {error && (
107
+ <Typography variant="caption" sx={{ display: 'block', mt: 1.5, color: 'error.main' }}>
108
+ {error}
109
+ </Typography>
110
+ )}
111
+ </Box>
112
+ );
113
+ }
frontend/src/components/Layout/AppLayout.tsx CHANGED
@@ -29,7 +29,7 @@ import { apiFetch } from '@/utils/api';
29
  const DRAWER_WIDTH = 260;
30
 
31
  export default function AppLayout() {
32
- const { sessions, activeSessionId, deleteSession } = useSessionStore();
33
  const { isConnected, llmHealthError, setLlmHealthError, user } = useAgentStore();
34
  const {
35
  isLeftSidebarOpen,
@@ -123,10 +123,13 @@ export default function AppLayout() {
123
 
124
  const handleSessionDead = useCallback(
125
  (deadSessionId: string) => {
126
- useAgentStore.getState().clearSessionState(deadSessionId);
127
- deleteSession(deadSessionId);
 
 
 
128
  },
129
- [deleteSession],
130
  );
131
 
132
  // Close sidebar on mobile after selecting a session
 
29
  const DRAWER_WIDTH = 260;
30
 
31
  export default function AppLayout() {
32
+ const { sessions, activeSessionId, markExpired } = useSessionStore();
33
  const { isConnected, llmHealthError, setLlmHealthError, user } = useAgentStore();
34
  const {
35
  isLeftSidebarOpen,
 
123
 
124
  const handleSessionDead = useCallback(
125
  (deadSessionId: string) => {
126
+ // Backend lost this session — mark it expired so the chat shows a
127
+ // recovery banner instead of either silently failing or eagerly
128
+ // creating a new backend session (which would pay a summary-call
129
+ // cost for sessions the user may never revisit).
130
+ markExpired(deadSessionId);
131
  },
132
+ [markExpired],
133
  );
134
 
135
  // Close sidebar on mobile after selecting a session
frontend/src/components/SessionChat.tsx CHANGED
@@ -11,6 +11,7 @@ import { useAgentStore } from '@/store/agentStore';
11
  import { useSessionStore } from '@/store/sessionStore';
12
  import MessageList from '@/components/Chat/MessageList';
13
  import ChatInput from '@/components/Chat/ChatInput';
 
14
  import { apiFetch } from '@/utils/api';
15
  import { logger } from '@/utils/logger';
16
 
@@ -22,7 +23,8 @@ interface SessionChatProps {
22
 
23
  export default function SessionChat({ sessionId, isActive, onSessionDead }: SessionChatProps) {
24
  const { isConnected, isProcessing, activityStatus, updateSession } = useAgentStore();
25
- const { updateSessionTitle } = useSessionStore();
 
26
 
27
  const { messages, sendMessage, stop, status, undoLastTurn, editAndRegenerate, approveTools } = useAgentChat({
28
  sessionId,
@@ -104,18 +106,22 @@ export default function SessionChat({ sessionId, isActive, onSessionDead }: Sess
104
  onUndoLastTurn={undoLastTurn}
105
  onEditAndRegenerate={editAndRegenerate}
106
  />
107
- <ChatInput
108
- sessionId={sessionId}
109
- onSend={handleSendMessage}
110
- onStop={handleStop}
111
- isProcessing={busy}
112
- disabled={!isConnected || activityStatus.type === 'waiting-approval'}
113
- placeholder={
114
- activityStatus.type === 'waiting-approval'
115
- ? 'Approve or reject pending tools first...'
116
- : undefined
117
- }
118
- />
 
 
 
 
119
  </>
120
  );
121
  }
 
11
  import { useSessionStore } from '@/store/sessionStore';
12
  import MessageList from '@/components/Chat/MessageList';
13
  import ChatInput from '@/components/Chat/ChatInput';
14
+ import ExpiredBanner from '@/components/Chat/ExpiredBanner';
15
  import { apiFetch } from '@/utils/api';
16
  import { logger } from '@/utils/logger';
17
 
 
23
 
24
  export default function SessionChat({ sessionId, isActive, onSessionDead }: SessionChatProps) {
25
  const { isConnected, isProcessing, activityStatus, updateSession } = useAgentStore();
26
+ const { updateSessionTitle, sessions } = useSessionStore();
27
+ const isExpired = sessions.find((s) => s.id === sessionId)?.expired === true;
28
 
29
  const { messages, sendMessage, stop, status, undoLastTurn, editAndRegenerate, approveTools } = useAgentChat({
30
  sessionId,
 
106
  onUndoLastTurn={undoLastTurn}
107
  onEditAndRegenerate={editAndRegenerate}
108
  />
109
+ {isExpired ? (
110
+ <ExpiredBanner sessionId={sessionId} />
111
+ ) : (
112
+ <ChatInput
113
+ sessionId={sessionId}
114
+ onSend={handleSendMessage}
115
+ onStop={handleStop}
116
+ isProcessing={busy}
117
+ disabled={!isConnected || activityStatus.type === 'waiting-approval'}
118
+ placeholder={
119
+ activityStatus.type === 'waiting-approval'
120
+ ? 'Approve or reject pending tools first...'
121
+ : undefined
122
+ }
123
+ />
124
+ )}
125
  </>
126
  );
127
  }
frontend/src/components/SessionSidebar/SessionSidebar.tsx CHANGED
@@ -270,7 +270,7 @@ export default function SessionSidebar({ onClose }: SessionSidebarProps) {
270
  lineHeight: 1.2,
271
  }}
272
  >
273
- {formatTime(session.createdAt)}
274
  </Typography>
275
  </Box>
276
 
 
270
  lineHeight: 1.2,
271
  }}
272
  >
273
+ {session.expired ? 'needs a catch-up' : formatTime(session.createdAt)}
274
  </Typography>
275
  </Box>
276
 
frontend/src/hooks/useAgentChat.ts CHANGED
@@ -12,6 +12,7 @@ import { useChat } from '@ai-sdk/react';
12
  import { type UIMessage, lastAssistantMessageIsCompleteWithApprovalResponses } from 'ai';
13
  import { SSEChatTransport, type SideChannelCallbacks } from '@/lib/sse-chat-transport';
14
  import { loadMessages, saveMessages } from '@/lib/chat-message-store';
 
15
  import { saveResearch, loadResearch, clearResearch, RESEARCH_MAX_STEPS } from '@/lib/research-store';
16
  import { llmMessagesToUIMessages } from '@/lib/convert-llm-messages';
17
  import { apiFetch } from '@/utils/api';
@@ -367,6 +368,14 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
367
  ]);
368
  if (cancelled) return;
369
 
 
 
 
 
 
 
 
 
370
  let pendingIds: Set<string> | undefined;
371
  let backendIsProcessing = false;
372
  if (infoRes.ok) {
@@ -385,6 +394,9 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
385
  if (msgsRes.ok) {
386
  const data = await msgsRes.json();
387
  if (cancelled || !Array.isArray(data) || data.length === 0) return;
 
 
 
388
  const uiMsgs = llmMessagesToUIMessages(data, pendingIds, chatActionsRef.current.messages);
389
  if (uiMsgs.length > 0) {
390
  chat.setMessages(uiMsgs);
@@ -447,6 +459,10 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
447
  const data = await msgsRes.json();
448
  if (!Array.isArray(data) || data.length === 0) return null;
449
 
 
 
 
 
450
  let pendingIds: Set<string> | undefined;
451
  if (infoRes.ok) {
452
  const info = await infoRes.json();
 
12
  import { type UIMessage, lastAssistantMessageIsCompleteWithApprovalResponses } from 'ai';
13
  import { SSEChatTransport, type SideChannelCallbacks } from '@/lib/sse-chat-transport';
14
  import { loadMessages, saveMessages } from '@/lib/chat-message-store';
15
+ import { saveBackendMessages } from '@/lib/backend-message-store';
16
  import { saveResearch, loadResearch, clearResearch, RESEARCH_MAX_STEPS } from '@/lib/research-store';
17
  import { llmMessagesToUIMessages } from '@/lib/convert-llm-messages';
18
  import { apiFetch } from '@/utils/api';
 
368
  ]);
369
  if (cancelled) return;
370
 
371
+ // If both endpoints say "not found", the backend lost this session
372
+ // (typically: Space restarted). Fire onSessionDead so AppLayout
373
+ // can flag it for the catch-up banner.
374
+ if (infoRes.status === 404 && msgsRes.status === 404) {
375
+ callbacksRef.current.onSessionDead?.(sessionId);
376
+ return;
377
+ }
378
+
379
  let pendingIds: Set<string> | undefined;
380
  let backendIsProcessing = false;
381
  if (infoRes.ok) {
 
394
  if (msgsRes.ok) {
395
  const data = await msgsRes.json();
396
  if (cancelled || !Array.isArray(data) || data.length === 0) return;
397
+ // Cache the raw backend messages so we can restore this session
398
+ // into a fresh backend if the Space restarts.
399
+ saveBackendMessages(sessionId, data);
400
  const uiMsgs = llmMessagesToUIMessages(data, pendingIds, chatActionsRef.current.messages);
401
  if (uiMsgs.length > 0) {
402
  chat.setMessages(uiMsgs);
 
459
  const data = await msgsRes.json();
460
  if (!Array.isArray(data) || data.length === 0) return null;
461
 
462
+ // Cache the raw backend messages so we can restore this session
463
+ // into a fresh backend if the Space restarts.
464
+ saveBackendMessages(sessionId, data);
465
+
466
  let pendingIds: Set<string> | undefined;
467
  if (infoRes.ok) {
468
  const info = await infoRes.json();
frontend/src/lib/backend-message-store.ts ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * localStorage cache of raw backend (litellm Message) dicts keyed by
3
+ * session ID. Used to restore a session into a fresh backend after the
4
+ * Space restarts — the browser-side UIMessages are what the user sees,
5
+ * but the LLM needs the backend format to continue the conversation.
6
+ */
7
+ import { logger } from '@/utils/logger';
8
+
9
+ const STORAGE_KEY = 'hf-agent-backend-messages';
10
+ const MAX_SESSIONS = 50;
11
+
12
+ type MessagesMap = Record<string, unknown[]>;
13
+
14
+ function readAll(): MessagesMap {
15
+ try {
16
+ const raw = localStorage.getItem(STORAGE_KEY);
17
+ if (!raw) return {};
18
+ const parsed = JSON.parse(raw);
19
+ if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
20
+ return parsed as MessagesMap;
21
+ }
22
+ return {};
23
+ } catch {
24
+ return {};
25
+ }
26
+ }
27
+
28
+ function writeAll(map: MessagesMap): void {
29
+ try {
30
+ localStorage.setItem(STORAGE_KEY, JSON.stringify(map));
31
+ } catch (e) {
32
+ // Quota exceeded is the most common reason — the cache is best-effort.
33
+ logger.warn('Failed to persist backend messages:', e);
34
+ }
35
+ }
36
+
37
+ export function loadBackendMessages(sessionId: string): unknown[] {
38
+ const map = readAll();
39
+ return map[sessionId] ?? [];
40
+ }
41
+
42
+ export function saveBackendMessages(sessionId: string, messages: unknown[]): void {
43
+ const map = readAll();
44
+ map[sessionId] = messages;
45
+
46
+ const keys = Object.keys(map);
47
+ if (keys.length > MAX_SESSIONS) {
48
+ const toRemove = keys.slice(0, keys.length - MAX_SESSIONS);
49
+ for (const k of toRemove) delete map[k];
50
+ }
51
+
52
+ writeAll(map);
53
+ }
54
+
55
+ export function moveBackendMessages(fromId: string, toId: string): void {
56
+ const map = readAll();
57
+ if (!map[fromId]) return;
58
+ map[toId] = map[fromId];
59
+ delete map[fromId];
60
+ writeAll(map);
61
+ }
62
+
63
+ export function deleteBackendMessages(sessionId: string): void {
64
+ const map = readAll();
65
+ delete map[sessionId];
66
+ writeAll(map);
67
+ }
frontend/src/lib/chat-message-store.ts CHANGED
@@ -61,3 +61,11 @@ export function deleteMessages(sessionId: string): void {
61
  delete map[sessionId];
62
  writeAll(map);
63
  }
 
 
 
 
 
 
 
 
 
61
  delete map[sessionId];
62
  writeAll(map);
63
  }
64
+
65
+ export function moveMessages(fromId: string, toId: string): void {
66
+ const map = readAll();
67
+ if (!map[fromId]) return;
68
+ map[toId] = map[fromId];
69
+ delete map[fromId];
70
+ writeAll(map);
71
+ }
frontend/src/lib/convert-llm-messages.ts CHANGED
@@ -60,6 +60,12 @@ export function llmMessagesToUIMessages(
60
  if (msg.role === 'tool') continue; // handled via tool_calls pairing
61
 
62
  if (msg.role === 'user') {
 
 
 
 
 
 
63
  // Try to reuse existing ID if the message at this position matches
64
  const existingId = getExistingId(uiMessages.length, 'user');
65
  uiMessages.push({
@@ -137,3 +143,98 @@ export function llmMessagesToUIMessages(
137
 
138
  return uiMessages;
139
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  if (msg.role === 'tool') continue; // handled via tool_calls pairing
61
 
62
  if (msg.role === 'user') {
63
+ // Skip internal system-style nudges (doom-loop correction, compact
64
+ // hints, restore notices, etc.) — they're meant for the LLM, not
65
+ // the user. They always start with "[SYSTEM:".
66
+ if (typeof msg.content === 'string' && msg.content.trimStart().startsWith('[SYSTEM:')) {
67
+ continue;
68
+ }
69
  // Try to reuse existing ID if the message at this position matches
70
  const existingId = getExistingId(uiMessages.length, 'user');
71
  uiMessages.push({
 
143
 
144
  return uiMessages;
145
  }
146
+
147
+
148
+ interface ToolPart {
149
+ type: string;
150
+ toolCallId?: string;
151
+ toolName?: string;
152
+ state?: string;
153
+ input?: unknown;
154
+ output?: unknown;
155
+ errorText?: string;
156
+ }
157
+
158
+ function joinText(parts: UIMessage['parts']): string {
159
+ return parts
160
+ .filter((p): p is { type: 'text'; text: string } => p.type === 'text')
161
+ .map((p) => p.text)
162
+ .join('');
163
+ }
164
+
165
+ function stringifyOutput(output: unknown): string {
166
+ if (output == null) return '';
167
+ if (typeof output === 'string') return output;
168
+ try {
169
+ return JSON.stringify(output);
170
+ } catch {
171
+ return String(output);
172
+ }
173
+ }
174
+
175
+ /**
176
+ * Reverse of llmMessagesToUIMessages — used as a fallback when we need to
177
+ * restore a session but only have the UIMessage cache (e.g. the session
178
+ * predates the backend-message cache feature).
179
+ *
180
+ * Includes every tool call the assistant made, regardless of the part's
181
+ * stored state. If we have a captured output (or errorText), we emit a
182
+ * paired role=tool result. If we don't, we leave the tool_call dangling —
183
+ * the backend's ContextManager patches those via _patch_dangling_tool_calls.
184
+ */
185
+ export function uiMessagesToLLMMessages(uiMessages: UIMessage[]): LLMMessage[] {
186
+ const out: LLMMessage[] = [];
187
+ for (const msg of uiMessages) {
188
+ if (msg.role === 'user') {
189
+ const text = joinText(msg.parts);
190
+ if (text) out.push({ role: 'user', content: text });
191
+ continue;
192
+ }
193
+ if (msg.role === 'assistant') {
194
+ const text = joinText(msg.parts);
195
+ const toolCalls: LLMToolCall[] = [];
196
+ const pairedResults: Array<{ id: string; content: string }> = [];
197
+ for (const raw of msg.parts as ToolPart[]) {
198
+ if (!raw.type) continue;
199
+ const isTool = raw.type === 'dynamic-tool' || raw.type.startsWith('tool-');
200
+ if (!isTool) continue;
201
+ const toolCallId = raw.toolCallId;
202
+ const toolName =
203
+ raw.toolName ?? (raw.type.startsWith('tool-') ? raw.type.slice(5) : undefined);
204
+ if (!toolCallId || !toolName) continue;
205
+
206
+ toolCalls.push({
207
+ id: toolCallId,
208
+ function: {
209
+ name: toolName,
210
+ arguments: JSON.stringify(raw.input ?? {}),
211
+ },
212
+ });
213
+
214
+ // Prefer output; fall back to errorText for output-error /
215
+ // output-denied. A missing result leaves the tool_call dangling —
216
+ // the backend will patch it with a synthesized stub.
217
+ const result =
218
+ raw.output != null
219
+ ? stringifyOutput(raw.output)
220
+ : typeof raw.errorText === 'string' && raw.errorText
221
+ ? raw.errorText
222
+ : null;
223
+ if (result != null) {
224
+ pairedResults.push({ id: toolCallId, content: result });
225
+ }
226
+ }
227
+ if (text || toolCalls.length) {
228
+ out.push({
229
+ role: 'assistant',
230
+ content: text || null,
231
+ tool_calls: toolCalls.length ? toolCalls : null,
232
+ });
233
+ }
234
+ for (const r of pairedResults) {
235
+ out.push({ role: 'tool', content: r.content, tool_call_id: r.id });
236
+ }
237
+ }
238
+ }
239
+ return out;
240
+ }
frontend/src/lib/sse-chat-transport.ts CHANGED
@@ -351,6 +351,11 @@ export class SSEChatTransport implements ChatTransport<UIMessage> {
351
  },
352
  });
353
 
 
 
 
 
 
354
  if (!response.ok) {
355
  const errorText = await response.text().catch(() => 'Request failed');
356
  throw new Error(`Chat request failed: ${response.status} ${errorText}`);
 
351
  },
352
  });
353
 
354
+ if (response.status === 404) {
355
+ // Backend lost this session (e.g. Space restart). Signal the UI so
356
+ // it can flag the session for the catch-up banner.
357
+ this.sideChannel.onSessionDead(sessionId);
358
+ }
359
  if (!response.ok) {
360
  const errorText = await response.text().catch(() => 'Request failed');
361
  throw new Error(`Chat request failed: ${response.status} ${errorText}`);
frontend/src/store/sessionStore.ts CHANGED
@@ -1,7 +1,8 @@
1
  import { create } from 'zustand';
2
  import { persist } from 'zustand/middleware';
3
  import type { SessionMeta } from '@/types/agent';
4
- import { deleteMessages } from '@/lib/chat-message-store';
 
5
 
6
  interface SessionStore {
7
  sessions: SessionMeta[];
@@ -14,6 +15,15 @@ interface SessionStore {
14
  setSessionActive: (id: string, isActive: boolean) => void;
15
  updateSessionTitle: (id: string, title: string) => void;
16
  setNeedsAttention: (id: string, needs: boolean) => void;
 
 
 
 
 
 
 
 
 
17
  }
18
 
19
  export const useSessionStore = create<SessionStore>()(
@@ -38,6 +48,7 @@ export const useSessionStore = create<SessionStore>()(
38
 
39
  deleteSession: (id: string) => {
40
  deleteMessages(id);
 
41
  set((state) => {
42
  const newSessions = state.sessions.filter((s) => s.id !== id);
43
  const newActiveId =
@@ -51,6 +62,32 @@ export const useSessionStore = create<SessionStore>()(
51
  });
52
  },
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  switchSession: (id: string) => {
55
  set((state) => ({
56
  activeSessionId: id,
 
1
  import { create } from 'zustand';
2
  import { persist } from 'zustand/middleware';
3
  import type { SessionMeta } from '@/types/agent';
4
+ import { deleteMessages, moveMessages } from '@/lib/chat-message-store';
5
+ import { moveBackendMessages, deleteBackendMessages } from '@/lib/backend-message-store';
6
 
7
  interface SessionStore {
8
  sessions: SessionMeta[];
 
15
  setSessionActive: (id: string, isActive: boolean) => void;
16
  updateSessionTitle: (id: string, title: string) => void;
17
  setNeedsAttention: (id: string, needs: boolean) => void;
18
+ /** Mark a session as expired (backend no longer has it). The UI shows a
19
+ * recovery banner and disables input. */
20
+ markExpired: (id: string) => void;
21
+ /** Clear the expired flag (used after restore-with-summary succeeds). */
22
+ clearExpired: (id: string) => void;
23
+ /** Atomically swap a session's id in the list + both localStorage caches.
24
+ * Used when we rehydrate an expired session into a freshly-created backend
25
+ * session — preserves title, timestamps, and messages. */
26
+ renameSession: (oldId: string, newId: string) => void;
27
  }
28
 
29
  export const useSessionStore = create<SessionStore>()(
 
48
 
49
  deleteSession: (id: string) => {
50
  deleteMessages(id);
51
+ deleteBackendMessages(id);
52
  set((state) => {
53
  const newSessions = state.sessions.filter((s) => s.id !== id);
54
  const newActiveId =
 
62
  });
63
  },
64
 
65
// Flag a session whose id the backend no longer recognizes; flagged
// sessions get the catch-up banner in the UI.
markExpired: (id: string) => {
  set((state) => {
    const sessions = state.sessions.map((session) =>
      session.id === id ? { ...session, expired: true } : session,
    );
    return { sessions };
  });
},
70
+
71
// Drop the expired flag (used after restore-with-summary succeeds).
clearExpired: (id: string) => {
  set((state) => {
    const sessions = state.sessions.map((session) =>
      session.id === id ? { ...session, expired: false } : session,
    );
    return { sessions };
  });
},
78
+
79
// Swap a session's id in the list and both localStorage caches, used when
// an expired session is rehydrated into a freshly-created backend session.
// Title/timestamps survive via the spread; the expired flag is cleared.
renameSession: (oldId: string, newId: string) => {
  if (oldId === newId) return;
  moveMessages(oldId, newId);
  moveBackendMessages(oldId, newId);
  set((state) => {
    const sessions = state.sessions.map((session) =>
      session.id === oldId ? { ...session, id: newId, expired: false } : session,
    );
    const activeSessionId =
      state.activeSessionId === oldId ? newId : state.activeSessionId;
    return { sessions, activeSessionId };
  });
},
90
+
91
  switchSession: (id: string) => {
92
  set((state) => ({
93
  activeSessionId: id,
frontend/src/types/agent.ts CHANGED
@@ -16,6 +16,11 @@ export interface SessionMeta {
16
  createdAt: string;
17
  isActive: boolean;
18
  needsAttention: boolean;
 
 
 
 
 
19
  }
20
 
21
  export interface ToolApproval {
 
16
  createdAt: string;
17
  isActive: boolean;
18
  needsAttention: boolean;
19
+ /** True when the backend no longer recognizes this session id (e.g.
20
+ * after a backend restart). The UI shows a recovery banner and
21
+ * disables input until the user chooses to restore-with-summary or
22
+ * start fresh. */
23
+ expired?: boolean;
24
  }
25
 
26
  export interface ToolApproval {