Aksel Joonas Reedi committed on
Commit
962191f
·
unverified ·
1 Parent(s): 28b8f2b

Restore expired sessions via summary-based catch-up (#52)

Browse files

When the backend loses a session (typically: HF Space restarted), the
chat now shows a small inline banner instead of silently erroring:

Where were we?
Let me skim the conversation so far and pick up right where we left
off — or we can start something new.
[ Catch me up ] [ Start fresh ]

Flow, in one pass:
* Frontend stashes the raw backend messages in localStorage on every
mount-hydrate and turn_complete. When the backend 404s for the
session id, the SSE transport and the mount effect both fire
onSessionDead → the session is flagged `expired` in sessionStore;
the sidebar marks it "needs a catch-up".
* Catch me up → POST /api/session/restore-summary with the cached
messages. Backend creates a fresh session, runs the existing
summarizer (factored into summarize_messages() and shared with
in-session compaction) with a restore-specific prompt that
preserves the tool-call trail, and seeds the new session with that
summary wrapped in a [SYSTEM: ...] user turn. New id is swapped
back via renameSession; UIMessages + backend-cache move with it.
* Start fresh → delete the session + its caches.
* The design is lazy per session: users with 5 stale tabs only pay
for a summary on the ones they actually reopen.

Frontend filters [SYSTEM: ...] user turns from rendering so the seed
message (plus existing doom-loop / compact nudges) stays invisible.

For sessions that predate the raw-message cache, fall back to
reconstructing the backend message list from the longstanding
UIMessage cache (tool calls + paired results, text preserved).

Also sets litellm.modify_params = True globally in agent/__init__.py
(moved out of agent/main.py) so the backend entry also picks it up —
required for Anthropic to accept a history containing tool_calls
without a `tools=` kwarg, which is exactly the summarization shape.

agent/__init__.py CHANGED
@@ -2,6 +2,20 @@
2
  HF Agent - Main agent module
3
  """
4
 
5
- from agent.core.agent_loop import submission_loop
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  __all__ = ["submission_loop"]
 
2
  HF Agent - Main agent module
3
  """
4
 
5
+ import litellm
6
+
7
+ # Global LiteLLM behavior — set once at package import so both CLI and
8
+ # backend entries share the same config.
9
+ # drop_params: quietly drop unsupported params rather than raising
10
+ # suppress_debug_info: hide the noisy "Give Feedback" banner on errors
11
+ # modify_params: let LiteLLM patch Anthropic's tool-call requirements
12
+ # (synthesize a dummy tool spec when we call completion on a history
13
+ # that contains tool_calls but aren't passing `tools=` — happens
14
+ # during summarization / session seeding).
15
+ litellm.drop_params = True
16
+ litellm.suppress_debug_info = True
17
+ litellm.modify_params = True
18
+
19
+ from agent.core.agent_loop import submission_loop # noqa: E402
20
 
21
  __all__ = ["submission_loop"]
agent/context_manager/manager.py CHANGED
@@ -68,6 +68,63 @@ def _get_hf_username(hf_token: str | None = None) -> str:
68
  return "unknown"
69
 
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  class ContextManager:
72
  """Manages conversation context and message history for the agent"""
73
 
@@ -318,25 +375,15 @@ class ContextManager:
318
  if not messages_to_summarize:
319
  return
320
 
321
- messages_to_summarize.append(
322
- Message(
323
- role="user",
324
- content="Please provide a concise summary of the conversation above, focusing on key decisions, the 'why' behind the decisions, problems solved, and important context needed for developing further. Your summary will be given to someone who has never worked on this project before and they will be have to be filled in.",
325
- )
326
- )
327
-
328
- from agent.core.llm_params import _resolve_llm_params
329
-
330
- llm_params = _resolve_llm_params(model_name, hf_token, reasoning_effort="high")
331
- response = await acompletion(
332
- messages=messages_to_summarize,
333
- max_completion_tokens=self.compact_size,
334
- tools=tool_specs,
335
- **llm_params,
336
- )
337
- summarized_message = Message(
338
- role="assistant", content=response.choices[0].message.content
339
  )
 
340
 
341
  # Reconstruct: system + first user msg + summary + recent messages
342
  head = [system_msg] if system_msg else []
@@ -344,6 +391,16 @@ class ContextManager:
344
  head.append(first_user_msg)
345
  self.items = head + [summarized_message] + recent_messages
346
 
347
- self.running_context_usage = (
348
- len(self.system_prompt) // 4 + response.usage.completion_tokens
349
- )
 
 
 
 
 
 
 
 
 
 
 
68
  return "unknown"
69
 
70
 
71
+ _COMPACT_PROMPT = (
72
+ "Please provide a concise summary of the conversation above, focusing on "
73
+ "key decisions, the 'why' behind the decisions, problems solved, and "
74
+ "important context needed for developing further. Your summary will be "
75
+ "given to someone who has never worked on this project before and they "
76
+ "will be have to be filled in."
77
+ )
78
+
79
+ # Used when seeding a brand-new session from prior browser-cached messages.
80
+ # Here we're writing a note to *ourselves* — so preserve the tool-call trail,
81
+ # files produced, and planned next steps in first person. Optimized for
82
+ # continuity, not brevity.
83
+ _RESTORE_PROMPT = (
84
+ "You're about to be restored into a fresh session with no memory of the "
85
+ "conversation above. Write a first-person note to your future self so "
86
+ "you can continue right where you left off. Include:\n"
87
+ " • What the user originally asked for and what progress you've made.\n"
88
+ " • Every tool you called, with arguments and a one-line result summary.\n"
89
+ " • Any code, files, scripts, or artifacts you produced (with paths).\n"
90
+ " • Key decisions and the reasoning behind them.\n"
91
+ " • What you were planning to do next.\n\n"
92
+ "Don't be cute. Be specific. This is the only context you'll have."
93
+ )
94
+
95
+
96
+ async def summarize_messages(
97
+ messages: list[Message],
98
+ model_name: str,
99
+ hf_token: str | None = None,
100
+ max_tokens: int = 2000,
101
+ tool_specs: list[dict] | None = None,
102
+ prompt: str = _COMPACT_PROMPT,
103
+ ) -> tuple[str, int]:
104
+ """Run a summarization prompt against a list of messages.
105
+
106
+ ``prompt`` defaults to the compaction prompt (terse, decision-focused).
107
+ Callers seeding a new session after a restart should pass ``_RESTORE_PROMPT``
108
+ instead — it preserves the tool-call trail so the agent can answer
109
+ follow-up questions about what it did.
110
+
111
+ Returns ``(summary_text, completion_tokens)``.
112
+ """
113
+ from agent.core.llm_params import _resolve_llm_params
114
+
115
+ prompt_messages = list(messages) + [Message(role="user", content=prompt)]
116
+ llm_params = _resolve_llm_params(model_name, hf_token, reasoning_effort="high")
117
+ response = await acompletion(
118
+ messages=prompt_messages,
119
+ max_completion_tokens=max_tokens,
120
+ tools=tool_specs,
121
+ **llm_params,
122
+ )
123
+ summary = response.choices[0].message.content or ""
124
+ completion_tokens = response.usage.completion_tokens if response.usage else 0
125
+ return summary, completion_tokens
126
+
127
+
128
  class ContextManager:
129
  """Manages conversation context and message history for the agent"""
130
 
 
375
  if not messages_to_summarize:
376
  return
377
 
378
+ summary, completion_tokens = await summarize_messages(
379
+ messages_to_summarize,
380
+ model_name=model_name,
381
+ hf_token=hf_token,
382
+ max_tokens=self.compact_size,
383
+ tool_specs=tool_specs,
384
+ prompt=_COMPACT_PROMPT,
 
 
 
 
 
 
 
 
 
 
 
385
  )
386
+ summarized_message = Message(role="assistant", content=summary)
387
 
388
  # Reconstruct: system + first user msg + summary + recent messages
389
  head = [system_msg] if system_msg else []
 
391
  head.append(first_user_msg)
392
  self.items = head + [summarized_message] + recent_messages
393
 
394
+ # Count the actual post-compact context — system prompt + first user
395
+ # turn + summary + the preserved tail all contribute, not just the
396
+ # summary. litellm.token_counter uses the model's real tokenizer.
397
+ from litellm import token_counter
398
+
399
+ try:
400
+ self.running_context_usage = token_counter(
401
+ model=model_name,
402
+ messages=[m.model_dump() for m in self.items],
403
+ )
404
+ except Exception as e:
405
+ logger.warning("token_counter failed post-compact (%s); falling back to rough estimate", e)
406
+ self.running_context_usage = len(self.system_prompt) // 4 + completion_tokens
backend/routes/agent.py CHANGED
@@ -227,6 +227,50 @@ async def create_session(
227
  return SessionResponse(session_id=session_id, ready=True)
228
 
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  @router.get("/session/{session_id}", response_model=SessionInfo)
231
  async def get_session(
232
  session_id: str, user: dict = Depends(get_current_user)
 
227
  return SessionResponse(session_id=session_id, ready=True)
228
 
229
 
230
+ @router.post("/session/restore-summary", response_model=SessionResponse)
231
+ async def restore_session_summary(
232
+ request: Request, body: dict, user: dict = Depends(get_current_user)
233
+ ) -> SessionResponse:
234
+ """Create a new session seeded with a summary of the caller's prior
235
+ conversation. The client sends its cached messages; we run the standard
236
+ summarization prompt on them and drop the result into the new
237
+ session's context as a user-role system note.
238
+ """
239
+ messages = body.get("messages")
240
+ if not isinstance(messages, list) or not messages:
241
+ raise HTTPException(status_code=400, detail="Missing 'messages' array")
242
+
243
+ hf_token = None
244
+ auth_header = request.headers.get("Authorization", "")
245
+ if auth_header.startswith("Bearer "):
246
+ hf_token = auth_header[7:]
247
+ if not hf_token:
248
+ hf_token = request.cookies.get("hf_access_token")
249
+ if not hf_token:
250
+ hf_token = os.environ.get("HF_TOKEN")
251
+
252
+ try:
253
+ session_id = await session_manager.create_session(
254
+ user_id=user["user_id"], hf_token=hf_token
255
+ )
256
+ except SessionCapacityError as e:
257
+ raise HTTPException(status_code=503, detail=str(e))
258
+
259
+ try:
260
+ summarized = await session_manager.seed_from_summary(session_id, messages)
261
+ except ValueError as e:
262
+ raise HTTPException(status_code=500, detail=str(e))
263
+ except Exception as e:
264
+ logger.exception("seed_from_summary failed")
265
+ raise HTTPException(status_code=500, detail=f"Summary failed: {e}")
266
+
267
+ logger.info(
268
+ f"Seeded session {session_id} for {user.get('username', 'unknown')} "
269
+ f"(summary of {summarized} messages)"
270
+ )
271
+ return SessionResponse(session_id=session_id, ready=True)
272
+
273
+
274
  @router.get("/session/{session_id}", response_model=SessionInfo)
275
  async def get_session(
276
  session_id: str, user: dict = Depends(get_current_user)
backend/session_manager.py CHANGED
@@ -207,6 +207,69 @@ class SessionManager:
207
  logger.info(f"Created session {session_id} for user {user_id}")
208
  return session_id
209
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  @staticmethod
211
  async def _cleanup_sandbox(session: Session) -> None:
212
  """Delete the sandbox Space if one was created for this session."""
 
207
  logger.info(f"Created session {session_id} for user {user_id}")
208
  return session_id
209
 
210
+ async def seed_from_summary(self, session_id: str, messages: list[dict]) -> int:
211
+ """Rehydrate a session from cached prior messages via summarization.
212
+
213
+ Runs the standard summarization prompt (same one compaction uses)
214
+ over the provided messages, then seeds the new session's context
215
+ with that summary. Tool-call pairing concerns disappear because the
216
+ output is plain text. Returns the number of messages summarized.
217
+ """
218
+ from litellm import Message
219
+
220
+ from agent.context_manager.manager import _RESTORE_PROMPT, summarize_messages
221
+
222
+ agent_session = self.sessions.get(session_id)
223
+ if not agent_session:
224
+ raise ValueError(f"Session {session_id} not found")
225
+
226
+ # Parse into Message objects, tolerating malformed entries.
227
+ parsed: list[Message] = []
228
+ for raw in messages:
229
+ if raw.get("role") == "system":
230
+ continue # the new session has its own system prompt
231
+ try:
232
+ parsed.append(Message.model_validate(raw))
233
+ except Exception as e:
234
+ logger.warning("Dropping malformed message during seed: %s", e)
235
+
236
+ if not parsed:
237
+ return 0
238
+
239
+ session = agent_session.session
240
+ # Pass the real tool specs so the summarizer sees what the agent
241
+ # actually has — otherwise Anthropic's modify_params injects a
242
+ # dummy tool and the summarizer editorializes that the original
243
+ # tool calls were fabricated.
244
+ tool_specs = None
245
+ try:
246
+ tool_specs = agent_session.tool_router.get_tool_specs_for_llm()
247
+ except Exception:
248
+ pass
249
+ try:
250
+ summary, _ = await summarize_messages(
251
+ parsed,
252
+ model_name=session.config.model_name,
253
+ hf_token=session.hf_token,
254
+ max_tokens=4000,
255
+ prompt=_RESTORE_PROMPT,
256
+ tool_specs=tool_specs,
257
+ )
258
+ except Exception as e:
259
+ logger.error("Summary call failed during seed: %s", e)
260
+ raise
261
+
262
+ seed = Message(
263
+ role="user",
264
+ content=(
265
+ "[SYSTEM: Your prior memory of this conversation — written "
266
+ "in your own voice right before restart. Continue from here.]\n\n"
267
+ + (summary or "(no summary returned)")
268
+ ),
269
+ )
270
+ session.context_manager.items.append(seed)
271
+ return len(parsed)
272
+
273
  @staticmethod
274
  async def _cleanup_sandbox(session: Session) -> None:
275
  """Delete the sandbox Space if one was created for this session."""
frontend/src/components/Chat/ExpiredBanner.tsx ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Shown inline in a chat when the backend no longer recognizes the
3
+ * session id (typically: Space was restarted). Lets the user catch the
4
+ * agent up with a summary of the prior conversation, or start over.
5
+ */
6
+ import { useState, useCallback } from 'react';
7
+ import { Box, Button, CircularProgress, Typography } from '@mui/material';
8
+ import { apiFetch } from '@/utils/api';
9
+ import { useSessionStore } from '@/store/sessionStore';
10
+ import { useAgentStore } from '@/store/agentStore';
11
+ import { loadBackendMessages } from '@/lib/backend-message-store';
12
+ import { loadMessages } from '@/lib/chat-message-store';
13
+ import { uiMessagesToLLMMessages } from '@/lib/convert-llm-messages';
14
+ import { logger } from '@/utils/logger';
15
+
16
+ interface Props {
17
+ sessionId: string;
18
+ }
19
+
20
+ export default function ExpiredBanner({ sessionId }: Props) {
21
+ const { renameSession, deleteSession } = useSessionStore();
22
+ const [busy, setBusy] = useState<'catch-up' | 'start-over' | null>(null);
23
+ const [error, setError] = useState<string | null>(null);
24
+
25
+ const handleCatchUp = useCallback(async () => {
26
+ setBusy('catch-up');
27
+ setError(null);
28
+ try {
29
+ // Prefer the raw backend-message cache; fall back to reconstructing
30
+ // from UIMessages (for sessions that predate the backend cache).
31
+ let messages = loadBackendMessages(sessionId);
32
+ if (!messages || messages.length === 0) {
33
+ const uiMsgs = loadMessages(sessionId);
34
+ if (uiMsgs.length > 0) messages = uiMessagesToLLMMessages(uiMsgs);
35
+ }
36
+ if (!messages || messages.length === 0) {
37
+ setError('Nothing to summarize from this chat.');
38
+ setBusy(null);
39
+ return;
40
+ }
41
+
42
+ const res = await apiFetch('/api/session/restore-summary', {
43
+ method: 'POST',
44
+ body: JSON.stringify({ messages }),
45
+ });
46
+ if (!res.ok) throw new Error(`restore-summary failed: ${res.status}`);
47
+ const data = await res.json();
48
+ const newId = data.session_id as string | undefined;
49
+ if (!newId) throw new Error('no session_id in response');
50
+
51
+ useAgentStore.getState().clearSessionState(sessionId);
52
+ renameSession(sessionId, newId);
53
+ } catch (e) {
54
+ logger.warn('Catch-up failed:', e);
55
+ setError("Couldn't catch up — try starting over.");
56
+ setBusy(null);
57
+ }
58
+ }, [sessionId, renameSession]);
59
+
60
+ const handleStartOver = useCallback(() => {
61
+ setBusy('start-over');
62
+ useAgentStore.getState().clearSessionState(sessionId);
63
+ deleteSession(sessionId);
64
+ }, [sessionId, deleteSession]);
65
+
66
+ return (
67
+ <Box
68
+ sx={{
69
+ mx: { xs: 2, md: 'auto' },
70
+ my: 2,
71
+ maxWidth: 720,
72
+ p: 2.5,
73
+ borderRadius: 2,
74
+ border: '1px solid',
75
+ borderColor: 'divider',
76
+ bgcolor: 'background.paper',
77
+ boxShadow: '0 1px 3px rgba(0,0,0,0.06)',
78
+ }}
79
+ >
80
+ <Typography variant="body1" sx={{ fontWeight: 600, mb: 0.5 }}>
81
+ Where were we?
82
+ </Typography>
83
+ <Typography variant="body2" sx={{ color: 'text.secondary', mb: 2 }}>
84
+ Let me skim the conversation so far and pick up right where we left
85
+ off — or we can start something new.
86
+ </Typography>
87
+ <Box sx={{ display: 'flex', gap: 1, flexWrap: 'wrap' }}>
88
+ <Button
89
+ variant="contained"
90
+ onClick={handleCatchUp}
91
+ disabled={busy !== null}
92
+ startIcon={busy === 'catch-up' ? <CircularProgress size={16} color="inherit" /> : null}
93
+ sx={{ textTransform: 'none' }}
94
+ >
95
+ {busy === 'catch-up' ? 'Catching up…' : 'Catch me up'}
96
+ </Button>
97
+ <Button
98
+ variant="outlined"
99
+ onClick={handleStartOver}
100
+ disabled={busy !== null}
101
+ sx={{ textTransform: 'none' }}
102
+ >
103
+ Start fresh
104
+ </Button>
105
+ </Box>
106
+ {error && (
107
+ <Typography variant="caption" sx={{ display: 'block', mt: 1.5, color: 'error.main' }}>
108
+ {error}
109
+ </Typography>
110
+ )}
111
+ </Box>
112
+ );
113
+ }
frontend/src/components/Layout/AppLayout.tsx CHANGED
@@ -29,7 +29,7 @@ import { apiFetch } from '@/utils/api';
29
  const DRAWER_WIDTH = 260;
30
 
31
  export default function AppLayout() {
32
- const { sessions, activeSessionId, deleteSession } = useSessionStore();
33
  const { isConnected, llmHealthError, setLlmHealthError, user } = useAgentStore();
34
  const {
35
  isLeftSidebarOpen,
@@ -123,10 +123,13 @@ export default function AppLayout() {
123
 
124
  const handleSessionDead = useCallback(
125
  (deadSessionId: string) => {
126
- useAgentStore.getState().clearSessionState(deadSessionId);
127
- deleteSession(deadSessionId);
 
 
 
128
  },
129
- [deleteSession],
130
  );
131
 
132
  // Close sidebar on mobile after selecting a session
 
29
  const DRAWER_WIDTH = 260;
30
 
31
  export default function AppLayout() {
32
+ const { sessions, activeSessionId, markExpired } = useSessionStore();
33
  const { isConnected, llmHealthError, setLlmHealthError, user } = useAgentStore();
34
  const {
35
  isLeftSidebarOpen,
 
123
 
124
  const handleSessionDead = useCallback(
125
  (deadSessionId: string) => {
126
+ // Backend lost this session — mark it expired so the chat shows a
127
+ // recovery banner instead of either silently failing or eagerly
128
+ // creating a new backend session (which would pay a summary-call
129
+ // cost for sessions the user may never revisit).
130
+ markExpired(deadSessionId);
131
  },
132
+ [markExpired],
133
  );
134
 
135
  // Close sidebar on mobile after selecting a session
frontend/src/components/SessionChat.tsx CHANGED
@@ -11,6 +11,7 @@ import { useAgentStore } from '@/store/agentStore';
11
  import { useSessionStore } from '@/store/sessionStore';
12
  import MessageList from '@/components/Chat/MessageList';
13
  import ChatInput from '@/components/Chat/ChatInput';
 
14
  import { apiFetch } from '@/utils/api';
15
  import { logger } from '@/utils/logger';
16
 
@@ -22,7 +23,8 @@ interface SessionChatProps {
22
 
23
  export default function SessionChat({ sessionId, isActive, onSessionDead }: SessionChatProps) {
24
  const { isConnected, isProcessing, activityStatus, updateSession } = useAgentStore();
25
- const { updateSessionTitle } = useSessionStore();
 
26
 
27
  const { messages, sendMessage, stop, status, undoLastTurn, editAndRegenerate, approveTools } = useAgentChat({
28
  sessionId,
@@ -104,18 +106,22 @@ export default function SessionChat({ sessionId, isActive, onSessionDead }: Sess
104
  onUndoLastTurn={undoLastTurn}
105
  onEditAndRegenerate={editAndRegenerate}
106
  />
107
- <ChatInput
108
- sessionId={sessionId}
109
- onSend={handleSendMessage}
110
- onStop={handleStop}
111
- isProcessing={busy}
112
- disabled={!isConnected || activityStatus.type === 'waiting-approval'}
113
- placeholder={
114
- activityStatus.type === 'waiting-approval'
115
- ? 'Approve or reject pending tools first...'
116
- : undefined
117
- }
118
- />
 
 
 
 
119
  </>
120
  );
121
  }
 
11
  import { useSessionStore } from '@/store/sessionStore';
12
  import MessageList from '@/components/Chat/MessageList';
13
  import ChatInput from '@/components/Chat/ChatInput';
14
+ import ExpiredBanner from '@/components/Chat/ExpiredBanner';
15
  import { apiFetch } from '@/utils/api';
16
  import { logger } from '@/utils/logger';
17
 
 
23
 
24
  export default function SessionChat({ sessionId, isActive, onSessionDead }: SessionChatProps) {
25
  const { isConnected, isProcessing, activityStatus, updateSession } = useAgentStore();
26
+ const { updateSessionTitle, sessions } = useSessionStore();
27
+ const isExpired = sessions.find((s) => s.id === sessionId)?.expired === true;
28
 
29
  const { messages, sendMessage, stop, status, undoLastTurn, editAndRegenerate, approveTools } = useAgentChat({
30
  sessionId,
 
106
  onUndoLastTurn={undoLastTurn}
107
  onEditAndRegenerate={editAndRegenerate}
108
  />
109
+ {isExpired ? (
110
+ <ExpiredBanner sessionId={sessionId} />
111
+ ) : (
112
+ <ChatInput
113
+ sessionId={sessionId}
114
+ onSend={handleSendMessage}
115
+ onStop={handleStop}
116
+ isProcessing={busy}
117
+ disabled={!isConnected || activityStatus.type === 'waiting-approval'}
118
+ placeholder={
119
+ activityStatus.type === 'waiting-approval'
120
+ ? 'Approve or reject pending tools first...'
121
+ : undefined
122
+ }
123
+ />
124
+ )}
125
  </>
126
  );
127
  }
frontend/src/components/SessionSidebar/SessionSidebar.tsx CHANGED
@@ -270,7 +270,7 @@ export default function SessionSidebar({ onClose }: SessionSidebarProps) {
270
  lineHeight: 1.2,
271
  }}
272
  >
273
- {formatTime(session.createdAt)}
274
  </Typography>
275
  </Box>
276
 
 
270
  lineHeight: 1.2,
271
  }}
272
  >
273
+ {session.expired ? 'needs a catch-up' : formatTime(session.createdAt)}
274
  </Typography>
275
  </Box>
276
 
frontend/src/hooks/useAgentChat.ts CHANGED
@@ -12,6 +12,7 @@ import { useChat } from '@ai-sdk/react';
12
  import { type UIMessage, lastAssistantMessageIsCompleteWithApprovalResponses } from 'ai';
13
  import { SSEChatTransport, type SideChannelCallbacks } from '@/lib/sse-chat-transport';
14
  import { loadMessages, saveMessages } from '@/lib/chat-message-store';
 
15
  import { saveResearch, loadResearch, clearResearch, RESEARCH_MAX_STEPS } from '@/lib/research-store';
16
  import { llmMessagesToUIMessages } from '@/lib/convert-llm-messages';
17
  import { apiFetch } from '@/utils/api';
@@ -367,6 +368,14 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
367
  ]);
368
  if (cancelled) return;
369
 
 
 
 
 
 
 
 
 
370
  let pendingIds: Set<string> | undefined;
371
  let backendIsProcessing = false;
372
  if (infoRes.ok) {
@@ -385,6 +394,9 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
385
  if (msgsRes.ok) {
386
  const data = await msgsRes.json();
387
  if (cancelled || !Array.isArray(data) || data.length === 0) return;
 
 
 
388
  const uiMsgs = llmMessagesToUIMessages(data, pendingIds, chatActionsRef.current.messages);
389
  if (uiMsgs.length > 0) {
390
  chat.setMessages(uiMsgs);
@@ -447,6 +459,10 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
447
  const data = await msgsRes.json();
448
  if (!Array.isArray(data) || data.length === 0) return null;
449
 
 
 
 
 
450
  let pendingIds: Set<string> | undefined;
451
  if (infoRes.ok) {
452
  const info = await infoRes.json();
 
12
  import { type UIMessage, lastAssistantMessageIsCompleteWithApprovalResponses } from 'ai';
13
  import { SSEChatTransport, type SideChannelCallbacks } from '@/lib/sse-chat-transport';
14
  import { loadMessages, saveMessages } from '@/lib/chat-message-store';
15
+ import { saveBackendMessages } from '@/lib/backend-message-store';
16
  import { saveResearch, loadResearch, clearResearch, RESEARCH_MAX_STEPS } from '@/lib/research-store';
17
  import { llmMessagesToUIMessages } from '@/lib/convert-llm-messages';
18
  import { apiFetch } from '@/utils/api';
 
368
  ]);
369
  if (cancelled) return;
370
 
371
+ // If both endpoints say "not found", the backend lost this session
372
+ // (typically: Space restarted). Fire onSessionDead so AppLayout
373
+ // can flag it for the catch-up banner.
374
+ if (infoRes.status === 404 && msgsRes.status === 404) {
375
+ callbacksRef.current.onSessionDead?.(sessionId);
376
+ return;
377
+ }
378
+
379
  let pendingIds: Set<string> | undefined;
380
  let backendIsProcessing = false;
381
  if (infoRes.ok) {
 
394
  if (msgsRes.ok) {
395
  const data = await msgsRes.json();
396
  if (cancelled || !Array.isArray(data) || data.length === 0) return;
397
+ // Cache the raw backend messages so we can restore this session
398
+ // into a fresh backend if the Space restarts.
399
+ saveBackendMessages(sessionId, data);
400
  const uiMsgs = llmMessagesToUIMessages(data, pendingIds, chatActionsRef.current.messages);
401
  if (uiMsgs.length > 0) {
402
  chat.setMessages(uiMsgs);
 
459
  const data = await msgsRes.json();
460
  if (!Array.isArray(data) || data.length === 0) return null;
461
 
462
+ // Cache the raw backend messages so we can restore this session
463
+ // into a fresh backend if the Space restarts.
464
+ saveBackendMessages(sessionId, data);
465
+
466
  let pendingIds: Set<string> | undefined;
467
  if (infoRes.ok) {
468
  const info = await infoRes.json();
frontend/src/lib/backend-message-store.ts ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * localStorage cache of raw backend (litellm Message) dicts keyed by
3
+ * session ID. Used to restore a session into a fresh backend after the
4
+ * Space restarts — the browser-side UIMessages are what the user sees,
5
+ * but the LLM needs the backend format to continue the conversation.
6
+ */
7
+ import { logger } from '@/utils/logger';
8
+
9
+ const STORAGE_KEY = 'hf-agent-backend-messages';
10
+ const MAX_SESSIONS = 50;
11
+
12
+ type MessagesMap = Record<string, unknown[]>;
13
+
14
+ function readAll(): MessagesMap {
15
+ try {
16
+ const raw = localStorage.getItem(STORAGE_KEY);
17
+ if (!raw) return {};
18
+ const parsed = JSON.parse(raw);
19
+ if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
20
+ return parsed as MessagesMap;
21
+ }
22
+ return {};
23
+ } catch {
24
+ return {};
25
+ }
26
+ }
27
+
28
+ function writeAll(map: MessagesMap): void {
29
+ try {
30
+ localStorage.setItem(STORAGE_KEY, JSON.stringify(map));
31
+ } catch (e) {
32
+ // Quota exceeded is the most common reason — the cache is best-effort.
33
+ logger.warn('Failed to persist backend messages:', e);
34
+ }
35
+ }
36
+
37
+ export function loadBackendMessages(sessionId: string): unknown[] {
38
+ const map = readAll();
39
+ return map[sessionId] ?? [];
40
+ }
41
+
42
+ export function saveBackendMessages(sessionId: string, messages: unknown[]): void {
43
+ const map = readAll();
44
+ map[sessionId] = messages;
45
+
46
+ const keys = Object.keys(map);
47
+ if (keys.length > MAX_SESSIONS) {
48
+ const toRemove = keys.slice(0, keys.length - MAX_SESSIONS);
49
+ for (const k of toRemove) delete map[k];
50
+ }
51
+
52
+ writeAll(map);
53
+ }
54
+
55
+ export function moveBackendMessages(fromId: string, toId: string): void {
56
+ const map = readAll();
57
+ if (!map[fromId]) return;
58
+ map[toId] = map[fromId];
59
+ delete map[fromId];
60
+ writeAll(map);
61
+ }
62
+
63
+ export function deleteBackendMessages(sessionId: string): void {
64
+ const map = readAll();
65
+ delete map[sessionId];
66
+ writeAll(map);
67
+ }
frontend/src/lib/chat-message-store.ts CHANGED
@@ -61,3 +61,11 @@ export function deleteMessages(sessionId: string): void {
61
  delete map[sessionId];
62
  writeAll(map);
63
  }
 
 
 
 
 
 
 
 
 
61
  delete map[sessionId];
62
  writeAll(map);
63
  }
64
+
65
+ export function moveMessages(fromId: string, toId: string): void {
66
+ const map = readAll();
67
+ if (!map[fromId]) return;
68
+ map[toId] = map[fromId];
69
+ delete map[fromId];
70
+ writeAll(map);
71
+ }
frontend/src/lib/convert-llm-messages.ts CHANGED
@@ -60,6 +60,12 @@ export function llmMessagesToUIMessages(
60
  if (msg.role === 'tool') continue; // handled via tool_calls pairing
61
 
62
  if (msg.role === 'user') {
 
 
 
 
 
 
63
  // Try to reuse existing ID if the message at this position matches
64
  const existingId = getExistingId(uiMessages.length, 'user');
65
  uiMessages.push({
@@ -137,3 +143,98 @@ export function llmMessagesToUIMessages(
137
 
138
  return uiMessages;
139
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  if (msg.role === 'tool') continue; // handled via tool_calls pairing
61
 
62
  if (msg.role === 'user') {
63
+ // Skip internal system-style nudges (doom-loop correction, compact
64
+ // hints, restore notices, etc.) — they're meant for the LLM, not
65
+ // the user. They always start with "[SYSTEM:".
66
+ if (typeof msg.content === 'string' && msg.content.trimStart().startsWith('[SYSTEM:')) {
67
+ continue;
68
+ }
69
  // Try to reuse existing ID if the message at this position matches
70
  const existingId = getExistingId(uiMessages.length, 'user');
71
  uiMessages.push({
 
143
 
144
  return uiMessages;
145
  }
146
+
147
+
148
+ interface ToolPart {
149
+ type: string;
150
+ toolCallId?: string;
151
+ toolName?: string;
152
+ state?: string;
153
+ input?: unknown;
154
+ output?: unknown;
155
+ errorText?: string;
156
+ }
157
+
158
+ function joinText(parts: UIMessage['parts']): string {
159
+ return parts
160
+ .filter((p): p is { type: 'text'; text: string } => p.type === 'text')
161
+ .map((p) => p.text)
162
+ .join('');
163
+ }
164
+
165
+ function stringifyOutput(output: unknown): string {
166
+ if (output == null) return '';
167
+ if (typeof output === 'string') return output;
168
+ try {
169
+ return JSON.stringify(output);
170
+ } catch {
171
+ return String(output);
172
+ }
173
+ }
174
+
175
+ /**
176
+ * Reverse of llmMessagesToUIMessages — used as a fallback when we need to
177
+ * restore a session but only have the UIMessage cache (e.g. the session
178
+ * predates the backend-message cache feature).
179
+ *
180
+ * Includes every tool call the assistant made, regardless of the part's
181
+ * stored state. If we have a captured output (or errorText), we emit a
182
+ * paired role=tool result. If we don't, we leave the tool_call dangling —
183
+ * the backend's ContextManager patches those via _patch_dangling_tool_calls.
184
+ */
185
+ export function uiMessagesToLLMMessages(uiMessages: UIMessage[]): LLMMessage[] {
186
+ const out: LLMMessage[] = [];
187
+ for (const msg of uiMessages) {
188
+ if (msg.role === 'user') {
189
+ const text = joinText(msg.parts);
190
+ if (text) out.push({ role: 'user', content: text });
191
+ continue;
192
+ }
193
+ if (msg.role === 'assistant') {
194
+ const text = joinText(msg.parts);
195
+ const toolCalls: LLMToolCall[] = [];
196
+ const pairedResults: Array<{ id: string; content: string }> = [];
197
+ for (const raw of msg.parts as ToolPart[]) {
198
+ if (!raw.type) continue;
199
+ const isTool = raw.type === 'dynamic-tool' || raw.type.startsWith('tool-');
200
+ if (!isTool) continue;
201
+ const toolCallId = raw.toolCallId;
202
+ const toolName =
203
+ raw.toolName ?? (raw.type.startsWith('tool-') ? raw.type.slice(5) : undefined);
204
+ if (!toolCallId || !toolName) continue;
205
+
206
+ toolCalls.push({
207
+ id: toolCallId,
208
+ function: {
209
+ name: toolName,
210
+ arguments: JSON.stringify(raw.input ?? {}),
211
+ },
212
+ });
213
+
214
+ // Prefer output; fall back to errorText for output-error /
215
+ // output-denied. A missing result leaves the tool_call dangling —
216
+ // the backend will patch it with a synthesized stub.
217
+ const result =
218
+ raw.output != null
219
+ ? stringifyOutput(raw.output)
220
+ : typeof raw.errorText === 'string' && raw.errorText
221
+ ? raw.errorText
222
+ : null;
223
+ if (result != null) {
224
+ pairedResults.push({ id: toolCallId, content: result });
225
+ }
226
+ }
227
+ if (text || toolCalls.length) {
228
+ out.push({
229
+ role: 'assistant',
230
+ content: text || null,
231
+ tool_calls: toolCalls.length ? toolCalls : null,
232
+ });
233
+ }
234
+ for (const r of pairedResults) {
235
+ out.push({ role: 'tool', content: r.content, tool_call_id: r.id });
236
+ }
237
+ }
238
+ }
239
+ return out;
240
+ }
frontend/src/lib/sse-chat-transport.ts CHANGED
@@ -351,6 +351,11 @@ export class SSEChatTransport implements ChatTransport<UIMessage> {
351
  },
352
  });
353
 
 
 
 
 
 
354
  if (!response.ok) {
355
  const errorText = await response.text().catch(() => 'Request failed');
356
  throw new Error(`Chat request failed: ${response.status} ${errorText}`);
 
351
  },
352
  });
353
 
354
+ if (response.status === 404) {
355
+ // Backend lost this session (e.g. Space restart). Signal the UI so
356
+ // it can flag the session for the catch-up banner.
357
+ this.sideChannel.onSessionDead(sessionId);
358
+ }
359
  if (!response.ok) {
360
  const errorText = await response.text().catch(() => 'Request failed');
361
  throw new Error(`Chat request failed: ${response.status} ${errorText}`);
frontend/src/store/sessionStore.ts CHANGED
@@ -1,7 +1,8 @@
1
  import { create } from 'zustand';
2
  import { persist } from 'zustand/middleware';
3
  import type { SessionMeta } from '@/types/agent';
4
- import { deleteMessages } from '@/lib/chat-message-store';
 
5
 
6
  interface SessionStore {
7
  sessions: SessionMeta[];
@@ -14,6 +15,15 @@ interface SessionStore {
14
  setSessionActive: (id: string, isActive: boolean) => void;
15
  updateSessionTitle: (id: string, title: string) => void;
16
  setNeedsAttention: (id: string, needs: boolean) => void;
 
 
 
 
 
 
 
 
 
17
  }
18
 
19
  export const useSessionStore = create<SessionStore>()(
@@ -38,6 +48,7 @@ export const useSessionStore = create<SessionStore>()(
38
 
39
  deleteSession: (id: string) => {
40
  deleteMessages(id);
 
41
  set((state) => {
42
  const newSessions = state.sessions.filter((s) => s.id !== id);
43
  const newActiveId =
@@ -51,6 +62,32 @@ export const useSessionStore = create<SessionStore>()(
51
  });
52
  },
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  switchSession: (id: string) => {
55
  set((state) => ({
56
  activeSessionId: id,
 
1
  import { create } from 'zustand';
2
  import { persist } from 'zustand/middleware';
3
  import type { SessionMeta } from '@/types/agent';
4
+ import { deleteMessages, moveMessages } from '@/lib/chat-message-store';
5
+ import { moveBackendMessages, deleteBackendMessages } from '@/lib/backend-message-store';
6
 
7
  interface SessionStore {
8
  sessions: SessionMeta[];
 
15
  setSessionActive: (id: string, isActive: boolean) => void;
16
  updateSessionTitle: (id: string, title: string) => void;
17
  setNeedsAttention: (id: string, needs: boolean) => void;
18
+ /** Mark a session as expired (backend no longer has it). The UI shows a
19
+ * recovery banner and disables input. */
20
+ markExpired: (id: string) => void;
21
+ /** Clear the expired flag (used after restore-with-summary succeeds). */
22
+ clearExpired: (id: string) => void;
23
+ /** Atomically swap a session's id in the list + both localStorage caches.
24
+ * Used when we rehydrate an expired session into a freshly-created backend
25
+ * session — preserves title, timestamps, and messages. */
26
+ renameSession: (oldId: string, newId: string) => void;
27
  }
28
 
29
  export const useSessionStore = create<SessionStore>()(
 
48
 
49
  deleteSession: (id: string) => {
50
  deleteMessages(id);
51
+ deleteBackendMessages(id);
52
  set((state) => {
53
  const newSessions = state.sessions.filter((s) => s.id !== id);
54
  const newActiveId =
 
62
  });
63
  },
64
 
65
// Flag a session whose id the backend no longer recognizes; flagged
// sessions get the catch-up banner in the UI.
markExpired: (id: string) => {
  set((state) => {
    const sessions = state.sessions.map((session) =>
      session.id === id ? { ...session, expired: true } : session,
    );
    return { sessions };
  });
},
70
+
71
// Drop the expired flag (used after restore-with-summary succeeds).
clearExpired: (id: string) => {
  set((state) => {
    const sessions = state.sessions.map((session) =>
      session.id === id ? { ...session, expired: false } : session,
    );
    return { sessions };
  });
},
78
+
79
// Swap a session's id in the list and both localStorage caches, used when
// an expired session is rehydrated into a freshly-created backend session.
// Title/timestamps survive via the spread; the expired flag is cleared.
renameSession: (oldId: string, newId: string) => {
  if (oldId === newId) return;
  moveMessages(oldId, newId);
  moveBackendMessages(oldId, newId);
  set((state) => {
    const sessions = state.sessions.map((session) =>
      session.id === oldId ? { ...session, id: newId, expired: false } : session,
    );
    const activeSessionId =
      state.activeSessionId === oldId ? newId : state.activeSessionId;
    return { sessions, activeSessionId };
  });
},
90
+
91
  switchSession: (id: string) => {
92
  set((state) => ({
93
  activeSessionId: id,
frontend/src/types/agent.ts CHANGED
@@ -16,6 +16,11 @@ export interface SessionMeta {
16
  createdAt: string;
17
  isActive: boolean;
18
  needsAttention: boolean;
 
 
 
 
 
19
  }
20
 
21
  export interface ToolApproval {
 
16
  createdAt: string;
17
  isActive: boolean;
18
  needsAttention: boolean;
19
+ /** True when the backend no longer recognizes this session id (e.g.
20
+ * after a backend restart). The UI shows a recovery banner and
21
+ * disables input until the user chooses to restore-with-summary or
22
+ * start fresh. */
23
+ expired?: boolean;
24
  }
25
 
26
  export interface ToolApproval {