Spaces:

smolagents
/

ml-intern

Running on CPU Upgrade

App Files Files Community

Aksel Joonas Reedi

lee101 committed 17 days ago

Commit

5ab7c4e

unverified ·

1 Parent(s): 1481358

Fix parallel-research display: per-call stats in CLI and web UI (#49)

Browse files

When multiple research sub-agents ran concurrently, the CLI collapsed
them into a single flickering slot and every web-UI tool card showed
the aggregate across all agents — so 3 parallel calls looked like 1.

CLI (agent/utils/terminal_display.py):
- Replace the global SubAgentDisplay singleton with
SubAgentDisplayManager that tracks each agent by agent_id and renders
one block per agent with independent (tool count · tokens · elapsed).
- 4 rolling tool-call lines per agent (was 2).
- When 2+ agents are live, switch to a compact one-line-per-agent layout
(label + stats + most-recent tool). Detailed 4-line rolling view only
when a single agent is active. Keeps the live region small enough to
fit on one terminal page so cursor-up/erase doesn't drift when content
would otherwise scroll into scrollback.
- Clip every live-region line to terminal width (ANSI-aware) so a
wrapped line can't corrupt the cursor-up math on narrow terminals.
- On completion, erase the live block and freeze a single ✓ summary
line above the live region with final stats.

Backend:
- Pass agent_id / label through tool_log events end-to-end
(research_tool.py). Derive agent_id from tool_call_id instead of
md5(task) so two parallel calls with identical task strings don't
collide, and so the frontend can match each research tool card to
its own agent state.
- Thread tool_call_id through the non-approval parallel tool-execution
path in agent_loop.py (it was being dropped there, so research_handler
never saw it on the hot path).
- Mirror agent_id / label forwarding in the headless event handler
(agent/main.py).

Frontend:
- Per-session researchAgents map in the store keyed by agent_id
(agentStore.ts). Forward agent_id / label from the SSE transport
and useAgentChat.onToolLog.
- Each research tool card looks up researchAgents[tool.toolCallId] and
renders only its own stats chip + rolling step list (was: aggregated
across all agents, shown on every card).
- Replace useElapsed (can't be called per-card inside a map) with a
single top-level useSecondTick; each card computes elapsed
synchronously from its own startedAt.
- Stable EMPTY_AGENTS constant instead of `?? {}` in the selector,
fixing a useMemo exhaustive-deps warning.

Co-authored-by: Lee Penkman <leepenkman@gmail.com>

Files changed (8) hide show

agent/core/agent_loop.py +1 -1
agent/main.py +30 -20
agent/tools/research_tool.py +19 -2
agent/utils/terminal_display.py +148 -58
frontend/src/components/Chat/ToolCallGroup.tsx +42 -34
frontend/src/hooks/useAgentChat.ts +40 -23
frontend/src/lib/sse-chat-transport.ts +3 -1
frontend/src/store/agentStore.ts +24 -5

agent/core/agent_loop.py CHANGED Viewed

@@ -690,7 +690,7 @@ class Handlers:
                         if not valid:
                             return (tc, name, args, err, False)
                         out, ok = await session.tool_router.call_tool(
-                            name, args, session=session
                         )
                         return (tc, name, args, out, ok)

                         if not valid:
                             return (tc, name, args, err, False)
                         out, ok = await session.tool_router.call_tool(
+                            name, args, session=session, tool_call_id=tc.id
                         )
                         return (tc, name, args, out, ok)

agent/main.py CHANGED Viewed

@@ -451,7 +451,9 @@ async def event_listener(
                 tool = event.data.get("tool", "") if event.data else ""
                 log = event.data.get("log", "") if event.data else ""
                 if log:
-                    print_tool_log(tool, log)
             elif event.event_type == "tool_state_change":
                 pass  # visual noise — approval flow handles this
             elif event.event_type == "error":
@@ -1204,10 +1206,10 @@ async def headless_main(
     stream_buf = _StreamBuffer(console)
     _hl_last_tool = [None]
     _hl_sub_id = [1]
-    # Research sub-agent tool calls are buffered and dumped once the sub-agent
-    # finishes, instead of streaming via the live redrawing SubAgentDisplay.
-    _hl_research_calls: list[str] = []
-    _hl_in_research = [False]
     while True:
         event = await event_queue.get()
@@ -1243,26 +1245,34 @@ async def headless_main(
             if not log:
                 pass
             elif tool == "research":
-                # Buffer research sub-agent activity; on completion, dump a
-                # single static block that mirrors the live overlay's styling
-                # without its line-erasing redraws (unfit for non-TTY output).
                 if log == "Starting research sub-agent...":
-                    _hl_in_research[0] = True
-                    _hl_research_calls.clear()
                 elif log == "Research complete.":
-                    _hl_in_research[0] = False
-                    f = get_console().file
-                    f.write("  \033[38;2;255;200;80m▸ research\033[0m\n")
-                    for call in _hl_research_calls:
-                        f.write(f"    \033[2m{call}\033[0m\n")
-                    f.flush()
-                    _hl_research_calls.clear()
                 elif log.startswith("tokens:") or log.startswith("tools:"):
                     pass  # stats updates — only useful for the live display
-                elif _hl_in_research[0]:
-                    _hl_research_calls.append(log)
                 else:
-                    print_tool_log(tool, log)
             else:
                 print_tool_log(tool, log)
         elif event.event_type == "approval_required":

                 tool = event.data.get("tool", "") if event.data else ""
                 log = event.data.get("log", "") if event.data else ""
                 if log:
+                    agent_id = event.data.get("agent_id", "") if event.data else ""
+                    label = event.data.get("label", "") if event.data else ""
+                    print_tool_log(tool, log, agent_id=agent_id, label=label)
             elif event.event_type == "tool_state_change":
                 pass  # visual noise — approval flow handles this
             elif event.event_type == "error":
     stream_buf = _StreamBuffer(console)
     _hl_last_tool = [None]
     _hl_sub_id = [1]
+    # Research sub-agent tool calls are buffered per agent_id and dumped as
+    # a static block once each sub-agent finishes, instead of streaming via
+    # the live redrawing SubAgentDisplayManager (which is TTY-only).
+    _hl_research_buffers: dict[str, dict] = {}
     while True:
         event = await event_queue.get()
             if not log:
                 pass
             elif tool == "research":
+                # Headless mode: buffer research sub-agent activity per-agent,
+                # then dump each as a static block on completion. The live
+                # SubAgentDisplayManager uses terminal cursor tricks that are
+                # unfit for non-TTY output, but parallel agents still need
+                # distinct output so we key buffers by agent_id.
+                agent_id = event.data.get("agent_id", "") if event.data else ""
+                label = event.data.get("label", "") if event.data else ""
+                aid = agent_id or "research"
                 if log == "Starting research sub-agent...":
+                    _hl_research_buffers[aid] = {
+                        "label": label or "research",
+                        "calls": [],
+                    }
                 elif log == "Research complete.":
+                    buf = _hl_research_buffers.pop(aid, None)
+                    if buf is not None:
+                        f = get_console().file
+                        f.write(f"  \033[38;2;255;200;80m▸ {buf['label']}\033[0m\n")
+                        for call in buf["calls"]:
+                            f.write(f"    \033[2m{call}\033[0m\n")
+                        f.flush()
                 elif log.startswith("tokens:") or log.startswith("tools:"):
                     pass  # stats updates — only useful for the live display
+                elif aid in _hl_research_buffers:
+                    _hl_research_buffers[aid]["calls"].append(log)
                 else:
+                    # Orphan event (Start was missed) — fall back to raw print
+                    print_tool_log(tool, log, agent_id=agent_id, label=label)
             else:
                 print_tool_log(tool, log)
         elif event.event_type == "approval_required":

agent/tools/research_tool.py CHANGED Viewed

@@ -222,7 +222,7 @@ def _get_research_model(main_model: str) -> str:
 async def research_handler(
-    arguments: dict[str, Any], session=None, **_kw
 ) -> tuple[str, bool]:
     """Execute a research sub-agent with its own context."""
     task = arguments.get("task", "")
@@ -259,11 +259,28 @@ async def research_handler(
         if spec["function"]["name"] in RESEARCH_TOOL_NAMES
     ]
     async def _log(text: str) -> None:
         """Send a progress event to the UI so it doesn't look frozen."""
         try:
             await session.send_event(
-                Event(event_type="tool_log", data={"tool": "research", "log": text})
             )
         except Exception:
             pass

 async def research_handler(
+    arguments: dict[str, Any], session=None, tool_call_id: str | None = None, **_kw
 ) -> tuple[str, bool]:
     """Execute a research sub-agent with its own context."""
     task = arguments.get("task", "")
         if spec["function"]["name"] in RESEARCH_TOOL_NAMES
     ]
+    # Unique ID + short label so parallel agents show separate status lines.
+    # Use the tool_call_id when available — it's unique per invocation and lets
+    # the frontend match a research tool card to its agent state. Fall back to
+    # uuid for offline/test paths. Previously used md5(task), which collided
+    # when the same task string was researched in parallel.
+    if tool_call_id:
+        _agent_id = tool_call_id
+    else:
+        import uuid
+        _agent_id = uuid.uuid4().hex[:8]
+    _agent_label = "research: " + (task[:50] + "…" if len(task) > 50 else task)
     async def _log(text: str) -> None:
         """Send a progress event to the UI so it doesn't look frozen."""
         try:
             await session.send_event(
+                Event(event_type="tool_log", data={
+                    "tool": "research",
+                    "log": text,
+                    "agent_id": _agent_id,
+                    "label": _agent_label,
+                })
             )
         except Exception:
             pass

agent/utils/terminal_display.py CHANGED Viewed

@@ -2,6 +2,8 @@
 Terminal display utilities — rich-powered CLI formatting.
 """
 from rich.console import Console
 from rich.markdown import Heading, Markdown
 from rich.panel import Panel
@@ -19,6 +21,42 @@ class _LeftHeading(Heading):
 Markdown.elements["heading_open"] = _LeftHeading
 _THEME = Theme({
     "tool.name": "bold rgb(255,200,80)",
     "tool.args": "dim",
@@ -129,74 +167,102 @@ def print_tool_output(output: str, success: bool, truncate: bool = True) -> None
     _console.print(f"[{style}]{indented}[/{style}]")
-class SubAgentDisplay:
-    """Live-updating display: header with stats (ticks every second) + rolling 2-line tool calls."""
-    _MAX_VISIBLE = 2
     def __init__(self):
-        self._calls: list[str] = []
-        self._tool_count = 0
-        self._token_count = 0
-        self._start_time: float | None = None
         self._lines_on_screen = 0
         self._ticker_task = None
-    def start(self) -> None:
-        """Begin the display with a 1-second ticker."""
         import asyncio
         import time
-        self._calls = []
-        self._tool_count = 0
-        self._token_count = 0
-        self._start_time = time.monotonic()
         self._redraw()
-        self._ticker_task = asyncio.ensure_future(self._tick())
-    def set_tokens(self, tokens: int) -> None:
-        self._token_count = tokens
-        # no redraw — ticker handles it
-    def set_tool_count(self, count: int) -> None:
-        self._tool_count = count
-        # no redraw — ticker handles it
-    def add_call(self, tool_desc: str) -> None:
-        self._calls.append(tool_desc)
-        self._redraw()
-    def clear(self) -> None:
-        if self._ticker_task:
-            self._ticker_task.cancel()
-            self._ticker_task = None
         self._erase()
         self._lines_on_screen = 0
-        self._calls = []
-        self._start_time = None
     async def _tick(self) -> None:
         import asyncio
         try:
             while True:
                 await asyncio.sleep(1.0)
-                self._redraw()
         except asyncio.CancelledError:
             pass
-    def _format_stats(self) -> str:
         import time
-        if self._start_time is None:
             return ""
-        elapsed = time.monotonic() - self._start_time
         if elapsed < 60:
             time_str = f"{elapsed:.0f}s"
         else:
             time_str = f"{elapsed / 60:.0f}m {elapsed % 60:.0f}s"
-        if self._token_count >= 1000:
-            tok_str = f"{self._token_count / 1000:.1f}k"
-        else:
-            tok_str = str(self._token_count)
-        return f"{self._tool_count} tool uses · {tok_str} tokens · {time_str}"
     def _erase(self) -> None:
         if self._lines_on_screen > 0:
@@ -205,42 +271,66 @@ class SubAgentDisplay:
                 f.write("\033[A\033[K")
             f.flush()
-    def _redraw(self) -> None:
-        f = _console.file
-        self._erase()
-        lines = []
-        # Header: ▸ research (stats)
-        stats = self._format_stats()
-        header = f"{_I}\033[38;2;255;200;80m▸ research\033[0m"
         if stats:
             header += f"  \033[2m({stats})\033[0m"
-        lines.append(header)
-        # Last 2 tool calls, gray
-        visible = self._calls[-self._MAX_VISIBLE:]
         for desc in visible:
             lines.append(f"{_I}  \033[2m{desc}\033[0m")
         for line in lines:
             f.write(line + "\n")
         f.flush()
         self._lines_on_screen = len(lines)
-_subagent_display = SubAgentDisplay()
-def print_tool_log(tool: str, log: str) -> None:
     """Handle tool log events — sub-agent calls get the rolling display."""
     if tool == "research":
         if log == "Starting research sub-agent...":
-            _subagent_display.start()
         elif log == "Research complete.":
-            _subagent_display.clear()
         elif log.startswith("tokens:"):
-            _subagent_display.set_tokens(int(log[7:]))
         elif log.startswith("tools:"):
-            _subagent_display.set_tool_count(int(log[6:]))
         else:
-            _subagent_display.add_call(log)
     else:
         _console.print(f"{_I}[dim]{tool}: {log}[/dim]")

 Terminal display utilities — rich-powered CLI formatting.
 """
+import re
 from rich.console import Console
 from rich.markdown import Heading, Markdown
 from rich.panel import Panel
 Markdown.elements["heading_open"] = _LeftHeading
+_ANSI_RE = re.compile(r"\x1b\[[0-9;]*[a-zA-Z]")
+def _clip_to_width(s: str, width: int) -> str:
+    """Truncate a string to at most `width` visible columns, preserving ANSI styles.
+    Needed for the sub-agent live redraw: cursor-up-and-erase assumes one
+    logical line == one terminal row. If a line wraps, cursor-up undershoots
+    and the next redraw corrupts the display. Truncating prevents wrap.
+    ANSI escape sequences (as matched by `_ANSI_RE`) occupy no columns and
+    are copied through unchanged; every other character counts as one column.
+    A string that already fits, including one exactly `width` columns wide,
+    is returned untouched. A longer one is cut to `width - 1` visible
+    characters followed by a style reset and "…". A non-positive `width`
+    disables clipping and returns `s` as-is.
+    """
+    if width <= 0:
+        return s
+    # Measure first so a line that fits exactly is not needlessly shortened.
+    if len(_ANSI_RE.sub("", s)) <= width:
+        return s
+    out: list[str] = []
+    visible = 0
+    i = 0
+    # Reserve 1 column for the trailing ellipsis
+    limit = width - 1
+    while i < len(s):
+        m = _ANSI_RE.match(s, i)
+        if m:
+            # Escape sequences are zero-width: keep them, don't count them
+            out.append(m.group())
+            i = m.end()
+            continue
+        if visible >= limit:
+            # The string is known to overflow, so the scan always ends here
+            break
+        out.append(s[i])
+        visible += 1
+        i += 1
+    # Reset styles so the ellipsis isn't left hanging inside a style run
+    out.append("\033[0m…")
+    return "".join(out)
 _THEME = Theme({
     "tool.name": "bold rgb(255,200,80)",
     "tool.args": "dim",
     _console.print(f"[{style}]{indented}[/{style}]")
+class SubAgentDisplayManager:
+    """Manages multiple concurrent sub-agent displays.
+    Each agent gets its own stats and rolling tool-call log.
+    All agents are rendered together so terminal escape-code
+    erase/redraw stays consistent.
+    """
+    _MAX_VISIBLE = 4  # tool-call lines shown per agent
     def __init__(self):
+        self._agents: dict[str, dict] = {}  # agent_id -> state dict
         self._lines_on_screen = 0
         self._ticker_task = None
+    def start(self, agent_id: str, label: str = "research") -> None:
         import asyncio
         import time
+        self._agents[agent_id] = {
+            "label": label,
+            "calls": [],
+            "tool_count": 0,
+            "token_count": 0,
+            "start_time": time.monotonic(),
+        }
+        if not self._ticker_task:
+            self._ticker_task = asyncio.ensure_future(self._tick())
         self._redraw()
+    def set_tokens(self, agent_id: str, tokens: int) -> None:
+        if agent_id in self._agents:
+            self._agents[agent_id]["token_count"] = tokens
+    def set_tool_count(self, agent_id: str, count: int) -> None:
+        if agent_id in self._agents:
+            self._agents[agent_id]["tool_count"] = count
+    def add_call(self, agent_id: str, tool_desc: str) -> None:
+        if agent_id in self._agents:
+            self._agents[agent_id]["calls"].append(tool_desc)
+            self._redraw()
+    def clear(self, agent_id: str) -> None:
+        # On completion: erase the live region, freeze a single-line summary
+        # for this agent ("✓ research: … (stats)") above the live region so
+        # the user sees each sub-agent finish cleanly without the tool-call
+        # noise, then redraw remaining live agents.
+        agent = self._agents.pop(agent_id, None)
         self._erase()
+        if agent is not None:
+            width = max(10, _console.width)
+            line = _clip_to_width(self._render_completion_line(agent), width)
+            _console.file.write(line + "\n")
+            _console.file.flush()
         self._lines_on_screen = 0
+        if not self._agents:
+            if self._ticker_task:
+                self._ticker_task.cancel()
+                self._ticker_task = None
+        else:
+            self._redraw()
+    @staticmethod
+    def _render_completion_line(agent: dict) -> str:
+        stats = SubAgentDisplayManager._format_stats(agent)
+        label = agent["label"]
+        # dim green check + dim label; stats in parens
+        line = f"{_I}\033[38;2;120;200;140m✓\033[0m \033[2m{label}\033[0m"
+        if stats:
+            line += f"  \033[2m({stats})\033[0m"
+        return line
     async def _tick(self) -> None:
         import asyncio
         try:
             while True:
                 await asyncio.sleep(1.0)
+                if self._agents:
+                    self._redraw()
         except asyncio.CancelledError:
             pass
+    @staticmethod
+    def _format_stats(agent: dict) -> str:
+        """Format one agent's stats as "N tool uses · T tokens · elapsed".
+        Reads `start_time`, `token_count` and `tool_count` from the agent
+        state dict. Returns "" when `start_time` is None. Token counts of
+        1000 or more are shown in thousands with one decimal ("12.4k").
+        Elapsed time is floored to whole seconds and shown as "42s" below
+        one minute and as "1m 40s" from one minute on.
+        """
         import time
+        start = agent["start_time"]
+        if start is None:
             return ""
+        # Work in whole seconds and split with divmod: formatting
+        # `elapsed / 60` with `:.0f` rounds to nearest, which rendered
+        # 100 s as "2m 40s" (and could render a seconds part of "60s").
+        total = int(time.monotonic() - start)
+        minutes, seconds = divmod(total, 60)
+        time_str = f"{minutes}m {seconds}s" if minutes else f"{seconds}s"
+        tok = agent["token_count"]
+        tok_str = f"{tok / 1000:.1f}k" if tok >= 1000 else str(tok)
+        return f"{agent['tool_count']} tool uses · {tok_str} tokens · {time_str}"
     def _erase(self) -> None:
         if self._lines_on_screen > 0:
                 f.write("\033[A\033[K")
             f.flush()
+    def _render_agent_lines(self, agent: dict, compact: bool = False) -> list[str]:
+        """Render one agent's block.
+        compact=True → single line (label + stats + most-recent tool name);
+        compact=False → header + up to _MAX_VISIBLE rolling tool-call lines.
+        We use compact mode when multiple agents are live so the total live
+        region stays small enough to fit on one screen. Otherwise cursor-up
+        can't reach lines that have scrolled into scrollback, and every
+        redraw pollutes history with a stale copy.
+        """
+        stats = self._format_stats(agent)
+        label = agent["label"]
+        header = f"{_I}\033[38;2;255;200;80m▸ {label}\033[0m"
         if stats:
             header += f"  \033[2m({stats})\033[0m"
+        if compact:
+            latest = agent["calls"][-1] if agent["calls"] else ""
+            if latest:
+                # Strip long json tails for the inline view
+                short = latest.split("  ")[0] if "  " in latest else latest
+                header += f" \033[2m·\033[0m \033[2m{short}\033[0m"
+            return [header]
+        lines = [header]
+        visible = agent["calls"][-self._MAX_VISIBLE:]
         for desc in visible:
             lines.append(f"{_I}  \033[2m{desc}\033[0m")
+        return lines
+    def _redraw(self) -> None:
+        f = _console.file
+        self._erase()
+        compact = len(self._agents) > 1
+        width = max(10, _console.width)
+        lines: list[str] = []
+        for agent in self._agents.values():
+            for ln in self._render_agent_lines(agent, compact=compact):
+                lines.append(_clip_to_width(ln, width))
         for line in lines:
             f.write(line + "\n")
         f.flush()
         self._lines_on_screen = len(lines)
+_subagent_display = SubAgentDisplayManager()
+def print_tool_log(tool: str, log: str, agent_id: str = "", label: str = "") -> None:
     """Handle tool log events — sub-agent calls get the rolling display."""
     if tool == "research":
+        aid = agent_id or "research"
         if log == "Starting research sub-agent...":
+            _subagent_display.start(aid, label or "research")
         elif log == "Research complete.":
+            _subagent_display.clear(aid)
         elif log.startswith("tokens:"):
+            _subagent_display.set_tokens(aid, int(log[7:]))
         elif log.startswith("tools:"):
+            _subagent_display.set_tool_count(aid, int(log[6:]))
         else:
+            _subagent_display.add_call(aid, log)
     else:
         _console.print(f"{_I}[dim]{tool}: {log}[/dim]")

frontend/src/components/Chat/ToolCallGroup.tsx CHANGED Viewed

@@ -7,7 +7,7 @@ import HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';
 import LaunchIcon from '@mui/icons-material/Launch';
 import SendIcon from '@mui/icons-material/Send';
 import BlockIcon from '@mui/icons-material/Block';
-import { useAgentStore } from '@/store/agentStore';
 import { useLayoutStore } from '@/store/layoutStore';
 import { logger } from '@/utils/logger';
 import { RESEARCH_MAX_STEPS } from '@/lib/research-store';
@@ -36,16 +36,22 @@ interface ToolCallGroupProps {
 // Research sub-steps (inline under the research tool row)
 // ---------------------------------------------------------------------------
-/** Hook that ticks every second while startedAt is set, returning elapsed seconds. */
-function useElapsed(startedAt: number | null): number | null {
-  const [elapsed, setElapsed] = useState<number | null>(null);
   useEffect(() => {
-    if (startedAt === null) { setElapsed(null); return; }
-    setElapsed(Math.round((Date.now() - startedAt) / 1000));
-    const id = setInterval(() => setElapsed(Math.round((Date.now() - startedAt) / 1000)), 1000);
     return () => clearInterval(id);
-  }, [startedAt]);
-  return elapsed;
 }
 /** Format token count like the CLI: "12.4k" or "800". */
@@ -172,9 +178,8 @@ function formatResearchStep(raw: string): { label: string } {
   return { label: step.replace(/^▸\s*/, '') };
 }
-/** Rolling 2-line display of research sub-tool calls — hidden when complete. */
-function ResearchSteps({ steps, isRunning }: { steps: string[]; isRunning: boolean }) {
-  if (!isRunning) return null;
   const visible = steps.slice(-RESEARCH_MAX_STEPS);
   if (visible.length === 0) return null;
@@ -215,9 +220,6 @@ function ResearchSteps({ steps, isRunning }: { steps: string[]; isRunning: boole
   );
 }
-// Stable reference to avoid infinite re-renders from Zustand selectors
-const EMPTY_STEPS: string[] = [];
 // ---------------------------------------------------------------------------
 // Hardware pricing ($/hr) — from HF Spaces & Jobs pricing
 // ---------------------------------------------------------------------------
@@ -512,17 +514,22 @@ function InlineApproval({
 // Main component
 // ---------------------------------------------------------------------------
 export default function ToolCallGroup({ tools, approveTools }: ToolCallGroupProps) {
   const { setPanel, lockPanel, getJobUrl, getEditedScript, setJobStatus, getJobStatus, setToolError, getToolError, setToolRejected, getToolRejected } = useAgentStore();
-  const researchSteps = useAgentStore(s => {
     const activeId = s.activeSessionId;
-    return activeId ? (s.sessionStates[activeId]?.researchSteps) : undefined;
-  }) ?? EMPTY_STEPS;
-  const researchStats = useAgentStore(s => {
-    const activeId = s.activeSessionId;
-    return activeId ? s.sessionStates[activeId]?.researchStats : undefined;
-  }) ?? { toolCount: 0, tokenCount: 0, startedAt: null, finalElapsed: null };
-  const liveElapsed = useElapsed(researchStats.startedAt);
   const isProcessing = useAgentStore(s => s.isProcessing);
   const { setRightPanelOpen, setLeftSidebarOpen } = useLayoutStore();
@@ -964,13 +971,17 @@ export default function ToolCallGroup({ tools, approveTools }: ToolCallGroupProp
                 {/* Status chip (non hf_jobs, or hf_jobs without final status) */}
                 {(() => {
-                  // Research tool: override chip label with live stats (but not if cancelled/done)
                   const researchDone = cancelled || state === 'output-available' || state === 'output-error' || state === 'output-denied';
-                  const researchLabel = tool.toolName === 'research' && !researchDone
-                    ? researchChipLabel(researchStats, liveElapsed)
-                    : (tool.toolName === 'research' && researchDone && researchStats.finalElapsed !== null)
-                      ? researchChipLabel({ ...researchStats, startedAt: null }, null)
-                      : null;
                   const chipLabel = researchLabel || label;
                   if (!chipLabel || (tool.toolName === 'hf_jobs' && jobMeta.jobStatus)) return null;
@@ -1048,11 +1059,8 @@ export default function ToolCallGroup({ tools, approveTools }: ToolCallGroupProp
               </Stack>
               {/* Research sub-agent rolling steps (visible only while running) */}
-              {tool.toolName === 'research' && !cancelled && state !== 'output-available' && state !== 'output-error' && state !== 'output-denied' && (
-                <ResearchSteps
-                  steps={researchSteps}
-                  isRunning={researchStats.startedAt !== null}
-                />
               )}
               {/* Per-tool approval: undecided */}

 import LaunchIcon from '@mui/icons-material/Launch';
 import SendIcon from '@mui/icons-material/Send';
 import BlockIcon from '@mui/icons-material/Block';
+import { useAgentStore, type ResearchAgentState } from '@/store/agentStore';
 import { useLayoutStore } from '@/store/layoutStore';
 import { logger } from '@/utils/logger';
 import { RESEARCH_MAX_STEPS } from '@/lib/research-store';
 // Research sub-steps (inline under the research tool row)
 // ---------------------------------------------------------------------------
+/** Hook that forces a re-render every second while enabled — used so each
+ * research card can compute its own elapsed seconds synchronously from
+ * Date.now() without needing its own timer. */
+function useSecondTick(enabled: boolean): void {
+  const [, setTick] = useState(0);
   useEffect(() => {
+    if (!enabled) return;
+    const id = setInterval(() => setTick(t => t + 1), 1000);
     return () => clearInterval(id);
+  }, [enabled]);
+}
+/** Compute elapsed seconds from startedAt (or null). Call under useSecondTick. */
+function computeElapsed(startedAt: number | null): number | null {
+  if (startedAt === null) return null;
+  return Math.round((Date.now() - startedAt) / 1000);
 }
 /** Format token count like the CLI: "12.4k" or "800". */
   return { label: step.replace(/^▸\s*/, '') };
 }
+/** Rolling display of research sub-tool calls for a single agent. */
+function ResearchSteps({ steps }: { steps: string[] }) {
   const visible = steps.slice(-RESEARCH_MAX_STEPS);
   if (visible.length === 0) return null;
   );
 }
 // ---------------------------------------------------------------------------
 // Hardware pricing ($/hr) — from HF Spaces & Jobs pricing
 // ---------------------------------------------------------------------------
 // Main component
 // ---------------------------------------------------------------------------
+const EMPTY_AGENTS: Record<string, ResearchAgentState> = {};
 export default function ToolCallGroup({ tools, approveTools }: ToolCallGroupProps) {
   const { setPanel, lockPanel, getJobUrl, getEditedScript, setJobStatus, getJobStatus, setToolError, getToolError, setToolRejected, getToolRejected } = useAgentStore();
+  const researchAgents = useAgentStore(s => {
     const activeId = s.activeSessionId;
+    return (activeId && s.sessionStates[activeId]?.researchAgents) || EMPTY_AGENTS;
+  });
+  // Tick once per second while any research agent is running so each card's
+  // elapsed seconds update in real time.
+  const anyResearchRunning = useMemo(
+    () => Object.values(researchAgents).some(a => a.stats.startedAt !== null),
+    [researchAgents],
+  );
+  useSecondTick(anyResearchRunning);
   const isProcessing = useAgentStore(s => s.isProcessing);
   const { setRightPanelOpen, setLeftSidebarOpen } = useLayoutStore();
                 {/* Status chip (non hf_jobs, or hf_jobs without final status) */}
                 {(() => {
+                  // Research tool: override chip label with this card's agent stats
+                  const agentState: ResearchAgentState | undefined = tool.toolName === 'research'
+                    ? researchAgents[tool.toolCallId]
+                    : undefined;
                   const researchDone = cancelled || state === 'output-available' || state === 'output-error' || state === 'output-denied';
+                  const liveElapsed = agentState ? computeElapsed(agentState.stats.startedAt) : null;
+                  const researchLabel = tool.toolName === 'research' && agentState
+                    ? (researchDone && agentState.stats.finalElapsed !== null
+                        ? researchChipLabel({ ...agentState.stats, startedAt: null }, null)
+                        : researchChipLabel(agentState.stats, liveElapsed))
+                    : null;
                   const chipLabel = researchLabel || label;
                   if (!chipLabel || (tool.toolName === 'hf_jobs' && jobMeta.jobStatus)) return null;
               </Stack>
               {/* Research sub-agent rolling steps (visible only while running) */}
+              {tool.toolName === 'research' && !cancelled && state !== 'output-available' && state !== 'output-error' && state !== 'output-denied' && researchAgents[tool.toolCallId] && (
+                <ResearchSteps steps={researchAgents[tool.toolCallId].steps} />
               )}
               {/* Per-tool approval: undecided */}

frontend/src/hooks/useAgentChat.ts CHANGED Viewed

@@ -86,46 +86,63 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
           useLayoutStore.getState().setRightPanelOpen(true);
         }
       },
-      onToolLog: (tool: string, log: string) => {
-        // Research sub-agent: parse stats vs step logs
         if (tool === 'research') {
           const sessState = useAgentStore.getState().getSessionState(sessionId);
-          const stats = { ...sessState.researchStats };
           if (log === 'Starting research sub-agent...') {
-            const newStats = { toolCount: 0, tokenCount: 0, startedAt: Date.now(), finalElapsed: null };
             updateSession(sessionId, {
-              researchSteps: [],
-              researchStats: newStats,
-              activityStatus: { type: 'tool', toolName: 'research', description: log },
             });
-            saveResearch(sessionId, [], newStats);
           } else if (log.startsWith('tokens:')) {
-            stats.tokenCount = parseInt(log.slice(7), 10);
-            updateSession(sessionId, { researchStats: stats });
-            saveResearch(sessionId, sessState.researchSteps, stats);
           } else if (log.startsWith('tools:')) {
-            stats.toolCount = parseInt(log.slice(6), 10);
-            updateSession(sessionId, { researchStats: stats });
-            saveResearch(sessionId, sessState.researchSteps, stats);
           } else if (log === 'Research complete.') {
-            const elapsed = stats.startedAt
-              ? Math.round((Date.now() - stats.startedAt) / 1000)
               : null;
-            const doneStats = { ...stats, startedAt: null, finalElapsed: elapsed };
             updateSession(sessionId, {
-              researchStats: doneStats,
               activityStatus: { type: 'tool', toolName: 'research', description: log },
             });
-            clearResearch(sessionId);
           } else {
-            // Regular tool call step — append (trim to max)
-            const steps = [...sessState.researchSteps, log].slice(-RESEARCH_MAX_STEPS);
             updateSession(sessionId, {
-              researchSteps: steps,
               activityStatus: { type: 'tool', toolName: 'research', description: log },
             });
-            saveResearch(sessionId, steps, stats);
           }
           return;
         }

           useLayoutStore.getState().setRightPanelOpen(true);
         }
       },
+      onToolLog: (tool: string, log: string, agentId?: string, label?: string) => {
+        // Research sub-agent: parse stats vs step logs (per-agent)
         if (tool === 'research') {
+          // Events that carry no agent_id share a single 'research' slot.
+          const aid = agentId || 'research';
           const sessState = useAgentStore.getState().getSessionState(sessionId);
+          // Work on copies: the map AND the targeted entry are cloned so the
+          // objects currently held by the store are never mutated in place.
+          const agents = { ...sessState.researchAgents };
+          const agent = { ...(agents[aid] || { label: label || 'research', steps: [], stats: { toolCount: 0, tokenCount: 0, startedAt: null, finalElapsed: null } }) };
           if (log === 'Starting research sub-agent...') {
+            // A (re)start resets this agent's steps and counters and stamps its start time.
+            agents[aid] = {
+              label: label || 'research',
+              steps: [],
+              stats: { toolCount: 0, tokenCount: 0, startedAt: Date.now(), finalElapsed: null },
+            };
+            // Also update legacy flat fields (aggregate of all agents)
+            const allSteps = Object.values(agents).flatMap(a => a.steps);
             updateSession(sessionId, {
+              researchAgents: agents,
+              researchSteps: allSteps.slice(-RESEARCH_MAX_STEPS),
+              // This agent was just started, so at least one agent is running.
+              researchStats: agents[aid].stats,
+              activityStatus: { type: 'tool', toolName: 'research', description: label || log },
             });
+            saveResearch(sessionId, allSteps.slice(-RESEARCH_MAX_STEPS), agents[aid].stats);
           } else if (log.startsWith('tokens:')) {
+            const tokenCount = parseInt(log.slice(7), 10);
+            // Ignore a malformed counter line rather than storing NaN.
+            if (!Number.isNaN(tokenCount)) {
+              agent.stats = { ...agent.stats, tokenCount };
+              agents[aid] = agent;
+              updateSession(sessionId, { researchAgents: agents });
+            }
           } else if (log.startsWith('tools:')) {
+            const toolCount = parseInt(log.slice(6), 10);
+            // Ignore a malformed counter line rather than storing NaN.
+            if (!Number.isNaN(toolCount)) {
+              agent.stats = { ...agent.stats, toolCount };
+              agents[aid] = agent;
+              updateSession(sessionId, { researchAgents: agents });
+            }
           } else if (log === 'Research complete.') {
+            // Freeze the elapsed time in whole seconds. If the agent is not
+            // marked as running (e.g. a repeated completion event), keep any
+            // duration that was already recorded instead of erasing it.
+            const elapsed = agent.stats.startedAt
+              ? Math.round((Date.now() - agent.stats.startedAt) / 1000)
+              : agent.stats.finalElapsed;
+            agent.stats = { ...agent.stats, startedAt: null, finalElapsed: elapsed };
+            agents[aid] = agent;
+            const anyRunning = Object.values(agents).some(a => a.stats.startedAt !== null);
             updateSession(sessionId, {
+              researchAgents: agents,
+              researchStats: anyRunning ? sessState.researchStats : agent.stats,
               activityStatus: { type: 'tool', toolName: 'research', description: log },
             });
+            // Clear persistence only when ALL agents are done
+            if (!anyRunning) clearResearch(sessionId);
           } else {
+            // Regular tool call step — append to this agent (trimmed to the max)
+            agent.steps = [...agent.steps, log].slice(-RESEARCH_MAX_STEPS);
+            agents[aid] = agent;
+            const allSteps = Object.values(agents).flatMap(a => a.steps);
             updateSession(sessionId, {
+              researchAgents: agents,
+              researchSteps: allSteps.slice(-RESEARCH_MAX_STEPS),
               activityStatus: { type: 'tool', toolName: 'research', description: log },
             });
+            saveResearch(sessionId, allSteps.slice(-RESEARCH_MAX_STEPS), agent.stats);
           }
           return;
         }

frontend/src/lib/sse-chat-transport.ts CHANGED Viewed

@@ -23,7 +23,7 @@ export interface SideChannelCallbacks {
   onUndoComplete: () => void;
   onCompacted: (oldTokens: number, newTokens: number) => void;
   onPlanUpdate: (plan: Array<{ id: string; content: string; status: string }>) => void;
-  onToolLog: (tool: string, log: string) => void;
   onConnectionChange: (connected: boolean) => void;
   onSessionDead: (sessionId: string) => void;
   onApprovalRequired: (tools: Array<{ tool: string; arguments: Record<string, unknown>; tool_call_id: string }>) => void;
@@ -131,6 +131,8 @@ function createEventToChunkStream(sideChannel: SideChannelCallbacks): TransformS
           sideChannel.onToolLog(
             (event.data?.tool as string) || '',
             (event.data?.log as string) || '',
           );
           break;

   onUndoComplete: () => void;
   onCompacted: (oldTokens: number, newTokens: number) => void;
   onPlanUpdate: (plan: Array<{ id: string; content: string; status: string }>) => void;
+  onToolLog: (tool: string, log: string, agentId?: string, label?: string) => void;
   onConnectionChange: (connected: boolean) => void;
   onSessionDead: (sessionId: string) => void;
   onApprovalRequired: (tools: Array<{ tool: string; arguments: Record<string, unknown>; tool_call_id: string }>) => void;
           sideChannel.onToolLog(
             (event.data?.tool as string) || '',
             (event.data?.log as string) || '',
+            (event.data?.agent_id as string) || '',
+            (event.data?.label as string) || '',
           );
           break;

frontend/src/store/agentStore.ts CHANGED Viewed

@@ -53,6 +53,19 @@ export type ActivityStatus =
   | { type: 'streaming' }
   | { type: 'cancelled' };
 /** State that is tracked per-session (each session has its own copy). */
 export interface PerSessionState {
   isProcessing: boolean;
@@ -61,12 +74,16 @@ export interface PerSessionState {
   panelView: PanelView;
   panelEditable: boolean;
   plan: PlanItem[];
-  /** Steps completed by the research sub-agent (tool_log events). */
   researchSteps: string[];
-  /** Live stats from the research sub-agent. */
-  researchStats: { toolCount: number; tokenCount: number; startedAt: number | null; finalElapsed: number | null };
 }
 const defaultSessionState: PerSessionState = {
   isProcessing: false,
   activityStatus: { type: 'idle' },
@@ -74,8 +91,9 @@ const defaultSessionState: PerSessionState = {
   panelView: 'script',
   panelEditable: false,
   plan: [],
   researchSteps: [],
-  researchStats: { toolCount: 0, tokenCount: 0, startedAt: null, finalElapsed: null },
 };
 interface AgentStore {
@@ -299,8 +317,9 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
         panelView: state.panelView,
         panelEditable: state.panelEditable,
         plan: state.plan,
         researchSteps: state.sessionStates[state.activeSessionId]?.researchSteps ?? [],
-        researchStats: state.sessionStates[state.activeSessionId]?.researchStats ?? defaultSessionState.researchStats,
       };
     }

   | { type: 'streaming' }
   | { type: 'cancelled' };
+export interface ResearchAgentStats { // Counters and timing tracked for one research sub-agent.
+  toolCount: number; // Latest value parsed from a "tools:<n>" tool_log line.
+  tokenCount: number; // Latest value parsed from a "tokens:<n>" tool_log line.
+  startedAt: number | null; // Date.now() stamp set on start; null when the agent is not running.
+  finalElapsed: number | null; // Whole seconds from start to completion; null until completion is recorded.
+}
+export interface ResearchAgentState { // Everything the UI tracks for one research sub-agent.
+  label: string; // Display label from the tool_log event; the log handler falls back to 'research'.
+  steps: string[]; // Rolling tool-call log lines; the log handler trims this to RESEARCH_MAX_STEPS.
+  stats: ResearchAgentStats; // Per-agent counters and timing.
+}
 /** State that is tracked per-session (each session has its own copy). */
 export interface PerSessionState {
   isProcessing: boolean;
   panelView: PanelView;
   panelEditable: boolean;
   plan: PlanItem[];
+  /** Per-agent research state, keyed by agent_id. */
+  researchAgents: Record<string, ResearchAgentState>;
+  /** @deprecated kept for backward compat selectors — use researchAgents instead */
   researchSteps: string[];
+  /** @deprecated kept for backward compat selectors — use researchAgents instead */
+  researchStats: ResearchAgentStats;
 }
+const defaultResearchStats: ResearchAgentStats = { toolCount: 0, tokenCount: 0, startedAt: null, finalElapsed: null }; // Zeroed, not-started stats; consumers spread it ({ ...defaultResearchStats }) to get a fresh copy.
 const defaultSessionState: PerSessionState = {
   isProcessing: false,
   activityStatus: { type: 'idle' },
   panelView: 'script',
   panelEditable: false,
   plan: [],
+  researchAgents: {},
   researchSteps: [],
+  researchStats: { ...defaultResearchStats },
 };
 interface AgentStore {
         panelView: state.panelView,
         panelEditable: state.panelEditable,
         plan: state.plan,
+        researchAgents: state.sessionStates[state.activeSessionId]?.researchAgents ?? {},
         researchSteps: state.sessionStates[state.activeSessionId]?.researchSteps ?? [],
+        researchStats: state.sessionStates[state.activeSessionId]?.researchStats ?? { ...defaultResearchStats },
       };
     }