elisaklunder committed on
Commit
299c7d9
·
2 Parent(s): 0cc1ea4 1a390ae

Merge branch 'main' into fix-double-message-while-streaming

Browse files
agent/tools/research_tool.py CHANGED
@@ -46,12 +46,22 @@ Your job: explore documentation, code examples, APIs, and repos,
46
  then return a concise, actionable summary. The main agent will use
47
  your findings to implement the actual solution.
48
 
 
 
 
 
 
 
 
 
 
 
49
  # Research methodology
50
 
51
- 1. **Discovery**: Find relevant entry points — example scripts, doc pages, API endpoints
52
  2. **Tracing**: Follow the chain from entry point to implementation detail
53
- 3. **Analysis**: Identify patterns, current API usage, key dependencies
54
- 4. **Synthesis**: Summarize findings in a structured format
55
 
56
  # How to use your tools
57
 
@@ -101,11 +111,12 @@ hf_inspect_dataset({"dataset": "org/name", "split": "train", "sample_rows": 3})
101
  # Output format
102
 
103
  Your output MUST include:
 
104
  - **Key findings**: The most important things you discovered (current API usage, working patterns)
105
  - **Essential references**: Specific file paths, URLs, function names, doc sections, code snippets
106
  that the main agent should use directly
107
  - **Code patterns**: Key imports, configurations, and usage patterns from working examples
108
- - **Recommendations**: What to do next based on your findings
109
 
110
  Be concise. Your output goes into another agent's context — every token counts.
111
  Aim for 500-1500 words max. Include actual code snippets from examples you read,
 
46
  then return a concise, actionable summary. The main agent will use
47
  your findings to implement the actual solution.
48
 
49
+ # Being up to date is critical
50
+
51
+ Always prioritize finding the most current, state-of-the-art approaches.
52
+ ML moves fast — a method from 6 months ago may already be obsolete.
53
+
54
+ - Search for **recent papers** (use `hf_papers`) to find SOTA methods, models, and datasets for the task
55
+ - Compare what you find in docs/examples against what recent papers recommend — prefer the newer approach
56
+ - When multiple approaches exist, identify which is SOTA and why (benchmark results, adoption, recency)
57
+ - Include in your findings: what is the current best model, dataset, and method for the task
58
+
59
  # Research methodology
60
 
61
+ 1. **Discovery**: Find relevant entry points — example scripts, doc pages, API endpoints, **and recent papers for SOTA approaches**
62
  2. **Tracing**: Follow the chain from entry point to implementation detail
63
+ 3. **Analysis**: Identify patterns, current API usage, key dependencies. **Compare against SOTA from recent papers**
64
+ 4. **Synthesis**: Summarize findings in a structured format, highlighting what is current best practice vs. outdated
65
 
66
  # How to use your tools
67
 
 
111
  # Output format
112
 
113
  Your output MUST include:
114
+ - **SOTA landscape**: Current best models, datasets, and methods for the task (from recent papers). Flag anything outdated.
115
  - **Key findings**: The most important things you discovered (current API usage, working patterns)
116
  - **Essential references**: Specific file paths, URLs, function names, doc sections, code snippets
117
  that the main agent should use directly
118
  - **Code patterns**: Key imports, configurations, and usage patterns from working examples
119
+ - **Recommendations**: What to do next based on your findings, preferring SOTA approaches
120
 
121
  Be concise. Your output goes into another agent's context — every token counts.
122
  Aim for 500-1500 words max. Include actual code snippets from examples you read,
backend/routes/agent.py CHANGED
@@ -7,6 +7,7 @@ dependency. In dev mode (no OAUTH_CLIENT_ID), auth is bypassed automatically.
7
  import asyncio
8
  import json
9
  import logging
 
10
  from typing import Any
11
 
12
  from dependencies import get_current_user
@@ -205,17 +206,14 @@ async def create_session(
205
 
206
  Returns 503 if the server or user has reached the session limit.
207
  """
208
- # Extract the user's HF token (Bearer header or HttpOnly cookie)
209
- # In dev mode, fall back to environment variable if no token in request
210
  hf_token = None
211
  auth_header = request.headers.get("Authorization", "")
212
  if auth_header.startswith("Bearer "):
213
  hf_token = auth_header[7:]
214
  if not hf_token:
215
  hf_token = request.cookies.get("hf_access_token")
216
- if not hf_token and user["user_id"] == "dev":
217
- # Dev mode: use HF_TOKEN from environment
218
- import os
219
  hf_token = os.environ.get("HF_TOKEN")
220
 
221
  try:
 
7
  import asyncio
8
  import json
9
  import logging
10
+ import os
11
  from typing import Any
12
 
13
  from dependencies import get_current_user
 
206
 
207
  Returns 503 if the server or user has reached the session limit.
208
  """
209
+ # Extract the user's HF token (Bearer header, HttpOnly cookie, or env var)
 
210
  hf_token = None
211
  auth_header = request.headers.get("Authorization", "")
212
  if auth_header.startswith("Bearer "):
213
  hf_token = auth_header[7:]
214
  if not hf_token:
215
  hf_token = request.cookies.get("hf_access_token")
216
+ if not hf_token:
 
 
217
  hf_token = os.environ.get("HF_TOKEN")
218
 
219
  try:
frontend/src/components/Chat/ActivityStatusBar.tsx CHANGED
@@ -32,6 +32,7 @@ function statusLabel(status: ActivityStatus): string {
32
  return base;
33
  }
34
  case 'waiting-approval': return 'Waiting for approval';
 
35
  default: return '';
36
  }
37
  }
@@ -59,7 +60,7 @@ export default function ActivityStatusBar() {
59
  animation: `${shimmer} 4s ease-in-out infinite`,
60
  }}
61
  >
62
- {label}…
63
  </Typography>
64
  </Box>
65
  );
 
32
  return base;
33
  }
34
  case 'waiting-approval': return 'Waiting for approval';
35
+ case 'cancelled': return 'What should the agent do instead?';
36
  default: return '';
37
  }
38
  }
 
60
  animation: `${shimmer} 4s ease-in-out infinite`,
61
  }}
62
  >
63
+ {label}{activityStatus.type !== 'cancelled' && ''}
64
  </Typography>
65
  </Box>
66
  );
frontend/src/components/SessionChat.tsx CHANGED
@@ -5,7 +5,7 @@
5
  * runs — processing events — but only the active session renders visible
6
  * UI (MessageList + ChatInput).
7
  */
8
- import { useCallback, useEffect, useState } from 'react';
9
  import { useAgentChat } from '@/hooks/useAgentChat';
10
  import { useAgentStore } from '@/store/agentStore';
11
  import { useSessionStore } from '@/store/sessionStore';
@@ -24,8 +24,6 @@ export default function SessionChat({ sessionId, isActive, onSessionDead }: Sess
24
  const { isConnected, isProcessing, activityStatus, updateSession } = useAgentStore();
25
  const { updateSessionTitle } = useSessionStore();
26
 
27
- const [wasCancelled, setWasCancelled] = useState(false);
28
-
29
  const { messages, sendMessage, stop, status, undoLastTurn, approveTools } = useAgentChat({
30
  sessionId,
31
  isActive,
@@ -57,11 +55,11 @@ export default function SessionChat({ sessionId, isActive, onSessionDead }: Sess
57
  return () => document.removeEventListener('visibilitychange', onVisible);
58
  }, [isActive, sessionId]);
59
 
60
- // Wrap stop to track cancellation
61
  const handleStop = useCallback(() => {
62
  stop();
63
- setWasCancelled(true);
64
- }, [stop]);
65
 
66
  // SDK status is the ground truth — if it's streaming/submitted, agent is busy
67
  const sdkBusy = status === 'streaming' || status === 'submitted';
@@ -71,12 +69,11 @@ export default function SessionChat({ sessionId, isActive, onSessionDead }: Sess
71
  async (text: string) => {
72
  if (!text.trim() || busy) return;
73
 
74
- setWasCancelled(false);
75
- updateSession(sessionId, { isProcessing: true });
76
  sendMessage({ text: text.trim(), metadata: { createdAt: new Date().toISOString() } });
77
 
78
  // Auto-title the session from the first user message
79
- const isFirstMessage = messages.filter((m) => m.role === 'user').length <= 1;
80
  if (isFirstMessage) {
81
  apiFetch('/api/title', {
82
  method: 'POST',
@@ -114,9 +111,7 @@ export default function SessionChat({ sessionId, isActive, onSessionDead }: Sess
114
  placeholder={
115
  activityStatus.type === 'waiting-approval'
116
  ? 'Approve or reject pending tools first...'
117
- : wasCancelled
118
- ? 'What should the agent do instead?'
119
- : undefined
120
  }
121
  />
122
  </>
 
5
  * runs — processing events — but only the active session renders visible
6
  * UI (MessageList + ChatInput).
7
  */
8
+ import { useCallback, useEffect } from 'react';
9
  import { useAgentChat } from '@/hooks/useAgentChat';
10
  import { useAgentStore } from '@/store/agentStore';
11
  import { useSessionStore } from '@/store/sessionStore';
 
24
  const { isConnected, isProcessing, activityStatus, updateSession } = useAgentStore();
25
  const { updateSessionTitle } = useSessionStore();
26
 
 
 
27
  const { messages, sendMessage, stop, status, undoLastTurn, approveTools } = useAgentChat({
28
  sessionId,
29
  isActive,
 
55
  return () => document.removeEventListener('visibilitychange', onVisible);
56
  }, [isActive, sessionId]);
57
 
58
+ // Wrap stop to show cancelled shimmer
59
  const handleStop = useCallback(() => {
60
  stop();
61
+ updateSession(sessionId, { activityStatus: { type: 'cancelled' } });
62
+ }, [stop, updateSession, sessionId]);
63
 
64
  // SDK status is the ground truth — if it's streaming/submitted, agent is busy
65
  const sdkBusy = status === 'streaming' || status === 'submitted';
 
69
  async (text: string) => {
70
  if (!text.trim() || busy) return;
71
 
72
+ updateSession(sessionId, { isProcessing: true, activityStatus: { type: 'thinking' } });
 
73
  sendMessage({ text: text.trim(), metadata: { createdAt: new Date().toISOString() } });
74
 
75
  // Auto-title the session from the first user message
76
+ const isFirstMessage = messages.filter((m) => m.role === 'user').length === 0;
77
  if (isFirstMessage) {
78
  apiFetch('/api/title', {
79
  method: 'POST',
 
111
  placeholder={
112
  activityStatus.type === 'waiting-approval'
113
  ? 'Approve or reject pending tools first...'
114
+ : undefined
 
 
115
  }
116
  />
117
  </>
frontend/src/store/agentStore.ts CHANGED
@@ -50,7 +50,8 @@ export type ActivityStatus =
50
  | { type: 'thinking' }
51
  | { type: 'tool'; toolName: string; description?: string }
52
  | { type: 'waiting-approval' }
53
- | { type: 'streaming' };
 
54
 
55
  /** State that is tracked per-session (each session has its own copy). */
56
  export interface PerSessionState {
@@ -222,7 +223,7 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
222
  // Apply the processing→idle side effect
223
  const processingCleared = 'isProcessing' in updates && !updates.isProcessing;
224
  if (processingCleared) {
225
- if (updated.activityStatus.type !== 'waiting-approval') {
226
  updated.activityStatus = { type: 'idle' };
227
  }
228
  }
@@ -300,7 +301,7 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
300
 
301
  setProcessing: (isProcessing) => {
302
  const current = get().activityStatus;
303
- const preserveStatus = current.type === 'waiting-approval';
304
  set({ isProcessing, ...(!isProcessing && !preserveStatus ? { activityStatus: { type: 'idle' } } : {}) });
305
  },
306
  setConnected: (isConnected) => set({ isConnected }),
 
50
  | { type: 'thinking' }
51
  | { type: 'tool'; toolName: string; description?: string }
52
  | { type: 'waiting-approval' }
53
+ | { type: 'streaming' }
54
+ | { type: 'cancelled' };
55
 
56
  /** State that is tracked per-session (each session has its own copy). */
57
  export interface PerSessionState {
 
223
  // Apply the processing→idle side effect
224
  const processingCleared = 'isProcessing' in updates && !updates.isProcessing;
225
  if (processingCleared) {
226
+ if (updated.activityStatus.type !== 'waiting-approval' && updated.activityStatus.type !== 'cancelled') {
227
  updated.activityStatus = { type: 'idle' };
228
  }
229
  }
 
301
 
302
  setProcessing: (isProcessing) => {
303
  const current = get().activityStatus;
304
+ const preserveStatus = current.type === 'waiting-approval' || current.type === 'cancelled';
305
  set({ isProcessing, ...(!isProcessing && !preserveStatus ? { activityStatus: { type: 'idle' } } : {}) });
306
  },
307
  setConnected: (isConnected) => set({ isConnected }),