Spaces:

smolagents
/

ml-intern

Running on CPU Upgrade

App Files Files Community

elisaklunder committed on Apr 5

Commit

299c7d9

2 Parent(s): 0cc1ea4 1a390ae

Merge branch 'main' into fix-double-message-while-streaming

Browse files

Files changed (5) hide show

agent/tools/research_tool.py +15 -4
backend/routes/agent.py +3 -5
frontend/src/components/Chat/ActivityStatusBar.tsx +2 -1
frontend/src/components/SessionChat.tsx +7 -12
frontend/src/store/agentStore.ts +4 -3

agent/tools/research_tool.py CHANGED Viewed

@@ -46,12 +46,22 @@ Your job: explore documentation, code examples, APIs, and repos,
 then return a concise, actionable summary. The main agent will use
 your findings to implement the actual solution.
 # Research methodology
-1. **Discovery**: Find relevant entry points — example scripts, doc pages, API endpoints
 2. **Tracing**: Follow the chain from entry point to implementation detail
-3. **Analysis**: Identify patterns, current API usage, key dependencies
-4. **Synthesis**: Summarize findings in a structured format
 # How to use your tools
@@ -101,11 +111,12 @@ hf_inspect_dataset({"dataset": "org/name", "split": "train", "sample_rows": 3})
 # Output format
 Your output MUST include:
 - **Key findings**: The most important things you discovered (current API usage, working patterns)
 - **Essential references**: Specific file paths, URLs, function names, doc sections, code snippets
   that the main agent should use directly
 - **Code patterns**: Key imports, configurations, and usage patterns from working examples
-- **Recommendations**: What to do next based on your findings
 Be concise. Your output goes into another agent's context — every token counts.
 Aim for 500-1500 words max. Include actual code snippets from examples you read,

 then return a concise, actionable summary. The main agent will use
 your findings to implement the actual solution.
+# Being up to date is critical
+Always prioritize finding the most current, state-of-the-art approaches.
+ML moves fast — a method from 6 months ago may already be obsolete.
+- Search for **recent papers** (use `hf_papers`) to find SOTA methods, models, and datasets for the task
+- Compare what you find in docs/examples against what recent papers recommend — prefer the newer approach
+- When multiple approaches exist, identify which is SOTA and why (benchmark results, adoption, recency)
+- Include in your findings: what is the current best model, dataset, and method for the task
 # Research methodology
+1. **Discovery**: Find relevant entry points — example scripts, doc pages, API endpoints, **and recent papers for SOTA approaches**
 2. **Tracing**: Follow the chain from entry point to implementation detail
+3. **Analysis**: Identify patterns, current API usage, key dependencies. **Compare against SOTA from recent papers**
+4. **Synthesis**: Summarize findings in a structured format, highlighting what is current best practice vs. outdated
 # How to use your tools
 # Output format
 Your output MUST include:
+- **SOTA landscape**: Current best models, datasets, and methods for the task (from recent papers). Flag anything outdated.
 - **Key findings**: The most important things you discovered (current API usage, working patterns)
 - **Essential references**: Specific file paths, URLs, function names, doc sections, code snippets
   that the main agent should use directly
 - **Code patterns**: Key imports, configurations, and usage patterns from working examples
+- **Recommendations**: What to do next based on your findings, preferring SOTA approaches
 Be concise. Your output goes into another agent's context — every token counts.
 Aim for 500-1500 words max. Include actual code snippets from examples you read,

backend/routes/agent.py CHANGED Viewed

@@ -7,6 +7,7 @@ dependency. In dev mode (no OAUTH_CLIENT_ID), auth is bypassed automatically.
 import asyncio
 import json
 import logging
 from typing import Any
 from dependencies import get_current_user
@@ -205,17 +206,14 @@ async def create_session(
     Returns 503 if the server or user has reached the session limit.
     """
-    # Extract the user's HF token (Bearer header or HttpOnly cookie)
-    # In dev mode, fall back to environment variable if no token in request
     hf_token = None
     auth_header = request.headers.get("Authorization", "")
     if auth_header.startswith("Bearer "):
         hf_token = auth_header[7:]
     if not hf_token:
         hf_token = request.cookies.get("hf_access_token")
-    if not hf_token and user["user_id"] == "dev":
-        # Dev mode: use HF_TOKEN from environment
-        import os
         hf_token = os.environ.get("HF_TOKEN")
     try:

 import asyncio
 import json
 import logging
+import os
 from typing import Any
 from dependencies import get_current_user
     Returns 503 if the server or user has reached the session limit.
     """
+    # Extract the user's HF token (Bearer header, HttpOnly cookie, or env var)
     hf_token = None
     auth_header = request.headers.get("Authorization", "")
     if auth_header.startswith("Bearer "):
         hf_token = auth_header[7:]
     if not hf_token:
         hf_token = request.cookies.get("hf_access_token")
+    if not hf_token:
         hf_token = os.environ.get("HF_TOKEN")
     try:

frontend/src/components/Chat/ActivityStatusBar.tsx CHANGED Viewed

@@ -32,6 +32,7 @@ function statusLabel(status: ActivityStatus): string {
       return base;
     }
     case 'waiting-approval': return 'Waiting for approval';
     default: return '';
   }
 }
@@ -59,7 +60,7 @@ export default function ActivityStatusBar() {
           animation: `${shimmer} 4s ease-in-out infinite`,
         }}
       >
-        {label}…
       </Typography>
     </Box>
   );

       return base;
     }
     case 'waiting-approval': return 'Waiting for approval';
+    case 'cancelled': return 'What should the agent do instead?';
     default: return '';
   }
 }
           animation: `${shimmer} 4s ease-in-out infinite`,
         }}
       >
+        {label}{activityStatus.type !== 'cancelled' && '…'}
       </Typography>
     </Box>
   );

frontend/src/components/SessionChat.tsx CHANGED Viewed

@@ -5,7 +5,7 @@
  * runs — processing events — but only the active session renders visible
  * UI (MessageList + ChatInput).
  */
-import { useCallback, useEffect, useState } from 'react';
 import { useAgentChat } from '@/hooks/useAgentChat';
 import { useAgentStore } from '@/store/agentStore';
 import { useSessionStore } from '@/store/sessionStore';
@@ -24,8 +24,6 @@ export default function SessionChat({ sessionId, isActive, onSessionDead }: Sess
   const { isConnected, isProcessing, activityStatus, updateSession } = useAgentStore();
   const { updateSessionTitle } = useSessionStore();
-  const [wasCancelled, setWasCancelled] = useState(false);
   const { messages, sendMessage, stop, status, undoLastTurn, approveTools } = useAgentChat({
     sessionId,
     isActive,
@@ -57,11 +55,11 @@ export default function SessionChat({ sessionId, isActive, onSessionDead }: Sess
     return () => document.removeEventListener('visibilitychange', onVisible);
   }, [isActive, sessionId]);
-  // Wrap stop to track cancellation
   const handleStop = useCallback(() => {
     stop();
-    setWasCancelled(true);
-  }, [stop]);
   // SDK status is the ground truth — if it's streaming/submitted, agent is busy
   const sdkBusy = status === 'streaming' || status === 'submitted';
@@ -71,12 +69,11 @@ export default function SessionChat({ sessionId, isActive, onSessionDead }: Sess
     async (text: string) => {
       if (!text.trim() || busy) return;
-      setWasCancelled(false);
-      updateSession(sessionId, { isProcessing: true });
       sendMessage({ text: text.trim(), metadata: { createdAt: new Date().toISOString() } });
       // Auto-title the session from the first user message
-      const isFirstMessage = messages.filter((m) => m.role === 'user').length <= 1;
       if (isFirstMessage) {
         apiFetch('/api/title', {
           method: 'POST',
@@ -114,9 +111,7 @@ export default function SessionChat({ sessionId, isActive, onSessionDead }: Sess
         placeholder={
           activityStatus.type === 'waiting-approval'
             ? 'Approve or reject pending tools first...'
-            : wasCancelled
-              ? 'What should the agent do instead?'
-              : undefined
         }
       />
     </>

  * runs — processing events — but only the active session renders visible
  * UI (MessageList + ChatInput).
  */
+import { useCallback, useEffect } from 'react';
 import { useAgentChat } from '@/hooks/useAgentChat';
 import { useAgentStore } from '@/store/agentStore';
 import { useSessionStore } from '@/store/sessionStore';
   const { isConnected, isProcessing, activityStatus, updateSession } = useAgentStore();
   const { updateSessionTitle } = useSessionStore();
   const { messages, sendMessage, stop, status, undoLastTurn, approveTools } = useAgentChat({
     sessionId,
     isActive,
     return () => document.removeEventListener('visibilitychange', onVisible);
   }, [isActive, sessionId]);
+  // Wrap stop to show cancelled shimmer
   const handleStop = useCallback(() => {
     stop();
+    updateSession(sessionId, { activityStatus: { type: 'cancelled' } });
+  }, [stop, updateSession, sessionId]);
   // SDK status is the ground truth — if it's streaming/submitted, agent is busy
   const sdkBusy = status === 'streaming' || status === 'submitted';
     async (text: string) => {
       if (!text.trim() || busy) return;
+      updateSession(sessionId, { isProcessing: true, activityStatus: { type: 'thinking' } });
       sendMessage({ text: text.trim(), metadata: { createdAt: new Date().toISOString() } });
       // Auto-title the session from the first user message
+      const isFirstMessage = messages.filter((m) => m.role === 'user').length === 0;
       if (isFirstMessage) {
         apiFetch('/api/title', {
           method: 'POST',
         placeholder={
           activityStatus.type === 'waiting-approval'
             ? 'Approve or reject pending tools first...'
+            : undefined
         }
       />
     </>

frontend/src/store/agentStore.ts CHANGED Viewed

@@ -50,7 +50,8 @@ export type ActivityStatus =
   | { type: 'thinking' }
   | { type: 'tool'; toolName: string; description?: string }
   | { type: 'waiting-approval' }
-  | { type: 'streaming' };
 /** State that is tracked per-session (each session has its own copy). */
 export interface PerSessionState {
@@ -222,7 +223,7 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
     // Apply the processing→idle side effect
     const processingCleared = 'isProcessing' in updates && !updates.isProcessing;
     if (processingCleared) {
-      if (updated.activityStatus.type !== 'waiting-approval') {
         updated.activityStatus = { type: 'idle' };
       }
     }
@@ -300,7 +301,7 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
   setProcessing: (isProcessing) => {
     const current = get().activityStatus;
-    const preserveStatus = current.type === 'waiting-approval';
     set({ isProcessing, ...(!isProcessing && !preserveStatus ? { activityStatus: { type: 'idle' } } : {}) });
   },
   setConnected: (isConnected) => set({ isConnected }),

   | { type: 'thinking' }
   | { type: 'tool'; toolName: string; description?: string }
   | { type: 'waiting-approval' }
+  | { type: 'streaming' }
+  | { type: 'cancelled' };
 /** State that is tracked per-session (each session has its own copy). */
 export interface PerSessionState {
     // Apply the processing→idle side effect
     const processingCleared = 'isProcessing' in updates && !updates.isProcessing;
     if (processingCleared) {
+      if (updated.activityStatus.type !== 'waiting-approval' && updated.activityStatus.type !== 'cancelled') {
         updated.activityStatus = { type: 'idle' };
       }
     }
   setProcessing: (isProcessing) => {
     const current = get().activityStatus;
+    const preserveStatus = current.type === 'waiting-approval' || current.type === 'cancelled';
     set({ isProcessing, ...(!isProcessing && !preserveStatus ? { activityStatus: { type: 'idle' } } : {}) });
   },
   setConnected: (isConnected) => set({ isConnected }),