Commit c53966b · Parent(s): 72bac94
fixing inference token

Files changed:
- agent/context_manager/manager.py  +17 -7
- agent/core/agent_loop.py           +4 -6
- backend/routes/agent.py            +2 -8
agent/context_manager/manager.py
CHANGED

@@ -47,9 +47,13 @@ def _get_hf_username() -> str:
     try:
         result = subprocess.run(
             [
-                "curl",
-                "-
-                "-
+                "curl",
+                "-s",
+                "-4",  # force IPv4
+                "-m",
+                str(_HF_WHOAMI_TIMEOUT),  # max time
+                "-H",
+                f"Authorization: Bearer {hf_token}",
                 _HF_WHOAMI_URL,
             ],
             capture_output=True,
@@ -60,9 +64,13 @@ def _get_hf_username() -> str:
         if result.returncode == 0 and result.stdout:
             data = json.loads(result.stdout)
             _hf_username_cache = data.get("name", "unknown")
-            logger.info(
+            logger.info(
+                f"HF username resolved to '{_hf_username_cache}' in {t1 - t0:.2f}s"
+            )
         else:
-            logger.warning(
+            logger.warning(
+                f"curl whoami failed (rc={result.returncode}) in {t1 - t0:.2f}s"
+            )
             _hf_username_cache = "unknown"
     except Exception as e:
         t1 = _t.monotonic()
@@ -165,12 +173,14 @@ class ContextManager:
                 )
             )

-
+        hf_key = os.environ.get("INFERENCE_TOKEN")
         response = await acompletion(
             model=model_name,
             messages=messages_to_summarize,
             max_completion_tokens=self.compact_size,
-
+            api_key=hf_key
+            if hf_key and model_name.startswith("huggingface/")
+            else None,
         )
         summarized_message = Message(
             role="assistant", content=response.choices[0].message.content
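The same conditional api_key pattern appears at every call site touched by this commit: the token from INFERENCE_TOKEN is forwarded only when the configured model uses the huggingface/ provider prefix, and None is passed otherwise so the completion client falls back to its normal credential lookup. A minimal sketch of that pattern follows; resolve_inference_key is a hypothetical helper name, not code from this repository.

# Sketch only: resolve_inference_key is a hypothetical helper, not part of the repo.
import os

def resolve_inference_key(model_name: str) -> str | None:
    """Return the HF token for huggingface/-prefixed models, otherwise None."""
    hf_key = os.environ.get("INFERENCE_TOKEN")
    # Passing None lets the completion client resolve credentials on its own.
    return hf_key if hf_key and model_name.startswith("huggingface/") else None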
agent/core/agent_loop.py
CHANGED

@@ -157,12 +157,10 @@ class Handlers:
             tool_choice="auto",
             stream=True,
             stream_options={"include_usage": True},
-
-
-
-
-                else {}
-            ),
+            api_key=_INFERENCE_API_KEY
+            if _INFERENCE_API_KEY
+            and session.config.model_name.startswith("huggingface/")
+            else None,
         )

         full_content = ""
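agent_loop.py reads the token from a module-level constant rather than per call. The diff only shows the constant being used, so the following definition is an assumption about how _INFERENCE_API_KEY might be initialized elsewhere in the file, not code taken from it.

# Assumed initialization; the actual definition is outside the hunks shown above.
import os
_INFERENCE_API_KEY = os.environ.get("INFERENCE_TOKEN")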
backend/routes/agent.py
CHANGED

@@ -90,16 +90,13 @@ async def llm_health_check() -> LLMHealthResponse:
     """
     model = session_manager.config.model_name
     hf_key = os.environ.get("INFERENCE_TOKEN")
-    api_key_kw = (
-        {"api_key": hf_key} if hf_key and model.startswith("huggingface/") else {}
-    )
     try:
         await acompletion(
             model=model,
             messages=[{"role": "user", "content": "hi"}],
             max_tokens=1,
             timeout=10,
-
+            api_key=hf_key if hf_key and model.startswith("huggingface/") else None,
         )
         return LLMHealthResponse(status="ok", model=model)
     except Exception as e:
@@ -165,9 +162,6 @@ async def generate_title(
     """Generate a short title for a chat session based on the first user message."""
     model = session_manager.config.model_name
     hf_key = os.environ.get("INFERENCE_TOKEN")
-    api_key_kw = (
-        {"api_key": hf_key} if hf_key and model.startswith("huggingface/") else {}
-    )
     try:
         response = await acompletion(
             model=model,
@@ -185,7 +179,7 @@ async def generate_title(
             max_tokens=20,
             temperature=0.3,
             timeout=8,
-
+            api_key=hf_key if hf_key and model.startswith("huggingface/") else None,
         )
         title = response.choices[0].message.content.strip().strip('"').strip("'")
         # Safety: cap at 50 chars
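Taken together, the three files now rely on a single environment variable instead of a locally built api_key_kw dict. Assuming acompletion here is litellm's async completion wrapper (which accepts an api_key override and the huggingface/ model prefix), a usage sketch with placeholder model id and token might look like this.

# All names below are placeholders; only the api_key pattern mirrors the commit.
import asyncio
import os

from litellm import acompletion  # assumption: the project uses litellm

os.environ.setdefault("INFERENCE_TOKEN", "hf_xxx")  # placeholder token

async def main() -> None:
    model = "huggingface/meta-llama/Llama-3.1-8B-Instruct"  # placeholder model id
    hf_key = os.environ.get("INFERENCE_TOKEN")
    response = await acompletion(
        model=model,
        messages=[{"role": "user", "content": "hi"}],
        max_tokens=1,
        api_key=hf_key if hf_key and model.startswith("huggingface/") else None,
    )
    print(response.choices[0].message.content)

asyncio.run(main())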
|