akseljoonas HF Staff committed on
Commit
c53966b
·
1 Parent(s): 72bac94

fixing inference token

Browse files
agent/context_manager/manager.py CHANGED
@@ -47,9 +47,13 @@ def _get_hf_username() -> str:
47
  try:
48
  result = subprocess.run(
49
  [
50
- "curl", "-s", "-4", # force IPv4
51
- "-m", str(_HF_WHOAMI_TIMEOUT), # max time
52
- "-H", f"Authorization: Bearer {hf_token}",
 
 
 
 
53
  _HF_WHOAMI_URL,
54
  ],
55
  capture_output=True,
@@ -60,9 +64,13 @@ def _get_hf_username() -> str:
60
  if result.returncode == 0 and result.stdout:
61
  data = json.loads(result.stdout)
62
  _hf_username_cache = data.get("name", "unknown")
63
- logger.info(f"HF username resolved to '{_hf_username_cache}' in {t1 - t0:.2f}s")
 
 
64
  else:
65
- logger.warning(f"curl whoami failed (rc={result.returncode}) in {t1 - t0:.2f}s")
 
 
66
  _hf_username_cache = "unknown"
67
  except Exception as e:
68
  t1 = _t.monotonic()
@@ -165,12 +173,14 @@ class ContextManager:
165
  )
166
  )
167
 
168
- api_key = os.environ.get("INFERENCE_TOKEN")
169
  response = await acompletion(
170
  model=model_name,
171
  messages=messages_to_summarize,
172
  max_completion_tokens=self.compact_size,
173
- **({'api_key': api_key} if api_key and model_name.startswith('huggingface/') else {}),
 
 
174
  )
175
  summarized_message = Message(
176
  role="assistant", content=response.choices[0].message.content
 
47
  try:
48
  result = subprocess.run(
49
  [
50
+ "curl",
51
+ "-s",
52
+ "-4", # force IPv4
53
+ "-m",
54
+ str(_HF_WHOAMI_TIMEOUT), # max time
55
+ "-H",
56
+ f"Authorization: Bearer {hf_token}",
57
  _HF_WHOAMI_URL,
58
  ],
59
  capture_output=True,
 
64
  if result.returncode == 0 and result.stdout:
65
  data = json.loads(result.stdout)
66
  _hf_username_cache = data.get("name", "unknown")
67
+ logger.info(
68
+ f"HF username resolved to '{_hf_username_cache}' in {t1 - t0:.2f}s"
69
+ )
70
  else:
71
+ logger.warning(
72
+ f"curl whoami failed (rc={result.returncode}) in {t1 - t0:.2f}s"
73
+ )
74
  _hf_username_cache = "unknown"
75
  except Exception as e:
76
  t1 = _t.monotonic()
 
173
  )
174
  )
175
 
176
+ hf_key = os.environ.get("INFERENCE_TOKEN")
177
  response = await acompletion(
178
  model=model_name,
179
  messages=messages_to_summarize,
180
  max_completion_tokens=self.compact_size,
181
+ api_key=hf_key
182
+ if hf_key and model_name.startswith("huggingface/")
183
+ else None,
184
  )
185
  summarized_message = Message(
186
  role="assistant", content=response.choices[0].message.content
agent/core/agent_loop.py CHANGED
@@ -157,12 +157,10 @@ class Handlers:
157
  tool_choice="auto",
158
  stream=True,
159
  stream_options={"include_usage": True},
160
- **(
161
- {"api_key": _INFERENCE_API_KEY}
162
- if _INFERENCE_API_KEY
163
- and session.config.model_name.startswith("huggingface/")
164
- else {}
165
- ),
166
  )
167
 
168
  full_content = ""
 
157
  tool_choice="auto",
158
  stream=True,
159
  stream_options={"include_usage": True},
160
+ api_key=_INFERENCE_API_KEY
161
+ if _INFERENCE_API_KEY
162
+ and session.config.model_name.startswith("huggingface/")
163
+ else None,
 
 
164
  )
165
 
166
  full_content = ""
backend/routes/agent.py CHANGED
@@ -90,16 +90,13 @@ async def llm_health_check() -> LLMHealthResponse:
90
  """
91
  model = session_manager.config.model_name
92
  hf_key = os.environ.get("INFERENCE_TOKEN")
93
- api_key_kw = (
94
- {"api_key": hf_key} if hf_key and model.startswith("huggingface/") else {}
95
- )
96
  try:
97
  await acompletion(
98
  model=model,
99
  messages=[{"role": "user", "content": "hi"}],
100
  max_tokens=1,
101
  timeout=10,
102
- **api_key_kw,
103
  )
104
  return LLMHealthResponse(status="ok", model=model)
105
  except Exception as e:
@@ -165,9 +162,6 @@ async def generate_title(
165
  """Generate a short title for a chat session based on the first user message."""
166
  model = session_manager.config.model_name
167
  hf_key = os.environ.get("INFERENCE_TOKEN")
168
- api_key_kw = (
169
- {"api_key": hf_key} if hf_key and model.startswith("huggingface/") else {}
170
- )
171
  try:
172
  response = await acompletion(
173
  model=model,
@@ -185,7 +179,7 @@ async def generate_title(
185
  max_tokens=20,
186
  temperature=0.3,
187
  timeout=8,
188
- **api_key_kw,
189
  )
190
  title = response.choices[0].message.content.strip().strip('"').strip("'")
191
  # Safety: cap at 50 chars
 
90
  """
91
  model = session_manager.config.model_name
92
  hf_key = os.environ.get("INFERENCE_TOKEN")
 
 
 
93
  try:
94
  await acompletion(
95
  model=model,
96
  messages=[{"role": "user", "content": "hi"}],
97
  max_tokens=1,
98
  timeout=10,
99
+ api_key=hf_key if hf_key and model.startswith("huggingface/") else None,
100
  )
101
  return LLMHealthResponse(status="ok", model=model)
102
  except Exception as e:
 
162
  """Generate a short title for a chat session based on the first user message."""
163
  model = session_manager.config.model_name
164
  hf_key = os.environ.get("INFERENCE_TOKEN")
 
 
 
165
  try:
166
  response = await acompletion(
167
  model=model,
 
179
  max_tokens=20,
180
  temperature=0.3,
181
  timeout=8,
182
+ api_key=hf_key if hf_key and model.startswith("huggingface/") else None,
183
  )
184
  title = response.choices[0].message.content.strip().strip('"').strip("'")
185
  # Safety: cap at 50 chars