Guillaume Salou committed on
Commit
2fac9ff
·
unverified ·
1 Parent(s): 540437a

feat: route Claude via Bedrock inference profile (#82)

Browse files

Switch the Claude endpoint IDs from the direct Anthropic API to the
Bedrock US cross-region inference profile. Relax the model-provider check
from an "anthropic/" prefix match to an "anthropic" substring match, so
both the old and the new ID forms keep hitting the HF-org gate, the daily
Claude quota, and prompt caching.

agent/core/model_switcher.py CHANGED
@@ -24,8 +24,8 @@ from agent.core.effort_probe import ProbeInconclusive, probe_effort
24
  # ":cheapest" / ":preferred" / ":<provider>" to override the default
25
  # routing policy (auto = fastest with failover).
26
  SUGGESTED_MODELS = [
27
- {"id": "anthropic/claude-opus-4-7", "label": "Claude Opus 4.7"},
28
- {"id": "anthropic/claude-opus-4-6", "label": "Claude Opus 4.6"},
29
  {"id": "MiniMaxAI/MiniMax-M2.7", "label": "MiniMax M2.7"},
30
  {"id": "moonshotai/Kimi-K2.6", "label": "Kimi K2.6"},
31
  {"id": "zai-org/GLM-5.1", "label": "GLM 5.1"},
 
24
  # ":cheapest" / ":preferred" / ":<provider>" to override the default
25
  # routing policy (auto = fastest with failover).
26
  SUGGESTED_MODELS = [
27
+ {"id": "bedrock/us.anthropic.claude-opus-4-7", "label": "Claude Opus 4.7"},
28
+ {"id": "bedrock/us.anthropic.claude-opus-4-6-v1", "label": "Claude Opus 4.6"},
29
  {"id": "MiniMaxAI/MiniMax-M2.7", "label": "MiniMax M2.7"},
30
  {"id": "moonshotai/Kimi-K2.6", "label": "Kimi K2.6"},
31
  {"id": "zai-org/GLM-5.1", "label": "GLM 5.1"},
agent/core/prompt_caching.py CHANGED
@@ -28,7 +28,7 @@ def with_prompt_caching(
28
  that share the underlying ``ContextManager.items`` list don't see their
29
  persisted history rewritten.
30
  """
31
- if not model_name or not model_name.startswith("anthropic/"):
32
  return messages, tools
33
 
34
  if tools:
 
28
  that share the underlying ``ContextManager.items`` list don't see their
29
  persisted history rewritten.
30
  """
31
+ if not model_name or "anthropic" not in model_name:
32
  return messages, tools
33
 
34
  if tools:
agent/core/session.py CHANGED
@@ -95,7 +95,7 @@ class Session:
95
  self.event_queue = event_queue
96
  self.session_id = str(uuid.uuid4())
97
  self.config = config or Config(
98
- model_name="anthropic/claude-sonnet-4-5-20250929",
99
  )
100
  self.is_running = True
101
  self._cancelled = asyncio.Event()
 
95
  self.event_queue = event_queue
96
  self.session_id = str(uuid.uuid4())
97
  self.config = config or Config(
98
+ model_name="bedrock/us.anthropic.claude-sonnet-4-5-20250929-v1:0",
99
  )
100
  self.is_running = True
101
  self._cancelled = asyncio.Event()
agent/tools/research_tool.py CHANGED
@@ -216,8 +216,8 @@ RESEARCH_TOOL_SPEC = {
216
 
217
  def _get_research_model(main_model: str) -> str:
218
  """Pick a cheaper model for research based on the main model."""
219
- if "anthropic/" in main_model:
220
- return "anthropic/claude-sonnet-4-6"
221
  # For non-Anthropic models (HF router etc.), use the same model
222
  return main_model
223
 
 
216
 
217
  def _get_research_model(main_model: str) -> str:
218
  """Pick a cheaper model for research based on the main model."""
219
+ if "anthropic" in main_model:
220
+ return "bedrock/us.anthropic.claude-sonnet-4-6"
221
  # For non-Anthropic models (HF router etc.), use the same model
222
  return main_model
223
 
agent/utils/terminal_display.py CHANGED
@@ -99,7 +99,7 @@ def print_banner(model: str | None = None, hf_user: str | None = None) -> None:
99
  _console.file.write("\033[2J\033[H")
100
  _console.file.flush()
101
 
102
- model_label = model or "anthropic/claude-opus-4-6"
103
  user_label = hf_user or "not logged in"
104
 
105
  # Warm gold palette matching the shimmer highlight (255, 200, 80)
 
99
  _console.file.write("\033[2J\033[H")
100
  _console.file.flush()
101
 
102
+ model_label = model or "bedrock/us.anthropic.claude-opus-4-6-v1"
103
  user_label = hf_user or "not logged in"
104
 
105
  # Warm gold palette matching the shimmer highlight (255, 200, 80)
backend/routes/agent.py CHANGED
@@ -47,7 +47,7 @@ AVAILABLE_MODELS = [
47
  "recommended": True,
48
  },
49
  {
50
- "id": "anthropic/claude-opus-4-6",
51
  "label": "Claude Opus 4.6",
52
  "provider": "anthropic",
53
  "tier": "pro",
@@ -68,17 +68,21 @@ AVAILABLE_MODELS = [
68
  ]
69
 
70
 
 
 
 
 
71
  async def _require_hf_for_anthropic(request: Request, model_id: str) -> None:
72
  """403 if a non-``huggingface``-org user tries to select an Anthropic model.
73
 
74
  Anthropic models are billed to the Space's ``ANTHROPIC_API_KEY``; every
75
  other model in ``AVAILABLE_MODELS`` is routed through HF Router and
76
- billed via ``X-HF-Bill-To``. The gate only fires for ``anthropic/*`` so
77
  non-HF users can still freely switch between the free models.
78
 
79
  Pattern: https://github.com/huggingface/ml-intern/pull/63
80
  """
81
- if not model_id.startswith("anthropic/"):
82
  return
83
  if not await require_huggingface_org_member(request):
84
  raise HTTPException(
@@ -110,7 +114,7 @@ async def _enforce_claude_quota(
110
  if agent_session.claude_counted:
111
  return
112
  model_name = agent_session.session.config.model_name
113
- if not model_name.startswith("anthropic/"):
114
  return
115
  user_id = user["user_id"]
116
  used = await user_quotas.get_claude_used_today(user_id)
 
47
  "recommended": True,
48
  },
49
  {
50
+ "id": "bedrock/us.anthropic.claude-opus-4-6-v1",
51
  "label": "Claude Opus 4.6",
52
  "provider": "anthropic",
53
  "tier": "pro",
 
68
  ]
69
 
70
 
71
+ def _is_anthropic_model(model_id: str) -> bool:
72
+ return "anthropic" in model_id
73
+
74
+
75
  async def _require_hf_for_anthropic(request: Request, model_id: str) -> None:
76
  """403 if a non-``huggingface``-org user tries to select an Anthropic model.
77
 
78
  Anthropic models are billed to the Space's ``ANTHROPIC_API_KEY``; every
79
  other model in ``AVAILABLE_MODELS`` is routed through HF Router and
80
+ billed via ``X-HF-Bill-To``. The gate only fires for Anthropic so
81
  non-HF users can still freely switch between the free models.
82
 
83
  Pattern: https://github.com/huggingface/ml-intern/pull/63
84
  """
85
+ if not _is_anthropic_model(model_id):
86
  return
87
  if not await require_huggingface_org_member(request):
88
  raise HTTPException(
 
114
  if agent_session.claude_counted:
115
  return
116
  model_name = agent_session.session.config.model_name
117
+ if not _is_anthropic_model(model_name):
118
  return
119
  user_id = user["user_id"]
120
  used = await user_quotas.get_claude_used_today(user_id)
configs/main_agent_config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "model_name": "anthropic/claude-opus-4-6",
3
  "save_sessions": true,
4
  "session_dataset_repo": "akseljoonas/hf-agent-sessions",
5
  "yolo_mode": false,
 
1
  {
2
+ "model_name": "bedrock/us.anthropic.claude-opus-4-6-v1",
3
  "save_sessions": true,
4
  "session_dataset_repo": "akseljoonas/hf-agent-sessions",
5
  "yolo_mode": false,