Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Guillaume Salou committed on
feat: route Claude via Bedrock inference profile (#82)
Browse files
Switch the Claude endpoint IDs from the direct Anthropic API to the US
cross-region inference profile. Extend the model-provider substring
check so both prefixes keep hitting the HF-org gate, the daily Claude
quota, and prompt caching.
- agent/core/model_switcher.py +2 -2
- agent/core/prompt_caching.py +1 -1
- agent/core/session.py +1 -1
- agent/tools/research_tool.py +2 -2
- agent/utils/terminal_display.py +1 -1
- backend/routes/agent.py +8 -4
- configs/main_agent_config.json +1 -1
agent/core/model_switcher.py
CHANGED
|
@@ -24,8 +24,8 @@ from agent.core.effort_probe import ProbeInconclusive, probe_effort
|
|
| 24 |
# ":cheapest" / ":preferred" / ":<provider>" to override the default
|
| 25 |
# routing policy (auto = fastest with failover).
|
| 26 |
SUGGESTED_MODELS = [
|
| 27 |
-
{"id": "
|
| 28 |
-
{"id": "
|
| 29 |
{"id": "MiniMaxAI/MiniMax-M2.7", "label": "MiniMax M2.7"},
|
| 30 |
{"id": "moonshotai/Kimi-K2.6", "label": "Kimi K2.6"},
|
| 31 |
{"id": "zai-org/GLM-5.1", "label": "GLM 5.1"},
|
|
|
|
| 24 |
# ":cheapest" / ":preferred" / ":<provider>" to override the default
|
| 25 |
# routing policy (auto = fastest with failover).
|
| 26 |
SUGGESTED_MODELS = [
|
| 27 |
+
{"id": "bedrock/us.anthropic.claude-opus-4-7", "label": "Claude Opus 4.7"},
|
| 28 |
+
{"id": "bedrock/us.anthropic.claude-opus-4-6-v1", "label": "Claude Opus 4.6"},
|
| 29 |
{"id": "MiniMaxAI/MiniMax-M2.7", "label": "MiniMax M2.7"},
|
| 30 |
{"id": "moonshotai/Kimi-K2.6", "label": "Kimi K2.6"},
|
| 31 |
{"id": "zai-org/GLM-5.1", "label": "GLM 5.1"},
|
agent/core/prompt_caching.py
CHANGED
|
@@ -28,7 +28,7 @@ def with_prompt_caching(
|
|
| 28 |
that share the underlying ``ContextManager.items`` list don't see their
|
| 29 |
persisted history rewritten.
|
| 30 |
"""
|
| 31 |
-
if not model_name or
|
| 32 |
return messages, tools
|
| 33 |
|
| 34 |
if tools:
|
|
|
|
| 28 |
that share the underlying ``ContextManager.items`` list don't see their
|
| 29 |
persisted history rewritten.
|
| 30 |
"""
|
| 31 |
+
if not model_name or "anthropic" not in model_name:
|
| 32 |
return messages, tools
|
| 33 |
|
| 34 |
if tools:
|
agent/core/session.py
CHANGED
|
@@ -95,7 +95,7 @@ class Session:
|
|
| 95 |
self.event_queue = event_queue
|
| 96 |
self.session_id = str(uuid.uuid4())
|
| 97 |
self.config = config or Config(
|
| 98 |
-
model_name="
|
| 99 |
)
|
| 100 |
self.is_running = True
|
| 101 |
self._cancelled = asyncio.Event()
|
|
|
|
| 95 |
self.event_queue = event_queue
|
| 96 |
self.session_id = str(uuid.uuid4())
|
| 97 |
self.config = config or Config(
|
| 98 |
+
model_name="bedrock/us.anthropic.claude-sonnet-4-5-20250929-v1:0",
|
| 99 |
)
|
| 100 |
self.is_running = True
|
| 101 |
self._cancelled = asyncio.Event()
|
agent/tools/research_tool.py
CHANGED
|
@@ -216,8 +216,8 @@ RESEARCH_TOOL_SPEC = {
|
|
| 216 |
|
| 217 |
def _get_research_model(main_model: str) -> str:
|
| 218 |
"""Pick a cheaper model for research based on the main model."""
|
| 219 |
-
if "anthropic
|
| 220 |
-
return "
|
| 221 |
# For non-Anthropic models (HF router etc.), use the same model
|
| 222 |
return main_model
|
| 223 |
|
|
|
|
| 216 |
|
| 217 |
def _get_research_model(main_model: str) -> str:
|
| 218 |
"""Pick a cheaper model for research based on the main model."""
|
| 219 |
+
if "anthropic" in main_model:
|
| 220 |
+
return "bedrock/us.anthropic.claude-sonnet-4-6"
|
| 221 |
# For non-Anthropic models (HF router etc.), use the same model
|
| 222 |
return main_model
|
| 223 |
|
agent/utils/terminal_display.py
CHANGED
|
@@ -99,7 +99,7 @@ def print_banner(model: str | None = None, hf_user: str | None = None) -> None:
|
|
| 99 |
_console.file.write("\033[2J\033[H")
|
| 100 |
_console.file.flush()
|
| 101 |
|
| 102 |
-
model_label = model or "
|
| 103 |
user_label = hf_user or "not logged in"
|
| 104 |
|
| 105 |
# Warm gold palette matching the shimmer highlight (255, 200, 80)
|
|
|
|
| 99 |
_console.file.write("\033[2J\033[H")
|
| 100 |
_console.file.flush()
|
| 101 |
|
| 102 |
+
model_label = model or "bedrock/us.anthropic.claude-opus-4-6-v1"
|
| 103 |
user_label = hf_user or "not logged in"
|
| 104 |
|
| 105 |
# Warm gold palette matching the shimmer highlight (255, 200, 80)
|
backend/routes/agent.py
CHANGED
|
@@ -47,7 +47,7 @@ AVAILABLE_MODELS = [
|
|
| 47 |
"recommended": True,
|
| 48 |
},
|
| 49 |
{
|
| 50 |
-
"id": "
|
| 51 |
"label": "Claude Opus 4.6",
|
| 52 |
"provider": "anthropic",
|
| 53 |
"tier": "pro",
|
|
@@ -68,17 +68,21 @@ AVAILABLE_MODELS = [
|
|
| 68 |
]
|
| 69 |
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
async def _require_hf_for_anthropic(request: Request, model_id: str) -> None:
|
| 72 |
"""403 if a non-``huggingface``-org user tries to select an Anthropic model.
|
| 73 |
|
| 74 |
Anthropic models are billed to the Space's ``ANTHROPIC_API_KEY``; every
|
| 75 |
other model in ``AVAILABLE_MODELS`` is routed through HF Router and
|
| 76 |
-
billed via ``X-HF-Bill-To``. The gate only fires for
|
| 77 |
non-HF users can still freely switch between the free models.
|
| 78 |
|
| 79 |
Pattern: https://github.com/huggingface/ml-intern/pull/63
|
| 80 |
"""
|
| 81 |
-
if not
|
| 82 |
return
|
| 83 |
if not await require_huggingface_org_member(request):
|
| 84 |
raise HTTPException(
|
|
@@ -110,7 +114,7 @@ async def _enforce_claude_quota(
|
|
| 110 |
if agent_session.claude_counted:
|
| 111 |
return
|
| 112 |
model_name = agent_session.session.config.model_name
|
| 113 |
-
if not
|
| 114 |
return
|
| 115 |
user_id = user["user_id"]
|
| 116 |
used = await user_quotas.get_claude_used_today(user_id)
|
|
|
|
| 47 |
"recommended": True,
|
| 48 |
},
|
| 49 |
{
|
| 50 |
+
"id": "bedrock/us.anthropic.claude-opus-4-6-v1",
|
| 51 |
"label": "Claude Opus 4.6",
|
| 52 |
"provider": "anthropic",
|
| 53 |
"tier": "pro",
|
|
|
|
| 68 |
]
|
| 69 |
|
| 70 |
|
| 71 |
+
def _is_anthropic_model(model_id: str) -> bool:
|
| 72 |
+
return "anthropic" in model_id
|
| 73 |
+
|
| 74 |
+
|
| 75 |
async def _require_hf_for_anthropic(request: Request, model_id: str) -> None:
|
| 76 |
"""403 if a non-``huggingface``-org user tries to select an Anthropic model.
|
| 77 |
|
| 78 |
Anthropic models are billed to the Space's ``ANTHROPIC_API_KEY``; every
|
| 79 |
other model in ``AVAILABLE_MODELS`` is routed through HF Router and
|
| 80 |
+
billed via ``X-HF-Bill-To``. The gate only fires for Anthropic so
|
| 81 |
non-HF users can still freely switch between the free models.
|
| 82 |
|
| 83 |
Pattern: https://github.com/huggingface/ml-intern/pull/63
|
| 84 |
"""
|
| 85 |
+
if not _is_anthropic_model(model_id):
|
| 86 |
return
|
| 87 |
if not await require_huggingface_org_member(request):
|
| 88 |
raise HTTPException(
|
|
|
|
| 114 |
if agent_session.claude_counted:
|
| 115 |
return
|
| 116 |
model_name = agent_session.session.config.model_name
|
| 117 |
+
if not _is_anthropic_model(model_name):
|
| 118 |
return
|
| 119 |
user_id = user["user_id"]
|
| 120 |
used = await user_quotas.get_claude_used_today(user_id)
|
configs/main_agent_config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"model_name": "
|
| 3 |
"save_sessions": true,
|
| 4 |
"session_dataset_repo": "akseljoonas/hf-agent-sessions",
|
| 5 |
"yolo_mode": false,
|
|
|
|
| 1 |
{
|
| 2 |
+
"model_name": "bedrock/us.anthropic.claude-opus-4-6-v1",
|
| 3 |
"save_sessions": true,
|
| 4 |
"session_dataset_repo": "akseljoonas/hf-agent-sessions",
|
| 5 |
"yolo_mode": false,
|