"""Model-switching logic for the interactive CLI's ``/model`` command.
Split out of ``agent.main`` so the REPL dispatcher stays focused on input
parsing. Exposes:
* ``SUGGESTED_MODELS`` — the short list shown by ``/model`` with no arg.
* ``is_valid_model_id`` — loose format check on user input.
* ``probe_and_switch_model`` — async: checks routing, fires a 1-token
  probe to resolve the effort cascade, then commits the switch (or
  rejects it on hard error).
The probe's cascade lives in ``agent.core.effort_probe``; this module
glues it to CLI output + session state.
"""
from __future__ import annotations
from agent.core.effort_probe import ProbeInconclusive, probe_effort
# Suggested models shown by `/model` (not a gate). Users can paste any HF
# model id (e.g. "MiniMaxAI/MiniMax-M2.7") or an `anthropic/` / `openai/`
# prefix for direct API access. For HF ids, append ":fastest" /
# ":cheapest" / ":preferred" / ":<provider>" to override the default
# routing policy (auto = fastest with failover).
SUGGESTED_MODELS = [
    {"id": "openai/gpt-5.5", "label": "GPT-5.5"},
    {"id": "openai/gpt-5.4", "label": "GPT-5.4"},
    {"id": "anthropic/claude-opus-4-7", "label": "Claude Opus 4.7"},
    {"id": "anthropic/claude-opus-4-6", "label": "Claude Opus 4.6"},
    {"id": "bedrock/us.anthropic.claude-opus-4-6-v1", "label": "Claude Opus 4.6 via Bedrock"},
    {"id": "MiniMaxAI/MiniMax-M2.7", "label": "MiniMax M2.7"},
    {"id": "moonshotai/Kimi-K2.6", "label": "Kimi K2.6"},
    {"id": "zai-org/GLM-5.1", "label": "GLM 5.1"},
]

# Tags after ":" that select a routing policy; any other tag is treated as
# a pin to a specific provider name.
_ROUTING_POLICIES = {"fastest", "cheapest", "preferred"}
def is_valid_model_id(model_id: str) -> bool:
    """Loosely validate the format of a user-supplied model id.

    Accepted shapes:
      - ``anthropic/<model>`` / ``openai/<model>`` (direct API access)
      - ``<org>/<model>[:<tag>]`` (HF router; tag = provider or policy)
      - ``huggingface/<org>/<model>[:<tag>]`` (same, legacy prefix)

    This is deliberately permissive: actual availability is verified
    against the HF router catalog on switch, and by the provider when
    the effort probe makes its ping call.
    """
    if not model_id:
        return False
    base, _, _tag = model_id.partition(":")
    segments = base.split("/")
    if len(segments) < 2:
        return False
    # Every path segment must be non-empty ("org//model" is rejected).
    return all(segments)
def _print_hf_routing_info(model_id: str, console) -> bool:
"""Show HF router catalog info (providers, price, context, tool support)
for an HF-router model id. Returns ``True`` to signal the caller can
proceed with the switch, ``False`` to indicate a hard problem the user
should notice before we fire the effort probe.
Anthropic / OpenAI ids return ``True`` without printing anything β
the probe below covers "does this model exist".
"""
if model_id.startswith(("anthropic/", "openai/")):
return True
from agent.core import hf_router_catalog as cat
bare, _, tag = model_id.partition(":")
info = cat.lookup(bare)
if info is None:
console.print(
f"[bold red]Warning:[/bold red] '{bare}' isn't in the HF router "
"catalog. Checking anyway β first call may fail."
)
suggestions = cat.fuzzy_suggest(bare)
if suggestions:
console.print(f"[dim]Did you mean: {', '.join(suggestions)}[/dim]")
return True
live = info.live_providers
if not live:
console.print(
f"[bold red]Warning:[/bold red] '{bare}' has no live providers "
"right now. First call will likely fail."
)
return True
if tag and tag not in _ROUTING_POLICIES:
matched = [p for p in live if p.provider == tag]
if not matched:
names = ", ".join(p.provider for p in live)
console.print(
f"[bold red]Warning:[/bold red] provider '{tag}' doesn't serve "
f"'{bare}'. Live providers: {names}. Checking anyway."
)
if not info.any_supports_tools:
console.print(
f"[bold red]Warning:[/bold red] no provider for '{bare}' advertises "
"tool-call support. This agent relies on tool calls β expect errors."
)
if tag in _ROUTING_POLICIES:
policy = tag
elif tag:
policy = f"pinned to {tag}"
else:
policy = "auto (fastest)"
console.print(f" [dim]routing: {policy}[/dim]")
for p in live:
price = (
f"${p.input_price:g}/${p.output_price:g} per M tok"
if p.input_price is not None and p.output_price is not None
else "price n/a"
)
ctx = f"{p.context_length:,} ctx" if p.context_length else "ctx n/a"
tools = "tools" if p.supports_tools else "no tools"
console.print(
f" [dim]{p.provider}: {price}, {ctx}, {tools}[/dim]"
)
return True
def print_model_listing(config, console) -> None:
    """Render the default ``/model`` (no-arg) view: current + suggested."""
    active = config.model_name if config else ""
    console.print("[bold]Current model:[/bold]")
    console.print(f" {active}")
    console.print("\n[bold]Suggested:[/bold]")
    for entry in SUGGESTED_MODELS:
        # Flag the suggestion that matches the active model, if any.
        tail = " [dim]<-- current[/dim]" if entry["id"] == active else ""
        console.print(f" {entry['id']} [dim]({entry['label']})[/dim]{tail}")
    console.print(
        "\n[dim]Paste any HF model id (e.g. 'MiniMaxAI/MiniMax-M2.7').\n"
        "Add ':fastest', ':cheapest', ':preferred', or ':<provider>' to override routing.\n"
        "Use 'anthropic/<model>' or 'openai/<model>' for direct API access.[/dim]"
    )
def print_invalid_id(arg: str, console) -> None:
    """Explain the accepted model-id shapes after a failed format check.

    Companion to ``is_valid_model_id``: called by the REPL when that check
    rejects ``arg``. Bullets restored to proper "•" glyphs (the file had
    mojibake in these user-facing strings).
    """
    console.print(f"[bold red]Invalid model id format:[/bold red] {arg}")
    console.print(
        "[dim]Expected:\n"
        " • <org>/<model>[:tag] (HF router — paste from huggingface.co)\n"
        " • anthropic/<model>\n"
        " • openai/<model>[/dim]"
    )
async def probe_and_switch_model(
    model_id: str,
    config,
    session,
    console,
    hf_token: str | None,
) -> None:
    """Validate model+effort with a 1-token ping, cache the effective effort,
    then commit the switch.

    Three visible outcomes:

    * ``effort: <level>`` — model accepted the preferred effort (or a
      fallback from the cascade; the note explains if so)
    * ``effort: off`` — model doesn't support thinking; we'll strip it
    * hard error (auth, model-not-found, quota) — we reject the switch
      and keep the current model so the user isn't stranded

    Transient errors (5xx, timeout) complete the switch with a yellow
    warning; the next real call re-surfaces the error if it's persistent.
    """
    preference = config.reasoning_effort
    # Surface catalog/provider warnings first; a False return would mean a
    # hard problem the user must see before we probe.
    if not _print_hf_routing_info(model_id, console):
        return
    if not preference:
        # Nothing to validate with a ping that we couldn't validate on the
        # first real call just as cheaply. Skip the probe entirely.
        _commit_switch(model_id, config, session, effective=None, cache=False)
        console.print(f"[green]Model switched to {model_id}[/green] [dim](effort: off)[/dim]")
        return
    console.print(f"[dim]checking {model_id} (effort: {preference})...[/dim]")
    try:
        outcome = await probe_effort(model_id, preference, hf_token)
    except ProbeInconclusive as e:
        # Transient/ambiguous failure: switch anyway, defer validation to
        # the first real message. Don't cache an effort level we never saw.
        _commit_switch(model_id, config, session, effective=None, cache=False)
        console.print(
            f"[yellow]Model switched to {model_id}[/yellow] "
            f"[dim](couldn't validate: {e}; will verify on first message)[/dim]"
        )
        return
    except Exception as e:
        # Hard persistent error — auth, unknown model, quota. Don't switch.
        console.print(f"[bold red]Switch failed:[/bold red] {e}")
        console.print(f"[dim]Keeping current model: {config.model_name}[/dim]")
        return
    _commit_switch(
        model_id, config, session,
        effective=outcome.effective_effort, cache=True,
    )
    effort_label = outcome.effective_effort or "off"
    # Mojibake fix: this separator was a garbled "β" in the source; it is
    # user-facing output, restored to an em dash.
    suffix = f" — {outcome.note}" if outcome.note else ""
    console.print(
        f"[green]Model switched to {model_id}[/green] "
        f"[dim](effort: {effort_label}{suffix}, {outcome.elapsed_ms}ms)[/dim]"
    )
def _commit_switch(model_id, config, session, effective, cache: bool) -> None:
"""Apply the switch to the session (or bare config if no session yet).
``effective`` is the probe's resolved effort; ``cache=True`` stores it
in the session's per-model cache so real calls use the resolved level
instead of re-probing. ``cache=False`` (inconclusive probe / effort
off) leaves the cache untouched β next call falls back to preference.
"""
if session is not None:
session.update_model(model_id)
if cache:
session.model_effective_effort[model_id] = effective
else:
session.model_effective_effort.pop(model_id, None)
else:
config.model_name = model_id
|