Aksel Joonas Reedi committed on
Commit
540437a
·
unverified ·
1 Parent(s): 5d357ba

feat(quota): daily Opus cap + HF-org gate + cap dialog (#72)

Browse files
backend/dependencies.py CHANGED
@@ -16,6 +16,7 @@ logger = logging.getLogger(__name__)
16
 
17
  OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL", "https://huggingface.co")
18
  AUTH_ENABLED = bool(os.environ.get("OAUTH_CLIENT_ID", ""))
 
19
 
20
  # Simple in-memory token cache: token -> (user_info, expiry_time)
21
  _token_cache: dict[str, tuple[dict[str, Any], float]] = {}
@@ -28,8 +29,13 @@ DEV_USER: dict[str, Any] = {
28
  "user_id": "dev",
29
  "username": "dev",
30
  "authenticated": True,
 
31
  }
32
 
 
 
 
 
33
 
34
  async def _validate_token(token: str) -> dict[str, Any] | None:
35
  """Validate a token against HF OAuth userinfo endpoint.
@@ -74,12 +80,86 @@ def _user_from_info(user_info: dict[str, Any]) -> dict[str, Any]:
74
  }
75
 
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  async def _extract_user_from_token(token: str) -> dict[str, Any] | None:
78
  """Validate a token and return a user dict, or None."""
79
  user_info = await _validate_token(token)
80
- if user_info:
81
- return _user_from_info(user_info)
82
- return None
 
 
83
 
84
 
85
  async def check_org_membership(token: str, org_name: str) -> bool:
@@ -141,3 +221,29 @@ async def get_current_user(request: Request) -> dict[str, Any]:
141
  )
142
 
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL", "https://huggingface.co")
18
  AUTH_ENABLED = bool(os.environ.get("OAUTH_CLIENT_ID", ""))
19
+ HF_EMPLOYEE_ORG = os.environ.get("HF_EMPLOYEE_ORG", "huggingface")
20
 
21
  # Simple in-memory token cache: token -> (user_info, expiry_time)
22
  _token_cache: dict[str, tuple[dict[str, Any], float]] = {}
 
29
  "user_id": "dev",
30
  "username": "dev",
31
  "authenticated": True,
32
+ "plan": "org", # Dev runs at the Pro/Org quota tier so local testing isn't capped.
33
  }
34
 
35
+ # Plan field discovery — log the whoami-v2 shape once at DEBUG so we can
36
+ # confirm the actual key in production without hammering the HF API.
37
+ _WHOAMI_SHAPE_LOGGED = False
38
+
39
 
40
  async def _validate_token(token: str) -> dict[str, Any] | None:
41
  """Validate a token against HF OAuth userinfo endpoint.
 
80
  }
81
 
82
 
83
def _normalize_plan(whoami: dict[str, Any]) -> str:
    """Map an HF /api/whoami-v2 payload to one of: 'free' | 'pro' | 'org'.

    The exact field shape in whoami-v2 isn't pinned down for our purposes,
    so several likely signals are tried, in this order:

    1. The first non-empty string among ``plan`` / ``type`` / ``accountType``
       containing "pro", "enterprise" or "team"  -> 'pro'.
    2. A boolean ``isPro`` / ``is_pro`` flag that is exactly ``True`` -> 'pro'.
       This is checked even when step 1 found a string: a payload such as
       ``{"type": "user", "isPro": True}`` would otherwise be classified as
       'free' because the non-plan ``type`` value shadowed the flag.
    3. Any dict entry in ``orgs`` whose ``plan`` / ``type`` contains one of
       the same markers -> 'org'.
    4. Otherwise 'free'.

    Args:
        whoami: Decoded JSON object returned by whoami-v2.

    Returns:
        'free', 'pro' or 'org'.
    """
    paid_markers = ("pro", "enterprise", "team")

    def _looks_paid(label: str) -> bool:
        # Substring match on purpose: the real labels are not known yet.
        return any(marker in label for marker in paid_markers)

    plan_str = ""
    for key in ("plan", "type", "accountType"):
        val = whoami.get(key)
        if isinstance(val, str) and val:
            plan_str = val.lower()
            break

    if _looks_paid(plan_str):
        return "pro"

    # Boolean flags are consulted regardless of whether a string key was
    # present; `is True` rejects truthy non-booleans such as 1 or "yes".
    if whoami.get("isPro") is True or whoami.get("is_pro") is True:
        return "pro"

    # Org tier: anyone in a paid / enterprise org. We don't pay for this
    # right now, but the "pro" cap applies identically.
    orgs = whoami.get("orgs") or []
    if isinstance(orgs, list):
        for org in orgs:
            if not isinstance(org, dict):
                continue
            org_plan = str(org.get("plan") or org.get("type") or "").lower()
            if _looks_paid(org_plan):
                return "org"

    return "free"
116
+
117
+
118
async def _fetch_user_plan(token: str) -> str:
    """Resolve the caller's plan tier from ``/api/whoami-v2``.

    Sends ``token`` as a Bearer credential to
    ``{OPENID_PROVIDER_URL}/api/whoami-v2`` (5 second timeout) and passes the
    decoded JSON object to ``_normalize_plan``.

    Returns:
        'free' | 'pro' | 'org'. A non-200 status, an ``httpx.HTTPError``, a
        body that fails to decode (``ValueError``) or a payload that is not
        a JSON object all collapse to 'free' — the safe default: better to
        under-grant the Pro cap than to over-grant it on bad data.
    """
    global _WHOAMI_SHAPE_LOGGED

    async with httpx.AsyncClient(timeout=5.0) as client:
        try:
            response = await client.get(
                f"{OPENID_PROVIDER_URL}/api/whoami-v2",
                headers={"Authorization": f"Bearer {token}"},
            )
            if response.status_code != 200:
                return "free"
            payload = response.json()
        except (httpx.HTTPError, ValueError):
            return "free"

    payload_is_dict = isinstance(payload, dict)

    # One-shot shape dump: the flag is flipped before logging so the payload
    # is described at most once per process, whatever its type.
    if not _WHOAMI_SHAPE_LOGGED:
        _WHOAMI_SHAPE_LOGGED = True
        sample = payload if payload_is_dict else {}
        shape = sorted(payload.keys()) if payload_is_dict else type(payload).__name__
        logger.debug(
            "whoami-v2 payload keys: %s (sample values: plan=%r type=%r isPro=%r)",
            shape,
            sample.get("plan"),
            sample.get("type"),
            sample.get("isPro"),
        )

    return _normalize_plan(payload) if payload_is_dict else "free"
153
+
154
+
155
async def _extract_user_from_token(token: str) -> dict[str, Any] | None:
    """Turn a token into a user dict tagged with its plan tier.

    Returns ``None`` when ``_validate_token`` yields ``None``. Otherwise the
    dict built by ``_user_from_info`` is returned with a ``"plan"`` key set
    from ``_fetch_user_plan``.
    """
    validated = await _validate_token(token)
    if validated is not None:
        profile = _user_from_info(validated)
        profile["plan"] = await _fetch_user_plan(token)
        return profile
    return None
163
 
164
 
165
  async def check_org_membership(token: str, org_name: str) -> bool:
 
221
  )
222
 
223
 
224
def _extract_token(request: Request) -> str | None:
    """Return the caller's HF access token, or ``None`` if there is none.

    An ``Authorization: Bearer <token>`` header wins; without one the
    ``hf_access_token`` cookie is used. The original docstring notes that
    this mirrors the lookup order in ``get_current_user``.
    """
    bearer_prefix = "Bearer "
    header_value = request.headers.get("Authorization", "")
    if not header_value.startswith(bearer_prefix):
        return request.cookies.get("hf_access_token")
    # Everything after the scheme is returned as-is (may be an empty string).
    return header_value[len(bearer_prefix):]
233
+
234
+
235
async def require_huggingface_org_member(request: Request) -> bool:
    """Report whether the caller belongs to the ``HF_EMPLOYEE_ORG`` org.

    ``HF_EMPLOYEE_ORG`` defaults to ``huggingface`` and can be overridden
    through the environment. Used to gate endpoints that can push a session
    onto an Anthropic model billed to the Space's ``ANTHROPIC_API_KEY``.

    Returns:
        ``True`` unconditionally when ``AUTH_ENABLED`` is false (dev mode, so
        local testing isn't blocked); ``False`` when no token can be
        extracted from the request; otherwise the result of
        ``check_org_membership``.
    """
    if AUTH_ENABLED:
        token = _extract_token(request)
        if token:
            return await check_org_membership(token, HF_EMPLOYEE_ORG)
        return False
    return True
248
+
249
+
backend/routes/agent.py CHANGED
@@ -10,7 +10,7 @@ import logging
10
  import os
11
  from typing import Any
12
 
13
- from dependencies import get_current_user
14
  from fastapi import (
15
  APIRouter,
16
  Depends,
@@ -28,7 +28,9 @@ from models import (
28
  SubmitRequest,
29
  TruncateRequest,
30
  )
31
- from session_manager import MAX_SESSIONS, SessionCapacityError, session_manager
 
 
32
 
33
  from agent.core.llm_params import _resolve_llm_params
34
 
@@ -37,31 +39,99 @@ logger = logging.getLogger(__name__)
37
  router = APIRouter(prefix="/api", tags=["agent"])
38
 
39
  AVAILABLE_MODELS = [
 
 
 
 
 
 
 
40
  {
41
  "id": "anthropic/claude-opus-4-6",
42
  "label": "Claude Opus 4.6",
43
  "provider": "anthropic",
 
44
  "recommended": True,
45
  },
46
  {
47
  "id": "MiniMaxAI/MiniMax-M2.7",
48
  "label": "MiniMax M2.7",
49
  "provider": "huggingface",
50
- "recommended": True,
51
- },
52
- {
53
- "id": "moonshotai/Kimi-K2.6",
54
- "label": "Kimi K2.6",
55
- "provider": "huggingface",
56
  },
57
  {
58
  "id": "zai-org/GLM-5.1",
59
  "label": "GLM 5.1",
60
  "provider": "huggingface",
 
61
  },
62
  ]
63
 
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  def _check_session_access(session_id: str, user: dict[str, Any]) -> None:
66
  """Verify the user has access to the given session. Raises 403 or 404."""
67
  info = session_manager.get_session_info(session_id)
@@ -143,20 +213,6 @@ async def get_model() -> dict:
143
  }
144
 
145
 
146
- @router.post("/config/model")
147
- async def set_model(body: dict, user: dict = Depends(get_current_user)) -> dict:
148
- """Set the LLM model. Applies to new conversations."""
149
- model_id = body.get("model")
150
- if not model_id:
151
- raise HTTPException(status_code=400, detail="Missing 'model' field")
152
- valid_ids = {m["id"] for m in AVAILABLE_MODELS}
153
- if model_id not in valid_ids:
154
- raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
155
- session_manager.config.model_name = model_id
156
- logger.info(f"Model changed to {model_id} by {user.get('username', 'unknown')}")
157
- return {"model": model_id}
158
-
159
-
160
  _TITLE_STRIP_CHARS = str.maketrans("", "", "`*_~#[]()")
161
 
162
 
@@ -224,6 +280,10 @@ async def create_session(
224
  and stored in the session so that tools (e.g. hf_jobs) can act on
225
  behalf of the user.
226
 
 
 
 
 
227
  Returns 503 if the server or user has reached the session limit.
228
  """
229
  # Extract the user's HF token (Bearer header, HttpOnly cookie, or env var)
@@ -236,9 +296,27 @@ async def create_session(
236
  if not hf_token:
237
  hf_token = os.environ.get("HF_TOKEN")
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  try:
240
  session_id = await session_manager.create_session(
241
- user_id=user["user_id"], hf_token=hf_token
242
  )
243
  except SessionCapacityError as e:
244
  raise HTTPException(status_code=503, detail=str(e))
@@ -254,6 +332,9 @@ async def restore_session_summary(
254
  conversation. The client sends its cached messages; we run the standard
255
  summarization prompt on them and drop the result into the new
256
  session's context as a user-role system note.
 
 
 
257
  """
258
  messages = body.get("messages")
259
  if not isinstance(messages, list) or not messages:
@@ -268,9 +349,17 @@ async def restore_session_summary(
268
  if not hf_token:
269
  hf_token = os.environ.get("HF_TOKEN")
270
 
 
 
 
 
 
 
 
 
271
  try:
272
  session_id = await session_manager.create_session(
273
- user_id=user["user_id"], hf_token=hf_token
274
  )
275
  except SessionCapacityError as e:
276
  raise HTTPException(status_code=503, detail=str(e))
@@ -302,12 +391,19 @@ async def get_session(
302
 
303
  @router.post("/session/{session_id}/model")
304
  async def set_session_model(
305
- session_id: str, body: dict, user: dict = Depends(get_current_user)
 
 
 
306
  ) -> dict:
307
  """Switch the active model for a single session (tab-scoped).
308
 
309
  Takes effect on the next LLM call in that session — other sessions
310
- (including other browser tabs) are unaffected.
 
 
 
 
311
  """
312
  _check_session_access(session_id, user)
313
  model_id = body.get("model")
@@ -316,6 +412,7 @@ async def set_session_model(
316
  valid_ids = {m["id"] for m in AVAILABLE_MODELS}
317
  if model_id not in valid_ids:
318
  raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
 
319
  agent_session = session_manager.sessions.get(session_id)
320
  if not agent_session:
321
  raise HTTPException(status_code=404, detail="Session not found")
@@ -327,6 +424,20 @@ async def set_session_model(
327
  return {"session_id": session_id, "model": model_id}
328
 
329
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  @router.get("/sessions", response_model=list[SessionInfo])
331
  async def list_sessions(user: dict = Depends(get_current_user)) -> list[SessionInfo]:
332
  """List sessions belonging to the authenticated user."""
@@ -352,6 +463,9 @@ async def submit_input(
352
  ) -> dict:
353
  """Submit user input to a session. Only accessible by the session owner."""
354
  _check_session_access(request.session_id, user)
 
 
 
355
  success = await session_manager.submit_user_input(request.session_id, request.text)
356
  if not success:
357
  raise HTTPException(status_code=404, detail="Session not found or inactive")
@@ -404,6 +518,16 @@ async def chat_sse(
404
  text = body.get("text")
405
  approvals = body.get("approvals")
406
 
 
 
 
 
 
 
 
 
 
 
407
  try:
408
  if approvals:
409
  formatted = [
 
10
  import os
11
  from typing import Any
12
 
13
+ from dependencies import get_current_user, require_huggingface_org_member
14
  from fastapi import (
15
  APIRouter,
16
  Depends,
 
28
  SubmitRequest,
29
  TruncateRequest,
30
  )
31
+ from session_manager import MAX_SESSIONS, AgentSession, SessionCapacityError, session_manager
32
+
33
+ import user_quotas
34
 
35
  from agent.core.llm_params import _resolve_llm_params
36
 
 
39
  router = APIRouter(prefix="/api", tags=["agent"])
40
 
41
  AVAILABLE_MODELS = [
42
+ {
43
+ "id": "moonshotai/Kimi-K2.6",
44
+ "label": "Kimi K2.6",
45
+ "provider": "huggingface",
46
+ "tier": "free",
47
+ "recommended": True,
48
+ },
49
  {
50
  "id": "anthropic/claude-opus-4-6",
51
  "label": "Claude Opus 4.6",
52
  "provider": "anthropic",
53
+ "tier": "pro",
54
  "recommended": True,
55
  },
56
  {
57
  "id": "MiniMaxAI/MiniMax-M2.7",
58
  "label": "MiniMax M2.7",
59
  "provider": "huggingface",
60
+ "tier": "free",
 
 
 
 
 
61
  },
62
  {
63
  "id": "zai-org/GLM-5.1",
64
  "label": "GLM 5.1",
65
  "provider": "huggingface",
66
+ "tier": "free",
67
  },
68
  ]
69
 
70
 
71
async def _require_hf_for_anthropic(request: Request, model_id: str) -> None:
    """Reject Anthropic model selection by callers outside the HF staff org.

    Anthropic models are billed to the Space's ``ANTHROPIC_API_KEY``; every
    other model in ``AVAILABLE_MODELS`` is routed through HF Router and
    billed via ``X-HF-Bill-To``. Only ``anthropic/*`` ids are gated, so
    everyone can still switch freely between the free models.

    Pattern: https://github.com/huggingface/ml-intern/pull/63

    Raises:
        HTTPException: 403 with ``error == "anthropic_restricted"`` when
            ``model_id`` is an Anthropic id and
            ``require_huggingface_org_member`` returns false.
    """
    is_anthropic = model_id.startswith("anthropic/")
    # Short-circuit keeps the membership lookup off the path for free models.
    if is_anthropic and not await require_huggingface_org_member(request):
        restriction = {
            "error": "anthropic_restricted",
            "message": (
                "Opus is gated to HF staff. Pick a free model — "
                "Kimi K2.6, MiniMax M2.7, or GLM 5.1 — instead."
            ),
        }
        raise HTTPException(status_code=403, detail=restriction)
94
+
95
+
96
async def _enforce_claude_quota(
    user: dict[str, Any],
    agent_session: AgentSession,
) -> None:
    """Charge one unit of the daily Claude quota the first time a session uses Anthropic.

    Called at *message-submit* time rather than session-create time, so
    opening a Claude session without sending anything costs nothing. The
    ``claude_counted`` flag on ``AgentSession`` ensures a session is charged
    at most once.

    Does nothing when the session was already charged or when its current
    model id does not start with ``anthropic/``.

    Raises:
        HTTPException: 429 with ``error == "claude_daily_cap"`` when today's
            usage has reached the cap for the user's plan.
    """
    # Checked first so an already-charged session returns without touching
    # its config at all.
    if agent_session.claude_counted:
        return
    if not agent_session.session.config.model_name.startswith("anthropic/"):
        return

    uid = user["user_id"]
    used_today = await user_quotas.get_claude_used_today(uid)
    daily_cap = user_quotas.daily_cap_for(user.get("plan"))

    if used_today >= daily_cap:
        cap_detail = {
            "error": "claude_daily_cap",
            "plan": user.get("plan", "free"),
            "cap": daily_cap,
            "message": (
                "Daily Claude limit reached. Upgrade to HF Pro for "
                f"{user_quotas.CLAUDE_PRO_DAILY}/day or use a free model."
            ),
        }
        raise HTTPException(status_code=429, detail=cap_detail)

    await user_quotas.increment_claude(uid)
    agent_session.claude_counted = True
133
+
134
+
135
  def _check_session_access(session_id: str, user: dict[str, Any]) -> None:
136
  """Verify the user has access to the given session. Raises 403 or 404."""
137
  info = session_manager.get_session_info(session_id)
 
213
  }
214
 
215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  _TITLE_STRIP_CHARS = str.maketrans("", "", "`*_~#[]()")
217
 
218
 
 
280
  and stored in the session so that tools (e.g. hf_jobs) can act on
281
  behalf of the user.
282
 
283
+ Optional body ``{"model"?: <id>}`` selects the session's LLM; unknown
284
+ ids are rejected (400). The Claude-quota gate runs at message-submit
285
+ time, not here — spinning up an Opus session to look around is free.
286
+
287
  Returns 503 if the server or user has reached the session limit.
288
  """
289
  # Extract the user's HF token (Bearer header, HttpOnly cookie, or env var)
 
296
  if not hf_token:
297
  hf_token = os.environ.get("HF_TOKEN")
298
 
299
+ # Optional model override. Empty body falls back to the config default.
300
+ model: str | None = None
301
+ try:
302
+ body = await request.json()
303
+ except Exception:
304
+ body = None
305
+ if isinstance(body, dict):
306
+ model = body.get("model")
307
+
308
+ valid_ids = {m["id"] for m in AVAILABLE_MODELS}
309
+ if model and model not in valid_ids:
310
+ raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
311
+
312
+ # Opus is gated to HF staff (PR #63). Only fires when the resolved model
313
+ # is Anthropic; free models pass through.
314
+ resolved_model = model or session_manager.config.model_name
315
+ await _require_hf_for_anthropic(request, resolved_model)
316
+
317
  try:
318
  session_id = await session_manager.create_session(
319
+ user_id=user["user_id"], hf_token=hf_token, model=model
320
  )
321
  except SessionCapacityError as e:
322
  raise HTTPException(status_code=503, detail=str(e))
 
332
  conversation. The client sends its cached messages; we run the standard
333
  summarization prompt on them and drop the result into the new
334
  session's context as a user-role system note.
335
+
336
+ Optional ``"model"`` in the body overrides the session's LLM. The
337
+ Claude-quota gate runs at message-submit time, not here.
338
  """
339
  messages = body.get("messages")
340
  if not isinstance(messages, list) or not messages:
 
349
  if not hf_token:
350
  hf_token = os.environ.get("HF_TOKEN")
351
 
352
+ model = body.get("model")
353
+ valid_ids = {m["id"] for m in AVAILABLE_MODELS}
354
+ if model and model not in valid_ids:
355
+ raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
356
+
357
+ resolved_model = model or session_manager.config.model_name
358
+ await _require_hf_for_anthropic(request, resolved_model)
359
+
360
  try:
361
  session_id = await session_manager.create_session(
362
+ user_id=user["user_id"], hf_token=hf_token, model=model
363
  )
364
  except SessionCapacityError as e:
365
  raise HTTPException(status_code=503, detail=str(e))
 
391
 
392
  @router.post("/session/{session_id}/model")
393
  async def set_session_model(
394
+ session_id: str,
395
+ body: dict,
396
+ request: Request,
397
+ user: dict = Depends(get_current_user),
398
  ) -> dict:
399
  """Switch the active model for a single session (tab-scoped).
400
 
401
  Takes effect on the next LLM call in that session — other sessions
402
+ (including other browser tabs) are unaffected. Model switches don't
403
+ charge quota — the Claude-quota gate only fires at message-submit time.
404
+
405
+ Switching TO an Anthropic model requires HF org membership (PR #63);
406
+ free-model switches are unrestricted.
407
  """
408
  _check_session_access(session_id, user)
409
  model_id = body.get("model")
 
412
  valid_ids = {m["id"] for m in AVAILABLE_MODELS}
413
  if model_id not in valid_ids:
414
  raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
415
+ await _require_hf_for_anthropic(request, model_id)
416
  agent_session = session_manager.sessions.get(session_id)
417
  if not agent_session:
418
  raise HTTPException(status_code=404, detail="Session not found")
 
424
  return {"session_id": session_id, "model": model_id}
425
 
426
 
427
@router.get("/user/quota")
async def get_user_quota(user: dict = Depends(get_current_user)) -> dict:
    """Report the caller's plan tier and today's Claude quota usage.

    Returns:
        A dict with ``plan``, ``claude_used_today``, ``claude_daily_cap`` and
        ``claude_remaining`` (never negative).
    """
    tier = user.get("plan", "free")
    consumed = await user_quotas.get_claude_used_today(user["user_id"])
    limit = user_quotas.daily_cap_for(tier)
    # Usage can exceed the cap, so floor the remainder at zero.
    left = limit - consumed if limit > consumed else 0
    return {
        "plan": tier,
        "claude_used_today": consumed,
        "claude_daily_cap": limit,
        "claude_remaining": left,
    }
439
+
440
+
441
  @router.get("/sessions", response_model=list[SessionInfo])
442
  async def list_sessions(user: dict = Depends(get_current_user)) -> list[SessionInfo]:
443
  """List sessions belonging to the authenticated user."""
 
463
  ) -> dict:
464
  """Submit user input to a session. Only accessible by the session owner."""
465
  _check_session_access(request.session_id, user)
466
+ agent_session = session_manager.sessions.get(request.session_id)
467
+ if agent_session is not None:
468
+ await _enforce_claude_quota(user, agent_session)
469
  success = await session_manager.submit_user_input(request.session_id, request.text)
470
  if not success:
471
  raise HTTPException(status_code=404, detail="Session not found or inactive")
 
518
  text = body.get("text")
519
  approvals = body.get("approvals")
520
 
521
+ # Gate user-message sends against the daily Claude quota. Approvals are
522
+ # continuations of an in-progress turn — the session was already charged
523
+ # on its first message, so we skip the gate there.
524
+ if text is not None and not approvals:
525
+ try:
526
+ await _enforce_claude_quota(user, agent_session)
527
+ except HTTPException:
528
+ broadcaster.unsubscribe(sub_id)
529
+ raise
530
+
531
  try:
532
  if approvals:
533
  formatted = [
backend/session_manager.py CHANGED
@@ -91,6 +91,10 @@ class AgentSession:
91
  is_active: bool = True
92
  is_processing: bool = False # True while a submission is being executed
93
  broadcaster: Any = None
 
 
 
 
94
 
95
 
96
  class SessionCapacityError(Exception):
@@ -126,7 +130,12 @@ class SessionManager:
126
  if s.user_id == user_id and s.is_active
127
  )
128
 
129
- async def create_session(self, user_id: str = "dev", hf_token: str | None = None) -> str:
 
 
 
 
 
130
  """Create a new agent session and return its ID.
131
 
132
  Session() and ToolRouter() constructors contain blocking I/O
@@ -135,6 +144,10 @@ class SessionManager:
135
 
136
  Args:
137
  user_id: The ID of the user who owns this session.
 
 
 
 
138
 
139
  Raises:
140
  SessionCapacityError: If the server or user has reached the
@@ -175,6 +188,8 @@ class SessionManager:
175
  # Deep-copy config so each session's model switches independently —
176
  # tab A picking GLM doesn't flip tab B off Claude.
177
  session_config = self.config.model_copy(deep=True)
 
 
178
  session = Session(
179
  event_queue, config=session_config, tool_router=tool_router,
180
  hf_token=hf_token,
 
91
  is_active: bool = True
92
  is_processing: bool = False # True while a submission is being executed
93
  broadcaster: Any = None
94
+ # True once this session has been counted against the user's daily
95
+ # Claude quota. Guards double-counting when the user re-selects an
96
+ # Anthropic model mid-session.
97
+ claude_counted: bool = False
98
 
99
 
100
  class SessionCapacityError(Exception):
 
130
  if s.user_id == user_id and s.is_active
131
  )
132
 
133
+ async def create_session(
134
+ self,
135
+ user_id: str = "dev",
136
+ hf_token: str | None = None,
137
+ model: str | None = None,
138
+ ) -> str:
139
  """Create a new agent session and return its ID.
140
 
141
  Session() and ToolRouter() constructors contain blocking I/O
 
144
 
145
  Args:
146
  user_id: The ID of the user who owns this session.
147
+ hf_token: The user's HF OAuth token, stored for tool execution.
148
+ model: Optional model override. When set, replaces ``model_name``
149
+ on the per-session config clone. None falls back to the
150
+ config default.
151
 
152
  Raises:
153
  SessionCapacityError: If the server or user has reached the
 
188
  # Deep-copy config so each session's model switches independently —
189
  # tab A picking GLM doesn't flip tab B off Claude.
190
  session_config = self.config.model_copy(deep=True)
191
+ if model:
192
+ session_config.model_name = model
193
  session = Session(
194
  event_queue, config=session_config, tool_router=tool_router,
195
  hf_token=hf_token,
backend/user_quotas.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """In-memory daily quota for Claude session creations.
2
+
3
+ Tracks per-user Claude session starts against a daily cap derived from the
4
+ user's HF plan. Caps reset at UTC midnight; the store itself is in-process
5
+ and wipes on restart (deliberate — the cost of occasional over-subsidy at
6
+ restart is much lower than running a DB).
7
+
8
+ Unit: session *creations*, not messages. A user who selects Claude in a new
9
+ session consumes one quota point; switching an existing Claude session to
10
+ Claude again doesn't (`AgentSession.claude_counted` guards that).
11
+
12
+ Cap tiers:
13
+ free user → CLAUDE_FREE_DAILY (1)
14
+ pro / org → CLAUDE_PRO_DAILY (20)
15
+ """
16
+
17
+ import asyncio
18
+ import os
19
+ from datetime import UTC, datetime
20
+
21
+ CLAUDE_FREE_DAILY: int = int(os.environ.get("CLAUDE_FREE_DAILY", "1"))
22
+ CLAUDE_PRO_DAILY: int = int(os.environ.get("CLAUDE_PRO_DAILY", "20"))
23
+
24
+ # user_id -> (day_utc_iso, count_for_that_day)
25
+ _claude_counts: dict[str, tuple[str, int]] = {}
26
+ _lock = asyncio.Lock()
27
+
28
+
29
+ def _today() -> str:
30
+ return datetime.now(UTC).date().isoformat()
31
+
32
+
33
+ def daily_cap_for(plan: str | None) -> int:
34
+ """Return the daily Claude-session cap for the given plan."""
35
+ return CLAUDE_FREE_DAILY if (plan or "free") == "free" else CLAUDE_PRO_DAILY
36
+
37
+
38
+ async def get_claude_used_today(user_id: str) -> int:
39
+ """Return today's Claude session count for the user (0 if none / stale day)."""
40
+ async with _lock:
41
+ entry = _claude_counts.get(user_id)
42
+ if entry is None:
43
+ return 0
44
+ day, count = entry
45
+ if day != _today():
46
+ # Stale day — drop the entry so the first increment starts fresh.
47
+ _claude_counts.pop(user_id, None)
48
+ return 0
49
+ return count
50
+
51
+
52
+ async def increment_claude(user_id: str) -> int:
53
+ """Bump today's Claude session count for the user. Returns the new value."""
54
+ async with _lock:
55
+ today = _today()
56
+ day, count = _claude_counts.get(user_id, (today, 0))
57
+ if day != today:
58
+ count = 0
59
+ count += 1
60
+ _claude_counts[user_id] = (today, count)
61
+ return count
62
+
63
+
64
+ async def refund_claude(user_id: str) -> None:
65
+ """Decrement today's count — used when session creation fails after a successful gate."""
66
+ async with _lock:
67
+ entry = _claude_counts.get(user_id)
68
+ if entry is None:
69
+ return
70
+ day, count = entry
71
+ if day != _today():
72
+ _claude_counts.pop(user_id, None)
73
+ return
74
+ new_count = max(0, count - 1)
75
+ if new_count == 0:
76
+ _claude_counts.pop(user_id, None)
77
+ else:
78
+ _claude_counts[user_id] = (day, new_count)
79
+
80
+
81
+ def _reset_for_tests() -> None:
82
+ """Test-only: clear the in-memory store."""
83
+ _claude_counts.clear()
frontend/src/components/Chat/ChatInput.tsx CHANGED
@@ -4,6 +4,10 @@ import ArrowUpwardIcon from '@mui/icons-material/ArrowUpward';
4
  import ArrowDropDownIcon from '@mui/icons-material/ArrowDropDown';
5
  import StopIcon from '@mui/icons-material/Stop';
6
  import { apiFetch } from '@/utils/api';
 
 
 
 
7
 
8
  // Model configuration
9
  interface ModelOption {
@@ -21,6 +25,14 @@ const getHfAvatarUrl = (modelId: string) => {
21
  };
22
 
23
  const MODEL_OPTIONS: ModelOption[] = [
 
 
 
 
 
 
 
 
24
  {
25
  id: 'claude-opus',
26
  name: 'Claude Opus 4.6',
@@ -35,14 +47,6 @@ const MODEL_OPTIONS: ModelOption[] = [
35
  description: 'Novita',
36
  modelPath: 'MiniMaxAI/MiniMax-M2.7',
37
  avatarUrl: getHfAvatarUrl('MiniMaxAI/MiniMax-M2.7'),
38
- recommended: true,
39
- },
40
- {
41
- id: 'kimi-k2.6',
42
- name: 'Kimi K2.6',
43
- description: 'Novita',
44
- modelPath: 'moonshotai/Kimi-K2.6',
45
- avatarUrl: getHfAvatarUrl('moonshotai/Kimi-K2.6'),
46
  },
47
  {
48
  id: 'glm-5.1',
@@ -66,11 +70,23 @@ interface ChatInputProps {
66
  placeholder?: string;
67
  }
68
 
 
 
 
69
  export default function ChatInput({ sessionId, onSend, onStop, isProcessing = false, disabled = false, placeholder = 'Ask anything...' }: ChatInputProps) {
70
  const [input, setInput] = useState('');
71
  const inputRef = useRef<HTMLTextAreaElement>(null);
72
  const [selectedModelId, setSelectedModelId] = useState<string>(MODEL_OPTIONS[0].id);
73
  const [modelAnchorEl, setModelAnchorEl] = useState<null | HTMLElement>(null);
 
 
 
 
 
 
 
 
 
74
 
75
  // Model is per-session: fetch this tab's current model every time the
76
  // session changes. Other tabs keep their own selections independently.
@@ -101,11 +117,27 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
101
 
102
  const handleSend = useCallback(() => {
103
  if (input.trim() && !disabled) {
 
104
  onSend(input);
105
  setInput('');
106
  }
107
  }, [input, disabled, onSend]);
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  const handleKeyDown = useCallback(
110
  (e: KeyboardEvent<HTMLDivElement>) => {
111
  if (e.key === 'Enter' && !e.shiftKey) {
@@ -136,6 +168,45 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
136
  } catch { /* ignore */ }
137
  };
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  return (
140
  <Box
141
  sx={{
@@ -334,6 +405,19 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
334
  }}
335
  />
336
  )}
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  </Box>
338
  }
339
  secondary={model.description}
@@ -344,6 +428,14 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
344
  </MenuItem>
345
  ))}
346
  </Menu>
 
 
 
 
 
 
 
 
347
  </Box>
348
  </Box>
349
  );
 
4
  import ArrowDropDownIcon from '@mui/icons-material/ArrowDropDown';
5
  import StopIcon from '@mui/icons-material/Stop';
6
  import { apiFetch } from '@/utils/api';
7
+ import { useUserQuota } from '@/hooks/useUserQuota';
8
+ import ClaudeCapDialog from '@/components/ClaudeCapDialog';
9
+ import { useAgentStore } from '@/store/agentStore';
10
+ import { FIRST_FREE_MODEL_PATH } from '@/utils/model';
11
 
12
  // Model configuration
13
  interface ModelOption {
 
25
  };
26
 
27
  const MODEL_OPTIONS: ModelOption[] = [
28
+ {
29
+ id: 'kimi-k2.6',
30
+ name: 'Kimi K2.6',
31
+ description: 'Novita',
32
+ modelPath: 'moonshotai/Kimi-K2.6',
33
+ avatarUrl: getHfAvatarUrl('moonshotai/Kimi-K2.6'),
34
+ recommended: true,
35
+ },
36
  {
37
  id: 'claude-opus',
38
  name: 'Claude Opus 4.6',
 
47
  description: 'Novita',
48
  modelPath: 'MiniMaxAI/MiniMax-M2.7',
49
  avatarUrl: getHfAvatarUrl('MiniMaxAI/MiniMax-M2.7'),
 
 
 
 
 
 
 
 
50
  },
51
  {
52
  id: 'glm-5.1',
 
70
  placeholder?: string;
71
  }
72
 
73
/** True when the option's model path lives under the `anthropic/` namespace. */
function isClaudeModel(m: ModelOption): boolean {
  return m.modelPath.startsWith('anthropic/');
}
74
/** First non-Claude entry of MODEL_OPTIONS; falls back to the first entry when every option is Claude. */
function firstFreeModel() {
  for (const option of MODEL_OPTIONS) {
    if (!isClaudeModel(option)) return option;
  }
  return MODEL_OPTIONS[0];
}
75
+
76
  export default function ChatInput({ sessionId, onSend, onStop, isProcessing = false, disabled = false, placeholder = 'Ask anything...' }: ChatInputProps) {
77
  const [input, setInput] = useState('');
78
  const inputRef = useRef<HTMLTextAreaElement>(null);
79
  const [selectedModelId, setSelectedModelId] = useState<string>(MODEL_OPTIONS[0].id);
80
  const [modelAnchorEl, setModelAnchorEl] = useState<null | HTMLElement>(null);
81
+ const { quota, refresh: refreshQuota } = useUserQuota();
82
+ // The daily-cap dialog currently has a single trigger: a 429 returned
83
+ // from the chat transport when the user tries to send on Opus over cap,
84
+ // surfaced via the agent-store flag. Switching models is free, so it
85
+ // never opens the dialog. Keeping the open state in the store means
86
+ // the hook layer can flip it without threading props through.
87
+ const claudeQuotaExhausted = useAgentStore((s) => s.claudeQuotaExhausted);
88
+ const setClaudeQuotaExhausted = useAgentStore((s) => s.setClaudeQuotaExhausted);
89
+ const lastSentRef = useRef<string>('');
90
 
91
  // Model is per-session: fetch this tab's current model every time the
92
  // session changes. Other tabs keep their own selections independently.
 
117
 
118
  const handleSend = useCallback(() => {
119
  if (input.trim() && !disabled) {
120
+ lastSentRef.current = input;
121
  onSend(input);
122
  setInput('');
123
  }
124
  }, [input, disabled, onSend]);
125
 
126
+ // When the chat transport reports a Claude-quota 429, restore the typed
127
+ // text so the user doesn't lose their message.
128
+ useEffect(() => {
129
+ if (claudeQuotaExhausted && lastSentRef.current) {
130
+ setInput(lastSentRef.current);
131
+ }
132
+ }, [claudeQuotaExhausted]);
133
+
134
+ // Refresh the quota display whenever the session changes (user might
135
+ // have started another tab that spent quota).
136
+ useEffect(() => {
137
+ if (sessionId) refreshQuota();
138
+ // eslint-disable-next-line react-hooks/exhaustive-deps
139
+ }, [sessionId]);
140
+
141
  const handleKeyDown = useCallback(
142
  (e: KeyboardEvent<HTMLDivElement>) => {
143
  if (e.key === 'Enter' && !e.shiftKey) {
 
168
  } catch { /* ignore */ }
169
  };
170
 
171
+ // Dialog close: just clear the flag. The typed text is already restored.
172
+ const handleCapDialogClose = useCallback(() => {
173
+ setClaudeQuotaExhausted(false);
174
+ }, [setClaudeQuotaExhausted]);
175
+
176
+ // "Use a free model" — switch the current session to Kimi (or the first
177
+ // non-Anthropic option) and auto-retry the send that tripped the cap.
178
+ const handleUseFreeModel = useCallback(async () => {
179
+ setClaudeQuotaExhausted(false);
180
+ if (!sessionId) return;
181
+ const free = MODEL_OPTIONS.find(m => m.modelPath === FIRST_FREE_MODEL_PATH)
182
+ ?? firstFreeModel();
183
+ try {
184
+ const res = await apiFetch(`/api/session/${sessionId}/model`, {
185
+ method: 'POST',
186
+ body: JSON.stringify({ model: free.modelPath }),
187
+ });
188
+ if (res.ok) {
189
+ setSelectedModelId(free.id);
190
+ const retryText = lastSentRef.current;
191
+ if (retryText) {
192
+ onSend(retryText);
193
+ setInput('');
194
+ lastSentRef.current = '';
195
+ }
196
+ }
197
+ } catch { /* ignore */ }
198
+ }, [sessionId, onSend, setClaudeQuotaExhausted]);
199
+
200
+ // Hide the chip until the user has actually burned quota — an unused
201
+ // Opus session shouldn't populate a counter.
202
+ const claudeChip = (() => {
203
+ if (!quota || quota.claudeUsedToday === 0) return null;
204
+ if (quota.plan === 'free') {
205
+ return quota.claudeRemaining > 0 ? 'Free today' : 'Pro only';
206
+ }
207
+ return `${quota.claudeUsedToday}/${quota.claudeDailyCap} today`;
208
+ })();
209
+
210
  return (
211
  <Box
212
  sx={{
 
405
  }}
406
  />
407
  )}
408
+ {isClaudeModel(model) && claudeChip && (
409
+ <Chip
410
+ label={claudeChip}
411
+ size="small"
412
+ sx={{
413
+ height: '18px',
414
+ fontSize: '10px',
415
+ bgcolor: 'rgba(255,255,255,0.08)',
416
+ color: 'var(--muted-text)',
417
+ fontWeight: 600,
418
+ }}
419
+ />
420
+ )}
421
  </Box>
422
  }
423
  secondary={model.description}
 
428
  </MenuItem>
429
  ))}
430
  </Menu>
431
+
432
+ <ClaudeCapDialog
433
+ open={claudeQuotaExhausted}
434
+ plan={quota?.plan ?? 'free'}
435
+ cap={quota?.claudeDailyCap ?? 1}
436
+ onClose={handleCapDialogClose}
437
+ onUseFreeModel={handleUseFreeModel}
438
+ />
439
  </Box>
440
  </Box>
441
  );
frontend/src/components/ClaudeCapDialog.tsx ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {
2
+ Box,
3
+ Button,
4
+ Dialog,
5
+ DialogActions,
6
+ DialogContent,
7
+ DialogContentText,
8
+ DialogTitle,
9
+ Typography,
10
+ } from '@mui/material';
11
+ import type { PlanTier } from '@/hooks/useUserQuota';
12
+
13
+ const HF_PRICING_URL = 'https://huggingface.co/pricing';
14
+ const PRO_CAP = 20;
15
+
16
+ interface ClaudeCapDialogProps {
17
+ open: boolean;
18
+ plan: PlanTier;
19
+ cap: number;
20
+ onClose: () => void;
21
+ onUseFreeModel: () => void;
22
+ }
23
+
24
/**
 * Dialog telling the user they have reached their daily Opus cap.
 *
 * Offers two actions: a link to the HF pricing page and a "Use a free model"
 * button that invokes `onUseFreeModel`. `plan` is part of the props but is
 * intentionally not read — every tier currently sees the same copy.
 */
export default function ClaudeCapDialog(props: ClaudeCapDialogProps) {
  const { open, cap, onClose, onUseFreeModel } = props;
  // Singular wording only when the cap is exactly one session.
  const sessionNoun = cap === 1 ? 'session' : 'sessions';

  return (
    <Dialog
      open={open}
      onClose={onClose}
      slotProps={{
        backdrop: {
          sx: { backgroundColor: 'rgba(0,0,0,0.5)', backdropFilter: 'blur(4px)' },
        },
      }}
      PaperProps={{
        sx: {
          bgcolor: 'var(--panel)',
          border: '1px solid var(--border)',
          borderRadius: 'var(--radius-md)',
          boxShadow: 'var(--shadow-1)',
          maxWidth: 460,
          mx: 2,
        },
      }}
    >
      <DialogTitle
        sx={{
          color: 'var(--text)',
          fontWeight: 700,
          fontSize: '1rem',
          pt: 2.5,
          pb: 0,
          px: 3,
        }}
      >
        You've hit your Opus limit
      </DialogTitle>

      <DialogContent sx={{ px: 3, pt: 1.25, pb: 0 }}>
        <DialogContentText
          sx={{ color: 'var(--muted-text)', fontSize: '0.85rem', lineHeight: 1.6 }}
        >
          Opus costs an arm and a leg, so we unfortunately have to cap you at {cap}{' '}
          {sessionNoun} a day. Give Kimi, MiniMax, or GLM a spin —
          they are genuinely good and we use them all the time.
        </DialogContentText>

        {/* Upgrade pitch */}
        <Box
          sx={{
            mt: 2,
            p: 1.5,
            borderRadius: '8px',
            bgcolor: 'var(--accent-yellow-weak)',
            border: '1px solid var(--border)',
          }}
        >
          <Typography
            variant="caption"
            sx={{
              display: 'block',
              fontWeight: 700,
              color: 'var(--text)',
              fontSize: '0.78rem',
              mb: 0.5,
              letterSpacing: '0.02em',
            }}
          >
            HF Pro ($9/mo) — more Opus, more everything
          </Typography>
          <Typography
            variant="caption"
            sx={{
              display: 'block',
              color: 'var(--muted-text)',
              fontSize: '0.78rem',
              lineHeight: 1.55,
            }}
          >
            {PRO_CAP} Opus sessions/day here, 20× HF Inference credits, ZeroGPU access,
            and priority on Spaces hardware.
          </Typography>
        </Box>
      </DialogContent>

      <DialogActions sx={{ px: 3, pb: 2.5, pt: 2, gap: 1 }}>
        {/* Primary action: opens the pricing page in a new tab. */}
        <Button
          component="a"
          href={HF_PRICING_URL}
          target="_blank"
          rel="noopener noreferrer"
          variant="contained"
          size="small"
          sx={{
            fontSize: '0.82rem',
            px: 2.5,
            bgcolor: 'var(--accent-yellow)',
            color: '#000',
            textTransform: 'none',
            fontWeight: 700,
            boxShadow: 'none',
            '&:hover': { bgcolor: '#FFB340', boxShadow: 'none' },
          }}
        >
          Upgrade to Pro
        </Button>
        {/* Secondary action: delegates to the caller-supplied handler. */}
        <Button
          onClick={onUseFreeModel}
          size="small"
          sx={{
            color: 'var(--muted-text)',
            fontSize: '0.82rem',
            px: 2,
            textTransform: 'none',
            '&:hover': { bgcolor: 'var(--hover-bg)' },
          }}
        >
          Use a free model
        </Button>
      </DialogActions>
    </Dialog>
  );
}
frontend/src/hooks/useAgentChat.ts CHANGED
@@ -345,8 +345,16 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
345
  // sendMessages on the transport.
346
  sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
347
  onError: (error) => {
348
- logger.error('useChat error:', error);
349
  updateSession(sessionId, { isProcessing: false });
 
 
 
 
 
 
 
 
 
350
  if (isActiveRef.current) {
351
  useAgentStore.getState().setError(error.message);
352
  }
 
345
  // sendMessages on the transport.
346
  sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
347
  onError: (error) => {
 
348
  updateSession(sessionId, { isProcessing: false });
349
+ // Claude daily-cap: open the cap dialog instead of the generic error
350
+ // banner. Transport marks the error with this sentinel.
351
+ if (error.message === 'CLAUDE_QUOTA_EXHAUSTED') {
352
+ if (isActiveRef.current) {
353
+ useAgentStore.getState().setClaudeQuotaExhausted(true);
354
+ }
355
+ return;
356
+ }
357
+ logger.error('useChat error:', error);
358
  if (isActiveRef.current) {
359
  useAgentStore.getState().setError(error.message);
360
  }
frontend/src/hooks/useUserQuota.ts ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Reads the current user's Claude daily quota + plan tier from the backend.
3
+ *
4
+ * Fetches once when the user becomes authenticated, and exposes a `refresh()`
5
+ * that callers invoke after a successful session-create / model-switch so the
6
+ * chip reflects the new count without a full page reload.
7
+ */
8
+ import { useCallback, useEffect, useState } from 'react';
9
+ import { useAgentStore } from '@/store/agentStore';
10
+ import { apiFetch } from '@/utils/api';
11
+
12
+ export type PlanTier = 'free' | 'pro' | 'org';
13
+
14
+ export interface UserQuota {
15
+ plan: PlanTier;
16
+ claudeUsedToday: number;
17
+ claudeDailyCap: number;
18
+ claudeRemaining: number;
19
+ }
20
+
21
/**
 * Hook exposing the signed-in user's Claude quota.
 *
 * Returns `{ quota, loading, refresh }`. `quota` is null until a fetch has
 * succeeded and is reset to null whenever the user is not authenticated, so
 * a previous account's numbers are never shown after sign-out. `refresh()`
 * re-reads `/api/user/quota`; on a non-OK response or a network error the
 * previous value is kept.
 */
export function useUserQuota() {
  const user = useAgentStore((s) => s.user);
  const [quota, setQuota] = useState<UserQuota | null>(null);
  const [loading, setLoading] = useState(false);

  const refresh = useCallback(async () => {
    if (!user?.authenticated) {
      // Nothing to fetch while signed out; also drop any stale quota.
      setQuota(null);
      return;
    }
    setLoading(true);
    try {
      const res = await apiFetch('/api/user/quota');
      if (!res.ok) return;
      const data = await res.json();
      const used = data.claude_used_today ?? 0;
      const cap = data.claude_daily_cap ?? 1;
      setQuota({
        plan: (data.plan ?? 'free') as PlanTier,
        claudeUsedToday: used,
        claudeDailyCap: cap,
        // If the backend omits the remaining count, derive it from cap and
        // usage so the three numbers stay consistent with each other.
        claudeRemaining: data.claude_remaining ?? Math.max(0, cap - used),
      });
    } catch {
      /* backend unreachable — leave previous value */
    } finally {
      setLoading(false);
    }
  }, [user?.authenticated]);

  // Fetch on mount and again whenever the authentication state flips.
  useEffect(() => {
    refresh();
  }, [refresh]);

  return { quota, loading, refresh };
}
frontend/src/lib/sse-chat-transport.ts CHANGED
@@ -356,6 +356,12 @@ export class SSEChatTransport implements ChatTransport<UIMessage> {
356
  // it can flag the session for the catch-up banner.
357
  this.sideChannel.onSessionDead(sessionId);
358
  }
 
 
 
 
 
 
359
  if (!response.ok) {
360
  const errorText = await response.text().catch(() => 'Request failed');
361
  throw new Error(`Chat request failed: ${response.status} ${errorText}`);
 
356
  // it can flag the session for the catch-up banner.
357
  this.sideChannel.onSessionDead(sessionId);
358
  }
359
+ if (response.status === 429) {
360
+ // Claude daily-quota gate tripped. This exact message is the sentinel
361
+ // for useAgentChat's onError handler, which surfaces the cap dialog
362
+ // instead of a generic error banner.
363
+ throw new Error('CLAUDE_QUOTA_EXHAUSTED');
364
+ }
365
  if (!response.ok) {
366
  const errorText = await response.text().catch(() => 'Request failed');
367
  throw new Error(`Chat request failed: ${response.status} ${errorText}`);
frontend/src/store/agentStore.ts CHANGED
@@ -108,6 +108,8 @@ interface AgentStore {
108
  user: User | null;
109
  error: string | null;
110
  llmHealthError: LLMHealthError | null;
 
 
111
 
112
  // Right panel (single-artifact pattern)
113
  panelData: PanelData | null;
@@ -153,6 +155,7 @@ interface AgentStore {
153
  setUser: (user: User | null) => void;
154
  setError: (error: string | null) => void;
155
  setLlmHealthError: (error: LLMHealthError | null) => void;
 
156
 
157
  setPanel: (data: PanelData, view?: PanelView, editable?: boolean) => void;
158
  setPanelView: (view: PanelView) => void;
@@ -247,6 +250,7 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
247
  user: null,
248
  error: null,
249
  llmHealthError: null,
 
250
 
251
  panelData: null,
252
  panelView: 'script',
@@ -358,6 +362,7 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
358
  setUser: (user) => set({ user }),
359
  setError: (error) => set({ error }),
360
  setLlmHealthError: (error) => set({ llmHealthError: error }),
 
361
 
362
  // ── Panel (single-artifact) ───────────────────────────────────────
363
  // Each setter also patches the active session's snapshot so that
 
108
  user: User | null;
109
  error: string | null;
110
  llmHealthError: LLMHealthError | null;
111
+ /** Set when a Claude-send hits the daily quota — ChatInput opens the cap dialog in response. */
112
+ claudeQuotaExhausted: boolean;
113
 
114
  // Right panel (single-artifact pattern)
115
  panelData: PanelData | null;
 
155
  setUser: (user: User | null) => void;
156
  setError: (error: string | null) => void;
157
  setLlmHealthError: (error: LLMHealthError | null) => void;
158
+ setClaudeQuotaExhausted: (exhausted: boolean) => void;
159
 
160
  setPanel: (data: PanelData, view?: PanelView, editable?: boolean) => void;
161
  setPanelView: (view: PanelView) => void;
 
250
  user: null,
251
  error: null,
252
  llmHealthError: null,
253
+ claudeQuotaExhausted: false,
254
 
255
  panelData: null,
256
  panelView: 'script',
 
362
  setUser: (user) => set({ user }),
363
  setError: (error) => set({ error }),
364
  setLlmHealthError: (error) => set({ llmHealthError: error }),
365
+ setClaudeQuotaExhausted: (exhausted) => set({ claudeQuotaExhausted: exhausted }),
366
 
367
  // ── Panel (single-artifact) ───────────────────────────────────────
368
  // Each setter also patches the active session's snapshot so that
frontend/src/utils/model.ts ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Shared model-id constants used by session-create call sites and the
3
+ * ClaudeCapDialog "Use a free model" escape hatch.
4
+ *
5
+ * Keep in sync with MODEL_OPTIONS in components/Chat/ChatInput.tsx and
6
+ * AVAILABLE_MODELS in backend/routes/agent.py. Bare HF ids (no
7
+ * `huggingface/` prefix) — matches upstream's auto-router.
8
+ */
9
+
10
+ export const CLAUDE_MODEL_PATH = 'anthropic/claude-opus-4-6';
11
+ export const FIRST_FREE_MODEL_PATH = 'moonshotai/Kimi-K2.6';
12
+
13
/** True when `modelPath` is a non-empty string under the `anthropic/` namespace. */
export function isClaudePath(modelPath: string | undefined): boolean {
  if (!modelPath) {
    return false;
  }
  return modelPath.startsWith('anthropic/');
}
tests/unit/test_user_quotas.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for backend/user_quotas.py — the in-memory Claude daily-quota store."""
2
+
3
+ import asyncio
4
+ import os
5
+ import sys
6
+ from pathlib import Path
7
+ from unittest.mock import patch
8
+
9
+ import pytest
10
+
11
+ # The backend package isn't on sys.path by default; add it so we can import
12
+ # the module under test without pulling in the whole FastAPI app.
13
+ _BACKEND_DIR = Path(__file__).resolve().parent.parent.parent / "backend"
14
+ if str(_BACKEND_DIR) not in sys.path:
15
+ sys.path.insert(0, str(_BACKEND_DIR))
16
+
17
+ import user_quotas # noqa: E402
18
+
19
+
20
@pytest.fixture(autouse=True)
def _reset_store():
    """Reset the quota store on both sides of every test."""
    reset = user_quotas._reset_for_tests
    reset()
    yield
    reset()
26
+
27
+
28
def test_daily_cap_for_known_plans():
    """Each recognised plan tier maps onto its configured daily cap."""
    expected_caps = {
        "free": user_quotas.CLAUDE_FREE_DAILY,
        "pro": user_quotas.CLAUDE_PRO_DAILY,
        "org": user_quotas.CLAUDE_PRO_DAILY,
    }
    for plan, expected in expected_caps.items():
        assert user_quotas.daily_cap_for(plan) == expected
32
+
33
+
34
def test_daily_cap_for_unknown_or_missing_defaults_to_free():
    """Missing plans get the free cap; unrecognised plan strings get the Pro cap.

    Despite the test name, only a *missing* plan (``None`` or ``""``) is
    expected to fall back to the free cap.
    """
    for missing in (None, ""):
        assert user_quotas.daily_cap_for(missing) == user_quotas.CLAUDE_FREE_DAILY
    # A non-empty plan string that is not "free" is expected to receive the
    # Pro cap even when it is not a recognised tier. If that policy is ever
    # tightened, this assertion has to change with it — adjust consciously.
    assert user_quotas.daily_cap_for("mystery") == user_quotas.CLAUDE_PRO_DAILY
41
+
42
+
43
@pytest.mark.asyncio
async def test_increment_and_read_back_same_day():
    """Increments return the running total and a later read agrees with it."""
    uid = "u1"
    assert await user_quotas.get_claude_used_today(uid) == 0
    for expected in (1, 2):
        assert await user_quotas.increment_claude(uid) == expected
    assert await user_quotas.get_claude_used_today(uid) == 2
49
+
50
+
51
@pytest.mark.asyncio
async def test_independent_users_do_not_share_counts():
    """Counts are tracked per user id."""
    for uid in ("alice", "alice", "bob"):
        await user_quotas.increment_claude(uid)
    for uid, expected in (("alice", 2), ("bob", 1)):
        assert await user_quotas.get_claude_used_today(uid) == expected
58
+
59
+
60
@pytest.mark.asyncio
async def test_stale_day_resets_before_next_read():
    """An entry dated on an earlier day reads as zero and restarts at one."""
    uid = "u1"
    await user_quotas.increment_claude(uid)
    # Plant an entry carrying an old date, as if it were left over from a previous day.
    user_quotas._claude_counts[uid] = ("2000-01-01", 99)
    used_after_rollover = await user_quotas.get_claude_used_today(uid)
    assert used_after_rollover == 0
    # The next increment counts from zero again.
    first_bump = await user_quotas.increment_claude(uid)
    assert first_bump == 1
68
+
69
+
70
@pytest.mark.asyncio
async def test_concurrent_increments_under_lock_do_not_lose_writes():
    """Fifty concurrent increments for one user must add up to exactly fifty."""
    bumps = (user_quotas.increment_claude("race") for _ in range(50))
    await asyncio.gather(*bumps)
    total = await user_quotas.get_claude_used_today("race")
    assert total == 50
75
+
76
+
77
@pytest.mark.asyncio
async def test_refund_decrements_and_drops_entry_at_zero():
    """Refunding the only use brings the count to zero and removes the entry."""
    uid = "u1"
    await user_quotas.increment_claude(uid)
    assert await user_quotas.get_claude_used_today(uid) == 1
    await user_quotas.refund_claude(uid)
    assert await user_quotas.get_claude_used_today(uid) == 0
    assert uid not in user_quotas._claude_counts
84
+
85
+
86
@pytest.mark.asyncio
async def test_refund_on_nonexistent_user_is_noop():
    """Refunding a user with no recorded usage neither raises nor creates usage."""
    uid = "ghost"
    await user_quotas.refund_claude(uid)  # must not raise
    used = await user_quotas.get_claude_used_today(uid)
    assert used == 0
90
+
91
+
92
@pytest.mark.asyncio
async def test_refund_on_stale_day_resets_rather_than_underflow():
    """A refund against an entry from an earlier day leaves today's count at zero."""
    uid = "u1"
    user_quotas._claude_counts[uid] = ("2000-01-01", 5)
    await user_quotas.refund_claude(uid)
    # The old entry must not carry over into today's count.
    used_today = await user_quotas.get_claude_used_today(uid)
    assert used_today == 0
98
+
99
+
100
@pytest.mark.asyncio
async def test_free_user_cap_reached_at_one():
    """A free user's first increment already meets the free-tier cap."""
    free_cap = user_quotas.daily_cap_for("free")
    used = await user_quotas.increment_claude("freebie")
    assert used == 1
    # One use is enough to reach (or pass) the free cap.
    assert used >= free_cap
106
+
107
+
108
@pytest.mark.asyncio
async def test_pro_user_cap_reached_at_twenty():
    """The Pro cap is 20 and the counter keeps counting once it is passed."""
    cap = user_quotas.daily_cap_for("pro")
    assert cap == 20
    for expected in range(1, cap + 1):
        assert await user_quotas.increment_claude("pro_user") == expected
    # The store itself does not stop at the cap (per the original note, the
    # gate in routes/agent.py enforces it); the counter just has to keep
    # tracking so that the comparison can be made.
    beyond_cap = await user_quotas.increment_claude("pro_user")
    assert beyond_cap == 21