Spaces:

smolagents
/

ml-intern

Running on CPU Upgrade

App Files Files Community

Aksel Joonas Reedi committed 14 days ago

Commit

540437a

unverified ·

1 Parent(s): 5d357ba

feat(quota): daily Opus cap + HF-org gate + cap dialog (#72)

Browse files

Files changed (12) hide show

backend/dependencies.py +109 -3
backend/routes/agent.py +150 -26
backend/session_manager.py +16 -1
backend/user_quotas.py +83 -0
frontend/src/components/Chat/ChatInput.tsx +100 -8
frontend/src/components/ClaudeCapDialog.tsx +134 -0
frontend/src/hooks/useAgentChat.ts +9 -1
frontend/src/hooks/useUserQuota.ts +51 -0
frontend/src/lib/sse-chat-transport.ts +6 -0
frontend/src/store/agentStore.ts +5 -0
frontend/src/utils/model.ts +15 -0
tests/unit/test_user_quotas.py +116 -0

backend/dependencies.py CHANGED Viewed

@@ -16,6 +16,7 @@ logger = logging.getLogger(__name__)
 OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL", "https://huggingface.co")
 AUTH_ENABLED = bool(os.environ.get("OAUTH_CLIENT_ID", ""))
 # Simple in-memory token cache: token -> (user_info, expiry_time)
 _token_cache: dict[str, tuple[dict[str, Any], float]] = {}
@@ -28,8 +29,13 @@ DEV_USER: dict[str, Any] = {
     "user_id": "dev",
     "username": "dev",
     "authenticated": True,
 }
 async def _validate_token(token: str) -> dict[str, Any] | None:
     """Validate a token against HF OAuth userinfo endpoint.
@@ -74,12 +80,86 @@ def _user_from_info(user_info: dict[str, Any]) -> dict[str, Any]:
     }
 async def _extract_user_from_token(token: str) -> dict[str, Any] | None:
     """Validate a token and return a user dict, or None."""
     user_info = await _validate_token(token)
-    if user_info:
-        return _user_from_info(user_info)
-    return None
 async def check_org_membership(token: str, org_name: str) -> bool:
@@ -141,3 +221,29 @@ async def get_current_user(request: Request) -> dict[str, Any]:
     )

 OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL", "https://huggingface.co")
 AUTH_ENABLED = bool(os.environ.get("OAUTH_CLIENT_ID", ""))
+HF_EMPLOYEE_ORG = os.environ.get("HF_EMPLOYEE_ORG", "huggingface")
 # Simple in-memory token cache: token -> (user_info, expiry_time)
 _token_cache: dict[str, tuple[dict[str, Any], float]] = {}
     "user_id": "dev",
     "username": "dev",
     "authenticated": True,
+    "plan": "org",  # Dev runs at the Pro/Org quota tier so local testing isn't capped.
 }
+# Plan field discovery — log the whoami-v2 shape once at DEBUG so we can
+# confirm the actual key in production without hammering the HF API.
+_WHOAMI_SHAPE_LOGGED = False
 async def _validate_token(token: str) -> dict[str, Any] | None:
     """Validate a token against HF OAuth userinfo endpoint.
     }
+def _normalize_plan(whoami: dict[str, Any]) -> str:
+    """Map an HF /api/whoami-v2 payload to one of: 'free' | 'pro' | 'org'.
+    The exact field shape in whoami-v2 isn't documented for our purposes,
+    so we try a handful of likely keys and fall back to 'free'. The first
+    call logs the raw shape at DEBUG (see `_fetch_user_plan`) so we can
+    pin the real key post-deploy.
+    """
+    plan_str = ""
+    for key in ("plan", "type", "accountType"):
+        val = whoami.get(key)
+        if isinstance(val, str) and val:
+            plan_str = val.lower()
+            break
+    if not plan_str:
+        if whoami.get("isPro") is True or whoami.get("is_pro") is True:
+            return "pro"
+    if "pro" in plan_str or "enterprise" in plan_str or "team" in plan_str:
+        return "pro"
+    # Org tier: anyone in a paid / enterprise org. We don't pay for this
+    # right now, but the "pro" cap applies identically.
+    orgs = whoami.get("orgs") or []
+    if isinstance(orgs, list):
+        for org in orgs:
+            if isinstance(org, dict):
+                org_plan = str(org.get("plan") or org.get("type") or "").lower()
+                if "pro" in org_plan or "enterprise" in org_plan or "team" in org_plan:
+                    return "org"
+    return "free"
+async def _fetch_user_plan(token: str) -> str:
+    """Look up the user's HF plan via /api/whoami-v2.
+    Returns 'free' | 'pro' | 'org'. Non-200, network errors, or an unknown
+    payload shape all collapse to 'free' — safe default; we'd rather under-
+    grant the Pro cap than over-grant it on bad data.
+    """
+    global _WHOAMI_SHAPE_LOGGED
+    async with httpx.AsyncClient(timeout=5.0) as client:
+        try:
+            resp = await client.get(
+                f"{OPENID_PROVIDER_URL}/api/whoami-v2",
+                headers={"Authorization": f"Bearer {token}"},
+            )
+            if resp.status_code != 200:
+                return "free"
+            whoami = resp.json()
+        except httpx.HTTPError:
+            return "free"
+        except ValueError:
+            return "free"
+    if not _WHOAMI_SHAPE_LOGGED:
+        _WHOAMI_SHAPE_LOGGED = True
+        logger.debug(
+            "whoami-v2 payload keys: %s (sample values: plan=%r type=%r isPro=%r)",
+            sorted(whoami.keys()) if isinstance(whoami, dict) else type(whoami).__name__,
+            whoami.get("plan") if isinstance(whoami, dict) else None,
+            whoami.get("type") if isinstance(whoami, dict) else None,
+            whoami.get("isPro") if isinstance(whoami, dict) else None,
+        )
+    if not isinstance(whoami, dict):
+        return "free"
+    return _normalize_plan(whoami)
 async def _extract_user_from_token(token: str) -> dict[str, Any] | None:
     """Validate a token and return a user dict, or None."""
     user_info = await _validate_token(token)
+    if user_info is None:
+        return None
+    user = _user_from_info(user_info)
+    # Attach the quota tier ('free' | 'pro' | 'org'). _fetch_user_plan makes
+    # its own whoami-v2 request and collapses every failure to 'free'.
+    user["plan"] = await _fetch_user_plan(token)
+    return user
 async def check_org_membership(token: str, org_name: str) -> bool:
     )
+def _extract_token(request: Request) -> str | None:
+    """Pull the HF access token from the Authorization header or cookie.
+    Mirrors the lookup order used by ``get_current_user``.
+    """
+    auth_header = request.headers.get("Authorization", "")
+    if auth_header.startswith("Bearer "):
+        return auth_header[7:]
+    return request.cookies.get("hf_access_token")
+async def require_huggingface_org_member(request: Request) -> bool:
+    """Return True if the caller is a member of the ``huggingface`` org.
+    Used to gate endpoints that can push a session onto an Anthropic model
+    billed to the Space's ``ANTHROPIC_API_KEY``. Returns True unconditionally
+    in dev mode so local testing isn't blocked.
+    """
+    if not AUTH_ENABLED:
+        return True
+    token = _extract_token(request)
+    if not token:
+        return False
+    return await check_org_membership(token, HF_EMPLOYEE_ORG)

backend/routes/agent.py CHANGED Viewed

@@ -10,7 +10,7 @@ import logging
 import os
 from typing import Any
-from dependencies import get_current_user
 from fastapi import (
     APIRouter,
     Depends,
@@ -28,7 +28,9 @@ from models import (
     SubmitRequest,
     TruncateRequest,
 )
-from session_manager import MAX_SESSIONS, SessionCapacityError, session_manager
 from agent.core.llm_params import _resolve_llm_params
@@ -37,31 +39,99 @@ logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/api", tags=["agent"])
 AVAILABLE_MODELS = [
     {
         "id": "anthropic/claude-opus-4-6",
         "label": "Claude Opus 4.6",
         "provider": "anthropic",
         "recommended": True,
     },
     {
         "id": "MiniMaxAI/MiniMax-M2.7",
         "label": "MiniMax M2.7",
         "provider": "huggingface",
-        "recommended": True,
-    },
-    {
-        "id": "moonshotai/Kimi-K2.6",
-        "label": "Kimi K2.6",
-        "provider": "huggingface",
     },
     {
         "id": "zai-org/GLM-5.1",
         "label": "GLM 5.1",
         "provider": "huggingface",
     },
 ]
 def _check_session_access(session_id: str, user: dict[str, Any]) -> None:
     """Verify the user has access to the given session. Raises 403 or 404."""
     info = session_manager.get_session_info(session_id)
@@ -143,20 +213,6 @@ async def get_model() -> dict:
     }
-@router.post("/config/model")
-async def set_model(body: dict, user: dict = Depends(get_current_user)) -> dict:
-    """Set the LLM model. Applies to new conversations."""
-    model_id = body.get("model")
-    if not model_id:
-        raise HTTPException(status_code=400, detail="Missing 'model' field")
-    valid_ids = {m["id"] for m in AVAILABLE_MODELS}
-    if model_id not in valid_ids:
-        raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
-    session_manager.config.model_name = model_id
-    logger.info(f"Model changed to {model_id} by {user.get('username', 'unknown')}")
-    return {"model": model_id}
 _TITLE_STRIP_CHARS = str.maketrans("", "", "`*_~#[]()")
@@ -224,6 +280,10 @@ async def create_session(
     and stored in the session so that tools (e.g. hf_jobs) can act on
     behalf of the user.
     Returns 503 if the server or user has reached the session limit.
     """
     # Extract the user's HF token (Bearer header, HttpOnly cookie, or env var)
@@ -236,9 +296,27 @@ async def create_session(
     if not hf_token:
         hf_token = os.environ.get("HF_TOKEN")
     try:
         session_id = await session_manager.create_session(
-            user_id=user["user_id"], hf_token=hf_token
         )
     except SessionCapacityError as e:
         raise HTTPException(status_code=503, detail=str(e))
@@ -254,6 +332,9 @@ async def restore_session_summary(
     conversation. The client sends its cached messages; we run the standard
     summarization prompt on them and drop the result into the new
     session's context as a user-role system note.
     """
     messages = body.get("messages")
     if not isinstance(messages, list) or not messages:
@@ -268,9 +349,17 @@ async def restore_session_summary(
     if not hf_token:
         hf_token = os.environ.get("HF_TOKEN")
     try:
         session_id = await session_manager.create_session(
-            user_id=user["user_id"], hf_token=hf_token
         )
     except SessionCapacityError as e:
         raise HTTPException(status_code=503, detail=str(e))
@@ -302,12 +391,19 @@ async def get_session(
 @router.post("/session/{session_id}/model")
 async def set_session_model(
-    session_id: str, body: dict, user: dict = Depends(get_current_user)
 ) -> dict:
     """Switch the active model for a single session (tab-scoped).
     Takes effect on the next LLM call in that session — other sessions
-    (including other browser tabs) are unaffected.
     """
     _check_session_access(session_id, user)
     model_id = body.get("model")
@@ -316,6 +412,7 @@ async def set_session_model(
     valid_ids = {m["id"] for m in AVAILABLE_MODELS}
     if model_id not in valid_ids:
         raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
     agent_session = session_manager.sessions.get(session_id)
     if not agent_session:
         raise HTTPException(status_code=404, detail="Session not found")
@@ -327,6 +424,20 @@ async def set_session_model(
     return {"session_id": session_id, "model": model_id}
 @router.get("/sessions", response_model=list[SessionInfo])
 async def list_sessions(user: dict = Depends(get_current_user)) -> list[SessionInfo]:
     """List sessions belonging to the authenticated user."""
@@ -352,6 +463,9 @@ async def submit_input(
 ) -> dict:
     """Submit user input to a session. Only accessible by the session owner."""
     _check_session_access(request.session_id, user)
     success = await session_manager.submit_user_input(request.session_id, request.text)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
@@ -404,6 +518,16 @@ async def chat_sse(
     text = body.get("text")
     approvals = body.get("approvals")
     try:
         if approvals:
             formatted = [

 import os
 from typing import Any
+from dependencies import get_current_user, require_huggingface_org_member
 from fastapi import (
     APIRouter,
     Depends,
     SubmitRequest,
     TruncateRequest,
 )
+from session_manager import MAX_SESSIONS, AgentSession, SessionCapacityError, session_manager
+import user_quotas
 from agent.core.llm_params import _resolve_llm_params
 router = APIRouter(prefix="/api", tags=["agent"])
 AVAILABLE_MODELS = [
+    {
+        "id": "moonshotai/Kimi-K2.6",
+        "label": "Kimi K2.6",
+        "provider": "huggingface",
+        "tier": "free",
+        "recommended": True,
+    },
     {
         "id": "anthropic/claude-opus-4-6",
         "label": "Claude Opus 4.6",
         "provider": "anthropic",
+        "tier": "pro",
         "recommended": True,
     },
     {
         "id": "MiniMaxAI/MiniMax-M2.7",
         "label": "MiniMax M2.7",
         "provider": "huggingface",
+        "tier": "free",
     },
     {
         "id": "zai-org/GLM-5.1",
         "label": "GLM 5.1",
         "provider": "huggingface",
+        "tier": "free",
     },
 ]
+async def _require_hf_for_anthropic(request: Request, model_id: str) -> None:
+    """403 if a non-``huggingface``-org user tries to select an Anthropic model.
+    Anthropic models are billed to the Space's ``ANTHROPIC_API_KEY``; every
+    other model in ``AVAILABLE_MODELS`` is routed through HF Router and
+    billed via ``X-HF-Bill-To``. The gate only fires for ``anthropic/*`` so
+    non-HF users can still freely switch between the free models.
+    Pattern: https://github.com/huggingface/ml-intern/pull/63
+    """
+    if not model_id.startswith("anthropic/"):
+        return
+    if not await require_huggingface_org_member(request):
+        raise HTTPException(
+            status_code=403,
+            detail={
+                "error": "anthropic_restricted",
+                "message": (
+                    "Opus is gated to HF staff. Pick a free model — "
+                    "Kimi K2.6, MiniMax M2.7, or GLM 5.1 — instead."
+                ),
+            },
+        )
+async def _enforce_claude_quota(
+    user: dict[str, Any],
+    agent_session: AgentSession,
+) -> None:
+    """Charge the user's daily Claude quota on first use of Anthropic in a session.
+    Runs at *message-submit* time, not session-create time — so spinning up a
+    Claude session to look around doesn't burn quota. The ``claude_counted``
+    flag on ``AgentSession`` guards against re-counting the same session.
+    No-ops when the session's current model isn't Anthropic, or when this
+    session has already been charged. Raises 429 when the user has hit
+    their daily cap.
+    """
+    if agent_session.claude_counted:
+        return
+    model_name = agent_session.session.config.model_name
+    if not model_name.startswith("anthropic/"):
+        return
+    user_id = user["user_id"]
+    used = await user_quotas.get_claude_used_today(user_id)
+    cap = user_quotas.daily_cap_for(user.get("plan"))
+    if used >= cap:
+        raise HTTPException(
+            status_code=429,
+            detail={
+                "error": "claude_daily_cap",
+                "plan": user.get("plan", "free"),
+                "cap": cap,
+                "message": (
+                    "Daily Claude limit reached. Upgrade to HF Pro for "
+                    f"{user_quotas.CLAUDE_PRO_DAILY}/day or use a free model."
+                ),
+            },
+        )
+    await user_quotas.increment_claude(user_id)
+    agent_session.claude_counted = True
 def _check_session_access(session_id: str, user: dict[str, Any]) -> None:
     """Verify the user has access to the given session. Raises 403 or 404."""
     info = session_manager.get_session_info(session_id)
     }
 _TITLE_STRIP_CHARS = str.maketrans("", "", "`*_~#[]()")
     and stored in the session so that tools (e.g. hf_jobs) can act on
     behalf of the user.
+    Optional body ``{"model"?: <id>}`` selects the session's LLM; unknown
+    ids are rejected (400). The Claude-quota gate runs at message-submit
+    time, not here — spinning up an Opus session to look around is free.
     Returns 503 if the server or user has reached the session limit.
     """
     # Extract the user's HF token (Bearer header, HttpOnly cookie, or env var)
     if not hf_token:
         hf_token = os.environ.get("HF_TOKEN")
+    # Optional model override. Empty body falls back to the config default.
+    model: str | None = None
+    try:
+        body = await request.json()
+    except Exception:
+        body = None
+    if isinstance(body, dict):
+        model = body.get("model")
+    valid_ids = {m["id"] for m in AVAILABLE_MODELS}
+    if model and model not in valid_ids:
+        raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
+    # Opus is gated to HF staff (PR #63). Only fires when the resolved model
+    # is Anthropic; free models pass through.
+    resolved_model = model or session_manager.config.model_name
+    await _require_hf_for_anthropic(request, resolved_model)
     try:
         session_id = await session_manager.create_session(
+            user_id=user["user_id"], hf_token=hf_token, model=model
         )
     except SessionCapacityError as e:
         raise HTTPException(status_code=503, detail=str(e))
     conversation. The client sends its cached messages; we run the standard
     summarization prompt on them and drop the result into the new
     session's context as a user-role system note.
+    Optional ``"model"`` in the body overrides the session's LLM. The
+    Claude-quota gate runs at message-submit time, not here.
     """
     messages = body.get("messages")
     if not isinstance(messages, list) or not messages:
     if not hf_token:
         hf_token = os.environ.get("HF_TOKEN")
+    model = body.get("model")
+    valid_ids = {m["id"] for m in AVAILABLE_MODELS}
+    if model and model not in valid_ids:
+        raise HTTPException(status_code=400, detail=f"Unknown model: {model}")
+    resolved_model = model or session_manager.config.model_name
+    await _require_hf_for_anthropic(request, resolved_model)
     try:
         session_id = await session_manager.create_session(
+            user_id=user["user_id"], hf_token=hf_token, model=model
         )
     except SessionCapacityError as e:
         raise HTTPException(status_code=503, detail=str(e))
 @router.post("/session/{session_id}/model")
 async def set_session_model(
+    session_id: str,
+    body: dict,
+    request: Request,
+    user: dict = Depends(get_current_user),
 ) -> dict:
     """Switch the active model for a single session (tab-scoped).
     Takes effect on the next LLM call in that session — other sessions
+    (including other browser tabs) are unaffected. Model switches don't
+    charge quota — the Claude-quota gate only fires at message-submit time.
+    Switching TO an Anthropic model requires HF org membership (PR #63);
+    free-model switches are unrestricted.
     """
     _check_session_access(session_id, user)
     model_id = body.get("model")
     valid_ids = {m["id"] for m in AVAILABLE_MODELS}
     if model_id not in valid_ids:
         raise HTTPException(status_code=400, detail=f"Unknown model: {model_id}")
+    await _require_hf_for_anthropic(request, model_id)
     agent_session = session_manager.sessions.get(session_id)
     if not agent_session:
         raise HTTPException(status_code=404, detail="Session not found")
     return {"session_id": session_id, "model": model_id}
+@router.get("/user/quota")
+async def get_user_quota(user: dict = Depends(get_current_user)) -> dict:
+    """Return the user's plan tier and today's Claude-session quota state."""
+    plan = user.get("plan", "free")
+    used = await user_quotas.get_claude_used_today(user["user_id"])
+    cap = user_quotas.daily_cap_for(plan)
+    return {
+        "plan": plan,
+        "claude_used_today": used,
+        "claude_daily_cap": cap,
+        "claude_remaining": max(0, cap - used),
+    }
 @router.get("/sessions", response_model=list[SessionInfo])
 async def list_sessions(user: dict = Depends(get_current_user)) -> list[SessionInfo]:
     """List sessions belonging to the authenticated user."""
 ) -> dict:
     """Submit user input to a session. Only accessible by the session owner."""
     _check_session_access(request.session_id, user)
+    agent_session = session_manager.sessions.get(request.session_id)
+    if agent_session is not None:
+        await _enforce_claude_quota(user, agent_session)
     success = await session_manager.submit_user_input(request.session_id, request.text)
     if not success:
         raise HTTPException(status_code=404, detail="Session not found or inactive")
     text = body.get("text")
     approvals = body.get("approvals")
+    # Gate user-message sends against the daily Claude quota. Approvals are
+    # continuations of an in-progress turn — the session was already charged
+    # on its first message, so we skip the gate there.
+    if text is not None and not approvals:
+        try:
+            await _enforce_claude_quota(user, agent_session)
+        except HTTPException:
+            broadcaster.unsubscribe(sub_id)
+            raise
     try:
         if approvals:
             formatted = [

backend/session_manager.py CHANGED Viewed

@@ -91,6 +91,10 @@ class AgentSession:
     is_active: bool = True
     is_processing: bool = False  # True while a submission is being executed
     broadcaster: Any = None
 class SessionCapacityError(Exception):
@@ -126,7 +130,12 @@ class SessionManager:
             if s.user_id == user_id and s.is_active
         )
-    async def create_session(self, user_id: str = "dev", hf_token: str | None = None) -> str:
         """Create a new agent session and return its ID.
         Session() and ToolRouter() constructors contain blocking I/O
@@ -135,6 +144,10 @@ class SessionManager:
         Args:
             user_id: The ID of the user who owns this session.
         Raises:
             SessionCapacityError: If the server or user has reached the
@@ -175,6 +188,8 @@ class SessionManager:
             # Deep-copy config so each session's model switches independently —
             # tab A picking GLM doesn't flip tab B off Claude.
             session_config = self.config.model_copy(deep=True)
             session = Session(
                 event_queue, config=session_config, tool_router=tool_router,
                 hf_token=hf_token,

     is_active: bool = True
     is_processing: bool = False  # True while a submission is being executed
     broadcaster: Any = None
+    # True once this session has been counted against the user's daily
+    # Claude quota. Guards double-counting when the user re-selects an
+    # Anthropic model mid-session.
+    claude_counted: bool = False
 class SessionCapacityError(Exception):
             if s.user_id == user_id and s.is_active
         )
+    async def create_session(
+        self,
+        user_id: str = "dev",
+        hf_token: str | None = None,
+        model: str | None = None,
+    ) -> str:
         """Create a new agent session and return its ID.
         Session() and ToolRouter() constructors contain blocking I/O
         Args:
             user_id: The ID of the user who owns this session.
+            hf_token: The user's HF OAuth token, stored for tool execution.
+            model: Optional model override. When set, replaces ``model_name``
+                on the per-session config clone. None falls back to the
+                config default.
         Raises:
             SessionCapacityError: If the server or user has reached the
             # Deep-copy config so each session's model switches independently —
             # tab A picking GLM doesn't flip tab B off Claude.
             session_config = self.config.model_copy(deep=True)
+            if model:
+                session_config.model_name = model
             session = Session(
                 event_queue, config=session_config, tool_router=tool_router,
                 hf_token=hf_token,

backend/user_quotas.py ADDED Viewed

	@@ -0,0 +1,83 @@

+"""In-memory daily quota for Claude sessions.
+Tracks, per user, how many sessions have been charged for Claude use against
+a daily cap derived from the user's HF plan. Caps reset at UTC midnight; the
+store itself is in-process and wipes on restart (deliberate — the cost of
+occasional over-subsidy at restart is much lower than running a DB).
+Unit: sessions, not messages. A session consumes one quota point the first
+time a message is submitted while it is on a Claude model; creating the
+session, sending later messages, and switching back to Claude afterwards are
+free (`AgentSession.claude_counted` guards that).
+Cap tiers:
+  free user   → CLAUDE_FREE_DAILY (1)
+  pro / org   → CLAUDE_PRO_DAILY  (20)
+"""
+import asyncio
+import os
+from datetime import UTC, datetime
+CLAUDE_FREE_DAILY: int = int(os.environ.get("CLAUDE_FREE_DAILY", "1"))
+CLAUDE_PRO_DAILY: int = int(os.environ.get("CLAUDE_PRO_DAILY", "20"))
+# user_id -> (day_utc_iso, count_for_that_day)
+_claude_counts: dict[str, tuple[str, int]] = {}
+_lock = asyncio.Lock()
+def _today() -> str:
+    return datetime.now(UTC).date().isoformat()
+def daily_cap_for(plan: str | None) -> int:
+    """Return the daily Claude-session cap for the given plan."""
+    return CLAUDE_FREE_DAILY if (plan or "free") == "free" else CLAUDE_PRO_DAILY
+async def get_claude_used_today(user_id: str) -> int:
+    """Return today's Claude session count for the user (0 if none / stale day)."""
+    async with _lock:
+        entry = _claude_counts.get(user_id)
+        if entry is None:
+            return 0
+        day, count = entry
+        if day != _today():
+            # Stale day — drop the entry so the first increment starts fresh.
+            _claude_counts.pop(user_id, None)
+            return 0
+        return count
+async def increment_claude(user_id: str) -> int:
+    """Bump today's Claude session count for the user. Returns the new value."""
+    async with _lock:
+        today = _today()
+        day, count = _claude_counts.get(user_id, (today, 0))
+        if day != today:
+            count = 0
+        count += 1
+        _claude_counts[user_id] = (today, count)
+        return count
+async def refund_claude(user_id: str) -> None:
+    """Decrement today's count — used when session creation fails after a successful gate."""
+    async with _lock:
+        entry = _claude_counts.get(user_id)
+        if entry is None:
+            return
+        day, count = entry
+        if day != _today():
+            _claude_counts.pop(user_id, None)
+            return
+        new_count = max(0, count - 1)
+        if new_count == 0:
+            _claude_counts.pop(user_id, None)
+        else:
+            _claude_counts[user_id] = (day, new_count)
+def _reset_for_tests() -> None:
+    """Test-only: clear the in-memory store.
+    Synchronous; empties ``_claude_counts`` without taking ``_lock``.
+    """
+    _claude_counts.clear()

frontend/src/components/Chat/ChatInput.tsx CHANGED Viewed

@@ -4,6 +4,10 @@ import ArrowUpwardIcon from '@mui/icons-material/ArrowUpward';
 import ArrowDropDownIcon from '@mui/icons-material/ArrowDropDown';
 import StopIcon from '@mui/icons-material/Stop';
 import { apiFetch } from '@/utils/api';
 // Model configuration
 interface ModelOption {
@@ -21,6 +25,14 @@ const getHfAvatarUrl = (modelId: string) => {
 };
 const MODEL_OPTIONS: ModelOption[] = [
   {
     id: 'claude-opus',
     name: 'Claude Opus 4.6',
@@ -35,14 +47,6 @@ const MODEL_OPTIONS: ModelOption[] = [
     description: 'Novita',
     modelPath: 'MiniMaxAI/MiniMax-M2.7',
     avatarUrl: getHfAvatarUrl('MiniMaxAI/MiniMax-M2.7'),
-    recommended: true,
-  },
-  {
-    id: 'kimi-k2.6',
-    name: 'Kimi K2.6',
-    description: 'Novita',
-    modelPath: 'moonshotai/Kimi-K2.6',
-    avatarUrl: getHfAvatarUrl('moonshotai/Kimi-K2.6'),
   },
   {
     id: 'glm-5.1',
@@ -66,11 +70,23 @@ interface ChatInputProps {
   placeholder?: string;
 }
 export default function ChatInput({ sessionId, onSend, onStop, isProcessing = false, disabled = false, placeholder = 'Ask anything...' }: ChatInputProps) {
   const [input, setInput] = useState('');
   const inputRef = useRef<HTMLTextAreaElement>(null);
   const [selectedModelId, setSelectedModelId] = useState<string>(MODEL_OPTIONS[0].id);
   const [modelAnchorEl, setModelAnchorEl] = useState<null | HTMLElement>(null);
   // Model is per-session: fetch this tab's current model every time the
   // session changes. Other tabs keep their own selections independently.
@@ -101,11 +117,27 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
   const handleSend = useCallback(() => {
     if (input.trim() && !disabled) {
       onSend(input);
       setInput('');
     }
   }, [input, disabled, onSend]);
   const handleKeyDown = useCallback(
     (e: KeyboardEvent<HTMLDivElement>) => {
       if (e.key === 'Enter' && !e.shiftKey) {
@@ -136,6 +168,45 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
     } catch { /* ignore */ }
   };
   return (
     <Box
       sx={{
@@ -334,6 +405,19 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
                         }}
                       />
                     )}
                   </Box>
                 }
                 secondary={model.description}
@@ -344,6 +428,14 @@ export default function ChatInput({ sessionId, onSend, onStop, isProcessing = fa
             </MenuItem>
           ))}
         </Menu>
       </Box>
     </Box>
   );

 import ArrowDropDownIcon from '@mui/icons-material/ArrowDropDown';
 import StopIcon from '@mui/icons-material/Stop';
 import { apiFetch } from '@/utils/api';
+import { useUserQuota } from '@/hooks/useUserQuota';
+import ClaudeCapDialog from '@/components/ClaudeCapDialog';
+import { useAgentStore } from '@/store/agentStore';
+import { FIRST_FREE_MODEL_PATH } from '@/utils/model';
 // Model configuration
 interface ModelOption {
 };
 const MODEL_OPTIONS: ModelOption[] = [
+  {
+    id: 'kimi-k2.6',
+    name: 'Kimi K2.6',
+    description: 'Novita',
+    modelPath: 'moonshotai/Kimi-K2.6',
+    avatarUrl: getHfAvatarUrl('moonshotai/Kimi-K2.6'),
+    recommended: true,
+  },
   {
     id: 'claude-opus',
     name: 'Claude Opus 4.6',
     description: 'Novita',
     modelPath: 'MiniMaxAI/MiniMax-M2.7',
     avatarUrl: getHfAvatarUrl('MiniMaxAI/MiniMax-M2.7'),
   },
   {
     id: 'glm-5.1',
   placeholder?: string;
 }
+// True when the option's model path is under the `anthropic/` namespace.
+const isClaudeModel = (m: ModelOption) => m.modelPath.startsWith('anthropic/');
+// First non-Claude entry in MODEL_OPTIONS; falls back to the first entry when every option is Claude.
+const firstFreeModel = () => MODEL_OPTIONS.find(m => !isClaudeModel(m)) ?? MODEL_OPTIONS[0];
 export default function ChatInput({ sessionId, onSend, onStop, isProcessing = false, disabled = false, placeholder = 'Ask anything...' }: ChatInputProps) {
   const [input, setInput] = useState('');
   const inputRef = useRef<HTMLTextAreaElement>(null);
   const [selectedModelId, setSelectedModelId] = useState<string>(MODEL_OPTIONS[0].id);
   const [modelAnchorEl, setModelAnchorEl] = useState<null | HTMLElement>(null);
+  const { quota, refresh: refreshQuota } = useUserQuota();
+  // The daily-cap dialog currently has a single trigger: a 429 returned
+  // from the chat transport when the user tries to send on Opus over cap,
+  // surfaced via the agent-store flag (switching models is free). Keeping
+  // the open state in the store means the hook layer can flip it without
+  // threading props through.
+  const claudeQuotaExhausted = useAgentStore((s) => s.claudeQuotaExhausted);
+  const setClaudeQuotaExhausted = useAgentStore((s) => s.setClaudeQuotaExhausted);
+  const lastSentRef = useRef<string>('');
   // Model is per-session: fetch this tab's current model every time the
   // session changes. Other tabs keep their own selections independently.
   const handleSend = useCallback(() => {
     if (input.trim() && !disabled) {
+      lastSentRef.current = input;
       onSend(input);
       setInput('');
     }
   }, [input, disabled, onSend]);
+  // When the chat transport reports a Claude-quota 429, restore the typed
+  // text so the user doesn't lose their message.
+  useEffect(() => {
+    if (claudeQuotaExhausted && lastSentRef.current) {
+      setInput(lastSentRef.current);
+    }
+  }, [claudeQuotaExhausted]);
+  // Refresh the quota display whenever the session changes (user might
+  // have started another tab that spent quota).
+  useEffect(() => {
+    if (sessionId) refreshQuota();
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [sessionId]);
   const handleKeyDown = useCallback(
     (e: KeyboardEvent<HTMLDivElement>) => {
       if (e.key === 'Enter' && !e.shiftKey) {
     } catch { /* ignore */ }
   };
+  // Dialog close: just clear the flag. The typed text is already restored.
+  const handleCapDialogClose = useCallback(() => {
+    setClaudeQuotaExhausted(false);
+  }, [setClaudeQuotaExhausted]);
+  // "Use a free model" — switch the current session to Kimi (or the first
+  // non-Anthropic option) and auto-retry the send that tripped the cap.
+  const handleUseFreeModel = useCallback(async () => {
+    setClaudeQuotaExhausted(false);
+    if (!sessionId) return;
+    const free = MODEL_OPTIONS.find(m => m.modelPath === FIRST_FREE_MODEL_PATH)
+      ?? firstFreeModel();
+    try {
+      const res = await apiFetch(`/api/session/${sessionId}/model`, {
+        method: 'POST',
+        body: JSON.stringify({ model: free.modelPath }),
+      });
+      if (res.ok) {
+        setSelectedModelId(free.id);
+        const retryText = lastSentRef.current;
+        if (retryText) {
+          onSend(retryText);
+          setInput('');
+          lastSentRef.current = '';
+        }
+      }
+    } catch { /* ignore */ }
+  }, [sessionId, onSend, setClaudeQuotaExhausted]);
+  // Hide the chip until the user has actually burned quota — an unused
+  // Opus session shouldn't populate a counter.
+  const claudeChip = (() => {
+    if (!quota || quota.claudeUsedToday === 0) return null;
+    if (quota.plan === 'free') {
+      return quota.claudeRemaining > 0 ? 'Free today' : 'Pro only';
+    }
+    return `${quota.claudeUsedToday}/${quota.claudeDailyCap} today`;
+  })();
   return (
     <Box
       sx={{
                         }}
                       />
                     )}
+                    {isClaudeModel(model) && claudeChip && (
+                      <Chip
+                        label={claudeChip}
+                        size="small"
+                        sx={{
+                          height: '18px',
+                          fontSize: '10px',
+                          bgcolor: 'rgba(255,255,255,0.08)',
+                          color: 'var(--muted-text)',
+                          fontWeight: 600,
+                        }}
+                      />
+                    )}
                   </Box>
                 }
                 secondary={model.description}
             </MenuItem>
           ))}
         </Menu>
+        <ClaudeCapDialog
+          open={claudeQuotaExhausted}
+          plan={quota?.plan ?? 'free'}
+          cap={quota?.claudeDailyCap ?? 1}
+          onClose={handleCapDialogClose}
+          onUseFreeModel={handleUseFreeModel}
+        />
       </Box>
     </Box>
   );

frontend/src/components/ClaudeCapDialog.tsx ADDED Viewed

	@@ -0,0 +1,134 @@

+import {
+  Box,
+  Button,
+  Dialog,
+  DialogActions,
+  DialogContent,
+  DialogContentText,
+  DialogTitle,
+  Typography,
+} from '@mui/material';
+import type { PlanTier } from '@/hooks/useUserQuota';
+// Destination of the "Upgrade to Pro" button.
+const HF_PRICING_URL = 'https://huggingface.co/pricing';
+// Daily Opus session count quoted in the HF Pro upsell copy.
+// NOTE(review): hard-coded here — presumably has to match the backend's Pro cap; confirm.
+const PRO_CAP = 20;
+/** Props accepted by ClaudeCapDialog. */
+interface ClaudeCapDialogProps {
+  /** Whether the dialog is shown. */
+  open: boolean;
+  /** Caller's plan tier; accepted but not currently used in the dialog copy. */
+  plan: PlanTier;
+  /** Daily session cap interpolated into the body text. */
+  cap: number;
+  /** Invoked when the dialog is dismissed (forwarded to the MUI Dialog's onClose). */
+  onClose: () => void;
+  /** Invoked when the "Use a free model" button is clicked. */
+  onUseFreeModel: () => void;
+}
+/**
+ * Modal shown when the user has hit the daily Opus cap.
+ *
+ * The body states the cap (`cap`, with singular/plural "session"), followed by
+ * an HF Pro upsell box that quotes PRO_CAP. Two actions are offered:
+ * "Upgrade to Pro" (a link to HF_PRICING_URL opened in a new tab) and
+ * "Use a free model" (calls `onUseFreeModel`). Dismissing the dialog calls
+ * `onClose`.
+ */
+export default function ClaudeCapDialog({
+  open,
+  plan,
+  cap,
+  onClose,
+  onUseFreeModel,
+}: ClaudeCapDialogProps) {
+  // `plan` is intentionally not surfaced in the copy right now — Pro users see
+  // the same dialog and can upgrade their org if they're also capped. The
+  // `void` expression just marks the prop as deliberately unused.
+  void plan;
+  return (
+    <Dialog
+      open={open}
+      onClose={onClose}
+      slotProps={{
+        backdrop: { sx: { backgroundColor: 'rgba(0,0,0,0.5)', backdropFilter: 'blur(4px)' } },
+      }}
+      PaperProps={{
+        sx: {
+          bgcolor: 'var(--panel)',
+          border: '1px solid var(--border)',
+          borderRadius: 'var(--radius-md)',
+          boxShadow: 'var(--shadow-1)',
+          maxWidth: 460,
+          mx: 2,
+        },
+      }}
+    >
+      <DialogTitle
+        sx={{ color: 'var(--text)', fontWeight: 700, fontSize: '1rem', pt: 2.5, pb: 0, px: 3 }}
+      >
+        You've hit your Opus limit
+      </DialogTitle>
+      <DialogContent sx={{ px: 3, pt: 1.25, pb: 0 }}>
+        <DialogContentText
+          sx={{ color: 'var(--muted-text)', fontSize: '0.85rem', lineHeight: 1.6 }}
+        >
+          Opus costs an arm and a leg, so we unfortunately have to cap you at {cap}{' '}
+          {cap === 1 ? 'session' : 'sessions'} a day. Give Kimi, MiniMax, or GLM a spin —
+          they are genuinely good and we use them all the time.
+        </DialogContentText>
+        <Box
+          sx={{
+            mt: 2,
+            p: 1.5,
+            borderRadius: '8px',
+            bgcolor: 'var(--accent-yellow-weak)',
+            border: '1px solid var(--border)',
+          }}
+        >
+          <Typography
+            variant="caption"
+            sx={{
+              display: 'block',
+              fontWeight: 700,
+              color: 'var(--text)',
+              fontSize: '0.78rem',
+              mb: 0.5,
+              letterSpacing: '0.02em',
+            }}
+          >
+            HF Pro ($9/mo) — more Opus, more everything
+          </Typography>
+          <Typography
+            variant="caption"
+            sx={{ display: 'block', color: 'var(--muted-text)', fontSize: '0.78rem', lineHeight: 1.55 }}
+          >
+            {PRO_CAP} Opus sessions/day here, 20× HF Inference credits, ZeroGPU access,
+            and priority on Spaces hardware.
+          </Typography>
+        </Box>
+      </DialogContent>
+      <DialogActions sx={{ px: 3, pb: 2.5, pt: 2, gap: 1 }}>
+        <Button
+          component="a"
+          href={HF_PRICING_URL}
+          target="_blank"
+          rel="noopener noreferrer"
+          variant="contained"
+          size="small"
+          sx={{
+            fontSize: '0.82rem',
+            px: 2.5,
+            bgcolor: 'var(--accent-yellow)',
+            color: '#000',
+            textTransform: 'none',
+            fontWeight: 700,
+            boxShadow: 'none',
+            '&:hover': { bgcolor: '#FFB340', boxShadow: 'none' },
+          }}
+        >
+          Upgrade to Pro
+        </Button>
+        <Button
+          onClick={onUseFreeModel}
+          size="small"
+          sx={{
+            color: 'var(--muted-text)',
+            fontSize: '0.82rem',
+            px: 2,
+            textTransform: 'none',
+            '&:hover': { bgcolor: 'var(--hover-bg)' },
+          }}
+        >
+          Use a free model
+        </Button>
+      </DialogActions>
+    </Dialog>
+  );
+}

frontend/src/hooks/useAgentChat.ts CHANGED Viewed

@@ -345,8 +345,16 @@ export function useAgentChat({ sessionId, isActive, onReady, onError, onSessionD
     // sendMessages on the transport.
     sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
     onError: (error) => {
-      logger.error('useChat error:', error);
       updateSession(sessionId, { isProcessing: false });
       if (isActiveRef.current) {
         useAgentStore.getState().setError(error.message);
       }

     // sendMessages on the transport.
     sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
     onError: (error) => {
       updateSession(sessionId, { isProcessing: false });
+      // Claude daily-cap: open the cap dialog instead of the generic error
+      // banner. Transport marks the error with this sentinel.
+      if (error.message === 'CLAUDE_QUOTA_EXHAUSTED') {
+        if (isActiveRef.current) {
+          useAgentStore.getState().setClaudeQuotaExhausted(true);
+        }
+        return;
+      }
+      logger.error('useChat error:', error);
       if (isActiveRef.current) {
         useAgentStore.getState().setError(error.message);
       }

frontend/src/hooks/useUserQuota.ts ADDED Viewed

	@@ -0,0 +1,51 @@

+/**
+ * Reads the current user's Claude daily quota + plan tier from the backend.
+ *
+ * Fetches once when the user becomes authenticated, and exposes a `refresh()`
+ * that callers invoke after a successful session-create / model-switch so the
+ * chip reflects the new count without a full page reload.
+ */
+import { useCallback, useEffect, useState } from 'react';
+import { useAgentStore } from '@/store/agentStore';
+import { apiFetch } from '@/utils/api';
+/** Plan tier values used by the quota UI. */
+export type PlanTier = 'free' | 'pro' | 'org';
+/** Camel-cased view of the `/api/user/quota` response as stored by useUserQuota. */
+export interface UserQuota {
+  /** From the response's `plan`; 'free' when the field is absent. */
+  plan: PlanTier;
+  /** From `claude_used_today`; 0 when absent. */
+  claudeUsedToday: number;
+  /** From `claude_daily_cap`; 1 when absent. */
+  claudeDailyCap: number;
+  /** From `claude_remaining`; 0 when absent. */
+  claudeRemaining: number;
+}
+/**
+ * React hook exposing the signed-in user's Claude quota.
+ *
+ * Returns `{ quota, loading, refresh }`. `quota` is `null` until a fetch
+ * succeeds, and is reset to `null` whenever the store's user is not
+ * authenticated so a previous user's numbers are not shown after sign-out.
+ * `refresh()` re-fetches `/api/user/quota`; on a non-OK response or a thrown
+ * fetch error the previous value is kept. It runs automatically on mount and
+ * whenever `user?.authenticated` changes.
+ */
+export function useUserQuota() {
+  const user = useAgentStore((s) => s.user);
+  const [quota, setQuota] = useState<UserQuota | null>(null);
+  const [loading, setLoading] = useState(false);
+  const refresh = useCallback(async () => {
+    if (!user?.authenticated) {
+      // Drop any quota fetched for a previously authenticated user instead
+      // of leaving stale numbers in state.
+      setQuota(null);
+      return;
+    }
+    setLoading(true);
+    try {
+      const res = await apiFetch('/api/user/quota');
+      if (!res.ok) return;
+      const data = await res.json();
+      // Map the snake_case payload to camelCase, defaulting missing fields.
+      setQuota({
+        plan: (data.plan ?? 'free') as PlanTier,
+        claudeUsedToday: data.claude_used_today ?? 0,
+        claudeDailyCap: data.claude_daily_cap ?? 1,
+        claudeRemaining: data.claude_remaining ?? 0,
+      });
+    } catch {
+      /* backend unreachable — leave previous value */
+    } finally {
+      setLoading(false);
+    }
+  }, [user?.authenticated]);
+  useEffect(() => {
+    refresh();
+  }, [refresh]);
+  return { quota, loading, refresh };
+}

frontend/src/lib/sse-chat-transport.ts CHANGED Viewed

@@ -356,6 +356,12 @@ export class SSEChatTransport implements ChatTransport<UIMessage> {
       // it can flag the session for the catch-up banner.
       this.sideChannel.onSessionDead(sessionId);
     }
     if (!response.ok) {
       const errorText = await response.text().catch(() => 'Request failed');
       throw new Error(`Chat request failed: ${response.status} ${errorText}`);

       // it can flag the session for the catch-up banner.
       this.sideChannel.onSessionDead(sessionId);
     }
+    if (response.status === 429) {
+      // Claude daily-quota gate tripped (any 429 from this request is treated
+      // as the gate). This exact message is the sentinel that useAgentChat's
+      // onError handler matches to surface the cap dialog instead of a
+      // generic error banner.
+      throw new Error('CLAUDE_QUOTA_EXHAUSTED');
+    }
     if (!response.ok) {
       const errorText = await response.text().catch(() => 'Request failed');
       throw new Error(`Chat request failed: ${response.status} ${errorText}`);

frontend/src/store/agentStore.ts CHANGED Viewed

@@ -108,6 +108,8 @@ interface AgentStore {
   user: User | null;
   error: string | null;
   llmHealthError: LLMHealthError | null;
   // Right panel (single-artifact pattern)
   panelData: PanelData | null;
@@ -153,6 +155,7 @@ interface AgentStore {
   setUser: (user: User | null) => void;
   setError: (error: string | null) => void;
   setLlmHealthError: (error: LLMHealthError | null) => void;
   setPanel: (data: PanelData, view?: PanelView, editable?: boolean) => void;
   setPanelView: (view: PanelView) => void;
@@ -247,6 +250,7 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
   user: null,
   error: null,
   llmHealthError: null,
   panelData: null,
   panelView: 'script',
@@ -358,6 +362,7 @@ export const useAgentStore = create<AgentStore>()((set, get) => ({
   setUser: (user) => set({ user }),
   setError: (error) => set({ error }),
   setLlmHealthError: (error) => set({ llmHealthError: error }),
   // ── Panel (single-artifact) ───────────────────────────────────────
   // Each setter also patches the active session's snapshot so that

   user: User | null;
   error: string | null;
   llmHealthError: LLMHealthError | null;
+  /** Set when a Claude-send hits the daily quota — ChatInput opens the cap dialog in response. */
+  claudeQuotaExhausted: boolean;
   // Right panel (single-artifact pattern)
   panelData: PanelData | null;
   setUser: (user: User | null) => void;
   setError: (error: string | null) => void;
   setLlmHealthError: (error: LLMHealthError | null) => void;
+  setClaudeQuotaExhausted: (exhausted: boolean) => void;
   setPanel: (data: PanelData, view?: PanelView, editable?: boolean) => void;
   setPanelView: (view: PanelView) => void;
   user: null,
   error: null,
   llmHealthError: null,
+  claudeQuotaExhausted: false,
   panelData: null,
   panelView: 'script',
   setUser: (user) => set({ user }),
   setError: (error) => set({ error }),
   setLlmHealthError: (error) => set({ llmHealthError: error }),
+  setClaudeQuotaExhausted: (exhausted) => set({ claudeQuotaExhausted: exhausted }),
   // ── Panel (single-artifact) ───────────────────────────────────────
   // Each setter also patches the active session's snapshot so that

frontend/src/utils/model.ts ADDED Viewed

	@@ -0,0 +1,15 @@

+/**
+ * Shared model-id constants used by session-create call sites and the
+ * ClaudeCapDialog "Use a free model" escape hatch.
+ *
+ * Keep in sync with MODEL_OPTIONS in components/Chat/ChatInput.tsx and
+ * AVAILABLE_MODELS in backend/routes/agent.py. Bare HF ids (no
+ * `huggingface/` prefix) — matches upstream's auto-router.
+ */
+export const CLAUDE_MODEL_PATH = 'anthropic/claude-opus-4-6';
+export const FIRST_FREE_MODEL_PATH = 'moonshotai/Kimi-K2.6';
+/** True when `modelPath` is a non-empty string starting with `anthropic/`; false for undefined or ''. */
+export function isClaudePath(modelPath: string | undefined): boolean {
+  return !!modelPath && modelPath.startsWith('anthropic/');
+}

tests/unit/test_user_quotas.py ADDED Viewed

	@@ -0,0 +1,116 @@

+"""Tests for backend/user_quotas.py — the in-memory Claude daily-quota store."""
+import asyncio
+import os
+import sys
+from pathlib import Path
+from unittest.mock import patch
+import pytest
+# The backend package isn't on sys.path by default; add it so we can import
+# the module under test without pulling in the whole FastAPI app.
+_BACKEND_DIR = Path(__file__).resolve().parent.parent.parent / "backend"
+if str(_BACKEND_DIR) not in sys.path:
+    sys.path.insert(0, str(_BACKEND_DIR))
+import user_quotas  # noqa: E402
+@pytest.fixture(autouse=True)
+def _reset_store():
+    """Reset the in-memory quota store before and after every test.
+
+    Autouse, so each test in this module starts from the state left by
+    ``user_quotas._reset_for_tests()`` and cleans up after itself.
+    """
+    user_quotas._reset_for_tests()
+    yield
+    user_quotas._reset_for_tests()
+def test_daily_cap_for_known_plans():
+    """"free" maps to the free cap; "pro" and "org" both map to the Pro cap."""
+    assert user_quotas.daily_cap_for("free") == user_quotas.CLAUDE_FREE_DAILY
+    assert user_quotas.daily_cap_for("pro") == user_quotas.CLAUDE_PRO_DAILY
+    assert user_quotas.daily_cap_for("org") == user_quotas.CLAUDE_PRO_DAILY
+def test_daily_cap_for_unknown_or_missing_defaults_to_free():
+    """Missing plans (None, "") get the free cap; an unrecognized plan does not.
+
+    NOTE: despite the test name, an unknown non-empty plan string is pinned to
+    the Pro cap by the last assertion.
+    """
+    assert user_quotas.daily_cap_for(None) == user_quotas.CLAUDE_FREE_DAILY
+    assert user_quotas.daily_cap_for("") == user_quotas.CLAUDE_FREE_DAILY
+    # An unrecognized plan string gets the Pro cap: the contract pinned here
+    # is that "free" (or a missing plan) is the only downgraded tier. If that
+    # ever flips, this test will flip too — adjust consciously.
+    assert user_quotas.daily_cap_for("mystery") == user_quotas.CLAUDE_PRO_DAILY
+@pytest.mark.asyncio
+async def test_increment_and_read_back_same_day():
+    """A new user reads 0; each increment returns the new count, which reads back."""
+    assert await user_quotas.get_claude_used_today("u1") == 0
+    assert await user_quotas.increment_claude("u1") == 1
+    assert await user_quotas.increment_claude("u1") == 2
+    assert await user_quotas.get_claude_used_today("u1") == 2
+@pytest.mark.asyncio
+async def test_independent_users_do_not_share_counts():
+    """Counts are tracked per user id: alice's two bumps do not affect bob's one."""
+    await user_quotas.increment_claude("alice")
+    await user_quotas.increment_claude("alice")
+    await user_quotas.increment_claude("bob")
+    assert await user_quotas.get_claude_used_today("alice") == 2
+    assert await user_quotas.get_claude_used_today("bob") == 1
+@pytest.mark.asyncio
+async def test_stale_day_resets_before_next_read():
+    """An entry dated on a past day reads as 0, and the next increment returns 1."""
+    await user_quotas.increment_claude("u1")
+    # Simulate yesterday's entry still in the store by overwriting the
+    # (day-string, count) tuple directly.
+    user_quotas._claude_counts["u1"] = ("2000-01-01", 99)
+    assert await user_quotas.get_claude_used_today("u1") == 0
+    # And a fresh increment restarts the count (first bump returns 1).
+    assert await user_quotas.increment_claude("u1") == 1
+@pytest.mark.asyncio
+async def test_concurrent_increments_under_lock_do_not_lose_writes():
+    """50 coroutines bumping the same user must land at exactly 50.
+
+    The increments are scheduled together with ``asyncio.gather`` so they
+    interleave on a single event loop.
+    """
+    await asyncio.gather(*[user_quotas.increment_claude("race") for _ in range(50)])
+    assert await user_quotas.get_claude_used_today("race") == 50
+@pytest.mark.asyncio
+async def test_refund_decrements_and_drops_entry_at_zero():
+    """Refunding the only use returns the count to 0 and removes the user's key."""
+    await user_quotas.increment_claude("u1")
+    assert await user_quotas.get_claude_used_today("u1") == 1
+    await user_quotas.refund_claude("u1")
+    assert await user_quotas.get_claude_used_today("u1") == 0
+    # The entry is deleted from the store rather than kept with a zero count.
+    assert "u1" not in user_quotas._claude_counts
+@pytest.mark.asyncio
+async def test_refund_on_nonexistent_user_is_noop():
+    """Refunding a user with no entry must not raise and leaves the count at 0."""
+    await user_quotas.refund_claude("ghost")  # should not raise
+    assert await user_quotas.get_claude_used_today("ghost") == 0
+@pytest.mark.asyncio
+async def test_refund_on_stale_day_resets_rather_than_underflow():
+    """Refunding against an entry from a past day leaves today's count at 0."""
+    # Seed the store directly with a (day-string, count) entry from a past day.
+    user_quotas._claude_counts["u1"] = ("2000-01-01", 5)
+    await user_quotas.refund_claude("u1")
+    # Stale entry dropped; today's count stays 0.
+    assert await user_quotas.get_claude_used_today("u1") == 0
+@pytest.mark.asyncio
+async def test_free_user_cap_reached_at_one():
+    """A user's first increment already meets or exceeds the free-tier cap."""
+    cap = user_quotas.daily_cap_for("free")
+    used = await user_quotas.increment_claude("freebie")
+    assert used == 1
+    assert used >= cap  # first bump exhausts the free tier (cap=1)
+@pytest.mark.asyncio
+async def test_pro_user_cap_reached_at_twenty():
+    """The Pro cap is pinned at 20, and the counter itself keeps counting past it."""
+    cap = user_quotas.daily_cap_for("pro")
+    assert cap == 20
+    for i in range(1, 21):
+        assert await user_quotas.increment_claude("pro_user") == i
+    # 21st would exceed — the gate in routes/agent.py enforces this; here
+    # we just confirm the counter tracks past the cap so that check works.
+    assert await user_quotas.increment_claude("pro_user") == 21