Spaces:
Runtime error
Runtime error
| """Cost tracking utility for LLM API usage monitoring.""" | |
| from dataclasses import dataclass, field | |
| from typing import ClassVar | |
| from src.utils.logging import setup_logger | |
| logger = setup_logger(__name__) | |
@dataclass
class TokenUsage:
    """Token counts recorded for a single LLM call.

    Declared as a dataclass so instances can be built with keyword
    arguments (``TokenUsage(input_tokens=..., output_tokens=..., model=...)``),
    which is how ``CostTracker.track_usage`` constructs them.

    Attributes:
        input_tokens: Number of tokens sent to the model.
        output_tokens: Number of tokens produced by the model.
        model: Model name the call was made against.
    """

    input_tokens: int
    output_tokens: int
    model: str

    # NOTE(review): kept as a plain method (call it as ``usage.total_tokens()``);
    # confirm callers do not expect attribute-style access.
    def total_tokens(self) -> int:
        """Return the sum of input and output tokens for this call."""
        return self.input_tokens + self.output_tokens
@dataclass
class CostTracker:
    """
    Track costs for LLM API usage across different providers.

    Pricing updated: November 2025
    OpenRouter adds ~5% commission to provider pricing.
    The table values are used as-is; this class does not add any commission.

    Declared as a dataclass so that ``total_cost`` and ``usage_history`` are
    real per-instance attributes (each tracker gets its own history list).

    Attributes:
        total_cost: Running sum (USD) of every call passed to ``track_usage``.
        usage_history: One ``TokenUsage`` record per tracked call, in order.
    """

    # USD per single token. The literals are the per-1M-token prices
    # (input/output) divided by 1_000_000 - Verified by user (Nov 28, 2025)
    PRICING: ClassVar[dict[str, dict[str, float]]] = {
        # === FREE TIER (for testing) ===
        "x-ai/grok-4.1-fast:free": {"input": 0.0, "output": 0.0},
        "meta-llama/llama-3.3-70b-instruct:free": {"input": 0.0, "output": 0.0},
        "ollama": {"input": 0.0, "output": 0.0},  # Local
        # === CHEAP (testing/development) ===
        # OpenAI
        "openai/gpt-5-nano": {"input": 0.05 / 1_000_000, "output": 0.40 / 1_000_000},
        "openai/gpt-5-mini": {"input": 0.25 / 1_000_000, "output": 2.00 / 1_000_000},
        # === PRODUCTION ===
        # Google (enterprise credibility + advanced reasoning)
        "google/gemini-2.5-flash-lite": {
            "input": 0.10 / 1_000_000,
            "output": 0.40 / 1_000_000,
        },
        "google/gemini-3-pro-preview": {
            "input": 2.00 / 1_000_000,
            "output": 12.00 / 1_000_000,
        },
        # Anthropic (best for technical audiences - strong code/reasoning)
        "anthropic/claude-sonnet-4.5": {
            "input": 3.00 / 1_000_000,
            "output": 15.00 / 1_000_000,
        },
    }

    total_cost: float = field(default=0.0)
    # Quoted element type so the annotation is not evaluated at class creation
    # and therefore does not depend on definition order.
    usage_history: list["TokenUsage"] = field(default_factory=list)

    def calculate_cost(
        self, model: str, input_tokens: int, output_tokens: int
    ) -> float:
        """
        Calculate cost for a single LLM call.

        Does not modify ``total_cost`` or ``usage_history``.

        Args:
            model: Model name (e.g., "anthropic/claude-sonnet-4.5")
            input_tokens: Number of input tokens
            output_tokens: Number of output tokens

        Returns:
            Cost in USD
        """
        # Get pricing for model (fallback to GPT-5-mini if unknown)
        pricing = self.PRICING.get(model, self.PRICING["openai/gpt-5-mini"])
        cost = (input_tokens * pricing["input"]) + (output_tokens * pricing["output"])
        logger.debug(
            f"Cost calculated for {model}: ${cost:.4f} "
            f"(input: {input_tokens}, output: {output_tokens})"
        )
        return cost

    def track_usage(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """
        Track LLM usage and update total cost.

        Appends a ``TokenUsage`` record to ``usage_history`` and adds the
        call's cost to ``total_cost``.

        Args:
            model: Model name
            input_tokens: Number of input tokens
            output_tokens: Number of output tokens

        Returns:
            Cost for this call (USD)
        """
        usage = TokenUsage(
            input_tokens=input_tokens, output_tokens=output_tokens, model=model
        )
        cost = self.calculate_cost(model, input_tokens, output_tokens)
        self.usage_history.append(usage)
        self.total_cost += cost
        logger.info(
            f"Usage tracked: {model} - ${cost:.4f} (total: ${self.total_cost:.4f})"
        )
        return cost

    def check_budget(self, max_budget: float) -> None:
        """
        Check if total cost exceeds budget and raise exception if so.

        A total exactly equal to the budget is still within budget.

        Args:
            max_budget: Maximum allowed budget (USD)

        Raises:
            BudgetExceededError: If total cost exceeds budget
        """
        if self.total_cost > max_budget:
            raise BudgetExceededError(
                f"Cost ${self.total_cost:.2f} exceeds budget ${max_budget:.2f}"
            )

    def get_summary(self) -> dict:
        """
        Get summary of cost and usage.

        Per-model costs are recomputed from ``usage_history`` with the
        current ``PRICING`` table; ``total_cost`` is the running total,
        rounded to 4 decimal places.

        Returns:
            Dictionary with keys ``total_cost``, ``total_input_tokens``,
            ``total_output_tokens``, ``total_tokens``, ``calls`` and
            ``by_model`` (model name -> input_tokens/output_tokens/cost).
        """
        total_input = sum(u.input_tokens for u in self.usage_history)
        total_output = sum(u.output_tokens for u in self.usage_history)

        # Group by model
        model_costs: dict[str, dict[str, float]] = {}
        for usage in self.usage_history:
            entry = model_costs.setdefault(
                usage.model,
                {"input_tokens": 0, "output_tokens": 0, "cost": 0.0},
            )
            entry["input_tokens"] += usage.input_tokens
            entry["output_tokens"] += usage.output_tokens
            entry["cost"] += self.calculate_cost(
                usage.model, usage.input_tokens, usage.output_tokens
            )

        return {
            "total_cost": round(self.total_cost, 4),
            "total_input_tokens": total_input,
            "total_output_tokens": total_output,
            "total_tokens": total_input + total_output,
            "calls": len(self.usage_history),
            "by_model": model_costs,
        }
class BudgetExceededError(Exception):
    """Signal that accumulated API spend has gone past the allowed budget."""