Spaces:

pkgprateek
/

agentic-market-research

Runtime error

File size: 5,347 Bytes

790b5af

"""Cost tracking utility for LLM API usage monitoring."""

from dataclasses import dataclass, field
from typing import ClassVar

from src.utils.logging import setup_logger

# Module-level logger; CostTracker writes its log messages through it.
logger = setup_logger(__name__)


@dataclass
class TokenUsage:
    """Token counts recorded for one LLM call, tagged with the model name."""

    # Tokens sent to the model.
    input_tokens: int
    # Tokens produced by the model.
    output_tokens: int
    # Model identifier the call was made against.
    model: str

    @property
    def total_tokens(self) -> int:
        """Return input and output tokens added together."""
        sent, received = self.input_tokens, self.output_tokens
        return sent + received


@dataclass
class CostTracker:
    """
    Track costs for LLM API usage across different providers.

    Pricing updated: November 2025
    OpenRouter adds ~5% commission to provider pricing.
    NOTE: the rates in ``PRICING`` are applied as-is; the calculations in
    this class do not add any commission on top of them.

    Attributes:
        total_cost: Running sum (USD) of every call recorded by
            ``track_usage``.
        usage_history: One ``TokenUsage`` entry per recorded call, in the
            order the calls were tracked.
    """

    # USD per *single token* (input/output). Each rate is written as
    # "<USD per 1M tokens> / 1_000_000" so the per-million figure stays
    # readable next to the value actually used in the arithmetic.
    # Verified by user (Nov 28, 2025)
    PRICING: ClassVar[dict[str, dict[str, float]]] = {
        # === FREE TIER (for testing) ===
        "x-ai/grok-4.1-fast:free": {"input": 0.0, "output": 0.0},
        "meta-llama/llama-3.3-70b-instruct:free": {"input": 0.0, "output": 0.0},
        "ollama": {"input": 0.0, "output": 0.0},  # Local
        # === CHEAP (testing/development) ===
        # OpenAI
        "openai/gpt-5-nano": {"input": 0.05 / 1_000_000, "output": 0.40 / 1_000_000},
        "openai/gpt-5-mini": {"input": 0.25 / 1_000_000, "output": 2.00 / 1_000_000},
        # === PRODUCTION ===
        # Google (enterprise credibility + advanced reasoning)
        "google/gemini-2.5-flash-lite": {
            "input": 0.10 / 1_000_000,
            "output": 0.40 / 1_000_000,
        },
        "google/gemini-3-pro-preview": {
            "input": 2.00 / 1_000_000,
            "output": 12.00 / 1_000_000,
        },
        # Anthropic (best for technical audiences - strong code/reasoning)
        "anthropic/claude-sonnet-4.5": {
            "input": 3.00 / 1_000_000,
            "output": 15.00 / 1_000_000,
        },
    }

    # Model whose rates are used when a model name is missing from PRICING.
    FALLBACK_MODEL: ClassVar[str] = "openai/gpt-5-mini"

    total_cost: float = field(default=0.0)
    usage_history: list[TokenUsage] = field(default_factory=list)

    def _compute_cost(
        self, model: str, input_tokens: int, output_tokens: int
    ) -> float:
        """Return the USD cost for the token counts without any logging."""
        rates = self.PRICING.get(model)
        if rates is None:
            # Unknown model: estimate with the fallback model's rates.
            rates = self.PRICING[self.FALLBACK_MODEL]
        return (input_tokens * rates["input"]) + (output_tokens * rates["output"])

    def calculate_cost(
        self, model: str, input_tokens: int, output_tokens: int
    ) -> float:
        """
        Calculate cost for a single LLM call.

        Models that are not listed in ``PRICING`` are priced with the
        ``FALLBACK_MODEL`` rates; a warning is logged when that happens so
        the estimate is not mistaken for a verified price.

        Args:
            model: Model name (e.g., "anthropic/claude-sonnet-4.5")
            input_tokens: Number of input tokens
            output_tokens: Number of output tokens

        Returns:
            Cost in USD
        """
        if model not in self.PRICING:
            logger.warning(
                f"No pricing for model '{model}'; "
                f"estimating with {self.FALLBACK_MODEL} rates"
            )

        cost = self._compute_cost(model, input_tokens, output_tokens)

        logger.debug(
            f"Cost calculated for {model}: ${cost:.4f} "
            f"(input: {input_tokens}, output: {output_tokens})"
        )

        return cost

    def track_usage(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """
        Track LLM usage and update total cost.

        Appends a ``TokenUsage`` record to ``usage_history`` and adds the
        call's cost to ``total_cost``.

        Args:
            model: Model name
            input_tokens: Number of input tokens
            output_tokens: Number of output tokens

        Returns:
            Cost for this call (USD)
        """
        usage = TokenUsage(
            input_tokens=input_tokens, output_tokens=output_tokens, model=model
        )

        cost = self.calculate_cost(model, input_tokens, output_tokens)

        self.usage_history.append(usage)
        self.total_cost += cost

        logger.info(
            f"Usage tracked: {model} - ${cost:.4f} (total: ${self.total_cost:.4f})"
        )

        return cost

    def check_budget(self, max_budget: float) -> None:
        """
        Check if total cost exceeds budget and raise exception if so.

        A total exactly equal to ``max_budget`` is still within budget.

        Args:
            max_budget: Maximum allowed budget (USD)

        Raises:
            BudgetExceededError: If total cost exceeds budget
        """
        if self.total_cost > max_budget:
            raise BudgetExceededError(
                f"Cost ${self.total_cost:.2f} exceeds budget ${max_budget:.2f}"
            )

    def get_summary(self) -> dict:
        """
        Get summary of cost and usage.

        Returns:
            Dictionary with keys ``total_cost`` (rounded to 4 decimals),
            ``total_input_tokens``, ``total_output_tokens``,
            ``total_tokens``, ``calls`` and ``by_model``. ``by_model`` maps
            each model name to its own ``input_tokens``, ``output_tokens``
            and (unrounded) ``cost``.
        """
        total_input = sum(u.input_tokens for u in self.usage_history)
        total_output = sum(u.output_tokens for u in self.usage_history)

        # Group by model. Costs are re-derived from the pricing table with
        # the non-logging helper so building a summary does not emit one
        # log line per historical call.
        model_costs: dict[str, dict[str, float]] = {}
        for usage in self.usage_history:
            entry = model_costs.setdefault(
                usage.model,
                {"input_tokens": 0, "output_tokens": 0, "cost": 0.0},
            )
            entry["input_tokens"] += usage.input_tokens
            entry["output_tokens"] += usage.output_tokens
            entry["cost"] += self._compute_cost(
                usage.model, usage.input_tokens, usage.output_tokens
            )

        return {
            "total_cost": round(self.total_cost, 4),
            "total_input_tokens": total_input,
            "total_output_tokens": total_output,
            "total_tokens": total_input + total_output,
            "calls": len(self.usage_history),
            "by_model": model_costs,
        }


class BudgetExceededError(Exception):
    """Signal that accumulated API spend has gone over the allowed budget."""