File size: 5,347 Bytes
790b5af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
"""Cost tracking utility for LLM API usage monitoring."""

from dataclasses import dataclass, field
from typing import ClassVar

from src.utils.logging import setup_logger

# Module-wide logger, obtained from the project's setup_logger helper and
# named after this module so log records can be traced back to it.
logger = setup_logger(__name__)


@dataclass
class TokenUsage:
    """Record of the tokens consumed by one LLM call.

    Holds the prompt-side and completion-side token counts together with
    the identifier of the model that served the call.
    """

    # Tokens sent to the model.
    input_tokens: int
    # Tokens produced by the model.
    output_tokens: int
    # Identifier of the model that handled the call.
    model: str

    @property
    def total_tokens(self) -> int:
        """Return the combined input and output token count."""
        prompt_side = self.input_tokens
        completion_side = self.output_tokens
        return prompt_side + completion_side


@dataclass
class CostTracker:
    """
    Track costs for LLM API usage across different providers.

    Pricing updated: November 2025
    OpenRouter adds ~5% commission to provider pricing.
    """

    # Pricing per 1M tokens (input/output) - Verified by user (Nov 28, 2025)
    PRICING: ClassVar[dict[str, dict[str, float]]] = {
        # === FREE TIER (for testing) ===
        "x-ai/grok-4.1-fast:free": {"input": 0.0, "output": 0.0},
        "meta-llama/llama-3.3-70b-instruct:free": {"input": 0.0, "output": 0.0},
        "ollama": {"input": 0.0, "output": 0.0},  # Local
        # === CHEAP (testing/development) ===
        # OpenAI
        "openai/gpt-5-nano": {"input": 0.05 / 1_000_000, "output": 0.40 / 1_000_000},
        "openai/gpt-5-mini": {"input": 0.25 / 1_000_000, "output": 2.00 / 1_000_000},
        # === PRODUCTION ===
        # Google (enterprise credibility + advanced reasoning)
        "google/gemini-2.5-flash-lite": {
            "input": 0.10 / 1_000_000,
            "output": 0.40 / 1_000_000,
        },
        "google/gemini-3-pro-preview": {
            "input": 2.00 / 1_000_000,
            "output": 12.00 / 1_000_000,
        },
        # Anthropic (best for technical audiences - strong code/reasoning)
        "anthropic/claude-sonnet-4.5": {
            "input": 3.00 / 1_000_000,
            "output": 15.00 / 1_000_000,
        },
    }

    total_cost: float = field(default=0.0)
    usage_history: list[TokenUsage] = field(default_factory=list)

    def calculate_cost(
        self, model: str, input_tokens: int, output_tokens: int
    ) -> float:
        """
        Calculate cost for a single LLM call.

        Args:
            model: Model name (e.g., "anthropic/claude-3.5-sonnet")
            input_tokens: Number of input tokens
            output_tokens: Number of output tokens

        Returns:
            Cost in USD
        """
        # Get pricing for model (fallback to GPT-5-mini if unknown)
        pricing = self.PRICING.get(model, self.PRICING["openai/gpt-5-mini"])

        cost = (input_tokens * pricing["input"]) + (output_tokens * pricing["output"])

        logger.debug(
            f"Cost calculated for {model}: ${cost:.4f} "
            f"(input: {input_tokens}, output: {output_tokens})"
        )

        return cost

    def track_usage(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """
        Track LLM usage and update total cost.

        Args:
            model: Model name
            input_tokens: Number of input tokens
            output_tokens: Number of output tokens

        Returns:
            Cost for this call (USD)
        """
        usage = TokenUsage(
            input_tokens=input_tokens, output_tokens=output_tokens, model=model
        )

        cost = self.calculate_cost(model, input_tokens, output_tokens)

        self.usage_history.append(usage)
        self.total_cost += cost

        logger.info(
            f"Usage tracked: {model} - ${cost:.4f} (total: ${self.total_cost:.4f})"
        )

        return cost

    def check_budget(self, max_budget: float) -> None:
        """
        Check if total cost exceeds budget and raise exception if so.

        Args:
            max_budget: Maximum allowed budget (USD)

        Raises:
            BudgetExceededError: If total cost exceeds budget
        """
        if self.total_cost > max_budget:
            raise BudgetExceededError(
                f"Cost ${self.total_cost:.2f} exceeds budget ${max_budget:.2f}"
            )

    def get_summary(self) -> dict:
        """
        Get summary of cost and usage.

        Returns:
            Dictionary with cost summary
        """
        total_input = sum(u.input_tokens for u in self.usage_history)
        total_output = sum(u.output_tokens for u in self.usage_history)

        # Group by model
        model_costs = {}
        for usage in self.usage_history:
            if usage.model not in model_costs:
                model_costs[usage.model] = {
                    "input_tokens": 0,
                    "output_tokens": 0,
                    "cost": 0.0,
                }

            model_costs[usage.model]["input_tokens"] += usage.input_tokens
            model_costs[usage.model]["output_tokens"] += usage.output_tokens
            model_costs[usage.model]["cost"] += self.calculate_cost(
                usage.model, usage.input_tokens, usage.output_tokens
            )

        return {
            "total_cost": round(self.total_cost, 4),
            "total_input_tokens": total_input,
            "total_output_tokens": total_output,
            "total_tokens": total_input + total_output,
            "calls": len(self.usage_history),
            "by_model": model_costs,
        }


class BudgetExceededError(Exception):
    """Signal that accumulated API spend has gone over the allowed budget."""