| const pricingService = require('../services/pricingService') |
|
|
| |
// Static fallback pricing table, in USD per million tokens (MTok).
// Used only when pricingService has no dynamic pricing data for the model.
// Fields: input / output / cacheWrite (cache creation) / cacheRead.
const MODEL_PRICING = {
  // Claude Sonnet family — $3 in / $15 out, cache write 1.25x, cache read 0.1x
  'claude-3-5-sonnet-20241022': {
    input: 3.0,
    output: 15.0,
    cacheWrite: 3.75,
    cacheRead: 0.3
  },
  'claude-sonnet-4-20250514': {
    input: 3.0,
    output: 15.0,
    cacheWrite: 3.75,
    cacheRead: 0.3
  },
  'claude-sonnet-4-5-20250929': {
    input: 3.0,
    output: 15.0,
    cacheWrite: 3.75,
    cacheRead: 0.3
  },

  // Claude 3.5 Haiku
  // NOTE(review): Anthropic's current price sheet lists 3.5 Haiku at
  // $0.80/$4.00 per MTok; these values match Claude 3 Haiku instead —
  // confirm whether this is intentional before relying on this fallback.
  'claude-3-5-haiku-20241022': {
    input: 0.25,
    output: 1.25,
    cacheWrite: 0.3,
    cacheRead: 0.03
  },

  // Claude 3 Opus — $15 in / $75 out
  'claude-3-opus-20240229': {
    input: 15.0,
    output: 75.0,
    cacheWrite: 18.75,
    cacheRead: 1.5
  },

  // Claude Opus 4.1
  'claude-opus-4-1-20250805': {
    input: 15.0,
    output: 75.0,
    cacheWrite: 18.75,
    cacheRead: 1.5
  },

  // Claude 3 Sonnet (legacy)
  'claude-3-sonnet-20240229': {
    input: 3.0,
    output: 15.0,
    cacheWrite: 3.75,
    cacheRead: 0.3
  },

  // Claude 3 Haiku (legacy)
  'claude-3-haiku-20240307': {
    input: 0.25,
    output: 1.25,
    cacheWrite: 0.3,
    cacheRead: 0.03
  },

  // Default for unrecognized model ids — Sonnet-tier pricing.
  unknown: {
    input: 3.0,
    output: 15.0,
    cacheWrite: 3.75,
    cacheRead: 0.3
  }
}
|
|
/**
 * Cost calculation utilities for model API usage.
 *
 * Prefers dynamic per-token pricing from `pricingService`; falls back to the
 * static MODEL_PRICING table (USD per million tokens) when the service has no
 * data for the requested model. All methods are static.
 */
class CostCalculator {
  /**
   * Extract token counts from a raw usage payload (snake_case API fields).
   *
   * @param {object} usage - Raw usage object from the API response.
   * @returns {{inputTokens: number, outputTokens: number, cacheCreateTokens: number, cacheReadTokens: number, totalTokens: number}}
   * @private
   */
  static _extractTokens(usage) {
    const inputTokens = usage.input_tokens || 0
    const outputTokens = usage.output_tokens || 0
    const cacheCreateTokens = usage.cache_creation_input_tokens || 0
    const cacheReadTokens = usage.cache_read_input_tokens || 0
    return {
      inputTokens,
      outputTokens,
      cacheCreateTokens,
      cacheReadTokens,
      totalTokens: inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens
    }
  }

  /**
   * Delegate cost calculation to pricingService for requests that need its
   * richer model: detailed `cache_creation` breakdowns or 1M-context
   * ('[1m]') models with tiered long-context rates.
   *
   * @param {object} usage - Raw usage object.
   * @param {string} model - Model identifier.
   * @returns {object} Same result shape as {@link CostCalculator.calculateCost}.
   * @private
   */
  static _calculateWithPricingService(usage, model) {
    const result = pricingService.calculateCost(usage, model)
    const tokens = this._extractTokens(usage)

    // pricingService reports prices per token; convert to per-MTok so the
    // `pricing` block uses the same unit as the static fallback table.
    const PER_MTOK = 1000000

    return {
      model,
      pricing: {
        input: result.pricing.input * PER_MTOK,
        output: result.pricing.output * PER_MTOK,
        cacheWrite: result.pricing.cacheCreate * PER_MTOK,
        cacheRead: result.pricing.cacheRead * PER_MTOK
      },
      usingDynamicPricing: true,
      isLongContextRequest: result.isLongContextRequest || false,
      usage: tokens,
      costs: {
        input: result.inputCost,
        output: result.outputCost,
        cacheWrite: result.cacheCreateCost,
        cacheRead: result.cacheReadCost,
        total: result.totalCost
      },
      formatted: {
        input: this.formatCost(result.inputCost),
        output: this.formatCost(result.outputCost),
        cacheWrite: this.formatCost(result.cacheCreateCost),
        cacheRead: this.formatCost(result.cacheReadCost),
        total: this.formatCost(result.totalCost)
      },
      debug: {
        isOpenAIModel: model.includes('gpt') || model.includes('o1'),
        hasCacheCreatePrice: !!result.pricing.cacheCreate,
        cacheCreateTokens: tokens.cacheCreateTokens,
        cacheWritePriceUsed: result.pricing.cacheCreate * PER_MTOK,
        isLongContextModel: model && model.includes('[1m]'),
        isLongContextRequest: result.isLongContextRequest || false
      }
    }
  }

  /**
   * Calculate the cost of a single request.
   *
   * @param {object} usage - Raw usage object (input_tokens, output_tokens,
   *   cache_creation_input_tokens, cache_read_input_tokens, and optionally a
   *   detailed `cache_creation` object).
   * @param {string} [model='unknown'] - Model identifier.
   * @returns {object} { model, pricing, usingDynamicPricing, usage, costs, formatted, debug }
   *   where `pricing` is USD per MTok and `costs` are USD totals.
   */
  static calculateCost(usage, model = 'unknown') {
    // Detailed cache breakdowns and 1M-context requests require the
    // pricingService path (tiered pricing the flat table cannot express).
    if (
      (usage.cache_creation && typeof usage.cache_creation === 'object') ||
      (model && model.includes('[1m]'))
    ) {
      return this._calculateWithPricingService(usage, model)
    }

    const tokens = this._extractTokens(usage)
    const { inputTokens, outputTokens, cacheCreateTokens, cacheReadTokens } = tokens

    // Try dynamic pricing first; fall back to the static table.
    const pricingData = pricingService.getModelPricing(model)
    let pricing
    let usingDynamicPricing = false
    let isOpenAIModel = model.includes('gpt') || model.includes('o1')

    if (pricingData) {
      // Dynamic prices are per token; convert to per-MTok.
      const inputPrice = (pricingData.input_cost_per_token || 0) * 1000000
      const outputPrice = (pricingData.output_cost_per_token || 0) * 1000000
      const cacheReadPrice = (pricingData.cache_read_input_token_cost || 0) * 1000000
      let cacheWritePrice = (pricingData.cache_creation_input_token_cost || 0) * 1000000

      // Fold in the provider hint so detection matches what pricing uses.
      isOpenAIModel = isOpenAIModel || pricingData.litellm_provider === 'openai'

      // OpenAI bills cache writes at the normal input rate when no explicit
      // cache-write price is published.
      if (isOpenAIModel && !pricingData.cache_creation_input_token_cost && cacheCreateTokens > 0) {
        cacheWritePrice = inputPrice
      }

      pricing = {
        input: inputPrice,
        output: outputPrice,
        cacheWrite: cacheWritePrice,
        cacheRead: cacheReadPrice
      }
      usingDynamicPricing = true
    } else {
      pricing = MODEL_PRICING[model] || MODEL_PRICING['unknown']
    }

    const inputCost = (inputTokens / 1000000) * pricing.input
    const outputCost = (outputTokens / 1000000) * pricing.output
    const cacheWriteCost = (cacheCreateTokens / 1000000) * pricing.cacheWrite
    const cacheReadCost = (cacheReadTokens / 1000000) * pricing.cacheRead
    const totalCost = inputCost + outputCost + cacheWriteCost + cacheReadCost

    return {
      model,
      pricing,
      usingDynamicPricing,
      usage: tokens,
      costs: {
        input: inputCost,
        output: outputCost,
        cacheWrite: cacheWriteCost,
        cacheRead: cacheReadCost,
        total: totalCost
      },
      formatted: {
        input: this.formatCost(inputCost),
        output: this.formatCost(outputCost),
        cacheWrite: this.formatCost(cacheWriteCost),
        cacheRead: this.formatCost(cacheReadCost),
        total: this.formatCost(totalCost)
      },
      debug: {
        // Consistent with the detection used for cache-write pricing above
        // (previously this recomputation omitted the litellm_provider check).
        isOpenAIModel,
        hasCacheCreatePrice: !!pricingData?.cache_creation_input_token_cost,
        cacheCreateTokens,
        cacheWritePriceUsed: pricing.cacheWrite
      }
    }
  }

  /**
   * Calculate cost from pre-aggregated (camelCase) usage stats.
   *
   * Accepts either short (`inputTokens`) or prefixed (`totalInputTokens`)
   * field names. Uses `??` so an explicit 0 in the short field is respected
   * rather than falling through to the prefixed field.
   *
   * @param {object} aggregatedUsage - Aggregated usage stats.
   * @param {string} [model='unknown'] - Model identifier.
   * @returns {object} Same shape as {@link CostCalculator.calculateCost}.
   */
  static calculateAggregatedCost(aggregatedUsage, model = 'unknown') {
    const usage = {
      input_tokens: aggregatedUsage.inputTokens ?? aggregatedUsage.totalInputTokens ?? 0,
      output_tokens: aggregatedUsage.outputTokens ?? aggregatedUsage.totalOutputTokens ?? 0,
      cache_creation_input_tokens:
        aggregatedUsage.cacheCreateTokens ?? aggregatedUsage.totalCacheCreateTokens ?? 0,
      cache_read_input_tokens:
        aggregatedUsage.cacheReadTokens ?? aggregatedUsage.totalCacheReadTokens ?? 0
    }
    return this.calculateCost(usage, model)
  }

  /**
   * Look up static fallback pricing for a model (USD per MTok).
   *
   * @param {string} [model='unknown'] - Model identifier.
   * @returns {{input: number, output: number, cacheWrite: number, cacheRead: number}}
   */
  static getModelPricing(model = 'unknown') {
    // Special case: let gpt-5-codex reuse gpt-5 pricing if/when a gpt-5
    // entry is added to the table (currently no such entry exists, so this
    // falls through to the 'unknown' default).
    if (model === 'gpt-5-codex' && !MODEL_PRICING['gpt-5-codex']) {
      const gpt5Pricing = MODEL_PRICING['gpt-5']
      if (gpt5Pricing) {
        console.log(`Using gpt-5 pricing as fallback for ${model}`)
        return gpt5Pricing
      }
    }
    return MODEL_PRICING[model] || MODEL_PRICING['unknown']
  }

  /**
   * Get the full static pricing table.
   *
   * @returns {object} Shallow copy — nested pricing objects are shared, so
   *   callers must not mutate them.
   */
  static getAllModelPricing() {
    return { ...MODEL_PRICING }
  }

  /**
   * Check whether a model has an explicit static pricing entry.
   *
   * @param {string} model - Model identifier.
   * @returns {boolean} False for models that would use the 'unknown' default.
   */
  static isModelSupported(model) {
    return !!MODEL_PRICING[model]
  }

  /**
   * Format a USD cost with magnitude-dependent precision:
   * >= $1 → 2 decimals, >= $0.001 → 4 decimals, otherwise `decimals`.
   *
   * @param {number} cost - Cost in USD (may be negative, e.g. a deficit).
   * @param {number} [decimals=6] - Decimals for sub-millidollar amounts.
   * @returns {string} e.g. '$1.50', '$0.0042', '$0.000003'.
   */
  static formatCost(cost, decimals = 6) {
    // Compare on magnitude so negative costs pick the same precision tier as
    // their positive counterparts (previously -2 rendered as '$-2.000000').
    const magnitude = Math.abs(cost)
    if (magnitude >= 1) {
      return `$${cost.toFixed(2)}`
    } else if (magnitude >= 0.001) {
      return `$${cost.toFixed(4)}`
    } else {
      return `$${cost.toFixed(decimals)}`
    }
  }

  /**
   * Estimate savings from cache reads versus paying full input price.
   *
   * Uses the static fallback table only (not dynamic pricing).
   *
   * @param {object} usage - Raw usage object (reads cache_read_input_tokens).
   * @param {string} [model='unknown'] - Model identifier.
   * @returns {object} { normalCost, cacheCost, savings, savingsPercentage, formatted }
   */
  static calculateCacheSavings(usage, model = 'unknown') {
    const pricing = this.getModelPricing(model)
    const cacheReadTokens = usage.cache_read_input_tokens || 0

    // What the cached tokens would have cost at the full input rate.
    const normalCost = (cacheReadTokens / 1000000) * pricing.input
    const cacheCost = (cacheReadTokens / 1000000) * pricing.cacheRead
    const savings = normalCost - cacheCost
    const savingsPercentage = normalCost > 0 ? (savings / normalCost) * 100 : 0

    return {
      normalCost,
      cacheCost,
      savings,
      savingsPercentage,
      formatted: {
        normalCost: this.formatCost(normalCost),
        cacheCost: this.formatCost(cacheCost),
        savings: this.formatCost(savings),
        savingsPercentage: `${savingsPercentage.toFixed(1)}%`
      }
    }
  }
}
|
|
// Export the class itself — all methods are static, no instantiation needed.
module.exports = CostCalculator
|
|