vn6295337's picture
Initial commit: Enterprise-AI-Gateway - Secure LLM gateway
bb0c63f
"""
Provider configuration with pricing and performance data
"""
PROVIDER_CONFIG = {
"gemini": {
"name": "Google Gemini",
"models": {
"gemini-2.0-flash-exp": {
"price_per_1m_input": 0.075,
"price_per_1m_output": 0.30,
"avg_latency_ms": 120,
"context_window": 1048576,
},
"gemini-1.5-pro": {
"price_per_1m_input": 1.25,
"price_per_1m_output": 5.00,
"avg_latency_ms": 150,
"context_window": 2097152,
},
"gemini-1.0-pro": {
"price_per_1m_input": 0.50,
"price_per_1m_output": 1.50,
"avg_latency_ms": 130,
"context_window": 32760,
}
}
},
"groq": {
"name": "Groq",
"models": {
"llama-3.3-70b-versatile": {
"price_per_1m_input": 0.59,
"price_per_1m_output": 0.79,
"avg_latency_ms": 87,
"context_window": 128000,
},
"llama3-70b": {
"price_per_1m_input": 0.59,
"price_per_1m_output": 0.79,
"avg_latency_ms": 90,
"context_window": 8192,
},
"mixtral-8x7b": {
"price_per_1m_input": 0.24,
"price_per_1m_output": 0.24,
"avg_latency_ms": 95,
"context_window": 32768,
}
}
},
"openrouter": {
"name": "OpenRouter",
"models": {
"google/gemini-2.0-flash-exp:free": {
"price_per_1m_input": 0.0,
"price_per_1m_output": 0.0,
"avg_latency_ms": 200,
"context_window": 1048576,
},
"gpt-4": {
"price_per_1m_input": 30.0,
"price_per_1m_output": 60.0,
"avg_latency_ms": 250,
"context_window": 128000,
},
"gpt-3.5-turbo": {
"price_per_1m_input": 0.50,
"price_per_1m_output": 1.50,
"avg_latency_ms": 180,
"context_window": 16385,
},
"claude-3-opus": {
"price_per_1m_input": 15.0,
"price_per_1m_output": 75.0,
"avg_latency_ms": 300,
"context_window": 200000,
}
}
}
}
def get_model_pricing(provider: str, model: str) -> dict:
"""Get pricing info for a specific provider/model combination"""
if provider in PROVIDER_CONFIG:
models = PROVIDER_CONFIG[provider].get("models", {})
if model in models:
return models[model]
return None
def estimate_cost(provider: str, model: str, input_tokens: int, output_tokens: int) -> float:
"""Estimate cost for a request in USD"""
pricing = get_model_pricing(provider, model)
if not pricing:
return 0.0
input_cost = (input_tokens / 1_000_000) * pricing.get("price_per_1m_input", 0)
output_cost = (output_tokens / 1_000_000) * pricing.get("price_per_1m_output", 0)
return round(input_cost + output_cost, 6)