# Source: math-solver / app / llm_client.py
# Cuong2004 -- "Deploy API from GitHub Actions" (395651c)
import os
import json
import asyncio
import logging
from openai import AsyncOpenAI
from typing import List, Dict, Any, Optional
from app.url_utils import openai_compatible_api_key, sanitize_env
logger = logging.getLogger(__name__)


class MultiLayerLLMClient:
    """OpenRouter chat client that falls back through an ordered list of models.

    Configuration is read from environment variables when an instance is built:

    * ``OPENROUTER_MODEL_1`` .. ``OPENROUTER_MODEL_3`` -- the fallback sequence,
      in ascending order. Unset or blank entries are skipped.
    * ``OPENROUTER_MODEL`` -- legacy single-model variable, consulted only when
      no numbered model is configured. ``DEFAULT_MODEL`` is used when it is
      unset or blank.
    * ``OPENROUTER_API_KEY_1`` or ``OPENROUTER_API_KEY`` -- the API key. The
      first non-empty one is used; there is no key rotation.

    Attributes:
        models: Model identifiers tried in order on every call.
        client: ``AsyncOpenAI`` instance pointed at the OpenRouter base URL, or
            ``None`` when no API key was found.
    """

    # Model used when neither the numbered nor the legacy variable names one.
    DEFAULT_MODEL = "google/gemini-2.0-flash-001"
    # Highest N probed for OPENROUTER_MODEL_N.
    MAX_NUMBERED_MODELS = 3
    # Pause, in seconds, before moving on to the next model after a failure.
    RETRY_DELAY_SECONDS = 1.0

    def __init__(self):
        """Load the model sequence and build the API client from the environment."""
        # 1. Models sequence loading. Values are stripped so stray whitespace
        #    or a trailing newline in an env value cannot end up in a model id.
        numbered = (
            (os.getenv(f"OPENROUTER_MODEL_{i}") or "").strip()
            for i in range(1, self.MAX_NUMBERED_MODELS + 1)
        )
        self.models: List[str] = [name for name in numbered if name]

        # Fallback to legacy OPENROUTER_MODEL if no numbered models found.
        if not self.models:
            # `or` instead of a getenv default: the default of os.getenv is only
            # used when the variable is absent, so a variable that is set but
            # empty would otherwise produce an empty model name.
            legacy_model = (os.getenv("OPENROUTER_MODEL") or "").strip()
            self.models = [legacy_model or self.DEFAULT_MODEL]

        # 2. Key selection (no rotation, always use the first available key).
        api_key = os.getenv("OPENROUTER_API_KEY_1") or os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            logger.error("[LLM] No OpenRouter API key found.")
            self.client = None
            return

        self.client = AsyncOpenAI(
            api_key=openai_compatible_api_key(api_key),
            base_url="https://openrouter.ai/api/v1",
            timeout=60.0,
            # Sent with every request made through this client.
            default_headers={
                "HTTP-Referer": "https://mathsolver.ai",
                "X-Title": "MathSolver Backend",
            },
        )

    async def chat_completions_create(
        self,
        messages: List[Dict[str, str]],
        response_format: Optional[Dict[str, str]] = None,
        **kwargs
    ) -> str:
        """Request a chat completion, falling back Model 1 -> Model 2 -> Model 3.

        Every call starts again from the first configured model. A model is
        considered failed when the request raises, the response has no
        ``choices``, or the first choice has empty content.

        Args:
            messages: Chat messages forwarded unchanged to the API.
            response_format: Optional response-format spec. It is forwarded only
                when provided; otherwise the parameter is left out of the call.
            **kwargs: Extra keyword arguments forwarded to
                ``client.chat.completions.create``.

        Returns:
            The message content of the first choice from the first model that
            produced non-empty content.

        Raises:
            ValueError: If no API client or no model is configured.
            Exception: Whatever the last model attempt raised (including the
                ``ValueError`` for an invalid or empty response) once every
                model has failed.
        """
        if not self.client:
            raise ValueError("No API client configured. Check your API keys.")
        if not self.models:
            # Without this guard the loop below would not run and the method
            # would silently return None despite its `-> str` contract.
            raise ValueError("No models configured. Check your OPENROUTER_MODEL settings.")

        # Leave response_format out when the caller did not supply one, so the
        # SDK default applies instead of an explicit None.
        request_args = dict(kwargs)
        if response_format is not None:
            request_args["response_format"] = response_format

        # Snapshot so the attempt count stays consistent for this call.
        models = list(self.models)
        total = len(models)

        for attempt_num, current_model in enumerate(models, start=1):
            try:
                logger.info(
                    "[LLM] Attempt %d/%d using Model: %s...",
                    attempt_num, total, current_model,
                )
                response = await self.client.chat.completions.create(
                    model=current_model,
                    messages=messages,
                    **request_args,
                )

                if not response or not getattr(response, "choices", None):
                    raise ValueError(f"Invalid response structure from model {current_model}")

                content = response.choices[0].message.content
                if content:
                    logger.info("[LLM] SUCCESS on attempt %d (%s).", attempt_num, current_model)
                    return content

                raise ValueError(f"Empty content from model {current_model}")

            except Exception as e:
                logger.warning(
                    "[LLM] FAILED on attempt %d (%s): %s: %s",
                    attempt_num, current_model, type(e).__name__, e,
                )
                if attempt_num >= total:
                    logger.error("[LLM] FINAL FAILURE after %d models.", attempt_num)
                    # Bare raise re-raises the active exception unchanged.
                    raise

                logger.info("[LLM] Retrying next model in %ss...", self.RETRY_DELAY_SECONDS)
                await asyncio.sleep(self.RETRY_DELAY_SECONDS)
# Lazily created, module-wide client shared by all callers (singleton-ish).
_llm_client = None


def get_llm_client() -> MultiLayerLLMClient:
    """Return the shared ``MultiLayerLLMClient``, building it on first use."""
    global _llm_client
    if _llm_client is not None:
        return _llm_client
    _llm_client = MultiLayerLLMClient()
    return _llm_client