Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import asyncio | |
| import logging | |
| from openai import AsyncOpenAI | |
| from typing import List, Dict, Any, Optional | |
| from app.url_utils import openai_compatible_api_key, sanitize_env | |
| logger = logging.getLogger(__name__) | |
class MultiLayerLLMClient:
    """OpenRouter chat client that falls back through an ordered list of models.

    The model sequence is read from ``OPENROUTER_MODEL_1`` .. ``OPENROUTER_MODEL_3``.
    When none of those is set, the legacy ``OPENROUTER_MODEL`` variable is used,
    and finally ``DEFAULT_MODEL``. A single API key is used for every request;
    there is no key rotation.
    """

    # Model used when nothing usable is configured through the environment.
    DEFAULT_MODEL = "google/gemini-2.0-flash-001"
    # How many numbered OPENROUTER_MODEL_<n> variables are consulted.
    MAX_MODEL_SLOTS = 3
    # Pause between a failed attempt and the next model, in seconds.
    RETRY_DELAY = 1.0

    def __init__(self):
        """Load the model sequence and build the API client from the environment.

        ``self.client`` is set to ``None`` (and an error is logged) when neither
        ``OPENROUTER_API_KEY_1`` nor ``OPENROUTER_API_KEY`` is set; in that case
        ``chat_completions_create`` raises ``ValueError``.
        """
        # 1. Models sequence loading
        self.models = self._load_models()

        # 2. Key selection (no rotation, always use the first available key)
        api_key = os.getenv("OPENROUTER_API_KEY_1") or os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            logger.error("[LLM] No OpenRouter API key found.")
            self.client = None
            return

        self.client = AsyncOpenAI(
            # NOTE(review): openai_compatible_api_key is a project helper whose
            # exact normalisation is not visible from this file.
            api_key=openai_compatible_api_key(api_key),
            base_url="https://openrouter.ai/api/v1",
            timeout=60.0,
            # Sent with every request made through this client.
            default_headers={
                "HTTP-Referer": "https://mathsolver.ai",
                "X-Title": "MathSolver Backend",
            },
        )

    @classmethod
    def _load_models(cls) -> List[str]:
        """Return the configured model names in fallback order (never empty).

        Blank or whitespace-only environment values are ignored so that an
        empty model name is never sent to the API.
        """
        numbered = (
            os.getenv(f"OPENROUTER_MODEL_{slot}", "").strip()
            for slot in range(1, cls.MAX_MODEL_SLOTS + 1)
        )
        models = [name for name in numbered if name]
        if models:
            return models
        # Fallback to legacy OPENROUTER_MODEL if no numbered models were found.
        # `or` (rather than a getenv default) also covers a set-but-empty variable.
        legacy_model = os.getenv("OPENROUTER_MODEL", "").strip()
        return [legacy_model or cls.DEFAULT_MODEL]

    @staticmethod
    def _optional_params(
        response_format: Optional[Dict[str, Any]],
        extra: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Return the optional request parameters, omitting an unset response_format."""
        params = dict(extra)
        # Passing None explicitly would send a null `response_format` instead of
        # leaving the parameter out of the request.
        if response_format is not None:
            params["response_format"] = response_format
        return params

    @staticmethod
    def _extract_content(response: Any, model: str) -> str:
        """Return the first choice's message content or raise ValueError if unusable."""
        if not response or not getattr(response, "choices", None):
            raise ValueError(f"Invalid response structure from model {model}")
        content = response.choices[0].message.content
        if not content:
            raise ValueError(f"Empty content from model {model}")
        return content

    async def chat_completions_create(
        self,
        messages: List[Dict[str, str]],
        response_format: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> str:
        """
        Implements Model Fallback Sequence: Model 1 -> Model 2 -> Model 3.
        Always starts from Model 1 for every new call.

        Args:
            messages: Chat messages forwarded unchanged to the completions API.
            response_format: Optional response-format spec; omitted from the
                request entirely when ``None``.
            **kwargs: Extra keyword arguments forwarded to
                ``client.chat.completions.create``.

        Returns:
            The non-empty text content of the first choice of the first model
            that answers successfully.

        Raises:
            ValueError: If no client or no model is configured, or if the last
                model returned an empty or malformed response.
            Exception: Whatever the last model's request raised, re-raised
                unchanged once every model has failed.
        """
        if not self.client:
            raise ValueError("No API client configured. Check your API keys.")

        # Snapshot so the sequence cannot change while awaiting a request.
        models = list(self.models)
        if not models:
            # Without this guard the loop would be skipped and None returned.
            raise ValueError("No models configured. Check your OPENROUTER_MODEL settings.")

        params = self._optional_params(response_format, kwargs)
        max_attempts = len(models)

        for attempt_num, current_model in enumerate(models, start=1):
            try:
                logger.info(
                    "[LLM] Attempt %d/%d using Model: %s...",
                    attempt_num, max_attempts, current_model,
                )
                response = await self.client.chat.completions.create(
                    model=current_model,
                    messages=messages,
                    **params
                )
                content = self._extract_content(response, current_model)
            except Exception as exc:
                # Any failure (transport, API error, unusable response) moves on
                # to the next model; only the last failure reaches the caller.
                logger.warning(
                    "[LLM] FAILED on attempt %d (%s): %s: %s",
                    attempt_num, current_model, type(exc).__name__, exc,
                )
                if attempt_num >= max_attempts:
                    logger.error("[LLM] FINAL FAILURE after %d models.", attempt_num)
                    raise
                logger.info("[LLM] Retrying next model in %ss...", self.RETRY_DELAY)
                await asyncio.sleep(self.RETRY_DELAY)
            else:
                logger.info(
                    "[LLM] SUCCESS on attempt %d (%s).", attempt_num, current_model
                )
                return content
# Module-wide client instance, created on first use and then shared (singleton-ish).
_llm_client = None


def get_llm_client() -> MultiLayerLLMClient:
    """Return the shared MultiLayerLLMClient, constructing it on the first call."""
    global _llm_client
    if _llm_client is not None:
        return _llm_client
    _llm_client = MultiLayerLLMClient()
    return _llm_client