Spaces:

Cuong2004
/

math-solver

Sleeping

Cuong2004

Deploy API from GitHub Actions

395651c 3 days ago

3.75 kB

	import os
	import json
	import asyncio
	import logging
	from openai import AsyncOpenAI
	from typing import List, Dict, Any, Optional
	from app.url_utils import openai_compatible_api_key, sanitize_env

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)

	class MultiLayerLLMClient:
	    """Async OpenRouter chat client with a sequential model-fallback chain.

	    Configuration is read from environment variables at construction time:

	    * ``OPENROUTER_MODEL_1`` .. ``OPENROUTER_MODEL_3`` -- ordered fallback
	      models; unset or empty entries are skipped.
	    * ``OPENROUTER_MODEL`` -- legacy single-model setting, consulted only
	      when no numbered model is configured.
	    * ``OPENROUTER_API_KEY_1`` / ``OPENROUTER_API_KEY`` -- API key; the
	      first non-empty one is used (no key rotation).

	    Attributes:
	        models: Ordered list of model identifiers to try.
	        client: The ``AsyncOpenAI`` client, or ``None`` when no API key
	            was found.
	    """

	    def __init__(self):
	        # 1. Models sequence loading: keep only the numbered models that are set.
	        numbered_models = (os.getenv(f"OPENROUTER_MODEL_{i}") for i in range(1, 4))
	        self.models = [model for model in numbered_models if model]

	        # Fallback to legacy OPENROUTER_MODEL if no numbered models found.
	        # `or` (rather than a getenv default) also covers a variable that is
	        # set but empty, which would otherwise yield the model name "".
	        if not self.models:
	            legacy_model = os.getenv("OPENROUTER_MODEL") or "google/gemini-2.0-flash-001"
	            self.models = [legacy_model]

	        # 2. Key selection (No rotation, always use the first available key)
	        api_key = os.getenv("OPENROUTER_API_KEY_1") or os.getenv("OPENROUTER_API_KEY")

	        if not api_key:
	            # Construction still succeeds; chat_completions_create() raises
	            # ValueError when it is called without a client.
	            logger.error("[LLM] No OpenRouter API key found.")
	            self.client = None
	            return

	        self.client = AsyncOpenAI(
	            api_key=openai_compatible_api_key(api_key),
	            base_url="https://openrouter.ai/api/v1",
	            timeout=60.0,
	            default_headers={
	                "HTTP-Referer": "https://mathsolver.ai",
	                "X-Title": "MathSolver Backend",
	            },
	        )

	    async def chat_completions_create(
	        self,
	        messages: List[Dict[str, str]],
	        response_format: Optional[Dict[str, Any]] = None,
	        **kwargs
	    ) -> str:
	        """
	        Implements Model Fallback Sequence: Model 1 -> Model 2 -> Model 3.
	        Always starts from Model 1 for every new call.

	        Args:
	            messages: Chat messages forwarded to the completion endpoint.
	            response_format: Optional response-format spec. It is forwarded
	                only when it is not ``None``, so the request omits the field
	                instead of sending an explicit null.
	            **kwargs: Extra keyword arguments passed through unchanged to
	                ``client.chat.completions.create``.

	        Returns:
	            The non-empty message content of the first choice from the first
	            model that succeeds.

	        Raises:
	            ValueError: If no client or no model is configured.
	            Exception: The error from the last model once every model has
	                failed (a ``ValueError`` for an empty or malformed response).
	        """
	        if not self.client:
	            raise ValueError("No API client configured. Check your API keys.")

	        # Snapshot so the attempt count stays consistent across awaits.
	        models = list(self.models)
	        if not models:
	            raise ValueError("No models configured.")

	        request_kwargs = dict(kwargs)
	        if response_format is not None:
	            request_kwargs["response_format"] = response_format

	        max_attempts = len(models)
	        retry_delay = 1.0  # second

	        for attempt_num, current_model in enumerate(models, start=1):
	            try:
	                logger.info(
	                    "[LLM] Attempt %s/%s using Model: %s...",
	                    attempt_num, max_attempts, current_model,
	                )

	                response = await self.client.chat.completions.create(
	                    model=current_model,
	                    messages=messages,
	                    **request_kwargs
	                )

	                if not response or not getattr(response, "choices", None):
	                    raise ValueError(f"Invalid response structure from model {current_model}")

	                content = response.choices[0].message.content
	                if content:
	                    logger.info("[LLM] SUCCESS on attempt %s (%s).", attempt_num, current_model)
	                    return content

	                # Empty content counts as a failure so the next model is tried.
	                raise ValueError(f"Empty content from model {current_model}")

	            except Exception as e:
	                err_msg = f"{type(e).__name__}: {str(e)}"
	                logger.warning(
	                    "[LLM] FAILED on attempt %s (%s): %s",
	                    attempt_num, current_model, err_msg,
	                )

	                if attempt_num >= max_attempts:
	                    logger.error("[LLM] FINAL FAILURE after %s models.", attempt_num)
	                    # Bare raise keeps the original traceback intact.
	                    raise

	                logger.info("[LLM] Retrying next model in %ss...", retry_delay)
	                await asyncio.sleep(retry_delay)

	# Shared module-level client; stays None until get_llm_client() first runs.
	_llm_client = None

	def get_llm_client() -> MultiLayerLLMClient:
	    """Return the shared MultiLayerLLMClient, constructing it on first use.

	    The instance is cached in the module-level ``_llm_client`` variable, so
	    later calls return the same object instead of building a new one.
	    """
	    global _llm_client
	    if _llm_client is not None:
	        return _llm_client
	    _llm_client = MultiLayerLLMClient()
	    return _llm_client