# Hugging Face Space: vn6295337/RAG-document-assistant (status: Sleeping) | File size: 10,357 Bytes | revision f866820


# src/llm_providers.py
# Priority: GEMINI → GROQ → OPENROUTER → fallback

import os
import json
import time
from typing import Optional, Dict, Any

try:
    import requests
    _HAS_REQUESTS = True
except Exception:
    import urllib.request as _urllib_request
    import urllib.error as _urllib_error
    _HAS_REQUESTS = False


def _http_post(url: str, headers: dict, payload: dict, timeout: int = 30):
    """
    Perform HTTP POST request with JSON payload.

    The payload is serialized exactly once and the same bytes are sent by
    whichever HTTP backend is available: ``requests`` when the module-level
    import succeeded (``_HAS_REQUESTS``), otherwise ``urllib``.

    Args:
        url: Target URL
        headers: HTTP headers
        payload: JSON-serializable payload
        timeout: Request timeout in seconds

    Returns:
        Parsed JSON response

    Raises:
        ValueError: If url is empty or payload is not serializable
        requests.RequestException: If using requests and request fails
            (including a non-success status via ``raise_for_status``)
        urllib.error.URLError: If using urllib and request fails
        json.JSONDecodeError: If response is not valid JSON
    """
    if not url:
        raise ValueError("URL cannot be empty")

    try:
        data = json.dumps(payload).encode("utf-8")
    except Exception as e:
        raise ValueError(f"Failed to serialize payload to JSON: {str(e)}") from e

    if _HAS_REQUESTS:
        try:
            # Reuse the already-encoded body instead of serializing a second time.
            r = requests.post(url, headers=headers, data=data, timeout=timeout)
            r.raise_for_status()
            return r.json()
        except requests.RequestException:
            # Errors raised by requests itself propagate unchanged.
            raise
        except json.JSONDecodeError as e:
            # JSONDecodeError requires (msg, doc, pos); passing only a message
            # would raise TypeError and hide the real decoding problem.
            raise json.JSONDecodeError(
                f"Failed to decode JSON response: {e.msg}", e.doc, e.pos
            ) from e
    else:
        try:
            req = _urllib_request.Request(url, data=data, headers=headers, method="POST")
            with _urllib_request.urlopen(req, timeout=timeout) as resp:
                response_data = resp.read().decode("utf-8")
                return json.loads(response_data)
        except _urllib_error.URLError:
            raise
        except json.JSONDecodeError as e:
            # Same constructor contract as above: keep the document and position.
            raise json.JSONDecodeError(
                f"Failed to decode JSON response: {e.msg}", e.doc, e.pos
            ) from e


# GEMINI ------------------------------------------------------------

def _call_gemini(prompt: str, temperature: float, max_tokens: int, context: Optional[str]):
    """
    Call Gemini API with prompt and context.

    The API key is sent in the ``x-goog-api-key`` request header rather than
    in the URL, so that an error message which echoes the request URL cannot
    carry the key into the RuntimeError raised here.

    Args:
        prompt: User prompt
        temperature: Sampling temperature, converted with ``float()``
        max_tokens: Maximum tokens to generate, converted with ``int()``
        context: Additional context for the prompt; when truthy it is
            prepended to the prompt under a "Context:" heading

    Returns:
        Dict with 'text' and 'meta' keys; 'meta' holds the provider name,
        the model name and the elapsed request time in seconds

    Raises:
        RuntimeError: If GEMINI_API_KEY is not set, if the HTTP call fails,
            or if the response does not have the expected structure
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        raise RuntimeError("GEMINI_API_KEY missing")

    model = os.getenv("GEMINI_MODEL", "gemini-1.5-flash")
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
    # Credentials travel in a header, never in the query string.
    headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}

    content = prompt
    if context:
        content = f"Context:\n{context}\n\nUser question:\n{prompt}"

    payload = {
        "contents": [{"parts": [{"text": content}]}],
        "generationConfig": {
            "temperature": float(temperature),
            "maxOutputTokens": int(max_tokens)
        }
    }

    start = time.time()
    try:
        j = _http_post(url, headers, payload)
    except Exception as e:
        raise RuntimeError(f"Gemini API call failed: {str(e)}") from e
    elapsed = time.time() - start

    try:
        text = j["candidates"][0]["content"]["parts"][0]["text"]
    except (KeyError, IndexError, TypeError) as e:
        # Include a bounded dump of the response to aid debugging.
        text = json.dumps(j)[:1000]
        raise RuntimeError(
            f"Unexpected Gemini API response format: {str(e)}. Response: {text}"
        ) from e

    return {"text": text, "meta": {"provider": "gemini", "model": model, "elapsed_s": elapsed}}


# GROQ --------------------------------------------------------------

def _call_groq(prompt: str, temperature: float, max_tokens: int, context: Optional[str]):
    """
    Send a chat-completion request to Groq and return the generated text.

    Args:
        prompt: User prompt
        temperature: Sampling temperature, converted with ``float()``
        max_tokens: Maximum tokens to generate, converted with ``int()``
        context: Additional context; when truthy it is placed before the
            prompt in the user message under a "Context:" heading

    Returns:
        Dict with 'text' and 'meta' keys; 'meta' holds the provider name,
        the model name and the elapsed request time in seconds

    Raises:
        RuntimeError: If GROQ_API_KEY is not set, if the HTTP call fails,
            or if the response does not have the expected structure
    """
    token = os.getenv("GROQ_API_KEY")
    if not token:
        raise RuntimeError("GROQ_API_KEY missing")

    endpoint = "https://api.groq.com/openai/v1/chat/completions"
    model_name = os.getenv("GROQ_MODEL", "mixtral-8x7b-32768")

    # Build the user message: context first (if any), then the question.
    if context:
        user_text = f"Context:\n{context}\n\n{prompt}"
    else:
        user_text = prompt

    request_body = {
        "model": model_name,
        "messages": [
            {
                "role": "system",
                "content": "You are a concise assistant. Include citations if context is provided.",
            },
            {"role": "user", "content": user_text},
        ],
        "temperature": float(temperature),
        "max_tokens": int(max_tokens),
    }
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }

    started_at = time.time()
    try:
        reply = _http_post(endpoint, request_headers, request_body)
    except Exception as err:
        raise RuntimeError(f"Groq API call failed: {str(err)}")
    duration = time.time() - started_at

    try:
        answer = reply["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as err:
        # Report a bounded dump of the unexpected response.
        dumped = json.dumps(reply)[:1000]
        raise RuntimeError(f"Unexpected Groq API response format: {str(err)}. Response: {dumped}")

    return {
        "text": answer,
        "meta": {"provider": "groq", "model": model_name, "elapsed_s": duration},
    }


# OPENROUTER --------------------------------------------------------

def _call_openrouter(prompt: str, temperature: float, max_tokens: int, context: Optional[str]):
    """
    Call OpenRouter API with prompt and context.

    The endpoint and model can be overridden with the OPENROUTER_URL and
    OPENROUTER_MODEL environment variables. The defaults use OpenRouter's
    documented chat-completions endpoint and a provider-prefixed model id.

    Args:
        prompt: User prompt
        temperature: Sampling temperature, converted with ``float()``
        max_tokens: Maximum tokens to generate, converted with ``int()``
        context: Additional context for the prompt; when truthy it is placed
            before the prompt in the user message under a "Context:" heading

    Returns:
        Dict with 'text' and 'meta' keys; 'meta' holds the provider name,
        the model name and the elapsed request time in seconds

    Raises:
        RuntimeError: If OPENROUTER_API_KEY is not set, if the HTTP call
            fails, or if the response does not have the expected structure
    """
    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        raise RuntimeError("OPENROUTER_API_KEY missing")

    # Documented endpoint is under openrouter.ai/api/v1, and model ids are
    # namespaced by provider (e.g. "openai/...").
    url = os.getenv("OPENROUTER_URL", "https://openrouter.ai/api/v1/chat/completions")
    model = os.getenv("OPENROUTER_MODEL", "openai/gpt-4o-mini")

    system_msg = "You are a concise assistant. Use supplied context."

    user_content = f"Context:\n{context}\n\n{prompt}" if context else prompt
    messages = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": user_content}
    ]

    payload = {
        "model": model,
        "messages": messages,
        "temperature": float(temperature),
        "max_tokens": int(max_tokens)
    }

    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    start = time.time()
    try:
        j = _http_post(url, headers, payload)
    except Exception as e:
        raise RuntimeError(f"OpenRouter API call failed: {str(e)}") from e
    elapsed = time.time() - start

    try:
        text = j["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        # Include a bounded dump of the response to aid debugging.
        text = json.dumps(j)[:1000]
        raise RuntimeError(
            f"Unexpected OpenRouter API response format: {str(e)}. Response: {text}"
        ) from e

    return {"text": text, "meta": {"provider": "openrouter", "model": model, "elapsed_s": elapsed}}


# FALLBACK ----------------------------------------------------------

def _fallback(prompt: str, context: Optional[str]):
    """
    Build an offline response that echoes the prompt and a context excerpt.

    No network access is involved. NOTE(review): nothing in the visible part
    of this file calls this helper; confirm whether it is still needed.

    Args:
        prompt: User prompt, echoed after an "[offline]" marker
        context: Additional context; only its first 800 characters are
            included, and a falsy value yields an empty excerpt

    Returns:
        Dict with 'text' and 'meta' keys; 'meta' names the
        "local-fallback" provider
    """
    snippet = context[:800] if context else ""
    return {
        "text": f"[offline] {prompt}\n\nContext snippet:\n{snippet}",
        "meta": {"provider": "local-fallback"},
    }


# PUBLIC ENTRYPOINT -------------------------------------------------

def call_llm(prompt: str, temperature: float = 0.0, max_tokens: int = 512, context: Optional[str] = None, **kwargs):
    """
    Query the configured LLM providers in priority order and return the first answer.

    Providers are tried as Gemini, then Groq, then OpenRouter. A provider is
    attempted only when its API-key environment variable is set; any
    exception it raises is recorded and the next provider is tried. When no
    provider produces an answer, a local fallback dict is returned instead
    of raising.

    Args:
        prompt: User prompt; must be a non-empty string
        temperature: Sampling temperature; clamped to the range 0.0-1.0
        max_tokens: Maximum tokens to generate; raised to at least 1
        context: Additional context for the prompt
        **kwargs: Accepted for call compatibility; not forwarded to the
            provider functions

    Returns:
        Dict with 'text' and 'meta' keys. On success this is the provider's
        own result. Otherwise 'text' summarizes the collected errors (or
        states that no API keys are configured) and 'meta' contains the
        "local-fallback" provider name plus the list of error strings.

    Raises:
        ValueError: If prompt is empty or not a string
    """
    if not (prompt and isinstance(prompt, str)):
        raise ValueError("prompt must be a non-empty string")

    # Normalize the sampling parameters before handing them to any provider.
    temperature = max(0.0, min(1.0, float(temperature)))
    max_tokens = max(1, int(max_tokens))

    # (environment variable, label used in error strings, deferred call).
    # The calls are wrapped in lambdas so a provider function is only looked
    # up and invoked when its key is actually configured.
    attempts = (
        ("GEMINI_API_KEY", "gemini",
         lambda: _call_gemini(prompt, temperature, max_tokens, context)),
        ("GROQ_API_KEY", "groq",
         lambda: _call_groq(prompt, temperature, max_tokens, context)),
        ("OPENROUTER_API_KEY", "openrouter",
         lambda: _call_openrouter(prompt, temperature, max_tokens, context)),
    )

    failures = []
    for env_name, label, invoke in attempts:
        if not os.getenv(env_name):
            continue
        try:
            return invoke()
        except Exception as exc:
            # Remember why this provider failed and move on to the next one.
            failures.append(f"{label}: {str(exc)}")

    # Nothing answered: report what went wrong in a local fallback result.
    if failures:
        summary = "; ".join(failures)
    else:
        summary = "No API keys configured"
    return {
        "text": f"[All providers failed: {summary}] Using local fallback.",
        "meta": {"provider": "local-fallback", "errors": failures},
    }