| """LLM provider client for structured-output annotation. |
| |
| Supports three OpenAI-compatible providers via parametric base URL: |
| - openrouter : https://openrouter.ai/api/v1 (MoE: many models behind one key) |
| - mistral : https://api.mistral.ai/v1 |
| - openai : https://api.openai.com/v1 |
| - ilaas : https://llm.ilaas.fr/v1 (documentation: https://www.ilaas.fr/services-inference/) |
| |
| All three accept the same OpenAI Chat Completions request shape, including |
| `response_format` (json_schema strict on OpenAI; json_object on Mistral; varies |
| on OpenRouter — we auto-fall back if the strict mode is rejected). |
| """ |
| from __future__ import annotations |
|
|
| import asyncio |
| import json |
| import time |
| from dataclasses import dataclass |
| from typing import Optional |
|
|
| import httpx |
|
|
| from schemas import AnnotationSchema, to_json_schema, validate as schema_validate |
| from prompts import VALIDATION_RETRY |
|
|
| DEFAULT_TIMEOUT = 60.0 |
|
|
|
|
| BASE_URLS = { |
| "openrouter": "https://openrouter.ai/api/v1", |
| "mistral": "https://api.mistral.ai/v1", |
| "openai": "https://api.openai.com/v1", |
| "ilaas": "https://llm.ilaas.fr/v1", |
| } |
|
|
| PROVIDERS = tuple(BASE_URLS.keys()) |
|
|
| CURATED_MODELS_BY_PROVIDER: dict[str, list[str]] = { |
| "openrouter": [ |
| "openai/gpt-oss-20b:free", |
| "google/gemma-4-26b-a4b-it:free", |
| "meta-llama/llama-3.3-70b-instruct:free", |
| "qwen/qwen3-next-80b-a3b-instruct:free", |
| "deepseek/deepseek-v4-flash:free", |
| "mistralai/mistral-nemo", |
| "mistralai/mistral-small-24b-instruct-2501", |
| "mistralai/ministral-3b-2512", |
| ], |
| "mistral": [ |
| "mistral-small-2603", |
| "mistral-large-2512", |
| "ministral-8b-2512", |
| "ministral-3b-2512", |
| ], |
| "openai": [ |
| "gpt-5-mini-2025-08-07", |
| "gpt-5-nano-2025-08-07", |
| "gpt-5-2025-08-07", |
| "gpt-4o-mini-2024-07-18", |
| ], |
| "ilaas": [ |
| "gemma-4-31b", |
| "gpt-oss-120b", |
| "llama-3.1-8b", |
| "llama-3.3-70b", |
| "qwen-3.6-35b-instruct", |
| "mistral-small-3.2-24b", |
| ] |
| } |
|
|
| |
| CURATED_MODELS = CURATED_MODELS_BY_PROVIDER["openrouter"] |
|
|
|
|
| @dataclass |
| class ModelResult: |
| model: str |
| ok: bool |
| annotation: Optional[dict] |
| latency_s: float |
| error: str = "" |
| raw: str = "" |
|
|
|
|
| def _build_headers(provider: str, api_key: str) -> dict: |
| h = { |
| "Authorization": f"Bearer {api_key}", |
| "Content-Type": "application/json", |
| } |
| if provider == "openrouter": |
| h["HTTP-Referer"] = "https://lrec2026-llm-annotator.local" |
| h["X-Title"] = "LREC2026 LLM-as-Annotator" |
| return h |
|
|
| class LLMClient: |
| def __init__(self, provider: str, api_key: str, timeout: float = DEFAULT_TIMEOUT): |
| if provider not in BASE_URLS: |
| raise ValueError(f"Unknown provider {provider!r}; expected one of {tuple(BASE_URLS)}") |
| self.provider = provider |
| self.api_key = api_key |
| self.base_url = BASE_URLS[provider] |
| self.endpoint = self.base_url + "/chat/completions" |
| self.headers = _build_headers(provider, api_key) |
| self.timeout = timeout |
| self._client: httpx.AsyncClient | None = None |
|
|
| async def __aenter__(self): |
| self._client = httpx.AsyncClient( |
| timeout=self.timeout, |
| limits=httpx.Limits( |
| max_connections=20, |
| max_keepalive_connections=10, |
| ), |
| ) |
| return self |
|
|
| async def __aexit__(self, exc_type, exc, tb): |
| if self._client: |
| await self._client.aclose() |
| self._client = None |
|
|
| async def annotate_one( |
| self, |
| *, |
| system: str, |
| user: str, |
| schema: AnnotationSchema, |
| model: str, |
| temperature: float = 0.0, |
| timeout: float = DEFAULT_TIMEOUT, |
| ) -> ModelResult: |
| """Call one model, validate JSON. One retry on schema-validation failure.""" |
| print(f"[LLM] start provider={self.provider} model={model}") |
| json_schema = to_json_schema(schema) |
| start = time.time() |
| msgs = [{"role": "system", "content": system}, {"role": "user", "content": user}] |
| try: |
| client = self._client |
| close_after = False |
| if client is None: |
| client = httpx.AsyncClient(timeout=timeout) |
| close_after = True |
| try: |
| raw_text = await self._call(client, msgs, json_schema, model, temperature) |
| ann, err = self._parse_and_validate(raw_text, schema) |
| if err: |
| retry_msg = VALIDATION_RETRY + f"\n\nValidator errors:\n{err}\n\nPrevious response:\n{raw_text}" |
| msgs.append({"role": "assistant", "content": raw_text}) |
| msgs.append({"role": "user", "content": retry_msg}) |
| raw_text = await self._call(client, msgs, json_schema, model, temperature) |
| ann, err = self._parse_and_validate(raw_text, schema) |
| if err: |
| print( |
| f"[LLM] error provider={self.provider} model={model} latency={time.time() - start:.2f}s error={err}") |
| return ModelResult(model=model, ok=False, annotation=None, latency_s=time.time() - start, error=err, |
| raw=raw_text) |
| print(f"[LLM] done provider={self.provider} model={model} latency={time.time() - start:.2f}s") |
| return ModelResult(model=model, ok=True, annotation=ann, latency_s=time.time() - start, raw=raw_text) |
| finally: |
| if close_after: |
| await client.aclose() |
| except Exception as e: |
| print(f"[LLM] error provider={self.provider} model={model} latency={time.time() - start:.2f}s error={e}") |
| return ModelResult(model=model, ok=False, annotation=None, latency_s=time.time() - start, error=str(e)) |
|
|
| async def annotate_many( |
| self, |
| *, |
| models: list[str], |
| system: str, |
| user: str, |
| schema: AnnotationSchema, |
| temperature: float = 0.0, |
| timeout: float = DEFAULT_TIMEOUT, |
| ) -> list[ModelResult]: |
| coros = [ |
| self.annotate_one( |
| system=system, user=user, schema=schema, model=m, temperature=temperature, timeout=timeout |
| ) |
| for m in models |
| ] |
| return await asyncio.gather(*coros) |
|
|
| async def _call(self, client: httpx.AsyncClient, msgs: list[dict], json_schema: dict, model: str, |
| temperature: float) -> str: |
| |
| |
| if self.provider in {"mistral", "ilaas"}: |
| payload = { |
| "model": model, |
| "messages": msgs, |
| "temperature": temperature, |
| "response_format": {"type": "json_object"}, |
| } |
| else: |
| payload = { |
| "model": model, |
| "messages": msgs, |
| "temperature": temperature, |
| "response_format": { |
| "type": "json_schema", |
| "json_schema": { |
| "name": "annotation", |
| "strict": True, |
| "schema": json_schema, |
| }, |
| }, |
| } |
| resp = await client.post(self.endpoint, headers=self.headers, json=payload) |
| if resp.status_code >= 400: |
| payload["response_format"] = {"type": "json_object"} |
| resp = await client.post(self.endpoint, headers=self.headers, json=payload) |
| if resp.status_code >= 400: |
| payload.pop("response_format", None) |
| resp = await client.post(self.endpoint, headers=self.headers, json=payload) |
| resp.raise_for_status() |
| data = resp.json() |
| return data["choices"][0]["message"]["content"] or "" |
|
|
| @staticmethod |
| def _parse_and_validate(raw_text: str, schema: AnnotationSchema) -> tuple[Optional[dict], str]: |
| text = (raw_text or "").strip() |
| if text.startswith("```"): |
| text = text.strip("`") |
| if text.lower().startswith("json"): |
| text = text[4:] |
| text = text.strip() |
| try: |
| ann = json.loads(text) |
| except json.JSONDecodeError as e: |
| return None, f"Invalid JSON: {e}" |
| ok, errors = schema_validate(schema, ann) |
| if not ok: |
| return None, "; ".join(errors[:10]) |
| return ann, "" |
|
|
|
|
| def test_connection_sync(api_key: str, provider: str = "openrouter", model: Optional[str] = None) -> tuple[bool, str]: |
| """Quick blocking test for the 'Test' button in the key modal.""" |
| if provider not in BASE_URLS: |
| return False, f"Unknown provider {provider!r}" |
| if not model: |
| models = CURATED_MODELS_BY_PROVIDER.get(provider) or [] |
| model = models[0] if models else None |
| if not model: |
| return False, "No model configured for this provider." |
| url = BASE_URLS[provider] + "/chat/completions" |
| try: |
| resp = httpx.post( |
| url, |
| headers=_build_headers(provider, api_key), |
| json={ |
| "model": model, |
| "messages": [{"role": "user", "content": "Reply with the single word: OK"}], |
| "max_tokens": 5, |
| "temperature": 0.0, |
| }, |
| timeout=20.0, |
| ) |
| if resp.status_code >= 400: |
| return False, f"HTTP {resp.status_code}: {resp.text[:200]}" |
| content = resp.json()["choices"][0]["message"]["content"] |
| return True, f"Connected ({provider} / {model}). Reply: {content!r}" |
| except Exception as e: |
| return False, str(e) |
|
|
|
|
| |
| class OpenRouterClient(LLMClient): |
| def __init__(self, api_key: str, **kwargs): |
| super().__init__(provider="openrouter", api_key=api_key) |
|
|