Upload alpha_factory/infra/llm_client.py with huggingface_hub
alpha_factory/infra/llm_client.py
ADDED
"""
LLM Client — unified interface to vLLM / Ollama with guided JSON generation.
All outputs are schema-constrained. No free-text alpha generation.
"""
import json
from typing import TypeVar

from openai import AsyncOpenAI
from pydantic import BaseModel

from ..config import LLMConfig

T = TypeVar("T", bound=BaseModel)


class LLMClient:
    """
    Async LLM client with structured JSON output.
    Connects to vLLM or Ollama (both expose an OpenAI-compatible API).
    """

    def __init__(self, config: LLMConfig):
        self.config = config
        self.client = AsyncOpenAI(
            base_url=config.base_url,
            api_key=config.api_key,
        )
        self._token_count = 0

    async def generate_json(
        self,
        prompt: str,
        schema: type[T],
        model: str | None = None,
        temperature: float | None = None,
        system_prompt: str = "You are a quantitative finance expert.",
    ) -> T:
        """
        Generate a structured JSON response conforming to the given Pydantic schema.
        Uses guided decoding via response_format (vLLM supports this natively).
        """
        model = model or self.config.mediumfish_model
        # `is not None` rather than `or`, so an explicit temperature of 0.0 is honored.
        temp = temperature if temperature is not None else self.config.temperature_generation

        # Build the JSON schema that constrains guided generation
        json_schema = schema.model_json_schema()

        response = await self.client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
            temperature=temp,
            max_tokens=self.config.max_tokens,
            response_format={
                "type": "json_schema",
                "json_schema": {
                    "name": schema.__name__,
                    "schema": json_schema,
                },
            },
        )

        content = response.choices[0].message.content
        self._token_count += response.usage.total_tokens if response.usage else 0

        # Parse and validate against the schema
        data = json.loads(content)
        return schema.model_validate(data)

    async def generate_text(
        self,
        prompt: str,
        model: str | None = None,
        temperature: float | None = None,
        system_prompt: str = "You are a quantitative finance expert.",
        max_tokens: int = 2048,
    ) -> str:
        """Generate a free-text response (for memos/reports only, never for expressions)."""
        model = model or self.config.mediumfish_model
        temp = temperature if temperature is not None else self.config.temperature_critique

        response = await self.client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
            temperature=temp,
            max_tokens=max_tokens,
        )

        content = response.choices[0].message.content
        self._token_count += response.usage.total_tokens if response.usage else 0
        return content or ""

    @property
    def tokens_used(self) -> int:
        return self._token_count

    def reset_token_count(self) -> None:
        self._token_count = 0
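
For context, a minimal usage sketch of the uploaded client follows. It is illustrative only: the AlphaIdea schema, the concrete model name, and the exact LLMConfig constructor fields are assumptions (the module only shows that the config carries base_url, api_key, mediumfish_model, temperature_generation, temperature_critique, and max_tokens); check alpha_factory/config.py for the real definition.

# Hypothetical usage sketch: schema, model name, and config values are
# illustrative assumptions, not taken from the repository.
import asyncio

from pydantic import BaseModel, Field

from alpha_factory.config import LLMConfig
from alpha_factory.infra.llm_client import LLMClient


class AlphaIdea(BaseModel):
    # Example schema; field names are invented for this sketch.
    name: str
    expression: str = Field(description="Formulaic alpha expression")
    rationale: str


async def main() -> None:
    config = LLMConfig(
        base_url="http://localhost:8000/v1",     # vLLM's OpenAI-compatible endpoint
        api_key="EMPTY",                         # local servers typically ignore the key
        mediumfish_model="qwen2.5-7b-instruct",  # placeholder model name
        temperature_generation=0.8,
        temperature_critique=0.2,
        max_tokens=4096,
    )
    client = LLMClient(config)

    # Guided JSON generation: the completion is decoded under AlphaIdea's
    # schema, then validated into a typed Pydantic object.
    idea = await client.generate_json(
        prompt="Propose a mean-reversion alpha over 5-day returns.",
        schema=AlphaIdea,
    )
    print(idea.expression, "| tokens used:", client.tokens_used)


asyncio.run(main())

One portability caveat: json_schema-style response_format depends on server support. vLLM implements it via guided decoding, while Ollama's OpenAI-compatible endpoint gained schema-constrained outputs only in later releases; older builds may honor only {"type": "json_object"}.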