Pablo
fix: test_mcp_server 12 failures resolved — model fields, registry API, GPU label
466cc3d
"""Async HTTP client for vLLM OpenAI-compatible API."""
import logging
from typing import Any
import httpx
from apohara_context_forge.config import settings
logger = logging.getLogger(__name__)


class vLLMClient:
    """Async client for vLLM server."""

    def __init__(self, base_url: str | None = None, api_key: str | None = None):
        self._base_url = base_url or settings.vllm_base_url
        self._api_key = api_key or settings.vllm_api_key
        self._client: httpx.AsyncClient | None = None
    def _ensure_client(self) -> httpx.AsyncClient:
        """Create the underlying httpx client on first use."""
        if self._client is None:
            self._client = httpx.AsyncClient(
                base_url=self._base_url,
                headers={"Authorization": f"Bearer {self._api_key}"},
                timeout=60.0,
            )
        return self._client

    async def __aenter__(self) -> "vLLMClient":
        # Reuse any existing client rather than overwriting (and leaking) it.
        self._ensure_client()
        return self

    async def __aexit__(self, *args) -> None:
        await self.aclose()

    async def aclose(self) -> None:
        """Close the underlying httpx client. Safe to call multiple times."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None
    async def complete(
        self,
        prompt: str,
        max_tokens: int = 256,
        temperature: float = 0.7,
        **kwargs,
    ) -> dict[str, Any]:
        """Send completion request to vLLM."""
        client = self._ensure_client()
        payload = {
            "model": settings.vllm_model,
            "prompt": prompt,
            "max_tokens": max_tokens,
            "temperature": temperature,
            **kwargs,
        }
        try:
            response = await client.post("/v1/completions", json=payload)
            response.raise_for_status()
            return response.json()
        except httpx.HTTPError as e:
            logger.error("vLLM request failed: %s", e)
            return {"error": str(e)}
    async def chat(
        self,
        messages: list[dict[str, str]],
        max_tokens: int = 256,
        temperature: float = 0.7,
        **kwargs,
    ) -> dict[str, Any]:
        """Send chat completion request."""
        client = self._ensure_client()
        payload = {
            "model": settings.vllm_model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            **kwargs,
        }
        try:
            response = await client.post("/v1/chat/completions", json=payload)
            response.raise_for_status()
            return response.json()
        except httpx.HTTPError as e:
            logger.error("vLLM chat request failed: %s", e)
            return {"error": str(e)}


# Canonical PEP 8 alias. Tests and the MCP server import the upper-case form;
# the lower-case original stays for backward compatibility with older callers.
VLLMClient = vLLMClient
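

# A minimal usage sketch, not part of the shipped API: it assumes a vLLM
# server is reachable at settings.vllm_base_url with settings.vllm_model
# loaded, and that responses follow the OpenAI chat-completion shape
# (choices[0].message.content).
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        # Context-manager path: aclose() is guaranteed on exit, and either
        # spelling (VLLMClient / vLLMClient) resolves to the same class.
        async with VLLMClient() as client:
            reply = await client.chat(
                messages=[{"role": "user", "content": "Say hello."}],
                max_tokens=32,
            )
        # chat() returns {"error": ...} on transport failure instead of raising.
        if "error" in reply:
            print("request failed:", reply["error"])
        else:
            print(reply["choices"][0]["message"]["content"])

        # Lazy path: the first request creates the httpx client on demand,
        # so aclose() must be called explicitly.
        client = vLLMClient()
        try:
            print(await client.complete(prompt="Ping:", max_tokens=8))
        finally:
            await client.aclose()

    asyncio.run(_demo())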