Step 4: LLM client — DeepSeek wrapper with retry and LLMUnavailable
Browse files
Implements specs/02_llm_client.md. chat_json uses response_format=json_object
and retries on 5xx/connection errors with exponential backoff; chat_vision
base64-encodes images for multimodal calls. Raises LLMUnavailable on missing
key, auth failure, or exhausted retries.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- core/llm_client.py +77 -3
- specs/02_llm_client.md +101 -0
core/llm_client.py
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
class LLMUnavailable(Exception):
    """Signal that an LLM call could not be completed."""
|
|
@@ -7,10 +14,47 @@ class LLMUnavailable(Exception):
|
|
| 7 |
|
| 8 |
class LLM:
|
| 9 |
def __init__(self, api_key: str | None = None):
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
def chat_json(self, system: str, user: str, max_retries: int = 2) -> dict:
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
def chat_vision(
|
| 16 |
self,
|
|
@@ -19,4 +63,34 @@ class LLM:
|
|
| 19 |
image: bytes | str | Path,
|
| 20 |
max_retries: int = 2,
|
| 21 |
) -> str:
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
import json
|
| 3 |
+
import time
|
| 4 |
from pathlib import Path
|
| 5 |
|
| 6 |
+
import openai
|
| 7 |
+
|
| 8 |
+
from core.config import DEEPSEEK_API_KEY, DEEPSEEK_BASE_URL, MODEL_NAME
|
| 9 |
+
|
| 10 |
|
| 11 |
class LLMUnavailable(Exception):
    """Signal that an LLM call could not be completed.

    Carries a human-readable reason as its message; it adds no behaviour on
    top of ``Exception``.
    """
|
|
|
|
| 14 |
|
| 15 |
class LLM:
|
| 16 |
def __init__(self, api_key: str | None = None):
|
| 17 |
+
resolved = api_key if api_key is not None else DEEPSEEK_API_KEY
|
| 18 |
+
self._api_key = resolved
|
| 19 |
+
self._client = openai.OpenAI(
|
| 20 |
+
api_key=resolved or "no-key",
|
| 21 |
+
base_url=DEEPSEEK_BASE_URL,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
def _check_key(self) -> None:
|
| 25 |
+
if not self._api_key:
|
| 26 |
+
raise LLMUnavailable("No API key configured")
|
| 27 |
|
| 28 |
def chat_json(self, system: str, user: str, max_retries: int = 2) -> dict:
|
| 29 |
+
self._check_key()
|
| 30 |
+
last_exc: Exception | None = None
|
| 31 |
+
for attempt in range(max_retries + 1):
|
| 32 |
+
try:
|
| 33 |
+
resp = self._client.chat.completions.create(
|
| 34 |
+
model=MODEL_NAME,
|
| 35 |
+
messages=[
|
| 36 |
+
{"role": "system", "content": system},
|
| 37 |
+
{"role": "user", "content": user},
|
| 38 |
+
],
|
| 39 |
+
temperature=0,
|
| 40 |
+
response_format={"type": "json_object"},
|
| 41 |
+
)
|
| 42 |
+
content = resp.choices[0].message.content or ""
|
| 43 |
+
try:
|
| 44 |
+
return json.loads(content)
|
| 45 |
+
except json.JSONDecodeError:
|
| 46 |
+
if attempt < max_retries:
|
| 47 |
+
system = system + " Respond ONLY with valid JSON, no prose."
|
| 48 |
+
continue
|
| 49 |
+
raise LLMUnavailable("Malformed JSON after retries")
|
| 50 |
+
except openai.AuthenticationError as e:
|
| 51 |
+
raise LLMUnavailable("Invalid API key") from e
|
| 52 |
+
except (openai.APIStatusError, openai.APIConnectionError) as e:
|
| 53 |
+
last_exc = e
|
| 54 |
+
if attempt < max_retries:
|
| 55 |
+
time.sleep(2 ** attempt)
|
| 56 |
+
continue
|
| 57 |
+
raise LLMUnavailable(f"API error after retries: {last_exc}") from last_exc
|
| 58 |
|
| 59 |
def chat_vision(
|
| 60 |
self,
|
|
|
|
| 63 |
image: bytes | str | Path,
|
| 64 |
max_retries: int = 2,
|
| 65 |
) -> str:
|
| 66 |
+
self._check_key()
|
| 67 |
+
if isinstance(image, (str, Path)):
|
| 68 |
+
raw = Path(image).read_bytes()
|
| 69 |
+
else:
|
| 70 |
+
raw = image
|
| 71 |
+
b64 = base64.b64encode(raw).decode()
|
| 72 |
+
data_uri = f"data:image/png;base64,{b64}"
|
| 73 |
+
|
| 74 |
+
last_exc: Exception | None = None
|
| 75 |
+
for attempt in range(max_retries + 1):
|
| 76 |
+
try:
|
| 77 |
+
resp = self._client.chat.completions.create(
|
| 78 |
+
model=MODEL_NAME,
|
| 79 |
+
messages=[
|
| 80 |
+
{"role": "system", "content": system},
|
| 81 |
+
{"role": "user", "content": [
|
| 82 |
+
{"type": "text", "text": user_text},
|
| 83 |
+
{"type": "image_url", "image_url": {"url": data_uri}},
|
| 84 |
+
]},
|
| 85 |
+
],
|
| 86 |
+
temperature=0,
|
| 87 |
+
)
|
| 88 |
+
return resp.choices[0].message.content or ""
|
| 89 |
+
except openai.AuthenticationError as e:
|
| 90 |
+
raise LLMUnavailable("Invalid API key") from e
|
| 91 |
+
except (openai.APIStatusError, openai.APIConnectionError) as e:
|
| 92 |
+
last_exc = e
|
| 93 |
+
if attempt < max_retries:
|
| 94 |
+
time.sleep(2 ** attempt)
|
| 95 |
+
continue
|
| 96 |
+
raise LLMUnavailable(f"API error after retries: {last_exc}") from last_exc
|
specs/02_llm_client.md
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Spec 02 — LLM Client
|
| 2 |
+
|
| 3 |
+
**Step:** 4 of 15
|
| 4 |
+
**Time budget:** ~25 min
|
| 5 |
+
**Checkpoint:** `LLM().chat_json(system, user)` returns a dict when the API key is valid; raises `LLMUnavailable` when the key is missing.
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## Goal
|
| 10 |
+
|
| 11 |
+
Implement `core/llm_client.py` — a thin wrapper around the OpenAI Python SDK pointed at the DeepSeek API. Provides `chat_json` (JSON-mode responses) and `chat_vision` (multimodal image input). Both methods retry on transient failures and raise `LLMUnavailable` after `max_retries`.
|
| 12 |
+
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
## Dependencies
|
| 16 |
+
|
| 17 |
+
- `openai` Python SDK (OpenAI-compatible, pointed at DeepSeek base URL)
|
| 18 |
+
- `core.config` for `DEEPSEEK_API_KEY`, `DEEPSEEK_BASE_URL`, `MODEL_NAME`, `MODEL_VERSION`
|
| 19 |
+
- `core.prompts` for prompt constants (used by callers, not by this module directly)
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## Class: `LLMUnavailable`
|
| 24 |
+
|
| 25 |
+
```python
|
| 26 |
+
class LLMUnavailable(Exception):
|
| 27 |
+
pass
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
Raised whenever the LLM call cannot be completed after all retries. Callers should catch this and route to `fallback.py`.
|
| 31 |
+
|
| 32 |
+
---
|
| 33 |
+
|
| 34 |
+
## Class: `LLM`
|
| 35 |
+
|
| 36 |
+
### `__init__(self, api_key: str | None = None)`
|
| 37 |
+
|
| 38 |
+
- If `api_key` is `None`, use `config.DEEPSEEK_API_KEY`.
|
| 39 |
+
- If the resolved key is `None` or empty: do NOT raise immediately — defer to call time so the app can start without a key (precomputed mode).
|
| 40 |
+
- Create an `openai.OpenAI(api_key=key, base_url=DEEPSEEK_BASE_URL)` client and store as `self._client`.
|
| 41 |
+
|
| 42 |
+
### `chat_json(self, system: str, user: str, max_retries: int = 2) -> dict`
|
| 43 |
+
|
| 44 |
+
Calls the chat completions API with `response_format={"type": "json_object"}`, `temperature=0`.
|
| 45 |
+
|
| 46 |
+
Messages: `[{"role": "system", "content": system}, {"role": "user", "content": user}]`
|
| 47 |
+
|
| 48 |
+
Retry logic:
|
| 49 |
+
1. Try the API call.
|
| 50 |
+
2. On success: parse `response.choices[0].message.content` as JSON. If `json.loads` fails, retry once with a stricter system postscript `" Respond ONLY with valid JSON, no prose."`. If it fails again, raise `LLMUnavailable("Malformed JSON after retries")`.
|
| 51 |
+
3. On `openai.APIStatusError` with a 5xx status, or on `openai.APIConnectionError`: retry with exponential backoff (sleep `2 ** attempt` seconds before each retry, at most `max_retries` retries — 2 by default), then raise `LLMUnavailable`.
|
| 52 |
+
4. On `openai.AuthenticationError` (401): raise `LLMUnavailable("Invalid API key")` immediately (no retry).
|
| 53 |
+
5. If `api_key` is None/empty at call time: raise `LLMUnavailable("No API key configured")`.
|
| 54 |
+
|
| 55 |
+
Returns `dict`.
|
| 56 |
+
|
| 57 |
+
### `chat_vision(self, system: str, user_text: str, image: bytes | str | Path, max_retries: int = 2) -> str`
|
| 58 |
+
|
| 59 |
+
Sends a multimodal message using the OpenAI vision format.
|
| 60 |
+
|
| 61 |
+
Image encoding:
|
| 62 |
+
- If `image` is `bytes`: base64-encode directly.
|
| 63 |
+
- If `image` is `Path` or `str`: read the file as bytes, then base64-encode.
|
| 64 |
+
- Build data URI: `f"data:image/png;base64,{b64_str}"`.
|
| 65 |
+
|
| 66 |
+
Message format:
|
| 67 |
+
```python
|
| 68 |
+
[
|
| 69 |
+
{"role": "system", "content": system},
|
| 70 |
+
{"role": "user", "content": [
|
| 71 |
+
{"type": "text", "text": user_text},
|
| 72 |
+
{"type": "image_url", "image_url": {"url": data_uri}},
|
| 73 |
+
]},
|
| 74 |
+
]
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
Call at `temperature=0`, no `response_format` (vision endpoint returns plain text).
|
| 78 |
+
|
| 79 |
+
Retry logic: same as `chat_json`, except that on content errors it simply retries with the same prompt. Returns `response.choices[0].message.content` as a string.
|
| 80 |
+
|
| 81 |
+
On any failure after retries: raise `LLMUnavailable`.
|
| 82 |
+
|
| 83 |
+
---
|
| 84 |
+
|
| 85 |
+
## Error handling summary
|
| 86 |
+
|
| 87 |
+
| Condition | Behaviour |
|
| 88 |
+
|---|---|
|
| 89 |
+
| Missing/empty API key | `LLMUnavailable("No API key configured")` |
|
| 90 |
+
| 401 AuthenticationError | `LLMUnavailable("Invalid API key")` |
|
| 91 |
+
| 5xx / ConnectionError | Retry with backoff, then `LLMUnavailable` |
|
| 92 |
+
| Malformed JSON (chat_json) | Retry once with stricter prompt, then `LLMUnavailable` |
|
| 93 |
+
|
| 94 |
+
---
|
| 95 |
+
|
| 96 |
+
## Acceptance Criteria
|
| 97 |
+
|
| 98 |
+
1. `from core.llm_client import LLM, LLMUnavailable` imports cleanly.
|
| 99 |
+
2. `LLM(api_key=None)` with no `.env` → calling `chat_json(...)` raises `LLMUnavailable` (not an unhandled exception).
|
| 100 |
+
3. With a valid key: `LLM().chat_json("respond with valid json", '{"ok": true}')` returns `{"ok": True}` (or similar).
|
| 101 |
+
4. `LLMUnavailable` is a subclass of `Exception`.
|