LucasLooTan committed on
Commit
d20d5a9
·
1 Parent(s): 0fb3eb2

perf: cache OpenAI-compatible client per (provider, model) tuple

Browse files

deep-check audit A.F9: _resolve_client was instantiating a fresh
OpenAI client (and therefore a fresh httpx connection pool) per
recognition / composition call. lru_cache(maxsize=4) keyed on
(provider, base_url, api_key, model) reuses the pool across same-
provider calls; switching providers (e.g. amd -> hf fallback) rebuilds
cleanly.

Same pattern applied to both signbridge/recognizer/vlm.py and
signbridge/composer/sentence.py. Existing test monkey-patches still
work because they replace _resolve_client directly, not _build_client.

signbridge/composer/sentence.py CHANGED
@@ -15,6 +15,7 @@ from __future__ import annotations
15
  import logging
16
  import os
17
  import re
 
18
  from typing import Sequence
19
 
20
  logger = logging.getLogger(__name__)
@@ -32,52 +33,51 @@ Rules:
32
  6. End with appropriate punctuation."""
33
 
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
def _resolve_client() -> tuple[object | None, str]:
    """Pick an OpenAI-compatible client and model from SIGNBRIDGE_PROVIDER.

    Returns ``(None, model_id)`` whenever the SDK is missing or the selected
    provider's credentials are not configured; callers treat that as the
    signal to fall back to the naive joiner.
    """
    provider = os.getenv("SIGNBRIDGE_PROVIDER", "amd").lower()
    composer_model = os.getenv(
        "SIGNBRIDGE_COMPOSER_MODEL", "meta-llama/Llama-3.1-8B-Instruct"
    )

    try:
        from openai import OpenAI  # type: ignore[import-not-found]
    except ImportError:
        logger.warning("openai sdk not installed; composer returns naive joiner.")
        return None, composer_model

    if provider == "amd":
        endpoint = os.getenv("AMD_DEV_CLOUD_BASE_URL", "").rstrip("/")
        token = os.getenv("AMD_DEV_CLOUD_API_KEY", "")
        if endpoint and token:
            return OpenAI(base_url=endpoint, api_key=token), composer_model
        logger.info("AMD Dev Cloud not configured; falling back to naive joiner.")
        return None, composer_model

    if provider == "openai":
        token = os.getenv("OPENAI_API_KEY", "")
        if token:
            # For local-dev fallback, use a small fast model.
            return OpenAI(api_key=token), os.getenv(
                "SIGNBRIDGE_COMPOSER_MODEL_OPENAI", "gpt-4o-mini"
            )
        logger.info("OPENAI_API_KEY not set; falling back to naive joiner.")
        return None, composer_model

    if provider == "hf":
        token = os.getenv("HF_TOKEN", "")
        if not token:
            logger.info("HF_TOKEN not set; falling back to naive joiner.")
            return None, composer_model
        router = os.getenv(
            "HF_INFERENCE_BASE_URL", "https://router.huggingface.co/v1"
        )
        return OpenAI(base_url=router, api_key=token), composer_model

    logger.warning("unknown SIGNBRIDGE_PROVIDER=%r; using naive joiner.", provider)
    return None, composer_model
 
15
  import logging
16
  import os
17
  import re
18
+ from functools import lru_cache
19
  from typing import Sequence
20
 
21
  logger = logging.getLogger(__name__)
 
33
  6. End with appropriate punctuation."""
34
 
35
 
36
@lru_cache(maxsize=4)
def _build_client(
    provider: str, base_url: str, api_key: str, model: str
) -> tuple[object | None, str]:
    """Build (and cache) an OpenAI-compatible client for the given config.

    The cache key is the full ``(provider, base_url, api_key, model)`` tuple,
    so switching providers rebuilds the client while same-provider calls
    reuse the underlying httpx connection pool.  The ``(None, model)``
    result is cached too: once the SDK is found missing we do not retry the
    import (or repeat the warning) on every call.
    """
    try:
        from openai import OpenAI  # type: ignore[import-not-found]
    except ImportError:
        logger.warning("openai sdk not installed; composer returns naive joiner.")
        return None, model
    # Empty base_url means "use the SDK's default endpoint" (openai provider).
    if base_url:
        return OpenAI(base_url=base_url, api_key=api_key), model
    return OpenAI(api_key=api_key), model
49
def _resolve_client() -> tuple[object | None, str]:
    """Return (cached client, model_id) based on SIGNBRIDGE_PROVIDER env var.

    Falls back to ``(None, model_id)`` — the naive-joiner path — whenever the
    selected provider is unknown or its credentials are not configured.
    """
    provider = os.getenv("SIGNBRIDGE_PROVIDER", "amd").lower()
    composer_model = os.getenv(
        "SIGNBRIDGE_COMPOSER_MODEL", "meta-llama/Llama-3.1-8B-Instruct"
    )

    if provider == "amd":
        base_url = os.getenv("AMD_DEV_CLOUD_BASE_URL", "").rstrip("/")
        api_key = os.getenv("AMD_DEV_CLOUD_API_KEY", "")
        if not base_url or not api_key:
            logger.info("AMD Dev Cloud not configured; falling back to naive joiner.")
            return None, composer_model
        return _build_client(provider, base_url, api_key, composer_model)

    if provider == "openai":
        api_key = os.getenv("OPENAI_API_KEY", "")
        if not api_key:
            logger.info("OPENAI_API_KEY not set; falling back to naive joiner.")
            return None, composer_model
        model = os.getenv("SIGNBRIDGE_COMPOSER_MODEL_OPENAI", "gpt-4o-mini")
        # Empty base_url selects the SDK's default OpenAI endpoint.
        return _build_client(provider, "", api_key, model)

    if provider == "hf":
        api_key = os.getenv("HF_TOKEN", "")
        if not api_key:
            logger.info("HF_TOKEN not set; falling back to naive joiner.")
            return None, composer_model
        # rstrip("/") matches the amd branch and keeps the lru_cache key
        # stable regardless of a trailing slash in the env var.
        base_url = os.getenv(
            "HF_INFERENCE_BASE_URL", "https://router.huggingface.co/v1"
        ).rstrip("/")
        return _build_client(provider, base_url, api_key, composer_model)

    logger.warning("unknown SIGNBRIDGE_PROVIDER=%r; using naive joiner.", provider)
    return None, composer_model
signbridge/recognizer/vlm.py CHANGED
@@ -22,6 +22,7 @@ import io
22
  import logging
23
  import os
24
  import re
 
25
 
26
  import numpy as np
27
 
@@ -51,15 +52,28 @@ _PROMPT = (
51
  )
52
 
53
 
54
def _resolve_client() -> tuple[object | None, str]:
    """Return (openai-compat client, model_id) based on SIGNBRIDGE_PROVIDER."""
    provider = os.getenv("SIGNBRIDGE_PROVIDER", "amd").lower()

    try:
        from openai import OpenAI  # type: ignore[import-not-found]
    except ImportError:
        logger.warning("openai sdk not installed; recognizer returns 'unknown'.")
        return None, DEFAULT_VLM_MODEL

    if provider == "amd":
        endpoint = os.getenv("AMD_DEV_CLOUD_BASE_URL", "").rstrip("/")
        token = os.getenv("AMD_DEV_CLOUD_API_KEY", "")
        if endpoint and token:
            return OpenAI(base_url=endpoint, api_key=token), DEFAULT_VLM_MODEL
        logger.info("AMD Dev Cloud not configured; recognizer in stub mode.")
        return None, DEFAULT_VLM_MODEL

    if provider == "openai":
        token = os.getenv("OPENAI_API_KEY", "")
        if not token:
            logger.info("OPENAI_API_KEY not set; recognizer in stub mode.")
            return None, DEFAULT_VLM_MODEL
        return OpenAI(api_key=token), os.getenv(
            "SIGNBRIDGE_VLM_MODEL_OPENAI", "gpt-4o-mini"
        )

    if provider == "hf":
        token = os.getenv("HF_TOKEN", "")
        if not token:
            logger.info("HF_TOKEN not set; recognizer in stub mode.")
            return None, DEFAULT_VLM_MODEL
        # HF Inference Providers — OpenAI-compatible router serving Qwen2-VL,
        # Llama-3.2-Vision, etc. via Together/Fireworks/Hyperbolic backends.
        router = os.getenv(
            "HF_INFERENCE_BASE_URL", "https://router.huggingface.co/v1"
        )
        vision_model = os.getenv(
            "SIGNBRIDGE_VLM_MODEL_HF", "meta-llama/Llama-3.2-11B-Vision-Instruct"
        )
        return OpenAI(base_url=router, api_key=token), vision_model

    logger.warning("unknown SIGNBRIDGE_PROVIDER=%r; recognizer in stub mode.", provider)
    return None, DEFAULT_VLM_MODEL
 
22
  import logging
23
  import os
24
  import re
25
+ from functools import lru_cache
26
 
27
  import numpy as np
28
 
 
52
  )
53
 
54
 
55
@lru_cache(maxsize=4)
def _build_client(
    provider: str, base_url: str, api_key: str, model: str
) -> tuple[object | None, str]:
    """Build (and cache) an OpenAI-compatible client for the given config.

    Cache key includes the full provider tuple so switching providers
    rebuilds; same provider re-uses the httpx connection pool. The
    ``(None, model)`` return is cached too — once a missing-deps state is
    detected we don't re-import on every frame.
    """
    try:
        from openai import OpenAI  # type: ignore[import-not-found]
    except ImportError:
        logger.warning("openai sdk not installed; recognizer returns 'unknown'.")
        return None, model
    # Empty base_url means "use the SDK's default endpoint".
    client = (
        OpenAI(base_url=base_url, api_key=api_key)
        if base_url
        else OpenAI(api_key=api_key)
    )
    return client, model
74
def _resolve_client() -> tuple[object | None, str]:
    """Return (cached client, model_id) based on SIGNBRIDGE_PROVIDER env var.

    Falls back to ``(None, DEFAULT_VLM_MODEL)`` — recognizer stub mode —
    whenever the selected provider is unknown or its credentials are missing.
    """
    provider = os.getenv("SIGNBRIDGE_PROVIDER", "amd").lower()

    if provider == "amd":
        base_url = os.getenv("AMD_DEV_CLOUD_BASE_URL", "").rstrip("/")
        api_key = os.getenv("AMD_DEV_CLOUD_API_KEY", "")
        if not base_url or not api_key:
            logger.info("AMD Dev Cloud not configured; recognizer in stub mode.")
            return None, DEFAULT_VLM_MODEL
        return _build_client(provider, base_url, api_key, DEFAULT_VLM_MODEL)

    if provider == "openai":
        api_key = os.getenv("OPENAI_API_KEY", "")
        if not api_key:
            logger.info("OPENAI_API_KEY not set; recognizer in stub mode.")
            return None, DEFAULT_VLM_MODEL
        model = os.getenv("SIGNBRIDGE_VLM_MODEL_OPENAI", "gpt-4o-mini")
        # Empty base_url selects the SDK's default OpenAI endpoint.
        return _build_client(provider, "", api_key, model)

    if provider == "hf":
        api_key = os.getenv("HF_TOKEN", "")
        if not api_key:
            logger.info("HF_TOKEN not set; recognizer in stub mode.")
            return None, DEFAULT_VLM_MODEL
        # rstrip("/") matches the amd branch and keeps the lru_cache key
        # stable regardless of a trailing slash in the env var.
        base_url = os.getenv(
            "HF_INFERENCE_BASE_URL", "https://router.huggingface.co/v1"
        ).rstrip("/")
        model = os.getenv(
            "SIGNBRIDGE_VLM_MODEL_HF", "meta-llama/Llama-3.2-11B-Vision-Instruct"
        )
        return _build_client(provider, base_url, api_key, model)

    logger.warning("unknown SIGNBRIDGE_PROVIDER=%r; recognizer in stub mode.", provider)
    return None, DEFAULT_VLM_MODEL