Spaces:
Build error
Build error
Commit Β·
819f4c1
1
Parent(s): 3a0014b
feat: end-to-end smoke test + gold-set accuracy harness
Browse files- signbridge.scripts.smoke_test: runs recognizer + composer + TTS once,
reports per-stage status. Provider-agnostic (amd/openai/hf/none).
Confirms composer + TTS work even without API keys.
- signbridge.scripts.run_gold_set: scans tests/golden/<token>/*.{jpg,png}
and reports per-class + overall accuracy. Writes timestamped CSV.
Exits non-zero if overall < 75% (the V1 success criterion).
Both ready to run the moment AMD Dev Cloud credentials land in .env.
- signbridge/scripts/run_gold_set.py +141 -0
- signbridge/scripts/smoke_test.py +147 -0
signbridge/scripts/run_gold_set.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Accuracy harness β run the recognizer over a labelled sample folder.
|
| 2 |
+
|
| 3 |
+
Folder layout expected:
|
| 4 |
+
tests/golden/
|
| 5 |
+
A/<any>.jpg|png β expected token "A"
|
| 6 |
+
B/<any>.jpg|png β expected token "B"
|
| 7 |
+
...
|
| 8 |
+
hello/<any>.jpg|png β expected token "hello"
|
| 9 |
+
|
| 10 |
+
Each subdirectory name is the expected token. Every image inside is a sample.
|
| 11 |
+
|
| 12 |
+
Output:
|
| 13 |
+
- per-class accuracy (correct / total)
|
| 14 |
+
- overall accuracy
|
| 15 |
+
- a CSV at tests/golden/results-<timestamp>.csv
|
| 16 |
+
|
| 17 |
+
Usage:
|
| 18 |
+
python -m signbridge.scripts.run_gold_set
|
| 19 |
+
python -m signbridge.scripts.run_gold_set --root tests/golden
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
from __future__ import annotations
|
| 23 |
+
|
| 24 |
+
import argparse
|
| 25 |
+
import csv
|
| 26 |
+
import sys
|
| 27 |
+
import time
|
| 28 |
+
from collections import defaultdict
|
| 29 |
+
from datetime import datetime, timezone
|
| 30 |
+
from pathlib import Path
|
| 31 |
+
|
| 32 |
+
import numpy as np
|
| 33 |
+
from dotenv import load_dotenv
|
| 34 |
+
from PIL import Image
|
| 35 |
+
|
| 36 |
+
from signbridge.recognizer.vlm import recognize_sign_from_frame
|
| 37 |
+
|
| 38 |
+
VALID_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _iter_samples(root: Path):
|
| 42 |
+
for cls_dir in sorted(p for p in root.iterdir() if p.is_dir()):
|
| 43 |
+
cls = cls_dir.name
|
| 44 |
+
for img_path in sorted(cls_dir.iterdir()):
|
| 45 |
+
if img_path.suffix.lower() in VALID_EXTS:
|
| 46 |
+
yield cls, img_path
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def main() -> int:
    """Run the recognizer over every golden sample and report accuracy.

    Returns:
        0 when overall accuracy meets the 75% V1 success criterion,
        1 when it falls short, and 2 for setup problems (missing root
        folder or no images found).
    """
    parser = argparse.ArgumentParser(description="SignBridge accuracy harness")
    parser.add_argument(
        "--root",
        type=Path,
        default=Path("tests/golden"),
        help="Root folder with one subdirectory per expected token",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="CSV output path (defaults to tests/golden/results-<ts>.csv)",
    )
    args = parser.parse_args()

    # Provider credentials (AMD / OpenAI / HF) are read from .env.
    load_dotenv()

    # Path.is_dir() is False for a missing path, so one check covers both.
    if not args.root.is_dir():
        print(f"error: {args.root} not found or not a directory", file=sys.stderr)
        print("create it with subdirectories named after expected tokens, e.g.:", file=sys.stderr)
        print(" tests/golden/A/sample1.jpg", file=sys.stderr)
        print(" tests/golden/hello/sample2.png", file=sys.stderr)
        return 2

    samples = list(_iter_samples(args.root))
    if not samples:
        print(f"no images found under {args.root}", file=sys.stderr)
        return 2

    # Timestamped so repeated runs never overwrite earlier results.
    out_path = args.output or args.root / f"results-{datetime.now(timezone.utc):%Y%m%dT%H%M%SZ}.csv"
    out_path.parent.mkdir(parents=True, exist_ok=True)

    per_class_correct: dict[str, int] = defaultdict(int)
    per_class_total: dict[str, int] = defaultdict(int)
    rows: list[dict[str, str]] = []

    print(f"running {len(samples)} samples against the configured provider…")
    t_start = time.perf_counter()
    for expected, path in samples:
        per_class_total[expected] += 1
        img = np.asarray(Image.open(path).convert("RGB"))
        t0 = time.perf_counter()
        # One bad sample (corrupt image, provider hiccup) must not abort the
        # whole run — record it as an incorrect prediction and keep going.
        try:
            predicted, confidence = recognize_sign_from_frame(img)
        except Exception as exc:  # noqa: BLE001 — harness must survive any per-sample failure
            predicted, confidence = "<error>", 0.0
            print(f" [!] {expected:<10} {path.name}: {exc}", file=sys.stderr)
        dt_ms = (time.perf_counter() - t0) * 1000
        ok = predicted == expected
        if ok:
            per_class_correct[expected] += 1
        rows.append(
            {
                "path": str(path),
                "expected": expected,
                "predicted": predicted,
                "confidence": f"{confidence:.2f}",
                "latency_ms": f"{dt_ms:.0f}",
                "correct": "1" if ok else "0",
            }
        )
        # NOTE(review): the ✓/✗/→ glyphs were mojibake in the imported
        # source; restored to the obvious originals — confirm upstream.
        print(
            f" [{'✓' if ok else '✗'}] {expected:<10} → {predicted!r:<12} "
            f"conf={confidence:.2f} {dt_ms:.0f}ms ({path.name})"
        )

    total_correct = sum(per_class_correct.values())
    total = sum(per_class_total.values())
    overall = total_correct / total if total else 0.0
    elapsed = time.perf_counter() - t_start

    print()
    print("Per-class accuracy:")
    for cls in sorted(per_class_total):
        c = per_class_correct[cls]
        # n >= 1 always: every key is created by a per-sample increment,
        # so no zero-division guard is needed here.
        n = per_class_total[cls]
        print(f" {cls:<12} {c}/{n} ({(c / n) * 100:.0f}%)")
    print()
    print(f"Overall: {total_correct}/{total} ({overall * 100:.1f}%)")
    print(f"Total wall time: {elapsed:.1f}s (avg {(elapsed / total) * 1000:.0f}ms per sample)")

    # newline="" per the csv module docs; explicit encoding for portability.
    with out_path.open("w", newline="", encoding="utf-8") as fh:
        writer = csv.DictWriter(
            fh,
            fieldnames=["path", "expected", "predicted", "confidence", "latency_ms", "correct"],
        )
        writer.writeheader()
        writer.writerows(rows)
    print(f"\nCSV written to {out_path}")

    # Exit non-zero if accuracy below the V1 success criterion (75%).
    return 0 if overall >= 0.75 else 1
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
if __name__ == "__main__":
    # Propagate main()'s exit status to the shell.
    raise SystemExit(main())
|
signbridge/scripts/smoke_test.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""End-to-end smoke test for the SignBridge inference path.
|
| 2 |
+
|
| 3 |
+
Run AFTER you've filled in .env with provider credentials. Exercises:
|
| 4 |
+
- /info endpoint (no auth needed)
|
| 5 |
+
- the VLM recognizer with a synthetic frame
|
| 6 |
+
- the LLM composer with a hand-crafted sign sequence
|
| 7 |
+
- the TTS pipeline
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
python -m signbridge.scripts.smoke_test
|
| 11 |
+
SIGNBRIDGE_PROVIDER=openai python -m signbridge.scripts.smoke_test
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import argparse
|
| 17 |
+
import os
|
| 18 |
+
import sys
|
| 19 |
+
import time
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
|
| 22 |
+
import numpy as np
|
| 23 |
+
from dotenv import load_dotenv
|
| 24 |
+
from PIL import Image, ImageDraw
|
| 25 |
+
|
| 26 |
+
from signbridge.composer.sentence import compose_sentence
|
| 27 |
+
from signbridge.recognizer.vlm import recognize_sign_from_frame
|
| 28 |
+
from signbridge.voice.tts import synthesize_speech
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _make_synthetic_frame() -> np.ndarray:
    """Create a 256x256 RGB image with a stylised pose silhouette.

    Real recognition needs an actual hand/sign image. This synthetic frame
    is just to confirm the API plumbing works end-to-end — accuracy is
    expected to be 'unknown' (the VLM returning 'unknown' is the right
    answer for a stick figure).
    """
    skin = (220, 180, 140)
    canvas = Image.new("RGB", (256, 256), color=(245, 245, 245))
    draw = ImageDraw.Draw(canvas)
    # Stick figure: head + body + arms in "A" sign pose
    draw.ellipse((110, 30, 146, 66), fill=skin, outline="black", width=2)  # head
    draw.line((128, 66, 128, 160), fill="black", width=4)  # torso
    draw.line((128, 90, 95, 130), fill="black", width=4)  # left arm
    draw.line((128, 90, 161, 130), fill="black", width=4)  # right arm
    draw.ellipse((85, 120, 105, 140), fill=skin, outline="black", width=2)  # left hand
    draw.ellipse((151, 120, 171, 140), fill=skin, outline="black", width=2)  # right hand
    draw.text((90, 200), "synthetic test frame", fill="black")
    return np.asarray(canvas)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _print_provider_info() -> None:
|
| 53 |
+
provider = os.getenv("SIGNBRIDGE_PROVIDER", "amd")
|
| 54 |
+
print(f" provider = {provider}")
|
| 55 |
+
if provider == "amd":
|
| 56 |
+
base = os.getenv("AMD_DEV_CLOUD_BASE_URL", "")
|
| 57 |
+
key = os.getenv("AMD_DEV_CLOUD_API_KEY", "")
|
| 58 |
+
print(f" AMD_DEV_CLOUD_BASE_URL = {base or '(unset)'}")
|
| 59 |
+
print(f" AMD_DEV_CLOUD_API_KEY = {'set (' + str(len(key)) + ' chars)' if key else '(unset)'}")
|
| 60 |
+
elif provider == "openai":
|
| 61 |
+
print(f" OPENAI_API_KEY = {'set' if os.getenv('OPENAI_API_KEY') else '(unset)'}")
|
| 62 |
+
elif provider == "hf":
|
| 63 |
+
print(f" HF_TOKEN = {'set' if os.getenv('HF_TOKEN') else '(unset)'}")
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _step(label: str) -> None:
|
| 67 |
+
print(f"\nββ {label} ββ")
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def main() -> int:
    """Exercise recognizer → composer → TTS once and report per-stage status.

    Returns:
        0 when the composer and TTS stages both succeed (they must work even
        with no provider configured — naive joiner + silent stub), else 1.
        The recognizer stage is informational: it needs a real provider.
    """
    parser = argparse.ArgumentParser(description="SignBridge end-to-end smoke test")
    parser.add_argument(
        "--text",
        default="My name is Lucas. Hello.",
        help="Text to synthesise via TTS",
    )
    parser.add_argument(
        "--signs",
        nargs="+",
        default=["hello", "name", "L", "U", "C", "A", "S"],
        help="Sign sequence to compose into a sentence",
    )
    parser.add_argument(
        "--frame",
        type=Path,
        default=None,
        help="Path to a real sign image (PNG/JPG). Default = synthetic frame.",
    )
    args = parser.parse_args()

    # Provider credentials are read from .env.
    load_dotenv()

    _step("Provider config")
    _print_provider_info()

    _step("VLM recognizer (sign-frame → token)")
    if args.frame:
        img = np.asarray(Image.open(args.frame).convert("RGB"))
        print(f" using real frame: {args.frame} ({img.shape})")
    else:
        img = _make_synthetic_frame()
        print(f" using synthetic frame ({img.shape})")
        print(" (a synthetic stick figure is unlikely to match an ASL sign;")
        print(" the expected outcome is the VLM returning 'unknown' or empty —")
        print(" that proves the call worked even when accuracy can't be measured.)")
    t0 = time.perf_counter()
    # A provider/transport failure must be reported as a failed stage rather
    # than crash the smoke test before the composer/TTS stages run.
    try:
        token, conf = recognize_sign_from_frame(img)
    except Exception as exc:  # noqa: BLE001 — smoke test must reach every stage
        token, conf = "", 0.0
        print(f" recognizer raised: {exc!r}")
    dt = time.perf_counter() - t0
    print(f" → token={token!r} confidence={conf:.2f} latency={dt:.2f}s")

    _step("LLM composer (sign tokens → English sentence)")
    print(f" input signs: {args.signs}")
    t0 = time.perf_counter()
    sentence = compose_sentence(args.signs)
    dt = time.perf_counter() - t0
    print(f" → sentence = {sentence!r} ({dt:.2f}s)")

    _step("TTS (text → audio)")
    print(f" input text: {args.text!r}")
    t0 = time.perf_counter()
    audio_path = synthesize_speech(args.text)
    dt = time.perf_counter() - t0
    if audio_path:
        size = Path(audio_path).stat().st_size
        print(f" → wrote {audio_path} ({size:,} bytes, {dt:.2f}s)")
    else:
        print(" → no audio (TTS unavailable)")

    _step("Summary")
    ok_recognize = bool(token)
    ok_compose = bool(sentence)
    ok_tts = bool(audio_path)
    # NOTE(review): the ✓/✗ glyphs were mojibake in the imported source;
    # restored to the obvious originals — confirm upstream.
    flags = {
        "recognizer": "✓" if ok_recognize else "✗ (provider may be in stub mode; check creds)",
        "composer": "✓" if ok_compose else "✗ composer failed",
        "tts": "✓" if ok_tts else "✗ TTS failed",
    }
    for k, v in flags.items():
        print(f" {k:<10} {v}")

    # Compose + tts MUST work even with no provider (naive joiner + silent stub).
    # Recognizer needs a real provider.
    return 0 if (ok_compose and ok_tts) else 1
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
if __name__ == "__main__":
    # Propagate main()'s exit status to the shell.
    raise SystemExit(main())
sys.exit(main())
|