LucasLooTan committed
Commit 819f4c1 · 1 Parent(s): 3a0014b

feat: end-to-end smoke test + gold-set accuracy harness


- signbridge.scripts.smoke_test: runs recognizer + composer + TTS once,
reports per-stage status. Provider-agnostic (amd/openai/hf/none).
Confirms composer + TTS work even without API keys.
- signbridge.scripts.run_gold_set: scans tests/golden/<token>/*.{jpg,png}
and reports per-class + overall accuracy. Writes timestamped CSV.
Exits non-zero if overall < 75% (the V1 success criterion).

Both ready to run the moment AMD Dev Cloud credentials land in .env.
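
For reference, a minimal .env sketch using the variable names the smoke test reads (the endpoint and key values below are placeholders, not real credentials):

    SIGNBRIDGE_PROVIDER=amd
    AMD_DEV_CLOUD_BASE_URL=https://<your-amd-dev-cloud-endpoint>
    AMD_DEV_CLOUD_API_KEY=<your-api-key>
    # alternatively: SIGNBRIDGE_PROVIDER=openai with OPENAI_API_KEY set,
    # or SIGNBRIDGE_PROVIDER=hf with HF_TOKEN set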

signbridge/scripts/run_gold_set.py ADDED
@@ -0,0 +1,141 @@
+"""Accuracy harness: run the recognizer over a labelled sample folder.
+
+Folder layout expected:
+    tests/golden/
+        A/<any>.jpg|png      → expected token "A"
+        B/<any>.jpg|png      → expected token "B"
+        ...
+        hello/<any>.jpg|png  → expected token "hello"
+
+Each subdirectory name is the expected token. Every image inside is a sample.
+
+Output:
+    - per-class accuracy (correct / total)
+    - overall accuracy
+    - a CSV at tests/golden/results-<timestamp>.csv
+
+Usage:
+    python -m signbridge.scripts.run_gold_set
+    python -m signbridge.scripts.run_gold_set --root tests/golden
+"""
+
+from __future__ import annotations
+
+import argparse
+import csv
+import sys
+import time
+from collections import defaultdict
+from datetime import datetime, timezone
+from pathlib import Path
+
+import numpy as np
+from dotenv import load_dotenv
+from PIL import Image
+
+from signbridge.recognizer.vlm import recognize_sign_from_frame
+
+VALID_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
+
+
+def _iter_samples(root: Path):
+    for cls_dir in sorted(p for p in root.iterdir() if p.is_dir()):
+        cls = cls_dir.name
+        for img_path in sorted(cls_dir.iterdir()):
+            if img_path.suffix.lower() in VALID_EXTS:
+                yield cls, img_path
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="SignBridge accuracy harness")
+    parser.add_argument(
+        "--root",
+        type=Path,
+        default=Path("tests/golden"),
+        help="Root folder with one subdirectory per expected token",
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=None,
+        help="CSV output path (defaults to tests/golden/results-<ts>.csv)",
+    )
+    args = parser.parse_args()
+
+    load_dotenv()
+
+    if not args.root.exists() or not args.root.is_dir():
+        print(f"error: {args.root} not found or not a directory", file=sys.stderr)
+        print("create it with subdirectories named after expected tokens, e.g.:", file=sys.stderr)
+        print("  tests/golden/A/sample1.jpg", file=sys.stderr)
+        print("  tests/golden/hello/sample2.png", file=sys.stderr)
+        return 2
+
+    samples = list(_iter_samples(args.root))
+    if not samples:
+        print(f"no images found under {args.root}", file=sys.stderr)
+        return 2
+
+    out_path = args.output or args.root / f"results-{datetime.now(timezone.utc):%Y%m%dT%H%M%SZ}.csv"
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+
+    per_class_correct: dict[str, int] = defaultdict(int)
+    per_class_total: dict[str, int] = defaultdict(int)
+    rows: list[dict[str, str]] = []
+
+    print(f"running {len(samples)} samples against the configured provider…")
+    t_start = time.perf_counter()
+    for expected, path in samples:
+        per_class_total[expected] += 1
+        img = np.asarray(Image.open(path).convert("RGB"))
+        t0 = time.perf_counter()
+        predicted, confidence = recognize_sign_from_frame(img)
+        dt_ms = (time.perf_counter() - t0) * 1000
+        ok = predicted == expected
+        if ok:
+            per_class_correct[expected] += 1
+        rows.append(
+            {
+                "path": str(path),
+                "expected": expected,
+                "predicted": predicted,
+                "confidence": f"{confidence:.2f}",
+                "latency_ms": f"{dt_ms:.0f}",
+                "correct": "1" if ok else "0",
+            }
+        )
+        print(
+            f"  [{'✓' if ok else '✗'}] {expected:<10} → {predicted!r:<12} "
+            f"conf={confidence:.2f} {dt_ms:.0f}ms ({path.name})"
+        )
+
+    total_correct = sum(per_class_correct.values())
+    total = sum(per_class_total.values())
+    overall = total_correct / total if total else 0.0
+    elapsed = time.perf_counter() - t_start
+
+    print()
+    print("Per-class accuracy:")
+    for cls in sorted(per_class_total):
+        c = per_class_correct[cls]
+        n = per_class_total[cls]
+        print(f"  {cls:<12} {c}/{n} ({(c / n) * 100:.0f}%)" if n else f"  {cls:<12} 0/0")
+    print()
+    print(f"Overall: {total_correct}/{total} ({overall * 100:.1f}%)")
+    print(f"Total wall time: {elapsed:.1f}s (avg {(elapsed / total) * 1000:.0f}ms per sample)")
+
+    with out_path.open("w", newline="") as fh:
+        writer = csv.DictWriter(
+            fh,
+            fieldnames=["path", "expected", "predicted", "confidence", "latency_ms", "correct"],
+        )
+        writer.writeheader()
+        writer.writerows(rows)
+    print(f"\nCSV written to {out_path}")
+
+    # Exit non-zero if accuracy is below the V1 success criterion (75%).
+    return 0 if overall >= 0.75 else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
signbridge/scripts/smoke_test.py ADDED
@@ -0,0 +1,147 @@
+"""End-to-end smoke test for the SignBridge inference path.
+
+Run AFTER you've filled in .env with provider credentials. Exercises:
+    - the provider configuration printout (which credentials are set)
+    - the VLM recognizer with a synthetic frame
+    - the LLM composer with a hand-crafted sign sequence
+    - the TTS pipeline
+
+Usage:
+    python -m signbridge.scripts.smoke_test
+    SIGNBRIDGE_PROVIDER=openai python -m signbridge.scripts.smoke_test
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+import time
+from pathlib import Path
+
+import numpy as np
+from dotenv import load_dotenv
+from PIL import Image, ImageDraw
+
+from signbridge.composer.sentence import compose_sentence
+from signbridge.recognizer.vlm import recognize_sign_from_frame
+from signbridge.voice.tts import synthesize_speech
+
+
+def _make_synthetic_frame() -> np.ndarray:
+    """Create a 256x256 RGB image with a stylised pose silhouette.
+
+    Real recognition needs an actual hand/sign image. This synthetic frame
+    only confirms the API plumbing works end-to-end; the prediction is
+    expected to be 'unknown' (the VLM returning 'unknown' is the right
+    answer for a stick figure).
+    """
+    img = Image.new("RGB", (256, 256), color=(245, 245, 245))
+    d = ImageDraw.Draw(img)
+    # Stick figure: head + body + arms in "A" sign pose
+    d.ellipse((110, 30, 146, 66), fill=(220, 180, 140), outline="black", width=2)
+    d.line((128, 66, 128, 160), fill="black", width=4)
+    d.line((128, 90, 95, 130), fill="black", width=4)
+    d.line((128, 90, 161, 130), fill="black", width=4)
+    d.ellipse((85, 120, 105, 140), fill=(220, 180, 140), outline="black", width=2)
+    d.ellipse((151, 120, 171, 140), fill=(220, 180, 140), outline="black", width=2)
+    d.text((90, 200), "synthetic test frame", fill="black")
+    return np.asarray(img)
+
+
+def _print_provider_info() -> None:
+    provider = os.getenv("SIGNBRIDGE_PROVIDER", "amd")
+    print(f"  provider = {provider}")
+    if provider == "amd":
+        base = os.getenv("AMD_DEV_CLOUD_BASE_URL", "")
+        key = os.getenv("AMD_DEV_CLOUD_API_KEY", "")
+        print(f"  AMD_DEV_CLOUD_BASE_URL = {base or '(unset)'}")
+        print(f"  AMD_DEV_CLOUD_API_KEY  = {'set (' + str(len(key)) + ' chars)' if key else '(unset)'}")
+    elif provider == "openai":
+        print(f"  OPENAI_API_KEY = {'set' if os.getenv('OPENAI_API_KEY') else '(unset)'}")
+    elif provider == "hf":
+        print(f"  HF_TOKEN = {'set' if os.getenv('HF_TOKEN') else '(unset)'}")
+
+
+def _step(label: str) -> None:
+    print(f"\n── {label} ──")
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="SignBridge end-to-end smoke test")
+    parser.add_argument(
+        "--text",
+        default="My name is Lucas. Hello.",
+        help="Text to synthesise via TTS",
+    )
+    parser.add_argument(
+        "--signs",
+        nargs="+",
+        default=["hello", "name", "L", "U", "C", "A", "S"],
+        help="Sign sequence to compose into a sentence",
+    )
+    parser.add_argument(
+        "--frame",
+        type=Path,
+        default=None,
+        help="Path to a real sign image (PNG/JPG). Default = synthetic frame.",
+    )
+    args = parser.parse_args()
+
+    load_dotenv()
+
+    _step("Provider config")
+    _print_provider_info()
+
+    _step("VLM recognizer (sign-frame → token)")
+    if args.frame:
+        img = np.asarray(Image.open(args.frame).convert("RGB"))
+        print(f"  using real frame: {args.frame} ({img.shape})")
+    else:
+        img = _make_synthetic_frame()
+        print(f"  using synthetic frame ({img.shape})")
+        print("  (a synthetic stick figure is unlikely to match an ASL sign;")
+        print("   the expected outcome is the VLM returning 'unknown' or empty,")
+        print("   which proves the call worked even when accuracy can't be measured.)")
+    t0 = time.perf_counter()
+    token, conf = recognize_sign_from_frame(img)
+    dt = time.perf_counter() - t0
+    print(f"  → token={token!r} confidence={conf:.2f} latency={dt:.2f}s")
+
+    _step("LLM composer (sign tokens → English sentence)")
+    print(f"  input signs: {args.signs}")
+    t0 = time.perf_counter()
+    sentence = compose_sentence(args.signs)
+    dt = time.perf_counter() - t0
+    print(f"  → sentence = {sentence!r} ({dt:.2f}s)")
+
+    _step("TTS (text → audio)")
+    print(f"  input text: {args.text!r}")
+    t0 = time.perf_counter()
+    audio_path = synthesize_speech(args.text)
+    dt = time.perf_counter() - t0
+    if audio_path:
+        size = Path(audio_path).stat().st_size
+        print(f"  → wrote {audio_path} ({size:,} bytes, {dt:.2f}s)")
+    else:
+        print("  → no audio (TTS unavailable)")
+
+    _step("Summary")
+    ok_recognize = bool(token)
+    ok_compose = bool(sentence)
+    ok_tts = bool(audio_path)
+    flags = {
+        "recognizer": "✓" if ok_recognize else "- (provider may be in stub mode; check creds)",
+        "composer": "✓" if ok_compose else "✗ composer failed",
+        "tts": "✓" if ok_tts else "✗ TTS failed",
+    }
+    for k, v in flags.items():
+        print(f"  {k:<10} {v}")
+
+    # Compose + TTS MUST work even with no provider (naive joiner + silent stub).
+    # The recognizer needs a real provider.
+    return 0 if (ok_compose and ok_tts) else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())