# techfreakworm's picture
# chore(scripts): smoke.sh exercises /api/generate/dialog as well
# 512fc03 unverified
#!/usr/bin/env bash
# Smoke test against a running server on http://127.0.0.1:7860.
# Usage: scripts/smoke.sh [BASE_URL]
#
# Steps, in order (the script stops at the first failure with exit status 1):
#   1. GET  /api/health                       body must contain "device"
#   2. GET  /api/models                       body must contain chatterbox-en
#   3. POST /api/models/chatterbox-en/activate
#   4. POST /api/generate                     response must start with "RIFF"
#   5. POST /api/generate/dialog              response must start with "RIFF"
#
# Requires: curl, od, tr, wc, and python (or python3) with numpy + soundfile.
# Generated audio is left in a fresh temp directory for inspection; the
# silent reference clips uploaded in step 5 are removed on exit.
set -euo pipefail

BASE="${1:-http://127.0.0.1:7860}"
readonly BASE
readonly MODEL_ID="chatterbox-en"
readonly RIFF_MAGIC_HEX="52494646" # ASCII "RIFF"

# Print a FAIL line on stderr and abort the whole script.
die() {
  printf 'FAIL: %s\n' "$*" >&2
  exit 1
}

# GET "$BASE$1", echo the body on stderr, and require that it contains the
# literal string $2.
# The body is captured first instead of piping through `tee | grep -q`:
# under pipefail, grep -q exiting on its first match can SIGPIPE the upstream
# stage and fail the pipeline even though the check succeeded.
fetch_and_expect() {
  local path=$1 needle=$2 body
  body=$(curl -fsS "$BASE$path") || die "GET $path failed"
  printf '%s\n' "$body" >&2
  grep -qF -- "$needle" <<<"$body" \
    || die "GET $path: response does not contain $needle"
}

# Write a 1 s, mono, 24 kHz, 16-bit PCM WAV of silence to path $1 using the
# interpreter in $PYTHON_BIN.
make_silent_wav() {
  local dest=$1
  "$PYTHON_BIN" - "$dest" <<'PY'
import sys, numpy as np, soundfile as sf
sf.write(sys.argv[1], np.zeros(24000, dtype="float32"), 24000, format="WAV", subtype="PCM_16")
PY
}

# Require that file $1 starts with the RIFF magic; $2 is the label used in
# the failure message. On success, report the path and size on stdout.
assert_riff() {
  local file=$1 label=$2 magic
  # od is POSIX, unlike xxd; tr strips od's column padding and newline.
  magic=$(od -An -tx1 -N4 "$file" | tr -d ' \n')
  if [[ "$magic" != "$RIFF_MAGIC_HEX" ]]; then
    die "$label is not a RIFF wav (head=$magic)"
  fi
  # $(( ... )) normalizes the space-padded count some wc implementations emit.
  echo "OK — wrote $file ($(($(wc -c <"$file"))) bytes)"
}

# Remove the throwaway reference clips; generated outputs are kept.
cleanup() {
  rm -f -- "$REF_A" "$REF_B"
}

main() {
  local work_dir gen_out dialog_out

  # One temp directory with fixed *.wav names inside: `mktemp -t x.XXXXXX.wav`
  # does not yield a .wav suffix on BSD/macOS, where -t takes a prefix.
  work_dir=$(mktemp -d -t smoke.XXXXXX) || die "mktemp failed"
  gen_out="$work_dir/generate.wav"
  dialog_out="$work_dir/dialog.wav"
  # Globals (not local) so the EXIT trap can still see them after main returns.
  REF_A="$work_dir/ref_a.wav"
  REF_B="$work_dir/ref_b.wav"
  trap cleanup EXIT

  echo "== /api/health"
  fetch_and_expect /api/health '"device"'

  echo "== /api/models"
  fetch_and_expect /api/models "$MODEL_ID"

  echo "== activate $MODEL_ID"
  curl -fsS -X POST "$BASE/api/models/$MODEL_ID/activate" \
    || die "POST /api/models/$MODEL_ID/activate failed"
  echo

  echo "== generate (1 sentence)"
  curl -fsS -X POST "$BASE/api/generate" \
    -F text='Hello world from Chatterbox.' \
    -F model_id="$MODEL_ID" \
    -F params='{}' \
    -o "$gen_out" \
    || die "POST /api/generate failed"
  assert_riff "$gen_out" "output"
  echo

  echo "== generate dialog (2 speakers, en)"
  # Prefer `python` (as before); fall back to `python3` where only that exists.
  PYTHON_BIN=$(command -v python || command -v python3) \
    || die "python (or python3) not found on PATH"
  # Make a tiny silent reference clip (1s mono 24k WAV) for both speakers.
  make_silent_wav "$REF_A" || die "could not create reference clip $REF_A"
  make_silent_wav "$REF_B" || die "could not create reference clip $REF_B"
  curl -fsS -X POST "$BASE/api/generate/dialog" \
    -F text=$'SPEAKER A: hi.\nSPEAKER B: hello.' \
    -F engine_id="$MODEL_ID" \
    -F params='{}' \
    -F reference_wav_a=@"$REF_A" \
    -F reference_wav_b=@"$REF_B" \
    -o "$dialog_out" \
    || die "POST /api/generate/dialog failed"
  assert_riff "$dialog_out" "dialog output"
}

main "$@"