Upload run_all_tests.sh
Browse files- run_all_tests.sh +142 -0
run_all_tests.sh
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
# ═══════════════════════════════════════════════════════════════
|
| 3 |
+
# purpose-agent v3.0.0 — Complete Test Suite Runner
|
| 4 |
+
#
|
| 5 |
+
# Usage:
|
| 6 |
+
# chmod +x run_all_tests.sh
|
| 7 |
+
# ./run_all_tests.sh # All tests (mock only)
|
| 8 |
+
# ./run_all_tests.sh --prod # Include real model tests (needs OPENROUTER_API_KEY)
|
| 9 |
+
# ═══════════════════════════════════════════════════════════════
|
| 10 |
+
# Abort on any unhandled command failure. Commands used as an `if` condition
# are exempt from this, so a failing command tested that way does not stop
# the script.
set -e

# Suite counters for the whole run.
PASS=0   # suites whose command exited 0
FAIL=0   # suites whose command exited non-zero
TOTAL=0  # suites attempted
|
| 15 |
+
|
| 16 |
+
#######################################
# Run one test suite and record its outcome.
# Globals:
#   PASS, FAIL, TOTAL (incremented)
# Arguments:
#   $1 - human-readable suite name shown in the banner and result line
#   $2 - command line to execute, passed as a single string
# Outputs:
#   Banner and a PASSED/FAILED line on stdout, plus whatever the command prints.
# Returns:
#   0 always; a failing suite is counted in FAIL, not propagated.
#######################################
run_test() {
  local name="$1"
  local cmd="$2"
  local rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

  echo ""
  echo "$rule"
  echo " Running: $name"
  echo "$rule"

  # The command arrives as one string, so it is evaluated by the shell.
  # NOTE(review): only pass trusted, script-authored strings here; eval on
  # external input would be a command-injection risk.
  # Running it as the `if` condition keeps `set -e` from aborting the script
  # when a suite fails.
  if eval "$cmd"; then
    echo " ✅ $name PASSED"
    PASS=$((PASS + 1))
  else
    echo " ❌ $name FAILED"
    FAIL=$((FAIL + 1))
  fi
  TOTAL=$((TOTAL + 1))
}
|
| 33 |
+
|
| 34 |
+
# Opening banner for the run.
echo "╔══════════════════════════════════════════════════════════╗"
echo "║  purpose-agent v3.0.0 — Complete Test Suite              ║"
echo "╚══════════════════════════════════════════════════════════╝"
echo ""
|
| 38 |
+
|
| 39 |
+
# ── Pre-flight: verify package imports ──
|
| 40 |
+
echo "━━━ Pre-flight: Package Import Check ━━━"
# Sanity-check the installed package before running any suite. This command is
# not wrapped in a conditional, so with `set -e` active a failed assertion
# (non-zero interpreter exit) stops the whole script here.
# The snippet is a double-quoted shell string: keep it free of double quotes,
# dollar signs and backticks so the shell passes it to Python unchanged.
python -c "
import purpose_agent as pa
print(f' v{pa.__version__} — {len(pa.__all__)} exports')
assert pa.__version__ == '3.0.0', f'Version mismatch: {pa.__version__}'
assert len(pa.__all__) >= 110, f'Not enough exports: {len(pa.__all__)}'
missing = [n for n in pa.__all__ if not hasattr(pa, n)]
assert len(missing) == 0, f'Missing exports: {missing}'
print(' ✅ All exports importable')
"
|
| 50 |
+
|
| 51 |
+
# ═══════════════════════════════════════════════════════════════
|
| 52 |
+
# LAYER 1: Unit Tests
|
| 53 |
+
# ═══════════════════════════════════════════════════════════════
|
| 54 |
+
|
| 55 |
+
# Each call below hands run_test a display name and the command string to
# execute; run_test tallies the result and always continues to the next suite.
run_test "test_core (basic loop, Φ bounds, optimizer, replay, immune)" \
  "python tests/test_core.py"

run_test "test_public_api_211 (all 120+ exports, Level 1/2/3)" \
  "python tests/compat/test_public_api_211.py"

run_test "test_first_principles (state-delta O(1), falsification, PEP 578)" \
  "python tests/test_first_principles.py"

run_test "test_hardening (null safety, timeouts, validation)" \
  "python tests/test_hardening.py"

run_test "test_sre_regression (5 critical vulnerability scenarios)" \
  "python tests/test_sre_regression.py"
|
| 69 |
+
|
| 70 |
+
# ═══════════════════════════════════════════════════════════════
|
| 71 |
+
# LAYER 2: Feature Tests
|
| 72 |
+
# ═══════════════════════════════════════════════════════════════
|
| 73 |
+
|
| 74 |
+
# Feature-level suites, one run_test call per test file.
run_test "test_sprint1_events (event bus, lanes, CoT rejection)" \
  "python tests/test_sprint1_events.py"

run_test "test_sprint2_checkpoint (durable execution, resume, idempotency)" \
  "python tests/test_sprint2_checkpoint.py"

run_test "test_sprint3_homeostasis (memory budget, consolidation, hibernation)" \
  "python tests/test_sprint3_homeostasis.py"

run_test "test_sprint4_8_protocols (MCP, A2A, AG-UI, AGENTS.md, quorum)" \
  "python tests/test_sprint4_8_protocols.py"

run_test "test_track_c (routing, MAS generator, skills)" \
  "python tests/test_track_c.py"

run_test "test_track_d (fingerprint, dataset, prompt pack, optimizer, distillation)" \
  "python tests/test_track_d.py"
|
| 91 |
+
|
| 92 |
+
# ═══════════════════════════════════════════════════════════════
|
| 93 |
+
# LAYER 3: Integration Tests
|
| 94 |
+
# ═══════════════════════════════════════════════════════════════
|
| 95 |
+
|
| 96 |
+
# Integration suites: the quick validation benchmark script, then the
# benchmark module shipped inside the package (run with `python -m`).
run_test "validate.py --quick (improvement curves + adversarial)" \
  "python benchmarks/validate.py --quick"

run_test "benchmark_v3 (35+ robustness checks across all subsystems)" \
  "python -m purpose_agent.benchmark_v3"
|
| 101 |
+
|
| 102 |
+
# ═══════════════════════════════════════════════════════════════
|
| 103 |
+
# LAYER 4: Production Tests (optional — needs API key)
|
| 104 |
+
# ═══════════════════════════════════════════════════════════════
|
| 105 |
+
|
| 106 |
+
# Production suite runs only when the first argument is --prod AND an API key
# is present in the environment. Otherwise it is skipped with a notice and is
# not counted as a failure.
# The ${var:-} forms expand to an empty string when the value is unset, so the
# checks behave the same even if the script is run with `set -u`.
if [ "${1:-}" = "--prod" ]; then
  if [ -z "${OPENROUTER_API_KEY:-}" ]; then
    echo "⚠️ OPENROUTER_API_KEY not set — skipping prod tests"
    echo " Set it with: export OPENROUTER_API_KEY=sk-or-v1-..."
  else
    run_test "prod_test (real model Level 1/2/3 + coding + security)" \
      "python tests/prod_test.py"
  fi
else
  echo ""
  echo " ℹ️ Production tests skipped (use --prod flag to run with real model)"
fi
|
| 118 |
+
|
| 119 |
+
# ═══════════════════════════════════════════════════════════════
|
| 120 |
+
# FINAL REPORT
|
| 121 |
+
# ═══════════════════════════════════════════════════════════════
|
| 122 |
+
|
| 123 |
+
# Print the tally and turn it into the script's exit status:
# 0 when no suite failed, 1 otherwise.
echo ""
echo "╔══════════════════════════════════════════════════════════╗"
echo "║                      FINAL RESULTS                       ║"
echo "╚══════════════════════════════════════════════════════════╝"
echo ""
echo " Total suites: $TOTAL"
echo " Passed: $PASS"
echo " Failed: $FAIL"
echo ""

# Quoted so an empty or unset counter cannot turn the test into a malformed
# expression.
if [ "$FAIL" -eq 0 ]; then
  echo " ✅ ALL $TOTAL TEST SUITES PASSED — ZERO FAILURES"
  echo ""
  echo " Ready to publish: purpose-agent==3.0.0"
  echo " Next step: python build_and_publish.py"
  exit 0
else
  echo " ❌ $FAIL SUITES FAILED — FIX BEFORE PUBLISHING"
  exit 1
fi
|