Rohan03 committed on
Commit
0a21e03
·
verified ·
1 Parent(s): 00c52ec

Upload run_all_tests.sh

Browse files
Files changed (1) hide show
  1. run_all_tests.sh +142 -0
run_all_tests.sh ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════
# purpose-agent v3.0.0 — Complete Test Suite Runner
#
# Usage:
#   chmod +x run_all_tests.sh
#   ./run_all_tests.sh          # All tests (mock only)
#   ./run_all_tests.sh --prod   # Include real model tests (needs OPENROUTER_API_KEY)
# ═══════════════════════════════════════════════════════════════
# Abort on any unguarded command failure. Suite commands are executed as an
# `if` condition inside run_test, so a failing suite is counted, not fatal.
set -e

# Suite counters: incremented by run_test, read by the final report.
PASS=0
FAIL=0
TOTAL=0

#######################################
# Run one test suite and record its outcome.
# Globals:   PASS, FAIL, TOTAL (written)
# Arguments: $1 - human-readable suite name (used in the banner and result line)
#            $2 - command line to execute, given as a single string
# Outputs:   banner, the command's own output, and a PASSED/FAILED line on stdout
# Returns:   0 always; a failing suite is counted in FAIL, not propagated
#######################################
run_test() {
  local name="$1"
  local cmd="$2"
  local rule="══════════════════════════════════════════════════════════"

  echo ""
  echo "$rule"
  echo " Running: $name"
  echo "$rule"

  # Callers pass the command as one string, hence eval; only pass commands
  # hard-coded in this script. Running it as the `if` condition keeps a
  # failing suite from aborting the whole run under `set -e`.
  if eval "$cmd"; then
    echo " ✅ $name PASSED"
    PASS=$((PASS + 1))
  else
    echo " ❌ $name FAILED"
    FAIL=$((FAIL + 1))
  fi
  TOTAL=$((TOTAL + 1))
}

echo "╔══════════════════════════════════════════════════════════╗"
echo "║  purpose-agent v3.0.0 — Complete Test Suite              ║"
echo "╚══════════════════════════════════════════════════════════╝"
echo ""

# ── Pre-flight: verify package imports ──
# Deliberately not wrapped in run_test: with `set -e` active, a failed import,
# version mismatch, or missing export stops the run before any suite starts.
echo "═══ Pre-flight: Package Import Check ═══"
python -c "
import purpose_agent as pa
print(f' v{pa.__version__} — {len(pa.__all__)} exports')
assert pa.__version__ == '3.0.0', f'Version mismatch: {pa.__version__}'
assert len(pa.__all__) >= 110, f'Not enough exports: {len(pa.__all__)}'
missing = [n for n in pa.__all__ if not hasattr(pa, n)]
assert len(missing) == 0, f'Missing exports: {missing}'
print(' ✅ All exports importable')
"

# ═══════════════════════════════════════════════════════════════
# LAYER 1: Unit Tests
# ═══════════════════════════════════════════════════════════════

run_test "test_core (basic loop, Φ bounds, optimizer, replay, immune)" \
  "python tests/test_core.py"

run_test "test_public_api_211 (all 120+ exports, Level 1/2/3)" \
  "python tests/compat/test_public_api_211.py"

run_test "test_first_principles (state-delta O(1), falsification, PEP 578)" \
  "python tests/test_first_principles.py"

run_test "test_hardening (null safety, timeouts, validation)" \
  "python tests/test_hardening.py"

run_test "test_sre_regression (5 critical vulnerability scenarios)" \
  "python tests/test_sre_regression.py"

# ═══════════════════════════════════════════════════════════════
# LAYER 2: Feature Tests
# ═══════════════════════════════════════════════════════════════

run_test "test_sprint1_events (event bus, lanes, CoT rejection)" \
  "python tests/test_sprint1_events.py"

run_test "test_sprint2_checkpoint (durable execution, resume, idempotency)" \
  "python tests/test_sprint2_checkpoint.py"

run_test "test_sprint3_homeostasis (memory budget, consolidation, hibernation)" \
  "python tests/test_sprint3_homeostasis.py"

run_test "test_sprint4_8_protocols (MCP, A2A, AG-UI, AGENTS.md, quorum)" \
  "python tests/test_sprint4_8_protocols.py"

run_test "test_track_c (routing, MAS generator, skills)" \
  "python tests/test_track_c.py"

run_test "test_track_d (fingerprint, dataset, prompt pack, optimizer, distillation)" \
  "python tests/test_track_d.py"

# ═══════════════════════════════════════════════════════════════
# LAYER 3: Integration Tests
# ═══════════════════════════════════════════════════════════════

run_test "validate.py --quick (improvement curves + adversarial)" \
  "python benchmarks/validate.py --quick"

run_test "benchmark_v3 (35+ robustness checks across all subsystems)" \
  "python -m purpose_agent.benchmark_v3"

# ═══════════════════════════════════════════════════════════════
# LAYER 4: Production Tests (optional — needs API key)
# ═══════════════════════════════════════════════════════════════

# ${1:-} and ${OPENROUTER_API_KEY:-} keep these checks safe when the script
# is run without arguments or the variable is unset, even under `set -u`.
if [[ "${1:-}" == "--prod" ]]; then
  if [[ -z "${OPENROUTER_API_KEY:-}" ]]; then
    # NOTE(review): a --prod run without a key is only warned about; it does
    # not count as a failure in the final report.
    echo "⚠️ OPENROUTER_API_KEY not set — skipping prod tests"
    echo " Set it with: export OPENROUTER_API_KEY=sk-or-v1-..."
  else
    run_test "prod_test (real model Level 1/2/3 + coding + security)" \
      "python tests/prod_test.py"
  fi
else
  echo ""
  echo " ℹ️ Production tests skipped (use --prod flag to run with real model)"
fi

# ═══════════════════════════════════════════════════════════════
# FINAL REPORT
# ═══════════════════════════════════════════════════════════════
# Prints the suite totals and sets the script's exit status:
# 0 when no suite failed, 1 otherwise.

echo ""
echo "╔══════════════════════════════════════════════════════════╗"
echo "║  FINAL RESULTS                                           ║"
echo "╚══════════════════════════════════════════════════════════╝"
echo ""
echo " Total suites: $TOTAL"
echo " Passed: $PASS"
echo " Failed: $FAIL"
echo ""

# Arithmetic test instead of an unquoted `[ $FAIL -eq 0 ]`, which breaks
# with a syntax error from `[` if FAIL is ever empty.
if (( FAIL == 0 )); then
  echo " ✅ ALL $TOTAL TEST SUITES PASSED — ZERO FAILURES"
  echo ""
  echo " Ready to publish: purpose-agent==3.0.0"
  echo " Next step: python build_and_publish.py"
  exit 0
else
  echo " ❌ $FAIL SUITES FAILED — FIX BEFORE PUBLISHING"
  exit 1
fi