Rohan03
/

purpose-agent

#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════
# purpose-agent v3.0.0 — Complete Test Suite Runner
#
# Usage:
#   chmod +x run_all_tests.sh
#   ./run_all_tests.sh              # All tests (mock only)
#   ./run_all_tests.sh --prod       # Include real model tests (needs OPENROUTER_API_KEY)
#
# Exit status: 0 when every suite passed, 1 when at least one suite failed.
# ═══════════════════════════════════════════════════════════════
# Stop at the first failing command that is not part of a conditional.
set -o errexit

# Running tally of suite outcomes; all three start from zero.
PASS=0 FAIL=0 TOTAL=0
#######################################
# Run one test suite and record whether it passed.
#
# The command string is evaluated inside a subshell, so a suite that calls
# `exit`, changes directory, or assigns shell variables cannot terminate the
# runner or tamper with its counters. An empty command string is counted as
# a failure instead of a vacuous pass.
#
# Globals:   PASS, FAIL, TOTAL (each incremented as appropriate)
# Arguments: $1 - human-readable suite name printed in the banner
#            $2 - shell command line to execute (handed to eval)
# Outputs:   banner, the command's own output, and a PASSED/FAILED line
# Returns:   0 always; a failing suite is only counted so later suites run
#######################################
run_test() {
    local name="${1:-}"
    local cmd="${2:-}"
    echo ""
    echo "══════════════════════════════════════════════════════════"
    echo "  Running: $name"
    echo "══════════════════════════════════════════════════════════"
    # Used as an `if` condition, so a non-zero status does not trip `set -e`.
    if [[ -n "$cmd" ]] && (eval "$cmd"); then
        echo "  ✅ $name PASSED"
        PASS=$((PASS + 1))
    else
        echo "  ❌ $name FAILED"
        FAIL=$((FAIL + 1))
    fi
    TOTAL=$((TOTAL + 1))
}
# Title banner, followed by one blank line.
cat <<'BANNER'
╔══════════════════════════════════════════════════════════╗
║  purpose-agent v3.0.0 — Complete Test Suite            ║
╚══════════════════════════════════════════════════════════╝

BANNER

# ── Pre-flight: verify package imports ──
# The Python snippet imports purpose_agent and asserts that the version is
# exactly 3.0.0, that __all__ lists at least 110 names, and that every listed
# name is an attribute of the package. A failed assertion makes python exit
# non-zero.
printf '%s\n' "═══ Pre-flight: Package Import Check ═══"
preflight_source="
import purpose_agent as pa
print(f'  v{pa.__version__} — {len(pa.__all__)} exports')
assert pa.__version__ == '3.0.0', f'Version mismatch: {pa.__version__}'
assert len(pa.__all__) >= 110, f'Not enough exports: {len(pa.__all__)}'
missing = [n for n in pa.__all__ if not hasattr(pa, n)]
assert len(missing) == 0, f'Missing exports: {missing}'
print('  ✅ All exports importable')
"
python -c "$preflight_source"
# ═══════════════════════════════════════════════════════════════
# LAYERS 1-3: Unit, Feature and Integration suites
# ═══════════════════════════════════════════════════════════════
# Flat table of "display name" / "command line" pairs. Suites run strictly in
# the order listed; each pair is handed to run_test unchanged.
suite_table=(
    # ── LAYER 1: Unit Tests ──
    "test_core (basic loop, Φ bounds, optimizer, replay, immune)"
    "python tests/test_core.py"
    "test_public_api_211 (all 120+ exports, Level 1/2/3)"
    "python tests/compat/test_public_api_211.py"
    "test_first_principles (state-delta O(1), falsification, PEP 578)"
    "python tests/test_first_principles.py"
    "test_hardening (null safety, timeouts, validation)"
    "python tests/test_hardening.py"
    "test_sre_regression (5 critical vulnerability scenarios)"
    "python tests/test_sre_regression.py"

    # ── LAYER 2: Feature Tests ──
    "test_sprint1_events (event bus, lanes, CoT rejection)"
    "python tests/test_sprint1_events.py"
    "test_sprint2_checkpoint (durable execution, resume, idempotency)"
    "python tests/test_sprint2_checkpoint.py"
    "test_sprint3_homeostasis (memory budget, consolidation, hibernation)"
    "python tests/test_sprint3_homeostasis.py"
    "test_sprint4_8_protocols (MCP, A2A, AG-UI, AGENTS.md, quorum)"
    "python tests/test_sprint4_8_protocols.py"
    "test_track_c (routing, MAS generator, skills)"
    "python tests/test_track_c.py"
    "test_track_d (fingerprint, dataset, prompt pack, optimizer, distillation)"
    "python tests/test_track_d.py"

    # ── LAYER 3: Integration Tests ──
    "validate.py --quick (improvement curves + adversarial)"
    "python benchmarks/validate.py --quick"
    "benchmark_v3 (35+ robustness checks across all subsystems)"
    "python -m purpose_agent.benchmark_v3"
)

# Walk the table two entries at a time: even index = name, odd index = command.
for ((idx = 0; idx < ${#suite_table[@]}; idx += 2)); do
    run_test "${suite_table[idx]}" "${suite_table[idx + 1]}"
done
# ═══════════════════════════════════════════════════════════════
# LAYER 4: Production Tests (optional — needs API key)
# ═══════════════════════════════════════════════════════════════
# Only the first positional argument is inspected. The production suite runs
# when it is exactly --prod and OPENROUTER_API_KEY is non-empty; otherwise a
# notice is printed and the layer is skipped.
case "$1" in
    --prod)
        if [ -n "$OPENROUTER_API_KEY" ]; then
            run_test "prod_test (real model Level 1/2/3 + coding + security)" \
                "python tests/prod_test.py"
        else
            echo "⚠️  OPENROUTER_API_KEY not set — skipping prod tests"
            echo "   Set it with: export OPENROUTER_API_KEY=sk-or-v1-..."
        fi
        ;;
    *)
        echo ""
        echo "  ℹ️  Production tests skipped (use --prod flag to run with real model)"
        ;;
esac
# ═══════════════════════════════════════════════════════════════
# FINAL REPORT
# ═══════════════════════════════════════════════════════════════
# Print the tally held in TOTAL / PASS / FAIL, then exit 0 when no suite
# failed and 1 otherwise.
printf '%s\n' \
    "" \
    "╔══════════════════════════════════════════════════════════╗" \
    "║  FINAL RESULTS                                         ║" \
    "╚══════════════════════════════════════════════════════════╝" \
    "" \
    "  Total suites: $TOTAL" \
    "  Passed:       $PASS" \
    "  Failed:       $FAIL" \
    ""

# Arithmetic test instead of an unquoted `[ $FAIL -eq 0 ]`: an empty value
# no longer degenerates into the malformed `[ -eq 0 ]`.
if (( FAIL == 0 )); then
    echo "  ✅ ALL $TOTAL TEST SUITES PASSED — ZERO FAILURES"
    echo ""
    echo "  Ready to publish: purpose-agent==3.0.0"
    echo "  Next step: python build_and_publish.py"
    exit 0
else
    echo "  ❌ $FAIL SUITES FAILED — FIX BEFORE PUBLISHING"
    exit 1
fi