#!/usr/bin/env python3
"""
Hardening Tests — Verify production robustness.
TH.1 safe_params handles None, string, list, garbage
TH.2 safe_float handles trailing dots, None, garbage
TH.3 with_timeout returns default on hang
TH.4 llm_call_with_timeout never raises
TH.5 graceful decorator catches all exceptions
TH.6 validate_purpose rejects empty/short
TH.7 validate_purpose accepts valid
TH.8 Full orchestrator loop with None params doesn't crash
"""
import sys, os, time
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
# Running totals of passed / failed checks, reported at the end of the script.
PASS = FAIL = 0


def check(name, cond, detail=""):
    """Record one test outcome and print a single result line.

    Args:
        name: Label printed for the check.
        cond: Outcome of the check; any truthy value counts as a pass.
        detail: Optional explanation, appended only when the check fails.
    """
    global PASS, FAIL
    passed = bool(cond)
    if passed:
        PASS += 1
    else:
        FAIL += 1
    # Distinct markers so passing and failing lines can be told apart.
    marker = "✓" if passed else "✗"
    suffix = f": {detail}" if detail and not passed else ""
    print(f" {marker} {name}{suffix}")
from purpose_agent.hardening import (
safe_params, safe_string, safe_float, safe_dict_get,
with_timeout, llm_call_with_timeout, graceful,
validate_purpose, ValidationError,
)
# ─── Null Safety ───
print("Null Safety")

# TH.1: safe_params is expected to yield a dict for every kind of input.
check("TH.1 None → {}", safe_params(None) == {})
check("TH.1 dict passes through", safe_params({"x": 1}) == {"x": 1})
check("TH.1 string → {_raw: s}", safe_params("hello") == {"_raw": "hello"})
check("TH.1 list → {}", safe_params([1, 2]) == {})
check("TH.1 int → {}", safe_params(42) == {})

# TH.2: safe_float is expected to tolerate malformed input and clamp to bounds.
check("TH.2 safe_float('3.0.') = 3.0", safe_float("3.0.") == 3.0)
check("TH.2 safe_float(None) = 0.0", safe_float(None) == 0.0)
check("TH.2 safe_float('abc') = 0.0", safe_float("abc") == 0.0)
check("TH.2 safe_float(15, max=10) = 10", safe_float(15, max_val=10) == 10.0)
check("TH.2 safe_float(-5, min=0) = 0", safe_float(-5, min_val=0) == 0.0)

# String / dict helpers: None is expected to collapse to the empty string.
check("safe_string(None) = ''", safe_string(None) == "")
check("safe_string(123) = '123'", safe_string(123) == "123")
check("safe_dict_get(None, 'k') = ''", safe_dict_get(None, "key") == "")
check("safe_dict_get({'k':None}, 'k') = ''", safe_dict_get({"k": None}, "k") == "")
# ─── Timeouts ───
print("\nTimeouts")


def slow_fn(delay_s=10):
    """Block for ``delay_s`` seconds, then return ``"never"``.

    Stands in for a hung call in the timeout checks: with the default delay
    the function outlives the 0.5 s timeouts it is wrapped with, so the
    string "never" should not reach the caller.

    Args:
        delay_s: Seconds to sleep before returning. Defaults to 10, the
            value the timeout checks rely on.

    Returns:
        The string ``"never"``.
    """
    time.sleep(delay_s)
    return "never"
# TH.3: the wrapped call is expected to give up after 0.5 s and hand back the
# default instead of waiting for slow_fn to finish.
wrapped = with_timeout(slow_fn, timeout_s=0.5, default="timed_out", label="slow_fn")
# Monotonic clock: an elapsed-time measurement must not be skewed by
# wall-clock adjustments that happen while the call is in flight.
t0 = time.monotonic()
result = wrapped()
elapsed = time.monotonic() - t0
check("TH.3 Timeout returns default", result == "timed_out")
check("TH.3 Doesn't wait full 10s", elapsed < 2.0, f"took {elapsed:.1f}s")
def error_fn():
    """Always fail by raising ``RuntimeError("boom")``.

    Raises:
        RuntimeError: Unconditionally, with the message "boom".
    """
    failure = RuntimeError("boom")
    raise failure
# TH.3: a callable that raises is expected to produce the default as well.
wrapped2 = with_timeout(error_fn, timeout_s=5.0, default="safe", label="error_fn")
error_outcome = wrapped2()
check("TH.3 Exception returns default", error_outcome == "safe")

# llm_call_with_timeout: hang, exception, and normal-return paths in turn.
hang_outcome = llm_call_with_timeout(slow_fn, timeout_s=0.5, default="x")
check("TH.4 Timeout on hang", hang_outcome == "x")

raise_outcome = llm_call_with_timeout(error_fn, timeout_s=5, default="y")
check("TH.4 Exception returns default", raise_outcome == "y")

normal_outcome = llm_call_with_timeout(lambda: "ok", timeout_s=5, default="x")
check("TH.4 Normal returns value", normal_outcome == "ok")
# ─── Graceful ───
print("\nGraceful Degradation")


@graceful(default={"empty": True}, label="parse")
def bad_parse(text):
    """Always raise ValueError so the decorator's fallback is exercised."""
    raise ValueError("parse failed")


# The decorated call is expected to return the default, not propagate.
check("TH.5 Graceful catches", bad_parse("x") == {"empty": True})


@graceful(default=0, label="compute")
def returns_none():
    """Return None; the check below expects the decorated call to yield 0."""
    return None


check("TH.5 None → default", returns_none() == 0)
# ─── Validation ───
print("\nInput Validation")

# TH.6: each rejected purpose must raise ValidationError; returning normally
# is recorded as a failure.
for label, bad_purpose in (
    ("TH.6 Rejects empty", ""),
    ("TH.6 Rejects short", "ab"),
):
    try:
        validate_purpose(bad_purpose)
    except ValidationError:
        rejected = True
    else:
        rejected = False
    check(label, rejected)

# TH.7: accepted purposes come back as the cleaned-up string.
accepted = validate_purpose("Write Python code")
check("TH.7 Accepts valid", accepted == "Write Python code")
stripped = validate_purpose(" hello ")
check("TH.7 Strips whitespace", stripped == "hello")
# ─── Integration: Orchestrator with bad params ───
print("\nIntegration")
import purpose_agent as pa
from purpose_agent.types import State
import json
from purpose_agent.orchestrator import SimpleEnvironment

# Mock backend whose action carries "params": None instead of a dict.
mock = pa.MockLLMBackend()
none_params_reply = {
    "thought": "test",
    "action": {"name": "DONE", "params": None},  # None params!
    "expected_delta": "done",
}
mock.register_handler("goal-directed agent", json.dumps(none_params_reply))
mock.set_structured_default({
    "phi_before": 0,
    "phi_after": 5,
    "reasoning": "ok",
    "evidence": "state changed",
    "confidence": 0.7,
})

env = SimpleEnvironment(execute_fn=lambda a, s: State(data={"done": True}))
orch = pa.Orchestrator(llm=mock, environment=env, available_actions={"DONE": "Done"})

try:
    r = orch.run_task(purpose="test", max_steps=2)
except AttributeError as e:
    # AttributeError is the failure this check guards against.
    check("TH.8 None params no crash", False, str(e))
except Exception as e:
    # Any other exception still counts as a pass for TH.8. check() only shows
    # its detail on failure, so the note is printed here explicitly; passing
    # it as detail alongside cond=True would silently drop it.
    check("TH.8 None params no crash", True)
    print(f"   (different error: {type(e).__name__})")
else:
    check("TH.8 None params no crash", True)
# ─── REPORT ───
banner = "=" * 50
print(f"\n{banner}")
print(f" Hardening Tests: {PASS} pass, {FAIL} fail")
summary = "ALL PASS ✓" if FAIL == 0 else f"{FAIL} FAILURES"
print(f" {summary}")
print(banner)
# Exit status mirrors the result: 0 when nothing failed, 1 otherwise.
sys.exit(0 if FAIL == 0 else 1)