purpose-agent / tests /test_hardening.py
Rohan03's picture
harden: tests for null safety, timeouts, validation, graceful degradation
f7a7853 verified
#!/usr/bin/env python3
"""
Hardening Tests — Verify production robustness.
TH.1 safe_params handles None, string, list, garbage
TH.2 safe_float handles trailing dots, None, garbage
TH.3 with_timeout returns default on hang
TH.4 llm_call_with_timeout never raises
TH.5 graceful decorator catches all exceptions
TH.6 validate_purpose rejects empty/short
TH.7 validate_purpose accepts valid
TH.8 Full orchestrator loop with None params doesn't crash
"""
import sys, os, time
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
# Running tallies of passed / failed checks; updated only by check().
PASS = FAIL = 0


def check(name, cond, detail=""):
    """Record one test outcome and print a one-line result.

    Args:
        name: Label printed next to the pass/fail mark.
        cond: Outcome of the check. Any value is accepted and interpreted
            by truthiness.
        detail: Extra context appended to the printed line. It is shown
            only when the check fails.
    """
    global PASS, FAIL
    # Coerce with bool() rather than int(): a truthy non-bool such as 2 must
    # count as exactly one pass, and values like [1] or None must not raise.
    passed = bool(cond)
    if passed:
        PASS += 1
    else:
        FAIL += 1
    mark = "✓" if passed else "✗"
    suffix = f": {detail}" if detail and not passed else ""
    print(f" {mark} {name}{suffix}")
from purpose_agent.hardening import (
safe_params, safe_string, safe_float, safe_dict_get,
with_timeout, llm_call_with_timeout, graceful,
validate_purpose, ValidationError,
)
# ═══ Null Safety ═══
print("Null Safety")

# TH.1: safe_params is expected to yield a dict for every input type; a bare
# string is expected to be kept under the "_raw" key.
check("TH.1 None → {}", safe_params(None) == {})
check("TH.1 dict passes through", safe_params({"x": 1}) == {"x": 1})
check("TH.1 string → {_raw: s}", safe_params("hello") == {"_raw": "hello"})
check("TH.1 list → {}", safe_params([1, 2]) == {})
check("TH.1 int → {}", safe_params(42) == {})

# TH.2: safe_float is expected to tolerate malformed input (falling back to
# 0.0) and to clamp the result to min_val / max_val when they are given.
check("TH.2 safe_float('3.0.') = 3.0", safe_float("3.0.") == 3.0)
check("TH.2 safe_float(None) = 0.0", safe_float(None) == 0.0)
check("TH.2 safe_float('abc') = 0.0", safe_float("abc") == 0.0)
check("TH.2 safe_float(15, max=10) = 10", safe_float(15, max_val=10) == 10.0)
check("TH.2 safe_float(-5, min=0) = 0", safe_float(-5, min_val=0) == 0.0)

# String / dict helpers: None is expected to collapse to the empty string.
check("safe_string(None) = ''", safe_string(None) == "")
check("safe_string(123) = '123'", safe_string(123) == "123")
# Label names the key that is actually passed to safe_dict_get.
check("safe_dict_get(None, 'key') = ''", safe_dict_get(None, "key") == "")
check("safe_dict_get({'k':None}, 'k') = ''", safe_dict_get({"k": None}, "k") == "")
# ═══ Timeouts ═══
print("\nTimeouts")


def slow_fn():
    """Simulate a hung call: block for 10 seconds, then return "never"."""
    time.sleep(10)
    return "never"


# TH.3: with a 0.5 s budget the wrapper is expected to give up on slow_fn and
# hand back the supplied default instead of waiting out the full sleep.
wrapped = with_timeout(slow_fn, timeout_s=0.5, default="timed_out", label="slow_fn")
# Measure with a monotonic clock: wall-clock time (time.time) can jump when
# the system clock is adjusted, which would make the elapsed check flaky.
t0 = time.monotonic()
result = wrapped()
elapsed = time.monotonic() - t0
check("TH.3 Timeout returns default", result == "timed_out")
# The 2.0 s bound leaves slack above the 0.5 s budget while staying far below 10 s.
check("TH.3 Doesn't wait full 10s", elapsed < 2.0, f"took {elapsed:.1f}s")
def error_fn():
    """Fail immediately by raising RuntimeError("boom")."""
    raise RuntimeError("boom")


# TH.3: an exception inside the wrapped callable should surface as the default.
wrapped2 = with_timeout(error_fn, timeout_s=5.0, default="safe", label="error_fn")
raising_result = wrapped2()
check("TH.3 Exception returns default", raising_result == "safe")

# llm_call_with_timeout
# Each row: (check label, callable, timeout budget, default, expected result).
llm_cases = (
    ("TH.4 Timeout on hang", slow_fn, 0.5, "x", "x"),
    ("TH.4 Exception returns default", error_fn, 5, "y", "y"),
    ("TH.4 Normal returns value", lambda: "ok", 5, "x", "ok"),
)
for case_label, target, budget_s, fallback, expected in llm_cases:
    outcome = llm_call_with_timeout(target, timeout_s=budget_s, default=fallback)
    check(case_label, outcome == expected)
# ═══ Graceful ═══
print("\nGraceful Degradation")


@graceful(default={"empty": True}, label="parse")
def bad_parse(text):
    """Always raise ValueError so the decorator's fallback path is exercised."""
    raise ValueError("parse failed")


# TH.5: the decorated call is expected to return the default, not raise.
check("TH.5 Graceful catches", bad_parse("x") == {"empty": True})


@graceful(default=0, label="compute")
def returns_none():
    """Return None so the decorator's handling of a None result is exercised."""
    return None


# TH.5: a None result is expected to be replaced by the decorator's default.
check("TH.5 None → default", returns_none() == 0)
# ═══ Validation ═══
print("\nInput Validation")

# TH.6: each of these inputs must be rejected with ValidationError.
for rejected_input, case_label in (
    ("", "TH.6 Rejects empty"),
    ("ab", "TH.6 Rejects short"),
):
    try:
        validate_purpose(rejected_input)
    except ValidationError:
        was_rejected = True
    else:
        was_rejected = False
    check(case_label, was_rejected)

# TH.7: valid input is returned, with surrounding whitespace removed.
accepted = validate_purpose("Write Python code")
check("TH.7 Accepts valid", accepted == "Write Python code")
stripped = validate_purpose(" hello ")
check("TH.7 Strips whitespace", stripped == "hello")
# ═══ Integration: Orchestrator with bad params ═══
print("\nIntegration")
import json

import purpose_agent as pa
from purpose_agent.orchestrator import SimpleEnvironment
from purpose_agent.types import State

# Mock that returns action with None params
# NOTE(review): presumably the handler is selected when the prompt contains
# "goal-directed agent" — confirm against MockLLMBackend.register_handler.
mock = pa.MockLLMBackend()
mock.register_handler("goal-directed agent", json.dumps({
    "thought": "test",
    "action": {"name": "DONE", "params": None},  # None params!
    "expected_delta": "done",
}))
mock.set_structured_default({
    "phi_before": 0,
    "phi_after": 5,
    "reasoning": "ok",
    "evidence": "state changed",
    "confidence": 0.7,
})

# Environment whose execute function ignores its inputs and reports a done state.
env = SimpleEnvironment(execute_fn=lambda a, s: State(data={"done": True}))
orch = pa.Orchestrator(llm=mock, environment=env, available_actions={"DONE": "Done"})

# TH.8: only AttributeError (e.g. calling a method on the None params) counts
# as the crash under test; any other exception is still recorded as a pass.
try:
    orch.run_task(purpose="test", max_steps=2)
except AttributeError as e:
    check("TH.8 None params no crash", False, str(e))
except Exception as e:
    check("TH.8 None params no crash", True)
    # check() prints its detail only on failure, so report the tolerated
    # error explicitly instead of silently dropping it.
    print(f"   note: different error: {type(e).__name__}: {e}")
else:
    check("TH.8 None params no crash", True)
# ═══ REPORT ═══
# Print the final tally between two 50-character rules.
divider = "=" * 50
verdict = "ALL PASS ✓" if FAIL == 0 else f"{FAIL} FAILURES"
print(f"\n{divider}")
print(f" Hardening Tests: {PASS} pass, {FAIL} fail")
print(f" {verdict}")
print(divider)
# Exit status mirrors the outcome: 0 when nothing failed, 1 otherwise.
sys.exit(0 if FAIL == 0 else 1)