Rohan03 committed on
Commit
f7a7853
·
verified ·
1 Parent(s): 658c9d5

harden: tests for null safety, timeouts, validation, graceful degradation

Browse files
Files changed (1) hide show
  1. tests/test_hardening.py +138 -0
tests/test_hardening.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Hardening Tests — Verify production robustness.
4
+
5
+ TH.1 safe_params handles None, string, list, garbage
6
+ TH.2 safe_float handles trailing dots, None, garbage
7
+ TH.3 with_timeout returns default on hang
8
+ TH.4 llm_call_with_timeout never raises
9
+ TH.5 graceful decorator catches all exceptions
10
+ TH.6 validate_purpose rejects empty/short
11
+ TH.7 validate_purpose accepts valid
12
+ TH.8 Full orchestrator loop with None params doesn't crash
13
+ """
14
+ import sys, os, time
15
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
16
+
17
# Running tallies of passed / failed checks; mutated only by check().
PASS = FAIL = 0


def check(name, cond, detail=""):
    """Record the outcome of one check and print a one-line status for it.

    Args:
        name: Label printed next to the pass/fail mark.
        cond: Outcome of the check; only its truthiness matters.
        detail: Optional extra text, printed only when the check fails.
    """
    global PASS, FAIL
    # Normalise to bool first: int(cond) raises on truthy non-numeric values
    # (e.g. a non-empty list) and would add more than 1 for ints > 1.
    ok = bool(cond)
    if ok:
        PASS += 1
    else:
        FAIL += 1
    mark = "✓" if ok else "✗"
    suffix = f": {detail}" if detail and not ok else ""
    print(f" {mark} {name}{suffix}")
22
+
23
+ from purpose_agent.hardening import (
24
+ safe_params, safe_string, safe_float, safe_dict_get,
25
+ with_timeout, llm_call_with_timeout, graceful,
26
+ validate_purpose, ValidationError,
27
+ )
28
+
29
# ═══ Null Safety ═══
# Each check compares a helper's return value for an awkward input against
# the fallback this test suite expects for it.
print("Null Safety")

# TH.1: safe_params is expected to yield a dict for every input type.
check("TH.1 None → {}", safe_params(None) == {})
check("TH.1 dict passes through", safe_params({"x": 1}) == {"x": 1})
check("TH.1 string → {_raw: s}", safe_params("hello") == {"_raw": "hello"})
check("TH.1 list → {}", safe_params([1, 2]) == {})
check("TH.1 int → {}", safe_params(42) == {})

# TH.2: safe_float is expected to tolerate malformed input and honour the
# min_val / max_val bounds.
check("TH.2 safe_float('3.0.') = 3.0", safe_float("3.0.") == 3.0)
check("TH.2 safe_float(None) = 0.0", safe_float(None) == 0.0)
check("TH.2 safe_float('abc') = 0.0", safe_float("abc") == 0.0)
check("TH.2 safe_float(15, max=10) = 10", safe_float(15, max_val=10) == 10.0)
check("TH.2 safe_float(-5, min=0) = 0", safe_float(-5, min_val=0) == 0.0)

# String / dict helpers: None is expected to collapse to the empty string.
check("safe_string(None) = ''", safe_string(None) == "")
check("safe_string(123) = '123'", safe_string(123) == "123")
# Label now names the key that is actually passed ("key", not "k").
check("safe_dict_get(None, 'key') = ''", safe_dict_get(None, "key") == "")
check("safe_dict_get({'k':None}, 'k') = ''", safe_dict_get({"k": None}, "k") == "")
47
+
48
# ═══ Timeouts ═══
print("\nTimeouts")


def slow_fn():
    """Sleep for 10 seconds, far longer than the 0.5 s timeouts used below."""
    time.sleep(10)
    return "never"


def error_fn():
    """Fail immediately with a RuntimeError."""
    raise RuntimeError("boom")


# TH.3: the wrapped slow call should give up early and hand back the default.
wrapped = with_timeout(slow_fn, timeout_s=0.5, default="timed_out", label="slow_fn")
t0 = time.time()
result = wrapped()
elapsed = time.time() - t0
check("TH.3 Timeout returns default", result == "timed_out")
check("TH.3 Doesn't wait full 10s", elapsed < 2.0, f"took {elapsed:.1f}s")

# TH.3: an exception inside the wrapped call should also yield the default.
wrapped2 = with_timeout(error_fn, timeout_s=5.0, default="safe", label="error_fn")
check("TH.3 Exception returns default", wrapped2() == "safe")

# llm_call_with_timeout
# TH.4: hang, exception and normal completion, each compared to its expected value.
check(
    "TH.4 Timeout on hang",
    llm_call_with_timeout(slow_fn, timeout_s=0.5, default="x") == "x",
)
check(
    "TH.4 Exception returns default",
    llm_call_with_timeout(error_fn, timeout_s=5, default="y") == "y",
)
check(
    "TH.4 Normal returns value",
    llm_call_with_timeout(lambda: "ok", timeout_s=5, default="x") == "ok",
)
72
+
73
# ═══ Graceful ═══
print("\nGraceful Degradation")


@graceful(default={"empty": True}, label="parse")
def bad_parse(text):
    """Always raise ValueError so the decorator's fallback path is exercised."""
    raise ValueError("parse failed")


# TH.5: the raised ValueError is expected to be replaced by the default dict.
check("TH.5 Graceful catches", bad_parse("x") == {"empty": True})


@graceful(default=0, label="compute")
def returns_none():
    """Return None; the check below expects the decorator to substitute 0."""
    return None


check("TH.5 None → default", returns_none() == 0)
87
+
88
# ═══ Validation ═══
print("\nInput Validation")

# TH.6: each bad input passes its check only if validate_purpose raises
# ValidationError for it.
for label, bad_input in (
    ("TH.6 Rejects empty", ""),
    ("TH.6 Rejects short", "ab"),
):
    try:
        validate_purpose(bad_input)
    except ValidationError:
        rejected = True
    else:
        rejected = False
    check(label, rejected)

# TH.7: a valid purpose is returned as-is, with surrounding whitespace removed.
check("TH.7 Accepts valid", validate_purpose("Write Python code") == "Write Python code")
check("TH.7 Strips whitespace", validate_purpose(" hello ") == "hello")
105
+
106
# ═══ Integration: Orchestrator with bad params ═══
print("\nIntegration")
import json

import purpose_agent as pa
from purpose_agent.types import State
from purpose_agent.orchestrator import SimpleEnvironment

# Mock backend whose registered response carries an action with params=None.
# NOTE(review): "goal-directed agent" is presumably the text the handler is
# matched against -- confirm against MockLLMBackend.register_handler.
mock = pa.MockLLMBackend()
mock.register_handler("goal-directed agent", json.dumps({
    "thought": "test",
    "action": {"name": "DONE", "params": None},  # None params!
    "expected_delta": "done",
}))
mock.set_structured_default({
    "phi_before": 0,
    "phi_after": 5,
    "reasoning": "ok",
    "evidence": "state changed",
    "confidence": 0.7,
})

env = SimpleEnvironment(execute_fn=lambda a, s: State(data={"done": True}))
orch = pa.Orchestrator(llm=mock, environment=env, available_actions={"DONE": "Done"})

# TH.8: only an AttributeError counts as a failure; any other exception is
# tolerated.
try:
    orch.run_task(purpose="test", max_steps=2)
except AttributeError as e:
    check("TH.8 None params no crash", False, str(e))
except Exception as e:
    check("TH.8 None params no crash", True)
    # check() prints detail only on failure, so report the tolerated error
    # here instead of silently swallowing it.
    print(f"   note: different error: {type(e).__name__}: {e}")
else:
    check("TH.8 None params no crash", True)
132
+
133
# ═══ REPORT ═══
# Print the tallies and exit non-zero when any check failed, so a shell or CI
# caller can detect failure from the exit status.
banner = "=" * 50
verdict = "ALL PASS ✓" if FAIL == 0 else f"{FAIL} FAILURES"
print(f"\n{banner}")
print(f" Hardening Tests: {PASS} pass, {FAIL} fail")
print(f" {verdict}")
print(banner)
sys.exit(0 if FAIL == 0 else 1)