File size: 4,978 Bytes
f7a7853
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env python3
"""
Hardening Tests — Verify production robustness.

TH.1  safe_params handles None, string, list, garbage
TH.2  safe_float handles trailing dots, None, garbage
TH.3  with_timeout returns default on hang
TH.4  llm_call_with_timeout never raises
TH.5  graceful decorator catches all exceptions
TH.6  validate_purpose rejects empty/short
TH.7  validate_purpose accepts valid
TH.8  Full orchestrator loop with None params doesn't crash
"""
import sys, os, time
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

# Running tallies of passed / failed checks, updated by check().
PASS = FAIL = 0


def check(name, cond, detail=""):
    """Record one test outcome and print a one-line result.

    Args:
        name: Label printed next to the pass/fail mark.
        cond: Outcome of the check. Any value is accepted and judged by
            its truthiness.
        detail: Optional extra text, printed only when the check fails.
    """
    global PASS, FAIL
    # Normalise first: int(cond) raises on truthy non-numeric values such as
    # a non-empty list, and would add more than 1 for ints greater than 1.
    ok = bool(cond)
    if ok:
        PASS += 1
    else:
        FAIL += 1
    mark = "✓" if ok else "✗"
    suffix = f": {detail}" if detail and not ok else ""
    print(f"  {mark} {name}{suffix}")

from purpose_agent.hardening import (
    safe_params, safe_string, safe_float, safe_dict_get,
    with_timeout, llm_call_with_timeout, graceful,
    validate_purpose, ValidationError,
)

# ═══ Null Safety ═══
print("Null Safety")
# TH.1: safe_params must produce a dict for every kind of input tried here.
check("TH.1 None → {}", safe_params(None) == {})
check("TH.1 dict passes through", safe_params({"x": 1}) == {"x": 1})
check("TH.1 string → {_raw: s}", safe_params("hello") == {"_raw": "hello"})
check("TH.1 list → {}", safe_params([1,2]) == {})
check("TH.1 int → {}", safe_params(42) == {})

# TH.2: safe_float must fall back to 0.0 on unusable input and honour the
# max_val / min_val bounds.
check("TH.2 safe_float('3.0.') = 3.0", safe_float("3.0.") == 3.0)
check("TH.2 safe_float(None) = 0.0", safe_float(None) == 0.0)
check("TH.2 safe_float('abc') = 0.0", safe_float("abc") == 0.0)
check("TH.2 safe_float(15, max=10) = 10", safe_float(15, max_val=10) == 10.0)
check("TH.2 safe_float(-5, min=0) = 0", safe_float(-5, min_val=0) == 0.0)

# Unnumbered checks for the remaining null-safe helpers.
check("safe_string(None) = ''", safe_string(None) == "")
check("safe_string(123) = '123'", safe_string(123) == "123")
check("safe_dict_get(None, 'key') = ''", safe_dict_get(None, "key") == "")
check("safe_dict_get({'k':None}, 'k') = ''", safe_dict_get({"k": None}, "k") == "")

# ═══ Timeouts ═══
print("\nTimeouts")


def slow_fn():
    """Sleep for 10 seconds, then return "never"; used as a hanging call."""
    time.sleep(10)
    return "never"


wrapped = with_timeout(slow_fn, timeout_s=0.5, default="timed_out", label="slow_fn")
# Use the monotonic clock for the duration: time.time() follows the wall
# clock, so a clock adjustment during the call could distort `elapsed`.
t0 = time.monotonic()
result = wrapped()
elapsed = time.monotonic() - t0
check("TH.3 Timeout returns default", result == "timed_out")
check("TH.3 Doesn't wait full 10s", elapsed < 2.0, f"took {elapsed:.1f}s")

def error_fn():
    """Fail unconditionally with ``RuntimeError("boom")``."""
    failure = RuntimeError("boom")
    raise failure

# TH.3 (continued): an exception raised by the wrapped callable should come
# back as the configured default.
wrapped2 = with_timeout(error_fn, timeout_s=5.0, default="safe", label="error_fn")
_error_outcome = wrapped2()
check("TH.3 Exception returns default", _error_outcome == "safe")

# TH.4: llm_call_with_timeout on a hanging, a raising and a normal callable.
_hang_outcome = llm_call_with_timeout(slow_fn, timeout_s=0.5, default="x")
check("TH.4 Timeout on hang", _hang_outcome == "x")
_raise_outcome = llm_call_with_timeout(error_fn, timeout_s=5, default="y")
check("TH.4 Exception returns default", _raise_outcome == "y")
_normal_outcome = llm_call_with_timeout(lambda: "ok", timeout_s=5, default="x")
check("TH.4 Normal returns value", _normal_outcome == "ok")

# ═══ Graceful ═══
print("\nGraceful Degradation")


@graceful(default={"empty": True}, label="parse")
def bad_parse(text):
    """Always raise ``ValueError("parse failed")``; ``text`` is ignored."""
    raise ValueError("parse failed")


# TH.5: the decorated call is expected to yield the default, not raise.
check("TH.5 Graceful catches", bad_parse("x") == {"empty": True})


@graceful(default=0, label="compute")
def returns_none():
    """Return None so the decorator's handling of a None result is exercised."""
    return None


# TH.5: a None result is expected to be replaced by the default.
check("TH.5 None → default", returns_none() == 0)

# ═══ Validation ═══
print("\nInput Validation")


def _is_rejected(candidate):
    """Return True when validate_purpose raises ValidationError for candidate."""
    try:
        validate_purpose(candidate)
    except ValidationError:
        return True
    return False


# TH.6: empty and too-short purposes must be refused.
check("TH.6 Rejects empty", _is_rejected(""))

check("TH.6 Rejects short", _is_rejected("ab"))

# TH.7: a valid purpose is returned, with surrounding whitespace removed.
_accepted = validate_purpose("Write Python code")
check("TH.7 Accepts valid", _accepted == "Write Python code")
_stripped = validate_purpose("  hello  ")
check("TH.7 Strips whitespace", _stripped == "hello")

# ═══ Integration: Orchestrator with bad params ═══
print("\nIntegration")
import purpose_agent as pa
from purpose_agent.types import State

# Mock that returns action with None params
mock = pa.MockLLMBackend()
import json
# NOTE(review): the first argument looks like a prompt fragment the handler is
# matched on — confirm against MockLLMBackend.register_handler.
mock.register_handler("goal-directed agent", json.dumps({
    "thought": "test",
    "action": {"name": "DONE", "params": None},  # None params!
    "expected_delta": "done"
}))
mock.set_structured_default({"phi_before":0,"phi_after":5,"reasoning":"ok","evidence":"state changed","confidence":0.7})

from purpose_agent.orchestrator import SimpleEnvironment
# The environment ignores the action and state and always reports "done".
env = SimpleEnvironment(execute_fn=lambda a,s: State(data={"done": True}))
orch = pa.Orchestrator(llm=mock, environment=env, available_actions={"DONE": "Done"})

try:
    # The return value is not inspected; TH.8 only cares how the call ends.
    orch.run_task(purpose="test", max_steps=2)
except AttributeError as e:
    # Only AttributeError counts as a TH.8 failure — presumably what using
    # the None params as a dict would raise.
    check("TH.8 None params no crash", False, str(e))
except Exception as e:
    # Any other exception is deliberately not held against TH.8. check()
    # prints detail only on failure, so report the error separately instead
    # of letting it vanish.
    check("TH.8 None params no crash", True)
    print(f"    note: different error: {type(e).__name__}: {e}")
else:
    check("TH.8 None params no crash", True)

# ═══ REPORT ═══
# Final line of the summary: all-clear, or the number of failed checks.
verdict = "ALL PASS ✓" if FAIL == 0 else f"{FAIL} FAILURES"
print(f"\n{'='*50}")
print(f"  Hardening Tests: {PASS} pass, {FAIL} fail")
print(f"  {verdict}")
print(f"{'='*50}")
# Exit status 0 when nothing failed, 1 otherwise.
sys.exit(0 if FAIL == 0 else 1)