#!/usr/bin/env python3
"""
Hardening Tests — Verify production robustness.

  TH.1  safe_params handles None, string, list, garbage
  TH.2  safe_float handles trailing dots, None, garbage
  TH.3  with_timeout returns default on hang
  TH.4  llm_call_with_timeout never raises
  TH.5  graceful decorator catches all exceptions
  TH.6  validate_purpose rejects empty/short
  TH.7  validate_purpose accepts valid
  TH.8  Full orchestrator loop with None params doesn't crash

Run as a script: prints one line per check and exits with status 0 when
every check passed, 1 otherwise.
"""
import json
import os
import sys
import time

# Make the package one directory above this file importable.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

# Running totals, updated by check().
PASS = FAIL = 0


def check(name, cond, detail=""):
    """Record one check result and print a ✓/✗ line for it.

    Args:
        name: Label printed next to the mark.
        cond: Truthy when the check passed.
        detail: Extra text appended to the line; only shown when the
            check failed and the text is non-empty.
    """
    global PASS, FAIL
    PASS += int(cond)
    FAIL += int(not cond)
    print(f" {'✓' if cond else '✗'} {name}" + (f": {detail}" if detail and not cond else ""))


from purpose_agent.hardening import (
    safe_params, safe_string, safe_float, safe_dict_get,
    with_timeout, llm_call_with_timeout, graceful,
    validate_purpose, ValidationError,
)

# ═══ Null Safety ═══
print("Null Safety")

check("TH.1 None → {}", safe_params(None) == {})
check("TH.1 dict passes through", safe_params({"x": 1}) == {"x": 1})
check("TH.1 string → {_raw: s}", safe_params("hello") == {"_raw": "hello"})
check("TH.1 list → {}", safe_params([1,2]) == {})
check("TH.1 int → {}", safe_params(42) == {})

check("TH.2 safe_float('3.0.') = 3.0", safe_float("3.0.") == 3.0)
check("TH.2 safe_float(None) = 0.0", safe_float(None) == 0.0)
check("TH.2 safe_float('abc') = 0.0", safe_float("abc") == 0.0)
check("TH.2 safe_float(15, max=10) = 10", safe_float(15, max_val=10) == 10.0)
check("TH.2 safe_float(-5, min=0) = 0", safe_float(-5, min_val=0) == 0.0)

check("safe_string(None) = ''", safe_string(None) == "")
check("safe_string(123) = '123'", safe_string(123) == "123")
check("safe_dict_get(None, 'k') = ''", safe_dict_get(None, "key") == "")
check("safe_dict_get({'k':None}, 'k') = ''", safe_dict_get({"k": None}, "k") == "")

# ═══ Timeouts ═══
print("\nTimeouts")


def slow_fn():
    """Sleep far longer than any timeout used below, then return."""
    time.sleep(10)
    return "never"


wrapped = with_timeout(slow_fn, timeout_s=0.5, default="timed_out", label="slow_fn")
# Monotonic clock: the elapsed measurement must not be affected by
# wall-clock adjustments while the call is in flight.
t0 = time.monotonic()
result = wrapped()
elapsed = time.monotonic() - t0
check("TH.3 Timeout returns default", result == "timed_out")
check("TH.3 Doesn't wait full 10s", elapsed < 2.0, f"took {elapsed:.1f}s")


def error_fn():
    """Always raise, to exercise the exception paths."""
    raise RuntimeError("boom")


wrapped2 = with_timeout(error_fn, timeout_s=5.0, default="safe", label="error_fn")
check("TH.3 Exception returns default", wrapped2() == "safe")

# llm_call_with_timeout
check("TH.4 Timeout on hang", llm_call_with_timeout(slow_fn, timeout_s=0.5, default="x") == "x")
check("TH.4 Exception returns default", llm_call_with_timeout(error_fn, timeout_s=5, default="y") == "y")
check("TH.4 Normal returns value", llm_call_with_timeout(lambda: "ok", timeout_s=5, default="x") == "ok")

# ═══ Graceful ═══
print("\nGraceful Degradation")


@graceful(default={"empty": True}, label="parse")
def bad_parse(text):
    """Always raise; the decorator is expected to return its default."""
    raise ValueError("parse failed")


check("TH.5 Graceful catches", bad_parse("x") == {"empty": True})


@graceful(default=0, label="compute")
def returns_none():
    """Return None; the decorator is expected to substitute its default."""
    return None


check("TH.5 None → default", returns_none() == 0)

# ═══ Validation ═══
print("\nInput Validation")

try:
    validate_purpose("")
    check("TH.6 Rejects empty", False)
except ValidationError:
    check("TH.6 Rejects empty", True)

try:
    validate_purpose("ab")
    check("TH.6 Rejects short", False)
except ValidationError:
    check("TH.6 Rejects short", True)

check("TH.7 Accepts valid", validate_purpose("Write Python code") == "Write Python code")
check("TH.7 Strips whitespace", validate_purpose(" hello ") == "hello")

# ═══ Integration: Orchestrator with bad params ═══
print("\nIntegration")

# Kept in this section (not hoisted to the top) so the import order of the
# project modules is the same as before.
import purpose_agent as pa
from purpose_agent.types import State

# Mock that returns action with None params
mock = pa.MockLLMBackend()
mock.register_handler("goal-directed agent", json.dumps({
    "thought": "test",
    "action": {"name": "DONE", "params": None},  # None params!
    "expected_delta": "done"
}))
mock.set_structured_default({"phi_before":0,"phi_after":5,"reasoning":"ok","evidence":"state changed","confidence":0.7})

from purpose_agent.orchestrator import SimpleEnvironment

env = SimpleEnvironment(execute_fn=lambda a,s: State(data={"done": True}))
orch = pa.Orchestrator(llm=mock, environment=env, available_actions={"DONE": "Done"})

try:
    orch.run_task(purpose="test", max_steps=2)
except AttributeError as e:
    # The failure this test targets: attribute access on the None params.
    check("TH.8 None params no crash", False, str(e))
except Exception as e:
    # Any other exception is still counted as a pass for this test, but
    # check() only prints detail on failure, so report it explicitly
    # instead of silently dropping it.
    check("TH.8 None params no crash", True)
    print(f"   note: different error: {type(e).__name__}: {e}")
else:
    check("TH.8 None params no crash", True)

# ═══ REPORT ═══
print(f"\n{'='*50}")
print(f" Hardening Tests: {PASS} pass, {FAIL} fail")
print(f" {'ALL PASS ✓' if FAIL == 0 else f'{FAIL} FAILURES'}")
print(f"{'='*50}")
sys.exit(0 if FAIL == 0 else 1)