Fix Issue 3: Add 60s timeout to API calls in prod_test.py
Browse files- tests/prod_test.py +16 -5
tests/prod_test.py
CHANGED
|
@@ -10,6 +10,7 @@ import purpose_agent as pa
|
|
| 10 |
from purpose_agent.llm_backend import resolve_backend, ChatMessage
|
| 11 |
from purpose_agent.orchestrator import Environment
|
| 12 |
from purpose_agent.types import State
|
|
|
|
| 13 |
|
| 14 |
b = resolve_backend("openrouter:meta-llama/llama-3.3-70b-instruct", api_key=OR)
|
| 15 |
P = F = 0
|
|
@@ -19,6 +20,10 @@ def ok(n, c, d=""):
|
|
| 19 |
icon = "PASS" if c else "FAIL"
|
| 20 |
print(f" [{icon}] {n}" + (f" — {d}" if d else ""))
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
print(f"Purpose Agent v{pa.__version__} — 0-Day Production Test")
|
| 23 |
print(f"Model: Llama-3.3-70B via OpenRouter\n")
|
| 24 |
|
|
@@ -27,7 +32,7 @@ print("LEVEL 1: purpose()")
|
|
| 27 |
team = pa.purpose("Write code", model=b)
|
| 28 |
ok("Auto-detect coding team", [a.name for a in team._agents] == ["architect","coder","tester"], str([a.name for a in team._agents]))
|
| 29 |
t0 = time.time()
|
| 30 |
-
result = team.run("Check if number is prime", verbose=False)
|
| 31 |
ok("Run real task", len(result) > 5, f"{len(result)} chars {time.time()-t0:.0f}s")
|
| 32 |
team.teach("Add types")
|
| 33 |
ok("Teach works", True)
|
|
@@ -35,10 +40,10 @@ time.sleep(0.3)
|
|
| 35 |
|
| 36 |
# ─── LEVEL 2 ───
|
| 37 |
print("\nLEVEL 2: Backend + Knowledge")
|
| 38 |
-
resp = b.generate([ChatMessage(role="user", content="Say ok")], temperature=0, max_tokens=5)
|
| 39 |
ok("API call", len(resp) > 0, repr(resp[:20]))
|
| 40 |
kt = pa.purpose("Answer Qs", model=b, knowledge=["Python created by Guido 1991."])
|
| 41 |
-
ans = kt.ask("Who created Python?")
|
| 42 |
ok("Knowledge team", len(ans) > 3, ans[:40])
|
| 43 |
time.sleep(0.3)
|
| 44 |
|
|
@@ -94,7 +99,11 @@ for name, purpose, tests in [
|
|
| 94 |
available_actions={"submit_code": "Submit code in params.code", "DONE": "Done"},
|
| 95 |
optimize_every_n_tasks=99)
|
| 96 |
t0 = time.time()
|
| 97 |
-
r =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
ok(name, r.final_state.data.get("all_passed", False),
|
| 99 |
f'{r.final_state.data.get("pass_rate",0):.0%} {time.time()-t0:.0f}s')
|
| 100 |
time.sleep(0.3)
|
|
@@ -107,7 +116,9 @@ orch2 = pa.Orchestrator(llm=b, environment=env2,
|
|
| 107 |
orch2.optimizer.min_reward_threshold = 0.01
|
| 108 |
counts = []
|
| 109 |
for _ in range(3):
|
| 110 |
-
try:
|
|
|
|
|
|
|
| 111 |
except: pass
|
| 112 |
counts.append(len(orch2.optimizer.heuristic_library))
|
| 113 |
time.sleep(0.3)
|
|
|
|
| 10 |
from purpose_agent.llm_backend import resolve_backend, ChatMessage
|
| 11 |
from purpose_agent.orchestrator import Environment
|
| 12 |
from purpose_agent.types import State
|
| 13 |
+
from purpose_agent.hardening import with_timeout
|
| 14 |
|
| 15 |
b = resolve_backend("openrouter:meta-llama/llama-3.3-70b-instruct", api_key=OR)
|
| 16 |
P = F = 0
|
|
|
|
| 20 |
icon = "PASS" if c else "FAIL"
|
| 21 |
print(f" [{icon}] {n}" + (f" — {d}" if d else ""))
|
| 22 |
|
| 23 |
+
# Test-script helper: wraps the zero-argument callable `fn` with `with_timeout`
# (imported from purpose_agent.hardening) and immediately invokes the wrapper.
# `timeout_s`, `default` and `label` are forwarded to `with_timeout` unchanged.
# NOTE(review): presumably `default` is what comes back when the call times out
# and `label` only identifies the call in diagnostics — confirm against with_timeout.
def timed_run(fn, timeout_s=60, default=None, label="api_call"):
|
| 24 |
+
"""Run fn with a timeout to prevent hanging on slow API calls."""
|
| 25 |
+
# with_timeout(...) returns a callable; the trailing () runs it with no arguments.
return with_timeout(fn, timeout_s=timeout_s, default=default, label=label)()
|
| 26 |
+
|
| 27 |
print(f"Purpose Agent v{pa.__version__} — 0-Day Production Test")
|
| 28 |
print(f"Model: Llama-3.3-70B via OpenRouter\n")
|
| 29 |
|
|
|
|
| 32 |
team = pa.purpose("Write code", model=b)
|
| 33 |
ok("Auto-detect coding team", [a.name for a in team._agents] == ["architect","coder","tester"], str([a.name for a in team._agents]))
|
| 34 |
t0 = time.time()
|
| 35 |
+
result = timed_run(lambda: team.run("Check if number is prime", verbose=False), timeout_s=90, default="", label="team_run")
|
| 36 |
ok("Run real task", len(result) > 5, f"{len(result)} chars {time.time()-t0:.0f}s")
|
| 37 |
team.teach("Add types")
|
| 38 |
ok("Teach works", True)
|
|
|
|
| 40 |
|
| 41 |
# ─── LEVEL 2 ───
|
| 42 |
print("\nLEVEL 2: Backend + Knowledge")
|
| 43 |
+
resp = timed_run(lambda: b.generate([ChatMessage(role="user", content="Say ok")], temperature=0, max_tokens=5), timeout_s=30, default="", label="simple_api")
|
| 44 |
ok("API call", len(resp) > 0, repr(resp[:20]))
|
| 45 |
kt = pa.purpose("Answer Qs", model=b, knowledge=["Python created by Guido 1991."])
|
| 46 |
+
ans = timed_run(lambda: kt.ask("Who created Python?"), timeout_s=60, default="", label="knowledge_ask")
|
| 47 |
ok("Knowledge team", len(ans) > 3, ans[:40])
|
| 48 |
time.sleep(0.3)
|
| 49 |
|
|
|
|
| 99 |
available_actions={"submit_code": "Submit code in params.code", "DONE": "Done"},
|
| 100 |
optimize_every_n_tasks=99)
|
| 101 |
t0 = time.time()
|
| 102 |
+
r = timed_run(
|
| 103 |
+
lambda p=purpose, e=env: orch.run_task(purpose=p, initial_state=e.reset(), max_steps=2),
|
| 104 |
+
timeout_s=60, default=type('R', (), {'final_state': State(data={}), 'trajectory': type('T', (), {'steps': [], 'final_phi': None, 'success_rate': 0})()})(),
|
| 105 |
+
label=f"coding_{name}",
|
| 106 |
+
)
|
| 107 |
ok(name, r.final_state.data.get("all_passed", False),
|
| 108 |
f'{r.final_state.data.get("pass_rate",0):.0%} {time.time()-t0:.0f}s')
|
| 109 |
time.sleep(0.3)
|
|
|
|
| 116 |
orch2.optimizer.min_reward_threshold = 0.01
|
| 117 |
counts = []
|
| 118 |
for _ in range(3):
|
| 119 |
+
try:
|
| 120 |
+
timed_run(lambda: orch2.run_task(purpose="fib(5)=5", initial_state=env2.reset(), max_steps=2),
|
| 121 |
+
timeout_s=60, label="self_improve")
|
| 122 |
except: pass
|
| 123 |
counts.append(len(orch2.optimizer.heuristic_library))
|
| 124 |
time.sleep(0.3)
|