nkshirsa commited on
Commit
85dacf8
·
verified ·
1 Parent(s): a85ea41

Add ECC Harness: tests/test_agent_os.py

Browse files
Files changed (1) hide show
  1. tests/test_agent_os.py +341 -0
tests/test_agent_os.py ADDED
@@ -0,0 +1,341 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PhD Research OS — ECC Harness Integration Tests
3
+ =================================================
4
+ Tests the companion agent lifecycle: spawn → preflight → plan → execute → postflight
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import json
10
+ import pytest
11
+
12
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
13
+
14
+ from phd_research_os.agent_os import (
15
+ AgentOS, AgentState, COMPANION_TYPES,
16
+ init_agent_os_db
17
+ )
18
+ from phd_research_os.db import (
19
+ init_db, get_db, create_claim, create_goal, create_conflict
20
+ )
21
+
22
# Dedicated on-disk SQLite file; the autouse fixture recreates it before and
# removes it (plus WAL/SHM sidecars) after every test.
TEST_DB = "test_agent_os.db"
23
+
24
+
25
@pytest.fixture(autouse=True)
def setup_teardown():
    """Create a fresh DB with core and agent_os tables, seed data, clean up.

    Seeds two claims, one goal, and one conflict for the companion agents to
    work against. The seeding connection is closed in a ``finally`` block so
    it does not leak if any ``create_*`` call raises. Teardown removes the DB
    file and its SQLite WAL/SHM sidecars so every test starts pristine.
    """
    init_agent_os_db(TEST_DB)

    # Seed some test data for the agents to audit / plan against.
    conn = get_db(TEST_DB)
    try:
        cid1 = create_claim(conn, "Graphene FET shows 45mV shift", "Fact", 0.85,
                            evidence_strength=0.9, study_quality_weight=1.0)
        cid2 = create_claim(conn, "Sensitivity plateaus below 1mM", "Interpretation", 0.6,
                            evidence_strength=0.5, study_quality_weight=0.8)
        create_goal(conn, "Achieve sub-fM detection limit", "high", [cid1])
        create_conflict(conn, cid1, cid2, "value_mismatch", "Different conditions")
    finally:
        # Close even if seeding fails, so the file can be deleted on Windows
        # and no handle leaks across tests.
        conn.close()

    yield

    # Remove the main DB plus any SQLite write-ahead-log sidecar files.
    for suffix in ["", "-wal", "-shm"]:
        path = TEST_DB + suffix
        if os.path.exists(path):
            os.remove(path)
46
+
47
+
48
+ # ============================================================
49
+ # Spawn Tests
50
+ # ============================================================
51
+
52
def test_spawn_data_quality_auditor():
    """Spawning a built-in companion yields a COMP_-prefixed id in 'spawned' state."""
    aos = AgentOS(db_path=TEST_DB)
    new_id = aos.spawn_companion("DataQualityAuditor")
    assert new_id.startswith("COMP_")

    companions = aos.list_companions()
    assert len(companions) == 1
    first = companions[0]
    assert first["agent_type"] == "DataQualityAuditor"
    assert first["state"] == "spawned"
61
+
62
+
63
def test_spawn_all_builtin_types():
    """Every built-in companion type spawns successfully with a unique id."""
    aos = AgentOS(db_path=TEST_DB)
    spawned = [aos.spawn_companion(kind) for kind in COMPANION_TYPES]
    assert len(spawned) == len(COMPANION_TYPES)
    # No two companions may share an id.
    assert len(set(spawned)) == len(COMPANION_TYPES)
71
+
72
+
73
def test_spawn_custom_agent():
    """A 'custom' companion spawns when purpose and system prompt are supplied."""
    aos = AgentOS(db_path=TEST_DB)
    aos.spawn_companion(
        "custom",
        purpose="Test custom agent",
        system_prompt="You are a test agent. Output JSON: {'test': true}"
    )
    types_present = [a["agent_type"] for a in aos.list_companions()]
    assert "custom" in types_present
82
+
83
+
84
def test_spawn_custom_without_prompt_fails():
    """A custom companion without a system prompt is rejected with ValueError."""
    os_instance = AgentOS(db_path=TEST_DB)
    with pytest.raises(ValueError, match="Custom agents require"):
        os_instance.spawn_companion("custom")
88
+
89
+
90
def test_spawn_unknown_type_fails():
    """An unrecognized companion type is rejected with ValueError."""
    os_instance = AgentOS(db_path=TEST_DB)
    with pytest.raises(ValueError, match="Unknown agent type"):
        os_instance.spawn_companion("NonexistentAgent")
94
+
95
+
96
+ # ============================================================
97
+ # Task Lifecycle Tests
98
+ # ============================================================
99
+
100
def test_assign_task():
    """Assigning a task returns a TASK_ id and persists it in 'preflight' state."""
    aos = AgentOS(db_path=TEST_DB)
    auditor = aos.spawn_companion("DataQualityAuditor")
    task_id = aos.assign_task(auditor, "Audit last 10 claims for hallucination")
    assert task_id.startswith("TASK_")

    conn = get_db(TEST_DB)
    try:
        row = conn.execute(
            "SELECT * FROM agent_tasks WHERE task_id = ?", (task_id,)
        ).fetchone()
    finally:
        conn.close()

    task = dict(row)
    assert task["state"] == "preflight"
    assert task["max_iterations"] == 3
112
+
113
+
114
def test_full_lifecycle_without_brain():
    """The full ECC lifecycle completes even when no AI brain is attached."""
    aos = AgentOS(db_path=TEST_DB)
    worker = aos.spawn_companion("DataQualityAuditor")
    task_id = aos.assign_task(worker, "Audit claims for quality")

    outcome = aos.run_task(task_id)

    assert outcome["status"] == "completed"
    assert len(outcome["proposals"]) >= 1

    # The persisted task row must also have reached the terminal state.
    conn = get_db(TEST_DB)
    row = conn.execute("SELECT * FROM agent_tasks WHERE task_id = ?",
                       (task_id,)).fetchone()
    conn.close()
    assert dict(row)["state"] == "completed"
131
+
132
+
133
def test_task_has_plan():
    """Running a task records a JSON plan containing at least one step."""
    aos = AgentOS(db_path=TEST_DB)
    optimizer = aos.spawn_companion("PromptOptimizer")
    task_id = aos.assign_task(optimizer, "Optimize extraction prompt")
    aos.run_task(task_id)

    conn = get_db(TEST_DB)
    row = conn.execute("SELECT * FROM agent_tasks WHERE task_id = ?",
                       (task_id,)).fetchone()
    conn.close()

    plan = json.loads(dict(row)["plan"])
    assert "steps" in plan
    assert len(plan["steps"]) > 0
147
+
148
+
149
def test_kill_heuristic_time_budget():
    """A task with a 0-second budget either finishes fast or is halted."""
    aos = AgentOS(db_path=TEST_DB)
    auditor = aos.spawn_companion("DataQualityAuditor")
    task_id = aos.assign_task(auditor, "Quick check", time_budget_s=0)

    # Without a brain the task may complete before the budget check fires;
    # either terminal state exercises the iteration/kill-heuristic machinery.
    outcome = aos.run_task(task_id)
    assert outcome["status"] in ["completed", "halted"]
159
+
160
+
161
+ # ============================================================
162
+ # Proposal Tests
163
+ # ============================================================
164
+
165
def test_proposals_created():
    """A completed task leaves at least one proposal in 'proposed' status."""
    aos = AgentOS(db_path=TEST_DB)
    expander = aos.spawn_companion("DomainExpander")
    aos.run_task(aos.assign_task(expander, "Generate materials science examples"))

    proposals = aos.get_proposals(expander)
    assert len(proposals) >= 1
    assert proposals[0]["status"] == "proposed"
174
+
175
+
176
def test_approve_proposal():
    """Approving a proposal flips its status and records the reviewer name."""
    aos = AgentOS(db_path=TEST_DB)
    analyst = aos.spawn_companion("CalibrationAnalyst")
    aos.run_task(aos.assign_task(analyst, "Check calibration"))

    proposals = aos.get_proposals(analyst)
    assert len(proposals) >= 1

    aos.approve_proposal(proposals[0]["proposal_id"], "Dr. Smith")

    approved = [p for p in aos.get_proposals(analyst) if p["status"] == "approved"]
    assert len(approved) == 1
    assert approved[0]["reviewed_by"] == "Dr. Smith"
191
+
192
+
193
def test_reject_proposal_with_reason():
    """Rejecting a proposal stores the rejection reason alongside the reviewer."""
    aos = AgentOS(db_path=TEST_DB)
    chaser = aos.spawn_companion("CitationChaser")
    aos.run_task(aos.assign_task(chaser, "Find citing papers"))

    proposals = aos.get_proposals(chaser)
    assert len(proposals) >= 1

    aos.reject_proposal(
        proposals[0]["proposal_id"],
        "Not relevant to current research focus",
        "researcher",
    )

    rejected = [p for p in aos.get_proposals(chaser) if p["status"] == "rejected"]
    assert len(rejected) == 1
    assert "Not relevant" in rejected[0]["rejection_reason"]
210
+
211
+
212
def test_proposals_filter_by_status():
    """Filtering on status='proposed' returns every fresh proposal."""
    aos = AgentOS(db_path=TEST_DB)
    auditor = aos.spawn_companion("DataQualityAuditor")
    aos.run_task(aos.assign_task(auditor, "Audit"))

    everything = aos.get_proposals()
    only_proposed = aos.get_proposals(status="proposed")
    # Nothing has been reviewed yet, so the filter must match the full set.
    assert len(only_proposed) == len(everything)
221
+
222
+
223
+ # ============================================================
224
+ # Audit Log Tests
225
+ # ============================================================
226
+
227
def test_audit_trail():
    """Running a task leaves an audit entry for every lifecycle phase."""
    aos = AgentOS(db_path=TEST_DB)
    auditor = aos.spawn_companion("DataQualityAuditor")
    aos.run_task(aos.assign_task(auditor, "Audit claims"))

    audit = aos.get_audit_log(auditor)
    assert len(audit) >= 4  # spawn + preflight + planning + execution

    seen_phases = {entry["phase"] for entry in audit}
    assert {"spawn", "preflight", "planning", "executing"} <= seen_phases
241
+
242
+
243
def test_audit_log_immutable():
    """Audit entries cannot be modified or deleted through the API."""
    # NOTE(review): despite the name, this only verifies the initial spawn
    # entry is present and well-formed; no mutation/deletion is attempted,
    # so immutability itself is not actually exercised — consider extending.
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DataQualityAuditor")

    log = aos.get_audit_log(agent_id)
    assert len(log) == 1  # spawn entry
    assert log[0]["action"] == "Agent created"
251
+
252
+
253
+ # ============================================================
254
+ # Memory Tests
255
+ # ============================================================
256
+
257
def test_memory_store():
    """Stored memory round-trips both the value and its category."""
    aos = AgentOS(db_path=TEST_DB)
    aos.set_memory("test_key", "test_value", "assumption")

    entry = aos.get_memory("test_key")
    assert entry is not None
    assert entry["value"] == "test_value"
    assert entry["category"] == "assumption"
265
+
266
+
267
def test_memory_overwrite():
    """Writing the same key twice keeps only the most recent value."""
    aos = AgentOS(db_path=TEST_DB)
    for value in ("old_value", "new_value"):
        aos.set_memory("key1", value)

    assert aos.get_memory("key1")["value"] == "new_value"
274
+
275
+
276
+ # ============================================================
277
+ # Harness Evolution Tests
278
+ # ============================================================
279
+
280
def test_propose_harness_evolution():
    """Harness-evolution proposals are persisted but never auto-approved."""
    aos = AgentOS(db_path=TEST_DB)
    evo_id = aos.propose_harness_evolution(
        "§3", "Add max 5 iterations for data quality tasks",
        "Data quality requires more iterations than architecture changes",
        "COMP_TEST001"
    )
    assert evo_id >= 1

    conn = get_db(TEST_DB)
    record = conn.execute("SELECT * FROM harness_evolution WHERE id = ?",
                          (evo_id,)).fetchone()
    conn.close()
    # Human sign-off is mandatory; the proposal must not be auto-approved.
    assert dict(record)["approved"] == 0
294
+
295
+
296
+ # ============================================================
297
+ # Retirement Tests
298
+ # ============================================================
299
+
300
def test_retire_companion():
    """Retired companions vanish from the default listing but stay on record."""
    aos = AgentOS(db_path=TEST_DB)
    aos.retire_companion(aos.spawn_companion("DataQualityAuditor"))

    assert len(aos.list_companions(include_retired=False)) == 0

    survivors = aos.list_companions(include_retired=True)
    assert len(survivors) == 1
    assert survivors[0]["state"] == "retired"
311
+
312
+
313
def test_retired_agent_fails_preflight():
    """Tasks assigned to a retired agent are halted at the preflight check."""
    aos = AgentOS(db_path=TEST_DB)
    ghost = aos.spawn_companion("DataQualityAuditor")
    aos.retire_companion(ghost)

    outcome = aos.run_task(aos.assign_task(ghost, "Should fail"))
    assert outcome["status"] == "halted"
322
+
323
+
324
+ # ============================================================
325
+ # Agent Stats Tests
326
+ # ============================================================
327
+
328
def test_agent_stats_updated_after_task():
    """Completing a task bumps the agent's completion and proposal counters."""
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DataQualityAuditor")
    task_id = aos.assign_task(agent_id, "Audit claims")
    aos.run_task(task_id)

    # next(...) with an explicit default gives a clear failure message when
    # the agent is missing, instead of an opaque IndexError from [...][0].
    agent = next((a for a in aos.list_companions()
                  if a["agent_id"] == agent_id), None)
    assert agent is not None, "spawned agent missing from list_companions()"
    assert agent["total_tasks_completed"] == 1
    assert agent["total_proposals_made"] >= 1
338
+
339
+
340
if __name__ == "__main__":
    # Propagate pytest's exit status so CI / shell callers see failures
    # when this file is run directly instead of via the pytest CLI.
    raise SystemExit(pytest.main([__file__, "-v"]))