"""
PhD Research OS — ECC Harness Integration Tests
=================================================
Tests the companion agent lifecycle: spawn → preflight → plan → execute → postflight
"""
import os
import sys
import json
import pytest
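
# Make the repo root importable so `phd_research_os` resolves when this file
# is run directly (python tests/test_agent_os.py) instead of via pytest.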
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from phd_research_os.agent_os import (
    AgentOS, AgentState, COMPANION_TYPES,
    init_agent_os_db
)
from phd_research_os.db import (
    init_db, get_db, create_claim, create_goal, create_conflict
)
TEST_DB = "test_agent_os.db"

@pytest.fixture(autouse=True)
def setup_teardown():
    """Create a fresh DB with both core and agent_os tables."""
    init_agent_os_db(TEST_DB)
    # Seed some test data
    conn = get_db(TEST_DB)
    cid1 = create_claim(conn, "Graphene FET shows 45mV shift", "Fact", 0.85,
                        evidence_strength=0.9, study_quality_weight=1.0)
    cid2 = create_claim(conn, "Sensitivity plateaus below 1mM", "Interpretation", 0.6,
                        evidence_strength=0.5, study_quality_weight=0.8)
    create_goal(conn, "Achieve sub-fM detection limit", "high", [cid1])
    create_conflict(conn, cid1, cid2, "value_mismatch", "Different conditions")
    conn.close()
    yield
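    # SQLite in WAL mode leaves -wal and -shm sidecar files next to the DB;
    # remove those along with the DB itself.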
    for suffix in ["", "-wal", "-shm"]:
        path = TEST_DB + suffix
        if os.path.exists(path):
            os.remove(path)
# ============================================================
# Spawn Tests
# ============================================================
def test_spawn_data_quality_auditor():
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DataQualityAuditor")
    assert agent_id.startswith("COMP_")
    agents = aos.list_companions()
    assert len(agents) == 1
    assert agents[0]["agent_type"] == "DataQualityAuditor"
    assert agents[0]["state"] == "spawned"

def test_spawn_all_builtin_types():
    aos = AgentOS(db_path=TEST_DB)
    ids = []
    for agent_type in COMPANION_TYPES:
        aid = aos.spawn_companion(agent_type)
        ids.append(aid)
    assert len(ids) == len(COMPANION_TYPES)
    assert len(set(ids)) == len(COMPANION_TYPES)  # All unique

def test_spawn_custom_agent():
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion(
        "custom",
        purpose="Test custom agent",
        system_prompt="You are a test agent. Output JSON: {'test': true}"
    )
    agents = aos.list_companions()
    assert any(a["agent_type"] == "custom" for a in agents)

def test_spawn_custom_without_prompt_fails():
    aos = AgentOS(db_path=TEST_DB)
    with pytest.raises(ValueError, match="Custom agents require"):
        aos.spawn_companion("custom")

def test_spawn_unknown_type_fails():
    aos = AgentOS(db_path=TEST_DB)
    with pytest.raises(ValueError, match="Unknown agent type"):
        aos.spawn_companion("NonexistentAgent")
# ============================================================
# Task Lifecycle Tests
# ============================================================
def test_assign_task():
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DataQualityAuditor")
    task_id = aos.assign_task(agent_id, "Audit last 10 claims for hallucination")
    assert task_id.startswith("TASK_")
    conn = get_db(TEST_DB)
    task = dict(conn.execute("SELECT * FROM agent_tasks WHERE task_id = ?",
                             (task_id,)).fetchone())
    conn.close()
    assert task["state"] == "preflight"
    assert task["max_iterations"] == 3

def test_full_lifecycle_without_brain():
    """Test that the full ECC lifecycle runs even without an AI brain."""
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DataQualityAuditor")
    task_id = aos.assign_task(agent_id, "Audit claims for quality")
    result = aos.run_task(task_id)
    assert result["status"] == "completed"
    assert len(result["proposals"]) >= 1
    # Check that the task reached the completed state
    conn = get_db(TEST_DB)
    task = dict(conn.execute("SELECT * FROM agent_tasks WHERE task_id = ?",
                             (task_id,)).fetchone())
    conn.close()
    assert task["state"] == "completed"

def test_task_has_plan():
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("PromptOptimizer")
    task_id = aos.assign_task(agent_id, "Optimize extraction prompt")
    aos.run_task(task_id)
    conn = get_db(TEST_DB)
    task = dict(conn.execute("SELECT * FROM agent_tasks WHERE task_id = ?",
                             (task_id,)).fetchone())
    conn.close()
    plan = json.loads(task["plan"])
    assert "steps" in plan
    assert len(plan["steps"]) > 0
def test_kill_heuristic_time_budget():
    """A task with a 0-second time budget should trip the kill heuristic during execution."""
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DataQualityAuditor")
    task_id = aos.assign_task(agent_id, "Quick check", time_budget_s=0)
    # Without a brain the task runs fast enough that it may still complete,
    # so accept either outcome; the time-budget check is what's exercised here.
    result = aos.run_task(task_id)
    assert result["status"] in ["completed", "halted"]
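
def test_kill_heuristic_iteration_budget():
    """Hedged companion sketch: assumes assign_task accepts a max_iterations
    override (the column and its default of 3 are asserted in test_assign_task)."""
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DataQualityAuditor")
    # max_iterations=1 is a hypothetical keyword, mirroring time_budget_s above.
    task_id = aos.assign_task(agent_id, "Quick check", max_iterations=1)
    result = aos.run_task(task_id)
    assert result["status"] in ["completed", "halted"]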
# ============================================================
# Proposal Tests
# ============================================================
def test_proposals_created():
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DomainExpander")
    task_id = aos.assign_task(agent_id, "Generate materials science examples")
    aos.run_task(task_id)
    proposals = aos.get_proposals(agent_id)
    assert len(proposals) >= 1
    assert proposals[0]["status"] == "proposed"

def test_approve_proposal():
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("CalibrationAnalyst")
    task_id = aos.assign_task(agent_id, "Check calibration")
    aos.run_task(task_id)
    proposals = aos.get_proposals(agent_id)
    assert len(proposals) >= 1
    aos.approve_proposal(proposals[0]["proposal_id"], "Dr. Smith")
    updated = aos.get_proposals(agent_id)
    approved = [p for p in updated if p["status"] == "approved"]
    assert len(approved) == 1
    assert approved[0]["reviewed_by"] == "Dr. Smith"

def test_reject_proposal_with_reason():
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("CitationChaser")
    task_id = aos.assign_task(agent_id, "Find citing papers")
    aos.run_task(task_id)
    proposals = aos.get_proposals(agent_id)
    assert len(proposals) >= 1
    aos.reject_proposal(proposals[0]["proposal_id"],
                        "Not relevant to current research focus",
                        "researcher")
    updated = aos.get_proposals(agent_id)
    rejected = [p for p in updated if p["status"] == "rejected"]
    assert len(rejected) == 1
    assert "Not relevant" in rejected[0]["rejection_reason"]

def test_proposals_filter_by_status():
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DataQualityAuditor")
    task_id = aos.assign_task(agent_id, "Audit")
    aos.run_task(task_id)
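    # Every proposal starts in the 'proposed' status (see test_proposals_created),
    # so the filtered and unfiltered result sets should match.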
    all_proposals = aos.get_proposals()
    proposed = aos.get_proposals(status="proposed")
    assert len(proposed) == len(all_proposals)
# ============================================================
# Audit Log Tests
# ============================================================
def test_audit_trail():
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DataQualityAuditor")
    task_id = aos.assign_task(agent_id, "Audit claims")
    aos.run_task(task_id)
    audit = aos.get_audit_log(agent_id)
    assert len(audit) >= 4  # spawn + preflight + planning + execution
    phases = [entry["phase"] for entry in audit]
    assert "spawn" in phases
    assert "preflight" in phases
    assert "planning" in phases
    assert "executing" in phases

def test_audit_log_immutable():
    """The API exposes no way to modify or delete audit entries, so a freshly
    spawned agent should carry exactly one entry: its spawn record."""
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DataQualityAuditor")
    log = aos.get_audit_log(agent_id)
    assert len(log) == 1  # spawn entry only
    assert log[0]["action"] == "Agent created"
# ============================================================
# Memory Tests
# ============================================================
def test_memory_store():
    aos = AgentOS(db_path=TEST_DB)
    aos.set_memory("test_key", "test_value", "assumption")
    mem = aos.get_memory("test_key")
    assert mem is not None
    assert mem["value"] == "test_value"
    assert mem["category"] == "assumption"

def test_memory_overwrite():
    aos = AgentOS(db_path=TEST_DB)
    aos.set_memory("key1", "old_value")
    aos.set_memory("key1", "new_value")
    mem = aos.get_memory("key1")
    assert mem["value"] == "new_value"
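
def test_memory_missing_key():
    """Hedged sketch: assumes get_memory returns None for unknown keys,
    which the `is not None` check in test_memory_store implies."""
    aos = AgentOS(db_path=TEST_DB)
    assert aos.get_memory("nonexistent_key") is None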
# ============================================================
# Harness Evolution Tests
# ============================================================
def test_propose_harness_evolution():
    aos = AgentOS(db_path=TEST_DB)
    evo_id = aos.propose_harness_evolution(
        "§3", "Add max 5 iterations for data quality tasks",
        "Data quality requires more iterations than architecture changes",
        "COMP_TEST001"
    )
    assert evo_id >= 1
    conn = get_db(TEST_DB)
    row = dict(conn.execute("SELECT * FROM harness_evolution WHERE id = ?",
                            (evo_id,)).fetchone())
    conn.close()
    assert row["approved"] == 0  # Not auto-approved — needs human
# ============================================================
# Retirement Tests
# ============================================================
def test_retire_companion():
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DataQualityAuditor")
    aos.retire_companion(agent_id)
    agents = aos.list_companions(include_retired=False)
    assert len(agents) == 0
    all_agents = aos.list_companions(include_retired=True)
    assert len(all_agents) == 1
    assert all_agents[0]["state"] == "retired"

def test_retired_agent_fails_preflight():
    """Retired agents should fail preflight checks."""
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DataQualityAuditor")
    aos.retire_companion(agent_id)
    task_id = aos.assign_task(agent_id, "Should fail")
    result = aos.run_task(task_id)
    assert result["status"] == "halted"
# ============================================================
# Agent Stats Tests
# ============================================================
def test_agent_stats_updated_after_task():
    aos = AgentOS(db_path=TEST_DB)
    agent_id = aos.spawn_companion("DataQualityAuditor")
    task_id = aos.assign_task(agent_id, "Audit claims")
    aos.run_task(task_id)
    agents = aos.list_companions()
    agent = [a for a in agents if a["agent_id"] == agent_id][0]
    assert agent["total_tasks_completed"] == 1
    assert agent["total_proposals_made"] >= 1

if __name__ == "__main__":
    pytest.main([__file__, "-v"])