Rohan03
/

purpose-agent

Text Generation

Model card Files Files and versions

purpose-agent / purpose_agent /v2_types.py

Rohan03's picture

V2 merge: purpose_agent/v2_types.py

f0bf034 verified 15 days ago

3.62 kB

	"""
	V2 Core Types — RunMode, extended PurposeScore, memory scoping.

	Backward compatible: all V1 types remain unchanged. V2 additions are new
	classes or optional fields on existing ones.
	"""
	from __future__ import annotations

	from enum import Enum
	from dataclasses import dataclass, field
	from typing import Any


	class RunMode(Enum):
	    """
	    Controls what the framework is allowed to mutate during a run.

	    learning_train: Full read/write. Memories can be created, Q-values updated,
	        heuristics distilled. This is where the agent learns.

	    learning_validation: Read existing memories, but writes go to a staging area.
	        Used to validate whether new memories actually help before promoting.

	    eval_test: Pure evaluation. NO memory writes, NO heuristic promotion,
	        NO rubric mutation. The agent runs with whatever it has learned.
	        This is the only mode whose numbers you can trust.

	    The enum only exposes the permission flags below; enforcing them is the
	    responsibility of whatever code consumes a RunMode.
	    """

	    LEARNING_TRAIN = "learning_train"
	    LEARNING_VALIDATION = "learning_validation"
	    EVAL_TEST = "eval_test"

	    @property
	    def allows_memory_write(self) -> bool:
	        """Return True only for LEARNING_TRAIN."""
	        # Enum members are singletons, so identity is the idiomatic comparison.
	        return self is RunMode.LEARNING_TRAIN

	    @property
	    def allows_staging_write(self) -> bool:
	        """Return True for LEARNING_TRAIN and LEARNING_VALIDATION, False for EVAL_TEST."""
	        return self in (RunMode.LEARNING_TRAIN, RunMode.LEARNING_VALIDATION)

	    @property
	    def is_eval(self) -> bool:
	        """Return True only for EVAL_TEST."""
	        return self is RunMode.EVAL_TEST


	@dataclass
	class MemoryScope:
	    """
	    Scoping metadata for a memory. Determines which contexts a memory is
	    eligible to be retrieved in.

	    Every field is optional; an empty list / empty string places no
	    restriction on that dimension.
	    """

	    agent_roles: list[str] = field(default_factory=list)  # e.g. ["coder", "tester"]
	    tool_names: list[str] = field(default_factory=list)  # e.g. ["python_exec"]
	    task_categories: list[str] = field(default_factory=list)  # e.g. ["coding", "debugging"]
	    team_protocols: list[str] = field(default_factory=list)  # e.g. ["code_review_pipeline"]
	    user_id: str = ""  # scoped to a specific user

	    def matches(self, query_scope: "MemoryScope") -> bool:
	        """Check if this scope overlaps with a query scope. Empty = matches all.

	        A dimension is compared only when BOTH scopes specify it. List
	        dimensions match when they share at least one value; ``user_id``
	        matches on equality.

	        Args:
	            query_scope: The scope describing the context doing the lookup.

	        Returns:
	            False as soon as one dimension that both sides specify has no
	            overlap; True otherwise.
	        """
	        list_dimensions = (
	            (self.agent_roles, query_scope.agent_roles),
	            (self.tool_names, query_scope.tool_names),
	            (self.task_categories, query_scope.task_categories),
	            # team_protocols is a declared scoping dimension and is compared
	            # the same way as the other list fields.
	            (self.team_protocols, query_scope.team_protocols),
	        )
	        for own_values, query_values in list_dimensions:
	            # An empty side is a wildcard, so only a two-sided mismatch rejects.
	            if own_values and query_values and set(own_values).isdisjoint(query_values):
	                return False

	        if self.user_id and query_scope.user_id and self.user_id != query_scope.user_id:
	            return False
	        return True


	@dataclass
	class PurposeScoreV2:
	    """
	    Extended PurposeScore with evidence tracking and hack detection.

	    Backward compatible: original PurposeScore fields are preserved.
	    V2 additions are evidence_ids, components, rubric_version, hack_flags.

	    This class only stores the values it is given; nothing is validated or
	    derived at construction time.
	    """

	    # Original PurposeScore fields (all required, positional order matters).
	    phi_before: float
	    phi_after: float
	    # Stored as supplied, not computed here.
	    # NOTE(review): presumably phi_after - phi_before; confirm with the producer.
	    delta: float
	    reasoning: str
	    evidence: str
	    confidence: float

	    # V2 additions (all optional, each instance gets its own container).
	    evidence_ids: list[str] = field(default_factory=list)
	    components: dict[str, float] = field(default_factory=dict)
	    rubric_version: str = "v1"
	    hack_flags: list[str] = field(default_factory=list)

	    @property
	    def improved(self) -> bool:
	        """Return True when ``delta`` is strictly positive."""
	        return self.delta > 0.0

	    @property
	    def is_suspicious(self) -> bool:
	        """Return True when at least one hack flag has been recorded."""
	        return bool(self.hack_flags)