| """ |
| V2 Core Types — RunMode, extended PurposeScore, memory scoping. |
| |
| Backward compatible: all V1 types remain unchanged. V2 additions are new |
| classes or optional fields on existing ones. |
| """ |
| from __future__ import annotations |
|
|
| from enum import Enum |
| from dataclasses import dataclass, field |
| from typing import Any |
|
|
|
|
class RunMode(Enum):
    """
    Run mode that governs which mutations a run may perform.

    Members:
        LEARNING_TRAIN: Full read/write. Memories can be created, Q-values
            updated and heuristics distilled; this is the learning mode.
        LEARNING_VALIDATION: Existing memories are read, but writes go to a
            staging area so new memories can be validated before promotion.
        EVAL_TEST: Pure evaluation. No memory writes, no heuristic promotion
            and no rubric mutation; the only mode whose numbers can be
            trusted.
    """

    LEARNING_TRAIN = "learning_train"
    LEARNING_VALIDATION = "learning_validation"
    EVAL_TEST = "eval_test"

    @property
    def allows_memory_write(self) -> bool:
        """True only for LEARNING_TRAIN."""
        # Enum members are singletons, so an identity test is equivalent to ==.
        return self is RunMode.LEARNING_TRAIN

    @property
    def allows_staging_write(self) -> bool:
        """True for LEARNING_TRAIN and LEARNING_VALIDATION."""
        if self is RunMode.LEARNING_TRAIN:
            return True
        return self is RunMode.LEARNING_VALIDATION

    @property
    def is_eval(self) -> bool:
        """True only for EVAL_TEST."""
        return self is RunMode.EVAL_TEST
|
|
|
|
@dataclass
class MemoryScope:
    """
    Scoping metadata for a memory. Determines which contexts a memory is
    eligible to be retrieved in.

    Every field is optional; an empty list or empty string means
    "unrestricted" on that dimension.
    """

    agent_roles: list[str] = field(default_factory=list)
    tool_names: list[str] = field(default_factory=list)
    task_categories: list[str] = field(default_factory=list)
    team_protocols: list[str] = field(default_factory=list)
    user_id: str = ""

    def matches(self, query_scope: "MemoryScope") -> bool:
        """
        Check whether this scope overlaps with a query scope.

        A dimension is only compared when BOTH scopes specify it; an empty
        value on either side matches everything. List dimensions match when
        they share at least one element; ``user_id`` must be equal.

        Args:
            query_scope: The scope of the retrieval request.

        Returns:
            True if no specified dimension conflicts, False otherwise.
        """
        # All list-valued dimensions share the same overlap rule.
        # team_protocols is included so that a protocol-scoped memory is not
        # returned for a query that names only other protocols.
        list_dimensions = (
            (self.agent_roles, query_scope.agent_roles),
            (self.tool_names, query_scope.tool_names),
            (self.task_categories, query_scope.task_categories),
            (self.team_protocols, query_scope.team_protocols),
        )
        for own, wanted in list_dimensions:
            if own and wanted and set(own).isdisjoint(wanted):
                return False

        # user_id is a scalar: both set and different means no match.
        if self.user_id and query_scope.user_id:
            if self.user_id != query_scope.user_id:
                return False
        return True
|
|
|
|
@dataclass
class PurposeScoreV2:
    """
    PurposeScore extended with evidence tracking and hack detection.

    The six required fields are the original PurposeScore fields, kept for
    backward compatibility. The V2 additions (evidence_ids, components,
    rubric_version, hack_flags) all have defaults, so V1-style construction
    keeps working.

    NOTE(review): delta is stored as given; nothing here checks it against
    phi_after - phi_before. Confirm the intended relationship with callers.
    """

    # Original PurposeScore fields (all required).
    phi_before: float
    phi_after: float
    delta: float
    reasoning: str
    evidence: str
    confidence: float

    # V2 additions (all optional).
    evidence_ids: list[str] = field(default_factory=list)
    components: dict[str, float] = field(default_factory=dict)
    rubric_version: str = "v1"
    hack_flags: list[str] = field(default_factory=list)

    @property
    def improved(self) -> bool:
        """True when delta is strictly positive."""
        change = self.delta
        return change > 0.0

    @property
    def is_suspicious(self) -> bool:
        """True when at least one hack flag is recorded."""
        flag_count = len(self.hack_flags)
        return flag_count > 0
|
|