""" V2 Core Types — RunMode, extended PurposeScore, memory scoping. Backward compatible: all V1 types remain unchanged. V2 additions are new classes or optional fields on existing ones. """ from __future__ import annotations from enum import Enum from dataclasses import dataclass, field from typing import Any class RunMode(Enum): """ Controls what the framework is allowed to mutate during a run. learning_train: Full read/write. Memories can be created, Q-values updated, heuristics distilled. This is where the agent learns. learning_validation: Read existing memories, but writes go to a staging area. Used to validate whether new memories actually help before promoting. eval_test: Pure evaluation. NO memory writes, NO heuristic promotion, NO rubric mutation. The agent runs with whatever it has learned. This is the only mode whose numbers you can trust. """ LEARNING_TRAIN = "learning_train" LEARNING_VALIDATION = "learning_validation" EVAL_TEST = "eval_test" @property def allows_memory_write(self) -> bool: return self == RunMode.LEARNING_TRAIN @property def allows_staging_write(self) -> bool: return self in (RunMode.LEARNING_TRAIN, RunMode.LEARNING_VALIDATION) @property def is_eval(self) -> bool: return self == RunMode.EVAL_TEST @dataclass class MemoryScope: """ Scoping metadata for a memory. Determines which contexts a memory is eligible to be retrieved in. """ agent_roles: list[str] = field(default_factory=list) # e.g. ["coder", "tester"] tool_names: list[str] = field(default_factory=list) # e.g. ["python_exec"] task_categories: list[str] = field(default_factory=list) # e.g. ["coding", "debugging"] team_protocols: list[str] = field(default_factory=list) # e.g. ["code_review_pipeline"] user_id: str = "" # scoped to a specific user def matches(self, query_scope: "MemoryScope") -> bool: """Check if this scope overlaps with a query scope. Empty = matches all.""" if self.agent_roles and query_scope.agent_roles: if not set(self.agent_roles) & set(query_scope.agent_roles): return False if self.tool_names and query_scope.tool_names: if not set(self.tool_names) & set(query_scope.tool_names): return False if self.task_categories and query_scope.task_categories: if not set(self.task_categories) & set(query_scope.task_categories): return False if self.user_id and query_scope.user_id: if self.user_id != query_scope.user_id: return False return True @dataclass class PurposeScoreV2: """ Extended PurposeScore with evidence tracking and hack detection. Backward compatible: original PurposeScore fields are preserved. V2 additions are evidence_ids, components, rubric_version, hack_flags. """ phi_before: float phi_after: float delta: float reasoning: str evidence: str confidence: float # V2 additions evidence_ids: list[str] = field(default_factory=list) components: dict[str, float] = field(default_factory=dict) rubric_version: str = "v1" hack_flags: list[str] = field(default_factory=list) @property def improved(self) -> bool: return self.delta > 0.0 @property def is_suspicious(self) -> bool: return len(self.hack_flags) > 0