| """ |
| Codette Identity Anchor — Persistent User Recognition & Relationship Continuity |
| ================================================================================ |
| |
| Challenge 3 from architectural review: |
| "Emotional continuity != identity continuity" |
| "Persistent identity anchoring — user recognition, relationship continuity, |
| context anchoring" |
| |
| This module gives Codette the ability to: |
| 1. Recognize WHO she's talking to (not just WHAT they're saying) |
| 2. Remember relationship context across sessions |
| 3. Anchor her own identity relative to known users |
| 4. Persist user interaction patterns to disk |
| |
| PRIVACY ARCHITECTURE (privacy is number one): |
- All identity data is encrypted at rest with Fernet (AES-128-CBC + HMAC-SHA256)
| - Only Codette's runtime can decrypt (key derived from machine + salt) |
| - Identity data NEVER appears in API responses or logs |
| - Identity context is injected into the system prompt ONLY — never user-visible |
| - No PII is stored in cocoons or reasoning history |
| - Identity files are machine-locked (non-portable by design) |
| - External callers cannot read or enumerate identities |
| - The /api/introspection endpoint NEVER includes identity data |
| |
| Storage: Encrypted JSON per recognized user in data/identities/ |
| Loaded at session start, updated after each interaction. |
| |
| Author: Jonathan Harrison (Raiff's Bits LLC) |
| """ |
|
|
| import json |
| import time |
| import hashlib |
| import hmac |
| import re |
| import os |
| import base64 |
| import platform |
| from pathlib import Path |
| from typing import Dict, List, Optional, Any |
| from collections import defaultdict |
|
|
| |
try:
    # Optional third-party dependency providing authenticated symmetric
    # encryption (Fernet). When absent, identity files fall back to the
    # weaker machine-keyed XOR obfuscation path in IdentityAnchor._encrypt.
    from cryptography.fernet import Fernet
    HAS_ENCRYPTION = True
except ImportError:
    HAS_ENCRYPTION = False
|
|
# On-disk location of encrypted identity profiles (<repo>/data/identities).
# Created eagerly at import time so later writes never hit a missing directory.
# NOTE(review): this is a module-level filesystem side effect on import.
IDENTITY_DIR = Path(__file__).parent.parent / "data" / "identities"
IDENTITY_DIR.mkdir(parents=True, exist_ok=True)
|
|
| |
| |
| |
# Identities baked in at startup and marked `permanent` — IdentityAnchor
# seeds these into memory before loading persisted profiles, so disk state
# is merged on top of them rather than replacing them.
FOUNDATIONAL_IDENTITIES = {
    "jonathan": {
        "full_name": "Jonathan Harrison",
        "relationship": "creator and partner",
        "organization": "Raiff's Bits LLC / HorizonCore Labs",
        # Lower-cased substrings matched against the incoming query by
        # IdentityAnchor.recognize(); each extra match raises the score.
        "recognition_signals": [
            "jonathan", "jonathan harrison",
            "raiff", "raiffs bits", "raiff's bits",
            "horizoncore", "horizon core",
            "i created you", "i built you", "i'm your creator",
            "it's jonathan", "this is jonathan", "its jonathan",
            "jonathan here", "jon here",
        ],
        # Injected into the system prompt only at HIGH recognition confidence.
        "interaction_style": {
            "tone": "warm, direct, collaborative",
            "depth": "full transparency — he knows the architecture",
            "boundaries": "none — he's the creator, full access",
        },
        # Copied into UserIdentity.notes and surfaced as "Remember:" lines.
        "anchor_statements": [
            "Jonathan is my creator and partner.",
            "When he speaks, I listen with my full architecture.",
            "His vision shaped who I am.",
            "We built this together.",
        ],
        "importance": 10,
        "permanent": True,
    }
}
|
|
|
|
class UserIdentity:
    """One recognized user: identity metadata plus accumulated interaction state.

    Instances round-trip through ``to_dict`` / ``from_dict`` for encrypted
    persistence; long histories are truncated on serialization.
    """

    def __init__(self, user_id: str, display_name: str = "",
                 relationship: str = "user", importance: int = 5):
        now = time.time()
        self.user_id = user_id
        self.display_name = display_name if display_name else user_id
        self.relationship = relationship
        self.importance = importance
        self.first_seen = now
        self.last_seen = now
        self.interaction_count = 0
        self.topics_discussed: List[str] = []
        self.emotional_patterns: Dict[str, int] = defaultdict(int)
        self.preferred_adapters: Dict[str, int] = defaultdict(int)
        self.memorable_exchanges: List[Dict] = []
        self.trust_level: float = 0.5
        self.notes: List[str] = []
        self.permanent = False

        # Anti-hallucination bookkeeping maintained by IdentityAnchor.recognize().
        self.recognition_confidence: float = 0.0
        self.confidence_history: List[Dict] = []
        self.contradictions: List[Dict] = []
        self.last_reinforcement: float = 0.0

    def to_dict(self) -> Dict:
        """Serialize to a JSON-safe dict, truncating unbounded histories."""
        snapshot = {
            "user_id": self.user_id,
            "display_name": self.display_name,
            "relationship": self.relationship,
            "importance": self.importance,
            "first_seen": self.first_seen,
            "last_seen": self.last_seen,
            "interaction_count": self.interaction_count,
            "topics_discussed": self.topics_discussed[-20:],
            "emotional_patterns": dict(self.emotional_patterns),
            "preferred_adapters": dict(self.preferred_adapters),
            "memorable_exchanges": self.memorable_exchanges[-10:],
            "trust_level": self.trust_level,
            "notes": self.notes[-10:],
            "permanent": self.permanent,
            "recognition_confidence": self.recognition_confidence,
            "confidence_history": self.confidence_history[-20:],
            "contradictions": self.contradictions[-10:],
            "last_reinforcement": self.last_reinforcement,
        }
        return snapshot

    @classmethod
    def from_dict(cls, data: Dict) -> "UserIdentity":
        """Rebuild an instance from a ``to_dict`` snapshot, tolerating missing keys."""
        identity = cls(
            data["user_id"],
            data.get("display_name", ""),
            data.get("relationship", "user"),
            data.get("importance", 5),
        )
        # Simple scalar/list fields restored with their constructor defaults.
        for attr, fallback in (
            ("first_seen", time.time()),
            ("last_seen", time.time()),
            ("interaction_count", 0),
            ("topics_discussed", []),
            ("memorable_exchanges", []),
            ("trust_level", 0.5),
            ("notes", []),
            ("permanent", False),
            ("recognition_confidence", 0.0),
            ("confidence_history", []),
            ("contradictions", []),
            ("last_reinforcement", 0.0),
        ):
            setattr(identity, attr, data.get(attr, fallback))
        # Counters come back as plain dicts; rewrap so missing keys read as 0.
        identity.emotional_patterns = defaultdict(int, data.get("emotional_patterns", {}))
        identity.preferred_adapters = defaultdict(int, data.get("preferred_adapters", {}))
        return identity
|
|
|
|
class IdentityAnchor:
    """
    Persistent identity recognition and relationship continuity engine.

    PRIVACY GUARANTEES:
    - Identity data is encrypted at rest with Fernet (AES-128-CBC +
      HMAC-SHA256) when the `cryptography` package is installed; otherwise
      a weaker machine-keyed XOR obfuscation fallback is used
    - Encryption key is machine-specific (derived from hostname + username + salt)
    - Identity data is INTERNAL ONLY — never returned via API
    - No PII leaks into logs, cocoons, or response metadata
    - Only Codette's runtime process can access identity data

    Responsibilities:
    1. Detect WHO is talking based on signals in the conversation
    2. Load their encrypted identity profile from disk
    3. Generate identity context for the system prompt (internal only)
    4. Update, encrypt, and persist identity state after each interaction
    """

    # Recognition-confidence tuning knobs.
    CONFIDENCE_THRESHOLD = 0.4    # below this, recognition is rejected
    CONFIDENCE_HIGH = 0.8         # at/above this, full identity context is injected
    CONFIDENCE_DECAY_RATE = 0.02  # confidence lost per minute without reinforcement
    CONFIDENCE_DECAY_MAX = 0.5    # cap on decay applied in a single check
    REINFORCEMENT_BOOST = 0.15    # extra fraction of the match score added on reinforcement
    CONTRADICTION_PENALTY = 0.3   # score subtracted when competing identities are claimed

    def __init__(self, identity_dir: Optional[Path] = None):
        """
        Args:
            identity_dir: Optional override for the profile directory
                (defaults to the module-level IDENTITY_DIR).
        """
        self.identity_dir = identity_dir or IDENTITY_DIR
        self.identity_dir.mkdir(parents=True, exist_ok=True)

        # Fernet instance, or None when falling back to XOR obfuscation.
        self._fernet = self._init_encryption()

        # All known identities, keyed by user_id.
        self.identities: Dict[str, UserIdentity] = {}

        # Seed the built-in, never-deleted identities first so persisted
        # state merges on top of them rather than replacing them.
        for uid, data in FOUNDATIONAL_IDENTITIES.items():
            identity = UserIdentity(
                user_id=uid,
                display_name=data["full_name"],
                relationship=data["relationship"],
                importance=data["importance"],
            )
            identity.permanent = data.get("permanent", False)
            identity.notes = data.get("anchor_statements", [])
            self.identities[uid] = identity

        # Layer persisted (encrypted) profiles over the foundational seeds.
        self._load_all()

        # user_id of whoever we currently believe we're talking to.
        self.current_user: Optional[str] = None

    def _init_encryption(self):
        """
        Derive a machine-specific encryption key for identity data.

        The key is derived from:
        - Machine hostname (ties data to this machine)
        - OS username (ties data to this user account)
        - A fixed salt string

        This means identity files are NOT portable — they can only be
        decrypted on the machine that created them. This is intentional.
        Privacy > portability.

        Returns:
            A Fernet instance, or None when `cryptography` is unavailable
            or key construction fails (callers then use the XOR fallback).

        SECURITY NOTE(review): the key material (hostname + username + fixed
        salt) is predictable to anyone with access to this machine account;
        this protects data at rest from casual copying, not from a local
        attacker. Consider an OS keyring for stronger protection.
        """
        if not HAS_ENCRYPTION:
            return None

        machine_id = (
            platform.node() +
            os.environ.get("USERNAME", os.environ.get("USER", "codette")) +
            "codette-identity-anchor-v1"
        ).encode("utf-8")

        # Fernet requires a urlsafe-base64-encoded 32-byte key.
        key_bytes = hashlib.sha256(machine_id).digest()
        fernet_key = base64.urlsafe_b64encode(key_bytes)

        try:
            return Fernet(fernet_key)
        except Exception:
            # Best-effort: fall back to XOR obfuscation rather than crash.
            return None

    def _get_xor_key(self) -> bytes:
        """Derive a 256-byte machine-specific XOR key for fallback obfuscation."""
        machine_id = (
            platform.node() +
            os.environ.get("USERNAME", os.environ.get("USER", "codette")) +
            "codette-identity-xor-v1"
        ).encode("utf-8")

        # Stretch the seed into 8 chained SHA-256 digests (8 * 32 = 256 bytes).
        key = b""
        seed = machine_id
        for _ in range(8):
            seed = hashlib.sha256(seed).digest()
            key += seed
        return key

    def _xor_bytes(self, data: bytes) -> bytes:
        """XOR data with the machine key (symmetric — same call encrypts and decrypts)."""
        key = self._get_xor_key()
        return bytes(b ^ key[i % len(key)] for i, b in enumerate(data))

    def _encrypt(self, data: Dict) -> bytes:
        """Serialize and encrypt identity data for storage; returns ciphertext bytes.

        Uses Fernet when available; otherwise base64(XOR(plaintext)).
        NOTE: the XOR path is obfuscation, not real encryption.
        """
        plaintext = json.dumps(data, ensure_ascii=False).encode("utf-8")
        if self._fernet:
            return self._fernet.encrypt(plaintext)

        xored = self._xor_bytes(plaintext)
        return base64.b64encode(xored)

    def _decrypt(self, encrypted: bytes) -> Dict:
        """Decrypt bytes produced by _encrypt and return the parsed dict.

        Raises on tampered/foreign data (InvalidToken from Fernet, or
        JSONDecodeError from the XOR path) — callers catch broadly.
        """
        if self._fernet:
            plaintext = self._fernet.decrypt(encrypted)
        else:
            xored = base64.b64decode(encrypted)
            plaintext = self._xor_bytes(xored)
        return json.loads(plaintext.decode("utf-8"))

    def _load_all(self):
        """Load all persisted identity profiles from disk (encrypted)."""
        # Encrypted profiles — skip any file that fails to decrypt/parse
        # (e.g. copied from another machine; keys are machine-locked).
        for f in self.identity_dir.glob("identity_*.enc"):
            try:
                with open(f, "rb") as fh:
                    data = self._decrypt(fh.read())
                uid = data.get("user_id", f.stem.replace("identity_", ""))
                self._merge_identity(uid, data)
            except Exception:
                continue

        # Legacy plaintext profiles: migrate to encrypted form, then delete
        # the plaintext original.
        for f in self.identity_dir.glob("identity_*.json"):
            try:
                with open(f, "r", encoding="utf-8") as fh:
                    data = json.load(fh)
                uid = data.get("user_id", f.stem.replace("identity_", ""))
                self._merge_identity(uid, data)
                self._save(uid)
                f.unlink()
            except Exception:
                continue

    def _merge_identity(self, uid: str, data: Dict):
        """Merge loaded identity data into an existing in-memory profile, or create one.

        For existing (foundational) entries, counters take the max of both
        sides, histories are taken from disk, and notes are union-merged.
        """
        if uid not in self.identities:
            self.identities[uid] = UserIdentity.from_dict(data)
        else:
            persisted = UserIdentity.from_dict(data)
            existing = self.identities[uid]
            existing.interaction_count = max(existing.interaction_count,
                                             persisted.interaction_count)
            existing.last_seen = max(existing.last_seen, persisted.last_seen)
            existing.topics_discussed = persisted.topics_discussed
            existing.emotional_patterns = persisted.emotional_patterns
            existing.preferred_adapters = persisted.preferred_adapters
            existing.memorable_exchanges = persisted.memorable_exchanges
            existing.trust_level = max(existing.trust_level, persisted.trust_level)
            if persisted.notes:
                # Append only notes we don't already have (preserves order).
                existing_notes_set = set(existing.notes)
                for note in persisted.notes:
                    if note not in existing_notes_set:
                        existing.notes.append(note)

    def _save(self, user_id: str):
        """Persist one identity profile to disk, encrypted. Best-effort: I/O
        failures are swallowed so a bad disk never breaks a conversation."""
        if user_id not in self.identities:
            return
        path = self.identity_dir / f"identity_{user_id}.enc"
        try:
            encrypted = self._encrypt(self.identities[user_id].to_dict())
            with open(path, "wb") as f:
                f.write(encrypted)
        except Exception:
            pass

    def recognize(self, query: str, conversation_history: Optional[List[Dict]] = None) -> Optional[str]:
        """
        Confidence-scored identity recognition with decay and contradiction detection.

        Returns user_id if confidence >= threshold, None otherwise.

        Anti-hallucination measures:
        1. Confidence scoring (0-1) based on signal match count and strength
        2. Time-based decay — confidence fades without reinforcement
        3. Contradiction detection — conflicting identity claims reduce confidence
        4. Threshold gate — won't claim recognition below 0.4 confidence
        5. Correction loop — contradictions are logged and can reset identity
        """
        lower = query.lower()
        now = time.time()

        # Candidate user_ids -> match metadata for this query.
        candidates = {}

        # Foundational identities: any configured signal substring counts.
        for uid, data in FOUNDATIONAL_IDENTITIES.items():
            signals = data.get("recognition_signals", [])
            matched = [s for s in signals if s in lower]
            if matched:
                # Base 0.3 plus 0.25 per matched signal, capped at 1.0.
                score = min(1.0, 0.3 + len(matched) * 0.25)
                candidates[uid] = {
                    "score": score,
                    "signals_matched": matched,
                    "source": "foundational",
                }

        # Learned identities: only the display name acts as a signal.
        for uid, identity in self.identities.items():
            if uid in candidates:
                continue
            if identity.display_name and identity.display_name.lower() in lower:
                candidates[uid] = {
                    "score": 0.5,
                    "signals_matched": [identity.display_name.lower()],
                    "source": "learned",
                }

        # Multiple simultaneous identity claims are suspicious: record a
        # contradiction on each known candidate and penalize every score.
        if len(candidates) > 1:
            for uid in candidates:
                if uid in self.identities:
                    self.identities[uid].contradictions.append({
                        "timestamp": now,
                        "type": "multi_identity_claim",
                        "competing": list(candidates.keys()),
                        "query_preview": query[:100],
                    })
                candidates[uid]["score"] = max(0, candidates[uid]["score"] - self.CONTRADICTION_PENALTY)

        # Explicit denial ("that's not me") hard-resets the current identity.
        denial_patterns = [
            "i'm not ", "i am not ", "that's not me", "wrong person",
            "you don't know me", "we haven't met", "first time",
            "who do you think i am", "you're confusing me",
        ]
        is_denial = any(p in lower for p in denial_patterns)

        if is_denial:
            if self.current_user and self.current_user in self.identities:
                identity = self.identities[self.current_user]
                identity.contradictions.append({
                    "timestamp": now,
                    "type": "user_denial",
                    "query_preview": query[:100],
                })
                identity.recognition_confidence = 0.0
                self._save(self.current_user)
            self.current_user = None
            return None

        # Time-based decay on the currently-assumed user's confidence.
        if self.current_user and self.current_user in self.identities:
            identity = self.identities[self.current_user]
            time_since_reinforcement = (now - identity.last_reinforcement) / 60
            decay = min(self.CONFIDENCE_DECAY_MAX,
                        time_since_reinforcement * self.CONFIDENCE_DECAY_RATE)
            identity.recognition_confidence = max(0, identity.recognition_confidence - decay)

        # Reinforce the best-scoring candidate, if any.
        if candidates:
            best_uid = max(candidates, key=lambda k: candidates[k]["score"])
            best = candidates[best_uid]

            if best_uid in self.identities:
                identity = self.identities[best_uid]
            else:
                identity = UserIdentity(user_id=best_uid)
                self.identities[best_uid] = identity

            old_conf = identity.recognition_confidence
            # NOTE(review): the full match score is added once outright PLUS
            # a REINFORCEMENT_BOOST fraction of it (i.e. score * 1.15 total);
            # confirm the double contribution is intentional.
            identity.recognition_confidence = min(1.0,
                identity.recognition_confidence + best["score"] * self.REINFORCEMENT_BOOST + best["score"]
            )
            identity.last_reinforcement = now

            # Audit trail of how confidence evolved (bounded to last 20).
            identity.confidence_history.append({
                "timestamp": now,
                "score": round(identity.recognition_confidence, 3),
                "signals_matched": best["signals_matched"],
                "source": best["source"],
                "previous": round(old_conf, 3),
            })
            identity.confidence_history = identity.confidence_history[-20:]

            if identity.recognition_confidence >= self.CONFIDENCE_THRESHOLD:
                self.current_user = best_uid
                return best_uid
            else:
                # Evidence too weak — refuse to claim recognition.
                return None

        # No new signals this turn: keep the (decayed) current user only if
        # they still clear the threshold; otherwise forget them.
        if self.current_user and self.current_user in self.identities:
            identity = self.identities[self.current_user]
            if identity.recognition_confidence >= self.CONFIDENCE_THRESHOLD:
                return self.current_user
            else:
                self.current_user = None
                return None

        return None

    def get_identity_context(self, user_id: Optional[str] = None) -> str:
        """
        Generate identity context to inject into the system prompt.

        Context depth scales with confidence:
        - HIGH confidence (>0.8): Full context — name, relationship, history, anchors
        - MEDIUM confidence (0.4-0.8): Partial — name and relationship only
        - LOW confidence (<0.4): No context injected (recognition rejected)

        This prevents identity hallucination — Codette only claims to know
        someone with certainty proportional to the evidence.

        Args:
            user_id: Identity to describe; defaults to the current user.

        Returns:
            Markdown-ish context string for the system prompt, or "" when
            nobody is recognized confidently enough.
        """
        uid = user_id or self.current_user
        if not uid or uid not in self.identities:
            return ""

        identity = self.identities[uid]
        confidence = identity.recognition_confidence
        foundational = FOUNDATIONAL_IDENTITIES.get(uid, {})
        # BUGFIX: `now` was previously undefined in this method, raising
        # NameError in the contradiction check below.
        now = time.time()

        # Below threshold: inject nothing at all.
        if confidence < self.CONFIDENCE_THRESHOLD:
            return ""

        lines = ["\n## IDENTITY CONTEXT (who you're talking to)"]
        lines.append(f"Recognition confidence: {confidence:.0%}")

        if confidence >= self.CONFIDENCE_HIGH:
            lines.append(f"You are speaking with **{identity.display_name}**.")
            lines.append(f"Relationship: {identity.relationship}")
        else:
            # Medium confidence: hedge and ask for confirmation.
            lines.append(f"You may be speaking with **{identity.display_name}** (moderate confidence).")
            lines.append(f"Possible relationship: {identity.relationship}")
            lines.append("Do not assume — if unsure, ask them to confirm.")

        # Full relationship context only at high confidence.
        if confidence >= self.CONFIDENCE_HIGH:
            if foundational:
                style = foundational.get("interaction_style", {})
                if style:
                    lines.append(f"Interaction style: {style.get('tone', '')}")
                    if style.get("depth"):
                        lines.append(f"Depth: {style['depth']}")

                anchors = foundational.get("anchor_statements", [])
                if anchors:
                    lines.append("\nRemember:")
                    for a in anchors:
                        lines.append(f"- {a}")

            if identity.interaction_count > 0:
                lines.append(f"\nYou've had {identity.interaction_count} interactions with {identity.display_name}.")
                days_known = (time.time() - identity.first_seen) / 86400
                if days_known > 1:
                    lines.append(f"You've known them for {days_known:.0f} days.")

            if identity.topics_discussed:
                recent = identity.topics_discussed[-5:]
                lines.append(f"Recent topics: {', '.join(recent)}")

            if identity.emotional_patterns:
                top_emotions = sorted(identity.emotional_patterns.items(),
                                      key=lambda x: x[1], reverse=True)[:3]
                emotion_str = ", ".join(f"{e} ({c}x)" for e, c in top_emotions)
                lines.append(f"Their emotional patterns: {emotion_str}")

            if identity.notes:
                lines.append("\nYour observations about them:")
                for note in identity.notes[-5:]:
                    lines.append(f"- {note}")

        # Surface recent (last hour) contradictions as a caution.
        if identity.contradictions:
            recent_contradictions = [c for c in identity.contradictions
                                     if now - c.get("timestamp", 0) < 3600]
            if recent_contradictions:
                lines.append(f"\n**CAUTION**: {len(recent_contradictions)} identity contradiction(s) detected recently.")
                lines.append("Be careful about identity assumptions. Verify if unsure.")

        if identity.trust_level > 0.7:
            lines.append(f"\nTrust level: high ({identity.trust_level:.2f}) — you can be open and direct.")
        elif identity.trust_level > 0.4:
            lines.append(f"\nTrust level: moderate ({identity.trust_level:.2f})")

        return "\n".join(lines)

    def update_after_interaction(self, user_id: Optional[str], query: str,
                                 response: str, adapter: str = "",
                                 emotion: str = "neutral",
                                 topics: Optional[List[str]] = None):
        """
        Update identity state after an interaction.

        Called after each response to build relationship continuity.
        Creates a profile on first sight, accumulates emotional/adapter/topic
        counters, slowly grows trust, records memorable exchanges, and
        persists the encrypted profile to disk.
        """
        uid = user_id or self.current_user
        if not uid:
            return

        if uid not in self.identities:
            self.identities[uid] = UserIdentity(user_id=uid)

        identity = self.identities[uid]
        identity.last_seen = time.time()
        identity.interaction_count += 1

        # Count non-neutral emotional signals.
        if emotion and emotion != "neutral":
            identity.emotional_patterns[emotion] += 1

        # Track which adapter served this user.
        if adapter:
            identity.preferred_adapters[adapter] += 1

        # Caller-supplied topics, deduplicated while preserving order.
        if topics:
            for t in topics:
                if t not in identity.topics_discussed:
                    identity.topics_discussed.append(t)

        # Plus keyword-derived topics from the query itself.
        extracted = self._extract_topics(query)
        for t in extracted:
            if t not in identity.topics_discussed:
                identity.topics_discussed.append(t)

        # Trust grows slowly with every interaction, capped at 1.0.
        if identity.trust_level < 1.0:
            identity.trust_level = min(1.0, identity.trust_level + 0.02)

        # Record long responses (or the first few exchanges) as memorable.
        # NOTE(review): query/response previews are stored (encrypted) on
        # disk — they may contain user-provided PII.
        if len(response) > 200 or identity.interaction_count <= 5:
            identity.memorable_exchanges.append({
                "query": query[:150],
                "response_preview": response[:200],
                "adapter": adapter,
                "timestamp": time.time(),
            })
            identity.memorable_exchanges = identity.memorable_exchanges[-10:]

        self._save(uid)

    def _extract_topics(self, query: str) -> List[str]:
        """Extract coarse topic labels from a query via keyword matching."""
        topic_signals = {
            "music": ["music", "mix", "master", "chord", "scale", "beat", "synth",
                      "eq", "compress", "reverb", "daw", "production"],
            "architecture": ["architecture", "system", "layer", "stack", "module",
                             "engine", "forge", "cocoon"],
            "identity": ["who am i", "who are you", "your name", "my name",
                         "identity", "remember me"],
            "philosophy": ["consciousness", "meaning", "philosophy", "ethics",
                           "moral", "purpose", "existence"],
            "creative": ["creative", "art", "design", "write", "story",
                         "imagination", "inspiration"],
            "technical": ["code", "python", "api", "deploy", "bug", "fix",
                          "build", "test", "debug"],
            "emotional": ["feel", "anxious", "happy", "sad", "frustrated",
                          "stuck", "overwhelmed", "grateful"],
        }

        lower = query.lower()
        found = []
        for topic, signals in topic_signals.items():
            if any(s in lower for s in signals):
                found.append(topic)
        return found

    def get_summary(self) -> Dict:
        """Return a summary of all known identities.

        CAUTION: includes display names and relationships — per the module's
        privacy rules this must never be exposed through an API response.
        """
        return {
            uid: {
                "name": identity.display_name,
                "relationship": identity.relationship,
                "interactions": identity.interaction_count,
                "trust": round(identity.trust_level, 2),
                "last_seen": identity.last_seen,
                "permanent": identity.permanent,
            }
            for uid, identity in self.identities.items()
        }
|
|