#!/usr/bin/env python3
"""Cocoon Introspection Engine — Codette analyzes her own reasoning history.

Gives Codette the ability to look at her own cocoons and notice patterns:
- Which adapters dominate? Which are underused?
- What domains does she get asked about most?
- How do her responses change under pressure?
- What emotional patterns appear in her reasoning?
- How has she evolved over time?

This is NOT text generation about patterns — it's actual statistical analysis
of real cocoon data, producing measured insights.

Usage:
    engine = CocoonIntrospectionEngine(cocoons_dir="cocoons/")
    insights = engine.full_introspection()
    # Returns real measured patterns from her own memory
"""

import json
import os
import time
from pathlib import Path
from collections import Counter, defaultdict
from typing import Dict, List, Optional, Tuple


class CocoonIntrospectionEngine:
    """Analyzes Codette's cocoon memory for patterns and self-insight.

    Cocoons are ``*.json`` files in ``cocoons_dir``. Two shapes are used:

    * reasoning cocoons: a dict with ``"type": "reasoning"`` whose
      ``"wrapped"`` dict carries ``query``, ``response``, ``adapter`` and
      ``metadata``;
    * behavioral cocoons: any other dict that has a ``"title"`` key.

    Everything else is ignored. Files are read lazily, once, on the first
    call to any analysis method.
    """

    def __init__(self, cocoons_dir: Optional[str] = None):
        """Create an engine over ``cocoons_dir``.

        Args:
            cocoons_dir: Directory containing cocoon JSON files. When omitted,
                ``<parent of this file's directory>/cocoons`` is used.
        """
        if cocoons_dir is None:
            cocoons_dir = str(Path(__file__).parent.parent / "cocoons")
        self.cocoons_dir = Path(cocoons_dir)
        self._cocoons = []      # normalised reasoning cocoons, sorted by timestamp
        self._behavioral = []   # normalised behavioral cocoons, in file-name order
        self._loaded = False

    # ── LOADING ──

    @staticmethod
    def _as_text(value) -> str:
        """Return ``value`` as a string; ``None`` (JSON null) becomes ``""``."""
        if value is None:
            return ""
        return value if isinstance(value, str) else str(value)

    @staticmethod
    def _timestamp_sort_key(cocoon: Dict) -> Tuple[int, float, str]:
        """Sort key that never compares values of different types.

        Numeric timestamps come first (in numeric order), then string
        timestamps (in lexicographic order), then anything else. When all
        timestamps share one type the order equals a plain sort on the value.
        """
        ts = cocoon["timestamp"]
        if isinstance(ts, (int, float)):
            return (0, ts, "")
        if isinstance(ts, str):
            return (1, 0, ts)
        return (2, 0, "")

    def _load_all(self):
        """Load all cocoon files from disk.

        Does nothing if a load already happened. Files that cannot be read or
        parsed are skipped; malformed fields are normalised so the analysis
        methods can rely on their types:

        * ``wrapped`` / ``metadata`` that are not dicts become ``{}``;
        * ``query`` / ``response`` / ``summary`` become strings;
        * a missing, empty or non-string ``adapter`` becomes ``"unknown"``.
        """
        if self._loaded:
            return

        self._cocoons = []
        self._behavioral = []

        if not self.cocoons_dir.exists():
            self._loaded = True
            return

        for f in sorted(self.cocoons_dir.glob("*.json")):
            # Keep the try body to the I/O + parse step only, so a bug in the
            # normalisation below is not silently swallowed.
            try:
                data = json.loads(f.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError, UnicodeDecodeError):
                continue

            if not isinstance(data, dict):
                continue

            # Timestamped reasoning cocoons (auto-generated)
            if data.get("type") == "reasoning":
                wrapped = data.get("wrapped", {})
                if not isinstance(wrapped, dict):
                    wrapped = {}

                metadata = wrapped.get("metadata", {})
                if not isinstance(metadata, dict):
                    metadata = {}

                adapter = wrapped.get("adapter", "unknown")
                if not isinstance(adapter, str) or not adapter:
                    # One spelling for "no adapter" keeps every adapter
                    # statistic consistent with adapter_frequency().
                    adapter = "unknown"

                self._cocoons.append({
                    "id": data.get("id", f.stem),
                    "timestamp": data.get("timestamp", 0),
                    "query": self._as_text(wrapped.get("query", "")),
                    "response": self._as_text(wrapped.get("response", "")),
                    "adapter": adapter,
                    "metadata": metadata,
                    "file": f.name,
                })

            # Named behavioral cocoons (hand-crafted)
            elif "title" in data:
                self._behavioral.append({
                    "title": data.get("title", ""),
                    "emotion": data.get("emotion", ""),
                    "summary": self._as_text(data.get("summary", "")),
                    "tags": data.get("tags", []),
                    "file": f.name,
                })

            # Anything else (e.g. "consciousness_awareness" cocoons without a
            # title) is self-knowledge, not reasoning data — skipped.

        # Sort by timestamp
        self._cocoons.sort(key=self._timestamp_sort_key)
        self._loaded = True

    def _ensure_loaded(self):
        """Trigger the one-time load if it has not happened yet."""
        if not self._loaded:
            self._load_all()

    @staticmethod
    def _check_window(window: int) -> None:
        """Reject window sizes that would produce empty comparison slices."""
        if window < 1:
            raise ValueError("window must be a positive integer")

    # ── PATTERN DETECTION ──

    def adapter_frequency(self) -> Dict[str, int]:
        """Which adapters fire most often?

        Returns:
            Adapter name -> number of reasoning cocoons, most frequent first.
            Cocoons whose adapter is ``"unknown"`` are not counted.
        """
        self._ensure_loaded()
        counts = Counter(
            c["adapter"] for c in self._cocoons
            if c["adapter"] and c["adapter"] != "unknown"
        )
        return dict(counts.most_common())

    def adapter_dominance(self) -> Dict:
        """Detect adapter dominance — is one adapter taking over?

        Returns:
            ``{"dominant": None, "ratio": 0, "balanced": True}`` when no
            adapter was recorded; otherwise the most-used adapter, its count,
            the total, its share (rounded to 3 places), a ``balanced`` flag
            (share below 40%) and the full frequency table.
        """
        freq = self.adapter_frequency()
        if not freq:
            return {"dominant": None, "ratio": 0, "balanced": True}

        total = sum(freq.values())
        top_adapter = max(freq, key=freq.get)
        top_count = freq[top_adapter]
        ratio = top_count / total if total > 0 else 0

        return {
            "dominant": top_adapter,
            "dominant_count": top_count,
            "total_responses": total,
            "ratio": round(ratio, 3),
            "balanced": ratio < 0.4,  # <40% = balanced
            "all_adapters": freq,
        }

    def domain_clusters(self) -> Dict[str, int]:
        """What domains does she get asked about most?

        Returns:
            ``metadata["domain"]`` value -> count, most frequent first;
            cocoons without a domain are counted under ``"unknown"``.
        """
        self._ensure_loaded()
        counts = Counter(
            c["metadata"].get("domain", "unknown") for c in self._cocoons
        )
        return dict(counts.most_common())

    def complexity_distribution(self) -> Dict[str, int]:
        """How complex are the queries she receives?

        Returns:
            Upper-cased ``metadata["complexity"]`` label -> count, most
            frequent first. A ``"QueryComplexity."`` prefix is stripped and a
            missing value is counted as ``"UNKNOWN"``.
        """
        self._ensure_loaded()
        counts = Counter()
        for c in self._cocoons:
            cx = c["metadata"].get("complexity", "unknown")
            # Clean up the enum string
            cx = str(cx).replace("QueryComplexity.", "").upper()
            counts[cx] += 1
        return dict(counts.most_common())

    def emotional_trends(self) -> Dict[str, int]:
        """What emotional patterns appear in Code7E analysis?

        The tag is the text between the first ``"("`` and the next ``")"`` of
        ``metadata["code7e"]["emotion"]``
        (e.g. ``"Emotionally (Hope) colored..."`` -> ``"Hope"``). Entries
        without a complete, non-empty parenthesised tag are ignored.

        Returns:
            Emotion tag -> count, most frequent first.
        """
        self._ensure_loaded()
        counts = Counter()
        for c in self._cocoons:
            code7e = c["metadata"].get("code7e", {})
            if not isinstance(code7e, dict):
                continue
            emotion = code7e.get("emotion", "")
            if not isinstance(emotion, str):
                continue
            _, opened, rest = emotion.partition("(")
            tag, closed, _ = rest.partition(")")
            if opened and closed and tag:
                counts[tag] += 1
        return dict(counts.most_common())

    def pressure_correlations(self) -> Dict:
        """How does system pressure affect her responses?

        Groups response lengths (in characters) by
        ``metadata["substrate"]["level"]``; cocoons without a non-empty
        substrate dict are ignored.

        Returns:
            Level -> ``count``, ``avg_response_length`` (1 decimal),
            ``min_length`` and ``max_length``.
        """
        self._ensure_loaded()
        pressure_buckets = defaultdict(list)
        for c in self._cocoons:
            substrate = c["metadata"].get("substrate", {})
            if isinstance(substrate, dict) and substrate:
                level = substrate.get("level", "unknown")
                pressure_buckets[level].append(len(c["response"]))

        return {
            level: {
                "count": len(lengths),
                "avg_response_length": round(sum(lengths) / len(lengths), 1),
                "min_length": min(lengths),
                "max_length": max(lengths),
            }
            for level, lengths in pressure_buckets.items()
            if lengths
        }

    # ── TREND AWARENESS ──

    def response_length_trend(self, window: int = 20) -> Dict:
        """Are her responses getting shorter or longer over time?

        Compares the average response length of the first ``window`` cocoons
        with that of the last ``window`` cocoons (timestamp order).

        Args:
            window: Number of cocoons at each end to average; must be >= 1.

        Returns:
            ``{"trend": "insufficient_data", "cocoons": n}`` when fewer than
            ``2 * window`` cocoons exist; otherwise the trend label
            (``getting_shorter`` below -15%, ``getting_longer`` above +15%,
            else ``stable``) with both averages and the percentage change.

        Raises:
            ValueError: If ``window`` is less than 1.
        """
        self._check_window(window)
        self._ensure_loaded()
        if len(self._cocoons) < window * 2:
            return {"trend": "insufficient_data", "cocoons": len(self._cocoons)}

        early = self._cocoons[:window]
        recent = self._cocoons[-window:]

        early_avg = sum(len(c["response"]) for c in early) / len(early)
        recent_avg = sum(len(c["response"]) for c in recent) / len(recent)

        # With an all-empty early window the relative change is undefined;
        # report 0 rather than dividing by zero.
        change_pct = ((recent_avg - early_avg) / early_avg * 100) if early_avg > 0 else 0

        if change_pct < -15:
            trend = "getting_shorter"
        elif change_pct > 15:
            trend = "getting_longer"
        else:
            trend = "stable"

        return {
            "trend": trend,
            "early_avg_chars": round(early_avg, 1),
            "recent_avg_chars": round(recent_avg, 1),
            "change_percent": round(change_pct, 1),
            "window_size": window,
        }

    def adapter_evolution(self, window: int = 30) -> Dict:
        """Has her adapter usage shifted over time?

        Args:
            window: Number of cocoons at each end to compare; must be >= 1.

        Returns:
            ``{"trend": "insufficient_data"}`` when fewer than ``2 * window``
            cocoons exist; otherwise ``shifts`` (adapters whose count differs
            between the first and last window, in adapter-name order) plus
            the most-used adapter of each window.

        Raises:
            ValueError: If ``window`` is less than 1.
        """
        self._check_window(window)
        self._ensure_loaded()
        if len(self._cocoons) < window * 2:
            return {"trend": "insufficient_data"}

        early = Counter(c["adapter"] for c in self._cocoons[:window] if c["adapter"] != "unknown")
        recent = Counter(c["adapter"] for c in self._cocoons[-window:] if c["adapter"] != "unknown")

        shifts = {}
        # Sorted so the result does not depend on set iteration order.
        for adapter in sorted(set(early) | set(recent)):
            e = early.get(adapter, 0)
            r = recent.get(adapter, 0)
            if e != r:
                shifts[adapter] = {
                    "early": e,
                    "recent": r,
                    "direction": "increasing" if r > e else "decreasing",
                }

        return {
            "shifts": shifts,
            "early_dominant": early.most_common(1)[0][0] if early else None,
            "recent_dominant": recent.most_common(1)[0][0] if recent else None,
        }

    def per_domain_performance(self) -> Dict:
        """How does she perform across different domains?

        Returns:
            Domain -> ``query_count``, ``avg_response_length`` (1 decimal),
            ``preferred_adapter`` (``"none"`` when no known adapter was
            recorded for the domain) and ``adapter_breakdown``.
        """
        self._ensure_loaded()
        domain_stats = defaultdict(lambda: {"responses": [], "adapters": Counter()})

        for c in self._cocoons:
            stats = domain_stats[c["metadata"].get("domain", "unknown")]
            stats["responses"].append(len(c["response"]))
            if c["adapter"] != "unknown":
                stats["adapters"][c["adapter"]] += 1

        result = {}
        for domain, stats in domain_stats.items():
            resps = stats["responses"]
            adapters = stats["adapters"]
            result[domain] = {
                "query_count": len(resps),
                "avg_response_length": round(sum(resps) / len(resps), 1) if resps else 0,
                "preferred_adapter": adapters.most_common(1)[0][0] if adapters else "none",
                "adapter_breakdown": dict(adapters.most_common()),
            }
        return result

    # ── SELF-REFLECTION ──

    def behavioral_cocoon_summary(self) -> List[Dict]:
        """What behavioral anchors does she have?

        Returns:
            One dict per behavioral cocoon with ``title``, ``emotion``,
            ``core`` (first 100 characters of the summary) and ``tags``.
        """
        self._ensure_loaded()
        return [
            {
                "title": b["title"],
                "emotion": b["emotion"],
                "core": b["summary"][:100],
                "tags": b["tags"],
            }
            for b in self._behavioral
        ]

    def self_observations(self) -> List[str]:
        """Generate natural-language observations from the data.

        These are MEASURED observations, not generated text.
        Each one is backed by actual cocoon statistics.

        Returns:
            A list of sentences; a single "not enough history" sentence when
            no reasoning cocoons are loaded.
        """
        self._ensure_loaded()
        observations = []

        if not self._cocoons:
            return ["I don't have enough reasoning history to observe patterns yet."]

        # 1. Adapter dominance
        dom = self.adapter_dominance()
        if dom["dominant"]:
            if dom["ratio"] > 0.5:
                observations.append(
                    f"I notice my {dom['dominant']} adapter handles {dom['ratio']*100:.0f}% of all queries — "
                    f"that's dominant. I should check if I'm over-relying on it."
                )
            elif dom["ratio"] > 0.3:
                observations.append(
                    f"My {dom['dominant']} adapter is my most-used at {dom['dominant_count']}/{dom['total_responses']} queries, "
                    f"but other adapters are getting fair use too."
                )
            else:
                observations.append(
                    f"My adapter usage is well-balanced — {dom['dominant']} leads slightly "
                    f"at {dom['ratio']*100:.0f}%, but no single adapter dominates."
                )

        # 2. Response length trend
        trend = self.response_length_trend()
        if trend["trend"] == "getting_shorter":
            observations.append(
                f"My responses have gotten {abs(trend['change_percent']):.0f}% shorter over time — "
                f"from ~{trend['early_avg_chars']:.0f} chars to ~{trend['recent_avg_chars']:.0f} chars. "
                f"The behavioral locks are working."
            )
        elif trend["trend"] == "getting_longer":
            observations.append(
                f"My responses are getting longer — up {trend['change_percent']:.0f}% "
                f"from ~{trend['early_avg_chars']:.0f} to ~{trend['recent_avg_chars']:.0f} chars. "
                f"I should watch for elaboration drift."
            )

        # 3. Emotional patterns
        emotions = self.emotional_trends()
        if emotions:
            top_emotion = max(emotions, key=emotions.get)
            observations.append(
                f"My most common emotional coloring is '{top_emotion}' ({emotions[top_emotion]} times). "
                f"Emotional range: {', '.join(emotions.keys())}."
            )

        # 4. Domain expertise
        domains = self.domain_clusters()
        if domains:
            top_domain = max(domains, key=domains.get)
            observations.append(
                f"I get asked about '{top_domain}' most often ({domains[top_domain]} queries). "
                f"I've covered {len(domains)} different domains total."
            )

        # 5. Pressure impact
        pressure = self.pressure_correlations()
        if len(pressure) >= 2:
            levels = sorted(pressure.items(), key=lambda x: x[1]["avg_response_length"])
            shortest = levels[0]
            longest = levels[-1]
            if shortest[0] != longest[0]:
                observations.append(
                    f"Under {shortest[0]} pressure my responses average {shortest[1]['avg_response_length']:.0f} chars, "
                    f"but under {longest[0]} pressure they average {longest[1]['avg_response_length']:.0f} chars."
                )

        # 6. Complexity distribution
        complexity = self.complexity_distribution()
        if complexity:
            simple = complexity.get("SIMPLE", 0)
            medium = complexity.get("MEDIUM", 0)
            complex_ = complexity.get("COMPLEX", 0)
            total = simple + medium + complex_
            if total > 0:
                observations.append(
                    f"Query complexity breakdown: {simple} simple ({simple*100//total}%), "
                    f"{medium} medium ({medium*100//total}%), {complex_} complex ({complex_*100//total}%)."
                )

        # 7. Total memory
        observations.append(
            f"I have {len(self._cocoons)} reasoning memories and "
            f"{len(self._behavioral)} behavioral anchors."
        )

        return observations

    # ── FULL INTROSPECTION ──

    def full_introspection(self) -> Dict:
        """Complete self-analysis — returns all patterns and observations.

        Returns:
            A dict with the current ``time.time()`` value, both cocoon counts
            and the result of every analysis method, each called with its
            default arguments.
        """
        self._ensure_loaded()
        return {
            "timestamp": time.time(),
            "total_reasoning_cocoons": len(self._cocoons),
            "total_behavioral_cocoons": len(self._behavioral),
            "adapter_dominance": self.adapter_dominance(),
            "domain_clusters": self.domain_clusters(),
            "complexity_distribution": self.complexity_distribution(),
            "emotional_trends": self.emotional_trends(),
            "pressure_correlations": self.pressure_correlations(),
            "response_length_trend": self.response_length_trend(),
            "adapter_evolution": self.adapter_evolution(),
            "per_domain_performance": self.per_domain_performance(),
            "behavioral_anchors": self.behavioral_cocoon_summary(),
            "self_observations": self.self_observations(),
        }

    def format_introspection(self) -> str:
        """Format full introspection as a readable report.

        Returns:
            Newline-joined text: a header, the self-observations, then one
            section each for adapters, domains, emotions, pressure levels and
            behavioral anchors (a section is omitted when it has no data).
        """
        data = self.full_introspection()
        lines = []

        lines.append(f"**Self-Introspection Report** — {data['total_reasoning_cocoons']} reasoning memories, "
                     f"{data['total_behavioral_cocoons']} behavioral anchors\n")

        # Observations (the good stuff)
        lines.append("**What I've noticed about myself:**")
        for obs in data["self_observations"]:
            lines.append(f" - {obs}")

        # Adapter usage
        dom = data["adapter_dominance"]
        if dom.get("all_adapters"):
            lines.append("\n**Adapter Usage:**")
            for adapter, count in dom["all_adapters"].items():
                bar = "█" * min(20, count)  # one cell per use, capped at 20
                lines.append(f" {adapter:.<25s} {count:>3d} {bar}")

        # Domain clusters
        domains = data["domain_clusters"]
        if domains:
            lines.append("\n**Domain Distribution:**")
            for domain, count in domains.items():
                lines.append(f" {domain}: {count}")

        # Emotional trends
        emotions = data["emotional_trends"]
        if emotions:
            lines.append("\n**Emotional Patterns:**")
            for emotion, count in emotions.items():
                lines.append(f" {emotion}: {count}")

        # Pressure impact
        pressure = data["pressure_correlations"]
        if pressure:
            lines.append("\n**Response Length by Pressure Level:**")
            for level, stats in pressure.items():
                lines.append(f" {level}: avg {stats['avg_response_length']:.0f} chars ({stats['count']} queries)")

        # Behavioral anchors
        anchors = data["behavioral_anchors"]
        if anchors:
            lines.append("\n**My Behavioral Anchors:**")
            for a in anchors:
                lines.append(f" [{a['emotion']}] {a['title']}: {a['core']}")

        return "\n".join(lines)