ataeff commited on
Commit
e7df4fb
·
verified ·
1 Parent(s): 882f913

Wulf's entropy-driven inference script

Browse files
Files changed (1) hide show
  1. entropy_resonance.py +883 -0
entropy_resonance.py ADDED
@@ -0,0 +1,883 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ entropy_resonance.py — Entropy-Driven Adaptive Resonance for Gemma-3 270M-IT
4
+
5
+ The model doesn't decide WHEN to think. The entropy of its own logits does.
6
+ LoRA teaches it HOW to think. Entropy tells it WHEN.
7
+
8
+ Usage:
9
+ # Interactive mode
10
+ python entropy_resonance.py --adapter-path ./gemma3-resonate/best
11
+
12
+ # Single prompt
13
+ python entropy_resonance.py --adapter-path ./gemma3-resonate/best \
14
+ --prompt "Why does emergence happen?"
15
+
16
+ # Base model without LoRA (entropy still works, resonance content will be weaker)
17
+ python entropy_resonance.py --no-lora --prompt "What is consciousness?"
18
+
19
+ # With custom thresholds
20
+ python entropy_resonance.py --adapter-path ./gemma3-resonate/best \
21
+ --h-high 0.38 --h-low 0.12
22
+
23
+ # Verbose mode with entropy curve visualization
24
+ python entropy_resonance.py --adapter-path ./gemma3-resonate/best \
25
+ --prompt "Is free will real?" --verbose --show-curve
26
+
27
+ # Calibrate thresholds first (recommended for new model/adapter)
28
+ python calibrate_entropy.py --adapter-path ./gemma3-resonate/best
29
+
30
+ Author: Wulf (Opus + Oleg)
31
+ Date: 2026-03-28
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import os
37
+ import sys
38
+ import math
39
+ import time
40
+ import argparse
41
+ import logging
42
+ from dataclasses import dataclass, field
43
+ from typing import Optional
44
+
45
+ import torch
46
+ import torch.nn.functional as F
47
+
48
+ from transformers import AutoModelForCausalLM, AutoTokenizer
49
+
50
+ # ============================================================================
51
+ # Constants
52
+ # ============================================================================
53
+
54
+ MODEL_ID = "unsloth/gemma-3-270m-it"
55
+
56
+ # Gemma-3 chat template
57
+ START_OF_TURN = "<start_of_turn>"
58
+ END_OF_TURN = "<end_of_turn>"
59
+
60
+ # Resonance markers — plain text, not special tokens
61
+ RESONATE_OPEN = "/resonate/"
62
+ RESONATE_CLOSE = "/resonated/"
63
+
64
+ # Gemma-3 vocab size
65
+ VOCAB_SIZE = 262_144
66
+ H_MAX = math.log2(VOCAB_SIZE) # 18.0 bits — theoretical maximum entropy
67
+
68
+ # ============================================================================
69
+ # Logging
70
+ # ============================================================================
71
+
72
+ logging.basicConfig(
73
+ level=logging.INFO,
74
+ format="%(asctime)s [%(levelname)s] %(message)s",
75
+ datefmt="%H:%M:%S",
76
+ )
77
+ log = logging.getLogger("entropy_resonance")
78
+
79
+
80
+ # ============================================================================
81
+ # Entropy Computation
82
+ # ============================================================================
83
+
84
def compute_entropy(logits: torch.Tensor, temperature: float = 1.0) -> float:
    """Compute Shannon entropy of the next-token distribution, in bits.

    CRITICAL: entropy is computed from RAW logits (internal temperature=1.0),
    not from temperature-scaled logits. This gives the model's TRUE
    uncertainty, independent of the sampling temperature choice.

    Args:
        logits: shape (vocab_size,) — raw logits from the model's last layer
        temperature: ignored for entropy computation (kept in the signature
            for interface clarity/compatibility)

    Returns:
        H in bits (log base 2). Range: [0, log2(vocab_size)] = [0, 18.0]
    """
    # log_softmax is numerically stable for finite logits: it never hits
    # log(0), so no clamp(min=1e-10) hack is needed and the probabilities
    # still sum to exactly 1 (clamping after softmax silently breaks
    # normalization and biases the entropy upward).
    log_probs = F.log_softmax(logits.float(), dim=-1)
    probs = log_probs.exp()

    # Shannon entropy in nats; p * log p underflows cleanly to 0.0 as p -> 0
    # because probs is exp(log_probs), never an independent rounded value.
    H_nats = -(probs * log_probs).sum().item()

    # Convert nats -> bits (log2 x = ln x / ln 2)
    return H_nats / math.log(2)
108
+
109
+
110
def normalized_entropy(H: float) -> float:
    """Map an entropy value in bits onto [0, 1] relative to the vocabulary.

    Divides by H_max = log2(262144) = 18 bits, so that:
        0.0 -> perfect certainty (one-hot distribution)
        1.0 -> uniform distribution (maximum uncertainty)
    """
    h_norm = H / H_MAX
    return h_norm
120
+
121
+
122
+ # ============================================================================
123
+ # Entropy Curve Visualization (Terminal)
124
+ # ============================================================================
125
+
126
class EntropyCurve:
    """Collects entropy values during generation and renders ASCII visualization.

    One sample per generated token (raw H in bits plus its normalized form),
    with optional point events (resonance enter/exit) marked by step index.
    """

    def __init__(self, width: int = 70, height: int = 20):
        # width/height of the ASCII plot area (characters)
        self.width = width
        self.height = height
        self.values: list[float] = []  # raw H in bits, one per recorded token
        self.normalized: list[float] = []  # H_norm in [0, 1], parallel to values
        self.tokens: list[str] = []  # decoded token strings, parallel to values
        self.events: list[tuple[int, str]] = []  # (step index, event_type)

    def add(self, H: float, token_str: str):
        # Record one token's entropy sample; normalization uses the
        # module-level H_MAX constant.
        self.values.append(H)
        self.normalized.append(normalized_entropy(H))
        self.tokens.append(token_str)

    def mark_event(self, event_type: str):
        """Mark an event at the current step (e.g., 'enter_resonance', 'exit_resonance').

        NOTE(review): if called before any add(), the recorded step is -1;
        render() silently drops such events via its 0 <= col bounds check.
        """
        self.events.append((len(self.values) - 1, event_type))

    def render(self, h_high: float, h_low: float) -> str:
        """Render ASCII entropy curve with threshold lines and events.

        Args:
            h_high: normalized high threshold (enter resonance)
            h_low: normalized low threshold (exit resonance)

        Returns:
            Multi-line string with the visualization
        """
        if not self.normalized:
            return "(no data)"

        n = len(self.normalized)

        # If more data points than width, subsample evenly across the run
        if n > self.width:
            step = n / self.width
            indices = [int(i * step) for i in range(self.width)]
            data = [self.normalized[i] for i in indices]
        else:
            data = list(self.normalized)
            indices = list(range(n))

        # Scale to height; ceiling is at least 0.5 and always leaves headroom
        # above h_high so the threshold line stays inside the plot
        max_val = max(max(data), h_high + 0.05, 0.5)
        min_val = 0.0

        lines = []
        lines.append(f" Entropy Curve ({n} tokens, H_max={H_MAX:.1f} bits)")
        lines.append(f" H_high={h_high:.3f} (enter resonance) H_low={h_low:.3f} (exit resonance)")
        lines.append("")

        # Build grid (rows x columns of single characters)
        grid = [[' ' for _ in range(len(data))] for _ in range(self.height)]

        # Plot data points: row 0 is the top of the plot, hence the 1.0 - ... flip
        for col, val in enumerate(data):
            row = int((1.0 - (val - min_val) / (max_val - min_val)) * (self.height - 1))
            row = max(0, min(self.height - 1, row))
            grid[row][col] = '#'

        # Plot threshold lines ('-' for H_high, '.' for H_low); data points win
        h_high_row = int((1.0 - (h_high - min_val) / (max_val - min_val)) * (self.height - 1))
        h_low_row = int((1.0 - (h_low - min_val) / (max_val - min_val)) * (self.height - 1))
        h_high_row = max(0, min(self.height - 1, h_high_row))
        h_low_row = max(0, min(self.height - 1, h_low_row))

        for col in range(len(data)):
            if grid[h_high_row][col] == ' ':
                grid[h_high_row][col] = '-'
            if grid[h_low_row][col] == ' ':
                grid[h_low_row][col] = '.'

        # Mark events: map each event's step onto a plot column
        event_map = {}
        for step, etype in self.events:
            if n > self.width:
                # Find the subsampled column closest to the event's step
                col = min(range(len(indices)), key=lambda c: abs(indices[c] - step))
            else:
                col = step
            if 0 <= col < len(data):
                event_map[col] = etype

        # Render the grid with a y-axis label per row
        for row_idx, row in enumerate(grid):
            # Y-axis label: normalized entropy value at this row
            val = max_val - row_idx * (max_val - min_val) / (self.height - 1)
            label = f"{val:.2f}"

            row_str = ''.join(row)

            # Annotate threshold rows on the right margin
            suffix = ""
            if row_idx == h_high_row:
                suffix = " <-- H_high (enter)"
            elif row_idx == h_low_row:
                suffix = " <-- H_low (exit)"

            lines.append(f" {label:>5} |{row_str}|{suffix}")

        # X-axis: '+' under columns that carry an event, '-' elsewhere
        lines.append(f" {''.join(['+' if col in event_map else '-' for col in range(len(data))])}")

        # Event legend line: E/X letters aligned under their columns
        event_line = " "
        for col in range(len(data)):
            if col in event_map:
                if event_map[col] == 'enter_resonance':
                    event_line += 'E'
                elif event_map[col] == 'exit_resonance':
                    event_line += 'X'
                else:
                    event_line += '?'
            else:
                event_line += ' '
        lines.append(event_line)
        lines.append(f" E=enter resonance, X=exit resonance")

        # Stats over the FULL (unsubsampled) series
        avg_h = sum(self.normalized) / len(self.normalized)
        max_h = max(self.normalized)
        min_h = min(self.normalized)
        std_h = (sum((v - avg_h)**2 for v in self.normalized) / len(self.normalized)) ** 0.5

        lines.append("")
        lines.append(f" Stats: mean={avg_h:.4f} max={max_h:.4f} min={min_h:.4f} std={std_h:.4f}")
        lines.append(f" Raw H: mean={sum(self.values)/len(self.values):.2f} bits max={max(self.values):.2f} bits")

        # Resonance segments: pair up enter/exit events in order; an unmatched
        # trailing 'enter' is simply not reported
        in_res = False
        segments = []
        seg_start = 0
        for step, etype in self.events:
            if etype == 'enter_resonance' and not in_res:
                in_res = True
                seg_start = step
            elif etype == 'exit_resonance' and in_res:
                in_res = False
                segments.append((seg_start, step))

        if segments:
            lines.append(f" Resonance segments: {len(segments)}")
            for i, (s, e) in enumerate(segments):
                # NOTE(review): slice s..e inclusive holds e-s+1 samples, but the
                # label prints e-s — off by one in the displayed token count.
                seg_h = self.normalized[s:e+1]
                seg_avg = sum(seg_h) / len(seg_h) if seg_h else 0
                lines.append(f" [{i+1}] tokens {s}-{e} ({e-s} tokens, avg H_norm={seg_avg:.4f})")

        return '\n'.join(lines)
276
+
277
+
278
+ # ============================================================================
279
+ # Resonance State Machine
280
+ # ============================================================================
281
+
282
@dataclass
class ResonanceState:
    """State machine tracking whether generation is inside a resonance span.

    Transitions are driven purely by normalized entropy, with hysteresis on
    both sides (a run of high readings to enter, a run of low readings to
    exit) plus a hard length cap as a safety valve.
    """
    in_resonance: bool = False

    # Hysteresis counters — prevent rapid enter/exit flickering
    consecutive_high: int = 0  # current run length of tokens above h_high
    consecutive_low: int = 0   # current run length of tokens below h_low

    # Normalized entropy thresholds in [0, 1]
    h_high: float = 0.35  # enter resonance above this
    h_low: float = 0.12   # exit resonance below this

    # Hysteresis requirements
    enter_count: int = 3  # consecutive high-entropy tokens needed to enter
    exit_count: int = 5   # consecutive low-entropy tokens needed to exit

    # Safeguards
    max_resonance_tokens: int = 500  # force exit after this many resonance tokens
    resonance_token_count: int = 0   # tokens spent in the current resonance span

    # Entropy modulation (Delta Voice integration)
    beta: float = 0.3  # entropy coupling constant for θ = ε + γ + αδ + βH

    # Sampling parameters (widened by entropy while in resonance)
    base_temperature: float = 0.7
    base_top_p: float = 0.9
    base_top_k: int = 40

    # Diagnostic counters
    total_tokens: int = 0
    resonance_entries: int = 0
    forced_exits: int = 0

    def update(self, h_norm: float) -> Optional[str]:
        """Advance the state machine with one normalized entropy reading.

        Returns:
            'enter_resonance' — caller should inject the /resonate/ marker
            'exit_resonance'  — caller should inject the /resonated/ marker
            'force_exit'      — length cap hit; caller treats it like an exit
            None              — no transition this step
        """
        self.total_tokens += 1

        if not self.in_resonance:
            # --- entry side: count consecutive readings above h_high ---
            if h_norm > self.h_high:
                self.consecutive_high += 1
                self.consecutive_low = 0
            else:
                self.consecutive_high = 0

            if self.consecutive_high < self.enter_count:
                return None

            self.in_resonance = True
            self.resonance_token_count = 0
            self.consecutive_high = 0
            self.resonance_entries += 1
            return 'enter_resonance'

        # --- inside resonance ---
        self.resonance_token_count += 1

        # Hard cap fires regardless of entropy.
        if self.resonance_token_count >= self.max_resonance_tokens:
            self.in_resonance = False
            self.resonance_token_count = 0
            self.consecutive_high = 0
            self.consecutive_low = 0
            self.forced_exits += 1
            return 'force_exit'

        # Natural exit: count consecutive readings below h_low.
        if h_norm < self.h_low:
            self.consecutive_low += 1
            self.consecutive_high = 0
        else:
            self.consecutive_low = 0

        if self.consecutive_low < self.exit_count:
            return None

        self.in_resonance = False
        self.resonance_token_count = 0
        self.consecutive_low = 0
        return 'exit_resonance'

    def get_sampling_params(self, h_norm: float) -> dict:
        """Return sampling parameters for the current mode.

        Outside resonance the base parameters are returned unchanged. Inside,
        each parameter is widened in proportion to entropy — the ANALOG βH
        term in θ = ε + γ + αδ + βH.
        """
        if not self.in_resonance:
            return {
                'temperature': self.base_temperature,
                'top_p': self.base_top_p,
                'top_k': self.base_top_k,
            }

        coupling = self.beta * h_norm
        return {
            'temperature': self.base_temperature * (1.0 + coupling),
            'top_p': min(0.98, self.base_top_p + coupling * 0.15),
            'top_k': int(self.base_top_k * (1.0 + coupling)),
        }

    def summary(self) -> str:
        """Return a one-line diagnostic summary of the counters."""
        return (
            f"Resonance: {self.resonance_entries} entries, "
            f"{self.forced_exits} forced exits, "
            f"{self.total_tokens} total tokens"
        )
402
+
403
+
404
+ # ============================================================================
405
+ # The Main Beast: Entropy-Driven Generation
406
+ # ============================================================================
407
+
408
def entropy_generate(
    model,
    tokenizer,
    prompt: str,
    state: ResonanceState,
    max_new_tokens: int = 768,
    verbose: bool = False,
    show_curve: bool = False,
    repetition_penalty: float = 1.3,
) -> tuple[str, EntropyCurve]:
    """Generate text with entropy-driven adaptive resonance.

    This is NOT model.generate(). We run the generation loop manually,
    token by token, computing entropy at each step and making resonance
    decisions in real time:

      1. compute Shannon entropy of the raw next-token logits;
      2. feed it to the ResonanceState machine — on a transition, inject the
         /resonate/ or /resonated/ marker into the context and re-run the
         model so the following logits condition on the marker;
      3. otherwise sample the next token with entropy-modulated
         temperature/top-p/top-k and a repetition penalty.

    Args:
        model: Gemma-3 270M-IT (with or without LoRA adapter)
        tokenizer: Gemma tokenizer
        prompt: user's question/input
        state: ResonanceState with thresholds and parameters (its per-run
            counters are reset in place at the start of each call)
        max_new_tokens: maximum tokens to generate
        verbose: log entropy/sampling info every 10 steps
        show_curve: collect per-token data for visualization
        repetition_penalty: >1.0 penalizes recently generated tokens

    Returns:
        (generated_text, entropy_curve)
    """
    device = next(model.parameters()).device
    model.eval()

    # Format prompt in the Gemma chat template
    input_text = f"{START_OF_TURN}user\n{prompt}{END_OF_TURN}\n{START_OF_TURN}model\n"
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)

    # Initialize
    curve = EntropyCurve()
    generated_ids = []   # tokens produced this call, including injected markers
    generated_text = ""

    # Full context (prompt + everything generated) — re-fed to the model each step
    all_ids = input_ids[0].tolist()

    # Stop conditions: EOS token id, or the textual end-of-turn marker.
    # End-of-turn is matched on the decoded STRING below, which is robust to
    # the marker being split across several tokens (the previously computed
    # token-id form of the marker was dead code and has been removed).
    eos_id = tokenizer.eos_token_id
    eot_text = END_OF_TURN

    # Reset per-generation state so repeated calls don't leak counters
    state.in_resonance = False
    state.consecutive_high = 0
    state.consecutive_low = 0
    state.resonance_token_count = 0
    state.total_tokens = 0
    state.resonance_entries = 0
    state.forced_exits = 0

    # Prefill: get initial logits from the full prompt context
    with torch.no_grad():
        outputs = model(input_ids)
        next_logits = outputs.logits[0, -1, :]  # (vocab_size,)

    for step in range(max_new_tokens):
        # ── 1. Compute entropy from RAW logits ──
        H = compute_entropy(next_logits)
        h_norm = normalized_entropy(H)

        # ── 2. Check resonance state ──
        event = state.update(h_norm)

        if event == 'enter_resonance':
            # Inject /resonate/ marker into the generation
            marker_text = f"\n{RESONATE_OPEN}\n"
            marker_ids = tokenizer.encode(marker_text, add_special_tokens=False)
            generated_ids.extend(marker_ids)
            all_ids.extend(marker_ids)
            generated_text += marker_text

            if verbose:
                log.info(f" [ENTER RESONANCE] H_norm={h_norm:.4f} at token {step}")

            if show_curve:
                curve.mark_event('enter_resonance')

            # Re-run model with the injected marker so subsequent logits
            # (and hence entropy) condition on it
            full_ids = torch.tensor([all_ids], device=device)
            with torch.no_grad():
                outputs = model(full_ids)
                next_logits = outputs.logits[0, -1, :]
            continue  # Re-evaluate entropy after marker injection

        elif event in ('exit_resonance', 'force_exit'):
            # Inject /resonated/ marker
            marker_text = f"\n{RESONATE_CLOSE}\n"
            marker_ids = tokenizer.encode(marker_text, add_special_tokens=False)
            generated_ids.extend(marker_ids)
            all_ids.extend(marker_ids)
            generated_text += marker_text

            if verbose:
                if event == 'force_exit':
                    log.warning(f" [FORCED EXIT] Max resonance tokens exceeded at step {step}")
                else:
                    log.info(f" [EXIT RESONANCE] H_norm={h_norm:.4f} at token {step}")

            if show_curve:
                curve.mark_event('exit_resonance')

            # Re-run model with marker
            full_ids = torch.tensor([all_ids], device=device)
            with torch.no_grad():
                outputs = model(full_ids)
                next_logits = outputs.logits[0, -1, :]
            continue

        # ── 3. Get entropy-modulated sampling parameters ──
        params = state.get_sampling_params(h_norm)

        # ── 4. Apply repetition penalty (CTRL-style: divide positive logits,
        #       multiply negative ones, over the last 50 generated tokens) ──
        logits = next_logits.clone()
        if repetition_penalty != 1.0 and generated_ids:
            for prev_id in set(generated_ids[-50:]):  # look back 50 tokens
                if logits[prev_id] > 0:
                    logits[prev_id] /= repetition_penalty
                else:
                    logits[prev_id] *= repetition_penalty

        # ── 5. Apply temperature ──
        temp = params['temperature']
        if temp > 0:
            logits = logits / temp
        else:
            # temperature=0 → greedy decoding (argmax below, no scaling)
            pass

        # ── 6. Apply top-k filtering ──
        top_k = params['top_k']
        if top_k > 0:
            indices_to_remove = logits < torch.topk(logits, top_k)[0][-1]
            logits[indices_to_remove] = float('-inf')

        # ── 7. Apply top-p (nucleus) filtering ──
        top_p = params['top_p']
        if top_p < 1.0:
            sorted_logits, sorted_indices = torch.sort(logits, descending=True)
            cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
            # Remove tokens with cumulative prob above top_p
            sorted_indices_to_remove = cumulative_probs > top_p
            # Shift right so the first token crossing the threshold is kept
            sorted_indices_to_remove[1:] = sorted_indices_to_remove[:-1].clone()
            sorted_indices_to_remove[0] = False
            indices_to_remove = sorted_indices[sorted_indices_to_remove]
            logits[indices_to_remove] = float('-inf')

        # ── 8. Sample ──
        probs = F.softmax(logits, dim=-1)
        if temp > 0:
            next_token = torch.multinomial(probs, num_samples=1).item()
        else:
            next_token = torch.argmax(logits).item()

        # ── 9. Check for EOS ──
        if next_token == eos_id:
            break

        generated_ids.append(next_token)
        all_ids.append(next_token)

        token_str = tokenizer.decode([next_token])
        generated_text += token_str

        # Stop (and strip the marker) if we just completed an end_of_turn
        if generated_text.rstrip().endswith(eot_text):
            generated_text = generated_text.rstrip()[:-len(eot_text)].rstrip()
            break

        # ── 10. Record for visualization ──
        if show_curve:
            curve.add(H, token_str)

        if verbose and step % 10 == 0:
            mode = "RESONANCE" if state.in_resonance else "crystal"
            log.info(
                f" step={step:3d} H={H:.2f}bits H_norm={h_norm:.4f} "
                f"mode={mode} temp={params['temperature']:.3f} "
                f"token={repr(token_str)}"
            )

        # ── 11. Forward pass for next token ──
        # (The old unused single-token `next_input` tensor was dead code.)
        full_ids = torch.tensor([all_ids], device=device)
        with torch.no_grad():
            # Use full context for each step (no KV cache for simplicity;
            # for production, implement KV cache management)
            outputs = model(full_ids)
            next_logits = outputs.logits[0, -1, :]

    return generated_text, curve
609
+
610
+
611
+ # ============================================================================
612
+ # Pretty Printing
613
+ # ============================================================================
614
+
615
def print_result(prompt: str, generated: str, curve: EntropyCurve,
                 state: ResonanceState, show_curve: bool = False,
                 h_high: float = 0.35, h_low: float = 0.12):
    """Pretty-print one generation result.

    When the output contains both resonance markers it is displayed as
    three sections (pre-resonance text, indented reasoning, final answer);
    otherwise the whole text is shown as a direct answer. Optionally
    appends the rendered ASCII entropy curve.
    """

    print(f"\n{'='*70}")
    print(f" PROMPT: {prompt}")
    print(f"{'='*70}")

    # Only treat the output as resonance-structured if BOTH markers appear
    # somewhere in it (matching the original gating).
    if RESONATE_OPEN in generated and RESONATE_CLOSE in generated:
        # Everything before the first /resonate/ marker.
        pre_resonate, _, rest = generated.partition(RESONATE_OPEN)
        pre_resonate = pre_resonate.strip()

        # Split the remainder at the first /resonated/ marker, if any.
        reasoning_part, close_marker, answer_part = rest.partition(RESONATE_CLOSE)
        if close_marker:
            reasoning = reasoning_part.strip()
            answer = answer_part.strip()
        else:
            # Close marker preceded the open marker — resonance never closed.
            reasoning = rest.strip()
            answer = "[resonance did not crystallize — forced exit or max tokens]"

        if pre_resonate:
            print(f"\n {pre_resonate}")

        print(f"\n --- {RESONATE_OPEN} ---")
        # Reasoning is printed line by line with a gutter indent.
        for line in reasoning.split('\n'):
            print(f" | {line}")

        print(f"\n --- {RESONATE_CLOSE} ---")
        print(f"\n {answer}")
    else:
        # No resonance triggered — entropy stayed low the whole way.
        print(f"\n [direct answer — entropy stayed low, no resonance needed]")
        print(f"\n {generated}")

    print(f"\n{'─'*70}")
    print(f" {state.summary()}")

    if show_curve and curve.values:
        print(f"\n{curve.render(h_high, h_low)}")

    print(f"{'='*70}\n")
660
+
661
+
662
+ # ============================================================================
663
+ # Model Loading
664
+ # ============================================================================
665
+
666
def load_model(model_id: str = MODEL_ID, adapter_path: Optional[str] = None,
               device: Optional[str] = None) -> tuple:
    """Load Gemma-3 270M-IT with optional LoRA adapter.

    Args:
        model_id: base model identifier (hub id or local path)
        adapter_path: path to a LoRA adapter directory (None for base model);
            if the path does not exist the process exits with an error
        device: 'cuda', 'cpu', or 'mps' (auto-detected if None)

    Returns:
        (model, tokenizer, device_str)
    """
    # Auto-detect best available device: CUDA > Apple MPS > CPU
    if device is None:
        if torch.cuda.is_available():
            device = 'cuda'
        elif torch.backends.mps.is_available():
            device = 'mps'
        else:
            device = 'cpu'

    log.info(f"Loading tokenizer from {model_id}...")
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

    log.info(f"Loading model from {model_id} onto {device}...")

    # bfloat16 only on CUDA; MPS/CPU stay in float32
    dtype = torch.bfloat16 if device == 'cuda' else torch.float32

    # device_map places the model on CUDA directly; SDPA attention only on
    # CUDA, eager elsewhere
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=dtype,
        device_map=device if device == 'cuda' else None,
        attn_implementation="sdpa" if device == 'cuda' else "eager",
        trust_remote_code=True,
    )

    # Non-CUDA devices were loaded without device_map — move manually
    if device != 'cuda':
        model = model.to(device)

    total_params = sum(p.numel() for p in model.parameters())
    log.info(f"Base model: {total_params/1e6:.1f}M params, dtype={dtype}")

    # Load LoRA adapter if provided
    if adapter_path:
        if not os.path.isdir(adapter_path):
            log.error(f"Adapter path does not exist: {adapter_path}")
            log.error("Run training first: python train_gemma_resonate.py")
            sys.exit(1)

        # peft is imported lazily so the base-model path works without it
        from peft import PeftModel
        log.info(f"Loading LoRA adapter from {adapter_path}...")
        model = PeftModel.from_pretrained(model, adapter_path)
        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        log.info(f"Adapter loaded: {trainable/1e6:.1f}M trainable params")

    model.eval()
    return model, tokenizer, device
722
+
723
+
724
+ # ============================================================================
725
+ # Interactive Mode
726
+ # ============================================================================
727
+
728
def interactive_mode(model, tokenizer, state: ResonanceState,
                     verbose: bool = False, show_curve: bool = False):
    """Interactive REPL for entropy-driven resonance.

    Commands:
        /quit                     exit the loop
        /verbose                  toggle per-step entropy logging
        /curve                    toggle ASCII curve rendering
        /thresholds H_HIGH H_LOW  update resonance thresholds in place

    Any other input is treated as a prompt. Mutates `state` (thresholds via
    /thresholds, per-run counters via entropy_generate).
    """

    print(f"\n{'='*70}")
    print(f" ENTROPY-DRIVEN ADAPTIVE RESONANCE")
    print(f" Gemma-3 270M-IT + Entropy Monitoring")
    print(f"{'─'*70}")
    print(f" H_high = {state.h_high:.3f} (enter resonance)")
    print(f" H_low = {state.h_low:.3f} (exit resonance)")
    print(f" Beta = {state.beta:.2f} (entropy coupling)")
    print(f" Max resonance tokens = {state.max_resonance_tokens}")
    print(f"{'─'*70}")
    print(f" Commands: /quit /verbose /curve /thresholds H_HIGH H_LOW")
    print(f"{'='*70}\n")

    while True:
        try:
            prompt = input(">>> ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nExiting.")
            break

        if not prompt:
            continue

        if prompt == '/quit':
            break
        elif prompt == '/verbose':
            verbose = not verbose
            print(f" Verbose: {'ON' if verbose else 'OFF'}")
            continue
        elif prompt == '/curve':
            show_curve = not show_curve
            print(f" Curve: {'ON' if show_curve else 'OFF'}")
            continue
        elif prompt.startswith('/thresholds'):
            parts = prompt.split()
            if len(parts) == 3:
                try:
                    state.h_high = float(parts[1])
                    state.h_low = float(parts[2])
                    print(f" Thresholds updated: H_high={state.h_high:.3f}, H_low={state.h_low:.3f}")
                except ValueError:
                    print(f" Usage: /thresholds 0.35 0.12")
            else:
                # No arguments: just report the current thresholds
                print(f" Current: H_high={state.h_high:.3f}, H_low={state.h_low:.3f}")
            continue

        # Generate with entropy monitoring
        t0 = time.time()

        generated, curve = entropy_generate(
            model, tokenizer, prompt, state,
            verbose=verbose,
            show_curve=show_curve,
        )

        elapsed = time.time() - t0

        print_result(prompt, generated, curve, state,
                     show_curve=show_curve,
                     h_high=state.h_high, h_low=state.h_low)

        # BUG FIX: curve.values is only populated when show_curve is on, so
        # the previous `len(curve.values) if curve.values else 0` reported
        # "~0 tokens, 0.0 tok/s" in the default (curve off) mode. Fall back
        # to state.total_tokens, which entropy_generate updates on every
        # step regardless of show_curve.
        tokens_generated = len(curve.values) or state.total_tokens
        tps = tokens_generated / elapsed if elapsed > 0 else 0
        print(f" [{elapsed:.1f}s, ~{tokens_generated} tokens, {tps:.1f} tok/s]\n")
795
+
796
+
797
+ # ============================================================================
798
+ # Main
799
+ # ============================================================================
800
+
801
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the CLI parser. All flags, defaults, and help text unchanged."""
    parser = argparse.ArgumentParser(
        description="Entropy-Driven Adaptive Resonance — inference for Gemma-3 270M-IT"
    )

    # Model selection
    parser.add_argument("--model", default=MODEL_ID, help="Base model ID")
    parser.add_argument("--adapter-path", default=None, help="LoRA adapter path")
    parser.add_argument("--no-lora", action="store_true", help="Skip LoRA loading")
    parser.add_argument("--device", default=None, help="Device: cuda/cpu/mps (auto)")

    # Generation
    parser.add_argument("--prompt", default=None, help="Single prompt (non-interactive)")
    parser.add_argument("--max-tokens", type=int, default=768, help="Max tokens to generate")

    # Entropy thresholds
    parser.add_argument("--h-high", type=float, default=0.35,
                        help="Normalized entropy threshold to enter resonance (0-1)")
    parser.add_argument("--h-low", type=float, default=0.12,
                        help="Normalized entropy threshold to exit resonance (0-1)")
    parser.add_argument("--beta", type=float, default=0.3,
                        help="Entropy coupling constant (Delta Voice integration)")

    # Hysteresis
    parser.add_argument("--enter-count", type=int, default=3,
                        help="Consecutive high-entropy tokens to enter resonance")
    parser.add_argument("--exit-count", type=int, default=5,
                        help="Consecutive low-entropy tokens to exit resonance")
    parser.add_argument("--max-resonance", type=int, default=500,
                        help="Max tokens in a single resonance section")

    # Sampling
    parser.add_argument("--temperature", type=float, default=0.7, help="Base temperature")
    parser.add_argument("--top-p", type=float, default=0.9, help="Base top-p")
    parser.add_argument("--top-k", type=int, default=40, help="Base top-k")
    parser.add_argument("--repetition-penalty", type=float, default=1.3,
                        help="Repetition penalty")

    # Display
    parser.add_argument("--verbose", action="store_true", help="Show entropy per step")
    parser.add_argument("--show-curve", action="store_true",
                        help="Show ASCII entropy curve after generation")

    return parser


def main():
    """CLI entry point: parse args, load model (+ optional LoRA), then run
    a single prompt or the interactive REPL."""
    args = _build_arg_parser().parse_args()

    # --no-lora suppresses the adapter entirely, even if a path was given
    adapter = None if args.no_lora else args.adapter_path
    model, tokenizer, device = load_model(args.model, adapter, args.device)

    # Build the resonance state machine from the CLI knobs
    state = ResonanceState(
        h_high=args.h_high,
        h_low=args.h_low,
        enter_count=args.enter_count,
        exit_count=args.exit_count,
        max_resonance_tokens=args.max_resonance,
        beta=args.beta,
        base_temperature=args.temperature,
        base_top_p=args.top_p,
        base_top_k=args.top_k,
    )

    if args.prompt:
        # Single-prompt mode
        generated, curve = entropy_generate(
            model, tokenizer, args.prompt, state,
            max_new_tokens=args.max_tokens,
            verbose=args.verbose,
            show_curve=args.show_curve,
            repetition_penalty=args.repetition_penalty,
        )
        print_result(args.prompt, generated, curve, state,
                     show_curve=args.show_curve,
                     h_high=state.h_high, h_low=state.h_low)
    else:
        # Interactive REPL mode
        interactive_mode(model, tokenizer, state,
                         verbose=args.verbose,
                         show_curve=args.show_curve)


if __name__ == "__main__":
    main()