| |
| """ |
| entropy_resonance.py — Entropy-Driven Adaptive Resonance for Gemma-3 270M-IT |
| |
| The model doesn't decide WHEN to think. The entropy of its own logits does. |
| LoRA teaches it HOW to think. Entropy tells it WHEN. |
| |
| Usage: |
| # Interactive mode |
| python entropy_resonance.py --adapter-path ./gemma3-resonate/best |
| |
| # Single prompt |
| python entropy_resonance.py --adapter-path ./gemma3-resonate/best \ |
| --prompt "Why does emergence happen?" |
| |
| # Base model without LoRA (entropy still works, resonance content will be weaker) |
| python entropy_resonance.py --no-lora --prompt "What is consciousness?" |
| |
| # With custom thresholds |
| python entropy_resonance.py --adapter-path ./gemma3-resonate/best \ |
| --h-high 0.38 --h-low 0.12 |
| |
| # Verbose mode with entropy curve visualization |
| python entropy_resonance.py --adapter-path ./gemma3-resonate/best \ |
| --prompt "Is free will real?" --verbose --show-curve |
| |
| # Calibrate thresholds first (recommended for new model/adapter) |
| python calibrate_entropy.py --adapter-path ./gemma3-resonate/best |
| |
| Author: Wulf (Opus + Oleg) |
| Date: 2026-03-28 |
| """ |
|
|
| from __future__ import annotations |
|
|
| import os |
| import sys |
| import math |
| import time |
| import argparse |
| import logging |
| from dataclasses import dataclass, field |
| from typing import Optional |
|
|
| import torch |
| import torch.nn.functional as F |
|
|
| from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
| |
| |
| |
|
|
# Base model: instruction-tuned Gemma-3 270M (Unsloth mirror).
MODEL_ID = "unsloth/gemma-3-270m-it"

# Gemma chat-template turn delimiters (literal special-token text).
START_OF_TURN = "<start_of_turn>"
END_OF_TURN = "<end_of_turn>"

# Markers injected into the generated text around entropy-triggered
# "thinking" sections; the LoRA adapter is trained on this format.
RESONATE_OPEN = "/resonate/"
RESONATE_CLOSE = "/resonated/"

# Gemma-3 vocabulary size; H_MAX = log2(vocab) is the entropy ceiling
# in bits (= 18.0), used to normalize raw entropy into [0, 1].
VOCAB_SIZE = 262_144
H_MAX = math.log2(VOCAB_SIZE)

# Module-level logger with compact timestamped output.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%H:%M:%S",
)
log = logging.getLogger("entropy_resonance")
|
|
|
|
| |
| |
| |
|
|
def compute_entropy(logits: torch.Tensor, temperature: float = 1.0) -> float:
    """Compute Shannon entropy of the next-token distribution, in bits.

    CRITICAL: entropy is computed from the RAW logits (internal
    temperature = 1.0), not from temperature-scaled logits. This gives the
    model's TRUE uncertainty, independent of the sampling temperature.

    Args:
        logits: shape (vocab_size,) — raw logits from the model's last layer
        temperature: ignored for entropy computation (kept for interface
            clarity; sampling temperature is applied elsewhere)

    Returns:
        H in bits (log base 2). Range: [0, log2(vocab_size)] = [0, 18.0]
    """
    # log_softmax is numerically stable for extreme logits — no need for
    # the softmax-then-clamp(1e-10) trick, which distorts tiny probabilities.
    logp = F.log_softmax(logits.float(), dim=-1)
    probs = logp.exp()

    # Guard masked (-inf) logits: a zero-probability entry contributes
    # exactly 0 to the entropy (avoids 0 * -inf = NaN).
    terms = torch.where(probs > 0, probs * logp, torch.zeros_like(probs))

    # The sum is in nats; divide by ln(2) to convert to bits.
    return -terms.sum().item() / math.log(2.0)
|
|
|
|
def normalized_entropy(H: float) -> float:
    """Scale a raw entropy value (bits) into [0, 1] relative to the vocab ceiling.

    A value of 0.0 means a one-hot (perfectly certain) distribution;
    1.0 means a uniform distribution over the whole vocabulary.
    """
    # H_MAX = log2(VOCAB_SIZE) is the theoretical maximum entropy.
    h_norm = H / H_MAX
    return h_norm
|
|
|
|
| |
| |
| |
|
|
class EntropyCurve:
    """Collects entropy values during generation and renders ASCII visualization."""

    def __init__(self, width: int = 70, height: int = 20):
        # Plot dimensions in characters.
        self.width = width
        self.height = height
        self.values: list[float] = []        # raw entropy per token, in bits
        self.normalized: list[float] = []    # same values scaled to [0, 1]
        self.tokens: list[str] = []          # decoded token text per step
        self.events: list[tuple[int, str]] = []  # (step index, event type)

    def add(self, H: float, token_str: str):
        # Record one generation step (raw + normalized entropy + token text).
        self.values.append(H)
        self.normalized.append(normalized_entropy(H))
        self.tokens.append(token_str)

    def mark_event(self, event_type: str):
        """Mark an event at the current step (e.g., 'enter_resonance', 'exit_resonance')."""
        # Anchored to the index of the most recently added value.
        self.events.append((len(self.values) - 1, event_type))

    def render(self, h_high: float, h_low: float) -> str:
        """Render ASCII entropy curve with threshold lines and events.

        Args:
            h_high: normalized high threshold (enter resonance)
            h_low: normalized low threshold (exit resonance)

        Returns:
            Multi-line string with the visualization
        """
        if not self.normalized:
            return "(no data)"

        n = len(self.normalized)

        # Downsample to the plot width by striding; otherwise plot 1:1.
        if n > self.width:
            step = n / self.width
            indices = [int(i * step) for i in range(self.width)]
            data = [self.normalized[i] for i in indices]
        else:
            data = list(self.normalized)
            indices = list(range(n))

        # Vertical scale: always show at least up to 0.5 and slightly
        # above the enter threshold so the H_high line stays on-plot.
        max_val = max(max(data), h_high + 0.05, 0.5)
        min_val = 0.0

        lines = []
        lines.append(f"  Entropy Curve ({n} tokens, H_max={H_MAX:.1f} bits)")
        lines.append(f"  H_high={h_high:.3f} (enter resonance)   H_low={h_low:.3f} (exit resonance)")
        lines.append("")

        # Character grid, row 0 = top of plot.
        grid = [[' ' for _ in range(len(data))] for _ in range(self.height)]

        # Plot the curve: map each value to a row (inverted y axis).
        for col, val in enumerate(data):
            row = int((1.0 - (val - min_val) / (max_val - min_val)) * (self.height - 1))
            row = max(0, min(self.height - 1, row))
            grid[row][col] = '#'

        # Rows where the two threshold lines sit, clamped on-plot.
        h_high_row = int((1.0 - (h_high - min_val) / (max_val - min_val)) * (self.height - 1))
        h_low_row = int((1.0 - (h_low - min_val) / (max_val - min_val)) * (self.height - 1))
        h_high_row = max(0, min(self.height - 1, h_high_row))
        h_low_row = max(0, min(self.height - 1, h_low_row))

        # Draw threshold lines without overwriting curve points.
        for col in range(len(data)):
            if grid[h_high_row][col] == ' ':
                grid[h_high_row][col] = '-'
            if grid[h_low_row][col] == ' ':
                grid[h_low_row][col] = '.'

        # Map each recorded event's step to the nearest plotted column.
        event_map = {}
        for step, etype in self.events:
            if n > self.width:
                # Nearest downsampled column to the original step index.
                col = min(range(len(indices)), key=lambda c: abs(indices[c] - step))
            else:
                col = step
            if 0 <= col < len(data):
                event_map[col] = etype

        # Emit the grid with y-axis labels and threshold annotations.
        for row_idx, row in enumerate(grid):
            # Value represented by this row (top row = max_val).
            val = max_val - row_idx * (max_val - min_val) / (self.height - 1)
            label = f"{val:.2f}"

            row_str = ''.join(row)

            suffix = ""
            if row_idx == h_high_row:
                suffix = "  <-- H_high (enter)"
            elif row_idx == h_low_row:
                suffix = "  <-- H_low (exit)"

            lines.append(f"  {label:>5} |{row_str}|{suffix}")

        # Tick line: '+' under columns that carry an event.
        lines.append(f"        {''.join(['+' if col in event_map else '-' for col in range(len(data))])}")

        # Event legend line: E/X characters aligned under their columns.
        event_line = "        "
        for col in range(len(data)):
            if col in event_map:
                if event_map[col] == 'enter_resonance':
                    event_line += 'E'
                elif event_map[col] == 'exit_resonance':
                    event_line += 'X'
                else:
                    event_line += '?'
            else:
                event_line += ' '
        lines.append(event_line)
        lines.append(f"  E=enter resonance, X=exit resonance")

        # Summary statistics over the full (non-downsampled) series.
        avg_h = sum(self.normalized) / len(self.normalized)
        max_h = max(self.normalized)
        min_h = min(self.normalized)
        std_h = (sum((v - avg_h)**2 for v in self.normalized) / len(self.normalized)) ** 0.5

        lines.append("")
        lines.append(f"  Stats: mean={avg_h:.4f} max={max_h:.4f} min={min_h:.4f} std={std_h:.4f}")
        lines.append(f"  Raw H: mean={sum(self.values)/len(self.values):.2f} bits max={max(self.values):.2f} bits")

        # Reconstruct enter/exit pairs into (start, end) segments.
        # NOTE(review): an unclosed trailing segment is dropped here — confirm
        # that is intended for forced-exit-at-end-of-generation cases.
        in_res = False
        segments = []
        seg_start = 0
        for step, etype in self.events:
            if etype == 'enter_resonance' and not in_res:
                in_res = True
                seg_start = step
            elif etype == 'exit_resonance' and in_res:
                in_res = False
                segments.append((seg_start, step))

        if segments:
            lines.append(f"  Resonance segments: {len(segments)}")
            for i, (s, e) in enumerate(segments):
                seg_h = self.normalized[s:e+1]
                seg_avg = sum(seg_h) / len(seg_h) if seg_h else 0
                lines.append(f"    [{i+1}] tokens {s}-{e} ({e-s} tokens, avg H_norm={seg_avg:.4f})")

        return '\n'.join(lines)
|
|
|
|
| |
| |
| |
|
|
@dataclass
class ResonanceState:
    """State machine tracking resonance mode during generation."""
    # True while generation is inside a /resonate/ section.
    in_resonance: bool = False

    # Hysteresis counters: consecutive tokens above / below threshold.
    consecutive_high: int = 0
    consecutive_low: int = 0

    # Normalized-entropy thresholds (enter when high, exit when low).
    h_high: float = 0.35
    h_low: float = 0.12

    # Debounce: consecutive qualifying tokens required for a transition.
    enter_count: int = 3
    exit_count: int = 5

    # Hard cap on the length of a single resonance section.
    max_resonance_tokens: int = 500
    resonance_token_count: int = 0

    # Entropy coupling constant — the beta*H term in theta = eps + gamma + alpha*delta + beta*H.
    beta: float = 0.3

    # Baseline sampling parameters (used outside resonance).
    base_temperature: float = 0.7
    base_top_p: float = 0.9
    base_top_k: int = 40

    # Diagnostics.
    total_tokens: int = 0
    resonance_entries: int = 0
    forced_exits: int = 0

    def update(self, h_norm: float) -> Optional[str]:
        """Advance the state machine with one normalized entropy sample.

        Returns:
            'enter_resonance' — inject /resonate/ marker
            'exit_resonance' — inject /resonated/ marker
            'force_exit' — max tokens exceeded, force exit
            None — no state change
        """
        self.total_tokens += 1
        if self.in_resonance:
            return self._step_inside(h_norm)
        return self._step_outside(h_norm)

    def _step_inside(self, h_norm: float) -> Optional[str]:
        # One more token spent inside the current resonance section.
        self.resonance_token_count += 1

        # Safety valve: never resonate forever.
        if self.resonance_token_count >= self.max_resonance_tokens:
            self.in_resonance = False
            self.resonance_token_count = 0
            self.consecutive_high = 0
            self.consecutive_low = 0
            self.forced_exits += 1
            return 'force_exit'

        # Exit hysteresis: require `exit_count` consecutive low-entropy tokens.
        if h_norm < self.h_low:
            self.consecutive_low += 1
            self.consecutive_high = 0
        else:
            self.consecutive_low = 0

        if self.consecutive_low >= self.exit_count:
            self.in_resonance = False
            self.resonance_token_count = 0
            self.consecutive_low = 0
            return 'exit_resonance'
        return None

    def _step_outside(self, h_norm: float) -> Optional[str]:
        # Enter hysteresis: require `enter_count` consecutive high-entropy tokens.
        if h_norm > self.h_high:
            self.consecutive_high += 1
            self.consecutive_low = 0
        else:
            self.consecutive_high = 0

        if self.consecutive_high >= self.enter_count:
            self.in_resonance = True
            self.resonance_token_count = 0
            self.consecutive_high = 0
            self.resonance_entries += 1
            return 'enter_resonance'
        return None

    def get_sampling_params(self, h_norm: float) -> dict:
        """Return entropy-modulated sampling parameters.

        Inside /resonate/: more exploratory (higher temp, wider sampling).
        Outside /resonate/: crystallized baseline parameters.
        The modulation is ANALOG — it scales with the entropy level.
        """
        if not self.in_resonance:
            return {
                'temperature': self.base_temperature,
                'top_p': self.base_top_p,
                'top_k': self.base_top_k,
            }
        # Exploratory mode: widen sampling proportionally to entropy.
        boost = self.beta * h_norm
        return {
            'temperature': self.base_temperature * (1.0 + boost),
            'top_p': min(0.98, self.base_top_p + boost * 0.15),
            'top_k': int(self.base_top_k * (1.0 + boost)),
        }

    def summary(self) -> str:
        """Return a one-line diagnostic summary."""
        return (
            f"Resonance: {self.resonance_entries} entries, "
            f"{self.forced_exits} forced exits, "
            f"{self.total_tokens} total tokens"
        )
|
|
|
|
| |
| |
| |
|
|
def entropy_generate(
    model,
    tokenizer,
    prompt: str,
    state: ResonanceState,
    max_new_tokens: int = 768,
    verbose: bool = False,
    show_curve: bool = False,
    repetition_penalty: float = 1.3,
) -> tuple[str, EntropyCurve]:
    """Generate text with entropy-driven adaptive resonance.

    This is NOT model.generate(). The loop runs manually, token by token:
    at each step the next-token entropy is computed from raw logits and fed
    to `state`, which may trigger injection of the /resonate/ or
    /resonated/ markers into the context in real time.

    Args:
        model: Gemma-3 270M-IT (with or without LoRA adapter)
        tokenizer: Gemma tokenizer
        prompt: user's question/input
        state: ResonanceState with thresholds and parameters (reset here)
        max_new_tokens: maximum tokens to generate
        verbose: log entropy every 10 steps and on every transition
        show_curve: kept for interface compatibility; curve data is now
            always collected so callers can report accurate token counts
        repetition_penalty: divisive penalty over the last 50 generated ids

    Returns:
        (generated_text, entropy_curve)
    """
    device = next(model.parameters()).device
    model.eval()

    # Gemma chat format: one user turn, model turn left open for generation.
    input_text = f"{START_OF_TURN}user\n{prompt}{END_OF_TURN}\n{START_OF_TURN}model\n"
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)

    curve = EntropyCurve()
    generated_ids: list[int] = []
    generated_text = ""

    # Full context (prompt + generated tokens + injected markers) as ids.
    all_ids = input_ids[0].tolist()

    eos_id = tokenizer.eos_token_id
    eot_text = END_OF_TURN

    # Reset per-generation state so a shared ResonanceState can be reused.
    state.in_resonance = False
    state.consecutive_high = 0
    state.consecutive_low = 0
    state.resonance_token_count = 0
    state.total_tokens = 0
    state.resonance_entries = 0
    state.forced_exits = 0

    def _forward() -> torch.Tensor:
        """Recompute next-token logits over the full current context.

        NOTE(review): no KV cache is used, so every step is a full forward
        pass over the whole context (O(n^2) overall). Acceptable for a
        270M model at these lengths; revisit if contexts grow.
        """
        ctx = torch.tensor([all_ids], device=device)
        with torch.no_grad():
            out = model(ctx)
        return out.logits[0, -1, :]

    next_logits = _forward()

    for step in range(max_new_tokens):
        # Entropy of the model's TRUE (unscaled) next-token distribution.
        H = compute_entropy(next_logits)
        h_norm = normalized_entropy(H)

        event = state.update(h_norm)

        if event is not None:
            # A transition fired: inject the matching marker text so the
            # model conditions on it, then recompute logits and re-decide.
            if event == 'enter_resonance':
                marker_text = f"\n{RESONATE_OPEN}\n"
            else:  # 'exit_resonance' or 'force_exit'
                marker_text = f"\n{RESONATE_CLOSE}\n"
            marker_ids = tokenizer.encode(marker_text, add_special_tokens=False)
            generated_ids.extend(marker_ids)
            all_ids.extend(marker_ids)
            generated_text += marker_text

            if verbose:
                if event == 'enter_resonance':
                    log.info(f"  [ENTER RESONANCE] H_norm={h_norm:.4f} at token {step}")
                elif event == 'force_exit':
                    log.warning(f"  [FORCED EXIT] Max resonance tokens exceeded at step {step}")
                else:
                    log.info(f"  [EXIT RESONANCE] H_norm={h_norm:.4f} at token {step}")

            # Curve events are always recorded now (a forced exit is marked
            # as an exit, matching the original behavior).
            curve.mark_event(
                'enter_resonance' if event == 'enter_resonance' else 'exit_resonance'
            )

            next_logits = _forward()
            continue

        # Entropy-modulated sampling parameters (the analog beta*H coupling).
        params = state.get_sampling_params(h_norm)

        # Repetition penalty over the recent window: divide positive logits,
        # multiply negative ones (standard CTRL-style formulation).
        logits = next_logits.clone()
        if repetition_penalty != 1.0 and generated_ids:
            for prev_id in set(generated_ids[-50:]):
                if logits[prev_id] > 0:
                    logits[prev_id] /= repetition_penalty
                else:
                    logits[prev_id] *= repetition_penalty

        # Temperature scaling; temp <= 0 means greedy (handled at sampling).
        temp = params['temperature']
        if temp > 0:
            logits = logits / temp

        # Top-k filter: drop everything below the k-th best logit.
        top_k = params['top_k']
        if top_k > 0:
            indices_to_remove = logits < torch.topk(logits, top_k)[0][-1]
            logits[indices_to_remove] = float('-inf')

        # Nucleus (top-p) filter over the surviving tokens.
        top_p = params['top_p']
        if top_p < 1.0:
            sorted_logits, sorted_indices = torch.sort(logits, descending=True)
            cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
            sorted_indices_to_remove = cumulative_probs > top_p
            # Shift right so the first token crossing the threshold is kept.
            sorted_indices_to_remove[1:] = sorted_indices_to_remove[:-1].clone()
            sorted_indices_to_remove[0] = False
            indices_to_remove = sorted_indices[sorted_indices_to_remove]
            logits[indices_to_remove] = float('-inf')

        # Sample (or argmax when greedy).
        probs = F.softmax(logits, dim=-1)
        if temp > 0:
            next_token = torch.multinomial(probs, num_samples=1).item()
        else:
            next_token = torch.argmax(logits).item()

        if next_token == eos_id:
            break

        generated_ids.append(next_token)
        all_ids.append(next_token)

        token_str = tokenizer.decode([next_token])
        generated_text += token_str

        # Stop on Gemma's end-of-turn text and trim it from the output.
        if generated_text.rstrip().endswith(eot_text):
            generated_text = generated_text.rstrip()[:-len(eot_text)].rstrip()
            break

        # Always record curve data (fix: previously gated on show_curve,
        # which made the interactive mode's token/sec report read 0 tokens).
        curve.add(H, token_str)

        if verbose and step % 10 == 0:
            mode = "RESONANCE" if state.in_resonance else "crystal"
            log.info(
                f"  step={step:3d} H={H:.2f}bits H_norm={h_norm:.4f} "
                f"mode={mode} temp={params['temperature']:.3f} "
                f"token={repr(token_str)}"
            )

        next_logits = _forward()

    return generated_text, curve
|
|
|
|
| |
| |
| |
|
|
def print_result(prompt: str, generated: str, curve: EntropyCurve,
                 state: ResonanceState, show_curve: bool = False,
                 h_high: float = 0.35, h_low: float = 0.12):
    """Pretty-print a generation: prompt, resonance section (if any), answer."""

    bar = '=' * 70
    print(f"\n{bar}")
    print(f"  PROMPT: {prompt}")
    print(f"{bar}")

    # If both markers appear, split the output into pre-text, reasoning,
    # and crystallized answer; otherwise print the text as a direct answer.
    if RESONATE_OPEN in generated and RESONATE_CLOSE in generated:
        pre_resonate, _, rest = generated.partition(RESONATE_OPEN)
        pre_resonate = pre_resonate.strip()

        if RESONATE_CLOSE in rest:
            reasoning, _, answer = rest.partition(RESONATE_CLOSE)
            reasoning = reasoning.strip()
            answer = answer.strip()
        else:
            reasoning = rest.strip()
            answer = "[resonance did not crystallize — forced exit or max tokens]"

        if pre_resonate:
            print(f"\n  {pre_resonate}")

        print(f"\n  --- {RESONATE_OPEN} ---")
        # Gutter-prefix each reasoning line for readability.
        for reasoning_line in reasoning.split('\n'):
            print(f"  | {reasoning_line}")

        print(f"\n  --- {RESONATE_CLOSE} ---")
        print(f"\n  {answer}")
    else:
        # No resonance section was ever opened and closed.
        print(f"\n  [direct answer — entropy stayed low, no resonance needed]")
        print(f"\n  {generated}")

    print(f"\n{'─'*70}")
    print(f"  {state.summary()}")

    if show_curve and curve.values:
        print(f"\n{curve.render(h_high, h_low)}")

    print(f"{bar}\n")
|
|
|
|
| |
| |
| |
|
|
def load_model(model_id: str = MODEL_ID, adapter_path: Optional[str] = None,
               device: Optional[str] = None) -> tuple:
    """Load Gemma-3 270M-IT with optional LoRA adapter.

    Args:
        model_id: base model identifier (HF hub id or local path)
        adapter_path: path to LoRA adapter directory (None for base model)
        device: 'cuda', 'cpu', or 'mps' (auto-detected if None)

    Returns:
        (model, tokenizer, device_str)
    """
    # Auto-detect best available device: CUDA > Apple MPS > CPU.
    if device is None:
        if torch.cuda.is_available():
            device = 'cuda'
        elif torch.backends.mps.is_available():
            device = 'mps'
        else:
            device = 'cpu'

    log.info(f"Loading tokenizer from {model_id}...")
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

    log.info(f"Loading model from {model_id} onto {device}...")

    # bf16 only on CUDA; MPS/CPU fall back to fp32 for numeric safety.
    dtype = torch.bfloat16 if device == 'cuda' else torch.float32

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=dtype,
        # device_map only for CUDA; other devices are moved manually below.
        device_map=device if device == 'cuda' else None,
        attn_implementation="sdpa" if device == 'cuda' else "eager",
        trust_remote_code=True,
    )

    # Non-CUDA devices: move the whole model explicitly.
    if device != 'cuda':
        model = model.to(device)

    total_params = sum(p.numel() for p in model.parameters())
    log.info(f"Base model: {total_params/1e6:.1f}M params, dtype={dtype}")

    # Optionally wrap the base model with a LoRA adapter.
    if adapter_path:
        if not os.path.isdir(adapter_path):
            log.error(f"Adapter path does not exist: {adapter_path}")
            log.error("Run training first: python train_gemma_resonate.py")
            sys.exit(1)

        # Lazy import: peft is only required when an adapter is requested.
        from peft import PeftModel
        log.info(f"Loading LoRA adapter from {adapter_path}...")
        model = PeftModel.from_pretrained(model, adapter_path)
        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        log.info(f"Adapter loaded: {trainable/1e6:.1f}M trainable params")

    model.eval()
    return model, tokenizer, device
|
|
|
|
| |
| |
| |
|
|
def interactive_mode(model, tokenizer, state: ResonanceState,
                     verbose: bool = False, show_curve: bool = False):
    """Interactive REPL for entropy-driven resonance."""

    bar = '=' * 70
    thin = '─' * 70

    # Session banner with the active configuration.
    print(f"\n{bar}")
    print(f"  ENTROPY-DRIVEN ADAPTIVE RESONANCE")
    print(f"  Gemma-3 270M-IT + Entropy Monitoring")
    print(f"{thin}")
    print(f"  H_high = {state.h_high:.3f} (enter resonance)")
    print(f"  H_low = {state.h_low:.3f} (exit resonance)")
    print(f"  Beta = {state.beta:.2f} (entropy coupling)")
    print(f"  Max resonance tokens = {state.max_resonance_tokens}")
    print(f"{thin}")
    print(f"  Commands: /quit /verbose /curve /thresholds H_HIGH H_LOW")
    print(f"{bar}\n")

    while True:
        try:
            user_input = input(">>> ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nExiting.")
            break

        # Ignore blank lines.
        if not user_input:
            continue

        # --- command handling (guard-style, one `continue` per command) ---
        if user_input == '/quit':
            break

        if user_input == '/verbose':
            verbose = not verbose
            print(f"  Verbose: {'ON' if verbose else 'OFF'}")
            continue

        if user_input == '/curve':
            show_curve = not show_curve
            print(f"  Curve: {'ON' if show_curve else 'OFF'}")
            continue

        if user_input.startswith('/thresholds'):
            fields = user_input.split()
            if len(fields) == 3:
                try:
                    state.h_high = float(fields[1])
                    state.h_low = float(fields[2])
                    print(f"  Thresholds updated: H_high={state.h_high:.3f}, H_low={state.h_low:.3f}")
                except ValueError:
                    print(f"  Usage: /thresholds 0.35 0.12")
            else:
                print(f"  Current: H_high={state.h_high:.3f}, H_low={state.h_low:.3f}")
            continue

        # --- regular prompt: generate and report ---
        started = time.time()

        generated, curve = entropy_generate(
            model, tokenizer, user_input, state,
            verbose=verbose,
            show_curve=show_curve,
        )

        elapsed = time.time() - started

        print_result(user_input, generated, curve, state,
                     show_curve=show_curve,
                     h_high=state.h_high, h_low=state.h_low)

        n_tokens = len(curve.values) if curve.values else 0
        rate = n_tokens / elapsed if elapsed > 0 else 0
        print(f"  [{elapsed:.1f}s, ~{n_tokens} tokens, {rate:.1f} tok/s]\n")
|
|
| |
| |
| |
|
|
def main():
    """CLI entry point: parse arguments, load model, run single-shot or REPL."""
    parser = argparse.ArgumentParser(
        description="Entropy-Driven Adaptive Resonance — inference for Gemma-3 270M-IT"
    )

    # Model / adapter selection.
    parser.add_argument("--model", default=MODEL_ID, help="Base model ID")
    parser.add_argument("--adapter-path", default=None, help="LoRA adapter path")
    parser.add_argument("--no-lora", action="store_true", help="Skip LoRA loading")
    parser.add_argument("--device", default=None, help="Device: cuda/cpu/mps (auto)")

    # Generation mode.
    parser.add_argument("--prompt", default=None, help="Single prompt (non-interactive)")
    parser.add_argument("--max-tokens", type=int, default=768, help="Max tokens to generate")

    # Entropy thresholds.
    parser.add_argument("--h-high", type=float, default=0.35,
                        help="Normalized entropy threshold to enter resonance (0-1)")
    parser.add_argument("--h-low", type=float, default=0.12,
                        help="Normalized entropy threshold to exit resonance (0-1)")
    parser.add_argument("--beta", type=float, default=0.3,
                        help="Entropy coupling constant (Delta Voice integration)")

    # Hysteresis / limits.
    parser.add_argument("--enter-count", type=int, default=3,
                        help="Consecutive high-entropy tokens to enter resonance")
    parser.add_argument("--exit-count", type=int, default=5,
                        help="Consecutive low-entropy tokens to exit resonance")
    parser.add_argument("--max-resonance", type=int, default=500,
                        help="Max tokens in a single resonance section")

    # Base sampling parameters.
    parser.add_argument("--temperature", type=float, default=0.7, help="Base temperature")
    parser.add_argument("--top-p", type=float, default=0.9, help="Base top-p")
    parser.add_argument("--top-k", type=int, default=40, help="Base top-k")
    parser.add_argument("--repetition-penalty", type=float, default=1.3,
                        help="Repetition penalty")

    # Diagnostics.
    parser.add_argument("--verbose", action="store_true", help="Show entropy per step")
    parser.add_argument("--show-curve", action="store_true",
                        help="Show ASCII entropy curve after generation")

    args = parser.parse_args()

    # Fix: the hysteresis only makes sense with the exit threshold strictly
    # below the enter threshold — fail fast on inverted values.
    if args.h_low >= args.h_high:
        parser.error(f"--h-low ({args.h_low}) must be less than --h-high ({args.h_high})")

    # --no-lora overrides any adapter path.
    adapter = None if args.no_lora else args.adapter_path
    model, tokenizer, device = load_model(args.model, adapter, args.device)

    state = ResonanceState(
        h_high=args.h_high,
        h_low=args.h_low,
        enter_count=args.enter_count,
        exit_count=args.exit_count,
        max_resonance_tokens=args.max_resonance,
        beta=args.beta,
        base_temperature=args.temperature,
        base_top_p=args.top_p,
        base_top_k=args.top_k,
    )

    # Fix: `is not None` instead of truthiness, so an explicitly given
    # empty --prompt "" runs single-shot mode instead of silently
    # dropping into the REPL.
    if args.prompt is not None:
        generated, curve = entropy_generate(
            model, tokenizer, args.prompt, state,
            max_new_tokens=args.max_tokens,
            verbose=args.verbose,
            show_curve=args.show_curve,
            repetition_penalty=args.repetition_penalty,
        )
        print_result(args.prompt, generated, curve, state,
                     show_curve=args.show_curve,
                     h_high=state.h_high, h_low=state.h_low)
    else:
        interactive_mode(model, tokenizer, state,
                         verbose=args.verbose,
                         show_curve=args.show_curve)
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|