/** * @id: mod_thermo_compression_v2 * @version: 2.0.0 * @description: Padé-based context compression for runtime token budgeting. * Implements the closed-form D_f window from * Marin 2026, "Predicting How Transformers Attend" §sec:kvcache (Eq. 24). * @license: Apache-2.0 * * Drop into a Node/browser pipeline before any LLM API call to truncate * non-critical context analytically (no profiling, no fine-tuning). * * Usage: * import { ContextCompressor } from "./context_compressor.js"; * const cc = new ContextCompressor({ theta: 10000, T_train: 2048 }); * const { kept, dropped, Df, gamma } = cc.compress(tokens, { f: 0.9 }); */ export class ContextCompressor { /** * @param {Object} cfg * @param {number} cfg.theta RoPE base from model.config.rope_theta * @param {number} cfg.T_train training context length * @param {number} [cfg.T_eval] evaluation length (default = T_train) */ constructor({ theta, T_train, T_eval = null }) { if (typeof theta !== "number" || theta <= 0) { throw new Error(`theta must be a positive number, got ${theta}`); } if (typeof T_train !== "number" || T_train <= 0) { throw new Error(`T_train must be a positive number, got ${T_train}`); } this.theta = theta; this.T_train = T_train; this.T_eval = T_eval ?? T_train; this.gamma = this._gammaPade(); } /** Padé closed-form γ predictor (paper §sec:gamma_pade). */ _gammaPade() { const T = this.T_eval; const num = 2 * this.theta - T * Math.SQRT2; const den = 2 * this.theta + T * Math.SQRT2; return num / den; } /** Validity zone for D_f truncation (paper L11, EXP-B2 extended). */ _isValidPhase() { return this.gamma >= 0.67 && this.gamma <= 0.85; } /** * Closed-form D_f window: minimum context that retains fraction f * of total attention mass. Eq. 24 in the paper. * * @param {number} N total context length (tokens) * @param {number} f retention fraction in (0, 1), default 0.9 * @returns {number} D_f in tokens, or N if Phase B / Hagedorn */ computeDf(N, f = 0.9) { if (this.gamma >= 1.0) { // Hagedorn / Phase B: limiting form D_f ≈ N^f return Math.max(64, Math.round(Math.pow(N, f))); } const inner = (1 - f) + f * Math.pow(N, 1 - this.gamma); return Math.max(64, Math.round(Math.pow(inner, 1 / (1 - this.gamma)))); } /** * Compress a token array by retaining the last D_f tokens. * Tokens are dropped from the head (oldest first), preserving recency. * * @param {Array<*>} tokens array of tokens (any opaque type) * @param {Object} [opts] * @param {number} [opts.f=0.9] attention retention fraction * @param {boolean} [opts.force=false] override the validity guard * @returns {{kept:Array, dropped:Array, Df:number, gamma:number, phase:string}} */ compress(tokens, { f = 0.9, force = false } = {}) { const N = tokens.length; const Df = this.computeDf(N, f); const phase = this.gamma < 1 ? "A" : this.gamma > 1 ? "B" : "Hagedorn"; // Validity guard: outside [0.67, 0.85] the D_f formula has been // empirically observed to over- or under-compress (paper L11). if (!force && !this._isValidPhase()) { return { kept: tokens, dropped: [], Df: N, gamma: this.gamma, phase, warning: `gamma=${this.gamma.toFixed(3)} outside validity zone [0.67,0.85]; passthrough`, }; } if (Df >= N) { return { kept: tokens, dropped: [], Df, gamma: this.gamma, phase }; } const dropped = tokens.slice(0, N - Df); const kept = tokens.slice(N - Df); return { kept, dropped, Df, gamma: this.gamma, phase }; } } /** * Bus-style integration (matches the user's mod pattern). * Listens for { action: "OPTIMIZE_TOKENS", tokens, theta, T_train, f } events * and emits VFS_NOTIFY / LOG / SENTRY_ERR results. */ export const init = (bus) => { bus.emit?.("LOG", { level: "info", msg: "thermo_compression v2 loaded" }); bus.on?.("GOV", (payload) => { if (payload?.action !== "OPTIMIZE_TOKENS") return; try { const cc = new ContextCompressor({ theta: payload.theta ?? 10000, T_train: payload.T_train ?? 2048, }); const { kept, dropped, Df, gamma, phase, warning } = cc.compress( payload.tokens, { f: payload.f ?? 0.9 } ); bus.emit?.("VFS_NOTIFY", { status: "success", savedTokens: dropped.length, compressionRatio: dropped.length / payload.tokens.length, Df, gamma, phase, data: kept, ...(warning ? { warning } : {}), }); bus.emit?.("LOG", { level: warning ? "warn" : "info", msg: `compressed ${payload.tokens.length} → ${kept.length} (γ=${gamma.toFixed(3)}, phase=${phase})`, }); } catch (err) { bus.emit?.("SENTRY_ERR", { impact: "medium", error: err.message, context: "mod_thermo_compression_v2", }); } }); };