/**
* @id: mod_thermo_compression_v2
* @version: 2.0.0
* @description: Padé-based context compression for runtime token budgeting.
* Implements the closed-form D_f window from
* Marin 2026, "Predicting How Transformers Attend" §sec:kvcache (Eq. 24).
* @license: Apache-2.0
*
* Drop into a Node/browser pipeline before any LLM API call to truncate
* non-critical context analytically (no profiling, no fine-tuning).
*
* Usage:
* import { ContextCompressor } from "./context_compressor.js";
* const cc = new ContextCompressor({ theta: 10000, T_train: 2048 });
* const { kept, dropped, Df, gamma } = cc.compress(tokens, { f: 0.9 });
*/
export class ContextCompressor {
  /**
   * Builds a compressor and precomputes γ from the Padé predictor.
   *
   * @param {Object} cfg
   * @param {number} cfg.theta RoPE base from model.config.rope_theta
   * @param {number} cfg.T_train training context length
   * @param {number|null} [cfg.T_eval] evaluation length (default = T_train)
   * @throws {Error} if theta, T_train, or a supplied T_eval is not a
   *   positive finite number
   */
  constructor({ theta, T_train, T_eval = null }) {
    // Number.isFinite (unlike a bare typeof check) also rejects NaN and
    // ±Infinity, both of which would otherwise produce gamma = NaN.
    if (!Number.isFinite(theta) || theta <= 0) {
      throw new Error(`theta must be a positive number, got ${theta}`);
    }
    if (!Number.isFinite(T_train) || T_train <= 0) {
      throw new Error(`T_train must be a positive number, got ${T_train}`);
    }
    // T_eval is optional; null/undefined means "same as T_train".
    if (T_eval != null && (!Number.isFinite(T_eval) || T_eval <= 0)) {
      throw new Error(`T_eval must be a positive number, got ${T_eval}`);
    }
    this.theta = theta;
    this.T_train = T_train;
    this.T_eval = T_eval ?? T_train;
    this.gamma = this._gammaPade();
  }

  /**
   * Padé closed-form γ predictor (paper §sec:gamma_pade):
   * γ = (2θ − √2·T) / (2θ + √2·T), with T = T_eval.
   * For positive θ and T the result lies strictly between −1 and 1.
   *
   * @returns {number} predicted γ
   */
  _gammaPade() {
    const T = this.T_eval;
    const num = 2 * this.theta - T * Math.SQRT2;
    const den = 2 * this.theta + T * Math.SQRT2;
    return num / den;
  }

  /**
   * Validity zone for D_f truncation (paper L11, EXP-B2 extended).
   *
   * @returns {boolean} true when γ is inside [0.67, 0.85]
   */
  _isValidPhase() {
    return this.gamma >= 0.67 && this.gamma <= 0.85;
  }

  /**
   * Closed-form D_f window: minimum context that retains fraction f
   * of total attention mass. Eq. 24 in the paper.
   *
   * The result is floored at 64 tokens and capped at N, so the window
   * never exceeds the context it is taken from.
   *
   * @param {number} N total context length (tokens), non-negative
   * @param {number} f retention fraction in [0, 1], default 0.9
   * @returns {number} D_f in tokens; for γ >= 1 (Hagedorn / Phase B) the
   *   limiting form N^f is used instead of Eq. 24
   * @throws {RangeError} if N is negative/non-finite or f is outside [0, 1]
   */
  computeDf(N, f = 0.9) {
    if (!Number.isFinite(N) || N < 0) {
      throw new RangeError(`N must be a non-negative finite number, got ${N}`);
    }
    // Outside [0, 1] the inner term can go negative and the fractional
    // power yields NaN, so reject it instead of returning NaN.
    if (!Number.isFinite(f) || f < 0 || f > 1) {
      throw new RangeError(`f must be a number in [0, 1], got ${f}`);
    }

    let raw;
    if (this.gamma >= 1.0) {
      // Hagedorn / Phase B: limiting form D_f ≈ N^f. _gammaPade never
      // reaches this for valid inputs; it guards an externally set gamma.
      raw = Math.pow(N, f);
    } else {
      const inner = (1 - f) + f * Math.pow(N, 1 - this.gamma);
      raw = Math.pow(inner, 1 / (1 - this.gamma));
    }
    return Math.min(N, Math.max(64, Math.round(raw)));
  }

  /**
   * Compress a token array by retaining the last D_f tokens.
   * Tokens are dropped from the head (oldest first), preserving recency.
   *
   * @param {Array<*>} tokens array of tokens (any opaque type); anything
   *   exposing `length` and `slice` is accepted
   * @param {Object} [opts]
   * @param {number} [opts.f=0.9] attention retention fraction
   * @param {boolean} [opts.force=false] override the validity guard
   * @returns {{kept:Array, dropped:Array, Df:number, gamma:number, phase:string, warning?:string}}
   *   `warning` is present only when the validity guard forced a passthrough
   * @throws {TypeError} if tokens has no numeric length / slice method
   * @throws {RangeError} if f is outside [0, 1]
   */
  compress(tokens, { f = 0.9, force = false } = {}) {
    if (
      tokens == null ||
      typeof tokens.length !== "number" ||
      typeof tokens.slice !== "function"
    ) {
      throw new TypeError("tokens must be an array (or expose length and slice)");
    }

    const N = tokens.length;
    // Computed before the guard so an invalid f is reported on every path.
    const Df = this.computeDf(N, f);
    const { gamma } = this;
    const phase = gamma < 1 ? "A" : gamma > 1 ? "B" : "Hagedorn";

    // Validity guard: outside [0.67, 0.85] the D_f formula has been
    // empirically observed to over- or under-compress (paper L11).
    if (!force && !this._isValidPhase()) {
      return {
        kept: tokens,
        dropped: [],
        Df: N,
        gamma,
        phase,
        warning: `gamma=${gamma.toFixed(3)} outside validity zone [0.67,0.85]; passthrough`,
      };
    }

    // Window covers the whole input: nothing to drop.
    if (Df >= N) {
      return { kept: tokens, dropped: [], Df, gamma, phase };
    }

    const cut = N - Df;
    return {
      kept: tokens.slice(cut),
      dropped: tokens.slice(0, cut),
      Df,
      gamma,
      phase,
    };
  }
}
/**
 * Bus-style integration (matches the user's mod pattern).
 *
 * Subscribes to the "GOV" topic and handles payloads of the form
 * { action: "OPTIMIZE_TOKENS", tokens, theta, T_train, T_eval, f };
 * any other action is ignored. theta, T_train and f fall back to
 * 10000, 2048 and 0.9; T_eval is optional.
 * Emits VFS_NOTIFY (result) and LOG on success, SENTRY_ERR on failure.
 *
 * @param {Object} bus event bus; `emit` and `on` are each optional
 */
export const init = (bus) => {
  bus.emit?.("LOG", { level: "info", msg: "thermo_compression v2 loaded" });
  bus.on?.("GOV", (payload) => {
    if (payload?.action !== "OPTIMIZE_TOKENS") return;
    try {
      const cc = new ContextCompressor({
        theta: payload.theta ?? 10000,
        T_train: payload.T_train ?? 2048,
        // undefined falls through to the compressor's own default (T_train).
        T_eval: payload.T_eval,
      });
      const { kept, dropped, Df, gamma, phase, warning } = cc.compress(
        payload.tokens,
        { f: payload.f ?? 0.9 }
      );
      const total = payload.tokens.length;
      bus.emit?.("VFS_NOTIFY", {
        status: "success",
        savedTokens: dropped.length,
        // An empty input would otherwise report 0 / 0 = NaN.
        compressionRatio: total > 0 ? dropped.length / total : 0,
        Df, gamma, phase,
        data: kept,
        ...(warning ? { warning } : {}),
      });
      bus.emit?.("LOG", {
        level: warning ? "warn" : "info",
        msg: `compressed ${total} → ${kept.length} (γ=${gamma.toFixed(3)}, phase=${phase})`,
      });
    } catch (err) {
      bus.emit?.("SENTRY_ERR", {
        impact: "medium",
        // Tolerate non-Error throwables (e.g. null) without throwing again.
        error: err?.message ?? String(err),
        context: "mod_thermo_compression_v2",
      });
    }
  });
};
|