File size: 5,054 Bytes
b4f7029
 
 
 
 
1a6c909
b4f7029
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
/**
 * @id: mod_thermo_compression_v2
 * @version: 2.0.0
 * @description: Padé-based context compression for runtime token budgeting.
 *               Implements the closed-form D_f window from
 *               Marin 2026, "Predicting How Transformers Attend" §sec:kvcache (Eq. 24).
 * @license: Apache-2.0
 *
 * Drop into a Node/browser pipeline before any LLM API call to truncate
 * non-critical context analytically (no profiling, no fine-tuning).
 *
 * Usage:
 *   import { ContextCompressor } from "./context_compressor.js";
 *   const cc = new ContextCompressor({ theta: 10000, T_train: 2048 });
 *   const { kept, dropped, Df, gamma } = cc.compress(tokens, { f: 0.9 });
 */

export class ContextCompressor {
  /**
   * @param {Object} cfg
   * @param {number} cfg.theta    RoPE base from model.config.rope_theta
   * @param {number} cfg.T_train  training context length
   * @param {number} [cfg.T_eval] evaluation length (default = T_train)
   * @throws {Error} if theta, T_train, or T_eval is not a finite positive number
   */
  constructor({ theta, T_train, T_eval = null }) {
    // Number.isFinite rejects NaN and ±Infinity, which the previous
    // `typeof x !== "number"` check let through (NaN <= 0 is false).
    if (!Number.isFinite(theta) || theta <= 0) {
      throw new Error(`theta must be a positive number, got ${theta}`);
    }
    if (!Number.isFinite(T_train) || T_train <= 0) {
      throw new Error(`T_train must be a positive number, got ${T_train}`);
    }
    // T_eval was previously unvalidated: a NaN/negative value silently
    // produced a garbage gamma and permanent passthrough in compress().
    if (T_eval !== null && (!Number.isFinite(T_eval) || T_eval <= 0)) {
      throw new Error(`T_eval must be a positive number, got ${T_eval}`);
    }
    this.theta = theta;
    this.T_train = T_train;
    this.T_eval = T_eval ?? T_train;
    this.gamma = this._gammaPade();
  }

  /**
   * Padé closed-form γ predictor (paper §sec:gamma_pade).
   * Note: for positive theta and T_eval this is strictly within (-1, 1),
   * so gamma >= 1 (Phase B / Hagedorn) is only reachable if `gamma` is
   * overwritten externally after construction.
   * @returns {number} predicted γ
   */
  _gammaPade() {
    const T = this.T_eval;
    const num = 2 * this.theta - T * Math.SQRT2;
    const den = 2 * this.theta + T * Math.SQRT2;
    return num / den;
  }

  /**
   * Validity zone for D_f truncation (paper L11, EXP-B2 extended).
   * @returns {boolean} true iff gamma ∈ [0.67, 0.85]
   */
  _isValidPhase() {
    return this.gamma >= 0.67 && this.gamma <= 0.85;
  }

  /**
   * Closed-form D_f window: minimum context that retains fraction f
   * of total attention mass. Eq. 24 in the paper.
   *
   * @param {number} N  total context length (tokens); assumed a
   *                    non-negative integer (compress() guarantees this)
   * @param {number} f  retention fraction in (0, 1), default 0.9
   * @returns {number}  D_f in tokens (floored at 64), or the N^f limiting
   *                    form when gamma >= 1 (Phase B / Hagedorn)
   */
  computeDf(N, f = 0.9) {
    if (this.gamma >= 1.0) {
      // Hagedorn / Phase B: limiting form D_f ≈ N^f
      return Math.max(64, Math.round(Math.pow(N, f)));
    }
    const inner = (1 - f) + f * Math.pow(N, 1 - this.gamma);
    return Math.max(64, Math.round(Math.pow(inner, 1 / (1 - this.gamma))));
  }

  /**
   * Compress a token array by retaining the last D_f tokens.
   * Tokens are dropped from the head (oldest first), preserving recency.
   *
   * @param {Array<*>} tokens   array of tokens (any opaque type)
   * @param {Object}  [opts]
   * @param {number}  [opts.f=0.9]      attention retention fraction
   * @param {boolean} [opts.force=false] override the validity guard
   * @returns {{kept:Array, dropped:Array, Df:number, gamma:number, phase:string, warning:(string|undefined)}}
   */
  compress(tokens, { f = 0.9, force = false } = {}) {
    const N = tokens.length;
    const Df = this.computeDf(N, f);
    const phase = this.gamma < 1 ? "A" : this.gamma > 1 ? "B" : "Hagedorn";

    // Validity guard: outside [0.67, 0.85] the D_f formula has been
    // empirically observed to over- or under-compress (paper L11).
    if (!force && !this._isValidPhase()) {
      return {
        kept: tokens,
        dropped: [],
        Df: N,
        gamma: this.gamma,
        phase,
        warning: `gamma=${this.gamma.toFixed(3)} outside validity zone [0.67,0.85]; passthrough`,
      };
    }

    // Window already covers the whole context — nothing to drop.
    if (Df >= N) {
      return { kept: tokens, dropped: [], Df, gamma: this.gamma, phase };
    }

    const dropped = tokens.slice(0, N - Df);
    const kept = tokens.slice(N - Df);
    return { kept, dropped, Df, gamma: this.gamma, phase };
  }
}

/**
 * Bus-style integration (matches the user's mod pattern).
 * Listens for { action: "OPTIMIZE_TOKENS", tokens, theta, T_train, f } events
 * and emits VFS_NOTIFY / LOG / SENTRY_ERR results.
 */
/**
 * Wire the compressor onto the event bus.
 * @param {Object} bus  event bus exposing optional `on(event, fn)` and
 *                      `emit(event, payload)` (both called via `?.` so a
 *                      partial bus implementation is tolerated)
 */
export const init = (bus) => {
  bus.emit?.("LOG", { level: "info", msg: "thermo_compression v2 loaded" });

  bus.on?.("GOV", (payload) => {
    if (payload?.action !== "OPTIMIZE_TOKENS") return;
    try {
      // Explicit check: a missing/non-array tokens field previously
      // surfaced as an opaque TypeError inside compress().
      if (!Array.isArray(payload.tokens)) {
        throw new Error("OPTIMIZE_TOKENS requires payload.tokens to be an array");
      }
      const cc = new ContextCompressor({
        theta: payload.theta ?? 10000,
        T_train: payload.T_train ?? 2048,
      });
      const { kept, dropped, Df, gamma, phase, warning } = cc.compress(
        payload.tokens,
        { f: payload.f ?? 0.9 }
      );
      const total = payload.tokens.length;
      bus.emit?.("VFS_NOTIFY", {
        status: "success",
        savedTokens: dropped.length,
        // guard: 0/0 would emit NaN for an empty token array
        compressionRatio: total > 0 ? dropped.length / total : 0,
        Df, gamma, phase,
        data: kept,
        ...(warning ? { warning } : {}),
      });
      bus.emit?.("LOG", {
        level: warning ? "warn" : "info",
        // "→" separator restored — it had been lost, fusing the two counts
        msg: `compressed ${total} → ${kept.length} (γ=${gamma.toFixed(3)}, phase=${phase})`,
      });
    } catch (err) {
      bus.emit?.("SENTRY_ERR", {
        impact: "medium",
        error: err.message,
        context: "mod_thermo_compression_v2",
      });
    }
  });
};