// Prompt-Cache Diff Predictor (v0.8.4 anti-bullshit pack #10)
//
// Pain: small prompt edits silently invalidate provider prompt caches,
// turning a 50-90% input discount into a 0% discount and, on Anthropic's
// 0.10 read multiplier, charging up to 10x more for the previously cached prefix.
// Users debug this blind because:
//   - Anthropic's `cache_control` cache breaks at the first token diff
//     in the marked prefix (TTL 5 min default, 1 hour beta).
//   - OpenAI auto-caches prefixes ≥1024 tokens but invalidates on any
//     prefix change; the 50% read discount only applies on hit.
//   - Gemini's context cache requires explicit creation, ≥32K tokens,
//     and any prefix edit forces a new cache.
//
// Tool: paste old + new prompt → compute the longest common prefix
// (character-level, then converted to estimated tokens) → predict
// per-provider cache hit ratio + $ delta vs the uncached cost.
//
// Pure logic — no human strings; main.js does i18n. Returns
// {code, params, providers: [{provider_id, ...}]}.

// =============================================================================
// Token estimation — heuristic, browser-only
// =============================================================================
//
// Real tokenizers vary by ±15% between Llama / GPT / Claude / Qwen and
// running them in-browser would mean shipping a 5-10 MB WASM blob. For a
// cache-diff predictor the absolute count doesn't matter — what matters
// is the RATIO of common-prefix to divergent-suffix tokens, which is
// robust to estimator choice. The three profiles below cover 95% of
// real prompts; users with extreme cases can paste pre-tokenized counts.
const TOKEN_PROFILES = {
  english: { chars_per_token: 4.0, label_key: "cache.profile.english" },
  code:    { chars_per_token: 3.5, label_key: "cache.profile.code" },
  mixed:   { chars_per_token: 2.0, label_key: "cache.profile.mixed" }, // CJK / Cyrillic
};

export function estimateTokens(text, profile = "english") {
  if (typeof text !== "string" || !text) return 0;
  const cpt = TOKEN_PROFILES[profile]?.chars_per_token ?? 4.0;
  return Math.ceil(text.length / cpt);
}
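
// A couple of worked calls for orientation (illustrative only; a real
// tokenizer will land somewhere inside the ±15% band mentioned above):
//
//   estimateTokens("You are a helpful assistant.", "english")
//   // 28 chars / 4.0 chars-per-token → Math.ceil(7.0) = 7
//
//   estimateTokens("const x = foo(bar);", "code")
//   // 19 chars / 3.5 chars-per-token → Math.ceil(5.43) = 6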

// =============================================================================
// Provider rules — pricing + cache mechanics
// =============================================================================
//
// Prices are USD per million tokens, snapshot 2026-01 (knowledge cutoff).
// `cache_read_multiplier` is the fraction of input price billed on a
// cache hit (Anthropic 0.10 = 10%; OpenAI 0.50 = 50%; Gemini 0.25 = 25%).
// `cache_write_multiplier` accounts for Anthropic's 25% write surcharge
// the first time a prefix is seen.
//
// `min_cache_tokens` is the floor below which the provider cannot cache
// (OpenAI auto-cache requires ≥1024; Gemini context cache ≥32K).
// Anthropic has no min token floor but requires explicit cache_control
// marker — we treat that as min=0 with a `requires_explicit` flag for UI.
export const PROVIDERS = {
  anthropic_opus: {
    name: "Claude Opus 4.7",
    min_cache_tokens: 0,
    requires_explicit: true,
    cache_ttl_seconds: 300,                 // 5 min default
    input_per_mt:  15.00,
    output_per_mt: 75.00,
    cache_write_multiplier: 1.25,
    cache_read_multiplier:  0.10,           // 10% of input
  },
  anthropic_sonnet: {
    name: "Claude Sonnet 4.6",
    min_cache_tokens: 0,
    requires_explicit: true,
    cache_ttl_seconds: 300,
    input_per_mt:   3.00,
    output_per_mt: 15.00,
    cache_write_multiplier: 1.25,
    cache_read_multiplier:  0.10,
  },
  anthropic_haiku: {
    name: "Claude Haiku 4.5",
    min_cache_tokens: 0,
    requires_explicit: true,
    cache_ttl_seconds: 300,
    input_per_mt:   1.00,
    output_per_mt:  5.00,
    cache_write_multiplier: 1.25,
    cache_read_multiplier:  0.10,
  },
  openai_gpt5: {
    name: "OpenAI GPT-5",
    min_cache_tokens: 1024,
    requires_explicit: false,
    cache_ttl_seconds: 600,                 // ~5-10 min observed
    input_per_mt:   5.00,
    output_per_mt: 15.00,
    cache_write_multiplier: 1.00,
    cache_read_multiplier:  0.50,           // 50% of input
  },
  openai_gpt5_mini: {
    name: "OpenAI GPT-5 mini",
    min_cache_tokens: 1024,
    requires_explicit: false,
    cache_ttl_seconds: 600,
    input_per_mt:   0.30,
    output_per_mt:  1.20,
    cache_write_multiplier: 1.00,
    cache_read_multiplier:  0.50,
  },
  gemini_25_pro: {
    name: "Gemini 2.5 Pro",
    min_cache_tokens: 32768,
    requires_explicit: true,
    cache_ttl_seconds: 3600,                // 1 hour default for context cache
    input_per_mt:   1.25,
    output_per_mt: 10.00,
    cache_write_multiplier: 1.00,
    cache_read_multiplier:  0.25,           // 25% of input
  },
};
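
// Worked example of how the multipliers combine (hypothetical scenario:
// a 10,000-token prompt in which an 8,000-token prefix survives the edit,
// priced against anthropic_sonnet above; output tokens ignored here):
//
//   base input cost:       10_000 * 3.00/1e6             = $0.0300
//   cache-hit input cost:   8_000 * 3.00/1e6 * 0.10       = $0.0024
//                         + 2_000 * 3.00/1e6              = $0.0060
//                                                   total = $0.0084  (72% off input)
//   first-write surcharge:  8_000 * 3.00/1e6 * (1.25 - 1) = $0.0060
//
// analyseProvider() below applies the same arithmetic and adds the
// output-token term to both sides.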

// =============================================================================
// Longest common prefix — character-level
// =============================================================================

export function longestCommonPrefix(a, b) {
  if (typeof a !== "string" || typeof b !== "string") return 0;
  const n = Math.min(a.length, b.length);
  let i = 0;
  while (i < n && a.charCodeAt(i) === b.charCodeAt(i)) i++;
  return i;
}
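
// Example (character offsets, not tokens):
//
//   longestCommonPrefix("system: be terse\nuser: hi",
//                       "system: be terse\nuser: hello")   // → 24
//
// The shared "system: be terse\nuser: h" is 24 chars; the strings first
// differ at "i" vs "e".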

// First differing line — useful for the UI "your edit landed here" hint.
function firstDifferingLine(a, b, prefixLen) {
  // Walk back to the start of the line containing the diff
  let i = prefixLen;
  while (i > 0 && a[i - 1] !== "\n" && b[i - 1] !== "\n") i--;
  // Count line number (1-indexed)
  let line = 1;
  for (let j = 0; j < i; j++) {
    if (a[j] === "\n") line++;
  }
  return { offset: i, line };
}
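
// For the same pair as the longestCommonPrefix example above, prefixLen 24
// falls mid-word in "hi" / "hello", so the walk-back lands at the start of
// the "user:" line: firstDifferingLine(a, b, 24) → { offset: 17, line: 2 }.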

// =============================================================================
// Per-provider cache analysis
// =============================================================================

function analyseProvider(
  providerId,
  totalTokensNew,
  commonTokens,
  divergeTokens,
  outputTokens,
) {
  const p = PROVIDERS[providerId];
  if (!p) return null;

  const inputPrice = p.input_per_mt / 1_000_000;
  const outputPrice = p.output_per_mt / 1_000_000;
  const baseCost =
    totalTokensNew * inputPrice + outputTokens * outputPrice;

  // Can the provider cache anything? Two failure modes:
  //   (a) common prefix below the provider's minimum cacheable size
  //       (hard failure: savings are forced to zero below).
  //   (b) provider requires an explicit marker (cache_control / explicit
  //       cache creation) that the pasted prompt may not carry. We still
  //       report best-case savings and surface `requires_explicit` on the
  //       result so the UI can flag that assumption.
  let canCache = true;
  let reason = null;
  if (commonTokens < p.min_cache_tokens) {
    canCache = false;
    reason = "below_min";
  }

  if (!canCache) {
    return {
      provider_id: providerId,
      provider_name: p.name,
      base_cost_usd: baseCost,
      cached_cost_usd: baseCost,
      savings_usd: 0,
      hit_ratio: 0,
      tokens_cached: 0,
      tokens_billed_input: totalTokensNew,
      reason,
      min_cache_tokens: p.min_cache_tokens,
      requires_explicit: p.requires_explicit,
      cache_ttl_seconds: p.cache_ttl_seconds,
    };
  }

  // Cost on cache HIT for the prefix:
  //   cache-read: commonTokens × inputPrice × cache_read_multiplier
  //   fresh:      divergeTokens × inputPrice
  //   output:     outputTokens × outputPrice
  const cachedInputCost =
    commonTokens * inputPrice * p.cache_read_multiplier +
    divergeTokens * inputPrice;
  const cachedCost = cachedInputCost + outputTokens * outputPrice;

  // Cache write surcharge (Anthropic). Returned separately as
  // `cache_write_surcharge_usd` so users see the amortization picture.
  const cacheWriteSurcharge =
    commonTokens * inputPrice * (p.cache_write_multiplier - 1.0);

  const savings = baseCost - cachedCost;
  const hitRatio = totalTokensNew === 0 ? 0 : commonTokens / totalTokensNew;

  return {
    provider_id: providerId,
    provider_name: p.name,
    base_cost_usd: baseCost,
    cached_cost_usd: cachedCost,
    cache_write_surcharge_usd: cacheWriteSurcharge,
    savings_usd: savings,
    savings_pct: baseCost === 0 ? 0 : savings / baseCost,
    hit_ratio: hitRatio,
    tokens_cached: commonTokens,
    tokens_billed_input: divergeTokens,
    reason: null,
    min_cache_tokens: p.min_cache_tokens,
    requires_explicit: p.requires_explicit,
    cache_ttl_seconds: p.cache_ttl_seconds,
  };
}
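
// For the hypothetical anthropic_sonnet scenario sketched under PROVIDERS
// (10,000 new tokens, 8,000 common, 2,000 divergent, 500 output tokens),
// the returned object would look roughly like:
//
//   {
//     provider_id: "anthropic_sonnet",
//     base_cost_usd: 0.0375,            // 10_000*3/1e6 + 500*15/1e6
//     cached_cost_usd: 0.0159,          // 0.0084 input + 0.0075 output
//     cache_write_surcharge_usd: 0.006,
//     savings_usd: 0.0216,
//     savings_pct: 0.576,
//     hit_ratio: 0.8,
//     tokens_cached: 8000,
//     tokens_billed_input: 2000,
//     ...
//   }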

// =============================================================================
// Public entry point
// =============================================================================

export function diffPromptCache(
  oldPrompt,
  newPrompt,
  {
    profile = "english",
    outputTokensEstimate = 500,
    providers = null,
  } = {},
) {
  if (typeof oldPrompt !== "string" || typeof newPrompt !== "string") {
    return { code: "empty_input", params: {} };
  }
  // Intentionally no trimming: to a prefix cache, whitespace edits are edits.
  if (!oldPrompt && !newPrompt) {
    return { code: "empty_input", params: {} };
  }

  const lcpChars = longestCommonPrefix(oldPrompt, newPrompt);
  const isIdentical = oldPrompt === newPrompt;
  const totalCharsNew = newPrompt.length;
  const divergeChars = totalCharsNew - lcpChars;

  const tokensCommon  = estimateTokens(oldPrompt.slice(0, lcpChars), profile);
  const tokensDiverge = estimateTokens(newPrompt.slice(lcpChars),    profile);
  const tokensTotal   = tokensCommon + tokensDiverge;

  const providerIds = providers ?? Object.keys(PROVIDERS);
  const providerResults = providerIds
    .map(id => analyseProvider(id, tokensTotal, tokensCommon, tokensDiverge, outputTokensEstimate))
    .filter(r => r !== null);

  const diffPoint = isIdentical
    ? { offset: oldPrompt.length, line: oldPrompt.split("\n").length }
    : firstDifferingLine(oldPrompt, newPrompt, lcpChars);

  let code;
  if (isIdentical) {
    code = "identical";
  } else if (lcpChars === 0) {
    code = "fully_divergent";
  } else if (providerResults.length > 0 && providerResults.every(r => r.reason === "below_min")) {
    code = "divergent_below_min";
  } else {
    code = "divergent_can_cache";
  }

  return {
    code,
    params: {
      profile,
      lcp_chars: lcpChars,
      diverge_chars: divergeChars,
      tokens_common: tokensCommon,
      tokens_diverge: tokensDiverge,
      tokens_total: tokensTotal,
      hit_ratio: tokensTotal === 0 ? 0 : tokensCommon / tokensTotal,
      diff_point: diffPoint,
      output_tokens: outputTokensEstimate,
    },
    providers: providerResults,
  };
}
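
// Typical call from main.js (prompt variables here are hypothetical; main.js
// owns all human-readable strings and currency formatting):
//
//   const res = diffPromptCache(oldSystemPrompt, newSystemPrompt, {
//     profile: "code",
//     outputTokensEstimate: 800,
//     providers: ["anthropic_sonnet", "openai_gpt5"],
//   });
//   // res.code             → e.g. "divergent_can_cache"
//   // res.params.hit_ratio → fraction of the new prompt still served from cache
//   // res.providers        → per-provider cost breakdowns (see analyseProvider)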

// Helper used by the UI: short summary string per provider, suitable for
// rendering in a table row (i18n-substituted in main.js).
export function summariseProvider(result) {
  if (!result) return null;
  return {
    name: result.provider_name,
    hit_pct: Math.round(result.hit_ratio * 100),
    base: result.base_cost_usd,
    cached: result.cached_cost_usd,
    savings: result.savings_usd,
    savings_pct: result.savings_pct ?? 0,
    requires_explicit: result.requires_explicit,
    reason: result.reason,
  };
}
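
// Fed the hypothetical anthropic_sonnet result shown after analyseProvider,
// this yields (exact values shift with real tokenizers and current pricing):
//
//   summariseProvider(result)
//   // → { name: "Claude Sonnet 4.6", hit_pct: 80, base: 0.0375,
//   //     cached: 0.0159, savings: 0.0216, savings_pct: 0.576,
//   //     requires_explicit: true, reason: null }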