Spaces:
Running
Running
// Prompt-Cache Diff Predictor (v0.8.4 anti-bullshit pack #10)
//
// Pain: small prompt edits silently invalidate provider prompt caches,
// turning a 50% discount into a 0% discount and 10x'ing the bill.
// Users debug this blind because:
//   - Anthropic's `cache_control` cache breaks at the first token diff
//     in the marked prefix (TTL 5 min default, 1 hour beta).
//   - OpenAI auto-caches prefixes ≥1024 tokens but invalidates on any
//     prefix change; the 50% read discount only applies on a hit.
//   - Gemini's context cache requires explicit creation, ≥32K tokens,
//     and any prefix edit forces a new cache.
//
// Tool: paste old + new prompt → compute longest common prefix in
// tokens → predict per-provider cache hit ratio + $ delta vs no-cache.
//
// Pure logic — no human-readable strings; main.js does i18n. Returns
// {code, params, providers: [{provider_id, ...}]}.
// =============================================================================
| // Token estimation — heuristic, browser-only | |
| // ============================================================================= | |
| // | |
| // Real tokenizers vary by ±15% between Llama / GPT / Claude / Qwen and | |
| // running them in-browser would mean shipping a 5-10 MB WASM blob. For a | |
| // cache-diff predictor the absolute count doesn't matter — what matters | |
| // is the RATIO of common-prefix to divergent-suffix tokens, which is | |
| // robust to estimator choice. The three profiles below cover 95% of | |
| // real prompts; users with extreme cases can paste pre-tokenized counts. | |
| const TOKEN_PROFILES = { | |
| english: { chars_per_token: 4.0, label_key: "cache.profile.english" }, | |
| code: { chars_per_token: 3.5, label_key: "cache.profile.code" }, | |
| mixed: { chars_per_token: 2.0, label_key: "cache.profile.mixed" }, // CJK / Cyrillic | |
| }; | |
| export function estimateTokens(text, profile = "english") { | |
| if (typeof text !== "string" || !text) return 0; | |
| const cpt = TOKEN_PROFILES[profile]?.chars_per_token ?? 4.0; | |
| return Math.ceil(text.length / cpt); | |
| } | |
// =============================================================================
// Provider rules — pricing + cache mechanics
// =============================================================================
//
// All prices are USD per million tokens, snapshot 2026-01 (knowledge cutoff).
//
// `cache_read_multiplier`  — fraction of the input price billed on a cache
//                            hit (Anthropic 0.10 = 10%; OpenAI 0.50; Gemini 0.25).
// `cache_write_multiplier` — first-write surcharge factor; models Anthropic's
//                            25% premium the first time a prefix is cached.
// `min_cache_tokens`       — floor below which the provider cannot cache at
//                            all (OpenAI auto-cache ≥1024; Gemini ≥32K).
//                            Anthropic has no floor but needs an explicit
//                            cache_control marker, modeled here as min=0
//                            plus a `requires_explicit` flag for the UI.
export const PROVIDERS = {
  anthropic_opus: {
    name: "Claude Opus 4.7",
    min_cache_tokens: 0,
    requires_explicit: true,
    cache_ttl_seconds: 300, // 5 min default TTL
    input_per_mt: 15.00,
    output_per_mt: 75.00,
    cache_write_multiplier: 1.25,
    cache_read_multiplier: 0.10, // hit bills 10% of input price
  },
  anthropic_sonnet: {
    name: "Claude Sonnet 4.6",
    min_cache_tokens: 0,
    requires_explicit: true,
    cache_ttl_seconds: 300,
    input_per_mt: 3.00,
    output_per_mt: 15.00,
    cache_write_multiplier: 1.25,
    cache_read_multiplier: 0.10,
  },
  anthropic_haiku: {
    name: "Claude Haiku 4.5",
    min_cache_tokens: 0,
    requires_explicit: true,
    cache_ttl_seconds: 300,
    input_per_mt: 1.00,
    output_per_mt: 5.00,
    cache_write_multiplier: 1.25,
    cache_read_multiplier: 0.10,
  },
  openai_gpt5: {
    name: "OpenAI GPT-5",
    min_cache_tokens: 1024,
    requires_explicit: false,
    cache_ttl_seconds: 600, // ~5-10 min observed
    input_per_mt: 5.00,
    output_per_mt: 15.00,
    cache_write_multiplier: 1.00,
    cache_read_multiplier: 0.50, // hit bills 50% of input price
  },
  openai_gpt5_mini: {
    name: "OpenAI GPT-5 mini",
    min_cache_tokens: 1024,
    requires_explicit: false,
    cache_ttl_seconds: 600,
    input_per_mt: 0.30,
    output_per_mt: 1.20,
    cache_write_multiplier: 1.00,
    cache_read_multiplier: 0.50,
  },
  gemini_25_pro: {
    name: "Gemini 2.5 Pro",
    min_cache_tokens: 32768,
    requires_explicit: true,
    cache_ttl_seconds: 3600, // 1 hour default for context cache
    input_per_mt: 1.25,
    output_per_mt: 10.00,
    cache_write_multiplier: 1.00,
    cache_read_multiplier: 0.25, // hit bills 25% of input price
  },
};
// =============================================================================
// Longest common prefix — character-level
// =============================================================================
/**
 * Length (in UTF-16 code units) of the longest common prefix of `a` and `b`.
 *
 * Fix: the naive code-unit scan could stop between the halves of a
 * surrogate pair — e.g. "\u{1F600}" vs "\u{1F601}" share the high
 * surrogate but diverge on the low one. Callers slice with this length,
 * so such a prefix would end in a lone surrogate and count half of a
 * character that actually differs. We back up one unit in that case so
 * the prefix never splits a character.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} common-prefix length in code units (0 for non-strings)
 */
export function longestCommonPrefix(a, b) {
  if (typeof a !== "string" || typeof b !== "string") return 0;
  const n = Math.min(a.length, b.length);
  let i = 0;
  while (i < n && a.charCodeAt(i) === b.charCodeAt(i)) i++;
  // If we diverged (i < n) right after a matched high surrogate
  // (0xD800-0xDBFF), the match ends mid-character — exclude the pair.
  if (i > 0 && i < n) {
    const last = a.charCodeAt(i - 1);
    if (last >= 0xd800 && last <= 0xdbff) i--;
  }
  return i;
}
// First differing line — useful for the UI "your edit landed here" hint.
// `prefixLen` is the char-level LCP of `a` and `b`. Returns the offset of
// the start of the line containing the first difference, plus its
// 1-indexed line number.
function firstDifferingLine(a, b, prefixLen) {
  // Rewind to the beginning of the line the diff falls on. The strings
  // are identical before prefixLen, so both are checked symmetrically.
  let offset = prefixLen;
  while (offset > 0 && a[offset - 1] !== "\n" && b[offset - 1] !== "\n") {
    offset -= 1;
  }
  // Line number (1-indexed) = newlines seen before `offset`, plus one.
  let line = 1;
  for (const ch of a.slice(0, offset)) {
    if (ch === "\n") line += 1;
  }
  return { offset, line };
}
// =============================================================================
// Per-provider cache analysis
// =============================================================================
/**
 * Compute base vs cached cost for one provider.
 *
 * Fix: the `below_min` early return previously omitted `savings_pct` and
 * `cache_write_surcharge_usd`, giving the two branches different result
 * shapes; both fields are now always present (0 when caching is impossible)
 * so consumers don't need `?? 0` fallbacks. Also removed a stale comment
 * promising a `requires_marker` tag that was never emitted — the UI uses
 * the `requires_explicit` flag instead.
 *
 * @param {string} providerId - key into PROVIDERS
 * @param {number} totalTokensNew - estimated input tokens of the new prompt
 * @param {number} commonTokens - tokens in the shared (cacheable) prefix
 * @param {number} divergeTokens - tokens after the first difference
 * @param {number} outputTokens - assumed completion size
 * @returns {object|null} per-provider result row, or null for unknown ids
 */
function analyseProvider(
  providerId,
  totalTokensNew,
  commonTokens,
  divergeTokens,
  outputTokens,
) {
  const p = PROVIDERS[providerId];
  if (!p) return null;

  const inputPrice = p.input_per_mt / 1_000_000;
  const outputPrice = p.output_per_mt / 1_000_000;
  const baseCost =
    totalTokensNew * inputPrice + outputTokens * outputPrice;

  // Hard failure mode: common prefix below the provider's minimum
  // cacheable size — no savings are possible at all. (Providers with
  // `requires_explicit` still get a best-case estimate below; the UI
  // uses that flag to warn that an explicit cache marker is needed.)
  if (commonTokens < p.min_cache_tokens) {
    return {
      provider_id: providerId,
      provider_name: p.name,
      base_cost_usd: baseCost,
      cached_cost_usd: baseCost,
      cache_write_surcharge_usd: 0, // uniform shape with the hit branch
      savings_usd: 0,
      savings_pct: 0, // uniform shape with the hit branch
      hit_ratio: 0,
      tokens_cached: 0,
      tokens_billed_input: totalTokensNew,
      reason: "below_min",
      min_cache_tokens: p.min_cache_tokens,
      requires_explicit: p.requires_explicit,
      cache_ttl_seconds: p.cache_ttl_seconds,
    };
  }

  // Cost on a cache HIT for the prefix:
  //   cache-read: commonTokens × inputPrice × cache_read_multiplier
  //   fresh:      divergeTokens × inputPrice
  //   output:     outputTokens × outputPrice
  const cachedInputCost =
    commonTokens * inputPrice * p.cache_read_multiplier +
    divergeTokens * inputPrice;
  const cachedCost = cachedInputCost + outputTokens * outputPrice;

  // One-time cache write surcharge (Anthropic's 25%). Surfaced separately
  // so users see the amortization picture.
  const cacheWriteSurcharge =
    commonTokens * inputPrice * (p.cache_write_multiplier - 1.0);

  const savings = baseCost - cachedCost;
  const hitRatio = totalTokensNew === 0 ? 0 : commonTokens / totalTokensNew;
  return {
    provider_id: providerId,
    provider_name: p.name,
    base_cost_usd: baseCost,
    cached_cost_usd: cachedCost,
    cache_write_surcharge_usd: cacheWriteSurcharge,
    savings_usd: savings,
    savings_pct: baseCost === 0 ? 0 : savings / baseCost,
    hit_ratio: hitRatio,
    tokens_cached: commonTokens,
    tokens_billed_input: divergeTokens,
    reason: null,
    min_cache_tokens: p.min_cache_tokens,
    requires_explicit: p.requires_explicit,
    cache_ttl_seconds: p.cache_ttl_seconds,
  };
}
// =============================================================================
// Public entry point
// =============================================================================
/**
 * Diff two prompts and predict per-provider cache behavior.
 *
 * Fixes: (1) the "all providers below min" check used bare `.every()`,
 * which is vacuously true on an empty array — an empty or all-unknown
 * `providers` list was mislabeled `divergent_below_min`; now guarded on
 * length. (2) Renamed the misleading `oldTrim`/`newTrim` locals — the
 * inputs are deliberately NOT trimmed, since whitespace is part of the
 * prompt and affects provider-side cache keys.
 *
 * @param {string} oldPrompt - prompt as previously sent (cached-prefix source)
 * @param {string} newPrompt - edited prompt about to be sent
 * @param {object} [opts]
 * @param {string} [opts.profile="english"] - token-estimation profile key
 * @param {number} [opts.outputTokensEstimate=500] - assumed completion size
 * @param {string[]|null} [opts.providers=null] - subset of PROVIDERS keys; null = all
 * @returns {{code: string, params: object, providers?: object[]}} i18n-ready result
 */
export function diffPromptCache(
  oldPrompt,
  newPrompt,
  {
    profile = "english",
    outputTokensEstimate = 500,
    providers = null,
  } = {},
) {
  if (typeof oldPrompt !== "string" || typeof newPrompt !== "string") {
    return { code: "empty_input", params: {} };
  }
  if (!oldPrompt && !newPrompt) {
    return { code: "empty_input", params: {} };
  }

  const lcpChars = longestCommonPrefix(oldPrompt, newPrompt);
  const isIdentical = oldPrompt === newPrompt;
  const divergeChars = newPrompt.length - lcpChars;

  // Prefix/suffix are estimated separately; the sum can differ by ±1 token
  // from estimating the whole string at once because of per-piece ceil().
  const tokensCommon = estimateTokens(oldPrompt.slice(0, lcpChars), profile);
  const tokensDiverge = estimateTokens(newPrompt.slice(lcpChars), profile);
  const tokensTotal = tokensCommon + tokensDiverge;

  const providerIds = providers ?? Object.keys(PROVIDERS);
  const providerResults = providerIds
    .map((id) =>
      analyseProvider(id, tokensTotal, tokensCommon, tokensDiverge, outputTokensEstimate),
    )
    .filter((r) => r !== null);

  const diffPoint = isIdentical
    ? { offset: oldPrompt.length, line: oldPrompt.split("\n").length }
    : firstDifferingLine(oldPrompt, newPrompt, lcpChars);

  let code;
  if (isIdentical) {
    code = "identical";
  } else if (lcpChars === 0) {
    code = "fully_divergent";
  } else if (
    // Length guard: [].every() is vacuously true, which would mislabel
    // an empty/unknown provider list as below-min.
    providerResults.length > 0 &&
    providerResults.every((r) => r.reason === "below_min")
  ) {
    code = "divergent_below_min";
  } else {
    code = "divergent_can_cache";
  }

  return {
    code,
    params: {
      profile,
      lcp_chars: lcpChars,
      diverge_chars: divergeChars,
      tokens_common: tokensCommon,
      tokens_diverge: tokensDiverge,
      tokens_total: tokensTotal,
      hit_ratio: tokensTotal === 0 ? 0 : tokensCommon / tokensTotal,
      diff_point: diffPoint,
      output_tokens: outputTokensEstimate,
    },
    providers: providerResults,
  };
}
// Helper used by the UI: condense one analyseProvider() row into the
// fields a table renderer needs (i18n substitution happens in main.js).
export function summariseProvider(result) {
  if (!result) return null;
  const {
    provider_name: name,
    hit_ratio: hitRatio,
    base_cost_usd: base,
    cached_cost_usd: cached,
    savings_usd: savings,
    requires_explicit,
    reason,
  } = result;
  return {
    name,
    hit_pct: Math.round(hitRatio * 100),
    base,
    cached,
    savings,
    // Below-min rows may lack savings_pct; surface it as 0 for the table.
    savings_pct: result.savings_pct ?? 0,
    requires_explicit,
    reason,
  };
}