karlexmarin Claude Opus 4.7 (1M context) committed on
Commit
bb4f2c3
·
1 Parent(s): 449213a

feat(dedup): content-hash for shared JSONs + issue titles — natural deduplication

Browse files

User concern: same input parameters generate identical analyses, but
previous filename pattern was theta-based (collisions across models)
and same content downloaded N times → no dedup. Same issue submitted
N times → registry pollution.

Solution: SHA-256 of canonical inputs → 8-char hex hash.
- Identical inputs (same model, same T_eval, same flags) → identical hash forever
- Different inputs (any change to a hashed param) → different hash, barring collisions in the truncated 32-bit value

Filename pattern (deterministic):
Before: taf-profile-500000-T32000.json (theta-only, collision risk)
After: taf-profile-meta-llama-Meta-Llama-3-8B-T32000-8d29feb8.json

Issue title pattern (with hash for searchability):
Before: [TAF Profile] T_eval=32000 on RoPE-GQA post-IH
After: [TAF Profile] Meta-Llama-3-8B @ T=32000 #8d29feb8

Issue body now includes:
> **Input hash**: `#8d29feb8` — search this hash in registry issues
> to find independent verifications. Same inputs always produce the same hash.

Workflow benefits:
1. Registry users search '#8d29feb8' → find existing issues for same model+config
2. Browser overwrites duplicate downloads (same name)
3. Independent verification = comment on existing issue, not new one
4. Re-runs of same analysis = same JSON filename (cache friendly; file contents differ only in `_taf_timestamp`)

Implementation:
- inputHash(type, data): canonicalize → JSON stringify → SHA-256 (browser native crypto.subtle)
- Hashed inputs only (not metadata/timestamp), so re-runs collide intentionally
- modelShortName(): tracks state.lastModelId from preset/HF fetch for human-friendly names
- All download/submit handlers now async (await hash computation)
- Status messages show generated filename + nudge to search registry first

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. js/main.js +138 -40
js/main.js CHANGED
@@ -272,6 +272,7 @@ function getRecipeDefaults(recipeId) {
272
  // ════════════════════════════════════════════════════════════════════
273
  $("preset").addEventListener("change", (e) => {
274
  if (!e.target.value) return;
 
275
  const proxy = state.pyodide.runPython(`get_preset(${JSON.stringify(e.target.value)})`);
276
  const preset = proxy.toJs ? proxy.toJs({ dict_converter: Object.fromEntries }) : proxy;
277
  if (!preset || Object.keys(preset).length === 0) return;
@@ -322,6 +323,7 @@ $("hf-fetch-btn").addEventListener("click", async () => {
322
  }
323
  $("hf-status").textContent = `⏳ Fetching config.json from HF Hub for ${modelId}...`;
324
  $("hf-fetch-btn").disabled = true;
 
325
  try {
326
  const cfg = await fetchHfConfig(modelId);
327
  const preset = configToPreset(cfg, modelId);
@@ -677,6 +679,7 @@ function formatResultPlain(r) {
677
  // ════════════════════════════════════════════════════════════════════
678
  $("profile-preset").addEventListener("change", (e) => {
679
  if (!e.target.value) return;
 
680
  const proxy = state.pyodide.runPython(`get_preset(${JSON.stringify(e.target.value)})`);
681
  const p = proxy.toJs ? proxy.toJs({ dict_converter: Object.fromEntries }) : proxy;
682
  if (!p || Object.keys(p).length === 0) return;
@@ -695,6 +698,7 @@ $("profile-fetch-btn").addEventListener("click", async () => {
695
  if (!id) { $("profile-hf-status").textContent = "⚠ Enter a model id"; return; }
696
  $("profile-hf-status").textContent = `⏳ Fetching ${id}...`;
697
  $("profile-fetch-btn").disabled = true;
 
698
  try {
699
  const cfg = await fetchHfConfig(id);
700
  const p = configToPreset(cfg, id);
@@ -828,17 +832,18 @@ function renderProfile(p, params) {
828
 
829
  // Wire share/download/submit buttons
830
  $("profile-share-btn").addEventListener("click", () => copyShareLink("profile", params));
831
- $("profile-download-btn").addEventListener("click", () => {
832
- const safeName = (state.lastResult?.params?.theta || "model") + "-T" + (state.lastResult?.params?.T_eval || "?");
833
- downloadJSON(`taf-profile-${safeName}.json`, exportableData("profile", p));
834
- $("profile-share-status").textContent = "✅ Downloaded";
835
- setTimeout(() => $("profile-share-status").textContent = "", 3000);
 
836
  });
837
- $("profile-submit-btn").addEventListener("click", () => {
838
- const url = buildIssueUrl("profile", p);
839
  window.open(url, "_blank");
840
- $("profile-share-status").textContent = "↗ Opened GitHub registry";
841
- setTimeout(() => $("profile-share-status").textContent = "", 3000);
842
  });
843
  }
844
 
@@ -967,15 +972,18 @@ function renderCompare(cmp) {
967
  models: cmp.rows.map(r => r.label) };
968
  copyShareLink("compare", params);
969
  });
970
- $("compare-download-btn").addEventListener("click", () => {
971
- downloadJSON(`taf-compare-${cmp.recipe_id}.json`, exportableData("compare", cmp));
972
- $("compare-share-status").textContent = "✅ Downloaded";
973
- setTimeout(() => $("compare-share-status").textContent = "", 3000);
 
 
974
  });
975
- $("compare-submit-btn").addEventListener("click", () => {
976
- window.open(buildIssueUrl("compare", cmp), "_blank");
 
977
  $("compare-share-status").textContent = "↗ Opened GitHub registry";
978
- setTimeout(() => $("compare-share-status").textContent = "", 3000);
979
  });
980
  }
981
 
@@ -1034,18 +1042,20 @@ $("share-btn").addEventListener("click", () => {
1034
  if (!state.lastResult) return;
1035
  copyShareLink(state.lastResult.type || "recipe", state.lastResult.params || {});
1036
  });
1037
- $("recipe-download-btn").addEventListener("click", () => {
1038
  if (!state.lastFullResult) return;
1039
- downloadJSON(`taf-recipe-${state.lastFullResult.recipe_id || "result"}.json`,
1040
- exportableData("recipe", state.lastFullResult));
1041
- $("share-status").textContent = "✅ Downloaded";
1042
- setTimeout(() => $("share-status").textContent = "", 3000);
 
1043
  });
1044
- $("recipe-submit-btn").addEventListener("click", () => {
1045
  if (!state.lastFullResult) return;
1046
- window.open(buildIssueUrl("recipe", state.lastFullResult), "_blank");
1047
- $("share-status").textContent = "↗ Opened GitHub registry";
1048
- setTimeout(() => $("share-status").textContent = "", 3000);
 
1049
  });
1050
 
1051
  // ════════════════════════════════════════════════════════════════════
@@ -1073,40 +1083,124 @@ function downloadJSON(filename, data) {
1073
  setTimeout(() => { document.body.removeChild(a); URL.revokeObjectURL(url); }, 100);
1074
  }
1075
 
1076
- function exportableData(type, data) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1077
  return {
1078
  _taf_export: true,
1079
  _taf_type: type,
1080
  _taf_version: "0.2",
 
1081
  _taf_timestamp: new Date().toISOString(),
1082
  payload: data,
1083
  };
1084
  }
1085
 
1086
- function buildIssueUrl(type, data) {
 
 
 
 
 
 
 
 
 
 
 
 
 
1087
  let title, body;
1088
  if (type === "profile") {
1089
  const ms = data.model_summary || {};
1090
- title = `[TAF Profile] T_eval=${ms.T_eval || "?"} on ${ms.architecture_class || "model"}`;
1091
- body = profileToMarkdown(data);
1092
  } else if (type === "compare") {
1093
- title = `[TAF Compare] ${data.recipe_id} across ${data.rows.length} models`;
1094
- body = compareToMarkdown(data);
1095
  } else {
1096
- title = `[TAF ${data.recipe_id}] ${data.verdict}`;
1097
- body = recipeToMarkdown(data);
1098
  }
 
1099
  const params = new URLSearchParams({
1100
  title: title,
1101
- body: body + "\n\n---\n*Submitted via [TAF Agent](https://karlesmarin.github.io/tafagent)*",
1102
  });
1103
  return `https://github.com/${REGISTRY_REPO}/issues/new?${params.toString()}`;
1104
  }
1105
 
1106
- function profileToMarkdown(p) {
1107
  const ms = p.model_summary || {};
1108
  const kn = p.key_numbers || {};
1109
- let md = `## TAF Profile\n\n`;
 
 
1110
  md += `**Architecture**: ${ms.architecture_class || "?"}\n`;
1111
  md += `**Params**: ${ms.n_params}, **T_train**: ${ms.T_train}, **T_eval**: ${ms.T_eval}\n`;
1112
  md += `**θ**: ${ms.rope_theta}, GQA=${ms.has_GQA}, SWA=${ms.has_SWA}\n\n`;
@@ -1119,8 +1213,10 @@ function profileToMarkdown(p) {
1119
  return md;
1120
  }
1121
 
1122
- function compareToMarkdown(c) {
1123
- let md = `## TAF Comparison — ${c.recipe_id} (${c.recipe_name})\n\n`;
 
 
1124
  md += `**Shared params**: \`${JSON.stringify(c.shared_params)}\`\n\n`;
1125
  md += `| Model | Verdict | Reason |\n|-------|---------|--------|\n`;
1126
  c.rows.forEach(r => {
@@ -1130,8 +1226,10 @@ function compareToMarkdown(c) {
1130
  return md;
1131
  }
1132
 
1133
- function recipeToMarkdown(r) {
1134
- let md = `## TAF Recipe ${r.recipe_id} — ${r.recipe_name}\n\n`;
 
 
1135
  md += `**Verdict**: ${r.verdict}\n`;
1136
  md += `**Reason**: ${r.reason}\n`;
1137
  if (r.mitigation) md += `**Action**: ${r.mitigation}\n`;
 
272
  // ════════════════════════════════════════════════════════════════════
273
  $("preset").addEventListener("change", (e) => {
274
  if (!e.target.value) return;
275
+ state.lastModelId = e.target.value; // remember for filename/hash
276
  const proxy = state.pyodide.runPython(`get_preset(${JSON.stringify(e.target.value)})`);
277
  const preset = proxy.toJs ? proxy.toJs({ dict_converter: Object.fromEntries }) : proxy;
278
  if (!preset || Object.keys(preset).length === 0) return;
 
323
  }
324
  $("hf-status").textContent = `⏳ Fetching config.json from HF Hub for ${modelId}...`;
325
  $("hf-fetch-btn").disabled = true;
326
+ state.lastModelId = modelId; // remember for filename/hash
327
  try {
328
  const cfg = await fetchHfConfig(modelId);
329
  const preset = configToPreset(cfg, modelId);
 
679
  // ════════════════════════════════════════════════════════════════════
680
  $("profile-preset").addEventListener("change", (e) => {
681
  if (!e.target.value) return;
682
+ state.lastModelId = e.target.value; // remember for filename/hash
683
  const proxy = state.pyodide.runPython(`get_preset(${JSON.stringify(e.target.value)})`);
684
  const p = proxy.toJs ? proxy.toJs({ dict_converter: Object.fromEntries }) : proxy;
685
  if (!p || Object.keys(p).length === 0) return;
 
698
  if (!id) { $("profile-hf-status").textContent = "⚠ Enter a model id"; return; }
699
  $("profile-hf-status").textContent = `⏳ Fetching ${id}...`;
700
  $("profile-fetch-btn").disabled = true;
701
+ state.lastModelId = id; // remember for filename/hash
702
  try {
703
  const cfg = await fetchHfConfig(id);
704
  const p = configToPreset(cfg, id);
 
832
 
833
  // Wire share/download/submit buttons
834
  $("profile-share-btn").addEventListener("click", () => copyShareLink("profile", params));
835
+ $("profile-download-btn").addEventListener("click", async () => {
836
+ const filename = await makeFilename("profile", p);
837
+ const data = await exportableData("profile", p);
838
+ downloadJSON(filename, data);
839
+ $("profile-share-status").textContent = `✅ Downloaded ${filename}`;
840
+ setTimeout(() => $("profile-share-status").textContent = "", 5000);
841
  });
842
+ $("profile-submit-btn").addEventListener("click", async () => {
843
+ const url = await buildIssueUrl("profile", p);
844
  window.open(url, "_blank");
845
+ $("profile-share-status").textContent = "↗ Opened GitHub registry (search hash before submitting to avoid duplicate)";
846
+ setTimeout(() => $("profile-share-status").textContent = "", 6000);
847
  });
848
  }
849
 
 
972
  models: cmp.rows.map(r => r.label) };
973
  copyShareLink("compare", params);
974
  });
975
+ $("compare-download-btn").addEventListener("click", async () => {
976
+ const filename = await makeFilename("compare", cmp);
977
+ const data = await exportableData("compare", cmp);
978
+ downloadJSON(filename, data);
979
+ $("compare-share-status").textContent = `✅ Downloaded ${filename}`;
980
+ setTimeout(() => $("compare-share-status").textContent = "", 5000);
981
  });
982
+ $("compare-submit-btn").addEventListener("click", async () => {
983
+ const url = await buildIssueUrl("compare", cmp);
984
+ window.open(url, "_blank");
985
  $("compare-share-status").textContent = "↗ Opened GitHub registry";
986
+ setTimeout(() => $("compare-share-status").textContent = "", 6000);
987
  });
988
  }
989
 
 
1042
  if (!state.lastResult) return;
1043
  copyShareLink(state.lastResult.type || "recipe", state.lastResult.params || {});
1044
  });
1045
// Download the most recent recipe result as JSON under its hash-based filename.
$("recipe-download-btn").addEventListener("click", async () => {
  // Snapshot once: state.lastFullResult can be replaced while the hash is
  // being awaited, and the filename and payload must describe the same result.
  const result = state.lastFullResult;
  if (!result) return;
  const filename = await makeFilename("recipe", result);
  const data = await exportableData("recipe", result);
  downloadJSON(filename, data);
  $("share-status").textContent = `✅ Downloaded ${filename}`;
  // Clear the status line after 5 s
  setTimeout(() => $("share-status").textContent = "", 5000);
});
1053
// Open a pre-filled GitHub issue for the most recent recipe result.
$("recipe-submit-btn").addEventListener("click", async () => {
  if (!state.lastFullResult) return;
  const url = await buildIssueUrl("recipe", state.lastFullResult);
  // NOTE(review): window.open runs after an await; confirm popup blockers
  // still treat it as user-initiated in the browsers you support.
  window.open(url, "_blank");
  // Status text carries the same "↗" prefix as the profile submit handler
  $("share-status").textContent = "↗ Opened GitHub registry (search hash before submitting to avoid duplicate)";
  // Clear the status line after 6 s
  setTimeout(() => $("share-status").textContent = "", 6000);
});
1060
 
1061
  // ════════════════════════════════════════════════════════════════════
 
1083
  setTimeout(() => { document.body.removeChild(a); URL.revokeObjectURL(url); }, 100);
1084
  }
1085
 
1086
/**
 * Recursively copy a JSON-like value with every object's keys in sorted order,
 * so that JSON.stringify() of the result does not depend on insertion order.
 *
 * Arrays keep their element order (each element is processed recursively);
 * primitives and null are returned unchanged.
 *
 * @param {*} o - Any JSON-like value.
 * @returns {*} A key-sorted copy for arrays/objects, otherwise `o` itself.
 */
function sortKeys(o) {
  if (Array.isArray(o)) return o.map((item) => sortKeys(item));
  if (o && typeof o === "object") {
    // Object.fromEntries defines own data properties, so a key literally named
    // "__proto__" survives instead of being swallowed by the prototype setter.
    return Object.fromEntries(
      Object.keys(o).sort().map((k) => [k, sortKeys(o[k])])
    );
  }
  return o;
}
1094
+
1095
/**
 * Compute a short deterministic hash of the inputs that define an analysis.
 *
 * Only the fields listed below are hashed, so identical inputs always give the
 * same hash. The value is the first 4 bytes of a SHA-256 digest (8 hex chars),
 * so distinct inputs can collide, though rarely.
 *
 * - "profile": theta, T_train, T_eval, head counts, d_head, n_layers, n_params
 *   and has_SWA, read from `data.model_summary` (or from `data` itself when
 *   there is no model_summary).
 * - "compare": recipe_id, shared_params.T_eval and the sorted row labels.
 * - anything else: treated as a recipe; hashes recipe_id and `data.inputs`.
 *
 * @param {string} type - "profile", "compare", or any other value for a recipe.
 * @param {object} data - Result object for that analysis type.
 * @returns {Promise<string>} 8 lowercase hex characters.
 * @throws {Error} If the Web Crypto API (crypto.subtle) is unavailable.
 */
async function inputHash(type, data) {
  let canonical;
  if (type === "profile") {
    const ms = data.model_summary || data;
    canonical = sortKeys({
      type: "profile",
      // Accept both the summary field names and the shorter input names
      theta: ms.rope_theta ?? ms.theta,
      T_train: ms.T_train,
      T_eval: ms.T_eval,
      n_attn: ms.n_attention_heads ?? ms.n_attn,
      n_kv: ms.n_kv_heads ?? ms.n_kv,
      d_head: ms.d_head,
      n_layers: ms.n_layers,
      n_params: ms.n_params,
      has_SWA: ms.has_SWA,
    });
  } else if (type === "compare") {
    canonical = sortKeys({
      type: "compare",
      recipe: data.recipe_id,
      // NOTE(review): only T_eval of shared_params is hashed; confirm that
      // other shared params should not affect the hash.
      T_eval: (data.shared_params || {}).T_eval,
      // Sorted so the order in which models were added does not matter
      models: (data.rows || []).map(r => r.label).sort(),
    });
  } else {
    canonical = sortKeys({
      type: "recipe",
      recipe: data.recipe_id,
      inputs: data.inputs || {},
    });
  }

  // crypto.subtle only exists in secure contexts (HTTPS or localhost); fail
  // with a readable message instead of "cannot read properties of undefined".
  const subtle = globalThis.crypto?.subtle;
  if (!subtle) {
    throw new Error("inputHash: Web Crypto (crypto.subtle) is unavailable; a secure context (HTTPS or localhost) is required");
  }

  const text = JSON.stringify(canonical);
  const buf = new TextEncoder().encode(text);
  const hashBuf = await subtle.digest("SHA-256", buf);
  // Keep the first 4 bytes → 8 hex characters
  return Array.from(new Uint8Array(hashBuf)).slice(0, 4)
    .map(b => b.toString(16).padStart(2, "0")).join("");
}
1133
+
1134
/**
 * Turn an arbitrary value into a fragment that is safe to embed in a filename.
 *
 * Characters / \ ? % * : | " < > are replaced with "-", leading and trailing
 * dashes are removed, and the result is capped at 60 characters.
 *
 * @param {*} s - Value to sanitise; coerced with String().
 * @returns {string} Sanitised fragment, at most 60 characters, possibly empty.
 */
function safeFilename(s) {
  return String(s)
    .replace(/[/\\?%*:|"<>]/g, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, 60)
    // Truncation can expose an interior dash at the new end; trim it again
    .replace(/-+$/, "");
}
1137
+
1138
/**
 * Pick a human-friendly, filename-safe name for the model behind `data`.
 *
 * Sources, in order of preference:
 *   1. state.lastModelId (set by the preset / HF-fetch handlers),
 *   2. n_params and rope_theta from data.model_summary,
 *   3. n_params and theta from data.inputs,
 *   4. `fallback`.
 *
 * @param {object} [data] - Result object; may be null/undefined.
 * @param {string} [fallback="model"] - Name used when nothing else is known.
 * @returns {string} Sanitised short name.
 */
function modelShortName(data, fallback="model") {
  if (state.lastModelId) {
    const fromId = safeFilename(state.lastModelId);
    // An id made only of unsafe characters sanitises to ""; try other sources
    if (fromId) return fromId;
  }
  if (data && data.model_summary) {
    const ms = data.model_summary;
    return safeFilename(`m${ms.n_params || 0}-θ${ms.rope_theta || 0}`);
  }
  if (data && data.inputs) {
    const i = data.inputs;
    return safeFilename(`m${i.n_params || ""}-θ${i.theta || ""}`);
  }
  return fallback;
}
1151
+
1152
/**
 * Wrap a result in the TAF export envelope.
 *
 * The envelope records the export type, format version, the input hash
 * returned by inputHash(type, data), and the time of export.
 *
 * @param {string} type - Export type stored in `_taf_type` and passed to inputHash.
 * @param {object} data - Result object, stored unchanged under `payload`.
 * @returns {Promise<object>} The export envelope.
 */
async function exportableData(type, data) {
  const envelope = {
    _taf_export: true,
    _taf_type: type,
    _taf_version: "0.2",
    // identical inputs ⇒ identical hash
    _taf_input_hash: await inputHash(type, data),
    _taf_timestamp: new Date().toISOString(),
    payload: data,
  };
  return envelope;
}
1163
 
1164
/**
 * Build the download filename for a result:
 * `taf-<type>-<model name>-<suffix>-<hash>.json`.
 *
 * The suffix is `T<T_eval>` when the result carries a truthy T_eval in the
 * place expected for its type; otherwise it is the recipe id, or "result".
 *
 * @param {string} type - "profile", "compare" or "recipe".
 * @param {object} data - Result object for that type.
 * @returns {Promise<string>} The filename.
 */
async function makeFilename(type, data) {
  const hash = await inputHash(type, data);
  const name = modelShortName(data);

  // Each result type keeps T_eval in a different place
  let tEval;
  switch (type) {
    case "profile":
      tEval = data.model_summary?.T_eval;
      break;
    case "compare":
      tEval = data.shared_params?.T_eval;
      break;
    case "recipe":
      tEval = data.inputs?.T_eval;
      break;
    default:
      tEval = undefined;
  }

  const suffix = tEval ? `T${tEval}` : (data.recipe_id || "result");
  return `taf-${type}-${name}-${suffix}-${hash}.json`;
}
1174
+
1175
/**
 * Build a pre-filled "new issue" URL for the registry repository.
 *
 * The title ends with `#<hash>` and the body gets a note with the same hash,
 * so an existing issue for the same inputs can be found by searching for it.
 *
 * @param {string} type - "profile", "compare", or any other value for a recipe.
 * @param {object} data - Result object for that type.
 * @returns {Promise<string>} URL under github.com/<REGISTRY_REPO>/issues/new.
 */
async function buildIssueUrl(type, data) {
  const hash = await inputHash(type, data);
  const modelName = modelShortName(data, "model");

  let title;
  let body;
  switch (type) {
    case "profile": {
      const summary = data.model_summary || {};
      title = `[TAF Profile] ${modelName} @ T=${summary.T_eval || "?"} #${hash}`;
      body = profileToMarkdown(data, hash);
      break;
    }
    case "compare":
      title = `[TAF Compare] ${data.recipe_id} × ${data.rows.length} models #${hash}`;
      body = compareToMarkdown(data, hash);
      break;
    default:
      title = `[TAF ${data.recipe_id}] ${modelName} → ${data.verdict} #${hash}`;
      body = recipeToMarkdown(data, hash);
  }

  const dedupNote = `\n\n> **Input hash**: \`#${hash}\` — search this hash in registry issues to find independent verifications. Same inputs always produce the same hash.`;
  const footer = "\n\n---\n*Submitted via [TAF Agent](https://karlesmarin.github.io/tafagent)*";

  // URLSearchParams takes care of percent-encoding the title and body
  const query = new URLSearchParams({
    title: title,
    body: body + dedupNote + footer,
  });
  return `https://github.com/${REGISTRY_REPO}/issues/new?${query.toString()}`;
}
1197
 
1198
+ function profileToMarkdown(p, hash="") {
1199
  const ms = p.model_summary || {};
1200
  const kn = p.key_numbers || {};
1201
+ let md = `## TAF Profile`;
1202
+ if (hash) md += ` \`#${hash}\``;
1203
+ md += `\n\n`;
1204
  md += `**Architecture**: ${ms.architecture_class || "?"}\n`;
1205
  md += `**Params**: ${ms.n_params}, **T_train**: ${ms.T_train}, **T_eval**: ${ms.T_eval}\n`;
1206
  md += `**θ**: ${ms.rope_theta}, GQA=${ms.has_GQA}, SWA=${ms.has_SWA}\n\n`;
 
1213
  return md;
1214
  }
1215
 
1216
+ function compareToMarkdown(c, hash="") {
1217
+ let md = `## TAF Comparison — ${c.recipe_id} (${c.recipe_name})`;
1218
+ if (hash) md += ` \`#${hash}\``;
1219
+ md += `\n\n`;
1220
  md += `**Shared params**: \`${JSON.stringify(c.shared_params)}\`\n\n`;
1221
  md += `| Model | Verdict | Reason |\n|-------|---------|--------|\n`;
1222
  c.rows.forEach(r => {
 
1226
  return md;
1227
  }
1228
 
1229
+ function recipeToMarkdown(r, hash="") {
1230
+ let md = `## TAF Recipe ${r.recipe_id} — ${r.recipe_name}`;
1231
+ if (hash) md += ` \`#${hash}\``;
1232
+ md += `\n\n`;
1233
  md += `**Verdict**: ${r.verdict}\n`;
1234
  md += `**Reason**: ${r.reason}\n`;
1235
  if (r.mitigation) md += `**Action**: ${r.mitigation}\n`;