Alex W. committed on
Commit
ba623bd
·
1 Parent(s): 5827b27

改动汇总

Browse files

core/plotter.py — _aggregate_by_layer() 改为两步:先按 (layer, kv_head) 分组取 median,再跨 kv_head 取 median/q25/q75
core/plotter_plotly.py — _agg() 同上
core/table_gen.py — 加 _pseudobulk(df, col) helper 及其封装 _pb_med / _pb_mean,所有 make_table 函数调用这两个封装而非直接 .median() / .mean()
db/writer.py — 加 _pseudobulk_col(rows, col) 和 refresh_all_summaries(conn),_calc_summary_row() 和 update_model_summary() 全部改用伪重复安全的聚合方式
ui/tab_leaderboard.py — Refresh 按钮触发 load_leaderboard(),其中静默调用 refresh_all_summaries(),在展示排行榜前先把历史数据全部用新方式重算

Files changed (5) hide show
  1. core/plotter.py +21 -7
  2. core/plotter_plotly.py +30 -8
  3. core/table_gen.py +57 -20
  4. db/writer.py +101 -50
  5. ui/tab_leaderboard.py +14 -6
core/plotter.py CHANGED
@@ -73,19 +73,33 @@ BAND_COLORS = {
73
 
74
  def _aggregate_by_layer(df: pd.DataFrame, col: str):
75
  """
76
- Group by layer, return (layers, median, q25, q75).
77
- Excludes kv_shared=True rows for KV metrics to avoid theoretical-value bias.
 
 
 
78
  """
79
  kv_cols = {"ssr_KV", "pearson_KV", "cosU_KV", "cosV_KV", "alpha_KV"}
80
  if col in kv_cols:
81
  df = df[df["kv_shared"] == 0] if "kv_shared" in df.columns else df
82
 
83
- grp = df.groupby("layer")[col]
84
  layers = np.array(sorted(df["layer"].unique()))
85
- med = grp.median().reindex(layers).values
86
- q25 = grp.quantile(0.25).reindex(layers).values
87
- q75 = grp.quantile(0.75).reindex(layers).values
88
- return layers, med, q25, q75
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
 
91
  def _global_layers(df: pd.DataFrame):
 
73
 
74
  def _aggregate_by_layer(df: pd.DataFrame, col: str):
75
  """
76
+ Pseudo-bulk two-step aggregation per layer (Nature Comms 2021).
77
+ Step 1: median across Q heads within each (layer, kv_head) group.
78
+ Step 2: median / q25 / q75 across kv_head groups per layer.
79
+ Avoids pseudoreplication bias in GQA models (e.g. 4Q:1K).
80
+ Excludes kv_shared rows for KV metrics (theoretical-value bias).
81
  """
82
  kv_cols = {"ssr_KV", "pearson_KV", "cosU_KV", "cosV_KV", "alpha_KV"}
83
  if col in kv_cols:
84
  df = df[df["kv_shared"] == 0] if "kv_shared" in df.columns else df
85
 
 
86
  layers = np.array(sorted(df["layer"].unique()))
87
+ med_vals, q25_vals, q75_vals = [], [], []
88
+
89
+ for layer in layers:
90
+ ldf = df[df["layer"] == layer]
91
+ # Step 1: median within each kv_head group
92
+ if "kv_head" in ldf.columns:
93
+ step1 = ldf.groupby("kv_head")[col].median().values
94
+ else:
95
+ step1 = ldf[col].dropna().values
96
+ step1 = step1[~np.isnan(step1)] if len(step1) > 0 else step1
97
+ # Step 2: statistics across kv_head medians
98
+ med_vals.append(float(np.median(step1)) if len(step1) > 0 else np.nan)
99
+ q25_vals.append(float(np.percentile(step1, 25)) if len(step1) > 0 else np.nan)
100
+ q75_vals.append(float(np.percentile(step1, 75)) if len(step1) > 0 else np.nan)
101
+
102
+ return layers, np.array(med_vals), np.array(q25_vals), np.array(q75_vals)
103
 
104
 
105
  def _global_layers(df: pd.DataFrame):
core/plotter_plotly.py CHANGED
@@ -63,15 +63,37 @@ TOTAL_HEIGHT = SUBPLOT_HEIGHT * len(PANELS) + 120 # +header
63
  # ─────────────────────────────────────────────────────────────────────────────
64
 
65
  def _agg(df: pd.DataFrame, col: str):
66
- """Per-layer median + IQR. Excludes kv_shared rows for KV metrics."""
 
 
 
 
 
 
67
  kv_cols = {"ssr_KV", "pearson_KV", "cosU_KV", "cosV_KV", "alpha_KV"}
68
- d = df[df["kv_shared"] == 0] if col in kv_cols and "kv_shared" in df.columns else df
69
- grp = d.groupby("layer")[col]
70
- layers = np.array(sorted(d["layer"].unique()), dtype=int)
71
- med = grp.median().reindex(layers).values.astype(float)
72
- q25 = grp.quantile(0.25).reindex(layers).values.astype(float)
73
- q75 = grp.quantile(0.75).reindex(layers).values.astype(float)
74
- return layers, med, q25, q75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
 
77
  def _global_layers(df: pd.DataFrame) -> list[int]:
 
63
  # ─────────────────────────────────────────────────────────────────────────────
64
 
65
  def _agg(df: pd.DataFrame, col: str):
66
+ """
67
+ Pseudo-bulk two-step aggregation per layer (Nature Comms 2021).
68
+ Step 1: median across Q heads within each (layer, kv_head) group.
69
+ Step 2: median / q25 / q75 across kv_head groups per layer.
70
+ Avoids pseudoreplication bias in GQA models (e.g. 4Q:1K).
71
+ Excludes kv_shared rows for KV metrics (theoretical-value bias).
72
+ """
73
  kv_cols = {"ssr_KV", "pearson_KV", "cosU_KV", "cosV_KV", "alpha_KV"}
74
+ if col in kv_cols and "kv_shared" in df.columns:
75
+ df = df[df["kv_shared"] == 0]
76
+
77
+ layers = np.array(sorted(df["layer"].unique()), dtype=int)
78
+ med_vals, q25_vals, q75_vals = [], [], []
79
+
80
+ for layer in layers:
81
+ ldf = df[df["layer"] == layer]
82
+ # Step 1: median within each kv_head group
83
+ if "kv_head" in ldf.columns:
84
+ step1 = ldf.groupby("kv_head")[col].median().values
85
+ else:
86
+ step1 = ldf[col].dropna().values
87
+ step1 = step1[~np.isnan(step1.astype(float))] if len(step1) > 0 else step1
88
+ # Step 2: statistics across kv_head medians
89
+ med_vals.append(float(np.median(step1)) if len(step1) > 0 else np.nan)
90
+ q25_vals.append(float(np.percentile(step1, 25)) if len(step1) > 0 else np.nan)
91
+ q75_vals.append(float(np.percentile(step1, 75)) if len(step1) > 0 else np.nan)
92
+
93
+ return (layers,
94
+ np.array(med_vals, dtype=float),
95
+ np.array(q25_vals, dtype=float),
96
+ np.array(q75_vals, dtype=float))
97
 
98
 
99
  def _global_layers(df: pd.DataFrame) -> list[int]:
core/table_gen.py CHANGED
@@ -32,6 +32,43 @@ def _mean(series) -> Optional[float]:
32
  return float(v.mean()) if len(v) > 0 else None
33
 
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  def _fmt(x, decimals=6) -> str:
36
  if x is None or (isinstance(x, float) and np.isnan(x)):
37
  return "—"
@@ -134,10 +171,10 @@ def make_table1(
134
  "Model": _short(model_id),
135
  "Std Layers": n_layers,
136
  "Global Layers": n_global if n_global > 0 else "—",
137
- "Median Pearson":_fmt(_med(std["pearson_QK"]), 4),
138
- "Mean Pearson": _fmt(_mean(std["pearson_QK"]), 4),
139
- "Median SSR": _fmt(_med(std["ssr_QK"]), 6),
140
- "Mean SSR": _fmt(_mean(std["ssr_QK"]), 6),
141
  })
142
  return pd.DataFrame(rows)
143
 
@@ -166,13 +203,13 @@ def make_table2(
166
  for lo, hi in group_bounds:
167
  label = f"{lo}–{hi}"
168
  grp_a = std_a[(std_a["layer"] >= lo) & (std_a["layer"] <= hi)]
169
- ssr_a = _med(grp_a["ssr_QK"])
170
 
171
  row = {"Layer Group": label, f"{_short(name_a)} SSR": _fmt(ssr_a, 6)}
172
 
173
  if std_b is not None and name_b:
174
  grp_b = std_b[(std_b["layer"] >= lo) & (std_b["layer"] <= hi)]
175
- ssr_b = _med(grp_b["ssr_QK"])
176
  row[f"{_short(name_b)} SSR"] = _fmt(ssr_b, 6)
177
  if ssr_a and ssr_b and ssr_a > 0:
178
  improvement = (ssr_a - ssr_b) / ssr_a * 100
@@ -207,9 +244,9 @@ def make_table3(
207
  "Model": _short(model_id),
208
  "d_h": head_dim,
209
  "Random 1/√d_h": _fmt(baseline, 4),
210
- "cosU(Q,K)": _fmt(_med(std["cosU_QK"]), 4),
211
- "cosU(Q,V)": _fmt(_med(std["cosU_QV"]), 4),
212
- "cosU(K,V)": _fmt(_med(std["cosU_KV"]), 4),
213
  })
214
  return pd.DataFrame(rows)
215
 
@@ -237,9 +274,9 @@ def make_table4(
237
  "Model": _short(model_id),
238
  "d_model": d_model,
239
  "Random 1/√D": _fmt(baseline, 4),
240
- "cosV(Q,K)": _fmt(_med(std["cosV_QK"]), 4),
241
- "cosV(Q,V)": _fmt(_med(std["cosV_QV"]), 4),
242
- "cosV(K,V)": _fmt(_med(std["cosV_KV"]), 4),
243
  })
244
  return pd.DataFrame(rows)
245
 
@@ -267,12 +304,12 @@ def make_table5(
267
  deep = std[std["layer"] > std["layer"].min()]
268
  rows.append({
269
  "Model": _short(model_id),
270
- "Median κ(Q) all": _fmt(_med(std["cond_Q"]), 1),
271
- "Median κ(K) all": _fmt(_med(std["cond_K"]), 1),
272
- "κ(Q) Layer 0": _fmt(_med(l0["cond_Q"]), 1),
273
- "κ(K) Layer 0": _fmt(_med(l0["cond_K"]), 1),
274
- "Median κ(Q) deep": _fmt(_med(deep["cond_Q"]), 1),
275
- "Median κ(K) deep": _fmt(_med(deep["cond_K"]), 1),
276
  })
277
  return pd.DataFrame(rows)
278
 
@@ -293,9 +330,9 @@ def make_table6(
293
  std = _standard_only(df)
294
  if std.empty:
295
  continue
296
- med_ssr = _med(std["ssr_QK"])
297
  wang_score = 1 - med_ssr if med_ssr is not None else None
298
- med_pearson = _med(std["pearson_QK"])
299
  rows.append({
300
  "Model": _short(model_id),
301
  "Std Layers": std["layer"].nunique(),
 
32
  return float(v.mean()) if len(v) > 0 else None
33
 
34
 
35
+ def _pseudobulk(df: pd.DataFrame, col: str) -> np.ndarray:
36
+ """
37
+ Pseudo-bulk two-step aggregation (Nature Comms 2021).
38
+ Step 1: median across Q heads within each (layer, kv_head) group.
39
+ Step 2: median across kv_head groups per layer.
40
+ Returns 1-D array of per-layer medians.
41
+ For MHA models this equals a plain per-layer median.
42
+ """
43
+ if df.empty or col not in df.columns:
44
+ return np.array([])
45
+ layers = sorted(df["layer"].unique())
46
+ per_layer = []
47
+ for layer in layers:
48
+ ldf = df[df["layer"] == layer]
49
+ if "kv_head" in ldf.columns:
50
+ step1 = ldf.groupby("kv_head")[col].median().values
51
+ else:
52
+ step1 = ldf[col].dropna().values
53
+ step1 = np.array(step1, dtype=float)
54
+ step1 = step1[~np.isnan(step1)]
55
+ if len(step1) > 0:
56
+ per_layer.append(float(np.median(step1)))
57
+ return np.array(per_layer, dtype=float)
58
+
59
+
60
def _pb_med(df: pd.DataFrame, col: str) -> Optional[float]:
    """Return the median of the per-layer pseudo-bulk values of ``col``.

    Returns ``None`` when ``_pseudobulk`` yields no values.
    """
    per_layer = _pseudobulk(df, col)
    if len(per_layer) == 0:
        return None
    return float(np.median(per_layer))
64
+
65
+
66
def _pb_mean(df: pd.DataFrame, col: str) -> Optional[float]:
    """Return the mean of the per-layer pseudo-bulk values of ``col``.

    Returns ``None`` when ``_pseudobulk`` yields no values.
    """
    per_layer = _pseudobulk(df, col)
    if len(per_layer) == 0:
        return None
    return float(np.mean(per_layer))
70
+
71
+
72
  def _fmt(x, decimals=6) -> str:
73
  if x is None or (isinstance(x, float) and np.isnan(x)):
74
  return "—"
 
171
  "Model": _short(model_id),
172
  "Std Layers": n_layers,
173
  "Global Layers": n_global if n_global > 0 else "—",
174
+ "Median Pearson":_fmt(_pb_med(std, "pearson_QK"), 4),
175
+ "Mean Pearson": _fmt(_pb_mean(std, "pearson_QK"), 4),
176
+ "Median SSR": _fmt(_pb_med(std, "ssr_QK"), 6),
177
+ "Mean SSR": _fmt(_pb_mean(std, "ssr_QK"), 6),
178
  })
179
  return pd.DataFrame(rows)
180
 
 
203
  for lo, hi in group_bounds:
204
  label = f"{lo}–{hi}"
205
  grp_a = std_a[(std_a["layer"] >= lo) & (std_a["layer"] <= hi)]
206
+ ssr_a = _pb_med(grp_a, "ssr_QK")
207
 
208
  row = {"Layer Group": label, f"{_short(name_a)} SSR": _fmt(ssr_a, 6)}
209
 
210
  if std_b is not None and name_b:
211
  grp_b = std_b[(std_b["layer"] >= lo) & (std_b["layer"] <= hi)]
212
+ ssr_b = _pb_med(grp_b, "ssr_QK")
213
  row[f"{_short(name_b)} SSR"] = _fmt(ssr_b, 6)
214
  if ssr_a and ssr_b and ssr_a > 0:
215
  improvement = (ssr_a - ssr_b) / ssr_a * 100
 
244
  "Model": _short(model_id),
245
  "d_h": head_dim,
246
  "Random 1/√d_h": _fmt(baseline, 4),
247
+ "cosU(Q,K)": _fmt(_pb_med(std, "cosU_QK"), 4),
248
+ "cosU(Q,V)": _fmt(_pb_med(std, "cosU_QV"), 4),
249
+ "cosU(K,V)": _fmt(_pb_med(std, "cosU_KV"), 4),
250
  })
251
  return pd.DataFrame(rows)
252
 
 
274
  "Model": _short(model_id),
275
  "d_model": d_model,
276
  "Random 1/√D": _fmt(baseline, 4),
277
+ "cosV(Q,K)": _fmt(_pb_med(std, "cosV_QK"), 4),
278
+ "cosV(Q,V)": _fmt(_pb_med(std, "cosV_QV"), 4),
279
+ "cosV(K,V)": _fmt(_pb_med(std, "cosV_KV"), 4),
280
  })
281
  return pd.DataFrame(rows)
282
 
 
304
  deep = std[std["layer"] > std["layer"].min()]
305
  rows.append({
306
  "Model": _short(model_id),
307
+ "Median κ(Q) all": _fmt(_pb_med(std, "cond_Q"), 1),
308
+ "Median κ(K) all": _fmt(_pb_med(std, "cond_K"), 1),
309
+ "κ(Q) Layer 0": _fmt(_pb_med(l0, "cond_Q"), 1),
310
+ "κ(K) Layer 0": _fmt(_pb_med(l0, "cond_K"), 1),
311
+ "Median κ(Q) deep": _fmt(_pb_med(deep, "cond_Q"), 1),
312
+ "Median κ(K) deep": _fmt(_pb_med(deep, "cond_K"), 1),
313
  })
314
  return pd.DataFrame(rows)
315
 
 
330
  std = _standard_only(df)
331
  if std.empty:
332
  continue
333
+ med_ssr = _pb_med(std, "ssr_QK")
334
  wang_score = 1 - med_ssr if med_ssr is not None else None
335
+ med_pearson = _pb_med(std, "pearson_QK")
336
  rows.append({
337
  "Model": _short(model_id),
338
  "Std Layers": std["layer"].nunique(),
db/writer.py CHANGED
@@ -2,7 +2,7 @@
2
  """
3
  数据库写入模块
4
  - 写入分析结果到 layer_head_metrics
5
- - 计算并写入 model_summary
6
  - 支持断点续传(以 prefix+layer 为粒度)
7
  - 写入权限验证
8
  """
@@ -10,6 +10,7 @@
10
  import os
11
  import sqlite3
12
  import numpy as np
 
13
  from datetime import datetime
14
  from db.schema import get_connection, init_db
15
 
@@ -19,21 +20,10 @@ from db.schema import get_connection, init_db
19
  # ─────────────────────────────────────────────
20
 
21
  def infer_layer_type(kv_shared: bool) -> str:
22
- """
23
- 从结构特征推断层类型
24
- kv_shared=True → 'global' (K=V共享,如 Gemma 全局层)
25
- kv_shared=False → 'standard'
26
- """
27
  return "global" if kv_shared else "standard"
28
 
29
 
30
  def infer_modality(prefix: str) -> str:
31
- """
32
- 从组件前缀推断模态
33
- 纯关键词匹配,不 hard coding 模型名
34
- 未匹配到任何关键词 → 默认 'language'
35
- (覆盖纯语言模型,如 "model." 前缀的 LLaMA/Qwen)
36
- """
37
  p = prefix.lower()
38
  if "vision" in p or "visual" in p or "image" in p:
39
  return "vision"
@@ -47,15 +37,6 @@ def infer_modality(prefix: str) -> str:
47
  # ─────────────────────────────────────────────
48
 
49
  def check_write_permission(admin_token: str) -> bool:
50
- """
51
- 验证管理员写入权限。
52
- WRITE_TOKEN 存储在 HF Space Secrets(加密,不进入 git repo)。
53
- 运行时由 HF 注入为环境变量,只在服务端比对,不返回给前端。
54
-
55
- 返回:
56
- True = 有写入权限
57
- False = 只读模式(分析可以跑,结果不写库)
58
- """
59
  server_token = os.environ.get("WRITE_TOKEN", "")
60
  if not server_token:
61
  return False
@@ -71,7 +52,6 @@ def get_analyzed_layers(
71
  model_id: str,
72
  prefix: str,
73
  ) -> set:
74
- """返回已完成分析的层号集合"""
75
  cur = conn.cursor()
76
  cur.execute(
77
  """SELECT DISTINCT layer FROM layer_head_metrics
@@ -88,7 +68,6 @@ def is_layer_complete(
88
  layer: int,
89
  expected_records: int,
90
  ) -> bool:
91
- """检查某层是否已完整写入"""
92
  cur = conn.cursor()
93
  cur.execute(
94
  """SELECT COUNT(*) FROM layer_head_metrics
@@ -226,12 +205,67 @@ def write_layer_records(
226
  conn.commit()
227
 
228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  # ─────────────────────────────────────────────
230
  # 计算并写入 model_summary
231
  # ─────────────────────────────────────────────
232
 
233
  def _calc_summary_row(
234
- rows: list,
235
  model_id: str,
236
  prefix: str,
237
  layer_type: str,
@@ -239,14 +273,13 @@ def _calc_summary_row(
239
  if not rows:
240
  return None
241
 
242
- def col(name):
243
- vals = [r[name] for r in rows if r[name] is not None]
244
- return np.array(vals, dtype=float) if vals else np.array([])
245
 
246
  def med(arr): return float(np.median(arr)) if len(arr) > 0 else None
247
  def avg(arr): return float(np.mean(arr)) if len(arr) > 0 else None
248
 
249
- ssr_qk = col("ssr_QK")
250
  wang_score = float(1 - np.median(ssr_qk)) if len(ssr_qk) > 0 else None
251
  n_layers = len(set(r["layer"] for r in rows))
252
  n_records = len(rows)
@@ -255,18 +288,18 @@ def _calc_summary_row(
255
  "model_id": model_id,
256
  "prefix": prefix,
257
  "layer_type": layer_type,
258
- "median_pearson_QK": med(col("pearson_QK")),
259
- "mean_pearson_QK": avg(col("pearson_QK")),
260
  "median_ssr_QK": med(ssr_qk),
261
  "mean_ssr_QK": avg(ssr_qk),
262
- "median_ssr_QV": med(col("ssr_QV")),
263
- "mean_ssr_QV": avg(col("ssr_QV")),
264
- "median_cond_Q": med(col("cond_Q")),
265
- "mean_cond_Q": avg(col("cond_Q")),
266
- "median_cosU_QK": med(col("cosU_QK")),
267
- "median_cosU_QV": med(col("cosU_QV")),
268
- "median_cosV_QK": med(col("cosV_QK")),
269
- "median_cosV_QV": med(col("cosV_QV")),
270
  "wang_score": wang_score,
271
  "n_layers": n_layers,
272
  "n_records": n_records,
@@ -280,21 +313,20 @@ def update_model_summary(
280
  prefix: str,
281
  ):
282
  """
283
- 重新计算并写入 model_summary(all / standard / global 三行)
284
- wang_score 统一用 standard 层计算
285
  """
286
  cur = conn.cursor()
 
287
 
288
- # 预取 standard 层 ssr_QK(wang_score 统一用这个)
289
  cur.execute(
290
- """SELECT ssr_QK FROM layer_head_metrics
291
- WHERE model_id = ? AND prefix = ? AND layer_type = 'standard'""",
 
292
  (model_id, prefix)
293
  )
294
- std_ssr_rows = cur.fetchall()
295
- std_ssr = np.array(
296
- [r[0] for r in std_ssr_rows if r[0] is not None], dtype=float
297
- )
298
  std_wang_score = float(1 - np.median(std_ssr)) if len(std_ssr) > 0 else None
299
 
300
  for layer_type in ["all", "standard", "global"]:
@@ -315,8 +347,7 @@ def update_model_summary(
315
  if summary is None:
316
  continue
317
 
318
- # wang_score 统一用 standard
319
- summary["wang_score"] = std_wang_score
320
 
321
  conn.execute(
322
  """INSERT OR REPLACE INTO model_summary(
@@ -341,4 +372,24 @@ def update_model_summary(
341
  summary
342
  )
343
 
344
- conn.commit()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  """
3
  数据库写入模块
4
  - 写入分析结果到 layer_head_metrics
5
+ - 计算并写入 model_summary(pseudo-bulk 两步聚合,避免 GQA 伪重复)
6
  - 支持断点续传(以 prefix+layer 为粒度)
7
  - 写入权限验证
8
  """
 
10
  import os
11
  import sqlite3
12
  import numpy as np
13
+ from collections import defaultdict
14
  from datetime import datetime
15
  from db.schema import get_connection, init_db
16
 
 
20
  # ─────────────────────────────────────────────
21
 
22
def infer_layer_type(kv_shared: bool) -> str:
    """Map the KV-sharing flag to a layer-type label.

    Returns ``"global"`` when ``kv_shared`` is truthy, ``"standard"`` otherwise.
    """
    if kv_shared:
        return "global"
    return "standard"
24
 
25
 
26
  def infer_modality(prefix: str) -> str:
 
 
 
 
 
 
27
  p = prefix.lower()
28
  if "vision" in p or "visual" in p or "image" in p:
29
  return "vision"
 
37
  # ─────────────────────────────────────────────
38
 
39
  def check_write_permission(admin_token: str) -> bool:
 
 
 
 
 
 
 
 
 
40
  server_token = os.environ.get("WRITE_TOKEN", "")
41
  if not server_token:
42
  return False
 
52
  model_id: str,
53
  prefix: str,
54
  ) -> set:
 
55
  cur = conn.cursor()
56
  cur.execute(
57
  """SELECT DISTINCT layer FROM layer_head_metrics
 
68
  layer: int,
69
  expected_records: int,
70
  ) -> bool:
 
71
  cur = conn.cursor()
72
  cur.execute(
73
  """SELECT COUNT(*) FROM layer_head_metrics
 
205
  conn.commit()
206
 
207
 
208
+ # ─────────────────────────────────────────────
209
+ # Pseudo-bulk 聚合核心函数
210
+ # ─────────────────────────────────────────────
211
+
212
+ def _pseudobulk(rows, col_name: str) -> np.ndarray:
213
+ """
214
+ Pseudo-bulk two-step aggregation (Nature Comms 2021).
215
+ Avoids GQA pseudoreplication (e.g. 4Q:1K → 4 correlated records per KV head).
216
+
217
+ Step 1: median within each (layer, kv_head) group
218
+ → one value per KV-head per layer
219
+ Step 2: return flat array of Step-1 values
220
+ → caller computes final median / mean / quantile
221
+
222
+ Works with both sqlite3.Row objects and plain dicts.
223
+ """
224
+ groups: dict[tuple, list] = defaultdict(list)
225
+ for r in rows:
226
+ try:
227
+ v = r["ssr_QK"] if col_name == "ssr_QK" else r[col_name]
228
+ layer = int(r["layer"])
229
+ kv_head = int(r["kv_head"]) if r["kv_head"] is not None else 0
230
+ except (KeyError, TypeError, IndexError):
231
+ continue
232
+ if v is None:
233
+ continue
234
+ groups[(layer, kv_head)].append(float(v))
235
+
236
+ if not groups:
237
+ return np.array([])
238
+
239
+ # Step 1: median within each (layer, kv_head) group
240
+ return np.array([float(np.median(vals)) for vals in groups.values()])
241
+
242
+
243
def _pseudobulk_col(rows, col_name: str) -> np.ndarray:
    """
    Return one median of ``col_name`` per (layer, kv_head) group of ``rows``.

    Thin alias of ``_pseudobulk``: both functions carried the same grouping
    logic line for line, so this one delegates to keep a single implementation.

    Args:
        rows: iterable of records indexable by column name.
        col_name: name of the metric column to aggregate.

    Returns:
        1-D float array of per-group medians; empty when nothing is usable.
    """
    return _pseudobulk(rows, col_name)
261
+
262
+
263
  # ─────────────────────────────────────────────
264
  # 计算并写入 model_summary
265
  # ─────────────────────────────────────────────
266
 
267
  def _calc_summary_row(
268
+ rows,
269
  model_id: str,
270
  prefix: str,
271
  layer_type: str,
 
273
  if not rows:
274
  return None
275
 
276
+ def pb(col):
277
+ return _pseudobulk_col(rows, col)
 
278
 
279
  def med(arr): return float(np.median(arr)) if len(arr) > 0 else None
280
  def avg(arr): return float(np.mean(arr)) if len(arr) > 0 else None
281
 
282
+ ssr_qk = pb("ssr_QK")
283
  wang_score = float(1 - np.median(ssr_qk)) if len(ssr_qk) > 0 else None
284
  n_layers = len(set(r["layer"] for r in rows))
285
  n_records = len(rows)
 
288
  "model_id": model_id,
289
  "prefix": prefix,
290
  "layer_type": layer_type,
291
+ "median_pearson_QK": med(pb("pearson_QK")),
292
+ "mean_pearson_QK": avg(pb("pearson_QK")),
293
  "median_ssr_QK": med(ssr_qk),
294
  "mean_ssr_QK": avg(ssr_qk),
295
+ "median_ssr_QV": med(pb("ssr_QV")),
296
+ "mean_ssr_QV": avg(pb("ssr_QV")),
297
+ "median_cond_Q": med(pb("cond_Q")),
298
+ "mean_cond_Q": avg(pb("cond_Q")),
299
+ "median_cosU_QK": med(pb("cosU_QK")),
300
+ "median_cosU_QV": med(pb("cosU_QV")),
301
+ "median_cosV_QK": med(pb("cosV_QK")),
302
+ "median_cosV_QV": med(pb("cosV_QV")),
303
  "wang_score": wang_score,
304
  "n_layers": n_layers,
305
  "n_records": n_records,
 
313
  prefix: str,
314
  ):
315
  """
316
+ 重新计算并写入 model_summary(all / standard / global 三行)
317
+ wang_score 统一用 standard 层 pseudo-bulk median(SSR_QK) 计算
318
  """
319
  cur = conn.cursor()
320
+ cur.row_factory = sqlite3.Row
321
 
322
+ # ── Wang Score: standard 层 pseudo-bulk ──────────────────────────────
323
  cur.execute(
324
+ """SELECT layer, kv_head, ssr_QK FROM layer_head_metrics
325
+ WHERE model_id = ? AND prefix = ? AND layer_type = 'standard'
326
+ AND kv_shared = 0""",
327
  (model_id, prefix)
328
  )
329
+ std_ssr = _pseudobulk_col(cur.fetchall(), "ssr_QK")
 
 
 
330
  std_wang_score = float(1 - np.median(std_ssr)) if len(std_ssr) > 0 else None
331
 
332
  for layer_type in ["all", "standard", "global"]:
 
347
  if summary is None:
348
  continue
349
 
350
+ summary["wang_score"] = std_wang_score # always from standard pseudo-bulk
 
351
 
352
  conn.execute(
353
  """INSERT OR REPLACE INTO model_summary(
 
372
  summary
373
  )
374
 
375
+ conn.commit()
376
+
377
+
378
+ # ─────────────────────────────────────────────
379
+ # 批量刷新所有模型的 model_summary
380
+ # ─────────────────────────────────────────────
381
+
382
def refresh_all_summaries(conn: sqlite3.Connection) -> int:
    """
    Rebuild ``model_summary`` for every (model_id, prefix) pair in the DB.

    Reads the distinct pairs from ``layer_head_metrics`` and calls
    ``update_model_summary`` once for each of them.

    Args:
        conn: open SQLite connection.

    Returns:
        Number of (model_id, prefix) pairs that were refreshed.
    """
    cursor = conn.cursor()
    cursor.execute(
        "SELECT DISTINCT model_id, prefix FROM layer_head_metrics"
    )
    refreshed = 0
    for model_id, prefix in cursor.fetchall():
        update_model_summary(conn, model_id, prefix)
        refreshed += 1
    return refreshed
ui/tab_leaderboard.py CHANGED
@@ -1,7 +1,8 @@
1
  # ui/tab_leaderboard.py
2
  """
3
  Tab3: Wang's Five Laws Leaderboard
4
- - Ranked by wang_score (= 1 − median SSR_QK, standard layers only)
 
5
  - Filter by modality (default: language)
6
  - Filter by layer_type (default: standard)
7
  """
@@ -12,6 +13,7 @@ import numpy as np
12
 
13
  from db.schema import init_db
14
  from db.reader import get_leaderboard
 
15
 
16
 
17
  def _format_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
@@ -47,8 +49,12 @@ def load_leaderboard(
47
  layer_type: str,
48
  ) -> tuple[pd.DataFrame, str]:
49
  conn = init_db()
50
- lt = layer_type if layer_type != "all" else "standard"
51
- mod = modality
 
 
 
 
52
 
53
  df = get_leaderboard(conn, modality=mod, layer_type=lt, limit=100)
54
 
@@ -62,7 +68,8 @@ def load_leaderboard(
62
  formatted = _format_leaderboard(df)
63
  status = (
64
  f"✅ {len(formatted)} entries "
65
- f"| modality={mod} layer_type={lt}"
 
66
  )
67
  return formatted, status
68
 
@@ -72,11 +79,12 @@ def build_tab_leaderboard():
72
  gr.Markdown(r"""
73
  ## Wang's Five Laws — Model Leaderboard
74
 
75
- **Wang Score = 1 − median(SSR\_QK)** Higher is better. Theoretical max = 1.
76
  Computed from `standard` layers only (global/KV-shared layers excluded).
 
77
 
78
  > 王氏评分 = 1 − median(SSR_QK),越高越好,理论极值=1。
79
- > 仅基于 standard 层计算(排除 K=V 共享的全局层干扰)
80
  """)
81
 
82
  with gr.Row():
 
1
  # ui/tab_leaderboard.py
2
  """
3
  Tab3: Wang's Five Laws Leaderboard
4
+ - Ranked by wang_score (= 1 − pseudo-bulk median SSR_QK, standard layers only)
5
+ - On Refresh: silently re-computes all model_summary rows (pseudo-bulk migration)
6
  - Filter by modality (default: language)
7
  - Filter by layer_type (default: standard)
8
  """
 
13
 
14
  from db.schema import init_db
15
  from db.reader import get_leaderboard
16
+ from db.writer import refresh_all_summaries
17
 
18
 
19
  def _format_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
 
49
  layer_type: str,
50
  ) -> tuple[pd.DataFrame, str]:
51
  conn = init_db()
52
+
53
+ # ── Silently refresh all summaries (pseudo-bulk migration) ────────────
54
+ n_refreshed = refresh_all_summaries(conn)
55
+
56
+ lt = layer_type if layer_type != "all" else "standard"
57
+ mod = modality
58
 
59
  df = get_leaderboard(conn, modality=mod, layer_type=lt, limit=100)
60
 
 
68
  formatted = _format_leaderboard(df)
69
  status = (
70
  f"✅ {len(formatted)} entries "
71
+ f"| modality={mod} layer_type={lt} "
72
+ f"| summaries refreshed: {n_refreshed}"
73
  )
74
  return formatted, status
75
 
 
79
  gr.Markdown(r"""
80
  ## Wang's Five Laws — Model Leaderboard
81
 
82
+ **Wang Score = 1 − median(SSR\_QK)** Higher is better. Theoretical max = 1.
83
  Computed from `standard` layers only (global/KV-shared layers excluded).
84
+ Metrics use **pseudo-bulk aggregation** (Nature Comms 2021) to avoid GQA pseudoreplication.
85
 
86
  > 王氏评分 = 1 − median(SSR_QK),越高越好,理论极值=1。
87
+ > 仅基于 standard 层计算。采用 pseudo-bulk 两步聚合避免 GQA 伪重复计数
88
  """)
89
 
90
  with gr.Row():