Alex W. committed on
Commit
a69ce56
·
1 Parent(s): f467caf

改动清单

Browse files

[改动1] 顶部 + is_vision_key() + 主循环 新增 VISION_KEY_PATTERNS,自动检测并跳过视觉层(vision/visual/vit等),log 显示跳过计数
[改动2] compute_right_vector_alignment() 新增函数:计算右奇异向量(输入子空间)cosV,对应第五定律
[改动3] analyze_layer_heads() + 主循环 加载 W_v;计算 Q-V、K-V 全套指标;新增 sigma_max/min、cond_Q/K/V(第三定律)
[改动4] analyze_layer_heads() 签名 新增 modality 参数,结果表中记录 "text"/"vision"
[改动5] 全局汇总 分模态统计;补全五定律全部指标的 Median/Mean/Min/Max
[改动6] Gradio UI 推荐列表 更新为 gemma-4-e2b / gemma-4-e4b-it,补充多模态说明
[改动7] read_safetensors_header() 加入 __metadata__ 过滤(参考 reference code)
[改动8] 结果表 headers 扩展为 31 列,覆盖全部新增指标

Files changed (1) hide show
  1. app.py +366 -246
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import requests
3
  import struct
4
  import json
 
5
  import numpy as np
6
  import torch
7
  from scipy.stats import pearsonr, spearmanr
@@ -31,6 +32,13 @@ except AttributeError:
31
  UNSUPPORTED_SVD_DTYPES = {"I8", "U8", "I32", "I64", "F8_E4M3", "F8_E5M2"}
32
  QUANTIZED_KEY_SIGNATURES = ["qweight", "qzeros", "scales", "g_idx", "packed_weight"]
33
 
 
 
 
 
 
 
 
34
 
35
  # ─────────────────────────────────────────────
36
  # 工具函数
@@ -51,7 +59,10 @@ def read_safetensors_header(url: str, token: str = None) -> tuple[dict, int]:
51
  timeout=30
52
  )
53
  r.raise_for_status()
54
- return json.loads(r.content), header_size
 
 
 
55
 
56
 
57
  def load_tensor_remote(
@@ -100,7 +111,8 @@ def get_safetensor_files(model_id: str, token: str = None) -> list:
100
 
101
 
102
  def find_index_file(model_id: str, token: str = None) -> dict | None:
103
- url = f"https://huggingface.co/{model_id}/resolve/main/model.safetensors.index.json"
 
104
  headers = {"Authorization": f"Bearer {token}"} if token else {}
105
  r = requests.get(url, headers=headers, timeout=15)
106
  return r.json() if r.status_code == 200 else None
@@ -114,15 +126,20 @@ def _http_error_msg(e: requests.exceptions.HTTPError, model_id: str) -> str:
114
  return f"❌ HTTP {code}:{e}"
115
 
116
 
 
 
 
 
 
 
117
  # ─────────────────────────────────────────────
118
- # 量化三重检测
119
  # ─────────────────────────────────────────────
120
 
121
  def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
122
  headers = {"Authorization": f"Bearer {token}"} if token else {}
123
  warnings = []
124
 
125
- # 检测 1:config.json
126
  try:
127
  r = requests.get(
128
  f"https://huggingface.co/{model_id}/resolve/main/config.json",
@@ -136,8 +153,7 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
136
  cfg.get("quantization","")).lower()
137
  if "gptq" in qt:
138
  bits = qcfg.get("bits","?")
139
- return True, (f"❌ 检测到 GPTQ {bits}bit 量化\n"
140
- f" 请改用原始 BF16 版本。")
141
  if "awq" in qt:
142
  return True, "❌ 检测到 AWQ 量化,请改用原始 BF16 版本。"
143
  if "bitsandbytes" in qt or "bnb" in qt:
@@ -145,7 +161,6 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
145
  except Exception:
146
  warnings.append("⚠️ 无法读取 config.json")
147
 
148
- # 检测 2:文件名 / 模型名关键词
149
  mid_lower = model_id.lower()
150
  for kw in ["gptq","awq","gguf"]:
151
  if kw in mid_lower:
@@ -160,7 +175,6 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
160
  except Exception as e:
161
  warnings.append(f"⚠️ 文件列表检测失败:{e}")
162
 
163
- # 检测 3:header key 签名
164
  try:
165
  index_data = find_index_file(model_id, token)
166
  if index_data:
@@ -173,10 +187,9 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
173
  bad_keys = [k for k in all_keys
174
  if any(sig in k for sig in QUANTIZED_KEY_SIGNATURES)]
175
  if bad_keys:
176
- return True, (f"❌ 检测到量化 key:{bad_keys[:3]}\n"
177
- f" 请使用原始 BF16 版本。")
178
  dtypes = {hdr[k].get("dtype","") for k in all_keys[:20]}
179
- good = dtypes - UNSUPPORTED_SVD_DTYPES
180
  if good:
181
  warnings.append(f"✅ 权重格式:{good}")
182
  except Exception as e:
@@ -187,47 +200,36 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
187
 
188
 
189
  # ─────────────────────────────────────────────
190
- # GQA 参数自动推断
191
  # ─────────────────────────────────────────────
192
 
193
- def infer_gqa_params(W_q: torch.Tensor, W_k: torch.Tensor, config: dict | None) -> tuple[int,int,int]:
194
- """
195
- 自动推断:
196
- - n_q_heads : Q 头数量
197
- - n_kv_heads : KV 头数量(GQA)
198
- - d_head : 每个头的维度
199
-
200
- 权重 shape 约定(最常见):
201
- W_q : (n_q_heads * d_head, d_model) → shape[0] = n_q * d_h
202
- W_k : (n_kv_heads * d_head, d_model) → shape[0] = n_kv * d_h
203
-
204
- d_head 优先从 config.json 读取,其次用常见默认值猜测。
205
- """
206
- q_rows, d_model = W_q.shape[0], W_q.shape[1]
207
- k_rows = W_k.shape[0]
208
 
209
- # 从 config.json 读取 d_head
210
  d_head = None
211
  if config:
212
  d_head = (
213
  config.get("head_dim") or
214
  config.get("kv_channels") or
215
- config.get("hidden_size", 0) // config.get("num_attention_heads", 1)
216
  )
217
  if d_head == 0:
218
  d_head = None
219
 
220
- # 如果 config 没给,用常见值探测(64, 80, 96, 128, 256)
221
  if not d_head:
222
- for candidate in [256, 128, 96, 80, 64]:
223
  if q_rows % candidate == 0 and k_rows % candidate == 0:
224
  d_head = candidate
225
  break
226
 
227
  if not d_head:
228
  raise ValueError(
229
- f"无法推断 d_head:W_q.shape={W_q.shape}, W_k.shape={W_k.shape}\n"
230
- f"请在 config.json 中确认 head_dim 字段。"
231
  )
232
 
233
  n_q_heads = q_rows // d_head
@@ -235,164 +237,231 @@ def infer_gqa_params(W_q: torch.Tensor, W_k: torch.Tensor, config: dict | None)
235
 
236
  if n_q_heads % n_kv_heads != 0:
237
  raise ValueError(
238
- f"n_q_heads={n_q_heads} 不能被 n_kv_heads={n_kv_heads} 整除"
239
- f"请检查 d_head 推断是否正确。"
240
  )
241
-
242
  return n_q_heads, n_kv_heads, d_head
243
 
244
 
245
  # ─────────────────────────────────────────────
246
- # 逐头 SVD 指标计算
247
  # ─────────────────────────────────────────────
248
 
249
- def compute_pearson_corr_torch(s_q: torch.Tensor, s_k: torch.Tensor) -> float:
250
- sq = s_q.cpu().numpy()
251
- sk = s_k.cpu().numpy()
252
- r, _ = pearsonr(sq, sk)
253
- return float(r)
 
254
 
255
 
256
  def compute_singular_value_ratio(
257
- s_q: torch.Tensor, s_k: torch.Tensor
258
  ) -> tuple[float, float]:
259
- """
260
- 估计尺度因子 α = median(s_q / s_k)
261
- 残差 = mean|s_q - α * s_k| / mean(s_q)
262
- """
263
- min_len = min(s_q.shape[0], s_k.shape[0])
264
- sq = s_q[:min_len]
265
- sk = s_k[:min_len]
266
- ratio = sq / (sk + 1e-10)
267
- alpha = float(ratio.median())
268
- residual = float((sq - alpha * sk).abs().mean() / (sq.mean() + 1e-10))
269
- return alpha, residual
 
 
 
 
 
 
 
 
270
 
271
 
272
  def compute_left_vector_alignment(
273
- U_q: torch.Tensor, U_k: torch.Tensor
274
  ) -> float:
275
  """
276
- 第四定律:左奇异向量(输出子空间)对齐度
277
- cos_u = mean_i |<u_q_i, u_k_i>|
 
 
278
  """
279
- min_len = min(U_q.shape[1], U_k.shape[1])
280
- U_q = U_q[:, :min_len]
281
- U_k = U_k[:, :min_len]
282
- cos_vals = (U_q * U_k).sum(dim=0).abs()
283
- return float(cos_vals.mean())
 
284
 
285
 
286
- def compute_covariance_alignment(
287
- W_q: torch.Tensor, W_k: torch.Tensor, alpha: float
 
288
  ) -> float:
289
  """
290
- 协方差矩阵对齐误差
291
- err = ||W_q W_q^T - α² W_k W_k^T||_F / ||W_k W_k^T||_F
 
 
292
  """
293
- cov_q = W_q @ W_q.T
294
- cov_k = W_k @ W_k.T
295
- diff = cov_q - (alpha ** 2) * cov_k
296
- err = float(torch.norm(diff, p='fro') / (torch.norm(cov_k, p='fro') + 1e-10))
297
- return err
 
298
 
299
 
300
- def compute_ssr(s_q: torch.Tensor, s_k: torch.Tensor) -> float:
301
- """
302
- 第二定律:归一化谱形状残差
303
- SSR = mean_i |s̃_q_i - s̃_k_i|
304
- """
305
- min_len = min(s_q.shape[0], s_k.shape[0])
306
- sq = s_q[:min_len].cpu().numpy()
307
- sk = s_k[:min_len].cpu().numpy()
308
- sq_n = sq / (np.linalg.norm(sq) + 1e-10)
309
- sk_n = sk / (np.linalg.norm(sk) + 1e-10)
310
- return float(np.mean(np.abs(sq_n - sk_n)))
311
-
312
 
313
  def analyze_layer_heads(
314
  W_q: torch.Tensor,
315
  W_k: torch.Tensor,
 
316
  layer_idx: int,
317
  n_q_heads: int,
318
  n_kv_heads: int,
319
  d_head: int,
 
320
  ) -> tuple[list[dict], str]:
321
  """
322
- GQA 逐头分析:
323
- - 每个 KV 头对应 group_size = n_q_heads // n_kv_heads 个 Q 头
324
- - 每个 Q 头分别与其 K 头做 SVD 指标
 
325
  """
326
  group_size = n_q_heads // n_kv_heads
327
  records = []
328
  log_lines = []
329
 
330
  log_lines.append(
331
- f"\n{'─'*70}\n"
332
- f"Layer {layer_idx:3d} "
333
- f"[n_q={n_q_heads}, n_kv={n_kv_heads}, "
334
- f"group={group_size}, d_head={d_head}]\n"
335
- f"{'─'*70}\n"
336
  )
 
337
  log_lines.append(
338
- f" {'KV':>4} {'Q':>4} "
339
- f"{'Pearson':>8} {'Spearman':>9} "
340
- f"{'α':>7} {'α残差':>8} "
341
- f"{'cos(Uq,Uk)':>10} {'协方差误差':>10} {'SSR':>10}\n"
 
 
342
  )
343
 
344
  for kv_h in range(n_kv_heads):
345
- # ── 提取 K 头矩阵 (d_head × d_model) ──
 
346
  k_tensor = W_k[kv_h * d_head : (kv_h + 1) * d_head, :]
347
- U_k, s_k, _ = torch.linalg.svd(k_tensor, full_matrices=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
  for q_offset in range(group_size):
350
- h_idx = kv_h * group_size + q_offset
351
-
352
- # ── 提取 Q 头矩阵 (d_head × d_model) ──
353
  q_tensor = W_q[h_idx * d_head : (h_idx + 1) * d_head, :]
354
- U_q, s_q, _ = torch.linalg.svd(q_tensor, full_matrices=False)
355
-
356
- # 1. Pearson r(第一定律)
357
- min_len = min(s_q.shape[0], s_k.shape[0])
358
- pearson_r = compute_pearson_corr_torch(s_q[:min_len], s_k[:min_len])
359
-
360
- # 2. Spearman r(排名相关,对异常值更鲁棒)
361
- spearman_r, _ = spearmanr(
362
- s_q[:min_len].cpu().numpy(),
363
- s_k[:min_len].cpu().numpy()
364
- )
365
-
366
- # 3. 尺度因子 α 与残差
367
- alpha, alpha_res = compute_singular_value_ratio(s_q, s_k)
368
-
369
- # 4. 左奇异向量对齐(第四定律)
370
- cos_u = compute_left_vector_alignment(U_q, U_k)
371
-
372
- # 5. 协方差矩阵对齐误差
373
- cov_err = compute_covariance_alignment(q_tensor, k_tensor, alpha)
374
-
375
- # 6. SSR(第二定律)
376
- ssr = compute_ssr(s_q, s_k)
 
 
 
 
 
 
 
 
 
 
 
 
377
 
378
  records.append({
379
- "Layer": layer_idx,
380
- "KV_head": kv_h,
381
- "Q_head": h_idx,
382
- "Pearson_r": round(pearson_r, 6),
383
- "Spearman_r": round(float(spearman_r), 6),
384
- "Alpha": round(alpha, 4),
385
- "Alpha_res": round(alpha_res, 6),
386
- "cos_Uq_Uk": round(cos_u, 6),
387
- "Cov_err": round(cov_err, 6),
388
- "SSR": round(ssr, 6),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
  })
390
 
391
  log_lines.append(
392
- f" KV={kv_h:>3d} Q={h_idx:>3d} "
393
- f"{pearson_r:>+8.4f} {float(spearman_r):>+9.4f} "
394
- f"{alpha:>7.4f} {alpha_res:>8.2e} "
395
- f"{cos_u:>10.4f} {cov_err:>10.4f} {ssr:>10.6f}\n"
 
 
396
  )
397
 
398
  return records, "".join(log_lines)
@@ -412,17 +481,17 @@ def analyze_model(
412
  return "❌ 请输入模型 ID", None
413
 
414
  token = hf_token.strip() or None
415
- log_lines = [f"🔍 分析模型:{model_id}\n{'═'*70}\n"]
416
  all_records: list[dict] = []
417
 
418
  # ── 量化检测 ─────────────────────────────────
419
  progress(0.02, desc="量化检测...")
420
  is_blocked, quant_msg = check_quantization(model_id, token)
421
- log_lines.append(f"【量化检测】\n{quant_msg}\n{'─'*70}\n")
422
  if is_blocked:
423
  return "".join(log_lines), None
424
 
425
- # ── 读取 config.json(用于推断 d_head)────────
426
  config = None
427
  try:
428
  r = requests.get(
@@ -434,19 +503,20 @@ def analyze_model(
434
  config = r.json()
435
  log_lines.append(
436
  f"📋 config.json:\n"
437
- f" hidden_size = {config.get('hidden_size')}\n"
 
438
  f" num_attention_heads = {config.get('num_attention_heads')}\n"
439
  f" num_key_value_heads = {config.get('num_key_value_heads')}\n"
440
- f" head_dim = {config.get('head_dim')}\n"
441
- f"{'─'*70}\n"
442
  )
443
  except Exception:
444
  log_lines.append("⚠️ 无法读取 config.json,将从 weight shape 自动推断\n")
445
 
446
- # ── 获取分片索引 ─────────────────────────────
447
  progress(0.05, desc="读取模型索引...")
448
  try:
449
- index_data = find_index_file(model_id, token)
450
  shard_headers: dict[str, tuple[dict, int]] = {}
451
 
452
  if index_data:
@@ -455,22 +525,19 @@ def analyze_model(
455
  f"📦 分片模型,共 {len(set(weight_map.values()))} 个 shard\n"
456
  )
457
  else:
458
- sf_files = get_safetensor_files(model_id, token)
459
- if not sf_files:
460
- return "❌ 未找到 .safetensors 文件", None
461
  weight_map = None
462
  log_lines.append(f"📦 单文件:{sf_files}\n")
463
  except requests.exceptions.HTTPError as e:
464
  return _http_error_msg(e, model_id), None
465
 
466
- # ── 探测第一个 shard,识别 Q/K key 命名 ──────
467
  progress(0.08, desc="识别层结构...")
468
  try:
469
  if index_data:
470
  first_shard = sorted(set(index_data["weight_map"].values()))[0]
471
  else:
472
  first_shard = sf_files[0]
473
-
474
  first_url = get_file_url(model_id, first_shard)
475
  first_header, first_hsize = read_safetensors_header(first_url, token)
476
  shard_headers[first_shard] = (first_header, first_hsize)
@@ -478,27 +545,37 @@ def analyze_model(
478
  except Exception as e:
479
  return f"❌ 读取 shard header 失败:{e}", None
480
 
481
- # 识别 Q/K key 命名规则
482
- q_candidates = [k for k in all_keys if any(
483
- p in k for p in ["q_proj.weight", "query.weight", "q.weight", "wq.weight"]
 
 
 
 
 
 
 
 
484
  )]
485
  if not q_candidates:
486
- sample = "\n".join(all_keys[:30])
487
- return f"⚠️ 无法识别 Q/K key,前 30 个 key:\n{sample}", None
488
 
489
  sample_q = q_candidates[0]
490
- if "q_proj" in sample_q: q_suffix, k_suffix = "self_attn.q_proj.weight", "self_attn.k_proj.weight"
491
- elif "query" in sample_q: q_suffix, k_suffix = "attention.query.weight", "attention.key.weight"
492
- elif "wq" in sample_q: q_suffix, k_suffix = "attention.wq.weight", "attention.wk.weight"
493
  else:
494
- q_suffix = sample_q.split("layers.0.")[-1]
495
- k_suffix = q_suffix.replace("q.", "k.")
 
496
 
497
- log_lines.append(f"🔑 Q suffix:{q_suffix}\n")
498
- log_lines.append(f"🔑 K suffix:{k_suffix}\n")
499
- log_lines.append(f"{'═'*70}\n")
 
500
 
501
- # ── 辅助:查找 key 所在 shard ────────────────
502
  def get_shard_for_key(key: str) -> str | None:
503
  if index_data:
504
  return index_data["weight_map"].get(key)
@@ -511,7 +588,7 @@ def analyze_model(
511
  return None
512
 
513
  # ── 逐层分析 ─────────────────────────────────
514
- gqa_inferred = False # 只打印一次 GQA 信息
515
 
516
  for layer_idx in range(int(max_layers)):
517
  progress(
@@ -519,17 +596,28 @@ def analyze_model(
519
  desc=f"第 {layer_idx} 层..."
520
  )
521
 
522
- q_key = f"model.layers.{layer_idx}.{q_suffix}"
523
- k_key = f"model.layers.{layer_idx}.{k_suffix}"
 
524
 
525
  q_shard = get_shard_for_key(q_key)
526
  k_shard = get_shard_for_key(k_key)
 
527
 
528
  if q_shard is None or k_shard is None:
529
- log_lines.append(f"\nLayer {layer_idx}: Q/K 未找到,分析结束(共 {layer_idx} 层)\n")
 
 
530
  break
531
 
532
- for shard in {q_shard, k_shard}:
 
 
 
 
 
 
 
533
  if shard not in shard_headers:
534
  h, hs = read_safetensors_header(get_file_url(model_id, shard), token)
535
  shard_headers[shard] = (h, hs)
@@ -543,91 +631,108 @@ def analyze_model(
543
  get_file_url(model_id, k_shard), k_key,
544
  *shard_headers[k_shard], token
545
  )
 
 
 
 
546
  except ValueError as e:
547
  log_lines.append(f"Layer {layer_idx}: ⚠️ 跳过({e})\n")
548
  continue
549
 
550
- if W_q is None or W_k is None:
551
  log_lines.append(f"Layer {layer_idx}: ⚠️ tensor 为 None,跳过\n")
552
  continue
553
 
554
- # ── GQA 参数推断(只做一次,后续复用)───
 
 
 
 
 
 
 
555
  try:
556
  n_q_heads, n_kv_heads, d_head = infer_gqa_params(W_q, W_k, config)
557
  except ValueError as e:
558
  log_lines.append(f"Layer {layer_idx}: ❌ GQA 推断失败:{e}\n")
559
- del W_q, W_k
560
  continue
561
 
562
- if not gqa_inferred:
563
- group_size = n_q_heads // n_kv_heads
564
  log_lines.append(
565
- f"🧠 GQA 结构:n_q_heads={n_q_heads}, "
566
- f"n_kv_heads={n_kv_heads}, "
567
- f"group_size={group_size}, "
568
- f"d_head={d_head}\n"
569
- f" W_q shape: {list(W_q.shape)}, "
570
- f"W_k shape: {list(W_k.shape)}\n"
571
- f"{'═'*70}\n"
572
  )
573
- gqa_inferred = True
574
 
575
- # ── 逐头计算 ────────────────────────────
576
  records, layer_log = analyze_layer_heads(
577
- W_q, W_k, layer_idx,
578
- n_q_heads, n_kv_heads, d_head
 
 
579
  )
580
  all_records.extend(records)
581
  log_lines.append(layer_log)
582
 
583
- del W_q, W_k # 立即释放内存
584
 
585
- # ── 全局汇总统计 ─────────────────────────────
586
  if all_records:
587
  df = pd.DataFrame(all_records)
588
 
589
- pearson_vals = df["Pearson_r"].values
590
- spearman_vals = df["Spearman_r"].values
591
- ssr_vals = df["SSR"].values
592
- cos_vals = df["cos_Uq_Uk"].values
593
- cov_vals = df["Cov_err"].values
594
-
595
- summary = (
596
- f"\n{'═'*70}\n"
597
- f"📊 王氏五定律全局汇总 — {model_id}\n"
598
- f"{'═'*70}\n"
599
- f"总分析:{len(df['Layer'].unique())} × "
600
- f"每层 {df.groupby('Layer').size().iloc[0]} 个 Q 头 "
601
- f"= {len(all_records)} 条记录\n\n"
602
-
603
- f"【第一定律 — Pearson r(→ 1)】\n"
604
- f" Median={np.median(pearson_vals):.6f} "
605
- f"Mean={np.mean(pearson_vals):.6f} "
606
- f"Min={np.min(pearson_vals):.6f} "
607
- f"Max={np.max(pearson_vals):.6f}\n\n"
608
-
609
- f"【第一定律 — Spearman r(→ 1)】\n"
610
- f" Median={np.median(spearman_vals):.6f} "
611
- f"Mean={np.mean(spearman_vals):.6f}\n\n"
612
-
613
- f"【第二定律 — SSR(→ 0)】\n"
614
- f" Median={np.median(ssr_vals):.8f} "
615
- f"Mean={np.mean(ssr_vals):.8f} "
616
- f"Min={np.min(ssr_vals):.8f} "
617
- f"Max={np.max(ssr_vals):.8f}\n\n"
618
-
619
- f"【第四定律 — cos(Uq,Uk) 输出子空间对齐】\n"
620
- f" Median={np.median(cos_vals):.6f} "
621
- f"Mean={np.mean(cos_vals):.6f} "
622
- f"(随机基准 1/√d_head)\n\n"
623
-
624
- f"【协方差对齐误差(越小越好)】\n"
625
- f" Median={np.median(cov_vals):.6f} "
626
- f"Mean={np.mean(cov_vals):.6f}\n"
627
-
628
- f"{'═'*70}\n"
629
- )
630
- log_lines.append(summary)
 
 
 
 
 
 
 
631
 
632
  return "".join(log_lines), df
633
  else:
@@ -645,13 +750,15 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
645
  **Mathematical Foundations of Large Language Models (MF-LLM)**
646
 
647
  通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
648
- 支持 GQA(Grouped Query Attention):对每个 Q 头分别与其对应 K 头做 SVD 分析
649
 
650
- | 定律 | 指标 | 理论极值 |
651
- |------|------|---------|
652
- | 第一定律 | Pearson r / Spearman r | → 1 |
653
- | 第二定律 | SSR | → 0 |
654
- | 第四定律 | cos(Uq, Uk) | 1/√d_head(随机正交)|
 
 
655
 
656
  [![DOI](https://img.shields.io/badge/DOI-10.5281%2Fzenodo.19707844-blue)](https://doi.org/10.5281/zenodo.19707844)
657
  [![HAL](https://img.shields.io/badge/HAL-hal--05609398-red)](https://hal.science/hal-05609398)
@@ -661,8 +768,8 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
661
  with gr.Column(scale=2):
662
  model_input = gr.Textbox(
663
  label="HuggingFace 模型 ID",
664
- placeholder="Qwen/Qwen2.5-14B-Instruct",
665
- value="Qwen/Qwen2.5-14B-Instruct"
666
  )
667
  token_input = gr.Textbox(
668
  label="HF Access Token(公开模型可留空)",
@@ -675,14 +782,15 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
675
  )
676
  analyze_btn = gr.Button("🚀 开始分析", variant="primary")
677
 
 
678
  with gr.Column(scale=1):
679
  gr.Markdown("""
680
  ### ✅ 推荐模型
681
  ```
682
  Qwen/Qwen2.5-14B-Instruct (GQA 8Q/2K)
683
  meta-llama/Llama-3-8B (GQA)
684
- google/gemma-4-e2b (MHA)
685
- google/gemma-4-e4b-it (MHA)
686
  deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
687
  ```
688
  ### GQA 典型结构
@@ -691,21 +799,33 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
691
  | Qwen2.5-7B | 28 | 4 | 7 |
692
  | LLaMA-3-8B | 32 | 8 | 4 |
693
  | Qwen2.5-14B | 40 | 8 | 5 |
694
- | Gemma-2-2B | 8 | 4 | 2 |
 
 
 
 
 
695
  """)
696
 
697
  log_output = gr.Textbox(
698
  label="分析日志(逐头详情)",
699
- lines=35, max_lines=80
700
  )
701
 
702
  table_output = gr.Dataframe(
703
- label="逐头结果表",
704
  headers=[
705
- "Layer","KV_head","Q_head",
706
- "Pearson_r","Spearman_r",
707
- "Alpha","Alpha_res",
708
- "cos_Uq_Uk","Cov_err","SSR"
 
 
 
 
 
 
 
709
  ]
710
  )
711
 
 
2
  import requests
3
  import struct
4
  import json
5
+ import re # [改动1] 新增:用于多模态层名过滤
6
  import numpy as np
7
  import torch
8
  from scipy.stats import pearsonr, spearmanr
 
32
  UNSUPPORTED_SVD_DTYPES = {"I8", "U8", "I32", "I64", "F8_E4M3", "F8_E5M2"}
33
  QUANTIZED_KEY_SIGNATURES = ["qweight", "qzeros", "scales", "g_idx", "packed_weight"]
34
 
35
# [Change 1] Substrings that mark a tensor key as belonging to a vision
# (non-text) component of a multimodal checkpoint. Keys containing any of
# these (compared case-insensitively by is_vision_key) are skipped.
VISION_KEY_PATTERNS = [
    "vision",
    "visual",
    "image_encoder",
    "img_encoder",
    "patch_embed",
    "vit",
    "vision_tower",
    "mm_projector",
]
41
+
42
 
43
  # ─────────────────────────────────────────────
44
  # 工具函数
 
59
  timeout=30
60
  )
61
  r.raise_for_status()
62
+ raw = json.loads(r.content)
63
+ # 过滤 __metadata__
64
+ raw.pop("__metadata__", None)
65
+ return raw, header_size
66
 
67
 
68
  def load_tensor_remote(
 
111
 
112
 
def find_index_file(model_id: str, token: str = None) -> dict | None:
    """Fetch ``model.safetensors.index.json`` for a Hub model.

    Args:
        model_id: Repository id on huggingface.co (e.g. ``org/name``).
        token: Optional access token, sent as a Bearer authorization header.

    Returns:
        The parsed JSON index when the server answers HTTP 200, otherwise
        ``None`` (any non-200 status is treated as "no index available").
    """
    auth_headers = {}
    if token:
        auth_headers["Authorization"] = f"Bearer {token}"

    response = requests.get(
        f"https://huggingface.co/{model_id}/resolve/main/model.safetensors.index.json",
        headers=auth_headers,
        timeout=15,
    )
    if response.status_code != 200:
        return None
    return response.json()
 
126
  return f"❌ HTTP {code}:{e}"
127
 
128
 
129
# [Change 1] Decide whether a tensor key belongs to a vision-modality layer.
def is_vision_key(key: str) -> bool:
    """Return True if the lower-cased key contains any VISION_KEY_PATTERNS entry."""
    lowered = key.lower()
    for pattern in VISION_KEY_PATTERNS:
        if pattern in lowered:
            return True
    return False
133
+
134
+
135
  # ─────────────────────────────────────────────
136
+ # 量化三重检测(不变)
137
  # ─────────────────────────────────────────────
138
 
139
  def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
140
  headers = {"Authorization": f"Bearer {token}"} if token else {}
141
  warnings = []
142
 
 
143
  try:
144
  r = requests.get(
145
  f"https://huggingface.co/{model_id}/resolve/main/config.json",
 
153
  cfg.get("quantization","")).lower()
154
  if "gptq" in qt:
155
  bits = qcfg.get("bits","?")
156
+ return True, f"❌ 检测到 GPTQ {bits}bit 量化,请改用原始 BF16 版本。"
 
157
  if "awq" in qt:
158
  return True, "❌ 检测到 AWQ 量化,请改用原始 BF16 版本。"
159
  if "bitsandbytes" in qt or "bnb" in qt:
 
161
  except Exception:
162
  warnings.append("⚠️ 无法读取 config.json")
163
 
 
164
  mid_lower = model_id.lower()
165
  for kw in ["gptq","awq","gguf"]:
166
  if kw in mid_lower:
 
175
  except Exception as e:
176
  warnings.append(f"⚠️ 文件列表检测失败:{e}")
177
 
 
178
  try:
179
  index_data = find_index_file(model_id, token)
180
  if index_data:
 
187
  bad_keys = [k for k in all_keys
188
  if any(sig in k for sig in QUANTIZED_KEY_SIGNATURES)]
189
  if bad_keys:
190
+ return True, f"❌ 检测到量化 key:{bad_keys[:3]},请使用原始 BF16 版本。"
 
191
  dtypes = {hdr[k].get("dtype","") for k in all_keys[:20]}
192
+ good = dtypes - UNSUPPORTED_SVD_DTYPES
193
  if good:
194
  warnings.append(f"✅ 权重格式:{good}")
195
  except Exception as e:
 
200
 
201
 
202
  # ─────────────────────────────────────────────
203
+ # GQA 参数自动推断(不变)
204
  # ─────────────────────────────────────────────
205
 
206
+ def infer_gqa_params(
207
+ W_q: torch.Tensor,
208
+ W_k: torch.Tensor,
209
+ config: dict | None
210
+ ) -> tuple[int,int,int]:
211
+ q_rows = W_q.shape[0]
212
+ k_rows = W_k.shape[0]
 
 
 
 
 
 
 
 
213
 
 
214
  d_head = None
215
  if config:
216
  d_head = (
217
  config.get("head_dim") or
218
  config.get("kv_channels") or
219
+ config.get("hidden_size", 0) // max(config.get("num_attention_heads", 1), 1)
220
  )
221
  if d_head == 0:
222
  d_head = None
223
 
 
224
  if not d_head:
225
+ for candidate in [256, 128, 96, 80, 64, 32]:
226
  if q_rows % candidate == 0 and k_rows % candidate == 0:
227
  d_head = candidate
228
  break
229
 
230
  if not d_head:
231
  raise ValueError(
232
+ f"无法推断 d_head:W_q={W_q.shape}, W_k={W_k.shape}"
 
233
  )
234
 
235
  n_q_heads = q_rows // d_head
 
237
 
238
  if n_q_heads % n_kv_heads != 0:
239
  raise ValueError(
240
+ f"n_q_heads={n_q_heads} 不能被 n_kv_heads={n_kv_heads} 整除"
 
241
  )
 
242
  return n_q_heads, n_kv_heads, d_head
243
 
244
 
245
  # ─────────────────────────────────────────────
246
+ # [改动2] 指标计算函数:新增右奇异向量对齐
247
  # ─────────────────────────────────────────────
248
 
249
def compute_pearson_corr(s_a: torch.Tensor, s_b: torch.Tensor) -> float:
    """Return the Pearson correlation coefficient of two 1-D tensors.

    Both inputs are truncated to their common leading length, matching the
    sibling spectrum helpers, so spectra of unequal size can be compared.

    Args:
        s_a: First 1-D floating-point tensor (e.g. singular values).
        s_b: Second 1-D floating-point tensor.

    Returns:
        The correlation in [-1, 1]. Returns 0.0 when there is nothing to
        compare (empty input) or when either input has zero variance.
    """
    n = min(s_a.shape[0], s_b.shape[0])
    if n == 0:
        # mean() of an empty tensor is NaN; report "no correlation" instead.
        return 0.0

    a_centered = s_a[:n] - s_a[:n].mean()
    b_centered = s_b[:n] - s_b[:n].mean()

    den = torch.norm(a_centered, 2) * torch.norm(b_centered, 2)
    if den == 0:
        return 0.0

    r = torch.dot(a_centered, b_centered) / den
    # Floating-point rounding can push |r| marginally above 1; clamp keeps
    # the value in range while still propagating NaN from NaN inputs.
    return float(torch.clamp(r, -1.0, 1.0))
255
 
256
 
def compute_singular_value_ratio(
    s_a: torch.Tensor, s_b: torch.Tensor
) -> tuple[float, float]:
    """Fit ``s_a ≈ alpha * s_b`` by least squares and report the fit error.

    Both spectra are truncated to their common leading length first.

    Returns:
        ``(alpha, residual)`` where ``alpha = <s_a, s_b> / <s_b, s_b>`` and
        ``residual`` is the mean squared error of ``s_a - alpha * s_b``.
        When ``<s_b, s_b>`` is zero the pair ``(1.0, 0.0)`` is returned.
    """
    n = min(s_a.shape[0], s_b.shape[0])
    lhs, rhs = s_a[:n], s_b[:n]

    energy = torch.dot(rhs, rhs)
    if energy == 0:
        return 1.0, 0.0

    scale = torch.dot(lhs, rhs) / energy
    mse = torch.mean((lhs - scale * rhs) ** 2).item()
    return float(scale), float(mse)
270
+
271
+
272
def compute_ssr(s_a: torch.Tensor, s_b: torch.Tensor) -> float:
    """Return the mean absolute difference between two L2-normalised spectra.

    Both inputs are truncated to their common leading length, each is divided
    by its L2 norm (plus 1e-10 to avoid division by zero), and the mean of the
    element-wise absolute difference is returned.
    """
    n = min(s_a.shape[0], s_b.shape[0])
    unit_a, unit_b = (
        v[:n] / (torch.norm(v[:n]) + 1e-10) for v in (s_a, s_b)
    )
    return float((unit_a - unit_b).abs().mean())
279
 
280
 
def compute_left_vector_alignment(
    U_a: torch.Tensor, U_b: torch.Tensor
) -> float:
    """Mean absolute cosine between matching left singular vectors.

    cosU = mean_i |<u_a_i, u_b_i>|, where u_*_i is the i-th column.

    Per the original notes this is the "fourth law" metric:
    cos(Uq, Uk) ≈ 1/√d_head (random orthogonal) and
    cos(Uq, Uv) < 1/√d_head (super-orthogonal).

    Args:
        U_a: Matrix whose columns are the first set of vectors.
        U_b: Matrix whose columns are the second set of vectors; must have
            the same number of rows as ``U_a``.

    Returns:
        The mean of the per-column absolute cosines over the common leading
        columns.
    """
    n_cols = min(U_a.shape[1], U_b.shape[1])
    Ua = U_a[:, :n_cols]
    Ub = U_b[:, :n_cols]

    # Normalise each column defensively; 1e-10 guards against zero columns.
    Ua_n = Ua / (torch.norm(Ua, dim=0, keepdim=True) + 1e-10)
    Ub_n = Ub / (torch.norm(Ub, dim=0, keepdim=True) + 1e-10)

    # Only matching column pairs are needed, so take column-wise dot products
    # directly instead of forming the full Ua^T @ Ub matrix and reading its
    # diagonal.
    return float((Ua_n * Ub_n).sum(dim=0).abs().mean())
296
 
297
 
298
# [Change 2] New: alignment of right singular vectors (input subspace).
def compute_right_vector_alignment(
    Vt_a: torch.Tensor, Vt_b: torch.Tensor
) -> float:
    """Mean absolute cosine between matching right singular vectors.

    cosV = mean_i |<v_a_i, v_b_i>|

    Per the original notes this is the "fifth law" metric: all pairs are
    expected to be ≈ 1/√d_model (globally random orthogonal).

    Note: SVD returns Vt (the transpose), so each *row* is one right
    singular vector; rows are therefore compared pairwise.
    """
    n_rows = min(Vt_a.shape[0], Vt_b.shape[0])
    rows_a = Vt_a[:n_rows, :]
    rows_b = Vt_b[:n_rows, :]

    # Row-wise L2 normalisation; 1e-10 guards against zero rows.
    unit_a = rows_a / (torch.norm(rows_a, dim=1, keepdim=True) + 1e-10)
    unit_b = rows_b / (torch.norm(rows_b, dim=1, keepdim=True) + 1e-10)

    row_cosines = torch.sum(unit_a * unit_b, dim=1)
    return float(torch.abs(row_cosines).mean())
314
 
315
 
316
+ # ─────────────────────────────────────────────
317
+ # [改动3] 逐头分析:Q-K + Q-V + K-V 全指标
318
+ # ─────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
319
 
def _spectrum_range(
    s: torch.Tensor, eps: float = 1e-10
) -> tuple[float, float, float]:
    """Return (sigma_max, smallest sigma above eps, condition number) for a spectrum."""
    sig_max = float(s.max())
    above_eps = s[s > eps]
    sig_min = float(above_eps.min()) if above_eps.numel() > 0 else 0.0
    return sig_max, sig_min, sig_max / (sig_min + eps)


def analyze_layer_heads(
    W_q: torch.Tensor,
    W_k: torch.Tensor,
    W_v: torch.Tensor,        # [Change 3] W_v is now an input
    layer_idx: int,
    n_q_heads: int,
    n_kv_heads: int,
    d_head: int,
    modality: str = "text",   # [Change 4] modality label
) -> tuple[list[dict], str]:
    """Per-head spectral comparison of Q, K and V projections under GQA.

    Heads are taken as consecutive ``d_head``-row slices along dim 0 of each
    weight matrix. For every KV head:
      - the K-V metrics are computed once;
      - for each of the ``n_q_heads // n_kv_heads`` Q heads in its group,
        the Q-K and Q-V metrics are computed.

    Args:
        W_q: Query projection weight; rows are sliced per Q head.
        W_k: Key projection weight; rows are sliced per KV head.
        W_v: Value projection weight; rows are sliced per KV head.
        layer_idx: Layer number, recorded in every row and in the log.
        n_q_heads: Number of query heads.
        n_kv_heads: Number of key/value heads.
        d_head: Rows per head.
        modality: Label copied into every record and the log header.

    Returns:
        ``(records, log_text)``: one dict per Q head with all metrics, and a
        formatted multi-line log of the same rows.
    """
    group_size = n_q_heads // n_kv_heads
    records: list[dict] = []
    log_lines: list[str] = []

    log_lines.append(
        f"\n{'─'*80}\n"
        f"Layer {layer_idx:3d} [{modality}] "   # [Change 4] show modality
        f"n_q={n_q_heads} n_kv={n_kv_heads} "
        f"group={group_size} d_head={d_head}\n"
        f"{'─'*80}\n"
    )
    # Column header for the per-head rows.
    log_lines.append(
        f" {'KV':>3} {'Q':>3}"
        f" {'P_QK':>7} {'Sp_QK':>7} {'SSR_QK':>8} │"
        f" {'SSR_QV':>8} {'SSR_KV':>8}"
        f" {'cosU_QK':>8} {'cosU_QV':>8} {'cosU_KV':>8}"
        f" {'cosV_QK':>8} {'cosV_QV':>8} {'cosV_KV':>8} │"
        f" {'α_QK':>7} {'α_QV':>7} {'α_KV':>7}\n"
    )

    for kv_h in range(n_kv_heads):
        # ── Slice out this KV head's K and V matrices ──
        row_lo, row_hi = kv_h * d_head, (kv_h + 1) * d_head
        k_tensor = W_k[row_lo:row_hi, :]
        v_tensor = W_v[row_lo:row_hi, :]   # [Change 3]

        U_k, s_k, Vt_k = torch.linalg.svd(k_tensor, full_matrices=False)
        U_v, s_v, Vt_v = torch.linalg.svd(v_tensor, full_matrices=False)   # [Change 3]

        # ── K-V metrics: depend only on the KV head, so computed once ──
        alpha_kv, alpha_res_kv = compute_singular_value_ratio(s_k, s_v)
        cosU_KV = compute_left_vector_alignment(U_k, U_v)
        cosV_KV = compute_right_vector_alignment(Vt_k, Vt_v)   # [Change 2]
        ssr_kv = compute_ssr(s_k, s_v)
        min_kv = min(s_k.shape[0], s_v.shape[0])
        pearson_kv = compute_pearson_corr(s_k[:min_kv], s_v[:min_kv])

        # K/V singular-value range and condition number are also invariant
        # across the Q heads of the group; hoisted out of the inner loop.
        sig_max_k, sig_min_k, cond_k = _spectrum_range(s_k)
        sig_max_v, sig_min_v, cond_v = _spectrum_range(s_v)

        for q_offset in range(group_size):
            h_idx = kv_h * group_size + q_offset
            q_tensor = W_q[h_idx * d_head : (h_idx + 1) * d_head, :]
            U_q, s_q, Vt_q = torch.linalg.svd(q_tensor, full_matrices=False)

            min_qk = min(s_q.shape[0], s_k.shape[0])
            min_qv = min(s_q.shape[0], s_v.shape[0])

            # ── Q-K metrics ──
            pearson_qk = compute_pearson_corr(s_q[:min_qk], s_k[:min_qk])
            spearman_qk = float(spearmanr(
                s_q[:min_qk].cpu().numpy(),
                s_k[:min_qk].cpu().numpy()
            )[0])
            ssr_qk = compute_ssr(s_q, s_k)
            alpha_qk, alpha_res_qk = compute_singular_value_ratio(s_q, s_k)
            cosU_QK = compute_left_vector_alignment(U_q, U_k)
            cosV_QK = compute_right_vector_alignment(Vt_q, Vt_k)   # [Change 2]

            # ── Q-V metrics ── [Change 3]
            pearson_qv = compute_pearson_corr(s_q[:min_qv], s_v[:min_qv])
            ssr_qv = compute_ssr(s_q, s_v)
            alpha_qv, alpha_res_qv = compute_singular_value_ratio(s_q, s_v)
            cosU_QV = compute_left_vector_alignment(U_q, U_v)
            cosV_QV = compute_right_vector_alignment(Vt_q, Vt_v)   # [Change 2]

            # ── Q singular-value range and condition number (third law) ──
            sig_max_q, sig_min_q, cond_q = _spectrum_range(s_q)

            records.append({
                # Position
                "layer": layer_idx,
                "modality": modality,                     # [Change 4]
                "kv_head": kv_h,
                "q_head": h_idx,
                # First law: linear alignment of spectra
                "pearson_QK": round(pearson_qk, 6),
                "spearman_QK": round(spearman_qk, 6),
                "pearson_QV": round(pearson_qv, 6),       # [Change 3]
                "pearson_KV": round(pearson_kv, 6),       # [Change 3]
                # Second law: SSR
                "ssr_QK": round(ssr_qk, 8),
                "ssr_QV": round(ssr_qv, 8),               # [Change 3]
                "ssr_KV": round(ssr_kv, 8),               # [Change 3]
                # Fourth law: left singular vectors (output subspace)
                "cosU_QK": round(cosU_QK, 6),
                "cosU_QV": round(cosU_QV, 6),             # [Change 3]
                "cosU_KV": round(cosU_KV, 6),             # [Change 3]
                # Fifth law: right singular vectors (input subspace) [Change 2]
                "cosV_QK": round(cosV_QK, 6),
                "cosV_QV": round(cosV_QV, 6),
                "cosV_KV": round(cosV_KV, 6),
                # Scale factors and their fit residuals
                "alpha_QK": round(alpha_qk, 4),
                "alpha_QV": round(alpha_qv, 4),           # [Change 3]
                "alpha_KV": round(alpha_kv, 4),           # [Change 3]
                "alpha_res_QK": round(alpha_res_qk, 6),
                "alpha_res_QV": round(alpha_res_qv, 6),   # [Change 3]
                "alpha_res_KV": round(alpha_res_kv, 6),   # [Change 3]
                # Singular-value range [Change 3]
                "sigma_max_Q": round(sig_max_q, 4),
                "sigma_min_Q": round(sig_min_q, 4),
                "sigma_max_K": round(sig_max_k, 4),
                "sigma_min_K": round(sig_min_k, 4),
                "sigma_max_V": round(sig_max_v, 4),
                "sigma_min_V": round(sig_min_v, 4),
                # Condition numbers (third law) [Change 3]
                "cond_Q": round(cond_q, 2),
                "cond_K": round(cond_k, 2),
                "cond_V": round(cond_v, 2),
            })

            log_lines.append(
                f" {kv_h:>3d} {h_idx:>3d}"
                f" {pearson_qk:>+7.4f} {spearman_qk:>+7.4f} {ssr_qk:>8.6f} │"
                f" {ssr_qv:>8.6f} {ssr_kv:>8.6f}"
                f" {cosU_QK:>8.4f} {cosU_QV:>8.4f} {cosU_KV:>8.4f}"
                f" {cosV_QK:>8.4f} {cosV_QV:>8.4f} {cosV_KV:>8.4f} │"
                f" {alpha_qk:>7.4f} {alpha_qv:>7.4f} {alpha_kv:>7.4f}\n"
            )

    return records, "".join(log_lines)
 
481
  return "❌ 请输入模型 ID", None
482
 
483
  token = hf_token.strip() or None
484
+ log_lines = [f"🔍 分析模型:{model_id}\n{'═'*80}\n"]
485
  all_records: list[dict] = []
486
 
487
  # ── 量化检测 ─────────────────────────────────
488
  progress(0.02, desc="量化检测...")
489
  is_blocked, quant_msg = check_quantization(model_id, token)
490
+ log_lines.append(f"【量化检测】\n{quant_msg}\n{'─'*80}\n")
491
  if is_blocked:
492
  return "".join(log_lines), None
493
 
494
+ # ── config.json ───────────────────────────────
495
  config = None
496
  try:
497
  r = requests.get(
 
503
  config = r.json()
504
  log_lines.append(
505
  f"📋 config.json:\n"
506
+ f" model_type = {config.get('model_type')}\n"
507
+ f" hidden_size = {config.get('hidden_size')}\n"
508
  f" num_attention_heads = {config.get('num_attention_heads')}\n"
509
  f" num_key_value_heads = {config.get('num_key_value_heads')}\n"
510
+ f" head_dim = {config.get('head_dim')}\n"
511
+ f"{'─'*80}\n"
512
  )
513
  except Exception:
514
  log_lines.append("⚠️ 无法读取 config.json,将从 weight shape 自动推断\n")
515
 
516
+ # ── 分片索引 ──────────────────────────────────
517
  progress(0.05, desc="读取模型索引...")
518
  try:
519
+ index_data = find_index_file(model_id, token)
520
  shard_headers: dict[str, tuple[dict, int]] = {}
521
 
522
  if index_data:
 
525
  f"📦 分片模型,共 {len(set(weight_map.values()))} 个 shard\n"
526
  )
527
  else:
528
+ sf_files = get_safetensor_files(model_id, token)
 
 
529
  weight_map = None
530
  log_lines.append(f"📦 单文件:{sf_files}\n")
531
  except requests.exceptions.HTTPError as e:
532
  return _http_error_msg(e, model_id), None
533
 
534
+ # ── 探测第一个 shard ──────────────────────────
535
  progress(0.08, desc="识别层结构...")
536
  try:
537
  if index_data:
538
  first_shard = sorted(set(index_data["weight_map"].values()))[0]
539
  else:
540
  first_shard = sf_files[0]
 
541
  first_url = get_file_url(model_id, first_shard)
542
  first_header, first_hsize = read_safetensors_header(first_url, token)
543
  shard_headers[first_shard] = (first_header, first_hsize)
 
545
  except Exception as e:
546
  return f"❌ 读取 shard header 失败:{e}", None
547
 
548
+ # [改动1] 区分文本层 key 和视觉层 key
549
+ text_keys = [k for k in all_keys if not is_vision_key(k)]
550
+ vision_keys = [k for k in all_keys if is_vision_key(k)]
551
+ log_lines.append(
552
+ f"🔑 总 key 数:{len(all_keys)} "
553
+ f"(文本层:{len(text_keys)},视觉层跳过:{len(vision_keys)})\n"
554
+ )
555
+
556
+ # 识别 Q/K/V key 命名规则(只在文本 key 中识别)
557
+ q_candidates = [k for k in text_keys if any(
558
+ p in k for p in ["q_proj.weight","query.weight","q.weight","wq.weight"]
559
  )]
560
  if not q_candidates:
561
+ sample = "\n".join(text_keys[:30])
562
+ return f"⚠️ 无法识别文本层 Q/K/V key,前 30 个文本 key:\n{sample}", None
563
 
564
  sample_q = q_candidates[0]
565
+ if "q_proj" in sample_q: q_sfx, k_sfx, v_sfx = "self_attn.q_proj.weight", "self_attn.k_proj.weight", "self_attn.v_proj.weight"
566
+ elif "query" in sample_q: q_sfx, k_sfx, v_sfx = "attention.query.weight", "attention.key.weight", "attention.value.weight"
567
+ elif "wq" in sample_q: q_sfx, k_sfx, v_sfx = "attention.wq.weight", "attention.wk.weight", "attention.wv.weight"
568
  else:
569
+ q_sfx = sample_q.split("layers.0.")[-1]
570
+ k_sfx = q_sfx.replace("q.", "k.")
571
+ v_sfx = q_sfx.replace("q.", "v.")
572
 
573
+ log_lines.append(f"🔑 Q suffix:{q_sfx}\n")
574
+ log_lines.append(f"🔑 K suffix:{k_sfx}\n")
575
+ log_lines.append(f"🔑 V suffix:{v_sfx}\n") # [改动3]
576
+ log_lines.append(f"{'═'*80}\n")
577
 
578
+ # ── 辅助:查找 key 所在 shard ────────────────
579
  def get_shard_for_key(key: str) -> str | None:
580
  if index_data:
581
  return index_data["weight_map"].get(key)
 
588
  return None
589
 
590
  # ── 逐层分析 ─────────────────────────────────
591
+ gqa_logged = False
592
 
593
  for layer_idx in range(int(max_layers)):
594
  progress(
 
596
  desc=f"第 {layer_idx} 层..."
597
  )
598
 
599
+ q_key = f"model.layers.{layer_idx}.{q_sfx}"
600
+ k_key = f"model.layers.{layer_idx}.{k_sfx}"
601
+ v_key = f"model.layers.{layer_idx}.{v_sfx}" # [改动3]
602
 
603
  q_shard = get_shard_for_key(q_key)
604
  k_shard = get_shard_for_key(k_key)
605
+ v_shard = get_shard_for_key(v_key) # [改动3]
606
 
607
  if q_shard is None or k_shard is None:
608
+ log_lines.append(
609
+ f"\nLayer {layer_idx}: Q/K 未找到,分析结束(共 {layer_idx} 层)\n"
610
+ )
611
  break
612
 
613
+ # [改动3] V 找不到时降级处理(不阻断整体分析)
614
+ if v_shard is None:
615
+ log_lines.append(
616
+ f"Layer {layer_idx}: ⚠️ V 未找到,跳过该层\n"
617
+ )
618
+ continue
619
+
620
+ for shard in {q_shard, k_shard, v_shard}:
621
  if shard not in shard_headers:
622
  h, hs = read_safetensors_header(get_file_url(model_id, shard), token)
623
  shard_headers[shard] = (h, hs)
 
631
  get_file_url(model_id, k_shard), k_key,
632
  *shard_headers[k_shard], token
633
  )
634
+ W_v = load_tensor_remote( # [改动3]
635
+ get_file_url(model_id, v_shard), v_key,
636
+ *shard_headers[v_shard], token
637
+ )
638
  except ValueError as e:
639
  log_lines.append(f"Layer {layer_idx}: ⚠️ 跳过({e})\n")
640
  continue
641
 
642
+ if W_q is None or W_k is None or W_v is None:
643
  log_lines.append(f"Layer {layer_idx}: ⚠️ tensor 为 None,跳过\n")
644
  continue
645
 
646
+ # [改动1] 该层是文本层还是视觉层
647
+ modality = "vision" if is_vision_key(q_key) else "text"
648
+ if modality == "vision":
649
+ log_lines.append(f"Layer {layer_idx}: 🖼️ 视觉层,跳过\n")
650
+ del W_q, W_k, W_v
651
+ continue
652
+
653
+ # GQA 推断
654
  try:
655
  n_q_heads, n_kv_heads, d_head = infer_gqa_params(W_q, W_k, config)
656
  except ValueError as e:
657
  log_lines.append(f"Layer {layer_idx}: ❌ GQA 推断失败:{e}\n")
658
+ del W_q, W_k, W_v
659
  continue
660
 
661
+ if not gqa_logged:
 
662
  log_lines.append(
663
+ f"🧠 GQA 结构:n_q={n_q_heads} n_kv={n_kv_heads} "
664
+ f"group={n_q_heads//n_kv_heads} d_head={d_head}\n"
665
+ f" W_q={list(W_q.shape)} W_k={list(W_k.shape)} "
666
+ f"W_v={list(W_v.shape)}\n" # [改动3]
667
+ f"{'═'*80}\n"
 
 
668
  )
669
+ gqa_logged = True
670
 
671
+ # 逐头全指标计算
672
  records, layer_log = analyze_layer_heads(
673
+ W_q, W_k, W_v, # [改动3]
674
+ layer_idx,
675
+ n_q_heads, n_kv_heads, d_head,
676
+ modality=modality # [改动4]
677
  )
678
  all_records.extend(records)
679
  log_lines.append(layer_log)
680
 
681
+ del W_q, W_k, W_v
682
 
683
+ # ── 全局汇总 ──────────────────────────────────
684
  if all_records:
685
  df = pd.DataFrame(all_records)
686
 
687
+ # [改动5] 分模态统计
688
+ def stat_block(arr: np.ndarray, name: str) -> str:
689
+ return (
690
+ f" {name:<14}"
691
+ f" Median={np.median(arr):.6f}"
692
+ f" Mean={np.mean(arr):.6f}"
693
+ f" Min={np.min(arr):.6f}"
694
+ f" Max={np.max(arr):.6f}\n"
695
+ )
696
+
697
+ text_df = df[df["modality"] == "text"]
698
+
699
+ summary_lines = [
700
+ f"\n{'═'*80}\n",
701
+ f"📊 王氏五定律全局汇总:{model_id}\n",
702
+ f"{'═'*80}\n",
703
+ f"文本层记录:{len(text_df)} ("
704
+ f"{text_df['layer'].nunique()} 层 × "
705
+ f"{text_df.groupby('layer').size().iloc[0] if len(text_df)>0 else 0} 头/层)\n\n",
706
+
707
+ f"【第一定律 — Pearson r(→ 1)】\n",
708
+ stat_block(text_df["pearson_QK"].values, "Q-K:"),
709
+ stat_block(text_df["pearson_QV"].values, "Q-V:"), # [改动3]
710
+ stat_block(text_df["pearson_KV"].values, "K-V:"), # [改动3]
711
+
712
+ f"\n【第二定律 — SSR(→ 0)】\n",
713
+ stat_block(text_df["ssr_QK"].values, "Q-K:"),
714
+ stat_block(text_df["ssr_QV"].values, "Q-V:"), # [改动3]
715
+ stat_block(text_df["ssr_KV"].values, "K-V:"), # [改动3]
716
+
717
+ f"\n【第四定律 — cosU 输出子空间(Q-K≈1/√d,Q-V<1/√d 超正交)】\n",
718
+ stat_block(text_df["cosU_QK"].values, "cosU Q-K:"),
719
+ stat_block(text_df["cosU_QV"].values, "cosU Q-V:"), # [改动3]
720
+ stat_block(text_df["cosU_KV"].values, "cosU K-V:"), # [改动3]
721
+
722
+ f"\n【第五定律 — cosV 输入子空间(≈1/√d_model 全局随机正交)】\n", # [改动2]
723
+ stat_block(text_df["cosV_QK"].values, "cosV Q-K:"),
724
+ stat_block(text_df["cosV_QV"].values, "cosV Q-V:"),
725
+ stat_block(text_df["cosV_KV"].values, "cosV K-V:"),
726
+
727
+ f"\n【第三定律 — 条件数(越小越稳定)】\n", # [改动3]
728
+ stat_block(text_df["cond_Q"].values, "cond Q:"),
729
+ stat_block(text_df["cond_K"].values, "cond K:"),
730
+ stat_block(text_df["cond_V"].values, "cond V:"),
731
+
732
+ f"\n⚡ 理论极值:Pearson→1, SSR→0, cosU(QV)<1/√d_head\n",
733
+ f"{'═'*80}\n",
734
+ ]
735
+ log_lines.extend(summary_lines)
736
 
737
  return "".join(log_lines), df
738
  else:
 
750
  **Mathematical Foundations of Large Language Models (MF-LLM)**
751
 
752
  通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
753
+ 支持 GQA + 多模态(自动跳过视觉层)。逐头计算全部五定律指标:
754
 
755
+ | 定律 | 指标 | 理论极值 | 对象 |
756
+ |------|------|---------|------|
757
+ | 第一定律 | Pearson r / Spearman r | → 1 | Q-K |
758
+ | 第二定律 | SSR | → 0 | Q-K, Q-V, K-V |
759
+ | 第三定律 | 条件数 κ | 越小越好 | Q, K, V |
760
+ | 第四定律 | cosU(Uq,Uk) | ≈1/√d_head;cosU(Uq,Uv)<1/√d_head | Q-K, Q-V, K-V |
761
+ | 第五定律 | cosV(Vq,Vk) | ≈1/√d_model(随机正交) | Q-K, Q-V, K-V |
762
 
763
  [![DOI](https://img.shields.io/badge/DOI-10.5281%2Fzenodo.19707844-blue)](https://doi.org/10.5281/zenodo.19707844)
764
  [![HAL](https://img.shields.io/badge/HAL-hal--05609398-red)](https://hal.science/hal-05609398)
 
768
  with gr.Column(scale=2):
769
  model_input = gr.Textbox(
770
  label="HuggingFace 模型 ID",
771
+ placeholder="google/gemma-4-e2b",
772
+ value="google/gemma-4-e2b"
773
  )
774
  token_input = gr.Textbox(
775
  label="HF Access Token(公开模型可留空)",
 
782
  )
783
  analyze_btn = gr.Button("🚀 开始分析", variant="primary")
784
 
785
+ # [改动6] 更新推荐模型列表
786
  with gr.Column(scale=1):
787
  gr.Markdown("""
788
  ### ✅ 推荐模型
789
  ```
790
  Qwen/Qwen2.5-14B-Instruct (GQA 8Q/2K)
791
  meta-llama/Llama-3-8B (GQA)
792
+ google/gemma-4-e2b (MHA 多模态)
793
+ google/gemma-4-e4b-it (MHA 多模态)
794
  deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
795
  ```
796
  ### GQA 典型结构
 
799
  | Qwen2.5-7B | 28 | 4 | 7 |
800
  | LLaMA-3-8B | 32 | 8 | 4 |
801
  | Qwen2.5-14B | 40 | 8 | 5 |
802
+ | Gemma-4-E2B | 8 | 4 | 2 |
803
+
804
+ ### 🖼️ 多模态说明
805
+ - 视觉层自动跳过
806
+ - 仅分析文本 Transformer 层
807
+ - 跳过关键词:`vision / visual / vit / patch_embed`
808
  """)
809
 
810
  log_output = gr.Textbox(
811
  label="分析日志(逐头详情)",
812
+ lines=35, max_lines=100
813
  )
814
 
815
  table_output = gr.Dataframe(
816
+ label="逐头全指标结果表",
817
  headers=[
818
+ "layer","modality","kv_head","q_head",
819
+ "pearson_QK","spearman_QK","pearson_QV","pearson_KV",
820
+ "ssr_QK","ssr_QV","ssr_KV",
821
+ "cosU_QK","cosU_QV","cosU_KV",
822
+ "cosV_QK","cosV_QV","cosV_KV",
823
+ "alpha_QK","alpha_QV","alpha_KV",
824
+ "alpha_res_QK","alpha_res_QV","alpha_res_KV",
825
+ "sigma_max_Q","sigma_min_Q",
826
+ "sigma_max_K","sigma_min_K",
827
+ "sigma_max_V","sigma_min_V",
828
+ "cond_Q","cond_K","cond_V",
829
  ]
830
  )
831