Alex W. committed on
Commit
726019a
·
1 Parent(s): 4e18dab

问题很清晰,有三个独立问题:

Browse files

视觉层也要分析(不能只分析文本层)
层号不能重排(原始层号是什么就显示什么)
前缀归并逻辑错误(audio_tower 被错误归类为 text,导致层数虚报47层)

Files changed (1) hide show
  1. app.py +447 -511
app.py CHANGED
@@ -5,7 +5,7 @@ import json
5
  import re
6
  import numpy as np
7
  import torch
8
- from scipy.stats import pearsonr, spearmanr
9
  from huggingface_hub import list_repo_files
10
  import pandas as pd
11
 
@@ -32,15 +32,6 @@ except AttributeError:
32
  UNSUPPORTED_SVD_DTYPES = {"I8", "U8", "I32", "I64", "F8_E4M3", "F8_E5M2"}
33
  QUANTIZED_KEY_SIGNATURES = ["qweight", "qzeros", "scales", "g_idx", "packed_weight"]
34
 
35
- # 视觉层关键词(扩充)
36
- VISION_KEY_PATTERNS = [
37
- "vision", "visual", "image_encoder",
38
- "img_encoder", "patch_embed", "vit",
39
- "vision_tower", "vision_model", # ★ 补充 gemma 的命名
40
- "mm_projector", "multi_modal",
41
- ]
42
-
43
-
44
  # ─────────────────────────────────────────────
45
  # 工具函数
46
  # ─────────────────────────────────────────────
@@ -92,12 +83,12 @@ def load_tensor_remote(
92
 
93
  r = requests.get(url, headers=req_headers, timeout=120)
94
  r.raise_for_status()
95
- raw = r.content
96
 
97
  if torch_dtype == torch.bfloat16:
98
- tensor = torch.frombuffer(bytearray(raw), dtype=torch.int16).view(torch.bfloat16)
99
  else:
100
- tensor = torch.frombuffer(bytearray(raw), dtype=torch_dtype)
101
 
102
  return tensor.reshape(shape).float()
103
 
@@ -111,8 +102,7 @@ def get_safetensor_files(model_id: str, token: str = None) -> list:
111
 
112
 
113
  def find_index_file(model_id: str, token: str = None) -> dict | None:
114
- url = (f"https://huggingface.co/{model_id}/resolve/main/"
115
- f"model.safetensors.index.json")
116
  headers = {"Authorization": f"Bearer {token}"} if token else {}
117
  r = requests.get(url, headers=headers, timeout=15)
118
  return r.json() if r.status_code == 200 else None
@@ -126,91 +116,13 @@ def _http_error_msg(e: requests.exceptions.HTTPError, model_id: str) -> str:
126
  return f"❌ HTTP {code}:{e}"
127
 
128
 
129
- def is_vision_key(key: str) -> bool:
130
- key_lower = key.lower()
131
- return any(pat in key_lower for pat in VISION_KEY_PATTERNS)
132
-
133
-
134
  # ─────────────────────────────────────────────
135
- # 修复1:发现层时记录 key 完整路径,并区分模态
136
- # ─────────────────────────────────────────────
137
-
138
- def discover_layer_qkv_keys(all_shard_headers: dict) -> dict:
139
- """
140
- 遍历所有 shard 的全部 keys,为每层归类 Q/K/V key。
141
-
142
- 返回结构:
143
- {
144
- (modality, prefix, layer_idx): {
145
- "q": (shard, key),
146
- "k": (shard, key),
147
- "v": (shard, key),
148
- }
149
- }
150
- 其中 prefix 是 layers.{N} 之前的部分(如 "language_model.model."),
151
- 用来区分同时存在多套 layer 编号的情况(如 vision tower + language model)。
152
- """
153
- layer_map: dict[tuple, dict] = {}
154
-
155
- for shard_name, (header, _) in all_shard_headers.items():
156
- for key in header.keys():
157
- # 必须是 weight,不要 bias / norm
158
- if not key.endswith(".weight"):
159
- continue
160
-
161
- # 提取 layers.{N} 的位置
162
- m = re.search(r'(.*?)layers\.(\d+)\.(.*)', key)
163
- if not m:
164
- continue
165
- prefix = m.group(1) # e.g. "language_model.model."
166
- layer_idx = int(m.group(2))
167
- suffix = m.group(3) # e.g. "self_attn.q_proj.weight"
168
-
169
- # ★ 关键:模态判断基于 prefix(不是整个 key)
170
- modality = "vision" if is_vision_key(prefix) else "text"
171
-
172
- # 识别 Q/K/V
173
- qkv = None
174
- if any(p in suffix for p in [
175
- "q_proj.weight", "wq.weight",
176
- "attention.query.weight",
177
- "self_attn.q.weight", "attn.q.weight",
178
- ]):
179
- qkv = "q"
180
- elif any(p in suffix for p in [
181
- "k_proj.weight", "wk.weight",
182
- "attention.key.weight",
183
- "self_attn.k.weight", "attn.k.weight",
184
- ]):
185
- qkv = "k"
186
- elif any(p in suffix for p in [
187
- "v_proj.weight", "wv.weight",
188
- "attention.value.weight",
189
- "self_attn.v.weight", "attn.v.weight",
190
- ]):
191
- qkv = "v"
192
- else:
193
- continue
194
-
195
- # ★ 用 (modality, prefix, layer_idx) 作为唯一键
196
- uid = (modality, prefix, layer_idx)
197
- if uid not in layer_map:
198
- layer_map[uid] = {"q": None, "k": None, "v": None}
199
-
200
- if layer_map[uid][qkv] is None:
201
- layer_map[uid][qkv] = (shard_name, key)
202
-
203
- return layer_map
204
-
205
-
206
- # ─────────────────────────────────────────────
207
- # Gemma4 等 config 兼容
208
  # ─────────────────────────────────────────────
209
 
210
  def extract_config_params(config: dict) -> dict:
211
  if config is None:
212
  return {}
213
-
214
  text_cfg = config.get("text_config", {}) or {}
215
 
216
  def get_field(*keys):
@@ -233,7 +145,113 @@ def extract_config_params(config: dict) -> dict:
233
 
234
 
235
  # ─────────────────────────────────────────────
236
- # 量化检测(不变)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  # ─────────────────────────────────────────────
238
 
239
  def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
@@ -248,202 +266,146 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
248
  if r.status_code == 200:
249
  cfg = r.json()
250
  qcfg = cfg.get("quantization_config", {})
251
- qt = (qcfg.get("quant_type","") or
252
- qcfg.get("quant_method","") or
253
- cfg.get("quantization","")).lower()
254
  if "gptq" in qt:
255
- bits = qcfg.get("bits","?")
256
- return True, f"❌ 检测到 GPTQ {bits}bit 量化,请改用原始 BF16 版本。"
257
  if "awq" in qt:
258
- return True, "❌ 检测到 AWQ 量化,请用原始 BF16 版本。"
259
  if "bitsandbytes" in qt or "bnb" in qt:
260
  warnings.append("⚠️ 检测到 bitsandbytes 量化,结果可能失真")
261
  except Exception:
262
  warnings.append("⚠️ 无法读取 config.json")
263
 
264
- mid_lower = model_id.lower()
265
- for kw in ["gptq","awq","gguf"]:
266
- if kw in mid_lower:
267
- return True, f"❌ 模型名含 '{kw.upper()}',为量化版本,请使用原始 BF16 版本。"
268
 
269
  try:
270
  all_files = list(list_repo_files(model_id, token=token))
271
  if any(f.endswith(".gguf") for f in all_files):
272
  return True, "❌ 检测到 .gguf 文件,不支持该格式。"
273
  if not any(f.endswith(".safetensors") for f in all_files):
274
- return True, "❌ 未找到 .safetensors 文件,仅支持 safetensors 格式。"
275
  except Exception as e:
276
  warnings.append(f"⚠️ 文件列表检测失败:{e}")
277
 
278
  try:
279
  index_data = find_index_file(model_id, token)
280
- if index_data:
281
- first_shard = sorted(set(index_data["weight_map"].values()))[0]
282
- else:
283
- sf = get_safetensor_files(model_id, token)
284
- first_shard = sf[0]
285
  hdr, _ = read_safetensors_header(get_file_url(model_id, first_shard), token)
286
- all_keys = list(hdr.keys())
287
- bad_keys = [k for k in all_keys
288
- if any(sig in k for sig in QUANTIZED_KEY_SIGNATURES)]
289
- if bad_keys:
290
- return True, f"❌ 检测到量化 key:{bad_keys[:3]},请使用原始 BF16 版本。"
291
- dtypes = {hdr[k].get("dtype","") for k in all_keys[:20]}
292
- good = dtypes - UNSUPPORTED_SVD_DTYPES
293
  if good:
294
  warnings.append(f"✅ 权重格式:{good}")
295
  except Exception as e:
296
  warnings.append(f"⚠️ header 检测失败:{e}")
297
 
298
- msg = "\n".join(warnings) if warnings else "✅ 未检测到量化,可以正常分析"
299
- return False, msg
300
 
301
 
302
  # ─────────────────────────────────────────────
303
- # GQA 推断
304
  # ─────────────────────────────────────────────
305
 
306
  def infer_gqa_params(
307
  W_q: torch.Tensor,
308
  W_k: torch.Tensor,
309
- config_params: dict | None,
310
- modality: str = "text",
311
- ) -> tuple[int,int,int]:
312
- q_rows = W_q.shape[0]
313
- k_rows = W_k.shape[0]
314
-
315
- d_head = None
316
-
317
- # 视觉层不要用文本层的 head_dim
318
- if config_params and modality == "text":
319
- d_head = config_params.get("head_dim")
320
- if not d_head:
321
- nh = config_params.get("num_attention_heads") or 1
322
- hs = config_params.get("hidden_size") or 0
323
- if hs and nh:
324
- d_head = hs // nh
325
- if d_head == 0:
326
- d_head = None
327
-
328
  if not d_head:
329
- for candidate in [256, 128, 96, 80, 64, 32]:
330
- if q_rows % candidate == 0 and k_rows % candidate == 0:
331
- d_head = candidate
332
  break
333
-
334
  if not d_head:
335
- raise ValueError(
336
- f"无法推断 d_head:W_q={W_q.shape}, W_k={W_k.shape}"
337
- )
338
 
339
- n_q_heads = q_rows // d_head
340
- n_kv_heads = k_rows // d_head
341
-
342
- if n_q_heads % n_kv_heads != 0:
343
- raise ValueError(
344
- f"n_q_heads={n_q_heads} 不能被 n_kv_heads={n_kv_heads} 整除"
345
- )
346
- return n_q_heads, n_kv_heads, d_head
347
 
348
 
349
  # ─────────────────────────────────────────────
350
  # 指标计算
351
  # ─────────────────────────────────────────────
352
 
353
- def compute_pearson_corr(s_a: torch.Tensor, s_b: torch.Tensor) -> float:
354
- am = s_a - s_a.mean()
355
- bm = s_b - s_b.mean()
356
- num = torch.dot(am, bm)
357
- den = torch.norm(am, 2) * torch.norm(bm, 2)
358
- return float(num / den) if den != 0 else 0.0
359
-
360
-
361
- def compute_singular_value_ratio(
362
- s_a: torch.Tensor, s_b: torch.Tensor
363
- ) -> tuple[float, float]:
364
- min_len = min(s_a.shape[0], s_b.shape[0])
365
- sa = s_a[:min_len]
366
- sb = s_b[:min_len]
367
- num = torch.dot(sa, sb)
368
  den = torch.dot(sb, sb)
369
  if den == 0:
370
  return 1.0, 0.0
371
- alpha = num / den
372
- residual = torch.mean((sa - alpha * sb) ** 2).item()
373
- return float(alpha), float(residual)
374
-
375
-
376
- def compute_ssr(s_a: torch.Tensor, s_b: torch.Tensor) -> float:
377
- min_len = min(s_a.shape[0], s_b.shape[0])
378
- sa = s_a[:min_len]
379
- sb = s_b[:min_len]
380
- sa_n = sa / (torch.norm(sa) + 1e-10)
381
- sb_n = sb / (torch.norm(sb) + 1e-10)
382
- return float(torch.mean(torch.abs(sa_n - sb_n)))
383
-
384
-
385
- def compute_left_vector_alignment(
386
- U_a: torch.Tensor, U_b: torch.Tensor
387
- ) -> float:
388
- # 安全:行数(输出维度 d_head)必须相同才有意义
389
- if U_a.shape[0] != U_b.shape[0]:
390
- return float('nan')
391
- min_c = min(U_a.shape[1], U_b.shape[1])
392
- Ua = U_a[:, :min_c]
393
- Ub = U_b[:, :min_c]
394
- Ua_n = Ua / (torch.norm(Ua, dim=0, keepdim=True) + 1e-10)
395
- Ub_n = Ub / (torch.norm(Ub, dim=0, keepdim=True) + 1e-10)
396
- return float(torch.diag(torch.abs(Ua_n.T @ Ub_n)).mean())
397
-
398
-
399
- def compute_right_vector_alignment(
400
- Vt_a: torch.Tensor, Vt_b: torch.Tensor
401
- ) -> float:
402
- # ★ 安全:列数(输入维度 d_model)必须相同才有意义
403
- if Vt_a.shape[1] != Vt_b.shape[1]:
404
- return float('nan')
405
- min_r = min(Vt_a.shape[0], Vt_b.shape[0])
406
- Va_n = Vt_a[:min_r, :]
407
- Vb_n = Vt_b[:min_r, :]
408
- Va_n = Va_n / (torch.norm(Va_n, dim=1, keepdim=True) + 1e-10)
409
- Vb_n = Vb_n / (torch.norm(Vb_n, dim=1, keepdim=True) + 1e-10)
410
- return float(torch.abs((Va_n * Vb_n).sum(dim=1)).mean())
411
 
412
 
413
  # ─────────────────────────────────────────────
414
- # 逐头分析
415
  # ─────────────────────────────────────────────
416
 
417
  def analyze_layer_heads(
418
  W_q: torch.Tensor,
419
  W_k: torch.Tensor,
420
  W_v: torch.Tensor,
421
- layer_idx: int,
422
- n_q_heads: int,
423
- n_kv_heads: int,
424
- d_head: int,
425
- modality: str = "text",
426
  ) -> tuple[list[dict], str]:
427
- # ★ 强一致性检查:Q/K/V 的输入维度必须一致
428
- if W_q.shape[1] != W_k.shape[1] or W_k.shape[1] != W_v.shape[1]:
429
- return [], (
430
- f"\nLayer {layer_idx} [{modality}]: "
431
- f"⚠️ Q/K/V 输入维度不一致 "
432
- f"({W_q.shape}, {W_k.shape}, {W_v.shape}),跳过\n"
433
- )
434
 
435
- group_size = n_q_heads // n_kv_heads
436
- records = []
437
- log_lines = []
438
 
439
- log_lines.append(
440
  f"\n{'─'*80}\n"
441
  f"Layer {layer_idx:3d} [{modality}] "
442
- f"n_q={n_q_heads} n_kv={n_kv_heads} "
443
- f"group={group_size} d_head={d_head}\n"
444
  f"{'─'*80}\n"
445
- )
446
- log_lines.append(
447
  f" {'KV':>3} {'Q':>3} │"
448
  f" {'P_QK':>7} {'Sp_QK':>7} {'SSR_QK':>8} │"
449
  f" {'SSR_QV':>8} {'SSR_KV':>8} │"
@@ -452,102 +414,77 @@ def analyze_layer_heads(
452
  f" {'α_QK':>7} {'α_QV':>7} {'α_KV':>7}\n"
453
  )
454
 
455
- for kv_h in range(n_kv_heads):
456
- k_tensor = W_k[kv_h * d_head : (kv_h + 1) * d_head, :]
457
- v_tensor = W_v[kv_h * d_head : (kv_h + 1) * d_head, :]
458
-
459
- U_k, s_k, Vt_k = torch.linalg.svd(k_tensor, full_matrices=False)
460
- U_v, s_v, Vt_v = torch.linalg.svd(v_tensor, full_matrices=False)
461
-
462
- alpha_kv, alpha_res_kv = compute_singular_value_ratio(s_k, s_v)
463
- cosU_KV = compute_left_vector_alignment(U_k, U_v)
464
- cosV_KV = compute_right_vector_alignment(Vt_k, Vt_v)
465
- ssr_kv = compute_ssr(s_k, s_v)
466
- pearson_kv = compute_pearson_corr(
467
- s_k[:min(s_k.shape[0], s_v.shape[0])],
468
- s_v[:min(s_k.shape[0], s_v.shape[0])]
469
- )
470
-
471
- for q_offset in range(group_size):
472
- h_idx = kv_h * group_size + q_offset
473
- q_tensor = W_q[h_idx * d_head : (h_idx + 1) * d_head, :]
474
- U_q, s_q, Vt_q = torch.linalg.svd(q_tensor, full_matrices=False)
475
-
476
- min_qk = min(s_q.shape[0], s_k.shape[0])
477
- min_qv = min(s_q.shape[0], s_v.shape[0])
478
-
479
- pearson_qk = compute_pearson_corr(s_q[:min_qk], s_k[:min_qk])
480
- spearman_qk = float(spearmanr(
481
- s_q[:min_qk].cpu().numpy(),
482
- s_k[:min_qk].cpu().numpy()
483
- )[0])
484
- ssr_qk = compute_ssr(s_q, s_k)
485
- alpha_qk, alpha_res_qk = compute_singular_value_ratio(s_q, s_k)
486
- cosU_QK = compute_left_vector_alignment(U_q, U_k)
487
- cosV_QK = compute_right_vector_alignment(Vt_q, Vt_k)
488
-
489
- pearson_qv = compute_pearson_corr(s_q[:min_qv], s_v[:min_qv])
490
- ssr_qv = compute_ssr(s_q, s_v)
491
- alpha_qv, alpha_res_qv = compute_singular_value_ratio(s_q, s_v)
492
- cosU_QV = compute_left_vector_alignment(U_q, U_v)
493
- cosV_QV = compute_right_vector_alignment(Vt_q, Vt_v)
494
-
495
- sig_max_q = float(s_q.max())
496
- sig_min_q = float(s_q[s_q > 1e-10].min()) if (s_q > 1e-10).any() else 0.0
497
- sig_max_k = float(s_k.max())
498
- sig_min_k = float(s_k[s_k > 1e-10].min()) if (s_k > 1e-10).any() else 0.0
499
- sig_max_v = float(s_v.max())
500
- sig_min_v = float(s_v[s_v > 1e-10].min()) if (s_v > 1e-10).any() else 0.0
501
-
502
- cond_q = sig_max_q / (sig_min_q + 1e-10)
503
- cond_k = sig_max_k / (sig_min_k + 1e-10)
504
- cond_v = sig_max_v / (sig_min_v + 1e-10)
505
 
506
  records.append({
507
- "layer": layer_idx,
508
- "modality": modality,
509
- "kv_head": kv_h,
510
- "q_head": h_idx,
511
- "pearson_QK": round(pearson_qk, 6),
512
- "spearman_QK": round(spearman_qk, 6),
513
- "pearson_QV": round(pearson_qv, 6),
514
- "pearson_KV": round(pearson_kv, 6),
515
- "ssr_QK": round(ssr_qk, 8),
516
- "ssr_QV": round(ssr_qv, 8),
517
- "ssr_KV": round(ssr_kv, 8),
518
- "cosU_QK": round(cosU_QK, 6),
519
- "cosU_QV": round(cosU_QV, 6),
520
- "cosU_KV": round(cosU_KV, 6),
521
- "cosV_QK": round(cosV_QK, 6),
522
- "cosV_QV": round(cosV_QV, 6),
523
- "cosV_KV": round(cosV_KV, 6),
524
- "alpha_QK": round(alpha_qk, 4),
525
- "alpha_QV": round(alpha_qv, 4),
526
- "alpha_KV": round(alpha_kv, 4),
527
- "alpha_res_QK": round(alpha_res_qk, 6),
528
- "alpha_res_QV": round(alpha_res_qv, 6),
529
- "alpha_res_KV": round(alpha_res_kv, 6),
530
- "sigma_max_Q": round(sig_max_q, 4),
531
- "sigma_min_Q": round(sig_min_q, 4),
532
- "sigma_max_K": round(sig_max_k, 4),
533
- "sigma_min_K": round(sig_min_k, 4),
534
- "sigma_max_V": round(sig_max_v, 4),
535
- "sigma_min_V": round(sig_min_v, 4),
536
- "cond_Q": round(cond_q, 2),
537
- "cond_K": round(cond_k, 2),
538
- "cond_V": round(cond_v, 2),
539
  })
540
 
541
- log_lines.append(
542
- f" {kv_h:>3d} {h_idx:>3d} │"
543
- f" {pearson_qk:>+7.4f} {spearman_qk:>+7.4f} {ssr_qk:>8.6f} │"
544
  f" {ssr_qv:>8.6f} {ssr_kv:>8.6f} │"
545
  f" {cosU_QK:>8.4f} {cosU_QV:>8.4f} {cosU_KV:>8.4f} │"
546
  f" {cosV_QK:>8.4f} {cosV_QV:>8.4f} {cosV_KV:>8.4f} │"
547
  f" {alpha_qk:>7.4f} {alpha_qv:>7.4f} {alpha_kv:>7.4f}\n"
548
  )
549
 
550
- return records, "".join(log_lines)
551
 
552
 
553
  # ─────────────────────────────────────────────
@@ -555,8 +492,8 @@ def analyze_layer_heads(
555
  # ─────────────────────────────────────────────
556
 
557
  def analyze_model(
558
- model_id: str,
559
- hf_token: str,
560
  max_layers: int,
561
  progress=gr.Progress()
562
  ):
@@ -564,14 +501,15 @@ def analyze_model(
564
  return "❌ 请输入模型 ID", None
565
 
566
  token = hf_token.strip() or None
 
567
  log_lines = [f"🔍 分析模型:{model_id}\n{'═'*80}\n"]
568
  all_records: list[dict] = []
569
 
570
  # ── 量化检测 ─────────────────────────────────
571
  progress(0.02, desc="量化检测...")
572
- is_blocked, quant_msg = check_quantization(model_id, token)
573
- log_lines.append(f"【量化检测】\n{quant_msg}\n{'─'*80}\n")
574
- if is_blocked:
575
  return "".join(log_lines), None
576
 
577
  # ── config.json ───────────────────────────────
@@ -583,238 +521,226 @@ def analyze_model(
583
  timeout=15
584
  )
585
  if r.status_code == 200:
586
- raw_config = r.json()
587
- config_params = extract_config_params(raw_config)
588
  log_lines.append(
589
  f"📋 config.json:\n"
590
  f" model_type = {config_params.get('model_type')}\n"
591
- f" hidden_size (text) = {config_params.get('hidden_size')}\n"
592
  f" num_attention_heads = {config_params.get('num_attention_heads')}\n"
593
  f" num_key_value_heads = {config_params.get('num_key_value_heads')}\n"
594
  f" head_dim = {config_params.get('head_dim')}\n"
595
  f"{'─'*80}\n"
596
  )
597
  except Exception:
598
- log_lines.append("⚠️ 无法读取 config.json,将从 weight shape 自动推断\n")
599
 
600
- # ── shard 列表 ────────────────────────────────
601
  progress(0.05, desc="读取模型索引...")
602
  try:
603
  index_data = find_index_file(model_id, token)
604
  if index_data:
605
  shard_files = sorted(set(index_data["weight_map"].values()))
606
- log_lines.append(
607
- f"📦 分片模型,共 {len(shard_files)} 个 shard\n"
608
- )
609
  else:
610
  shard_files = get_safetensor_files(model_id, token)
611
- log_lines.append(f"📦 单/多文件:{shard_files}\n")
612
  except requests.exceptions.HTTPError as e:
613
  return _http_error_msg(e, model_id), None
614
 
615
- # ── 读取所有 shard headers ────────────────────
616
- progress(0.08, desc="读取所有 shard headers...")
617
  all_shard_headers: dict[str, tuple[dict, int]] = {}
618
  total_keys = 0
619
- for shard in shard_files:
620
  try:
621
- url = get_file_url(model_id, shard)
622
- h, hs = read_safetensors_header(url, token)
623
- all_shard_headers[shard] = (h, hs)
624
  total_keys += len(h)
625
  except Exception as e:
626
- log_lines.append(f"⚠️ 读取 {shard} header 失败:{e}\n")
627
-
628
- # ── 发现层(区分模态)─────────────────────────
629
- progress(0.12, desc="识别层结构...")
630
- layer_map = discover_layer_qkv_keys(all_shard_headers)
631
-
632
- # ★ 统计每个 (modality, prefix) 的层数
633
- groups: dict[tuple, list[int]] = {}
634
- for (modality, prefix, layer_idx), _ in layer_map.items():
635
- groups.setdefault((modality, prefix), []).append(layer_idx)
636
 
637
  log_lines.append(f"🔑 总 key 数:{total_keys}\n")
638
- log_lines.append(f"📐 发现层组:\n")
639
- for (modality, prefix), layers in sorted(groups.items()):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
640
  log_lines.append(
641
  f" [{modality:6s}] prefix='{prefix}' "
642
- f"层数={len(layers)} 范围={min(layers)}~{max(layers)}\n"
 
643
  )
644
  log_lines.append(f"{'─'*80}\n")
645
 
646
- # 只分析 text 模态(视觉暂不分析)
647
- text_layers = sorted([
648
- (uid, info) for uid, info in layer_map.items()
649
- if uid[0] == "text"
650
- ], key=lambda x: x[0][2]) # 按 layer_idx 排序
651
-
652
- if not text_layers:
653
- return (
654
- "".join(log_lines) +
655
- "❌ 未发现任何文本层\n", None
656
- )
657
-
658
- log_lines.append(f"🔵 将分析 {len(text_layers)} 个文本层(前 {max_layers} 层)\n")
659
- log_lines.append(f"{'═'*80}\n")
660
-
661
- # ── 逐层分析 ─────────────────────────────────
662
- gqa_logged = False
663
- layers_done = 0
664
- max_layers_i = int(max_layers)
665
 
666
- for (modality, prefix, layer_idx), qkv in text_layers:
667
- if layers_done >= max_layers_i:
668
- break
669
 
670
- progress(
671
- 0.15 + 0.80 * layers_done / max(max_layers_i, 1),
672
- desc=f" {layer_idx} 层..."
 
 
673
  )
674
 
675
- if qkv["q"] is None or qkv["k"] is None or qkv["v"] is None:
676
- log_lines.append(
677
- f"Layer {layer_idx} [{modality}]: ⚠️ Q/K/V 不完整,跳过\n"
678
- )
679
- continue
680
 
681
- q_shard, q_key = qkv["q"]
682
- k_shard, k_key = qkv["k"]
683
- v_shard, v_key = qkv["v"]
 
 
 
684
 
685
- try:
686
- W_q = load_tensor_remote(
687
- get_file_url(model_id, q_shard), q_key,
688
- *all_shard_headers[q_shard], token
689
- )
690
- W_k = load_tensor_remote(
691
- get_file_url(model_id, k_shard), k_key,
692
- *all_shard_headers[k_shard], token
693
  )
694
- W_v = load_tensor_remote(
695
- get_file_url(model_id, v_shard), v_key,
696
- *all_shard_headers[v_shard], token
697
  )
698
- except ValueError as e:
699
- log_lines.append(f"Layer {layer_idx}: ⚠️ 跳过({e})\n")
700
- layers_done += 1
701
- continue
702
- except Exception as e:
703
- log_lines.append(f"Layer {layer_idx}: ❌ 加载失败({e})\n")
704
- layers_done += 1
705
- continue
706
 
707
- if W_q is None or W_k is None or W_v is None:
708
- log_lines.append(f"Layer {layer_idx}: ⚠️ tensor 为 None,跳过\n")
709
- layers_done += 1
710
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711
 
712
- # ★ 一致性校验
713
- if W_q.shape[1] != W_k.shape[1] or W_k.shape[1] != W_v.shape[1]:
714
- log_lines.append(
715
- f"Layer {layer_idx}: ⚠️ Q/K/V 输入维度不一致 "
716
- f"Wq={list(W_q.shape)} Wk={list(W_k.shape)} "
717
- f"Wv={list(W_v.shape)},跳过\n"
718
- )
719
- del W_q, W_k, W_v
720
- layers_done += 1
721
- continue
722
 
723
- try:
724
- n_q_heads, n_kv_heads, d_head = infer_gqa_params(
725
- W_q, W_k, config_params, modality=modality
 
 
 
 
 
 
 
 
 
 
 
 
726
  )
727
- except ValueError as e:
728
- log_lines.append(f"Layer {layer_idx}: ❌ GQA 推断失败:{e}\n")
 
729
  del W_q, W_k, W_v
730
- layers_done += 1
731
- continue
732
 
733
- if not gqa_logged:
734
- log_lines.append(
735
- f"🧠 GQA 结构:n_q={n_q_heads} n_kv={n_kv_heads} "
736
- f"group={n_q_heads//n_kv_heads} d_head={d_head}\n"
737
- f" W_q={list(W_q.shape)} W_k={list(W_k.shape)} "
738
- f"W_v={list(W_v.shape)}\n"
739
- f"{'═'*80}\n"
740
- )
741
- gqa_logged = True
742
 
743
- records, layer_log = analyze_layer_heads(
744
- W_q, W_k, W_v,
745
- layer_idx,
746
- n_q_heads, n_kv_heads, d_head,
747
- modality=modality
 
 
 
 
 
 
748
  )
749
- all_records.extend(records)
750
- log_lines.append(layer_log)
751
-
752
- del W_q, W_k, W_v
753
- layers_done += 1
754
-
755
- # ── 汇总 ─────────────────────────────────────
756
- if all_records:
757
- df = pd.DataFrame(all_records)
758
-
759
- def stat_block(arr: np.ndarray, name: str) -> str:
760
- arr = arr[~np.isnan(arr)]
761
- if len(arr) == 0:
762
- return f" {name:<14} (无数据)\n"
763
- return (
764
- f" {name:<14}"
765
- f" Median={np.median(arr):.6f}"
766
- f" Mean={np.mean(arr):.6f}"
767
- f" Min={np.min(arr):.6f}"
768
- f" Max={np.max(arr):.6f}\n"
769
- )
770
 
771
- text_df = df[df["modality"] == "text"]
772
-
773
- summary_lines = [
774
- f"\n{'═'*80}\n",
775
- f"📊 王氏五定律全局汇总 — {model_id}\n",
776
- f"{'═'*80}\n",
777
- f"文本层记录:{len(text_df)} 条 "
778
- f"({text_df['layer'].nunique()} 层 × "
779
- f"{text_df.groupby('layer').size().iloc[0] if len(text_df)>0 else 0} 头/层)\n\n",
780
-
781
- f"【第一定律 Pearson r(→ 1)】\n",
782
- stat_block(text_df["pearson_QK"].values, "Q-K:"),
783
- stat_block(text_df["pearson_QV"].values, "Q-V:"),
784
- stat_block(text_df["pearson_KV"].values, "K-V:"),
785
-
786
- f"\n【第二定律 SSR→ 0】\n",
787
- stat_block(text_df["ssr_QK"].values, "Q-K:"),
788
- stat_block(text_df["ssr_QV"].values, "Q-V:"),
789
- stat_block(text_df["ssr_KV"].values, "K-V:"),
790
-
791
- f"\n【第四定律 cosU 输出子空间】\n",
792
- stat_block(text_df["cosU_QK"].values, "cosU Q-K:"),
793
- stat_block(text_df["cosU_QV"].values, "cosU Q-V:"),
794
- stat_block(text_df["cosU_KV"].values, "cosU K-V:"),
795
-
796
- f"\n【第五定律 cosV 输入子空间】\n",
797
- stat_block(text_df["cosV_QK"].values, "cosV Q-K:"),
798
- stat_block(text_df["cosV_QV"].values, "cosV Q-V:"),
799
- stat_block(text_df["cosV_KV"].values, "cosV K-V:"),
800
-
801
- f"\n【第三定律 — 条件数】\n",
802
- stat_block(text_df["cond_Q"].values, "cond Q:"),
803
- stat_block(text_df["cond_K"].values, "cond K:"),
804
- stat_block(text_df["cond_V"].values, "cond V:"),
805
-
806
- f"\n⚡ 理论极值:Pearson→1, SSR→0, cosU(QV)<1/√d_head\n",
807
- f"{'═'*80}\n",
808
  ]
809
- log_lines.extend(summary_lines)
810
 
811
- return "".join(log_lines), df
812
- else:
813
- return "".join(log_lines) + "\n❌ 未获得任何有效结果\n", None
 
814
 
815
 
816
  # ─────────────────────────────────────────────
817
- # Gradio UI(不变)
818
  # ─────────────────────────────────────────────
819
 
820
  with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
@@ -823,7 +749,19 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
823
  # 🔬 Wang's Five Laws — LLM Spectral Analyzer
824
  **Mathematical Foundations of Large Language Models (MF-LLM)**
825
 
826
- 通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
 
 
 
 
 
 
 
 
 
 
 
 
827
  """)
828
 
829
  with gr.Row():
@@ -839,7 +777,7 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
839
  type="password"
840
  )
841
  max_layers_input = gr.Slider(
842
- label="最大分析层数",
843
  minimum=1, maximum=100, value=4, step=1
844
  )
845
  analyze_btn = gr.Button("🚀 开始分析", variant="primary")
@@ -848,24 +786,22 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
848
  gr.Markdown("""
849
  ### ✅ 推荐模型
850
  ```
 
 
851
  Qwen/Qwen2.5-14B-Instruct
852
  meta-llama/Llama-3-8B
853
- google/gemma-4-e2b
854
- google/gemma-4-31b-it
855
  deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
856
  ```
857
- ### 🔑 关键修复
858
- - ✅ 模态判断基于 prefix 路径
859
- - ✅ 视觉/文本分组独立编号
860
- - Q/K/V 输入维度一致性校验
861
- - ✅ 视觉层不复用文本 head_dim
862
  """)
863
 
864
  log_output = gr.Textbox(
865
  label="分析日志",
866
- lines=35, max_lines=100
867
  )
868
-
869
  table_output = gr.Dataframe(
870
  label="逐头全指标结果表",
871
  headers=[
 
5
  import re
6
  import numpy as np
7
  import torch
8
+ from scipy.stats import spearmanr
9
  from huggingface_hub import list_repo_files
10
  import pandas as pd
11
 
 
32
  UNSUPPORTED_SVD_DTYPES = {"I8", "U8", "I32", "I64", "F8_E4M3", "F8_E5M2"}
33
  QUANTIZED_KEY_SIGNATURES = ["qweight", "qzeros", "scales", "g_idx", "packed_weight"]
34
 
 
 
 
 
 
 
 
 
 
35
  # ─────────────────────────────────────────────
36
  # 工具函数
37
  # ─────────────────────────────────────────────
 
83
 
84
  r = requests.get(url, headers=req_headers, timeout=120)
85
  r.raise_for_status()
86
+ raw_bytes = r.content
87
 
88
  if torch_dtype == torch.bfloat16:
89
+ tensor = torch.frombuffer(bytearray(raw_bytes), dtype=torch.int16).view(torch.bfloat16)
90
  else:
91
+ tensor = torch.frombuffer(bytearray(raw_bytes), dtype=torch_dtype)
92
 
93
  return tensor.reshape(shape).float()
94
 
 
102
 
103
 
104
  def find_index_file(model_id: str, token: str = None) -> dict | None:
105
+ url = f"https://huggingface.co/{model_id}/resolve/main/model.safetensors.index.json"
 
106
  headers = {"Authorization": f"Bearer {token}"} if token else {}
107
  r = requests.get(url, headers=headers, timeout=15)
108
  return r.json() if r.status_code == 200 else None
 
116
  return f"❌ HTTP {code}:{e}"
117
 
118
 
 
 
 
 
 
119
  # ─────────────────────────────────────────────
120
+ # Gemma4 / 嵌套 config 安全解析
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  # ─────────────────────────────────────────────
122
 
123
  def extract_config_params(config: dict) -> dict:
124
  if config is None:
125
  return {}
 
126
  text_cfg = config.get("text_config", {}) or {}
127
 
128
  def get_field(*keys):
 
145
 
146
 
147
  # ─────────────────────────────────────────────
148
+ # QKV 后缀分类
149
+ # ─────────────────────────────────────────────
150
+
151
+ def _classify_qkv_suffix(suffix: str) -> str | None:
152
+ """layers.{N}. 之后的后缀 → 'q'/'k'/'v'/None"""
153
+ if not suffix.endswith(".weight"):
154
+ return None
155
+ excludes = ["norm", "rope", "embed", "lm_head", "layernorm", "ln_"]
156
+ s = suffix.lower()
157
+ if any(e in s for e in excludes):
158
+ return None
159
+ if any(p in s for p in ["q_proj", "wq", "query", "q_a", "q_b"]):
160
+ return "q"
161
+ if any(p in s for p in ["k_proj", "wk", "key", "k_a", "k_b"]):
162
+ return "k"
163
+ if any(p in s for p in ["v_proj", "wv", "value", "v_a", "v_b"]):
164
+ return "v"
165
+ return None
166
+
167
+
168
+ # ─────────────────────────────────────────────
169
+ # 【核心】按组件前缀分组发现所有 QKV 层
170
+ # 每个前缀 = 一个独立组件(语言模型/视觉编码器/音频塔等)
171
+ # 组件内部层号保持原始值,不重排
172
+ # ─────────────────────────────────────────────
173
+
174
+ def discover_all_components(all_shard_headers: dict) -> dict:
175
+ """
176
+ 返回:
177
+ {
178
+ prefix (str): {
179
+ layer_idx (int): {
180
+ "q": (shard_name, full_key),
181
+ "k": (shard_name, full_key),
182
+ "v": (shard_name, full_key),
183
+ }
184
+ }
185
+ }
186
+ 每个 prefix 是一个独立的模型组件。
187
+ 层号是该组件内的原始层号,不做任何重排。
188
+ """
189
+ # 第一遍:收集所有前缀及其层角色
190
+ prefix_data: dict[str, dict[int, dict]] = {}
191
+
192
+ for shard_name, (header, _) in all_shard_headers.items():
193
+ for key in header.keys():
194
+ m = re.search(r'layers\.(\d+)\.', key)
195
+ if not m:
196
+ continue
197
+
198
+ layer_idx = int(m.group(1))
199
+ prefix = key[:m.start()] # 精确截断,不用 split
200
+ suffix = key[m.end():]
201
+
202
+ role = _classify_qkv_suffix(suffix)
203
+ if role is None:
204
+ continue
205
+
206
+ if prefix not in prefix_data:
207
+ prefix_data[prefix] = {}
208
+ if layer_idx not in prefix_data[prefix]:
209
+ prefix_data[prefix][layer_idx] = {"q": None, "k": None, "v": None}
210
+
211
+ if prefix_data[prefix][layer_idx][role] is None:
212
+ prefix_data[prefix][layer_idx][role] = (shard_name, key)
213
+
214
+ # 第二遍:只保留每个前缀中 QKV 完整的层
215
+ result = {}
216
+ for prefix, layers in prefix_data.items():
217
+ complete = {
218
+ idx: qkv for idx, qkv in layers.items()
219
+ if all(qkv[r] is not None for r in ("q", "k", "v"))
220
+ }
221
+ if complete:
222
+ result[prefix] = complete
223
+
224
+ return result
225
+
226
+
227
+ # ─────────────────────────────────────────────
228
+ # 组件类型推断(用于 modality 标注)
229
+ # ─────────────────────────────────────────────
230
+
231
+ VISION_PREFIX_PATTERNS = [
232
+ "vision", "visual", "img", "image",
233
+ "patch_embed", "vit", "clip",
234
+ ]
235
+ AUDIO_PREFIX_PATTERNS = [
236
+ "audio", "speech", "whisper",
237
+ ]
238
+ TEXT_PREFIX_PATTERNS = [
239
+ "language_model", "transformer", "model.layers",
240
+ "text", "decoder", "encoder",
241
+ ]
242
+
243
+ def infer_modality(prefix: str) -> str:
244
+ p = prefix.lower()
245
+ if any(v in p for v in VISION_PREFIX_PATTERNS):
246
+ return "vision"
247
+ if any(a in p for a in AUDIO_PREFIX_PATTERNS):
248
+ return "audio"
249
+ # 默认视为 text(language model)
250
+ return "text"
251
+
252
+
253
+ # ─────────────────────────────────────────────
254
+ # 量化检测
255
  # ─────────────────────────────────────────────
256
 
257
  def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
 
266
  if r.status_code == 200:
267
  cfg = r.json()
268
  qcfg = cfg.get("quantization_config", {})
269
+ qt = (qcfg.get("quant_type", "") or
270
+ qcfg.get("quant_method", "") or
271
+ cfg.get("quantization", "")).lower()
272
  if "gptq" in qt:
273
+ return True, f"❌ GPTQ {qcfg.get('bits','?')}bit 量化,请用原始 BF16 版本。"
 
274
  if "awq" in qt:
275
+ return True, "❌ AWQ 量化,请用原始 BF16 版本。"
276
  if "bitsandbytes" in qt or "bnb" in qt:
277
  warnings.append("⚠️ 检测到 bitsandbytes 量化,结果可能失真")
278
  except Exception:
279
  warnings.append("⚠️ 无法读取 config.json")
280
 
281
+ for kw in ["gptq", "awq", "gguf"]:
282
+ if kw in model_id.lower():
283
+ return True, f"❌ 模型名含 '{kw.upper()}',请使用原始 BF16 版本。"
 
284
 
285
  try:
286
  all_files = list(list_repo_files(model_id, token=token))
287
  if any(f.endswith(".gguf") for f in all_files):
288
  return True, "❌ 检测到 .gguf 文件,不支持该格式。"
289
  if not any(f.endswith(".safetensors") for f in all_files):
290
+ return True, "❌ 未找到 .safetensors 文件。"
291
  except Exception as e:
292
  warnings.append(f"⚠️ 文件列表检测失败:{e}")
293
 
294
  try:
295
  index_data = find_index_file(model_id, token)
296
+ first_shard = (
297
+ sorted(set(index_data["weight_map"].values()))[0]
298
+ if index_data else get_safetensor_files(model_id, token)[0]
299
+ )
 
300
  hdr, _ = read_safetensors_header(get_file_url(model_id, first_shard), token)
301
+ bad = [k for k in hdr if any(s in k for s in QUANTIZED_KEY_SIGNATURES)]
302
+ if bad:
303
+ return True, f"❌ 检测到量化 key:{bad[:3]}"
304
+ good = {hdr[k].get("dtype", "") for k in list(hdr)[:20]} - UNSUPPORTED_SVD_DTYPES
 
 
 
305
  if good:
306
  warnings.append(f"✅ 权重格式:{good}")
307
  except Exception as e:
308
  warnings.append(f"⚠️ header 检测失败:{e}")
309
 
310
+ return False, "\n".join(warnings) if warnings else "✅ 未检测到量化,可以正常分析"
 
311
 
312
 
313
  # ─────────────────────────────────────────────
314
# GQA parameter inference
# ─────────────────────────────────────────────


def infer_gqa_params(
    W_q: torch.Tensor,
    W_k: torch.Tensor,
    config_params: dict,
) -> tuple[int, int, int]:
    """Infer ``(n_q, n_kv, d_head)`` from the Q and K projection weights.

    Candidate head sizes are tried in this order and the first one that
    evenly divides the row counts of BOTH ``W_q`` and ``W_k`` is used:

    1. ``config_params["head_dim"]`` when present and truthy;
    2. otherwise ``hidden_size // num_attention_heads`` from ``config_params``
       (``num_attention_heads`` defaults to 1 when missing, as before);
    3. the common sizes 256, 128, 96, 80, 64, 32 (largest first).

    A config-derived value that does not divide both row counts is skipped
    instead of being used blindly: using it would floor-truncate the head
    counts, and with ``k_rows < d_head`` it would give ``n_kv == 0`` and a
    ``ZeroDivisionError`` that callers catching ``ValueError`` would miss.

    Args:
        W_q: Query projection weight; only ``shape[0]`` (rows) is read.
        W_k: Key projection weight; only ``shape[0]`` (rows) is read.
        config_params: Optional model config values; may be ``None`` or empty.

    Returns:
        ``(n_q, n_kv, d_head)`` with ``n_q = q_rows // d_head`` and
        ``n_kv = k_rows // d_head``.

    Raises:
        ValueError: If no candidate head size fits, or if ``n_q`` is not a
            multiple of ``n_kv``.
    """
    q_rows, k_rows = W_q.shape[0], W_k.shape[0]
    if q_rows <= 0 or k_rows <= 0:
        # Empty projections cannot be split into heads at all.
        raise ValueError(f"无法推断 d_head:W_q={W_q.shape}, W_k={W_k.shape}")

    def _fits(candidate) -> bool:
        # A usable head size is a positive int dividing both row counts.
        return (
            isinstance(candidate, int)
            and candidate > 0
            and q_rows % candidate == 0
            and k_rows % candidate == 0
        )

    cfg = config_params or {}
    candidates = []
    head_dim = cfg.get("head_dim")
    if head_dim:
        candidates.append(head_dim)
    else:
        num_heads = cfg.get("num_attention_heads") or 1
        hidden = cfg.get("hidden_size") or 0
        if hidden and num_heads:
            candidates.append(hidden // num_heads)
    # Heuristic fallback. NOTE: the largest fitting size wins, so the guess can
    # be too big when several sizes divide both row counts.
    candidates.extend([256, 128, 96, 80, 64, 32])

    d_head = next((c for c in candidates if _fits(c)), None)
    if d_head is None:
        raise ValueError(f"无法推断 d_head:W_q={W_q.shape}, W_k={W_k.shape}")

    n_q = q_rows // d_head
    n_kv = k_rows // d_head
    if n_q % n_kv != 0:
        raise ValueError(f"n_q={n_q} 不能被 n_kv={n_kv} 整除")
    return n_q, n_kv, d_head
 
 
 
343
 
344
 
345
  # ─────────────────────────────────────────────
346
  # 指标计算
347
  # ─────────────────────────────────────────────
348
 
349
def compute_pearson_corr(a: torch.Tensor, b: torch.Tensor) -> float:
    """Return the Pearson correlation coefficient of two 1-D tensors.

    Each input is mean-centred; the result is the dot product of the centred
    vectors divided by the product of their L2 norms. When that product is
    exactly zero (at least one input is constant) ``0.0`` is returned.
    The inputs must have the same length, since ``torch.dot`` is used.
    """
    centered_a = a - a.mean()
    centered_b = b - b.mean()
    denominator = torch.norm(centered_a) * torch.norm(centered_b)
    if denominator == 0:
        return 0.0
    return float(torch.dot(centered_a, centered_b) / denominator)
353
+
354
def compute_ssr(a: torch.Tensor, b: torch.Tensor) -> float:
    """Return the mean absolute difference of two L2-normalised vectors.

    Both inputs are truncated to their common leading length, each truncated
    vector is divided by its L2 norm (plus ``1e-10`` to avoid division by
    zero), and the mean of the element-wise absolute difference is returned.
    """
    common = min(a.shape[0], b.shape[0])
    normalised = []
    for vec in (a, b):
        head = vec[:common]
        normalised.append(head / (torch.norm(head) + 1e-10))
    unit_a, unit_b = normalised
    return float(torch.mean(torch.abs(unit_a - unit_b)))
359
+
360
def compute_svr(a: torch.Tensor, b: torch.Tensor) -> tuple[float, float]:
    """Fit ``a ≈ alpha * b`` by least squares and return ``(alpha, residual)``.

    Both inputs are truncated to their common leading length. ``alpha`` is
    ``<a, b> / <b, b>`` and ``residual`` is the mean squared error of the fit.
    When ``<b, b>`` is exactly zero the pair ``(1.0, 0.0)`` is returned.
    """
    common = min(a.shape[0], b.shape[0])
    head_a = a[:common]
    head_b = b[:common]

    b_energy = torch.dot(head_b, head_b)
    if b_energy == 0:
        return 1.0, 0.0

    alpha = torch.dot(head_a, head_b) / b_energy
    residual = torch.mean((head_a - alpha * head_b) ** 2)
    return float(alpha), float(residual)
368
+
369
def compute_cosU(U_a: torch.Tensor, U_b: torch.Tensor) -> float:
    """Return the mean absolute cosine between matching columns of two matrices.

    Both matrices are cropped to their common number of rows and columns,
    every column is scaled to unit L2 norm (``1e-10`` is added to the norm to
    avoid division by zero), and the absolute diagonal of ``Ua.T @ Ub`` —
    the cosine between column ``i`` of each matrix — is averaged.
    """
    rows = min(U_a.shape[0], U_b.shape[0])
    cols = min(U_a.shape[1], U_b.shape[1])

    unit_columns = []
    for mat in (U_a, U_b):
        cropped = mat[:rows, :cols]
        col_norms = torch.norm(cropped, dim=0, keepdim=True) + 1e-10
        unit_columns.append(cropped / col_norms)
    left, right = unit_columns

    gram = left.T @ right
    return float(torch.diag(torch.abs(gram)).mean())
377
+
378
def compute_cosV(Vt_a: torch.Tensor, Vt_b: torch.Tensor) -> float:
    """Return the mean absolute cosine between matching rows of two matrices.

    Both matrices are cropped to their common number of rows and columns,
    every row is scaled to unit L2 norm (``1e-10`` is added to the norm to
    avoid division by zero), and the absolute row-wise dot products are
    averaged.
    """
    rows = min(Vt_a.shape[0], Vt_b.shape[0])
    # Key point: the column count is also cropped to the common minimum, so
    # inputs of different widths can still be compared.
    cols = min(Vt_a.shape[1], Vt_b.shape[1])

    unit_rows = []
    for mat in (Vt_a, Vt_b):
        cropped = mat[:rows, :cols]
        row_norms = torch.norm(cropped, dim=1, keepdim=True) + 1e-10
        unit_rows.append(cropped / row_norms)
    left, right = unit_rows

    row_cosines = (left * right).sum(dim=1)
    return float(torch.abs(row_cosines).mean())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
 
387
 
388
  # ─────────────────────────────────────────────
389
+ # 逐头分析(保留原始层号)
390
  # ─────────────────────────────────────────────
391
 
392
  def analyze_layer_heads(
393
  W_q: torch.Tensor,
394
  W_k: torch.Tensor,
395
  W_v: torch.Tensor,
396
+ layer_idx: int, # 原始层号,不重排
397
+ n_q: int, n_kv: int, d_head: int,
398
+ modality: str,
 
 
399
  ) -> tuple[list[dict], str]:
 
 
 
 
 
 
 
400
 
401
+ group = n_q // n_kv
402
+ records, lines = [], []
 
403
 
404
+ lines.append(
405
  f"\n{'─'*80}\n"
406
  f"Layer {layer_idx:3d} [{modality}] "
407
+ f"n_q={n_q} n_kv={n_kv} group={group} d_head={d_head}\n"
 
408
  f"{'─'*80}\n"
 
 
409
  f" {'KV':>3} {'Q':>3} │"
410
  f" {'P_QK':>7} {'Sp_QK':>7} {'SSR_QK':>8} │"
411
  f" {'SSR_QV':>8} {'SSR_KV':>8} │"
 
414
  f" {'α_QK':>7} {'α_QV':>7} {'α_KV':>7}\n"
415
  )
416
 
417
+ for kv_h in range(n_kv):
418
+ k_t = W_k[kv_h*d_head:(kv_h+1)*d_head, :]
419
+ v_t = W_v[kv_h*d_head:(kv_h+1)*d_head, :]
420
+ U_k, s_k, Vt_k = torch.linalg.svd(k_t, full_matrices=False)
421
+ U_v, s_v, Vt_v = torch.linalg.svd(v_t, full_matrices=False)
422
+
423
+ alpha_kv, res_kv = compute_svr(s_k, s_v)
424
+ cosU_KV = compute_cosU(U_k, U_v)
425
+ cosV_KV = compute_cosV(Vt_k, Vt_v)
426
+ ssr_kv = compute_ssr(s_k, s_v)
427
+ pkv = compute_pearson_corr(s_k[:min(len(s_k),len(s_v))],
428
+ s_v[:min(len(s_k),len(s_v))])
429
+
430
+ for q_off in range(group):
431
+ h = kv_h * group + q_off
432
+ q_t = W_q[h*d_head:(h+1)*d_head, :]
433
+ U_q, s_q, Vt_q = torch.linalg.svd(q_t, full_matrices=False)
434
+
435
+ nqk = min(len(s_q), len(s_k))
436
+ nqv = min(len(s_q), len(s_v))
437
+
438
+ pqk = compute_pearson_corr(s_q[:nqk], s_k[:nqk])
439
+ spqk = float(spearmanr(s_q[:nqk].numpy(), s_k[:nqk].numpy())[0])
440
+ ssr_qk = compute_ssr(s_q, s_k)
441
+ alpha_qk, res_qk = compute_svr(s_q, s_k)
442
+ cosU_QK = compute_cosU(U_q, U_k)
443
+ cosV_QK = compute_cosV(Vt_q, Vt_k)
444
+
445
+ pqv = compute_pearson_corr(s_q[:nqv], s_v[:nqv])
446
+ ssr_qv = compute_ssr(s_q, s_v)
447
+ alpha_qv, res_qv = compute_svr(s_q, s_v)
448
+ cosU_QV = compute_cosU(U_q, U_v)
449
+ cosV_QV = compute_cosV(Vt_q, Vt_v)
450
+
451
+ smxq = float(s_q.max()); smnq = float(s_q[s_q>1e-10].min()) if (s_q>1e-10).any() else 0.
452
+ smxk = float(s_k.max()); smnk = float(s_k[s_k>1e-10].min()) if (s_k>1e-10).any() else 0.
453
+ smxv = float(s_v.max()); smnv = float(s_v[s_v>1e-10].min()) if (s_v>1e-10).any() else 0.
 
 
 
 
 
 
 
 
 
 
 
 
 
454
 
455
  records.append({
456
+ "layer": layer_idx, "modality": modality,
457
+ "kv_head": kv_h, "q_head": h,
458
+ "pearson_QK": round(pqk,6), "spearman_QK": round(spqk,6),
459
+ "pearson_QV": round(pqv,6), "pearson_KV": round(pkv,6),
460
+ "ssr_QK": round(ssr_qk,8), "ssr_QV": round(ssr_qv,8),
461
+ "ssr_KV": round(ssr_kv,8),
462
+ "cosU_QK": round(cosU_QK,6), "cosU_QV": round(cosU_QV,6),
463
+ "cosU_KV": round(cosU_KV,6),
464
+ "cosV_QK": round(cosV_QK,6), "cosV_QV": round(cosV_QV,6),
465
+ "cosV_KV": round(cosV_KV,6),
466
+ "alpha_QK": round(alpha_qk,4), "alpha_QV": round(alpha_qv,4),
467
+ "alpha_KV": round(alpha_kv,4),
468
+ "alpha_res_QK": round(res_qk,6), "alpha_res_QV": round(res_qv,6),
469
+ "alpha_res_KV": round(res_kv,6),
470
+ "sigma_max_Q": round(smxq,4), "sigma_min_Q": round(smnq,4),
471
+ "sigma_max_K": round(smxk,4), "sigma_min_K": round(smnk,4),
472
+ "sigma_max_V": round(smxv,4), "sigma_min_V": round(smnv,4),
473
+ "cond_Q": round(smxq/(smnq+1e-10),2),
474
+ "cond_K": round(smxk/(smnk+1e-10),2),
475
+ "cond_V": round(smxv/(smnv+1e-10),2),
 
 
 
 
 
 
 
 
 
 
 
 
476
  })
477
 
478
+ lines.append(
479
+ f" {kv_h:>3d} {h:>3d} │"
480
+ f" {pqk:>+7.4f} {spqk:>+7.4f} {ssr_qk:>8.6f} │"
481
  f" {ssr_qv:>8.6f} {ssr_kv:>8.6f} │"
482
  f" {cosU_QK:>8.4f} {cosU_QV:>8.4f} {cosU_KV:>8.4f} │"
483
  f" {cosV_QK:>8.4f} {cosV_QV:>8.4f} {cosV_KV:>8.4f} │"
484
  f" {alpha_qk:>7.4f} {alpha_qv:>7.4f} {alpha_kv:>7.4f}\n"
485
  )
486
 
487
+ return records, "".join(lines)
488
 
489
 
490
  # ─────────────────────────────────────────────
 
492
  # ─────────────────────────────────────────────
493
 
494
  def analyze_model(
495
+ model_id: str,
496
+ hf_token: str,
497
  max_layers: int,
498
  progress=gr.Progress()
499
  ):
 
501
  return "❌ 请输入模型 ID", None
502
 
503
  token = hf_token.strip() or None
504
+ max_l = int(max_layers)
505
  log_lines = [f"🔍 分析模型:{model_id}\n{'═'*80}\n"]
506
  all_records: list[dict] = []
507
 
508
  # ── 量化检测 ─────────────────────────────────
509
  progress(0.02, desc="量化检测...")
510
+ blocked, qmsg = check_quantization(model_id, token)
511
+ log_lines.append(f"【量化检测】\n{qmsg}\n{'─'*80}\n")
512
+ if blocked:
513
  return "".join(log_lines), None
514
 
515
  # ── config.json ───────────────────────────────
 
521
  timeout=15
522
  )
523
  if r.status_code == 200:
524
+ raw_cfg = r.json()
525
+ config_params = extract_config_params(raw_cfg)
526
  log_lines.append(
527
  f"📋 config.json:\n"
528
  f" model_type = {config_params.get('model_type')}\n"
529
+ f" hidden_size = {config_params.get('hidden_size')}\n"
530
  f" num_attention_heads = {config_params.get('num_attention_heads')}\n"
531
  f" num_key_value_heads = {config_params.get('num_key_value_heads')}\n"
532
  f" head_dim = {config_params.get('head_dim')}\n"
533
  f"{'─'*80}\n"
534
  )
535
  except Exception:
536
+ log_lines.append("⚠️ 无法读取 config.json\n")
537
 
538
+ # ── 获取 shard 列表 ───────────────────────────
539
  progress(0.05, desc="读取模型索引...")
540
  try:
541
  index_data = find_index_file(model_id, token)
542
  if index_data:
543
  shard_files = sorted(set(index_data["weight_map"].values()))
544
+ log_lines.append(f"📦 分片模型,共 {len(shard_files)} 个 shard\n")
 
 
545
  else:
546
  shard_files = get_safetensor_files(model_id, token)
547
+ log_lines.append(f"📦 文件:{shard_files}\n")
548
  except requests.exceptions.HTTPError as e:
549
  return _http_error_msg(e, model_id), None
550
 
551
+ # ── 读取所有 shard header ────────────────────
552
+ progress(0.08, desc="读取 shard headers...")
553
  all_shard_headers: dict[str, tuple[dict, int]] = {}
554
  total_keys = 0
555
+ for sf in shard_files:
556
  try:
557
+ h, hs = read_safetensors_header(get_file_url(model_id, sf), token)
558
+ all_shard_headers[sf] = (h, hs)
 
559
  total_keys += len(h)
560
  except Exception as e:
561
+ log_lines.append(f"⚠️ {sf} header 读取失败:{e}\n")
 
 
 
 
 
 
 
 
 
562
 
563
  log_lines.append(f"🔑 总 key 数:{total_keys}\n")
564
+
565
+ # ── 发现所有组件 ──────────────────────────────
566
+ progress(0.12, desc="识别组件结构...")
567
+ all_components = discover_all_components(all_shard_headers)
568
+
569
+ if not all_components:
570
+ sample = []
571
+ for sf, (h, _) in list(all_shard_headers.items())[:1]:
572
+ sample = list(h.keys())[:30]
573
+ return "".join(log_lines) + "⚠️ 无法识别 Q/K/V key,前30个 key:\n" + "\n".join(sample), None
574
+
575
+ # ── 打印组件概览 ──────────────────────────────
576
+ log_lines.append("📐 发现组件:\n")
577
+ for prefix, layers in sorted(all_components.items()):
578
+ modality = infer_modality(prefix)
579
+ sorted_l = sorted(layers.keys())
580
  log_lines.append(
581
  f" [{modality:6s}] prefix='{prefix}' "
582
+ f"层数={len(sorted_l)} "
583
+ f"范围={sorted_l[0]}~{sorted_l[-1]}\n"
584
  )
585
  log_lines.append(f"{'─'*80}\n")
586
 
587
+ # ── 逐组件逐层分析 ────────────────────────────
588
+ # 按前缀排序,每个组件独立分析,层号保持原始值
589
+ component_done = 0
590
+ total_components = len(all_components)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
 
592
+ for prefix, layers in sorted(all_components.items()):
593
+ modality = infer_modality(prefix)
594
+ sorted_idxs = sorted(layers.keys())
595
 
596
+ log_lines.append(
597
+ f"\n{'═'*80}\n"
598
+ f"🔷 组件:'{prefix}' [{modality}] "
599
+ f"共 {len(sorted_idxs)} 层\n"
600
+ f"{'═'*80}\n"
601
  )
602
 
603
+ # 组件内最多分析 max_layers 层(从原始层0开始,保持原始编号)
604
+ layers_in_component = 0
605
+ gqa_logged = False
 
 
606
 
607
+ for layer_idx in sorted_idxs:
608
+ if layers_in_component >= max_l:
609
+ log_lines.append(
610
+ f" ⏸️ 已达到最大层数 {max_l},该组件剩余层跳过\n"
611
+ )
612
+ break
613
 
614
+ overall_progress = (
615
+ component_done / total_components
616
+ + (layers_in_component / max(len(sorted_idxs), 1)) / total_components
 
 
 
 
 
617
  )
618
+ progress(
619
+ 0.15 + 0.80 * overall_progress,
620
+ desc=f"{modality} 层 {layer_idx}..."
621
  )
 
 
 
 
 
 
 
 
622
 
623
+ qkv = layers[layer_idx]
624
+ q_shard, q_key = qkv["q"]
625
+ k_shard, k_key = qkv["k"]
626
+ v_shard, v_key = qkv["v"]
627
+
628
+ try:
629
+ W_q = load_tensor_remote(
630
+ get_file_url(model_id, q_shard), q_key,
631
+ *all_shard_headers[q_shard], token
632
+ )
633
+ W_k = load_tensor_remote(
634
+ get_file_url(model_id, k_shard), k_key,
635
+ *all_shard_headers[k_shard], token
636
+ )
637
+ W_v = load_tensor_remote(
638
+ get_file_url(model_id, v_shard), v_key,
639
+ *all_shard_headers[v_shard], token
640
+ )
641
+ except Exception as e:
642
+ log_lines.append(f"Layer {layer_idx}: ❌ 加载失败:{e}\n")
643
+ layers_in_component += 1
644
+ continue
645
+
646
+ if W_q is None or W_k is None or W_v is None:
647
+ log_lines.append(f"Layer {layer_idx}: ⚠️ tensor 为 None,跳过\n")
648
+ layers_in_component += 1
649
+ continue
650
 
651
+ try:
652
+ # 组件内不传全局 config(避免参数错配视觉组件)
653
+ # 对语言模型组件才传 config_params
654
+ cfg = config_params if modality == "text" else {}
655
+ n_q, n_kv, d_head = infer_gqa_params(W_q, W_k, cfg)
656
+ except ValueError as e:
657
+ log_lines.append(f"Layer {layer_idx}: ❌ GQA 推断失败:{e}\n")
658
+ del W_q, W_k, W_v
659
+ layers_in_component += 1
660
+ continue
661
 
662
+ if not gqa_logged:
663
+ log_lines.append(
664
+ f"🧠 注意力结构:n_q={n_q} n_kv={n_kv} "
665
+ f"group={n_q//n_kv} d_head={d_head}\n"
666
+ f" W_q={list(W_q.shape)} "
667
+ f"W_k={list(W_k.shape)} "
668
+ f"W_v={list(W_v.shape)}\n"
669
+ )
670
+ gqa_logged = True
671
+
672
+ records, layer_log = analyze_layer_heads(
673
+ W_q, W_k, W_v,
674
+ layer_idx, # ← 原始层号,不重排
675
+ n_q, n_kv, d_head,
676
+ modality=modality,
677
  )
678
+ all_records.extend(records)
679
+ log_lines.append(layer_log)
680
+
681
  del W_q, W_k, W_v
682
+ layers_in_component += 1
 
683
 
684
+ component_done += 1
685
+
686
+ # ── 全局汇总 ──────────────────────────────────
687
+ if not all_records:
688
+ return "".join(log_lines) + "\n❌ 未获得任何有效结果\n", None
 
 
 
 
689
 
690
+ df = pd.DataFrame(all_records)
691
+
692
+ def stat_block(arr, name):
693
+ if len(arr) == 0:
694
+ return f" {name:<14} 无数据\n"
695
+ return (
696
+ f" {name:<14}"
697
+ f" Median={np.median(arr):.6f}"
698
+ f" Mean={np.mean(arr):.6f}"
699
+ f" Min={np.min(arr):.6f}"
700
+ f" Max={np.max(arr):.6f}\n"
701
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
702
 
703
+ # modality 分组汇总
704
+ summary = [f"\n{'═'*80}\n📊 王氏五定律全局汇总 — {model_id}\n{'═'*80}\n"]
705
+
706
+ for mod in df["modality"].unique():
707
+ mdf = df[df["modality"] == mod]
708
+ summary.append(
709
+ f"\n▶ [{mod}] {len(mdf)} 条记录 "
710
+ f"({mdf['layer'].nunique()} 层 × "
711
+ f"{mdf.groupby('layer').size().iloc[0]} 头/层)\n"
712
+ )
713
+ summary += [
714
+ f" 【第一定律 Pearson r → 1】\n",
715
+ stat_block(mdf["pearson_QK"].values, "Q-K:"),
716
+ stat_block(mdf["pearson_QV"].values, "Q-V:"),
717
+ stat_block(mdf["pearson_KV"].values, "K-V:"),
718
+ f" 【第二定律 SSR → 0】\n",
719
+ stat_block(mdf["ssr_QK"].values, "Q-K:"),
720
+ stat_block(mdf["ssr_QV"].values, "Q-V:"),
721
+ stat_block(mdf["ssr_KV"].values, "K-V:"),
722
+ f" 【第四定律 cosU 输出子空间】\n",
723
+ stat_block(mdf["cosU_QK"].values, "cosU Q-K:"),
724
+ stat_block(mdf["cosU_QV"].values, "cosU Q-V:"),
725
+ stat_block(mdf["cosU_KV"].values, "cosU K-V:"),
726
+ f" 【第五定律 cosV 输入子空间】\n",
727
+ stat_block(mdf["cosV_QK"].values, "cosV Q-K:"),
728
+ stat_block(mdf["cosV_QV"].values, "cosV Q-V:"),
729
+ stat_block(mdf["cosV_KV"].values, "cosV K-V:"),
730
+ f" 【第三定律 条件数】\n",
731
+ stat_block(mdf["cond_Q"].values, "cond Q:"),
732
+ stat_block(mdf["cond_K"].values, "cond K:"),
733
+ stat_block(mdf["cond_V"].values, "cond V:"),
 
 
 
 
 
 
734
  ]
 
735
 
736
+ summary.append(f"\n⚡ 理论极值:Pearson→1, SSR→0, cosU(QV)<1/√d_head\n{'═'*80}\n")
737
+ log_lines.extend(summary)
738
+
739
+ return "".join(log_lines), df
740
 
741
 
742
  # ─────────────────────────────────────────────
743
+ # Gradio UI
744
  # ─────────────────────────────────────────────
745
 
746
  with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
 
749
  # 🔬 Wang's Five Laws — LLM Spectral Analyzer
750
  **Mathematical Foundations of Large Language Models (MF-LLM)**
751
 
752
+ 通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
753
+ 支持 GQA + 多模态(视觉/音频/语言各组件独立分析,原始层号保留)。
754
+
755
+ | 定律 | 指标 | 理论极值 |
756
+ |------|------|---------|
757
+ | 第一定律 | Pearson r | → 1 |
758
+ | 第二定律 | SSR | → 0 |
759
+ | 第三定律 | 条件数 κ | 越小越好 |
760
+ | 第四定律 | cosU(Uq,Uv) | < 1/√d_head(超正交) |
761
+ | 第五定律 | cosV | ≈ 1/√d_model(随机正交) |
762
+
763
+ [![DOI](https://img.shields.io/badge/DOI-10.5281%2Fzenodo.19707844-blue)](https://doi.org/10.5281/zenodo.19707844)
764
+ [![HAL](https://img.shields.io/badge/HAL-hal--05609398-red)](https://hal.science/hal-05609398)
765
  """)
766
 
767
  with gr.Row():
 
777
  type="password"
778
  )
779
  max_layers_input = gr.Slider(
780
+ label="每个组件最大分析层数",
781
  minimum=1, maximum=100, value=4, step=1
782
  )
783
  analyze_btn = gr.Button("🚀 开始分析", variant="primary")
 
786
  gr.Markdown("""
787
  ### ✅ 推荐模型
788
  ```
789
+ google/gemma-4-e2b ← 视觉+语言
790
+ google/gemma-4-31b-it ← 视觉+语言
791
  Qwen/Qwen2.5-14B-Instruct
792
  meta-llama/Llama-3-8B
 
 
793
  deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
794
  ```
795
+ ### 多模态分析说明
796
+ - 每个组件(语言/视觉/音频)**独立分析**
797
+ - 层号保持**原始编号**,不重排
798
+ - 汇总统计**按 modality 分组**展示
 
799
  """)
800
 
801
  log_output = gr.Textbox(
802
  label="分析日志",
803
+ lines=40, max_lines=200
804
  )
 
805
  table_output = gr.Dataframe(
806
  label="逐头全指标结果表",
807
  headers=[