Alex W. committed on
Commit
9ae44df
·
1 Parent(s): 726019a

正确逻辑:

Browse files

不按组件分类,不按 modality 分类
直接按 safetensors 里 layers.{N}. 的原始 N 值过滤
start_layer=0, end_layer=5 → 提取所有前缀下 N 在 [0,5] 范围内的层
同一个 N 在不同前缀下是不同的层,都要输出,保持原始 key 里的层号

Files changed (1) hide show
  1. app.py +281 -344
app.py CHANGED
@@ -110,16 +110,12 @@ def find_index_file(model_id: str, token: str = None) -> dict | None:
110
 
111
  def _http_error_msg(e: requests.exceptions.HTTPError, model_id: str) -> str:
112
  code = e.response.status_code
113
- if code == 401: return "❌ 401 未授权:请填写有效的 HF Access Token"
114
  if code == 403: return f"❌ 403 禁止访问:请先接受 {model_id} 的使用协议"
115
- if code == 404: return f"❌ 404 未找到:模型 {model_id} 不存在"
116
  return f"❌ HTTP {code}:{e}"
117
 
118
 
119
- # ─────────────────────────────────────────────
120
- # Gemma4 / 嵌套 config 安全解析
121
- # ─────────────────────────────────────────────
122
-
123
  def extract_config_params(config: dict) -> dict:
124
  if config is None:
125
  return {}
@@ -128,11 +124,9 @@ def extract_config_params(config: dict) -> dict:
128
  def get_field(*keys):
129
  for k in keys:
130
  v = config.get(k)
131
- if v is not None:
132
- return v
133
  v = text_cfg.get(k)
134
- if v is not None:
135
- return v
136
  return None
137
 
138
  return {
@@ -149,7 +143,6 @@ def extract_config_params(config: dict) -> dict:
149
  # ─────────────────────────────────────────────
150
 
151
  def _classify_qkv_suffix(suffix: str) -> str | None:
152
- """layers.{N}. 之后的后缀 → 'q'/'k'/'v'/None"""
153
  if not suffix.endswith(".weight"):
154
  return None
155
  excludes = ["norm", "rope", "embed", "lm_head", "layernorm", "ln_"]
@@ -166,28 +159,26 @@ def _classify_qkv_suffix(suffix: str) -> str | None:
166
 
167
 
168
  # ─────────────────────────────────────────────
169
- # 核心组件前缀分组发现所有 QKV
170
- # 每个前缀 = 一个独立组件(语言模型/视觉编码器/音频塔等)
171
- # 组件内部层号保持原始值,不重排
 
 
 
 
 
 
 
172
  # ─────────────────────────────────────────────
173
 
174
- def discover_all_components(all_shard_headers: dict) -> dict:
175
  """
176
- 返回:
177
- {
178
- prefix (str): {
179
- layer_idx (int): {
180
- "q": (shard_name, full_key),
181
- "k": (shard_name, full_key),
182
- "v": (shard_name, full_key),
183
- }
184
- }
185
- }
186
- 每个 prefix 是一个独立的模型组件。
187
- 层号是该组件内的原始层号,不做任何重排。
188
  """
189
- # 第一遍:收集所有前缀及其层角色
190
- prefix_data: dict[str, dict[int, dict]] = {}
191
 
192
  for shard_name, (header, _) in all_shard_headers.items():
193
  for key in header.keys():
@@ -196,58 +187,25 @@ def discover_all_components(all_shard_headers: dict) -> dict:
196
  continue
197
 
198
  layer_idx = int(m.group(1))
199
- prefix = key[:m.start()] # 精确截断,不用 split
200
  suffix = key[m.end():]
201
 
202
  role = _classify_qkv_suffix(suffix)
203
  if role is None:
204
  continue
205
 
206
- if prefix not in prefix_data:
207
- prefix_data[prefix] = {}
208
- if layer_idx not in prefix_data[prefix]:
209
- prefix_data[prefix][layer_idx] = {"q": None, "k": None, "v": None}
210
-
211
- if prefix_data[prefix][layer_idx][role] is None:
212
- prefix_data[prefix][layer_idx][role] = (shard_name, key)
213
-
214
- # 第二遍:只保留每个前缀中 QKV 完整的层
215
- result = {}
216
- for prefix, layers in prefix_data.items():
217
- complete = {
218
- idx: qkv for idx, qkv in layers.items()
219
- if all(qkv[r] is not None for r in ("q", "k", "v"))
220
- }
221
- if complete:
222
- result[prefix] = complete
223
 
224
- return result
 
225
 
226
-
227
- # ─────────────────────────────────────────────
228
- # 组件类型推断(用于 modality 标注)
229
- # ─────────────────────────────────────────────
230
-
231
- VISION_PREFIX_PATTERNS = [
232
- "vision", "visual", "img", "image",
233
- "patch_embed", "vit", "clip",
234
- ]
235
- AUDIO_PREFIX_PATTERNS = [
236
- "audio", "speech", "whisper",
237
- ]
238
- TEXT_PREFIX_PATTERNS = [
239
- "language_model", "transformer", "model.layers",
240
- "text", "decoder", "encoder",
241
- ]
242
-
243
- def infer_modality(prefix: str) -> str:
244
- p = prefix.lower()
245
- if any(v in p for v in VISION_PREFIX_PATTERNS):
246
- return "vision"
247
- if any(a in p for a in AUDIO_PREFIX_PATTERNS):
248
- return "audio"
249
- # 默认视为 text(language model)
250
- return "text"
251
 
252
 
253
  # ─────────────────────────────────────────────
@@ -264,35 +222,35 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
264
  headers=headers, timeout=15
265
  )
266
  if r.status_code == 200:
267
- cfg = r.json()
268
  qcfg = cfg.get("quantization_config", {})
269
- qt = (qcfg.get("quant_type", "") or
270
- qcfg.get("quant_method", "") or
271
- cfg.get("quantization", "")).lower()
272
  if "gptq" in qt:
273
- return True, f"❌ GPTQ {qcfg.get('bits','?')}bit 量化,请用原始 BF16 版本。"
274
  if "awq" in qt:
275
  return True, "❌ AWQ 量化,请用原始 BF16 版本。"
276
  if "bitsandbytes" in qt or "bnb" in qt:
277
- warnings.append("⚠️ 检测到 bitsandbytes 量化,结果可能失真")
278
  except Exception:
279
  warnings.append("⚠️ 无法读取 config.json")
280
 
281
- for kw in ["gptq", "awq", "gguf"]:
282
  if kw in model_id.lower():
283
  return True, f"❌ 模型名含 '{kw.upper()}',请使用原始 BF16 版本。"
284
 
285
  try:
286
  all_files = list(list_repo_files(model_id, token=token))
287
  if any(f.endswith(".gguf") for f in all_files):
288
- return True, "❌ 检测到 .gguf 文件,不支持该格式。"
289
  if not any(f.endswith(".safetensors") for f in all_files):
290
  return True, "❌ 未找到 .safetensors 文件。"
291
  except Exception as e:
292
  warnings.append(f"⚠️ 文件列表检测失败:{e}")
293
 
294
  try:
295
- index_data = find_index_file(model_id, token)
296
  first_shard = (
297
  sorted(set(index_data["weight_map"].values()))[0]
298
  if index_data else get_safetensor_files(model_id, token)[0]
@@ -300,8 +258,8 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
300
  hdr, _ = read_safetensors_header(get_file_url(model_id, first_shard), token)
301
  bad = [k for k in hdr if any(s in k for s in QUANTIZED_KEY_SIGNATURES)]
302
  if bad:
303
- return True, f"❌ 检测到量化 key:{bad[:3]}"
304
- good = {hdr[k].get("dtype", "") for k in list(hdr)[:20]} - UNSUPPORTED_SVD_DTYPES
305
  if good:
306
  warnings.append(f"✅ 权重格式:{good}")
307
  except Exception as e:
@@ -311,7 +269,7 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
311
 
312
 
313
  # ─────────────────────────────────────────────
314
- # GQA 参数推断
315
  # ─────────────────────────────────────────────
316
 
317
  def infer_gqa_params(
@@ -328,7 +286,7 @@ def infer_gqa_params(
328
  if hs and nh:
329
  d_head = hs // nh
330
  if not d_head:
331
- for c in [256, 128, 96, 80, 64, 32]:
332
  if q_rows % c == 0 and k_rows % c == 0:
333
  d_head = c
334
  break
@@ -346,64 +304,56 @@ def infer_gqa_params(
346
  # 指标计算
347
  # ─────────────────────────────────────────────
348
 
349
- def compute_pearson_corr(a: torch.Tensor, b: torch.Tensor) -> float:
350
  am, bm = a - a.mean(), b - b.mean()
351
  den = torch.norm(am) * torch.norm(bm)
352
  return float(torch.dot(am, bm) / den) if den != 0 else 0.0
353
 
354
  def compute_ssr(a: torch.Tensor, b: torch.Tensor) -> float:
355
- n = min(a.shape[0], b.shape[0])
356
  an = a[:n] / (torch.norm(a[:n]) + 1e-10)
357
  bn = b[:n] / (torch.norm(b[:n]) + 1e-10)
358
  return float(torch.mean(torch.abs(an - bn)))
359
 
360
  def compute_svr(a: torch.Tensor, b: torch.Tensor) -> tuple[float, float]:
361
- n = min(a.shape[0], b.shape[0])
362
  sa, sb = a[:n], b[:n]
363
  den = torch.dot(sb, sb)
364
- if den == 0:
365
- return 1.0, 0.0
366
  alpha = torch.dot(sa, sb) / den
367
  return float(alpha), float(torch.mean((sa - alpha * sb) ** 2))
368
 
369
  def compute_cosU(U_a: torch.Tensor, U_b: torch.Tensor) -> float:
370
  r = min(U_a.shape[0], U_b.shape[0])
371
  c = min(U_a.shape[1], U_b.shape[1])
372
- Ua = U_a[:r, :c]
373
- Ub = U_b[:r, :c]
374
- Ua = Ua / (torch.norm(Ua, dim=0, keepdim=True) + 1e-10)
375
- Ub = Ub / (torch.norm(Ub, dim=0, keepdim=True) + 1e-10)
376
  return float(torch.diag(torch.abs(Ua.T @ Ub)).mean())
377
 
378
  def compute_cosV(Vt_a: torch.Tensor, Vt_b: torch.Tensor) -> float:
379
  r = min(Vt_a.shape[0], Vt_b.shape[0])
380
- c = min(Vt_a.shape[1], Vt_b.shape[1]) # ← 关键:列也取 min
381
- Va = Vt_a[:r, :c]
382
- Vb = Vt_b[:r, :c]
383
- Va = Va / (torch.norm(Va, dim=1, keepdim=True) + 1e-10)
384
- Vb = Vb / (torch.norm(Vb, dim=1, keepdim=True) + 1e-10)
385
  return float(torch.abs((Va * Vb).sum(dim=1)).mean())
386
 
387
 
388
  # ─────────────────────────────────────────────
389
- # 逐头分析(保留原始层号)
390
  # ─────────────────────────────────────────────
391
 
392
  def analyze_layer_heads(
393
- W_q: torch.Tensor,
394
- W_k: torch.Tensor,
395
- W_v: torch.Tensor,
396
- layer_idx: int, # 原始层号,不重排
397
  n_q: int, n_kv: int, d_head: int,
398
- modality: str,
399
  ) -> tuple[list[dict], str]:
400
 
401
- group = n_q // n_kv
402
- records, lines = [], []
403
-
404
- lines.append(
405
  f"\n{'─'*80}\n"
406
- f"Layer {layer_idx:3d} [{modality}] "
407
  f"n_q={n_q} n_kv={n_kv} group={group} d_head={d_head}\n"
408
  f"{'─'*80}\n"
409
  f" {'KV':>3} {'Q':>3} │"
@@ -412,7 +362,7 @@ def analyze_layer_heads(
412
  f" {'cosU_QK':>8} {'cosU_QV':>8} {'cosU_KV':>8} │"
413
  f" {'cosV_QK':>8} {'cosV_QV':>8} {'cosV_KV':>8} │"
414
  f" {'α_QK':>7} {'α_QV':>7} {'α_KV':>7}\n"
415
- )
416
 
417
  for kv_h in range(n_kv):
418
  k_t = W_k[kv_h*d_head:(kv_h+1)*d_head, :]
@@ -424,64 +374,82 @@ def analyze_layer_heads(
424
  cosU_KV = compute_cosU(U_k, U_v)
425
  cosV_KV = compute_cosV(Vt_k, Vt_v)
426
  ssr_kv = compute_ssr(s_k, s_v)
427
- pkv = compute_pearson_corr(s_k[:min(len(s_k),len(s_v))],
428
- s_v[:min(len(s_k),len(s_v))])
 
 
429
 
430
  for q_off in range(group):
431
- h = kv_h * group + q_off
432
  q_t = W_q[h*d_head:(h+1)*d_head, :]
433
  U_q, s_q, Vt_q = torch.linalg.svd(q_t, full_matrices=False)
434
 
435
  nqk = min(len(s_q), len(s_k))
436
  nqv = min(len(s_q), len(s_v))
437
 
438
- pqk = compute_pearson_corr(s_q[:nqk], s_k[:nqk])
439
- spqk = float(spearmanr(s_q[:nqk].numpy(), s_k[:nqk].numpy())[0])
440
  ssr_qk = compute_ssr(s_q, s_k)
441
- alpha_qk, res_qk = compute_svr(s_q, s_k)
442
- cosU_QK = compute_cosU(U_q, U_k)
443
- cosV_QK = compute_cosV(Vt_q, Vt_k)
444
 
445
- pqv = compute_pearson_corr(s_q[:nqv], s_v[:nqv])
446
  ssr_qv = compute_ssr(s_q, s_v)
447
- alpha_qv, res_qv = compute_svr(s_q, s_v)
448
- cosU_QV = compute_cosU(U_q, U_v)
449
- cosV_QV = compute_cosV(Vt_q, Vt_v)
450
 
451
- smxq = float(s_q.max()); smnq = float(s_q[s_q>1e-10].min()) if (s_q>1e-10).any() else 0.
452
- smxk = float(s_k.max()); smnk = float(s_k[s_k>1e-10].min()) if (s_k>1e-10).any() else 0.
453
- smxv = float(s_v.max()); smnv = float(s_v[s_v>1e-10].min()) if (s_v>1e-10).any() else 0.
 
 
 
454
 
455
  records.append({
456
- "layer": layer_idx, "modality": modality,
457
- "kv_head": kv_h, "q_head": h,
458
- "pearson_QK": round(pqk,6), "spearman_QK": round(spqk,6),
459
- "pearson_QV": round(pqv,6), "pearson_KV": round(pkv,6),
460
- "ssr_QK": round(ssr_qk,8), "ssr_QV": round(ssr_qv,8),
461
- "ssr_KV": round(ssr_kv,8),
462
- "cosU_QK": round(cosU_QK,6), "cosU_QV": round(cosU_QV,6),
463
- "cosU_KV": round(cosU_KV,6),
464
- "cosV_QK": round(cosV_QK,6), "cosV_QV": round(cosV_QV,6),
465
- "cosV_KV": round(cosV_KV,6),
466
- "alpha_QK": round(alpha_qk,4), "alpha_QV": round(alpha_qv,4),
467
- "alpha_KV": round(alpha_kv,4),
468
- "alpha_res_QK": round(res_qk,6), "alpha_res_QV": round(res_qv,6),
469
- "alpha_res_KV": round(res_kv,6),
470
- "sigma_max_Q": round(smxq,4), "sigma_min_Q": round(smnq,4),
471
- "sigma_max_K": round(smxk,4), "sigma_min_K": round(smnk,4),
472
- "sigma_max_V": round(smxv,4), "sigma_min_V": round(smnv,4),
473
- "cond_Q": round(smxq/(smnq+1e-10),2),
474
- "cond_K": round(smxk/(smnk+1e-10),2),
475
- "cond_V": round(smxv/(smnv+1e-10),2),
 
 
 
 
 
 
 
 
 
 
 
 
 
476
  })
477
 
478
  lines.append(
479
  f" {kv_h:>3d} {h:>3d} │"
480
  f" {pqk:>+7.4f} {spqk:>+7.4f} {ssr_qk:>8.6f} │"
481
  f" {ssr_qv:>8.6f} {ssr_kv:>8.6f} │"
482
- f" {cosU_QK:>8.4f} {cosU_QV:>8.4f} {cosU_KV:>8.4f} │"
483
- f" {cosV_QK:>8.4f} {cosV_QV:>8.4f} {cosV_KV:>8.4f} │"
484
- f" {alpha_qk:>7.4f} {alpha_qv:>7.4f} {alpha_kv:>7.4f}\n"
485
  )
486
 
487
  return records, "".join(lines)
@@ -492,25 +460,25 @@ def analyze_layer_heads(
492
  # ─────────────────────────────────────────────
493
 
494
  def analyze_model(
495
- model_id: str,
496
- hf_token: str,
497
- max_layers: int,
 
498
  progress=gr.Progress()
499
  ):
500
  if not model_id.strip():
501
  return "❌ 请输入模型 ID", None
502
 
503
- token = hf_token.strip() or None
504
- max_l = int(max_layers)
505
- log_lines = [f"🔍 分析模型:{model_id}\n{'═'*80}\n"]
506
  all_records: list[dict] = []
507
 
508
  # ── 量化检测 ─────────────────────────────────
509
  progress(0.02, desc="量化检测...")
510
  blocked, qmsg = check_quantization(model_id, token)
511
- log_lines.append(f"【量化检测】\n{qmsg}\n{'─'*80}\n")
512
  if blocked:
513
- return "".join(log_lines), None
514
 
515
  # ── config.json ───────────────────────────────
516
  config_params = {}
@@ -521,222 +489,177 @@ def analyze_model(
521
  timeout=15
522
  )
523
  if r.status_code == 200:
524
- raw_cfg = r.json()
525
- config_params = extract_config_params(raw_cfg)
526
- log_lines.append(
527
- f"📋 config.json:\n"
528
- f" model_type = {config_params.get('model_type')}\n"
529
- f" hidden_size = {config_params.get('hidden_size')}\n"
530
- f" num_attention_heads = {config_params.get('num_attention_heads')}\n"
531
- f" num_key_value_heads = {config_params.get('num_key_value_heads')}\n"
532
- f" head_dim = {config_params.get('head_dim')}\n"
533
  f"{'─'*80}\n"
534
  )
535
  except Exception:
536
- log_lines.append("⚠️ 无法读取 config.json\n")
537
 
538
  # ── 获取 shard 列表 ───────────────────────────
539
  progress(0.05, desc="读取模型索引...")
540
  try:
541
- index_data = find_index_file(model_id, token)
542
- if index_data:
543
- shard_files = sorted(set(index_data["weight_map"].values()))
544
- log_lines.append(f"📦 分片模型,共 {len(shard_files)} 个 shard\n")
545
- else:
546
- shard_files = get_safetensor_files(model_id, token)
547
- log_lines.append(f"📦 文件:{shard_files}\n")
548
  except requests.exceptions.HTTPError as e:
549
  return _http_error_msg(e, model_id), None
550
 
551
  # ── 读取所有 shard header ─────────────────────
552
  progress(0.08, desc="读取 shard headers...")
553
  all_shard_headers: dict[str, tuple[dict, int]] = {}
554
- total_keys = 0
555
  for sf in shard_files:
556
  try:
557
  h, hs = read_safetensors_header(get_file_url(model_id, sf), token)
558
  all_shard_headers[sf] = (h, hs)
559
- total_keys += len(h)
560
  except Exception as e:
561
- log_lines.append(f"⚠️ {sf} header 读取失败:{e}\n")
562
-
563
- log_lines.append(f"🔑 总 key 数:{total_keys}\n")
564
-
565
- # ── 发现所有组件 ──────────────────────────────
566
- progress(0.12, desc="识别组件结构...")
567
- all_components = discover_all_components(all_shard_headers)
568
-
569
- if not all_components:
570
- sample = []
571
- for sf, (h, _) in list(all_shard_headers.items())[:1]:
572
- sample = list(h.keys())[:30]
573
- return "".join(log_lines) + "⚠️ 无法识别 Q/K/V key,前30个 key:\n" + "\n".join(sample), None
574
-
575
- # ── 打印组件概览 ──────────────────────────────
576
- log_lines.append("📐 发现组件:\n")
577
- for prefix, layers in sorted(all_components.items()):
578
- modality = infer_modality(prefix)
579
- sorted_l = sorted(layers.keys())
580
- log_lines.append(
581
- f" [{modality:6s}] prefix='{prefix}' "
582
- f"层数={len(sorted_l)} "
583
- f"范围={sorted_l[0]}~{sorted_l[-1]}\n"
584
- )
585
- log_lines.append(f"{'─'*80}\n")
586
-
587
- # ── 逐组件逐层分析 ────────────────────────────
588
- # 按前缀排序,每个组件独立分析,层号保持原始值
589
- component_done = 0
590
- total_components = len(all_components)
591
-
592
- for prefix, layers in sorted(all_components.items()):
593
- modality = infer_modality(prefix)
594
- sorted_idxs = sorted(layers.keys())
595
-
596
- log_lines.append(
597
- f"\n{'═'*80}\n"
598
- f"🔷 组件:'{prefix}' [{modality}] "
599
- f"共 {len(sorted_idxs)} 层\n"
600
- f"{'═'*80}\n"
601
- )
602
 
603
- # 组件内最多分析 max_layers 层(从原始层0开始,保持原始编号)
604
- layers_in_component = 0
605
- gqa_logged = False
606
 
607
- for layer_idx in sorted_idxs:
608
- if layers_in_component >= max_l:
609
- log_lines.append(
610
- f" ⏸️ 已达到最大层数 {max_l},该组件剩余层跳过\n"
611
- )
612
- break
613
 
614
- overall_progress = (
615
- component_done / total_components
616
- + (layers_in_component / max(len(sorted_idxs), 1)) / total_components
617
- )
618
- progress(
619
- 0.15 + 0.80 * overall_progress,
620
- desc=f"{modality} 层 {layer_idx}..."
621
- )
622
 
623
- qkv = layers[layer_idx]
624
- q_shard, q_key = qkv["q"]
625
- k_shard, k_key = qkv["k"]
626
- v_shard, v_key = qkv["v"]
 
 
 
627
 
628
- try:
629
- W_q = load_tensor_remote(
630
- get_file_url(model_id, q_shard), q_key,
631
- *all_shard_headers[q_shard], token
632
- )
633
- W_k = load_tensor_remote(
634
- get_file_url(model_id, k_shard), k_key,
635
- *all_shard_headers[k_shard], token
636
- )
637
- W_v = load_tensor_remote(
638
- get_file_url(model_id, v_shard), v_key,
639
- *all_shard_headers[v_shard], token
640
- )
641
- except Exception as e:
642
- log_lines.append(f"Layer {layer_idx}: ❌ 加载失败:{e}\n")
643
- layers_in_component += 1
644
- continue
645
 
646
- if W_q is None or W_k is None or W_v is None:
647
- log_lines.append(f"Layer {layer_idx}: ⚠️ tensor None,跳过\n")
648
- layers_in_component += 1
649
- continue
650
 
651
- try:
652
- # 组件内不传全局 config(避免参数错配视觉组件)
653
- # 对语言模型组件才传 config_params
654
- cfg = config_params if modality == "text" else {}
655
- n_q, n_kv, d_head = infer_gqa_params(W_q, W_k, cfg)
656
- except ValueError as e:
657
- log_lines.append(f"Layer {layer_idx}: ❌ GQA 推断失败:{e}\n")
658
- del W_q, W_k, W_v
659
- layers_in_component += 1
660
- continue
661
 
662
- if not gqa_logged:
663
- log_lines.append(
664
- f"🧠 注意力结构:n_q={n_q} n_kv={n_kv} "
665
- f"group={n_q//n_kv} d_head={d_head}\n"
666
- f" W_q={list(W_q.shape)} "
667
- f"W_k={list(W_k.shape)} "
668
- f"W_v={list(W_v.shape)}\n"
669
- )
670
- gqa_logged = True
671
 
672
- records, layer_log = analyze_layer_heads(
673
- W_q, W_k, W_v,
674
- layer_idx, # 原始层号,不重排
675
- n_q, n_kv, d_head,
676
- modality=modality,
677
- )
678
- all_records.extend(records)
679
- log_lines.append(layer_log)
680
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
681
  del W_q, W_k, W_v
682
- layers_in_component += 1
683
 
684
- component_done += 1
 
 
 
 
 
 
 
 
685
 
686
- # ── 全局汇总 ──────────────────────────────────
687
  if not all_records:
688
- return "".join(log_lines) + "\n❌ 未获得任何有效结果\n", None
689
 
690
  df = pd.DataFrame(all_records)
691
 
692
- def stat_block(arr, name):
693
- if len(arr) == 0:
694
- return f" {name:<14} 无数据\n"
695
- return (
696
- f" {name:<14}"
697
- f" Median={np.median(arr):.6f}"
698
- f" Mean={np.mean(arr):.6f}"
699
- f" Min={np.min(arr):.6f}"
700
- f" Max={np.max(arr):.6f}\n"
701
- )
702
 
703
- # modality 分组汇总
704
- summary = [f"\n{'═'*80}\n📊 王氏五定律全局汇总 — {model_id}\n{'═'*80}\n"]
705
 
706
- for mod in df["modality"].unique():
707
- mdf = df[df["modality"] == mod]
 
708
  summary.append(
709
- f"\n▶ [{mod}] {len(mdf)} 条记录 "
710
- f"{mdf['layer'].nunique()} 层 × "
711
- f"{mdf.groupby('layer').size().iloc[0]} 头/层)\n"
712
  )
713
  summary += [
714
- f" 【第一定律 Pearson r → 1】\n",
715
- stat_block(mdf["pearson_QK"].values, "Q-K:"),
716
- stat_block(mdf["pearson_QV"].values, "Q-V:"),
717
- stat_block(mdf["pearson_KV"].values, "K-V:"),
718
- f" 【第二定律 SSR → 0】\n",
719
- stat_block(mdf["ssr_QK"].values, "Q-K:"),
720
- stat_block(mdf["ssr_QV"].values, "Q-V:"),
721
- stat_block(mdf["ssr_KV"].values, "K-V:"),
722
- f" 【第四定律 cosU 输出子空间】\n",
723
- stat_block(mdf["cosU_QK"].values, "cosU Q-K:"),
724
- stat_block(mdf["cosU_QV"].values, "cosU Q-V:"),
725
- stat_block(mdf["cosU_KV"].values, "cosU K-V:"),
726
- f" 【第五定律 cosV 输入子空间】\n",
727
- stat_block(mdf["cosV_QK"].values, "cosV Q-K:"),
728
- stat_block(mdf["cosV_QV"].values, "cosV Q-V:"),
729
- stat_block(mdf["cosV_KV"].values, "cosV K-V:"),
730
- f" 【第三定律 条件数】\n",
731
- stat_block(mdf["cond_Q"].values, "cond Q:"),
732
- stat_block(mdf["cond_K"].values, "cond K:"),
733
- stat_block(mdf["cond_V"].values, "cond V:"),
734
  ]
735
 
736
  summary.append(f"\n⚡ 理论极值:Pearson→1, SSR→0, cosU(QV)<1/√d_head\n{'═'*80}\n")
737
- log_lines.extend(summary)
738
 
739
- return "".join(log_lines), df
740
 
741
 
742
  # ─────────────────────────────────────────────
@@ -750,7 +673,7 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
750
  **Mathematical Foundations of Large Language Models (MF-LLM)**
751
 
752
  通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
753
- 支持 GQA + 多模态(视觉/音频/语言各组件独立分析,原始层号保留)。
754
 
755
  | 定律 | 指标 | 理论极值 |
756
  |------|------|---------|
@@ -776,36 +699,50 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
776
  placeholder="hf_xxxxxxxxxxxxxxxx",
777
  type="password"
778
  )
779
- max_layers_input = gr.Slider(
780
- label="每个组件最大分析层数",
781
- minimum=1, maximum=100, value=4, step=1
782
- )
 
 
 
 
 
783
  analyze_btn = gr.Button("🚀 开始分析", variant="primary")
784
 
785
  with gr.Column(scale=1):
786
  gr.Markdown("""
787
  ### ✅ 推荐模型
788
  ```
789
- google/gemma-4-e2b ← 视觉+语言
790
- google/gemma-4-31b-it ← 视觉+语言
791
  Qwen/Qwen2.5-14B-Instruct
792
  meta-llama/Llama-3-8B
793
  deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
794
  ```
795
- ### 多模态分析说明
796
- - 每个组件(语言/视觉/音频)**独立分析**
797
- - 层号保持**原始编号**,不重排
798
- - 汇总统计**按 modality 分组**展示
 
 
 
 
 
 
 
 
 
799
  """)
800
 
801
  log_output = gr.Textbox(
802
  label="分析日志",
803
- lines=40, max_lines=200
804
  )
805
  table_output = gr.Dataframe(
806
  label="逐头全指标结果表",
807
  headers=[
808
- "layer","modality","kv_head","q_head",
809
  "pearson_QK","spearman_QK","pearson_QV","pearson_KV",
810
  "ssr_QK","ssr_QV","ssr_KV",
811
  "cosU_QK","cosU_QV","cosU_KV",
@@ -821,7 +758,7 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
821
 
822
  analyze_btn.click(
823
  fn=analyze_model,
824
- inputs=[model_input, token_input, max_layers_input],
825
  outputs=[log_output, table_output]
826
  )
827
 
 
110
 
111
  def _http_error_msg(e: requests.exceptions.HTTPError, model_id: str) -> str:
112
  code = e.response.status_code
113
+ if code == 401: return "❌ 401 未授权"
114
  if code == 403: return f"❌ 403 禁止访问:请先接受 {model_id} 的使用协议"
115
+ if code == 404: return f"❌ 404 未找到:{model_id}"
116
  return f"❌ HTTP {code}:{e}"
117
 
118
 
 
 
 
 
119
  def extract_config_params(config: dict) -> dict:
120
  if config is None:
121
  return {}
 
124
  def get_field(*keys):
125
  for k in keys:
126
  v = config.get(k)
127
+ if v is not None: return v
 
128
  v = text_cfg.get(k)
129
+ if v is not None: return v
 
130
  return None
131
 
132
  return {
 
143
  # ─────────────────────────────────────────────
144
 
145
  def _classify_qkv_suffix(suffix: str) -> str | None:
 
146
  if not suffix.endswith(".weight"):
147
  return None
148
  excludes = ["norm", "rope", "embed", "lm_head", "layernorm", "ln_"]
 
159
 
160
 
161
  # ─────────────────────────────────────────────
162
+ # 核心:原始层号扫描,不合并不重排
163
+ # 返回结构:
164
+ # {
165
+ # (prefix, layer_idx): {
166
+ # "q": (shard, key),
167
+ # "k": (shard, key),
168
+ # "v": (shard, key),
169
+ # }
170
+ # }
171
+ # key 是 (prefix, layer_idx) 元组,保证不同组件同编号层不混淆
172
  # ─────────────────────────────────────────────
173
 
174
+ def scan_all_qkv(all_shard_headers: dict) -> dict:
175
  """
176
+ 扫描所有 shard 中的 Q/K/V weight。
177
+ 以 (prefix, layer_idx) 为 key,保证:
178
+ - 不同组件的同编号层互相独立
179
+ - 层号是 safetensors 里的原始值
 
 
 
 
 
 
 
 
180
  """
181
+ result: dict[tuple[str, int], dict] = {}
 
182
 
183
  for shard_name, (header, _) in all_shard_headers.items():
184
  for key in header.keys():
 
187
  continue
188
 
189
  layer_idx = int(m.group(1))
190
+ prefix = key[:m.start()] # 精确截断
191
  suffix = key[m.end():]
192
 
193
  role = _classify_qkv_suffix(suffix)
194
  if role is None:
195
  continue
196
 
197
+ slot = (prefix, layer_idx)
198
+ if slot not in result:
199
+ result[slot] = {"q": None, "k": None, "v": None}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
+ if result[slot][role] is None:
202
+ result[slot][role] = (shard_name, key)
203
 
204
+ # 只保留 QKV 完整的槽
205
+ return {
206
+ slot: qkv for slot, qkv in result.items()
207
+ if all(qkv[r] is not None for r in ("q", "k", "v"))
208
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
 
211
  # ─────────────────────────────────────────────
 
222
  headers=headers, timeout=15
223
  )
224
  if r.status_code == 200:
225
+ cfg = r.json()
226
  qcfg = cfg.get("quantization_config", {})
227
+ qt = (qcfg.get("quant_type","") or
228
+ qcfg.get("quant_method","") or
229
+ cfg.get("quantization","")).lower()
230
  if "gptq" in qt:
231
+ return True, f"❌ GPTQ {qcfg.get('bits','?')}bit,请用原始 BF16 版本。"
232
  if "awq" in qt:
233
  return True, "❌ AWQ 量化,请用原始 BF16 版本。"
234
  if "bitsandbytes" in qt or "bnb" in qt:
235
+ warnings.append("⚠️ bitsandbytes 量化,结果可能失真")
236
  except Exception:
237
  warnings.append("⚠️ 无法读取 config.json")
238
 
239
+ for kw in ["gptq","awq","gguf"]:
240
  if kw in model_id.lower():
241
  return True, f"❌ 模型名含 '{kw.upper()}',请使用原始 BF16 版本。"
242
 
243
  try:
244
  all_files = list(list_repo_files(model_id, token=token))
245
  if any(f.endswith(".gguf") for f in all_files):
246
+ return True, "❌ 检测到 .gguf 文件,不支持。"
247
  if not any(f.endswith(".safetensors") for f in all_files):
248
  return True, "❌ 未找到 .safetensors 文件。"
249
  except Exception as e:
250
  warnings.append(f"⚠️ 文件列表检测失败:{e}")
251
 
252
  try:
253
+ index_data = find_index_file(model_id, token)
254
  first_shard = (
255
  sorted(set(index_data["weight_map"].values()))[0]
256
  if index_data else get_safetensor_files(model_id, token)[0]
 
258
  hdr, _ = read_safetensors_header(get_file_url(model_id, first_shard), token)
259
  bad = [k for k in hdr if any(s in k for s in QUANTIZED_KEY_SIGNATURES)]
260
  if bad:
261
+ return True, f"❌ 量化 key:{bad[:3]}"
262
+ good = {hdr[k].get("dtype","") for k in list(hdr)[:20]} - UNSUPPORTED_SVD_DTYPES
263
  if good:
264
  warnings.append(f"✅ 权重格式:{good}")
265
  except Exception as e:
 
269
 
270
 
271
  # ─────────────────────────────────────────────
272
+ # GQA 推断
273
  # ─────────────────────────────────────────────
274
 
275
  def infer_gqa_params(
 
286
  if hs and nh:
287
  d_head = hs // nh
288
  if not d_head:
289
+ for c in [256, 128, 96, 80, 64, 48, 40, 32]:
290
  if q_rows % c == 0 and k_rows % c == 0:
291
  d_head = c
292
  break
 
304
  # 指标计算
305
  # ─────────────────────────────────────────────
306
 
307
+ def compute_pearson(a: torch.Tensor, b: torch.Tensor) -> float:
308
  am, bm = a - a.mean(), b - b.mean()
309
  den = torch.norm(am) * torch.norm(bm)
310
  return float(torch.dot(am, bm) / den) if den != 0 else 0.0
311
 
312
  def compute_ssr(a: torch.Tensor, b: torch.Tensor) -> float:
313
+ n = min(a.shape[0], b.shape[0])
314
  an = a[:n] / (torch.norm(a[:n]) + 1e-10)
315
  bn = b[:n] / (torch.norm(b[:n]) + 1e-10)
316
  return float(torch.mean(torch.abs(an - bn)))
317
 
318
  def compute_svr(a: torch.Tensor, b: torch.Tensor) -> tuple[float, float]:
319
+ n = min(a.shape[0], b.shape[0])
320
  sa, sb = a[:n], b[:n]
321
  den = torch.dot(sb, sb)
322
+ if den == 0: return 1.0, 0.0
 
323
  alpha = torch.dot(sa, sb) / den
324
  return float(alpha), float(torch.mean((sa - alpha * sb) ** 2))
325
 
326
  def compute_cosU(U_a: torch.Tensor, U_b: torch.Tensor) -> float:
327
  r = min(U_a.shape[0], U_b.shape[0])
328
  c = min(U_a.shape[1], U_b.shape[1])
329
+ Ua = U_a[:r, :c] / (torch.norm(U_a[:r, :c], dim=0, keepdim=True) + 1e-10)
330
+ Ub = U_b[:r, :c] / (torch.norm(U_b[:r, :c], dim=0, keepdim=True) + 1e-10)
 
 
331
  return float(torch.diag(torch.abs(Ua.T @ Ub)).mean())
332
 
333
  def compute_cosV(Vt_a: torch.Tensor, Vt_b: torch.Tensor) -> float:
334
  r = min(Vt_a.shape[0], Vt_b.shape[0])
335
+ c = min(Vt_a.shape[1], Vt_b.shape[1])
336
+ Va = Vt_a[:r, :c] / (torch.norm(Vt_a[:r, :c], dim=1, keepdim=True) + 1e-10)
337
+ Vb = Vt_b[:r, :c] / (torch.norm(Vt_b[:r, :c], dim=1, keepdim=True) + 1e-10)
 
 
338
  return float(torch.abs((Va * Vb).sum(dim=1)).mean())
339
 
340
 
341
  # ─────────────────────────────────────────────
342
+ # 逐头分析(原始层号直接传入,不做任何变换)
343
  # ─────────────────────────────────────────────
344
 
345
  def analyze_layer_heads(
346
+ W_q: torch.Tensor, W_k: torch.Tensor, W_v: torch.Tensor,
347
+ prefix: str, # 组件前缀,用于日志
348
+ layer_idx: int, # 原始层号,直接来自 safetensors key
 
349
  n_q: int, n_kv: int, d_head: int,
 
350
  ) -> tuple[list[dict], str]:
351
 
352
+ group = n_q // n_kv
353
+ records = []
354
+ lines = [
 
355
  f"\n{'─'*80}\n"
356
+ f"[{prefix}] Layer {layer_idx:3d} "
357
  f"n_q={n_q} n_kv={n_kv} group={group} d_head={d_head}\n"
358
  f"{'─'*80}\n"
359
  f" {'KV':>3} {'Q':>3} │"
 
362
  f" {'cosU_QK':>8} {'cosU_QV':>8} {'cosU_KV':>8} │"
363
  f" {'cosV_QK':>8} {'cosV_QV':>8} {'cosV_KV':>8} │"
364
  f" {'α_QK':>7} {'α_QV':>7} {'α_KV':>7}\n"
365
+ ]
366
 
367
  for kv_h in range(n_kv):
368
  k_t = W_k[kv_h*d_head:(kv_h+1)*d_head, :]
 
374
  cosU_KV = compute_cosU(U_k, U_v)
375
  cosV_KV = compute_cosV(Vt_k, Vt_v)
376
  ssr_kv = compute_ssr(s_k, s_v)
377
+ pkv = compute_pearson(
378
+ s_k[:min(len(s_k), len(s_v))],
379
+ s_v[:min(len(s_k), len(s_v))]
380
+ )
381
 
382
  for q_off in range(group):
383
+ h = kv_h * group + q_off
384
  q_t = W_q[h*d_head:(h+1)*d_head, :]
385
  U_q, s_q, Vt_q = torch.linalg.svd(q_t, full_matrices=False)
386
 
387
  nqk = min(len(s_q), len(s_k))
388
  nqv = min(len(s_q), len(s_v))
389
 
390
+ pqk = compute_pearson(s_q[:nqk], s_k[:nqk])
391
+ spqk = float(spearmanr(s_q[:nqk].numpy(), s_k[:nqk].numpy())[0])
392
  ssr_qk = compute_ssr(s_q, s_k)
393
+ a_qk, r_qk = compute_svr(s_q, s_k)
394
+ cU_QK = compute_cosU(U_q, U_k)
395
+ cV_QK = compute_cosV(Vt_q, Vt_k)
396
 
397
+ pqv = compute_pearson(s_q[:nqv], s_v[:nqv])
398
  ssr_qv = compute_ssr(s_q, s_v)
399
+ a_qv, r_qv = compute_svr(s_q, s_v)
400
+ cU_QV = compute_cosU(U_q, U_v)
401
+ cV_QV = compute_cosV(Vt_q, Vt_v)
402
 
403
+ smxq = float(s_q.max())
404
+ smnq = float(s_q[s_q>1e-10].min()) if (s_q>1e-10).any() else 0.
405
+ smxk = float(s_k.max())
406
+ smnk = float(s_k[s_k>1e-10].min()) if (s_k>1e-10).any() else 0.
407
+ smxv = float(s_v.max())
408
+ smnv = float(s_v[s_v>1e-10].min()) if (s_v>1e-10).any() else 0.
409
 
410
  records.append({
411
+ # prefix + layer_idx 完整保留,不做任何变换
412
+ "prefix": prefix,
413
+ "layer": layer_idx,
414
+ "kv_head": kv_h,
415
+ "q_head": h,
416
+ "pearson_QK": round(pqk, 6),
417
+ "spearman_QK": round(spqk, 6),
418
+ "pearson_QV": round(pqv, 6),
419
+ "pearson_KV": round(pkv, 6),
420
+ "ssr_QK": round(ssr_qk, 8),
421
+ "ssr_QV": round(ssr_qv, 8),
422
+ "ssr_KV": round(ssr_kv, 8),
423
+ "cosU_QK": round(cU_QK, 6),
424
+ "cosU_QV": round(cU_QV, 6),
425
+ "cosU_KV": round(cosU_KV,6),
426
+ "cosV_QK": round(cV_QK, 6),
427
+ "cosV_QV": round(cV_QV, 6),
428
+ "cosV_KV": round(cosV_KV,6),
429
+ "alpha_QK": round(a_qk, 4),
430
+ "alpha_QV": round(a_qv, 4),
431
+ "alpha_KV": round(alpha_kv,4),
432
+ "alpha_res_QK": round(r_qk, 6),
433
+ "alpha_res_QV": round(r_qv, 6),
434
+ "alpha_res_KV": round(res_kv, 6),
435
+ "sigma_max_Q": round(smxq, 4),
436
+ "sigma_min_Q": round(smnq, 4),
437
+ "sigma_max_K": round(smxk, 4),
438
+ "sigma_min_K": round(smnk, 4),
439
+ "sigma_max_V": round(smxv, 4),
440
+ "sigma_min_V": round(smnv, 4),
441
+ "cond_Q": round(smxq/(smnq+1e-10), 2),
442
+ "cond_K": round(smxk/(smnk+1e-10), 2),
443
+ "cond_V": round(smxv/(smnv+1e-10), 2),
444
  })
445
 
446
  lines.append(
447
  f" {kv_h:>3d} {h:>3d} │"
448
  f" {pqk:>+7.4f} {spqk:>+7.4f} {ssr_qk:>8.6f} │"
449
  f" {ssr_qv:>8.6f} {ssr_kv:>8.6f} │"
450
+ f" {cU_QK:>8.4f} {cU_QV:>8.4f} {cosU_KV:>8.4f} │"
451
+ f" {cV_QK:>8.4f} {cV_QV:>8.4f} {cosV_KV:>8.4f} │"
452
+ f" {a_qk:>7.4f} {a_qv:>7.4f} {alpha_kv:>7.4f}\n"
453
  )
454
 
455
  return records, "".join(lines)
 
460
  # ─────────────────────────────────────────────
461
 
462
  def analyze_model(
463
+ model_id: str,
464
+ hf_token: str,
465
+ start_layer: int, # ★ 原始层号起点
466
+ end_layer: int, # ★ 原始层号终点(含)
467
  progress=gr.Progress()
468
  ):
469
  if not model_id.strip():
470
  return "❌ 请输入模型 ID", None
471
 
472
+ token = hf_token.strip() or None
473
+ log = [f"🔍 分析模型:{model_id} 层范围:{start_layer}~{end_layer}\n{'═'*80}\n"]
 
474
  all_records: list[dict] = []
475
 
476
  # ── 量化检测 ─────────────────────────────────
477
  progress(0.02, desc="量化检测...")
478
  blocked, qmsg = check_quantization(model_id, token)
479
+ log.append(f"【量化检测】\n{qmsg}\n{'─'*80}\n")
480
  if blocked:
481
+ return "".join(log), None
482
 
483
  # ── config.json ───────────────────────────────
484
  config_params = {}
 
489
  timeout=15
490
  )
491
  if r.status_code == 200:
492
+ config_params = extract_config_params(r.json())
493
+ log.append(
494
+ f"📋 config:model_type={config_params.get('model_type')} "
495
+ f"hidden={config_params.get('hidden_size')} "
496
+ f"n_heads={config_params.get('num_attention_heads')} "
497
+ f"n_kv={config_params.get('num_key_value_heads')} "
498
+ f"head_dim={config_params.get('head_dim')}\n"
 
 
499
  f"{'─'*80}\n"
500
  )
501
  except Exception:
502
+ log.append("⚠️ 无法读取 config.json\n")
503
 
504
  # ── 获取 shard 列表 ───────────────────────────
505
  progress(0.05, desc="读取模型索引...")
506
  try:
507
+ index_data = find_index_file(model_id, token)
508
+ shard_files = (
509
+ sorted(set(index_data["weight_map"].values()))
510
+ if index_data else get_safetensor_files(model_id, token)
511
+ )
512
+ log.append(f"📦 {len(shard_files)} 个 shard\n")
 
513
  except requests.exceptions.HTTPError as e:
514
  return _http_error_msg(e, model_id), None
515
 
516
  # ── 读取所有 shard header ─────────────────────
517
  progress(0.08, desc="读取 shard headers...")
518
  all_shard_headers: dict[str, tuple[dict, int]] = {}
 
519
  for sf in shard_files:
520
  try:
521
  h, hs = read_safetensors_header(get_file_url(model_id, sf), token)
522
  all_shard_headers[sf] = (h, hs)
 
523
  except Exception as e:
524
+ log.append(f"⚠️ {sf} 读取失败:{e}\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
 
526
+ log.append(f"🔑 key 数:{sum(len(h) for h,_ in all_shard_headers.values())}\n")
 
 
527
 
528
+ # ── 扫描所有 QKV 槽 ───────────────────────────
529
+ progress(0.12, desc="扫描 QKV 结构...")
530
+ all_slots = scan_all_qkv(all_shard_headers)
 
 
 
531
 
532
+ if not all_slots:
533
+ sample = list(next(iter(all_shard_headers.values()))[0].keys())[:20]
534
+ return "".join(log) + "⚠️ 无法识别 Q/K/V\n" + "\n".join(sample), None
 
 
 
 
 
535
 
536
+ # ── 按原始层号过滤 [start_layer, end_layer] ───
537
+ # 直接用 safetensors key 里的层号,不做任何变换
538
+ filtered_slots = {
539
+ (prefix, layer_idx): qkv
540
+ for (prefix, layer_idx), qkv in all_slots.items()
541
+ if start_layer <= layer_idx <= end_layer
542
+ }
543
 
544
+ if not filtered_slots:
545
+ # 打印实际存在的层号范围供参考
546
+ by_prefix: dict[str, list[int]] = {}
547
+ for (prefix, layer_idx) in all_slots:
548
+ by_prefix.setdefault(prefix, []).append(layer_idx)
549
+ info = "\n".join(
550
+ f" {p}: {sorted(v)}"
551
+ for p, v in sorted(by_prefix.items())
552
+ )
553
+ return "".join(log) + f"⚠️ 层范围 {start_layer}~{end_layer} 内无数据。\n实际层号:\n{info}\n", None
 
 
 
 
 
 
 
554
 
555
+ # ── 打印结构概览 ──────────────────────────────
556
+ by_prefix: dict[str, list[int]] = {}
557
+ for (prefix, layer_idx) in filtered_slots:
558
+ by_prefix.setdefault(prefix, []).append(layer_idx)
559
 
560
+ log.append(f"📐 层范围 {start_layer}~{end_layer} 内发现的组件:\n")
561
+ for p, idxs in sorted(by_prefix.items()):
562
+ log.append(f" '{p}' 层号 {sorted(idxs)}\n")
563
+ log.append(f"{'═'*80}\n")
 
 
 
 
 
 
564
 
565
+ # ── 按 (prefix, layer_idx) 顺序分析 ──────────
566
+ # ★ sorted 保证输出有序,但层号本身不变
567
+ sorted_slots = sorted(filtered_slots.items(), key=lambda x: (x[0][0], x[0][1]))
568
+ total = len(sorted_slots)
 
 
 
 
 
569
 
570
+ for i, ((prefix, layer_idx), qkv) in enumerate(sorted_slots):
571
+ progress(0.15 + 0.80 * i / max(total, 1),
572
+ desc=f"{prefix} layer {layer_idx}...")
 
 
 
 
 
573
 
574
+ q_shard, q_key = qkv["q"]
575
+ k_shard, k_key = qkv["k"]
576
+ v_shard, v_key = qkv["v"]
577
+
578
+ try:
579
+ W_q = load_tensor_remote(
580
+ get_file_url(model_id, q_shard), q_key,
581
+ *all_shard_headers[q_shard], token)
582
+ W_k = load_tensor_remote(
583
+ get_file_url(model_id, k_shard), k_key,
584
+ *all_shard_headers[k_shard], token)
585
+ W_v = load_tensor_remote(
586
+ get_file_url(model_id, v_shard), v_key,
587
+ *all_shard_headers[v_shard], token)
588
+ except Exception as e:
589
+ log.append(f"[{prefix}] Layer {layer_idx}: ❌ 加载失败:{e}\n")
590
+ continue
591
+
592
+ if W_q is None or W_k is None or W_v is None:
593
+ log.append(f"[{prefix}] Layer {layer_idx}: ⚠️ tensor 为 None\n")
594
+ continue
595
+
596
+ try:
597
+ n_q, n_kv, d_head = infer_gqa_params(W_q, W_k, config_params)
598
+ except ValueError as e:
599
+ log.append(f"[{prefix}] Layer {layer_idx}: ❌ GQA 推断失败:{e}\n")
600
  del W_q, W_k, W_v
601
+ continue
602
 
603
+ records, layer_log = analyze_layer_heads(
604
+ W_q, W_k, W_v,
605
+ prefix, # 传入原始前缀
606
+ layer_idx, # ★ 传入原始层号,函数内不做任何变换
607
+ n_q, n_kv, d_head,
608
+ )
609
+ all_records.extend(records)
610
+ log.append(layer_log)
611
+ del W_q, W_k, W_v
612
 
613
+ # ── 汇总 ─────────────────────────────────────
614
  if not all_records:
615
+ return "".join(log) + "\n❌ 未获得任何有效结果\n", None
616
 
617
  df = pd.DataFrame(all_records)
618
 
619
+ def stat(arr, name):
620
+ return (f" {name:<14}"
621
+ f" Median={np.median(arr):.6f}"
622
+ f" Mean={np.mean(arr):.6f}"
623
+ f" Min={np.min(arr):.6f}"
624
+ f" Max={np.max(arr):.6f}\n")
 
 
 
 
625
 
626
+ summary = [f"\n{'═'*80}\n📊 汇总 — {model_id} 层 {start_layer}~{end_layer}\n{'═'*80}\n"]
 
627
 
628
+ # prefix 分组汇总
629
+ for pfx in df["prefix"].unique():
630
+ pdf = df[df["prefix"] == pfx]
631
  summary.append(
632
+ f"\n▶ {pfx}\n"
633
+ f" 记录:{len(pdf)} 条,"
634
+ f"层:{sorted(pdf['layer'].unique())}\n"
635
  )
636
  summary += [
637
+ " 【第一定律 Pearson r → 1】\n",
638
+ stat(pdf["pearson_QK"].values, "Q-K:"),
639
+ stat(pdf["pearson_QV"].values, "Q-V:"),
640
+ stat(pdf["pearson_KV"].values, "K-V:"),
641
+ " 【第二定律 SSR → 0】\n",
642
+ stat(pdf["ssr_QK"].values, "Q-K:"),
643
+ stat(pdf["ssr_QV"].values, "Q-V:"),
644
+ stat(pdf["ssr_KV"].values, "K-V:"),
645
+ " 【第四定律 cosU 输出子空间】\n",
646
+ stat(pdf["cosU_QK"].values, "cosU Q-K:"),
647
+ stat(pdf["cosU_QV"].values, "cosU Q-V:"),
648
+ stat(pdf["cosU_KV"].values, "cosU K-V:"),
649
+ " 【第五定律 cosV 输入子空间】\n",
650
+ stat(pdf["cosV_QK"].values, "cosV Q-K:"),
651
+ stat(pdf["cosV_QV"].values, "cosV Q-V:"),
652
+ stat(pdf["cosV_KV"].values, "cosV K-V:"),
653
+ " 【第三定律 条件数】\n",
654
+ stat(pdf["cond_Q"].values, "cond Q:"),
655
+ stat(pdf["cond_K"].values, "cond K:"),
656
+ stat(pdf["cond_V"].values, "cond V:"),
657
  ]
658
 
659
  summary.append(f"\n⚡ 理论极值:Pearson→1, SSR→0, cosU(QV)<1/√d_head\n{'═'*80}\n")
660
+ log.extend(summary)
661
 
662
+ return "".join(log), df
663
 
664
 
665
  # ─────────────────────────────────────────────
 
673
  **Mathematical Foundations of Large Language Models (MF-LLM)**
674
 
675
  通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
676
+ safetensors 原始层号分析,支持混合模态模型(视觉/音频/语言同时输出)。
677
 
678
  | 定律 | 指标 | 理论极值 |
679
  |------|------|---------|
 
699
  placeholder="hf_xxxxxxxxxxxxxxxx",
700
  type="password"
701
  )
702
+ with gr.Row():
703
+ start_layer_input = gr.Number(
704
+ label="起始层号(原始层号,含)",
705
+ value=0, minimum=0, maximum=999, precision=0
706
+ )
707
+ end_layer_input = gr.Number(
708
+ label="结束层号(原始层号,含)",
709
+ value=5, minimum=0, maximum=999, precision=0
710
+ )
711
  analyze_btn = gr.Button("🚀 开始分析", variant="primary")
712
 
713
  with gr.Column(scale=1):
714
  gr.Markdown("""
715
  ### ✅ 推荐模型
716
  ```
717
+ google/gemma-4-e2b
718
+ google/gemma-4-31b-it
719
  Qwen/Qwen2.5-14B-Instruct
720
  meta-llama/Llama-3-8B
721
  deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
722
  ```
723
+ ### 层号说明
724
+ - 层号 = safetensors key 中 `layers.{N}` 的 **N**
725
+ - **不按组件重排**,原始值直接输出
726
+ - 混合模态模型(如 Gemma-4):
727
+ - `layers.0~11` 同时含 audio/vision/text 层
728
+ - 全部输出,按前缀区分组件
729
+
730
+ ### 示例:Gemma-4-E2B
731
+ | 组件 | 层范围 |
732
+ |------|--------|
733
+ | audio_tower | 0~11 |
734
+ | language_model | 0~34 |
735
+ | vision_tower | 0~15 |
736
  """)
737
 
738
  log_output = gr.Textbox(
739
  label="分析日志",
740
+ lines=40, max_lines=300
741
  )
742
  table_output = gr.Dataframe(
743
  label="逐头全指标结果表",
744
  headers=[
745
+ "prefix","layer","kv_head","q_head",
746
  "pearson_QK","spearman_QK","pearson_QV","pearson_KV",
747
  "ssr_QK","ssr_QV","ssr_KV",
748
  "cosU_QK","cosU_QV","cosU_KV",
 
758
 
759
  analyze_btn.click(
760
  fn=analyze_model,
761
+ inputs=[model_input, token_input, start_layer_input, end_layer_input],
762
  outputs=[log_output, table_output]
763
  )
764