Spaces:
Running
Running
Alex W. committed on
Commit ·
a69ce56
1
Parent(s): f467caf
改动清单
Browse files
[改动1] 顶部 + is_vision_key() + 主循环 新增 VISION_KEY_PATTERNS,自动检测并跳过视觉层(vision/visual/vit等),log 显示跳过计数
[改动2] compute_right_vector_alignment() 新增函数:计算右奇异向量(输入子空间)cosV,对应第五定律
[改动3] analyze_layer_heads() + 主循环 加载 W_v;计算 Q-V、K-V 全套指标;新增 sigma_max/min、cond_Q/K/V(第三定律)
[改动4] analyze_layer_heads() 签名 新增 modality 参数,结果表中记录 "text"/"vision"
[改动5] 全局汇总 分模态统计;补全五定律全部指标的 Median/Mean/Min/Max
[改动6] Gradio UI 推荐列表 更新为 gemma-4-e2b / gemma-4-e4b-it,补充多模态说明
[改动7] read_safetensors_header() 加入 __metadata__ 过滤(参考 reference code)
[改动8] 结果表 headers 扩展为 31 列,覆盖全部新增指标
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import gradio as gr
|
|
| 2 |
import requests
|
| 3 |
import struct
|
| 4 |
import json
|
|
|
|
| 5 |
import numpy as np
|
| 6 |
import torch
|
| 7 |
from scipy.stats import pearsonr, spearmanr
|
|
@@ -31,6 +32,13 @@ except AttributeError:
|
|
| 31 |
UNSUPPORTED_SVD_DTYPES = {"I8", "U8", "I32", "I64", "F8_E4M3", "F8_E5M2"}
|
| 32 |
QUANTIZED_KEY_SIGNATURES = ["qweight", "qzeros", "scales", "g_idx", "packed_weight"]
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
# ─────────────────────────────────────────────
|
| 36 |
# 工具函数
|
|
@@ -51,7 +59,10 @@ def read_safetensors_header(url: str, token: str = None) -> tuple[dict, int]:
|
|
| 51 |
timeout=30
|
| 52 |
)
|
| 53 |
r.raise_for_status()
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
|
| 57 |
def load_tensor_remote(
|
|
@@ -100,7 +111,8 @@ def get_safetensor_files(model_id: str, token: str = None) -> list:
|
|
| 100 |
|
| 101 |
|
| 102 |
def find_index_file(model_id: str, token: str = None) -> dict | None:
|
| 103 |
-
url = f"https://huggingface.co/{model_id}/resolve/main/
|
|
|
|
| 104 |
headers = {"Authorization": f"Bearer {token}"} if token else {}
|
| 105 |
r = requests.get(url, headers=headers, timeout=15)
|
| 106 |
return r.json() if r.status_code == 200 else None
|
|
@@ -114,15 +126,20 @@ def _http_error_msg(e: requests.exceptions.HTTPError, model_id: str) -> str:
|
|
| 114 |
return f"❌ HTTP {code}:{e}"
|
| 115 |
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
# ─────────────────────────────────────────────
|
| 118 |
-
# 量化三重检测
|
| 119 |
# ─────────────────────────────────────────────
|
| 120 |
|
| 121 |
def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
|
| 122 |
headers = {"Authorization": f"Bearer {token}"} if token else {}
|
| 123 |
warnings = []
|
| 124 |
|
| 125 |
-
# 检测 1:config.json
|
| 126 |
try:
|
| 127 |
r = requests.get(
|
| 128 |
f"https://huggingface.co/{model_id}/resolve/main/config.json",
|
|
@@ -136,8 +153,7 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
|
|
| 136 |
cfg.get("quantization","")).lower()
|
| 137 |
if "gptq" in qt:
|
| 138 |
bits = qcfg.get("bits","?")
|
| 139 |
-
return True,
|
| 140 |
-
f" 请改用原始 BF16 版本。")
|
| 141 |
if "awq" in qt:
|
| 142 |
return True, "❌ 检测到 AWQ 量化,请改用原始 BF16 版本。"
|
| 143 |
if "bitsandbytes" in qt or "bnb" in qt:
|
|
@@ -145,7 +161,6 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
|
|
| 145 |
except Exception:
|
| 146 |
warnings.append("⚠️ 无法读取 config.json")
|
| 147 |
|
| 148 |
-
# 检测 2:文件名 / 模型名关键词
|
| 149 |
mid_lower = model_id.lower()
|
| 150 |
for kw in ["gptq","awq","gguf"]:
|
| 151 |
if kw in mid_lower:
|
|
@@ -160,7 +175,6 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
|
|
| 160 |
except Exception as e:
|
| 161 |
warnings.append(f"⚠️ 文件列表检测失败:{e}")
|
| 162 |
|
| 163 |
-
# 检测 3:header key 签名
|
| 164 |
try:
|
| 165 |
index_data = find_index_file(model_id, token)
|
| 166 |
if index_data:
|
|
@@ -173,10 +187,9 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
|
|
| 173 |
bad_keys = [k for k in all_keys
|
| 174 |
if any(sig in k for sig in QUANTIZED_KEY_SIGNATURES)]
|
| 175 |
if bad_keys:
|
| 176 |
-
return True,
|
| 177 |
-
f" 请使用原始 BF16 版本。")
|
| 178 |
dtypes = {hdr[k].get("dtype","") for k in all_keys[:20]}
|
| 179 |
-
good
|
| 180 |
if good:
|
| 181 |
warnings.append(f"✅ 权重格式:{good}")
|
| 182 |
except Exception as e:
|
|
@@ -187,47 +200,36 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
|
|
| 187 |
|
| 188 |
|
| 189 |
# ─────────────────────────────────────────────
|
| 190 |
-
# GQA 参数自动推断
|
| 191 |
# ─────────────────────────────────────────────
|
| 192 |
|
| 193 |
-
def infer_gqa_params(
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
权重 shape 约定(最常见):
|
| 201 |
-
W_q : (n_q_heads * d_head, d_model) → shape[0] = n_q * d_h
|
| 202 |
-
W_k : (n_kv_heads * d_head, d_model) → shape[0] = n_kv * d_h
|
| 203 |
-
|
| 204 |
-
d_head 优先从 config.json 读取,其次用常见默认值猜测。
|
| 205 |
-
"""
|
| 206 |
-
q_rows, d_model = W_q.shape[0], W_q.shape[1]
|
| 207 |
-
k_rows = W_k.shape[0]
|
| 208 |
|
| 209 |
-
# 从 config.json 读取 d_head
|
| 210 |
d_head = None
|
| 211 |
if config:
|
| 212 |
d_head = (
|
| 213 |
config.get("head_dim") or
|
| 214 |
config.get("kv_channels") or
|
| 215 |
-
config.get("hidden_size", 0) // config.get("num_attention_heads", 1)
|
| 216 |
)
|
| 217 |
if d_head == 0:
|
| 218 |
d_head = None
|
| 219 |
|
| 220 |
-
# 如果 config 没给,用常见值探测(64, 80, 96, 128, 256)
|
| 221 |
if not d_head:
|
| 222 |
-
for candidate in [256, 128, 96, 80, 64]:
|
| 223 |
if q_rows % candidate == 0 and k_rows % candidate == 0:
|
| 224 |
d_head = candidate
|
| 225 |
break
|
| 226 |
|
| 227 |
if not d_head:
|
| 228 |
raise ValueError(
|
| 229 |
-
f"无法推断 d_head:W_q
|
| 230 |
-
f"请在 config.json 中确认 head_dim 字段。"
|
| 231 |
)
|
| 232 |
|
| 233 |
n_q_heads = q_rows // d_head
|
|
@@ -235,164 +237,231 @@ def infer_gqa_params(W_q: torch.Tensor, W_k: torch.Tensor, config: dict | None)
|
|
| 235 |
|
| 236 |
if n_q_heads % n_kv_heads != 0:
|
| 237 |
raise ValueError(
|
| 238 |
-
f"n_q_heads={n_q_heads} 不能被 n_kv_heads={n_kv_heads} 整除
|
| 239 |
-
f"请检查 d_head 推断是否正确。"
|
| 240 |
)
|
| 241 |
-
|
| 242 |
return n_q_heads, n_kv_heads, d_head
|
| 243 |
|
| 244 |
|
| 245 |
# ─────────────────────────────────────────────
|
| 246 |
-
#
|
| 247 |
# ─────────────────────────────────────────────
|
| 248 |
|
| 249 |
-
def
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
|
|
|
| 254 |
|
| 255 |
|
| 256 |
def compute_singular_value_ratio(
|
| 257 |
-
|
| 258 |
) -> tuple[float, float]:
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
|
| 271 |
|
| 272 |
def compute_left_vector_alignment(
|
| 273 |
-
|
| 274 |
) -> float:
|
| 275 |
"""
|
| 276 |
-
|
| 277 |
-
|
|
|
|
|
|
|
| 278 |
"""
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
|
|
|
| 284 |
|
| 285 |
|
| 286 |
-
|
| 287 |
-
|
|
|
|
| 288 |
) -> float:
|
| 289 |
"""
|
| 290 |
-
|
| 291 |
-
|
|
|
|
|
|
|
| 292 |
"""
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
|
|
|
| 298 |
|
| 299 |
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
SSR = mean_i |s̃_q_i - s̃_k_i|
|
| 304 |
-
"""
|
| 305 |
-
min_len = min(s_q.shape[0], s_k.shape[0])
|
| 306 |
-
sq = s_q[:min_len].cpu().numpy()
|
| 307 |
-
sk = s_k[:min_len].cpu().numpy()
|
| 308 |
-
sq_n = sq / (np.linalg.norm(sq) + 1e-10)
|
| 309 |
-
sk_n = sk / (np.linalg.norm(sk) + 1e-10)
|
| 310 |
-
return float(np.mean(np.abs(sq_n - sk_n)))
|
| 311 |
-
|
| 312 |
|
| 313 |
def analyze_layer_heads(
|
| 314 |
W_q: torch.Tensor,
|
| 315 |
W_k: torch.Tensor,
|
|
|
|
| 316 |
layer_idx: int,
|
| 317 |
n_q_heads: int,
|
| 318 |
n_kv_heads: int,
|
| 319 |
d_head: int,
|
|
|
|
| 320 |
) -> tuple[list[dict], str]:
|
| 321 |
"""
|
| 322 |
-
GQA 逐头分析:
|
| 323 |
-
|
| 324 |
-
|
|
|
|
| 325 |
"""
|
| 326 |
group_size = n_q_heads // n_kv_heads
|
| 327 |
records = []
|
| 328 |
log_lines = []
|
| 329 |
|
| 330 |
log_lines.append(
|
| 331 |
-
f"\n{'─'*
|
| 332 |
-
f"Layer {layer_idx:3d} "
|
| 333 |
-
f"
|
| 334 |
-
f"group={group_size}
|
| 335 |
-
f"{'─'*
|
| 336 |
)
|
|
|
|
| 337 |
log_lines.append(
|
| 338 |
-
f" {'KV
|
| 339 |
-
f"{'
|
| 340 |
-
f"{'
|
| 341 |
-
f"{'
|
|
|
|
|
|
|
| 342 |
)
|
| 343 |
|
| 344 |
for kv_h in range(n_kv_heads):
|
| 345 |
-
|
|
|
|
| 346 |
k_tensor = W_k[kv_h * d_head : (kv_h + 1) * d_head, :]
|
| 347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
|
| 349 |
for q_offset in range(group_size):
|
| 350 |
-
h_idx
|
| 351 |
-
|
| 352 |
-
# ── 提取 Q 头矩阵 (d_head × d_model) ──
|
| 353 |
q_tensor = W_q[h_idx * d_head : (h_idx + 1) * d_head, :]
|
| 354 |
-
U_q, s_q,
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
s_q[:
|
| 363 |
-
s_k[:
|
| 364 |
-
)
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
|
| 378 |
records.append({
|
| 379 |
-
|
| 380 |
-
"
|
| 381 |
-
"
|
| 382 |
-
"
|
| 383 |
-
"
|
| 384 |
-
|
| 385 |
-
"
|
| 386 |
-
"
|
| 387 |
-
"
|
| 388 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
})
|
| 390 |
|
| 391 |
log_lines.append(
|
| 392 |
-
f"
|
| 393 |
-
f"{
|
| 394 |
-
f"{
|
| 395 |
-
f"{
|
|
|
|
|
|
|
| 396 |
)
|
| 397 |
|
| 398 |
return records, "".join(log_lines)
|
|
@@ -412,17 +481,17 @@ def analyze_model(
|
|
| 412 |
return "❌ 请输入模型 ID", None
|
| 413 |
|
| 414 |
token = hf_token.strip() or None
|
| 415 |
-
log_lines = [f"🔍 分析模型:{model_id}\n{'═'*
|
| 416 |
all_records: list[dict] = []
|
| 417 |
|
| 418 |
# ── 量化检测 ─────────────────────────────────
|
| 419 |
progress(0.02, desc="量化检测...")
|
| 420 |
is_blocked, quant_msg = check_quantization(model_id, token)
|
| 421 |
-
log_lines.append(f"【量化检测】\n{quant_msg}\n{'─'*
|
| 422 |
if is_blocked:
|
| 423 |
return "".join(log_lines), None
|
| 424 |
|
| 425 |
-
# ──
|
| 426 |
config = None
|
| 427 |
try:
|
| 428 |
r = requests.get(
|
|
@@ -434,19 +503,20 @@ def analyze_model(
|
|
| 434 |
config = r.json()
|
| 435 |
log_lines.append(
|
| 436 |
f"📋 config.json:\n"
|
| 437 |
-
f"
|
|
|
|
| 438 |
f" num_attention_heads = {config.get('num_attention_heads')}\n"
|
| 439 |
f" num_key_value_heads = {config.get('num_key_value_heads')}\n"
|
| 440 |
-
f" head_dim
|
| 441 |
-
f"{'─'*
|
| 442 |
)
|
| 443 |
except Exception:
|
| 444 |
log_lines.append("⚠️ 无法读取 config.json,将从 weight shape 自动推断\n")
|
| 445 |
|
| 446 |
-
# ──
|
| 447 |
progress(0.05, desc="读取模型索引...")
|
| 448 |
try:
|
| 449 |
-
index_data
|
| 450 |
shard_headers: dict[str, tuple[dict, int]] = {}
|
| 451 |
|
| 452 |
if index_data:
|
|
@@ -455,22 +525,19 @@ def analyze_model(
|
|
| 455 |
f"📦 分片模型,共 {len(set(weight_map.values()))} 个 shard\n"
|
| 456 |
)
|
| 457 |
else:
|
| 458 |
-
sf_files
|
| 459 |
-
if not sf_files:
|
| 460 |
-
return "❌ 未找到 .safetensors 文件", None
|
| 461 |
weight_map = None
|
| 462 |
log_lines.append(f"📦 单文件:{sf_files}\n")
|
| 463 |
except requests.exceptions.HTTPError as e:
|
| 464 |
return _http_error_msg(e, model_id), None
|
| 465 |
|
| 466 |
-
# ── 探测第一个 shard
|
| 467 |
progress(0.08, desc="识别层结构...")
|
| 468 |
try:
|
| 469 |
if index_data:
|
| 470 |
first_shard = sorted(set(index_data["weight_map"].values()))[0]
|
| 471 |
else:
|
| 472 |
first_shard = sf_files[0]
|
| 473 |
-
|
| 474 |
first_url = get_file_url(model_id, first_shard)
|
| 475 |
first_header, first_hsize = read_safetensors_header(first_url, token)
|
| 476 |
shard_headers[first_shard] = (first_header, first_hsize)
|
|
@@ -478,27 +545,37 @@ def analyze_model(
|
|
| 478 |
except Exception as e:
|
| 479 |
return f"❌ 读取 shard header 失败:{e}", None
|
| 480 |
|
| 481 |
-
#
|
| 482 |
-
|
| 483 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 484 |
)]
|
| 485 |
if not q_candidates:
|
| 486 |
-
sample = "\n".join(
|
| 487 |
-
return f"⚠️ 无法识别 Q/K key,前 30 个 key:\n{sample}", None
|
| 488 |
|
| 489 |
sample_q = q_candidates[0]
|
| 490 |
-
if
|
| 491 |
-
elif "query"
|
| 492 |
-
elif "wq"
|
| 493 |
else:
|
| 494 |
-
|
| 495 |
-
|
|
|
|
| 496 |
|
| 497 |
-
log_lines.append(f"🔑 Q suffix:{
|
| 498 |
-
log_lines.append(f"🔑 K suffix:{
|
| 499 |
-
log_lines.append(f"{
|
|
|
|
| 500 |
|
| 501 |
-
# ── 辅助:查找 key 所在 shard ────────────────
|
| 502 |
def get_shard_for_key(key: str) -> str | None:
|
| 503 |
if index_data:
|
| 504 |
return index_data["weight_map"].get(key)
|
|
@@ -511,7 +588,7 @@ def analyze_model(
|
|
| 511 |
return None
|
| 512 |
|
| 513 |
# ── 逐层分析 ─────────────────────────────────
|
| 514 |
-
|
| 515 |
|
| 516 |
for layer_idx in range(int(max_layers)):
|
| 517 |
progress(
|
|
@@ -519,17 +596,28 @@ def analyze_model(
|
|
| 519 |
desc=f"第 {layer_idx} 层..."
|
| 520 |
)
|
| 521 |
|
| 522 |
-
q_key = f"model.layers.{layer_idx}.{
|
| 523 |
-
k_key = f"model.layers.{layer_idx}.{
|
|
|
|
| 524 |
|
| 525 |
q_shard = get_shard_for_key(q_key)
|
| 526 |
k_shard = get_shard_for_key(k_key)
|
|
|
|
| 527 |
|
| 528 |
if q_shard is None or k_shard is None:
|
| 529 |
-
log_lines.append(
|
|
|
|
|
|
|
| 530 |
break
|
| 531 |
|
| 532 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 533 |
if shard not in shard_headers:
|
| 534 |
h, hs = read_safetensors_header(get_file_url(model_id, shard), token)
|
| 535 |
shard_headers[shard] = (h, hs)
|
|
@@ -543,91 +631,108 @@ def analyze_model(
|
|
| 543 |
get_file_url(model_id, k_shard), k_key,
|
| 544 |
*shard_headers[k_shard], token
|
| 545 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
except ValueError as e:
|
| 547 |
log_lines.append(f"Layer {layer_idx}: ⚠️ 跳过({e})\n")
|
| 548 |
continue
|
| 549 |
|
| 550 |
-
if W_q is None or W_k is None:
|
| 551 |
log_lines.append(f"Layer {layer_idx}: ⚠️ tensor 为 None,跳过\n")
|
| 552 |
continue
|
| 553 |
|
| 554 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 555 |
try:
|
| 556 |
n_q_heads, n_kv_heads, d_head = infer_gqa_params(W_q, W_k, config)
|
| 557 |
except ValueError as e:
|
| 558 |
log_lines.append(f"Layer {layer_idx}: ❌ GQA 推断失败:{e}\n")
|
| 559 |
-
del W_q, W_k
|
| 560 |
continue
|
| 561 |
|
| 562 |
-
if not
|
| 563 |
-
group_size = n_q_heads // n_kv_heads
|
| 564 |
log_lines.append(
|
| 565 |
-
f"🧠 GQA 结构:
|
| 566 |
-
f"
|
| 567 |
-
f"
|
| 568 |
-
f"
|
| 569 |
-
f"
|
| 570 |
-
f"W_k shape: {list(W_k.shape)}\n"
|
| 571 |
-
f"{'═'*70}\n"
|
| 572 |
)
|
| 573 |
-
|
| 574 |
|
| 575 |
-
#
|
| 576 |
records, layer_log = analyze_layer_heads(
|
| 577 |
-
W_q, W_k,
|
| 578 |
-
|
|
|
|
|
|
|
| 579 |
)
|
| 580 |
all_records.extend(records)
|
| 581 |
log_lines.append(layer_log)
|
| 582 |
|
| 583 |
-
del W_q, W_k
|
| 584 |
|
| 585 |
-
# ── 全局汇总
|
| 586 |
if all_records:
|
| 587 |
df = pd.DataFrame(all_records)
|
| 588 |
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
f"
|
| 604 |
-
f"
|
| 605 |
-
f"
|
| 606 |
-
f"
|
| 607 |
-
f"
|
| 608 |
-
|
| 609 |
-
f"【第一定律 —
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
f"
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
f"【第四定律 —
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
f"【
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 631 |
|
| 632 |
return "".join(log_lines), df
|
| 633 |
else:
|
|
@@ -645,13 +750,15 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
|
| 645 |
**Mathematical Foundations of Large Language Models (MF-LLM)**
|
| 646 |
|
| 647 |
通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
|
| 648 |
-
支持 GQA
|
| 649 |
|
| 650 |
-
| 定律 | 指标 | 理论极值 |
|
| 651 |
-
|------|------|---------|
|
| 652 |
-
| 第一定律 | Pearson r / Spearman r | → 1 |
|
| 653 |
-
| 第二定律 | SSR | → 0 |
|
| 654 |
-
| 第
|
|
|
|
|
|
|
| 655 |
|
| 656 |
[](https://doi.org/10.5281/zenodo.19707844)
|
| 657 |
[](https://hal.science/hal-05609398)
|
|
@@ -661,8 +768,8 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
|
| 661 |
with gr.Column(scale=2):
|
| 662 |
model_input = gr.Textbox(
|
| 663 |
label="HuggingFace 模型 ID",
|
| 664 |
-
placeholder="
|
| 665 |
-
value="
|
| 666 |
)
|
| 667 |
token_input = gr.Textbox(
|
| 668 |
label="HF Access Token(公开模型可留空)",
|
|
@@ -675,14 +782,15 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
|
| 675 |
)
|
| 676 |
analyze_btn = gr.Button("🚀 开始分析", variant="primary")
|
| 677 |
|
|
|
|
| 678 |
with gr.Column(scale=1):
|
| 679 |
gr.Markdown("""
|
| 680 |
### ✅ 推荐模型
|
| 681 |
```
|
| 682 |
Qwen/Qwen2.5-14B-Instruct (GQA 8Q/2K)
|
| 683 |
meta-llama/Llama-3-8B (GQA)
|
| 684 |
-
google/gemma-4-e2b (MHA)
|
| 685 |
-
google/gemma-4-e4b-it (MHA)
|
| 686 |
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
|
| 687 |
```
|
| 688 |
### GQA 典型结构
|
|
@@ -691,21 +799,33 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
|
| 691 |
| Qwen2.5-7B | 28 | 4 | 7 |
|
| 692 |
| LLaMA-3-8B | 32 | 8 | 4 |
|
| 693 |
| Qwen2.5-14B | 40 | 8 | 5 |
|
| 694 |
-
| Gemma-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 695 |
""")
|
| 696 |
|
| 697 |
log_output = gr.Textbox(
|
| 698 |
label="分析日志(逐头详情)",
|
| 699 |
-
lines=35, max_lines=
|
| 700 |
)
|
| 701 |
|
| 702 |
table_output = gr.Dataframe(
|
| 703 |
-
label="逐头结果表",
|
| 704 |
headers=[
|
| 705 |
-
"
|
| 706 |
-
"
|
| 707 |
-
"
|
| 708 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 709 |
]
|
| 710 |
)
|
| 711 |
|
|
|
|
| 2 |
import requests
|
| 3 |
import struct
|
| 4 |
import json
|
| 5 |
+
import re # [改动1] 新增:用于多模态层名过滤
|
| 6 |
import numpy as np
|
| 7 |
import torch
|
| 8 |
from scipy.stats import pearsonr, spearmanr
|
|
|
|
| 32 |
UNSUPPORTED_SVD_DTYPES = {"I8", "U8", "I32", "I64", "F8_E4M3", "F8_E5M2"}
|
| 33 |
QUANTIZED_KEY_SIGNATURES = ["qweight", "qzeros", "scales", "g_idx", "packed_weight"]
|
| 34 |
|
| 35 |
+
# [改动1] 多模态视觉层关键词 → 跳过这些层
|
| 36 |
+
VISION_KEY_PATTERNS = [
|
| 37 |
+
"vision", "visual", "image_encoder",
|
| 38 |
+
"img_encoder", "patch_embed", "vit",
|
| 39 |
+
"vision_tower", "mm_projector",
|
| 40 |
+
]
|
| 41 |
+
|
| 42 |
|
| 43 |
# ─────────────────────────────────────────────
|
| 44 |
# 工具函数
|
|
|
|
| 59 |
timeout=30
|
| 60 |
)
|
| 61 |
r.raise_for_status()
|
| 62 |
+
raw = json.loads(r.content)
|
| 63 |
+
# 过滤 __metadata__
|
| 64 |
+
raw.pop("__metadata__", None)
|
| 65 |
+
return raw, header_size
|
| 66 |
|
| 67 |
|
| 68 |
def load_tensor_remote(
|
|
|
|
| 111 |
|
| 112 |
|
| 113 |
def find_index_file(model_id: str, token: str = None) -> dict | None:
    """Fetch ``model.safetensors.index.json`` for a Hugging Face repository.

    Args:
        model_id: Repository id, interpolated into the ``resolve/main`` URL.
        token: Optional access token. When truthy it is sent as a Bearer
            ``Authorization`` header; otherwise no auth header is sent.

    Returns:
        The parsed JSON body when the server answers HTTP 200, else ``None``.
        Any non-200 status (not only 404) yields ``None``.
    """
    url = (f"https://huggingface.co/{model_id}/resolve/main/"
           f"model.safetensors.index.json")
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    r = requests.get(url, headers=headers, timeout=15)
    return r.json() if r.status_code == 200 else None
|
|
|
|
| 126 |
return f"❌ HTTP {code}:{e}"
|
| 127 |
|
| 128 |
|
| 129 |
+
# [改动1] 判断一个 key 是否属于视觉模态层
|
| 130 |
+
def is_vision_key(key: str) -> bool:
    """Return True when *key* looks like a vision-modality tensor name.

    The check is a case-insensitive substring test of *key* against every
    entry in the module-level ``VISION_KEY_PATTERNS`` list.
    """
    # NOTE(review): this is plain substring matching, so a short pattern such
    # as "vit" also matches unrelated names that merely contain those letters.
    key_lower = key.lower()
    return any(pat in key_lower for pat in VISION_KEY_PATTERNS)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
# ─────────────────────────────────────────────
|
| 136 |
+
# 量化三重检测(不变)
|
| 137 |
# ─────────────────────────────────────────────
|
| 138 |
|
| 139 |
def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
|
| 140 |
headers = {"Authorization": f"Bearer {token}"} if token else {}
|
| 141 |
warnings = []
|
| 142 |
|
|
|
|
| 143 |
try:
|
| 144 |
r = requests.get(
|
| 145 |
f"https://huggingface.co/{model_id}/resolve/main/config.json",
|
|
|
|
| 153 |
cfg.get("quantization","")).lower()
|
| 154 |
if "gptq" in qt:
|
| 155 |
bits = qcfg.get("bits","?")
|
| 156 |
+
return True, f"❌ 检测到 GPTQ {bits}bit 量化,请改用原始 BF16 版本。"
|
|
|
|
| 157 |
if "awq" in qt:
|
| 158 |
return True, "❌ 检测到 AWQ 量化,请改用原始 BF16 版本。"
|
| 159 |
if "bitsandbytes" in qt or "bnb" in qt:
|
|
|
|
| 161 |
except Exception:
|
| 162 |
warnings.append("⚠️ 无法读取 config.json")
|
| 163 |
|
|
|
|
| 164 |
mid_lower = model_id.lower()
|
| 165 |
for kw in ["gptq","awq","gguf"]:
|
| 166 |
if kw in mid_lower:
|
|
|
|
| 175 |
except Exception as e:
|
| 176 |
warnings.append(f"⚠️ 文件列表检测失败:{e}")
|
| 177 |
|
|
|
|
| 178 |
try:
|
| 179 |
index_data = find_index_file(model_id, token)
|
| 180 |
if index_data:
|
|
|
|
| 187 |
bad_keys = [k for k in all_keys
|
| 188 |
if any(sig in k for sig in QUANTIZED_KEY_SIGNATURES)]
|
| 189 |
if bad_keys:
|
| 190 |
+
return True, f"❌ 检测到量化 key:{bad_keys[:3]},请使用原始 BF16 版本。"
|
|
|
|
| 191 |
dtypes = {hdr[k].get("dtype","") for k in all_keys[:20]}
|
| 192 |
+
good = dtypes - UNSUPPORTED_SVD_DTYPES
|
| 193 |
if good:
|
| 194 |
warnings.append(f"✅ 权重格式:{good}")
|
| 195 |
except Exception as e:
|
|
|
|
| 200 |
|
| 201 |
|
| 202 |
# ─────────────────────────────────────────────
|
| 203 |
+
# GQA 参数自动推断(不变)
|
| 204 |
# ─────────────────────────────────────────────
|
| 205 |
|
| 206 |
+
def infer_gqa_params(
|
| 207 |
+
W_q: torch.Tensor,
|
| 208 |
+
W_k: torch.Tensor,
|
| 209 |
+
config: dict | None
|
| 210 |
+
) -> tuple[int,int,int]:
|
| 211 |
+
q_rows = W_q.shape[0]
|
| 212 |
+
k_rows = W_k.shape[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
|
|
|
| 214 |
d_head = None
|
| 215 |
if config:
|
| 216 |
d_head = (
|
| 217 |
config.get("head_dim") or
|
| 218 |
config.get("kv_channels") or
|
| 219 |
+
config.get("hidden_size", 0) // max(config.get("num_attention_heads", 1), 1)
|
| 220 |
)
|
| 221 |
if d_head == 0:
|
| 222 |
d_head = None
|
| 223 |
|
|
|
|
| 224 |
if not d_head:
|
| 225 |
+
for candidate in [256, 128, 96, 80, 64, 32]:
|
| 226 |
if q_rows % candidate == 0 and k_rows % candidate == 0:
|
| 227 |
d_head = candidate
|
| 228 |
break
|
| 229 |
|
| 230 |
if not d_head:
|
| 231 |
raise ValueError(
|
| 232 |
+
f"无法推断 d_head:W_q={W_q.shape}, W_k={W_k.shape}"
|
|
|
|
| 233 |
)
|
| 234 |
|
| 235 |
n_q_heads = q_rows // d_head
|
|
|
|
| 237 |
|
| 238 |
if n_q_heads % n_kv_heads != 0:
|
| 239 |
raise ValueError(
|
| 240 |
+
f"n_q_heads={n_q_heads} 不能被 n_kv_heads={n_kv_heads} 整除"
|
|
|
|
| 241 |
)
|
|
|
|
| 242 |
return n_q_heads, n_kv_heads, d_head
|
| 243 |
|
| 244 |
|
| 245 |
# ─────────────────────────────────────────────
|
| 246 |
+
# [改动2] 指标计算函数:新增右奇异向量对齐
|
| 247 |
# ─────────────────────────────────────────────
|
| 248 |
|
| 249 |
+
def compute_pearson_corr(s_a: torch.Tensor, s_b: torch.Tensor) -> float:
    """Return the Pearson correlation coefficient of two 1-D tensors.

    Both inputs are truncated to their common leading length, the same
    convention ``compute_ssr`` and ``compute_singular_value_ratio`` use, so
    callers may pass spectra of different lengths.

    Returns:
        The correlation as a Python float, or ``0.0`` when it is undefined
        (empty input, or either centred vector has zero norm).
    """
    min_len = min(s_a.shape[0], s_b.shape[0])
    if min_len == 0:
        return 0.0
    sa = s_a[:min_len]
    sb = s_b[:min_len]

    # Centre both vectors; the correlation is then their cosine similarity.
    am = sa - sa.mean()
    bm = sb - sb.mean()
    den = torch.norm(am, 2) * torch.norm(bm, 2)
    if den == 0:
        return 0.0
    return float(torch.dot(am, bm) / den)
|
| 255 |
|
| 256 |
|
| 257 |
def compute_singular_value_ratio(
    s_a: torch.Tensor, s_b: torch.Tensor
) -> tuple[float, float]:
    """Fit ``s_a ≈ alpha * s_b`` in the least-squares sense.

    Both inputs are truncated to their common leading length first.

    Returns:
        ``(alpha, residual)`` where ``alpha = <s_a, s_b> / <s_b, s_b>`` and
        ``residual`` is the mean squared error of ``s_a - alpha * s_b``.
        When ``<s_b, s_b>`` is zero the fit is undefined and ``(1.0, 0.0)``
        is returned.
    """
    min_len = min(s_a.shape[0], s_b.shape[0])
    sa = s_a[:min_len]
    sb = s_b[:min_len]

    den = torch.dot(sb, sb)
    if den == 0:
        return 1.0, 0.0

    alpha = torch.dot(sa, sb) / den
    residual = torch.mean((sa - alpha * sb) ** 2).item()
    return float(alpha), float(residual)
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def compute_ssr(s_a: torch.Tensor, s_b: torch.Tensor) -> float:
    """Return the mean absolute difference of two L2-normalised spectra.

    Both inputs are truncated to their common leading length, each is divided
    by its own L2 norm, and the mean of the element-wise absolute differences
    is returned. Uniformly rescaling either input leaves the result
    essentially unchanged.
    """
    min_len = min(s_a.shape[0], s_b.shape[0])
    sa = s_a[:min_len]
    sb = s_b[:min_len]
    # The 1e-10 term guards against division by zero for an all-zero spectrum.
    sa_n = sa / (torch.norm(sa) + 1e-10)
    sb_n = sb / (torch.norm(sb) + 1e-10)
    return float(torch.mean(torch.abs(sa_n - sb_n)))
|
| 279 |
|
| 280 |
|
| 281 |
def compute_left_vector_alignment(
    U_a: torch.Tensor, U_b: torch.Tensor
) -> float:
    """Alignment of left singular vectors (output subspace).

    Computes ``cosU = mean_i |<u_a_i, u_b_i>|`` where ``u_*_i`` is the i-th
    column of the respective matrix. Only the first ``min(cols)`` columns of
    each input are compared, and every column is L2-normalised first.

    Per the original author's note, this is the "fourth law" metric:
    cos(Uq, Uk) ≈ 1/√d_head (random-orthogonal) and
    cos(Uq, Uv) < 1/√d_head (super-orthogonal).
    """
    min_c = min(U_a.shape[1], U_b.shape[1])
    Ua = U_a[:, :min_c]
    Ub = U_b[:, :min_c]
    Ua_n = Ua / (torch.norm(Ua, dim=0, keepdim=True) + 1e-10)
    Ub_n = Ub / (torch.norm(Ub, dim=0, keepdim=True) + 1e-10)
    # Only matching column pairs are needed, so take column-wise dot products
    # rather than forming the full (min_c x min_c) Gram matrix.
    return float((Ua_n * Ub_n).sum(dim=0).abs().mean())
|
| 296 |
|
| 297 |
|
| 298 |
+
# [改动2] 新增:右奇异向量(输入子空间)对齐度
|
| 299 |
+
def compute_right_vector_alignment(
    Vt_a: torch.Tensor, Vt_b: torch.Tensor
) -> float:
    """Alignment of right singular vectors (input subspace).

    Computes ``cosV = mean_i |<v_a_i, v_b_i>|``. The inputs are the ``Vt``
    factors as returned by SVD, so each *row* is one right singular vector.
    Only the first ``min(rows)`` rows of each input are compared, and every
    row is L2-normalised first.

    Per the original author's note, this is the "fifth law" metric: every
    pair is expected to sit near 1/√d_model (globally random-orthogonal).
    """
    min_r = min(Vt_a.shape[0], Vt_b.shape[0])
    Va = Vt_a[:min_r, :]
    Vb = Vt_b[:min_r, :]
    Va_n = Va / (torch.norm(Va, dim=1, keepdim=True) + 1e-10)
    Vb_n = Vb / (torch.norm(Vb, dim=1, keepdim=True) + 1e-10)
    # Row-wise dot products give the cosine of each matching vector pair.
    return float(torch.abs((Va_n * Vb_n).sum(dim=1)).mean())
|
| 314 |
|
| 315 |
|
| 316 |
+
# ─────────────────────────────────────────────
|
| 317 |
+
# [改动3] 逐头分析:Q-K + Q-V + K-V 全指标
|
| 318 |
+
# ─────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
|
| 320 |
def _sigma_stats(s: torch.Tensor) -> tuple[float, float, float]:
    """Return ``(sigma_max, sigma_min, cond)`` for one singular-value vector.

    ``sigma_min`` is the smallest value strictly above 1e-10 (``0.0`` when no
    value qualifies); ``cond`` is ``sigma_max / (sigma_min + 1e-10)``.
    """
    sig_max = float(s.max())
    positive = s[s > 1e-10]
    sig_min = float(positive.min()) if positive.numel() > 0 else 0.0
    return sig_max, sig_min, sig_max / (sig_min + 1e-10)


def analyze_layer_heads(
    W_q: torch.Tensor,
    W_k: torch.Tensor,
    W_v: torch.Tensor,
    layer_idx: int,
    n_q_heads: int,
    n_kv_heads: int,
    d_head: int,
    modality: str = "text",
) -> tuple[list[dict], str]:
    """Compute per-head spectral metrics for one attention layer (GQA-aware).

    Each projection matrix is sliced along dim 0 into heads of ``d_head``
    rows. For every KV head the K-V metrics are computed once; then, for each
    of the ``n_q_heads // n_kv_heads`` consecutive Q heads mapped to that KV
    head, the Q-K and Q-V metrics are computed.

    Args:
        W_q: Query projection weight; rows ``h*d_head:(h+1)*d_head`` are head h.
        W_k: Key projection weight, sliced the same way per KV head.
        W_v: Value projection weight, sliced the same way per KV head.
        layer_idx: Layer number, recorded in each result row and in the log.
        n_q_heads: Number of query heads.
        n_kv_heads: Number of key/value heads.
        d_head: Rows per head.
        modality: Label stored in each record and shown in the log header.

    Returns:
        ``(records, log_text)``: one dict per Q head, and a formatted text
        table with one line per Q head.
    """
    group_size = n_q_heads // n_kv_heads
    records = []
    log_lines = []

    log_lines.append(
        f"\n{'─'*80}\n"
        f"Layer {layer_idx:3d} [{modality}] "
        f"n_q={n_q_heads} n_kv={n_kv_heads} "
        f"group={group_size} d_head={d_head}\n"
        f"{'─'*80}\n"
    )
    # Column header for the per-head table.
    log_lines.append(
        f" {'KV':>3} {'Q':>3} │"
        f" {'P_QK':>7} {'Sp_QK':>7} {'SSR_QK':>8} │"
        f" {'SSR_QV':>8} {'SSR_KV':>8} │"
        f" {'cosU_QK':>8} {'cosU_QV':>8} {'cosU_KV':>8} │"
        f" {'cosV_QK':>8} {'cosV_QV':>8} {'cosV_KV':>8} │"
        f" {'α_QK':>7} {'α_QV':>7} {'α_KV':>7}\n"
    )

    for kv_h in range(n_kv_heads):
        # Slice out this KV head's K and V matrices and decompose them.
        k_tensor = W_k[kv_h * d_head : (kv_h + 1) * d_head, :]
        v_tensor = W_v[kv_h * d_head : (kv_h + 1) * d_head, :]

        U_k, s_k, Vt_k = torch.linalg.svd(k_tensor, full_matrices=False)
        U_v, s_v, Vt_v = torch.linalg.svd(v_tensor, full_matrices=False)

        # K-V metrics depend only on the KV head, so compute them once here.
        alpha_kv, alpha_res_kv = compute_singular_value_ratio(s_k, s_v)
        cosU_KV = compute_left_vector_alignment(U_k, U_v)
        cosV_KV = compute_right_vector_alignment(Vt_k, Vt_v)
        ssr_kv = compute_ssr(s_k, s_v)
        min_kv = min(s_k.shape[0], s_v.shape[0])
        pearson_kv = compute_pearson_corr(s_k[:min_kv], s_v[:min_kv])

        # K/V singular-value range and condition number are likewise
        # independent of the Q head, so they are hoisted out of the inner loop.
        sig_max_k, sig_min_k, cond_k = _sigma_stats(s_k)
        sig_max_v, sig_min_v, cond_v = _sigma_stats(s_v)

        for q_offset in range(group_size):
            # Consecutive Q heads share one KV head.
            h_idx = kv_h * group_size + q_offset

            q_tensor = W_q[h_idx * d_head : (h_idx + 1) * d_head, :]
            U_q, s_q, Vt_q = torch.linalg.svd(q_tensor, full_matrices=False)

            min_qk = min(s_q.shape[0], s_k.shape[0])
            min_qv = min(s_q.shape[0], s_v.shape[0])

            # Q-K metrics.
            pearson_qk = compute_pearson_corr(s_q[:min_qk], s_k[:min_qk])
            spearman_qk = float(spearmanr(
                s_q[:min_qk].cpu().numpy(),
                s_k[:min_qk].cpu().numpy()
            )[0])
            ssr_qk = compute_ssr(s_q, s_k)
            alpha_qk, alpha_res_qk = compute_singular_value_ratio(s_q, s_k)
            cosU_QK = compute_left_vector_alignment(U_q, U_k)
            cosV_QK = compute_right_vector_alignment(Vt_q, Vt_k)

            # Q-V metrics.
            pearson_qv = compute_pearson_corr(s_q[:min_qv], s_v[:min_qv])
            ssr_qv = compute_ssr(s_q, s_v)
            alpha_qv, alpha_res_qv = compute_singular_value_ratio(s_q, s_v)
            cosU_QV = compute_left_vector_alignment(U_q, U_v)
            cosV_QV = compute_right_vector_alignment(Vt_q, Vt_v)

            # Q singular-value range and condition number ("third law").
            sig_max_q, sig_min_q, cond_q = _sigma_stats(s_q)

            records.append({
                # Position.
                "layer": layer_idx,
                "modality": modality,
                "kv_head": kv_h,
                "q_head": h_idx,
                # First law: spectral linear alignment.
                "pearson_QK": round(pearson_qk, 6),
                "spearman_QK": round(spearman_qk, 6),
                "pearson_QV": round(pearson_qv, 6),
                "pearson_KV": round(pearson_kv, 6),
                # Second law: SSR.
                "ssr_QK": round(ssr_qk, 8),
                "ssr_QV": round(ssr_qv, 8),
                "ssr_KV": round(ssr_kv, 8),
                # Fourth law: left singular vectors (output subspace).
                "cosU_QK": round(cosU_QK, 6),
                "cosU_QV": round(cosU_QV, 6),
                "cosU_KV": round(cosU_KV, 6),
                # Fifth law: right singular vectors (input subspace).
                "cosV_QK": round(cosV_QK, 6),
                "cosV_QV": round(cosV_QV, 6),
                "cosV_KV": round(cosV_KV, 6),
                # Scale factors and their fit residuals.
                "alpha_QK": round(alpha_qk, 4),
                "alpha_QV": round(alpha_qv, 4),
                "alpha_KV": round(alpha_kv, 4),
                "alpha_res_QK": round(alpha_res_qk, 6),
                "alpha_res_QV": round(alpha_res_qv, 6),
                "alpha_res_KV": round(alpha_res_kv, 6),
                # Singular-value range.
                "sigma_max_Q": round(sig_max_q, 4),
                "sigma_min_Q": round(sig_min_q, 4),
                "sigma_max_K": round(sig_max_k, 4),
                "sigma_min_K": round(sig_min_k, 4),
                "sigma_max_V": round(sig_max_v, 4),
                "sigma_min_V": round(sig_min_v, 4),
                # Condition numbers (third law).
                "cond_Q": round(cond_q, 2),
                "cond_K": round(cond_k, 2),
                "cond_V": round(cond_v, 2),
            })

            log_lines.append(
                f" {kv_h:>3d} {h_idx:>3d} │"
                f" {pearson_qk:>+7.4f} {spearman_qk:>+7.4f} {ssr_qk:>8.6f} │"
                f" {ssr_qv:>8.6f} {ssr_kv:>8.6f} │"
                f" {cosU_QK:>8.4f} {cosU_QV:>8.4f} {cosU_KV:>8.4f} │"
                f" {cosV_QK:>8.4f} {cosV_QV:>8.4f} {cosV_KV:>8.4f} │"
                f" {alpha_qk:>7.4f} {alpha_qv:>7.4f} {alpha_kv:>7.4f}\n"
            )

    return records, "".join(log_lines)
|
|
|
|
| 481 |
return "❌ 请输入模型 ID", None
|
| 482 |
|
| 483 |
token = hf_token.strip() or None
|
| 484 |
+
log_lines = [f"🔍 分析模型:{model_id}\n{'═'*80}\n"]
|
| 485 |
all_records: list[dict] = []
|
| 486 |
|
| 487 |
# ── 量化检测 ─────────────────────────────────
|
| 488 |
progress(0.02, desc="量化检测...")
|
| 489 |
is_blocked, quant_msg = check_quantization(model_id, token)
|
| 490 |
+
log_lines.append(f"【量化检测】\n{quant_msg}\n{'─'*80}\n")
|
| 491 |
if is_blocked:
|
| 492 |
return "".join(log_lines), None
|
| 493 |
|
| 494 |
+
# ── config.json ───────────────────────────────
|
| 495 |
config = None
|
| 496 |
try:
|
| 497 |
r = requests.get(
|
|
|
|
| 503 |
config = r.json()
|
| 504 |
log_lines.append(
|
| 505 |
f"📋 config.json:\n"
|
| 506 |
+
f" model_type = {config.get('model_type')}\n"
|
| 507 |
+
f" hidden_size = {config.get('hidden_size')}\n"
|
| 508 |
f" num_attention_heads = {config.get('num_attention_heads')}\n"
|
| 509 |
f" num_key_value_heads = {config.get('num_key_value_heads')}\n"
|
| 510 |
+
f" head_dim = {config.get('head_dim')}\n"
|
| 511 |
+
f"{'─'*80}\n"
|
| 512 |
)
|
| 513 |
except Exception:
|
| 514 |
log_lines.append("⚠️ 无法读取 config.json,将从 weight shape 自动推断\n")
|
| 515 |
|
| 516 |
+
# ── 分片索引 ──────────────────────────────────
|
| 517 |
progress(0.05, desc="读取模型索引...")
|
| 518 |
try:
|
| 519 |
+
index_data = find_index_file(model_id, token)
|
| 520 |
shard_headers: dict[str, tuple[dict, int]] = {}
|
| 521 |
|
| 522 |
if index_data:
|
|
|
|
| 525 |
f"📦 分片模型,共 {len(set(weight_map.values()))} 个 shard\n"
|
| 526 |
)
|
| 527 |
else:
|
| 528 |
+
sf_files = get_safetensor_files(model_id, token)
|
|
|
|
|
|
|
| 529 |
weight_map = None
|
| 530 |
log_lines.append(f"📦 单文件:{sf_files}\n")
|
| 531 |
except requests.exceptions.HTTPError as e:
|
| 532 |
return _http_error_msg(e, model_id), None
|
| 533 |
|
| 534 |
+
# ── 探测第一个 shard ──────────────────────────
|
| 535 |
progress(0.08, desc="识别层结构...")
|
| 536 |
try:
|
| 537 |
if index_data:
|
| 538 |
first_shard = sorted(set(index_data["weight_map"].values()))[0]
|
| 539 |
else:
|
| 540 |
first_shard = sf_files[0]
|
|
|
|
| 541 |
first_url = get_file_url(model_id, first_shard)
|
| 542 |
first_header, first_hsize = read_safetensors_header(first_url, token)
|
| 543 |
shard_headers[first_shard] = (first_header, first_hsize)
|
|
|
|
| 545 |
except Exception as e:
|
| 546 |
return f"❌ 读取 shard header 失败:{e}", None
|
| 547 |
|
| 548 |
+
# [改动1] 区分文本层 key 和视觉层 key
|
| 549 |
+
text_keys = [k for k in all_keys if not is_vision_key(k)]
|
| 550 |
+
vision_keys = [k for k in all_keys if is_vision_key(k)]
|
| 551 |
+
log_lines.append(
|
| 552 |
+
f"🔑 总 key 数:{len(all_keys)} "
|
| 553 |
+
f"(文本层:{len(text_keys)},视觉层跳过:{len(vision_keys)})\n"
|
| 554 |
+
)
|
| 555 |
+
|
| 556 |
+
# 识别 Q/K/V key 命名规则(只在文本 key 中识别)
|
| 557 |
+
q_candidates = [k for k in text_keys if any(
|
| 558 |
+
p in k for p in ["q_proj.weight","query.weight","q.weight","wq.weight"]
|
| 559 |
)]
|
| 560 |
if not q_candidates:
|
| 561 |
+
sample = "\n".join(text_keys[:30])
|
| 562 |
+
return f"⚠️ 无法识别文本层 Q/K/V key,前 30 个文本 key:\n{sample}", None
|
| 563 |
|
| 564 |
sample_q = q_candidates[0]
|
| 565 |
+
if "q_proj" in sample_q: q_sfx, k_sfx, v_sfx = "self_attn.q_proj.weight", "self_attn.k_proj.weight", "self_attn.v_proj.weight"
|
| 566 |
+
elif "query" in sample_q: q_sfx, k_sfx, v_sfx = "attention.query.weight", "attention.key.weight", "attention.value.weight"
|
| 567 |
+
elif "wq" in sample_q: q_sfx, k_sfx, v_sfx = "attention.wq.weight", "attention.wk.weight", "attention.wv.weight"
|
| 568 |
else:
|
| 569 |
+
q_sfx = sample_q.split("layers.0.")[-1]
|
| 570 |
+
k_sfx = q_sfx.replace("q.", "k.")
|
| 571 |
+
v_sfx = q_sfx.replace("q.", "v.")
|
| 572 |
|
| 573 |
+
log_lines.append(f"🔑 Q suffix:{q_sfx}\n")
|
| 574 |
+
log_lines.append(f"🔑 K suffix:{k_sfx}\n")
|
| 575 |
+
log_lines.append(f"🔑 V suffix:{v_sfx}\n") # [改动3]
|
| 576 |
+
log_lines.append(f"{'═'*80}\n")
|
| 577 |
|
| 578 |
+
# ── 辅助:查找 key 所在 shard ─────────────────
|
| 579 |
def get_shard_for_key(key: str) -> str | None:
|
| 580 |
if index_data:
|
| 581 |
return index_data["weight_map"].get(key)
|
|
|
|
| 588 |
return None
|
| 589 |
|
| 590 |
# ── 逐层分析 ─────────────────────────────────
|
| 591 |
+
gqa_logged = False
|
| 592 |
|
| 593 |
for layer_idx in range(int(max_layers)):
|
| 594 |
progress(
|
|
|
|
| 596 |
desc=f"第 {layer_idx} 层..."
|
| 597 |
)
|
| 598 |
|
| 599 |
+
q_key = f"model.layers.{layer_idx}.{q_sfx}"
|
| 600 |
+
k_key = f"model.layers.{layer_idx}.{k_sfx}"
|
| 601 |
+
v_key = f"model.layers.{layer_idx}.{v_sfx}" # [改动3]
|
| 602 |
|
| 603 |
q_shard = get_shard_for_key(q_key)
|
| 604 |
k_shard = get_shard_for_key(k_key)
|
| 605 |
+
v_shard = get_shard_for_key(v_key) # [改动3]
|
| 606 |
|
| 607 |
if q_shard is None or k_shard is None:
|
| 608 |
+
log_lines.append(
|
| 609 |
+
f"\nLayer {layer_idx}: Q/K 未找到,分析结束(共 {layer_idx} 层)\n"
|
| 610 |
+
)
|
| 611 |
break
|
| 612 |
|
| 613 |
+
# [改动3] V 找不到时降级处理(不阻断整体分析)
|
| 614 |
+
if v_shard is None:
|
| 615 |
+
log_lines.append(
|
| 616 |
+
f"Layer {layer_idx}: ⚠️ V 未找到,跳过该层\n"
|
| 617 |
+
)
|
| 618 |
+
continue
|
| 619 |
+
|
| 620 |
+
for shard in {q_shard, k_shard, v_shard}:
|
| 621 |
if shard not in shard_headers:
|
| 622 |
h, hs = read_safetensors_header(get_file_url(model_id, shard), token)
|
| 623 |
shard_headers[shard] = (h, hs)
|
|
|
|
| 631 |
get_file_url(model_id, k_shard), k_key,
|
| 632 |
*shard_headers[k_shard], token
|
| 633 |
)
|
| 634 |
+
W_v = load_tensor_remote( # [改动3]
|
| 635 |
+
get_file_url(model_id, v_shard), v_key,
|
| 636 |
+
*shard_headers[v_shard], token
|
| 637 |
+
)
|
| 638 |
except ValueError as e:
|
| 639 |
log_lines.append(f"Layer {layer_idx}: ⚠️ 跳过({e})\n")
|
| 640 |
continue
|
| 641 |
|
| 642 |
+
if W_q is None or W_k is None or W_v is None:
|
| 643 |
log_lines.append(f"Layer {layer_idx}: ⚠️ tensor 为 None,跳过\n")
|
| 644 |
continue
|
| 645 |
|
| 646 |
+
# [改动1] 判断该层是文本层还是视觉层
|
| 647 |
+
modality = "vision" if is_vision_key(q_key) else "text"
|
| 648 |
+
if modality == "vision":
|
| 649 |
+
log_lines.append(f"Layer {layer_idx}: 🖼️ 视觉层,跳过\n")
|
| 650 |
+
del W_q, W_k, W_v
|
| 651 |
+
continue
|
| 652 |
+
|
| 653 |
+
# GQA 推断
|
| 654 |
try:
|
| 655 |
n_q_heads, n_kv_heads, d_head = infer_gqa_params(W_q, W_k, config)
|
| 656 |
except ValueError as e:
|
| 657 |
log_lines.append(f"Layer {layer_idx}: ❌ GQA 推断失败:{e}\n")
|
| 658 |
+
del W_q, W_k, W_v
|
| 659 |
continue
|
| 660 |
|
| 661 |
+
if not gqa_logged:
|
|
|
|
| 662 |
log_lines.append(
|
| 663 |
+
f"🧠 GQA 结构:n_q={n_q_heads} n_kv={n_kv_heads} "
|
| 664 |
+
f"group={n_q_heads//n_kv_heads} d_head={d_head}\n"
|
| 665 |
+
f" W_q={list(W_q.shape)} W_k={list(W_k.shape)} "
|
| 666 |
+
f"W_v={list(W_v.shape)}\n" # [改动3]
|
| 667 |
+
f"{'═'*80}\n"
|
|
|
|
|
|
|
| 668 |
)
|
| 669 |
+
gqa_logged = True
|
| 670 |
|
| 671 |
+
# 逐头全指标计算
|
| 672 |
records, layer_log = analyze_layer_heads(
|
| 673 |
+
W_q, W_k, W_v, # [改动3]
|
| 674 |
+
layer_idx,
|
| 675 |
+
n_q_heads, n_kv_heads, d_head,
|
| 676 |
+
modality=modality # [改动4]
|
| 677 |
)
|
| 678 |
all_records.extend(records)
|
| 679 |
log_lines.append(layer_log)
|
| 680 |
|
| 681 |
+
del W_q, W_k, W_v
|
| 682 |
|
| 683 |
+
# ── 全局汇总 ──────────────────────────────────
|
| 684 |
if all_records:
|
| 685 |
df = pd.DataFrame(all_records)
|
| 686 |
|
| 687 |
+
# [改动5] 分模态统计
|
| 688 |
+
def stat_block(arr: np.ndarray, name: str) -> str:
    """Format one summary row (median / mean / min / max) for a metric.

    Args:
        arr: Values of a single metric (e.g. one DataFrame column's
            ``.values``).
        name: Row label; left-aligned and padded to 14 characters.

    Returns:
        One newline-terminated line. For non-empty input it contains the
        four statistics formatted with six decimals; for empty input the
        statistics are replaced by ``n/a``.
    """
    values = np.asarray(arr)
    if values.size == 0:
        # np.min / np.max raise ValueError on a zero-size array; report a
        # placeholder row instead of aborting the whole summary.
        return f" {name:<14} n/a\n"
    return (
        f" {name:<14}"
        f" Median={np.median(values):.6f}"
        f" Mean={np.mean(values):.6f}"
        f" Min={np.min(values):.6f}"
        f" Max={np.max(values):.6f}\n"
    )
|
| 696 |
+
|
| 697 |
+
text_df = df[df["modality"] == "text"]
|
| 698 |
+
|
| 699 |
+
summary_lines = [
|
| 700 |
+
f"\n{'═'*80}\n",
|
| 701 |
+
f"📊 王氏五定律全局汇总 — {model_id}\n",
|
| 702 |
+
f"{'═'*80}\n",
|
| 703 |
+
f"文本层记录:{len(text_df)} 条 "
|
| 704 |
+
f"({text_df['layer'].nunique()} 层 × "
|
| 705 |
+
f"{text_df.groupby('layer').size().iloc[0] if len(text_df)>0 else 0} 头/层)\n\n",
|
| 706 |
+
|
| 707 |
+
f"【第一定律 — Pearson r(→ 1)】\n",
|
| 708 |
+
stat_block(text_df["pearson_QK"].values, "Q-K:"),
|
| 709 |
+
stat_block(text_df["pearson_QV"].values, "Q-V:"), # [改动3]
|
| 710 |
+
stat_block(text_df["pearson_KV"].values, "K-V:"), # [改动3]
|
| 711 |
+
|
| 712 |
+
f"\n【第二定律 — SSR(→ 0)】\n",
|
| 713 |
+
stat_block(text_df["ssr_QK"].values, "Q-K:"),
|
| 714 |
+
stat_block(text_df["ssr_QV"].values, "Q-V:"), # [改动3]
|
| 715 |
+
stat_block(text_df["ssr_KV"].values, "K-V:"), # [改动3]
|
| 716 |
+
|
| 717 |
+
f"\n【第四定律 — cosU 输出子空间(Q-K≈1/√d,Q-V<1/√d 超正交)】\n",
|
| 718 |
+
stat_block(text_df["cosU_QK"].values, "cosU Q-K:"),
|
| 719 |
+
stat_block(text_df["cosU_QV"].values, "cosU Q-V:"), # [改动3]
|
| 720 |
+
stat_block(text_df["cosU_KV"].values, "cosU K-V:"), # [改动3]
|
| 721 |
+
|
| 722 |
+
f"\n【第五定律 — cosV 输入子空间(≈1/√d_model 全局随机正交)】\n", # [改动2]
|
| 723 |
+
stat_block(text_df["cosV_QK"].values, "cosV Q-K:"),
|
| 724 |
+
stat_block(text_df["cosV_QV"].values, "cosV Q-V:"),
|
| 725 |
+
stat_block(text_df["cosV_KV"].values, "cosV K-V:"),
|
| 726 |
+
|
| 727 |
+
f"\n【第三定律 — 条件数(越小越稳定)】\n", # [改动3]
|
| 728 |
+
stat_block(text_df["cond_Q"].values, "cond Q:"),
|
| 729 |
+
stat_block(text_df["cond_K"].values, "cond K:"),
|
| 730 |
+
stat_block(text_df["cond_V"].values, "cond V:"),
|
| 731 |
+
|
| 732 |
+
f"\n⚡ 理论极值:Pearson→1, SSR→0, cosU(QV)<1/√d_head\n",
|
| 733 |
+
f"{'═'*80}\n",
|
| 734 |
+
]
|
| 735 |
+
log_lines.extend(summary_lines)
|
| 736 |
|
| 737 |
return "".join(log_lines), df
|
| 738 |
else:
|
|
|
|
| 750 |
**Mathematical Foundations of Large Language Models (MF-LLM)**
|
| 751 |
|
| 752 |
通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
|
| 753 |
+
支持 GQA + 多模态(自动跳过视觉层)。逐头计算全部五定律指标:
|
| 754 |
|
| 755 |
+
| 定律 | 指标 | 理论极值 | 对象 |
|
| 756 |
+
|------|------|---------|------|
|
| 757 |
+
| 第一定律 | Pearson r / Spearman r | → 1 | Q-K, Q-V, K-V(Spearman 仅 Q-K) |
|
| 758 |
+
| 第二定律 | SSR | → 0 | Q-K, Q-V, K-V |
|
| 759 |
+
| 第三定律 | 条件数 κ | 越小越好 | Q, K, V |
|
| 760 |
+
| 第四定律 | cosU(Uq,Uk) | ≈1/√d_head;cosU(Uq,Uv)<1/√d_head | Q-K, Q-V, K-V |
|
| 761 |
+
| 第五定律 | cosV(Vq,Vk) | ≈1/√d_model(随机正交) | Q-K, Q-V, K-V |
|
| 762 |
|
| 763 |
[DOI: 10.5281/zenodo.19707844](https://doi.org/10.5281/zenodo.19707844)
|
| 764 |
[HAL: hal-05609398](https://hal.science/hal-05609398)
|
|
|
|
| 768 |
with gr.Column(scale=2):
|
| 769 |
model_input = gr.Textbox(
|
| 770 |
label="HuggingFace 模型 ID",
|
| 771 |
+
placeholder="google/gemma-4-e2b",
|
| 772 |
+
value="google/gemma-4-e2b"
|
| 773 |
)
|
| 774 |
token_input = gr.Textbox(
|
| 775 |
label="HF Access Token(公开模型可留空)",
|
|
|
|
| 782 |
)
|
| 783 |
analyze_btn = gr.Button("🚀 开始分析", variant="primary")
|
| 784 |
|
| 785 |
+
# [改动6] 更新推荐模型列表
|
| 786 |
with gr.Column(scale=1):
|
| 787 |
gr.Markdown("""
|
| 788 |
### ✅ 推荐模型
|
| 789 |
```
|
| 790 |
Qwen/Qwen2.5-14B-Instruct (GQA 8Q/2K)
|
| 791 |
meta-llama/Llama-3-8B (GQA)
|
| 792 |
+
google/gemma-4-e2b (MHA 多模态)
|
| 793 |
+
google/gemma-4-e4b-it (MHA 多模态)
|
| 794 |
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
|
| 795 |
```
|
| 796 |
### GQA 典型结构
|
|
|
|
| 799 |
| Qwen2.5-7B | 28 | 4 | 7 |
|
| 800 |
| LLaMA-3-8B | 32 | 8 | 4 |
|
| 801 |
| Qwen2.5-14B | 40 | 8 | 5 |
|
| 802 |
+
| Gemma-4-E2B | 8 | 4 | 2 |
|
| 803 |
+
|
| 804 |
+
### 🖼️ 多模态说明
|
| 805 |
+
- 视觉层自动跳过
|
| 806 |
+
- 仅分析文本 Transformer 层
|
| 807 |
+
- 跳过关键词:`vision / visual / vit / patch_embed`
|
| 808 |
""")
|
| 809 |
|
| 810 |
log_output = gr.Textbox(
|
| 811 |
label="分析日志(逐头详情)",
|
| 812 |
+
lines=35, max_lines=100
|
| 813 |
)
|
| 814 |
|
| 815 |
table_output = gr.Dataframe(
|
| 816 |
+
label="逐头全指标结果表",
|
| 817 |
headers=[
|
| 818 |
+
"layer","modality","kv_head","q_head",
|
| 819 |
+
"pearson_QK","spearman_QK","pearson_QV","pearson_KV",
|
| 820 |
+
"ssr_QK","ssr_QV","ssr_KV",
|
| 821 |
+
"cosU_QK","cosU_QV","cosU_KV",
|
| 822 |
+
"cosV_QK","cosV_QV","cosV_KV",
|
| 823 |
+
"alpha_QK","alpha_QV","alpha_KV",
|
| 824 |
+
"alpha_res_QK","alpha_res_QV","alpha_res_KV",
|
| 825 |
+
"sigma_max_Q","sigma_min_Q",
|
| 826 |
+
"sigma_max_K","sigma_min_K",
|
| 827 |
+
"sigma_max_V","sigma_min_V",
|
| 828 |
+
"cond_Q","cond_K","cond_V",
|
| 829 |
]
|
| 830 |
)
|
| 831 |
|