Spaces:
Running
Running
Alex W. committed on
Commit ·
9ae44df
1
Parent(s): 726019a
正确逻辑:
Browse files
不按组件分类,不按 modality 分类
直接按 safetensors 里 layers.{N}. 的原始 N 值过滤
start_layer=0, end_layer=5 → 提取所有前缀下 N 在 [0,5] 范围内的层
同一个 N 在不同前缀下是不同的层,都要输出,保持原始 key 里的层号
app.py
CHANGED
|
@@ -110,16 +110,12 @@ def find_index_file(model_id: str, token: str = None) -> dict | None:
|
|
| 110 |
|
| 111 |
def _http_error_msg(e: requests.exceptions.HTTPError, model_id: str) -> str:
|
| 112 |
code = e.response.status_code
|
| 113 |
-
if code == 401: return "❌ 401 未授权
|
| 114 |
if code == 403: return f"❌ 403 禁止访问:请先接受 {model_id} 的使用协议"
|
| 115 |
-
if code == 404: return f"❌ 404 未找到:
|
| 116 |
return f"❌ HTTP {code}:{e}"
|
| 117 |
|
| 118 |
|
| 119 |
-
# ─────────────────────────────────────────────
|
| 120 |
-
# Gemma4 / 嵌套 config 安全解析
|
| 121 |
-
# ─────────────────────────────────────────────
|
| 122 |
-
|
| 123 |
def extract_config_params(config: dict) -> dict:
|
| 124 |
if config is None:
|
| 125 |
return {}
|
|
@@ -128,11 +124,9 @@ def extract_config_params(config: dict) -> dict:
|
|
| 128 |
def get_field(*keys):
|
| 129 |
for k in keys:
|
| 130 |
v = config.get(k)
|
| 131 |
-
if v is not None:
|
| 132 |
-
return v
|
| 133 |
v = text_cfg.get(k)
|
| 134 |
-
if v is not None:
|
| 135 |
-
return v
|
| 136 |
return None
|
| 137 |
|
| 138 |
return {
|
|
@@ -149,7 +143,6 @@ def extract_config_params(config: dict) -> dict:
|
|
| 149 |
# ─────────────────────────────────────────────
|
| 150 |
|
| 151 |
def _classify_qkv_suffix(suffix: str) -> str | None:
|
| 152 |
-
"""layers.{N}. 之后的后缀 → 'q'/'k'/'v'/None"""
|
| 153 |
if not suffix.endswith(".weight"):
|
| 154 |
return None
|
| 155 |
excludes = ["norm", "rope", "embed", "lm_head", "layernorm", "ln_"]
|
|
@@ -166,28 +159,26 @@ def _classify_qkv_suffix(suffix: str) -> str | None:
|
|
| 166 |
|
| 167 |
|
| 168 |
# ─────────────────────────────────────────────
|
| 169 |
-
#
|
| 170 |
-
#
|
| 171 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
# ─────────────────────────────────────────────
|
| 173 |
|
| 174 |
-
def
|
| 175 |
"""
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
"q": (shard_name, full_key),
|
| 181 |
-
"k": (shard_name, full_key),
|
| 182 |
-
"v": (shard_name, full_key),
|
| 183 |
-
}
|
| 184 |
-
}
|
| 185 |
-
}
|
| 186 |
-
每个 prefix 是一个独立的模型组件。
|
| 187 |
-
层号是该组件内的原始层号,不做任何重排。
|
| 188 |
"""
|
| 189 |
-
|
| 190 |
-
prefix_data: dict[str, dict[int, dict]] = {}
|
| 191 |
|
| 192 |
for shard_name, (header, _) in all_shard_headers.items():
|
| 193 |
for key in header.keys():
|
|
@@ -196,58 +187,25 @@ def discover_all_components(all_shard_headers: dict) -> dict:
|
|
| 196 |
continue
|
| 197 |
|
| 198 |
layer_idx = int(m.group(1))
|
| 199 |
-
prefix = key[:m.start()] # 精确截断
|
| 200 |
suffix = key[m.end():]
|
| 201 |
|
| 202 |
role = _classify_qkv_suffix(suffix)
|
| 203 |
if role is None:
|
| 204 |
continue
|
| 205 |
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
prefix_data[prefix][layer_idx] = {"q": None, "k": None, "v": None}
|
| 210 |
-
|
| 211 |
-
if prefix_data[prefix][layer_idx][role] is None:
|
| 212 |
-
prefix_data[prefix][layer_idx][role] = (shard_name, key)
|
| 213 |
-
|
| 214 |
-
# 第二遍:只保留每个前缀中 QKV 完整的层
|
| 215 |
-
result = {}
|
| 216 |
-
for prefix, layers in prefix_data.items():
|
| 217 |
-
complete = {
|
| 218 |
-
idx: qkv for idx, qkv in layers.items()
|
| 219 |
-
if all(qkv[r] is not None for r in ("q", "k", "v"))
|
| 220 |
-
}
|
| 221 |
-
if complete:
|
| 222 |
-
result[prefix] = complete
|
| 223 |
|
| 224 |
-
|
|
|
|
| 225 |
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
VISION_PREFIX_PATTERNS = [
|
| 232 |
-
"vision", "visual", "img", "image",
|
| 233 |
-
"patch_embed", "vit", "clip",
|
| 234 |
-
]
|
| 235 |
-
AUDIO_PREFIX_PATTERNS = [
|
| 236 |
-
"audio", "speech", "whisper",
|
| 237 |
-
]
|
| 238 |
-
TEXT_PREFIX_PATTERNS = [
|
| 239 |
-
"language_model", "transformer", "model.layers",
|
| 240 |
-
"text", "decoder", "encoder",
|
| 241 |
-
]
|
| 242 |
-
|
| 243 |
-
def infer_modality(prefix: str) -> str:
|
| 244 |
-
p = prefix.lower()
|
| 245 |
-
if any(v in p for v in VISION_PREFIX_PATTERNS):
|
| 246 |
-
return "vision"
|
| 247 |
-
if any(a in p for a in AUDIO_PREFIX_PATTERNS):
|
| 248 |
-
return "audio"
|
| 249 |
-
# 默认视为 text(language model)
|
| 250 |
-
return "text"
|
| 251 |
|
| 252 |
|
| 253 |
# ─────────────────────────────────────────────
|
|
@@ -264,35 +222,35 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
|
|
| 264 |
headers=headers, timeout=15
|
| 265 |
)
|
| 266 |
if r.status_code == 200:
|
| 267 |
-
cfg
|
| 268 |
qcfg = cfg.get("quantization_config", {})
|
| 269 |
-
qt
|
| 270 |
-
|
| 271 |
-
|
| 272 |
if "gptq" in qt:
|
| 273 |
-
return True, f"❌ GPTQ {qcfg.get('bits','?')}bit
|
| 274 |
if "awq" in qt:
|
| 275 |
return True, "❌ AWQ 量化,请用原始 BF16 版本。"
|
| 276 |
if "bitsandbytes" in qt or "bnb" in qt:
|
| 277 |
-
warnings.append("⚠️
|
| 278 |
except Exception:
|
| 279 |
warnings.append("⚠️ 无法读取 config.json")
|
| 280 |
|
| 281 |
-
for kw in ["gptq",
|
| 282 |
if kw in model_id.lower():
|
| 283 |
return True, f"❌ 模型名含 '{kw.upper()}',请使用原始 BF16 版本。"
|
| 284 |
|
| 285 |
try:
|
| 286 |
all_files = list(list_repo_files(model_id, token=token))
|
| 287 |
if any(f.endswith(".gguf") for f in all_files):
|
| 288 |
-
return True, "❌ 检测到 .gguf 文件,不支持
|
| 289 |
if not any(f.endswith(".safetensors") for f in all_files):
|
| 290 |
return True, "❌ 未找到 .safetensors 文件。"
|
| 291 |
except Exception as e:
|
| 292 |
warnings.append(f"⚠️ 文件列表检测失败:{e}")
|
| 293 |
|
| 294 |
try:
|
| 295 |
-
index_data
|
| 296 |
first_shard = (
|
| 297 |
sorted(set(index_data["weight_map"].values()))[0]
|
| 298 |
if index_data else get_safetensor_files(model_id, token)[0]
|
|
@@ -300,8 +258,8 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
|
|
| 300 |
hdr, _ = read_safetensors_header(get_file_url(model_id, first_shard), token)
|
| 301 |
bad = [k for k in hdr if any(s in k for s in QUANTIZED_KEY_SIGNATURES)]
|
| 302 |
if bad:
|
| 303 |
-
return True, f"❌
|
| 304 |
-
good = {hdr[k].get("dtype",
|
| 305 |
if good:
|
| 306 |
warnings.append(f"✅ 权重格式:{good}")
|
| 307 |
except Exception as e:
|
|
@@ -311,7 +269,7 @@ def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
|
|
| 311 |
|
| 312 |
|
| 313 |
# ─────────────────────────────────────────────
|
| 314 |
-
# GQA
|
| 315 |
# ─────────────────────────────────────────────
|
| 316 |
|
| 317 |
def infer_gqa_params(
|
|
@@ -328,7 +286,7 @@ def infer_gqa_params(
|
|
| 328 |
if hs and nh:
|
| 329 |
d_head = hs // nh
|
| 330 |
if not d_head:
|
| 331 |
-
for c in [256, 128, 96, 80, 64, 32]:
|
| 332 |
if q_rows % c == 0 and k_rows % c == 0:
|
| 333 |
d_head = c
|
| 334 |
break
|
|
@@ -346,64 +304,56 @@ def infer_gqa_params(
|
|
| 346 |
# 指标计算
|
| 347 |
# ─────────────────────────────────────────────
|
| 348 |
|
| 349 |
-
def
|
| 350 |
am, bm = a - a.mean(), b - b.mean()
|
| 351 |
den = torch.norm(am) * torch.norm(bm)
|
| 352 |
return float(torch.dot(am, bm) / den) if den != 0 else 0.0
|
| 353 |
|
| 354 |
def compute_ssr(a: torch.Tensor, b: torch.Tensor) -> float:
|
| 355 |
-
n
|
| 356 |
an = a[:n] / (torch.norm(a[:n]) + 1e-10)
|
| 357 |
bn = b[:n] / (torch.norm(b[:n]) + 1e-10)
|
| 358 |
return float(torch.mean(torch.abs(an - bn)))
|
| 359 |
|
| 360 |
def compute_svr(a: torch.Tensor, b: torch.Tensor) -> tuple[float, float]:
|
| 361 |
-
n
|
| 362 |
sa, sb = a[:n], b[:n]
|
| 363 |
den = torch.dot(sb, sb)
|
| 364 |
-
if den == 0:
|
| 365 |
-
return 1.0, 0.0
|
| 366 |
alpha = torch.dot(sa, sb) / den
|
| 367 |
return float(alpha), float(torch.mean((sa - alpha * sb) ** 2))
|
| 368 |
|
| 369 |
def compute_cosU(U_a: torch.Tensor, U_b: torch.Tensor) -> float:
|
| 370 |
r = min(U_a.shape[0], U_b.shape[0])
|
| 371 |
c = min(U_a.shape[1], U_b.shape[1])
|
| 372 |
-
Ua = U_a[:r, :c]
|
| 373 |
-
Ub = U_b[:r, :c]
|
| 374 |
-
Ua = Ua / (torch.norm(Ua, dim=0, keepdim=True) + 1e-10)
|
| 375 |
-
Ub = Ub / (torch.norm(Ub, dim=0, keepdim=True) + 1e-10)
|
| 376 |
return float(torch.diag(torch.abs(Ua.T @ Ub)).mean())
|
| 377 |
|
| 378 |
def compute_cosV(Vt_a: torch.Tensor, Vt_b: torch.Tensor) -> float:
|
| 379 |
r = min(Vt_a.shape[0], Vt_b.shape[0])
|
| 380 |
-
c = min(Vt_a.shape[1], Vt_b.shape[1])
|
| 381 |
-
Va = Vt_a[:r, :c]
|
| 382 |
-
Vb = Vt_b[:r, :c]
|
| 383 |
-
Va = Va / (torch.norm(Va, dim=1, keepdim=True) + 1e-10)
|
| 384 |
-
Vb = Vb / (torch.norm(Vb, dim=1, keepdim=True) + 1e-10)
|
| 385 |
return float(torch.abs((Va * Vb).sum(dim=1)).mean())
|
| 386 |
|
| 387 |
|
| 388 |
# ─────────────────────────────────────────────
|
| 389 |
-
# 逐头分析(
|
| 390 |
# ─────────────────────────────────────────────
|
| 391 |
|
| 392 |
def analyze_layer_heads(
|
| 393 |
-
W_q: torch.Tensor,
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
layer_idx: int, # 原始层号,不重排
|
| 397 |
n_q: int, n_kv: int, d_head: int,
|
| 398 |
-
modality: str,
|
| 399 |
) -> tuple[list[dict], str]:
|
| 400 |
|
| 401 |
-
group
|
| 402 |
-
records
|
| 403 |
-
|
| 404 |
-
lines.append(
|
| 405 |
f"\n{'─'*80}\n"
|
| 406 |
-
f"Layer {layer_idx:3d}
|
| 407 |
f"n_q={n_q} n_kv={n_kv} group={group} d_head={d_head}\n"
|
| 408 |
f"{'─'*80}\n"
|
| 409 |
f" {'KV':>3} {'Q':>3} │"
|
|
@@ -412,7 +362,7 @@ def analyze_layer_heads(
|
|
| 412 |
f" {'cosU_QK':>8} {'cosU_QV':>8} {'cosU_KV':>8} │"
|
| 413 |
f" {'cosV_QK':>8} {'cosV_QV':>8} {'cosV_KV':>8} │"
|
| 414 |
f" {'α_QK':>7} {'α_QV':>7} {'α_KV':>7}\n"
|
| 415 |
-
|
| 416 |
|
| 417 |
for kv_h in range(n_kv):
|
| 418 |
k_t = W_k[kv_h*d_head:(kv_h+1)*d_head, :]
|
|
@@ -424,64 +374,82 @@ def analyze_layer_heads(
|
|
| 424 |
cosU_KV = compute_cosU(U_k, U_v)
|
| 425 |
cosV_KV = compute_cosV(Vt_k, Vt_v)
|
| 426 |
ssr_kv = compute_ssr(s_k, s_v)
|
| 427 |
-
pkv =
|
| 428 |
-
|
|
|
|
|
|
|
| 429 |
|
| 430 |
for q_off in range(group):
|
| 431 |
-
h
|
| 432 |
q_t = W_q[h*d_head:(h+1)*d_head, :]
|
| 433 |
U_q, s_q, Vt_q = torch.linalg.svd(q_t, full_matrices=False)
|
| 434 |
|
| 435 |
nqk = min(len(s_q), len(s_k))
|
| 436 |
nqv = min(len(s_q), len(s_v))
|
| 437 |
|
| 438 |
-
pqk
|
| 439 |
-
spqk
|
| 440 |
ssr_qk = compute_ssr(s_q, s_k)
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
|
| 445 |
-
pqv
|
| 446 |
ssr_qv = compute_ssr(s_q, s_v)
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
|
| 451 |
-
smxq = float(s_q.max())
|
| 452 |
-
|
| 453 |
-
|
|
|
|
|
|
|
|
|
|
| 454 |
|
| 455 |
records.append({
|
| 456 |
-
|
| 457 |
-
"
|
| 458 |
-
"
|
| 459 |
-
"
|
| 460 |
-
"
|
| 461 |
-
"
|
| 462 |
-
"
|
| 463 |
-
"
|
| 464 |
-
"
|
| 465 |
-
"
|
| 466 |
-
"
|
| 467 |
-
"
|
| 468 |
-
"
|
| 469 |
-
"
|
| 470 |
-
"
|
| 471 |
-
"
|
| 472 |
-
"
|
| 473 |
-
"
|
| 474 |
-
"
|
| 475 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
})
|
| 477 |
|
| 478 |
lines.append(
|
| 479 |
f" {kv_h:>3d} {h:>3d} │"
|
| 480 |
f" {pqk:>+7.4f} {spqk:>+7.4f} {ssr_qk:>8.6f} │"
|
| 481 |
f" {ssr_qv:>8.6f} {ssr_kv:>8.6f} │"
|
| 482 |
-
f" {
|
| 483 |
-
f" {
|
| 484 |
-
f" {
|
| 485 |
)
|
| 486 |
|
| 487 |
return records, "".join(lines)
|
|
@@ -492,25 +460,25 @@ def analyze_layer_heads(
|
|
| 492 |
# ─────────────────────────────────────────────
|
| 493 |
|
| 494 |
def analyze_model(
|
| 495 |
-
model_id:
|
| 496 |
-
hf_token:
|
| 497 |
-
|
|
|
|
| 498 |
progress=gr.Progress()
|
| 499 |
):
|
| 500 |
if not model_id.strip():
|
| 501 |
return "❌ 请输入模型 ID", None
|
| 502 |
|
| 503 |
-
token
|
| 504 |
-
|
| 505 |
-
log_lines = [f"🔍 分析模型:{model_id}\n{'═'*80}\n"]
|
| 506 |
all_records: list[dict] = []
|
| 507 |
|
| 508 |
# ── 量化检测 ─────────────────────────────────
|
| 509 |
progress(0.02, desc="量化检测...")
|
| 510 |
blocked, qmsg = check_quantization(model_id, token)
|
| 511 |
-
|
| 512 |
if blocked:
|
| 513 |
-
return "".join(
|
| 514 |
|
| 515 |
# ── config.json ───────────────────────────────
|
| 516 |
config_params = {}
|
|
@@ -521,222 +489,177 @@ def analyze_model(
|
|
| 521 |
timeout=15
|
| 522 |
)
|
| 523 |
if r.status_code == 200:
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
f"
|
| 528 |
-
f"
|
| 529 |
-
f"
|
| 530 |
-
f"
|
| 531 |
-
f" num_key_value_heads = {config_params.get('num_key_value_heads')}\n"
|
| 532 |
-
f" head_dim = {config_params.get('head_dim')}\n"
|
| 533 |
f"{'─'*80}\n"
|
| 534 |
)
|
| 535 |
except Exception:
|
| 536 |
-
|
| 537 |
|
| 538 |
# ── 获取 shard 列表 ───────────────────────────
|
| 539 |
progress(0.05, desc="读取模型索引...")
|
| 540 |
try:
|
| 541 |
-
index_data
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
log_lines.append(f"📦 文件:{shard_files}\n")
|
| 548 |
except requests.exceptions.HTTPError as e:
|
| 549 |
return _http_error_msg(e, model_id), None
|
| 550 |
|
| 551 |
# ── 读取所有 shard header ─────────────────────
|
| 552 |
progress(0.08, desc="读取 shard headers...")
|
| 553 |
all_shard_headers: dict[str, tuple[dict, int]] = {}
|
| 554 |
-
total_keys = 0
|
| 555 |
for sf in shard_files:
|
| 556 |
try:
|
| 557 |
h, hs = read_safetensors_header(get_file_url(model_id, sf), token)
|
| 558 |
all_shard_headers[sf] = (h, hs)
|
| 559 |
-
total_keys += len(h)
|
| 560 |
except Exception as e:
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
log_lines.append(f"🔑 总 key 数:{total_keys}\n")
|
| 564 |
-
|
| 565 |
-
# ── 发现所有组件 ──────────────────────────────
|
| 566 |
-
progress(0.12, desc="识别组件结构...")
|
| 567 |
-
all_components = discover_all_components(all_shard_headers)
|
| 568 |
-
|
| 569 |
-
if not all_components:
|
| 570 |
-
sample = []
|
| 571 |
-
for sf, (h, _) in list(all_shard_headers.items())[:1]:
|
| 572 |
-
sample = list(h.keys())[:30]
|
| 573 |
-
return "".join(log_lines) + "⚠️ 无法识别 Q/K/V key,前30个 key:\n" + "\n".join(sample), None
|
| 574 |
-
|
| 575 |
-
# ── 打印组件概览 ──────────────────────────────
|
| 576 |
-
log_lines.append("📐 发现组件:\n")
|
| 577 |
-
for prefix, layers in sorted(all_components.items()):
|
| 578 |
-
modality = infer_modality(prefix)
|
| 579 |
-
sorted_l = sorted(layers.keys())
|
| 580 |
-
log_lines.append(
|
| 581 |
-
f" [{modality:6s}] prefix='{prefix}' "
|
| 582 |
-
f"层数={len(sorted_l)} "
|
| 583 |
-
f"范围={sorted_l[0]}~{sorted_l[-1]}\n"
|
| 584 |
-
)
|
| 585 |
-
log_lines.append(f"{'─'*80}\n")
|
| 586 |
-
|
| 587 |
-
# ── 逐组件逐层分析 ────────────────────────────
|
| 588 |
-
# 按前缀排序,每个组件独立分析,层号保持原始值
|
| 589 |
-
component_done = 0
|
| 590 |
-
total_components = len(all_components)
|
| 591 |
-
|
| 592 |
-
for prefix, layers in sorted(all_components.items()):
|
| 593 |
-
modality = infer_modality(prefix)
|
| 594 |
-
sorted_idxs = sorted(layers.keys())
|
| 595 |
-
|
| 596 |
-
log_lines.append(
|
| 597 |
-
f"\n{'═'*80}\n"
|
| 598 |
-
f"🔷 组件:'{prefix}' [{modality}] "
|
| 599 |
-
f"共 {len(sorted_idxs)} 层\n"
|
| 600 |
-
f"{'═'*80}\n"
|
| 601 |
-
)
|
| 602 |
|
| 603 |
-
|
| 604 |
-
layers_in_component = 0
|
| 605 |
-
gqa_logged = False
|
| 606 |
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
f" ⏸️ 已达到最大层数 {max_l},该组件剩余层跳过\n"
|
| 611 |
-
)
|
| 612 |
-
break
|
| 613 |
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
)
|
| 618 |
-
progress(
|
| 619 |
-
0.15 + 0.80 * overall_progress,
|
| 620 |
-
desc=f"{modality} 层 {layer_idx}..."
|
| 621 |
-
)
|
| 622 |
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
|
|
|
|
|
|
|
|
|
| 627 |
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
get_file_url(model_id, v_shard), v_key,
|
| 639 |
-
*all_shard_headers[v_shard], token
|
| 640 |
-
)
|
| 641 |
-
except Exception as e:
|
| 642 |
-
log_lines.append(f"Layer {layer_idx}: ❌ 加载失败:{e}\n")
|
| 643 |
-
layers_in_component += 1
|
| 644 |
-
continue
|
| 645 |
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
n_q, n_kv, d_head = infer_gqa_params(W_q, W_k, cfg)
|
| 656 |
-
except ValueError as e:
|
| 657 |
-
log_lines.append(f"Layer {layer_idx}: ❌ GQA 推断失败:{e}\n")
|
| 658 |
-
del W_q, W_k, W_v
|
| 659 |
-
layers_in_component += 1
|
| 660 |
-
continue
|
| 661 |
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
f" W_q={list(W_q.shape)} "
|
| 667 |
-
f"W_k={list(W_k.shape)} "
|
| 668 |
-
f"W_v={list(W_v.shape)}\n"
|
| 669 |
-
)
|
| 670 |
-
gqa_logged = True
|
| 671 |
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
n_q, n_kv, d_head,
|
| 676 |
-
modality=modality,
|
| 677 |
-
)
|
| 678 |
-
all_records.extend(records)
|
| 679 |
-
log_lines.append(layer_log)
|
| 680 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 681 |
del W_q, W_k, W_v
|
| 682 |
-
|
| 683 |
|
| 684 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 685 |
|
| 686 |
-
# ──
|
| 687 |
if not all_records:
|
| 688 |
-
return "".join(
|
| 689 |
|
| 690 |
df = pd.DataFrame(all_records)
|
| 691 |
|
| 692 |
-
def
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
f" Mean={np.mean(arr):.6f}"
|
| 699 |
-
f" Min={np.min(arr):.6f}"
|
| 700 |
-
f" Max={np.max(arr):.6f}\n"
|
| 701 |
-
)
|
| 702 |
|
| 703 |
-
|
| 704 |
-
summary = [f"\n{'═'*80}\n📊 王氏五定律全局汇总 — {model_id}\n{'═'*80}\n"]
|
| 705 |
|
| 706 |
-
|
| 707 |
-
|
|
|
|
| 708 |
summary.append(
|
| 709 |
-
f"\n▶
|
| 710 |
-
f"
|
| 711 |
-
f"{
|
| 712 |
)
|
| 713 |
summary += [
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
]
|
| 735 |
|
| 736 |
summary.append(f"\n⚡ 理论极值:Pearson→1, SSR→0, cosU(QV)<1/√d_head\n{'═'*80}\n")
|
| 737 |
-
|
| 738 |
|
| 739 |
-
return "".join(
|
| 740 |
|
| 741 |
|
| 742 |
# ─────────────────────────────────────────────
|
|
@@ -750,7 +673,7 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
|
| 750 |
**Mathematical Foundations of Large Language Models (MF-LLM)**
|
| 751 |
|
| 752 |
通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
|
| 753 |
-
|
| 754 |
|
| 755 |
| 定律 | 指标 | 理论极值 |
|
| 756 |
|------|------|---------|
|
|
@@ -776,36 +699,50 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
|
| 776 |
placeholder="hf_xxxxxxxxxxxxxxxx",
|
| 777 |
type="password"
|
| 778 |
)
|
| 779 |
-
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 783 |
analyze_btn = gr.Button("🚀 开始分析", variant="primary")
|
| 784 |
|
| 785 |
with gr.Column(scale=1):
|
| 786 |
gr.Markdown("""
|
| 787 |
### ✅ 推荐模型
|
| 788 |
```
|
| 789 |
-
google/gemma-4-e2b
|
| 790 |
-
google/gemma-4-31b-it
|
| 791 |
Qwen/Qwen2.5-14B-Instruct
|
| 792 |
meta-llama/Llama-3-8B
|
| 793 |
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
|
| 794 |
```
|
| 795 |
-
###
|
| 796 |
-
-
|
| 797 |
-
-
|
| 798 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 799 |
""")
|
| 800 |
|
| 801 |
log_output = gr.Textbox(
|
| 802 |
label="分析日志",
|
| 803 |
-
lines=40, max_lines=
|
| 804 |
)
|
| 805 |
table_output = gr.Dataframe(
|
| 806 |
label="逐头全指标结果表",
|
| 807 |
headers=[
|
| 808 |
-
"
|
| 809 |
"pearson_QK","spearman_QK","pearson_QV","pearson_KV",
|
| 810 |
"ssr_QK","ssr_QV","ssr_KV",
|
| 811 |
"cosU_QK","cosU_QV","cosU_KV",
|
|
@@ -821,7 +758,7 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
|
| 821 |
|
| 822 |
analyze_btn.click(
|
| 823 |
fn=analyze_model,
|
| 824 |
-
inputs=[model_input, token_input,
|
| 825 |
outputs=[log_output, table_output]
|
| 826 |
)
|
| 827 |
|
|
|
|
| 110 |
|
| 111 |
def _http_error_msg(e: requests.exceptions.HTTPError, model_id: str) -> str:
    """Turn an HTTP error into a short, user-facing message.

    Args:
        e: The HTTP error raised by ``requests``.
        model_id: Repository id the failed request was made for; it is
            interpolated into the 403 and 404 messages.

    Returns:
        A one-line message starting with a cross mark. 401, 403 and 404 get
        dedicated texts; any other status falls back to a generic message
        that includes the status code and the exception text.
    """
    response = e.response
    # ``requests`` leaves ``response`` as None when the exception was raised
    # without one. Compare with ``is None`` rather than truthiness: a
    # ``Response`` with an error status is itself falsy.
    if response is None:
        return f"❌ HTTP 错误:{e}"

    code = response.status_code
    if code == 401:
        return "❌ 401 未授权"
    if code == 403:
        return f"❌ 403 禁止访问:请先接受 {model_id} 的使用协议"
    if code == 404:
        return f"❌ 404 未找到:{model_id}"
    return f"❌ HTTP {code}:{e}"
|
| 117 |
|
| 118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
def extract_config_params(config: dict) -> dict:
|
| 120 |
if config is None:
|
| 121 |
return {}
|
|
|
|
| 124 |
def get_field(*keys):
|
| 125 |
for k in keys:
|
| 126 |
v = config.get(k)
|
| 127 |
+
if v is not None: return v
|
|
|
|
| 128 |
v = text_cfg.get(k)
|
| 129 |
+
if v is not None: return v
|
|
|
|
| 130 |
return None
|
| 131 |
|
| 132 |
return {
|
|
|
|
| 143 |
# ─────────────────────────────────────────────
|
| 144 |
|
| 145 |
def _classify_qkv_suffix(suffix: str) -> str | None:
|
|
|
|
| 146 |
if not suffix.endswith(".weight"):
|
| 147 |
return None
|
| 148 |
excludes = ["norm", "rope", "embed", "lm_head", "layernorm", "ln_"]
|
|
|
|
| 159 |
|
| 160 |
|
| 161 |
# ─────────────────────────────────────────────
|
| 162 |
+
# ★ 核心:按原始层号扫描,不合并不重排
|
| 163 |
+
# 返回结构:
|
| 164 |
+
# {
|
| 165 |
+
# (prefix, layer_idx): {
|
| 166 |
+
# "q": (shard, key),
|
| 167 |
+
# "k": (shard, key),
|
| 168 |
+
# "v": (shard, key),
|
| 169 |
+
# }
|
| 170 |
+
# }
|
| 171 |
+
# key 是 (prefix, layer_idx) 元组,保证不同组件同编号层不混淆
|
| 172 |
# ─────────────────────────────────────────────
|
| 173 |
|
| 174 |
+
def scan_all_qkv(all_shard_headers: dict) -> dict:
|
| 175 |
"""
|
| 176 |
+
扫描所有 shard 中的 Q/K/V weight。
|
| 177 |
+
以 (prefix, layer_idx) 为 key,保证:
|
| 178 |
+
- 不同组件的同编号层互相独立
|
| 179 |
+
- 层号是 safetensors 里的原始值
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
"""
|
| 181 |
+
result: dict[tuple[str, int], dict] = {}
|
|
|
|
| 182 |
|
| 183 |
for shard_name, (header, _) in all_shard_headers.items():
|
| 184 |
for key in header.keys():
|
|
|
|
| 187 |
continue
|
| 188 |
|
| 189 |
layer_idx = int(m.group(1))
|
| 190 |
+
prefix = key[:m.start()] # 精确截断
|
| 191 |
suffix = key[m.end():]
|
| 192 |
|
| 193 |
role = _classify_qkv_suffix(suffix)
|
| 194 |
if role is None:
|
| 195 |
continue
|
| 196 |
|
| 197 |
+
slot = (prefix, layer_idx)
|
| 198 |
+
if slot not in result:
|
| 199 |
+
result[slot] = {"q": None, "k": None, "v": None}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
+
if result[slot][role] is None:
|
| 202 |
+
result[slot][role] = (shard_name, key)
|
| 203 |
|
| 204 |
+
# 只保留 QKV 完整的槽
|
| 205 |
+
return {
|
| 206 |
+
slot: qkv for slot, qkv in result.items()
|
| 207 |
+
if all(qkv[r] is not None for r in ("q", "k", "v"))
|
| 208 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
|
| 211 |
# ─────────────────────────────────────────────
|
|
|
|
| 222 |
headers=headers, timeout=15
|
| 223 |
)
|
| 224 |
if r.status_code == 200:
|
| 225 |
+
cfg = r.json()
|
| 226 |
qcfg = cfg.get("quantization_config", {})
|
| 227 |
+
qt = (qcfg.get("quant_type","") or
|
| 228 |
+
qcfg.get("quant_method","") or
|
| 229 |
+
cfg.get("quantization","")).lower()
|
| 230 |
if "gptq" in qt:
|
| 231 |
+
return True, f"❌ GPTQ {qcfg.get('bits','?')}bit,请用原始 BF16 版本。"
|
| 232 |
if "awq" in qt:
|
| 233 |
return True, "❌ AWQ 量化,请用原始 BF16 版本。"
|
| 234 |
if "bitsandbytes" in qt or "bnb" in qt:
|
| 235 |
+
warnings.append("⚠️ bitsandbytes 量化,结果可能失真")
|
| 236 |
except Exception:
|
| 237 |
warnings.append("⚠️ 无法读取 config.json")
|
| 238 |
|
| 239 |
+
for kw in ["gptq","awq","gguf"]:
|
| 240 |
if kw in model_id.lower():
|
| 241 |
return True, f"❌ 模型名含 '{kw.upper()}',请使用原始 BF16 版本。"
|
| 242 |
|
| 243 |
try:
|
| 244 |
all_files = list(list_repo_files(model_id, token=token))
|
| 245 |
if any(f.endswith(".gguf") for f in all_files):
|
| 246 |
+
return True, "❌ 检测到 .gguf 文件,不支持。"
|
| 247 |
if not any(f.endswith(".safetensors") for f in all_files):
|
| 248 |
return True, "❌ 未找到 .safetensors 文件。"
|
| 249 |
except Exception as e:
|
| 250 |
warnings.append(f"⚠️ 文件列表检测失败:{e}")
|
| 251 |
|
| 252 |
try:
|
| 253 |
+
index_data = find_index_file(model_id, token)
|
| 254 |
first_shard = (
|
| 255 |
sorted(set(index_data["weight_map"].values()))[0]
|
| 256 |
if index_data else get_safetensor_files(model_id, token)[0]
|
|
|
|
| 258 |
hdr, _ = read_safetensors_header(get_file_url(model_id, first_shard), token)
|
| 259 |
bad = [k for k in hdr if any(s in k for s in QUANTIZED_KEY_SIGNATURES)]
|
| 260 |
if bad:
|
| 261 |
+
return True, f"❌ 量化 key:{bad[:3]}"
|
| 262 |
+
good = {hdr[k].get("dtype","") for k in list(hdr)[:20]} - UNSUPPORTED_SVD_DTYPES
|
| 263 |
if good:
|
| 264 |
warnings.append(f"✅ 权重格式:{good}")
|
| 265 |
except Exception as e:
|
|
|
|
| 269 |
|
| 270 |
|
| 271 |
# ─────────────────────────────────────────────
|
| 272 |
+
# GQA 推断
|
| 273 |
# ─────────────────────────────────────────────
|
| 274 |
|
| 275 |
def infer_gqa_params(
|
|
|
|
| 286 |
if hs and nh:
|
| 287 |
d_head = hs // nh
|
| 288 |
if not d_head:
|
| 289 |
+
for c in [256, 128, 96, 80, 64, 48, 40, 32]:
|
| 290 |
if q_rows % c == 0 and k_rows % c == 0:
|
| 291 |
d_head = c
|
| 292 |
break
|
|
|
|
| 304 |
# 指标计算
|
| 305 |
# ─────────────────────────────────────────────
|
| 306 |
|
| 307 |
+
def compute_pearson(a: torch.Tensor, b: torch.Tensor) -> float:
    """Return the Pearson correlation coefficient of two 1-D tensors.

    Inputs of different lengths are compared over their common leading
    prefix, the same cropping convention ``compute_ssr`` and ``compute_svr``
    apply, so callers no longer have to slice before calling.

    Args:
        a: First 1-D tensor.
        b: Second 1-D tensor.

    Returns:
        The correlation in ``[-1, 1]`` (up to rounding), or ``0.0`` when it is
        undefined: empty input, or either input constant over the compared
        prefix.
    """
    n = min(a.shape[0], b.shape[0])
    if n == 0:
        # mean() of an empty tensor is NaN; report "no correlation" instead.
        return 0.0
    a, b = a[:n], b[:n]

    am, bm = a - a.mean(), b - b.mean()
    den = torch.norm(am) * torch.norm(bm)
    if den == 0:
        # At least one side has zero variance, so the ratio is undefined.
        return 0.0
    return float(torch.dot(am, bm) / den)
|
| 311 |
|
| 312 |
def compute_ssr(a: torch.Tensor, b: torch.Tensor) -> float:
    """Mean absolute difference between two L2-normalised 1-D tensors.

    Both inputs are cropped to their common leading length, each crop is
    divided by its own Euclidean norm (plus ``1e-10`` so a zero vector does
    not divide by zero), and the element-wise absolute differences are
    averaged.

    Args:
        a: First 1-D tensor.
        b: Second 1-D tensor.

    Returns:
        The mean absolute difference as a Python float.
    """
    common = min(a.shape[0], b.shape[0])
    head_a = a[:common]
    head_b = b[:common]

    unit_a = head_a / (torch.norm(head_a) + 1e-10)
    unit_b = head_b / (torch.norm(head_b) + 1e-10)

    gap = torch.abs(unit_a - unit_b)
    return float(torch.mean(gap))
|
| 317 |
|
| 318 |
def compute_svr(a: torch.Tensor, b: torch.Tensor) -> tuple[float, float]:
    """Fit ``a ≈ alpha * b`` by least squares and report the fit.

    Both inputs are cropped to their common leading length before fitting.

    Args:
        a: 1-D tensor being approximated.
        b: 1-D tensor being scaled.

    Returns:
        ``(alpha, residual)`` where ``alpha = <a, b> / <b, b>`` and
        ``residual`` is the mean of ``(a - alpha * b) ** 2``. When ``<b, b>``
        is exactly zero the pair ``(1.0, 0.0)`` is returned instead.
    """
    common = min(a.shape[0], b.shape[0])
    lhs = a[:common]
    rhs = b[:common]

    energy = torch.dot(rhs, rhs)
    if energy == 0:
        return 1.0, 0.0

    scale = torch.dot(lhs, rhs) / energy
    leftover = lhs - scale * rhs
    return float(scale), float(torch.mean(leftover ** 2))
|
| 325 |
|
| 326 |
def compute_cosU(U_a: torch.Tensor, U_b: torch.Tensor) -> float:
    """Mean absolute cosine between corresponding columns of two matrices.

    Both matrices are cropped to their common leading rows and columns, each
    column is L2-normalised (``1e-10`` is added to the norm so a zero column
    does not divide by zero), and column ``i`` of one is compared with column
    ``i`` of the other.

    Args:
        U_a: 2-D tensor whose columns are the vectors to compare (callers in
            this file pass the ``U`` factor of ``torch.linalg.svd``).
        U_b: 2-D tensor compared column by column against ``U_a``.

    Returns:
        The mean over columns of ``|cos(angle)|`` as a Python float.
    """
    r = min(U_a.shape[0], U_b.shape[0])
    c = min(U_a.shape[1], U_b.shape[1])

    Ua = U_a[:r, :c]
    Ub = U_b[:r, :c]
    Ua = Ua / (torch.norm(Ua, dim=0, keepdim=True) + 1e-10)
    Ub = Ub / (torch.norm(Ub, dim=0, keepdim=True) + 1e-10)

    # Only matching column pairs are needed, i.e. the diagonal of Ua.T @ Ub.
    # Column-wise dot products give that diagonal without building the full
    # c x c product.
    return float(torch.abs((Ua * Ub).sum(dim=0)).mean())
|
| 332 |
|
| 333 |
def compute_cosV(Vt_a: torch.Tensor, Vt_b: torch.Tensor) -> float:
    """Mean absolute cosine between corresponding rows of two matrices.

    Both matrices are cropped to their common leading rows and columns, each
    row is L2-normalised (``1e-10`` is added to the norm so a zero row does
    not divide by zero), and row ``i`` of one is compared with row ``i`` of
    the other.

    Args:
        Vt_a: 2-D tensor whose rows are the vectors to compare (callers in
            this file pass the ``Vh`` factor of ``torch.linalg.svd``).
        Vt_b: 2-D tensor compared row by row against ``Vt_a``.

    Returns:
        The mean over rows of ``|cos(angle)|`` as a Python float.
    """
    rows = min(Vt_a.shape[0], Vt_b.shape[0])
    cols = min(Vt_a.shape[1], Vt_b.shape[1])

    crop_a = Vt_a[:rows, :cols]
    crop_b = Vt_b[:rows, :cols]

    unit_a = crop_a / (torch.norm(crop_a, dim=1, keepdim=True) + 1e-10)
    unit_b = crop_b / (torch.norm(crop_b, dim=1, keepdim=True) + 1e-10)

    row_dots = (unit_a * unit_b).sum(dim=1)
    return float(torch.abs(row_dots).mean())
|
| 339 |
|
| 340 |
|
| 341 |
# ─────────────────────────────────────────────
|
| 342 |
+
# 逐头分析(原始层号直接传入,不做任何变换)
|
| 343 |
# ─────────────────────────────────────────────
|
| 344 |
|
| 345 |
def analyze_layer_heads(
|
| 346 |
+
W_q: torch.Tensor, W_k: torch.Tensor, W_v: torch.Tensor,
|
| 347 |
+
prefix: str, # 组件前缀,用于日志
|
| 348 |
+
layer_idx: int, # 原始层号,直接来自 safetensors key
|
|
|
|
| 349 |
n_q: int, n_kv: int, d_head: int,
|
|
|
|
| 350 |
) -> tuple[list[dict], str]:
|
| 351 |
|
| 352 |
+
group = n_q // n_kv
|
| 353 |
+
records = []
|
| 354 |
+
lines = [
|
|
|
|
| 355 |
f"\n{'─'*80}\n"
|
| 356 |
+
f"[{prefix}] Layer {layer_idx:3d} "
|
| 357 |
f"n_q={n_q} n_kv={n_kv} group={group} d_head={d_head}\n"
|
| 358 |
f"{'─'*80}\n"
|
| 359 |
f" {'KV':>3} {'Q':>3} │"
|
|
|
|
| 362 |
f" {'cosU_QK':>8} {'cosU_QV':>8} {'cosU_KV':>8} │"
|
| 363 |
f" {'cosV_QK':>8} {'cosV_QV':>8} {'cosV_KV':>8} │"
|
| 364 |
f" {'α_QK':>7} {'α_QV':>7} {'α_KV':>7}\n"
|
| 365 |
+
]
|
| 366 |
|
| 367 |
for kv_h in range(n_kv):
|
| 368 |
k_t = W_k[kv_h*d_head:(kv_h+1)*d_head, :]
|
|
|
|
| 374 |
cosU_KV = compute_cosU(U_k, U_v)
|
| 375 |
cosV_KV = compute_cosV(Vt_k, Vt_v)
|
| 376 |
ssr_kv = compute_ssr(s_k, s_v)
|
| 377 |
+
pkv = compute_pearson(
|
| 378 |
+
s_k[:min(len(s_k), len(s_v))],
|
| 379 |
+
s_v[:min(len(s_k), len(s_v))]
|
| 380 |
+
)
|
| 381 |
|
| 382 |
for q_off in range(group):
|
| 383 |
+
h = kv_h * group + q_off
|
| 384 |
q_t = W_q[h*d_head:(h+1)*d_head, :]
|
| 385 |
U_q, s_q, Vt_q = torch.linalg.svd(q_t, full_matrices=False)
|
| 386 |
|
| 387 |
nqk = min(len(s_q), len(s_k))
|
| 388 |
nqv = min(len(s_q), len(s_v))
|
| 389 |
|
| 390 |
+
pqk = compute_pearson(s_q[:nqk], s_k[:nqk])
|
| 391 |
+
spqk = float(spearmanr(s_q[:nqk].numpy(), s_k[:nqk].numpy())[0])
|
| 392 |
ssr_qk = compute_ssr(s_q, s_k)
|
| 393 |
+
a_qk, r_qk = compute_svr(s_q, s_k)
|
| 394 |
+
cU_QK = compute_cosU(U_q, U_k)
|
| 395 |
+
cV_QK = compute_cosV(Vt_q, Vt_k)
|
| 396 |
|
| 397 |
+
pqv = compute_pearson(s_q[:nqv], s_v[:nqv])
|
| 398 |
ssr_qv = compute_ssr(s_q, s_v)
|
| 399 |
+
a_qv, r_qv = compute_svr(s_q, s_v)
|
| 400 |
+
cU_QV = compute_cosU(U_q, U_v)
|
| 401 |
+
cV_QV = compute_cosV(Vt_q, Vt_v)
|
| 402 |
|
| 403 |
+
smxq = float(s_q.max())
|
| 404 |
+
smnq = float(s_q[s_q>1e-10].min()) if (s_q>1e-10).any() else 0.
|
| 405 |
+
smxk = float(s_k.max())
|
| 406 |
+
smnk = float(s_k[s_k>1e-10].min()) if (s_k>1e-10).any() else 0.
|
| 407 |
+
smxv = float(s_v.max())
|
| 408 |
+
smnv = float(s_v[s_v>1e-10].min()) if (s_v>1e-10).any() else 0.
|
| 409 |
|
| 410 |
records.append({
|
| 411 |
+
# ★ prefix + layer_idx 完整保留,不做任何变换
|
| 412 |
+
"prefix": prefix,
|
| 413 |
+
"layer": layer_idx,
|
| 414 |
+
"kv_head": kv_h,
|
| 415 |
+
"q_head": h,
|
| 416 |
+
"pearson_QK": round(pqk, 6),
|
| 417 |
+
"spearman_QK": round(spqk, 6),
|
| 418 |
+
"pearson_QV": round(pqv, 6),
|
| 419 |
+
"pearson_KV": round(pkv, 6),
|
| 420 |
+
"ssr_QK": round(ssr_qk, 8),
|
| 421 |
+
"ssr_QV": round(ssr_qv, 8),
|
| 422 |
+
"ssr_KV": round(ssr_kv, 8),
|
| 423 |
+
"cosU_QK": round(cU_QK, 6),
|
| 424 |
+
"cosU_QV": round(cU_QV, 6),
|
| 425 |
+
"cosU_KV": round(cosU_KV,6),
|
| 426 |
+
"cosV_QK": round(cV_QK, 6),
|
| 427 |
+
"cosV_QV": round(cV_QV, 6),
|
| 428 |
+
"cosV_KV": round(cosV_KV,6),
|
| 429 |
+
"alpha_QK": round(a_qk, 4),
|
| 430 |
+
"alpha_QV": round(a_qv, 4),
|
| 431 |
+
"alpha_KV": round(alpha_kv,4),
|
| 432 |
+
"alpha_res_QK": round(r_qk, 6),
|
| 433 |
+
"alpha_res_QV": round(r_qv, 6),
|
| 434 |
+
"alpha_res_KV": round(res_kv, 6),
|
| 435 |
+
"sigma_max_Q": round(smxq, 4),
|
| 436 |
+
"sigma_min_Q": round(smnq, 4),
|
| 437 |
+
"sigma_max_K": round(smxk, 4),
|
| 438 |
+
"sigma_min_K": round(smnk, 4),
|
| 439 |
+
"sigma_max_V": round(smxv, 4),
|
| 440 |
+
"sigma_min_V": round(smnv, 4),
|
| 441 |
+
"cond_Q": round(smxq/(smnq+1e-10), 2),
|
| 442 |
+
"cond_K": round(smxk/(smnk+1e-10), 2),
|
| 443 |
+
"cond_V": round(smxv/(smnv+1e-10), 2),
|
| 444 |
})
|
| 445 |
|
| 446 |
lines.append(
|
| 447 |
f" {kv_h:>3d} {h:>3d} │"
|
| 448 |
f" {pqk:>+7.4f} {spqk:>+7.4f} {ssr_qk:>8.6f} │"
|
| 449 |
f" {ssr_qv:>8.6f} {ssr_kv:>8.6f} │"
|
| 450 |
+
f" {cU_QK:>8.4f} {cU_QV:>8.4f} {cosU_KV:>8.4f} │"
|
| 451 |
+
f" {cV_QK:>8.4f} {cV_QV:>8.4f} {cosV_KV:>8.4f} │"
|
| 452 |
+
f" {a_qk:>7.4f} {a_qv:>7.4f} {alpha_kv:>7.4f}\n"
|
| 453 |
)
|
| 454 |
|
| 455 |
return records, "".join(lines)
|
|
|
|
| 460 |
# ─────────────────────────────────────────────
|
| 461 |
|
| 462 |
def analyze_model(
|
| 463 |
+
model_id: str,
|
| 464 |
+
hf_token: str,
|
| 465 |
+
start_layer: int, # ★ 原始层号起点
|
| 466 |
+
end_layer: int, # ★ 原始层号终点(含)
|
| 467 |
progress=gr.Progress()
|
| 468 |
):
|
| 469 |
if not model_id.strip():
|
| 470 |
return "❌ 请输入模型 ID", None
|
| 471 |
|
| 472 |
+
token = hf_token.strip() or None
|
| 473 |
+
log = [f"🔍 分析模型:{model_id} 层范围:{start_layer}~{end_layer}\n{'═'*80}\n"]
|
|
|
|
| 474 |
all_records: list[dict] = []
|
| 475 |
|
| 476 |
# ── 量化检测 ─────────────────────────────────
|
| 477 |
progress(0.02, desc="量化检测...")
|
| 478 |
blocked, qmsg = check_quantization(model_id, token)
|
| 479 |
+
log.append(f"【量化检测】\n{qmsg}\n{'─'*80}\n")
|
| 480 |
if blocked:
|
| 481 |
+
return "".join(log), None
|
| 482 |
|
| 483 |
# ── config.json ───────────────────────────────
|
| 484 |
config_params = {}
|
|
|
|
| 489 |
timeout=15
|
| 490 |
)
|
| 491 |
if r.status_code == 200:
|
| 492 |
+
config_params = extract_config_params(r.json())
|
| 493 |
+
log.append(
|
| 494 |
+
f"📋 config:model_type={config_params.get('model_type')} "
|
| 495 |
+
f"hidden={config_params.get('hidden_size')} "
|
| 496 |
+
f"n_heads={config_params.get('num_attention_heads')} "
|
| 497 |
+
f"n_kv={config_params.get('num_key_value_heads')} "
|
| 498 |
+
f"head_dim={config_params.get('head_dim')}\n"
|
|
|
|
|
|
|
| 499 |
f"{'─'*80}\n"
|
| 500 |
)
|
| 501 |
except Exception:
|
| 502 |
+
log.append("⚠️ 无法读取 config.json\n")
|
| 503 |
|
| 504 |
# ── 获取 shard 列表 ───────────────────────────
|
| 505 |
progress(0.05, desc="读取模型索引...")
|
| 506 |
try:
|
| 507 |
+
index_data = find_index_file(model_id, token)
|
| 508 |
+
shard_files = (
|
| 509 |
+
sorted(set(index_data["weight_map"].values()))
|
| 510 |
+
if index_data else get_safetensor_files(model_id, token)
|
| 511 |
+
)
|
| 512 |
+
log.append(f"📦 共 {len(shard_files)} 个 shard\n")
|
|
|
|
| 513 |
except requests.exceptions.HTTPError as e:
|
| 514 |
return _http_error_msg(e, model_id), None
|
| 515 |
|
| 516 |
# ── 读取所有 shard header ─────────────────────
|
| 517 |
progress(0.08, desc="读取 shard headers...")
|
| 518 |
all_shard_headers: dict[str, tuple[dict, int]] = {}
|
|
|
|
| 519 |
for sf in shard_files:
|
| 520 |
try:
|
| 521 |
h, hs = read_safetensors_header(get_file_url(model_id, sf), token)
|
| 522 |
all_shard_headers[sf] = (h, hs)
|
|
|
|
| 523 |
except Exception as e:
|
| 524 |
+
log.append(f"⚠️ {sf} 读取失败:{e}\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
|
| 526 |
+
log.append(f"🔑 总 key 数:{sum(len(h) for h,_ in all_shard_headers.values())}\n")
|
|
|
|
|
|
|
| 527 |
|
| 528 |
+
# ── 扫描所有 QKV 槽 ───────────────────────────
|
| 529 |
+
progress(0.12, desc="扫描 QKV 结构...")
|
| 530 |
+
all_slots = scan_all_qkv(all_shard_headers)
|
|
|
|
|
|
|
|
|
|
| 531 |
|
| 532 |
+
if not all_slots:
|
| 533 |
+
sample = list(next(iter(all_shard_headers.values()))[0].keys())[:20]
|
| 534 |
+
return "".join(log) + "⚠️ 无法识别 Q/K/V\n" + "\n".join(sample), None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
|
| 536 |
+
# ── 按原始层号过滤 [start_layer, end_layer] ───
|
| 537 |
+
# ★ 直接用 safetensors key 里的层号,不做任何变换
|
| 538 |
+
filtered_slots = {
|
| 539 |
+
(prefix, layer_idx): qkv
|
| 540 |
+
for (prefix, layer_idx), qkv in all_slots.items()
|
| 541 |
+
if start_layer <= layer_idx <= end_layer
|
| 542 |
+
}
|
| 543 |
|
| 544 |
+
if not filtered_slots:
|
| 545 |
+
# 打印实际存在的层号范围供参考
|
| 546 |
+
by_prefix: dict[str, list[int]] = {}
|
| 547 |
+
for (prefix, layer_idx) in all_slots:
|
| 548 |
+
by_prefix.setdefault(prefix, []).append(layer_idx)
|
| 549 |
+
info = "\n".join(
|
| 550 |
+
f" {p}: {sorted(v)}"
|
| 551 |
+
for p, v in sorted(by_prefix.items())
|
| 552 |
+
)
|
| 553 |
+
return "".join(log) + f"⚠️ 层范围 {start_layer}~{end_layer} 内无数据。\n实际层号:\n{info}\n", None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
|
| 555 |
+
# ── 打印结构概览 ──────────────────────────────
|
| 556 |
+
by_prefix: dict[str, list[int]] = {}
|
| 557 |
+
for (prefix, layer_idx) in filtered_slots:
|
| 558 |
+
by_prefix.setdefault(prefix, []).append(layer_idx)
|
| 559 |
|
| 560 |
+
log.append(f"📐 层范围 {start_layer}~{end_layer} 内发现的组件:\n")
|
| 561 |
+
for p, idxs in sorted(by_prefix.items()):
|
| 562 |
+
log.append(f" '{p}' → 层号 {sorted(idxs)}\n")
|
| 563 |
+
log.append(f"{'═'*80}\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 564 |
|
| 565 |
+
# ── 按 (prefix, layer_idx) 顺序分析 ──────────
|
| 566 |
+
# ★ sorted 保证输出有序,但层号本身不变
|
| 567 |
+
sorted_slots = sorted(filtered_slots.items(), key=lambda x: (x[0][0], x[0][1]))
|
| 568 |
+
total = len(sorted_slots)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 569 |
|
| 570 |
+
for i, ((prefix, layer_idx), qkv) in enumerate(sorted_slots):
|
| 571 |
+
progress(0.15 + 0.80 * i / max(total, 1),
|
| 572 |
+
desc=f"{prefix} layer {layer_idx}...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
|
| 574 |
+
q_shard, q_key = qkv["q"]
|
| 575 |
+
k_shard, k_key = qkv["k"]
|
| 576 |
+
v_shard, v_key = qkv["v"]
|
| 577 |
+
|
| 578 |
+
try:
|
| 579 |
+
W_q = load_tensor_remote(
|
| 580 |
+
get_file_url(model_id, q_shard), q_key,
|
| 581 |
+
*all_shard_headers[q_shard], token)
|
| 582 |
+
W_k = load_tensor_remote(
|
| 583 |
+
get_file_url(model_id, k_shard), k_key,
|
| 584 |
+
*all_shard_headers[k_shard], token)
|
| 585 |
+
W_v = load_tensor_remote(
|
| 586 |
+
get_file_url(model_id, v_shard), v_key,
|
| 587 |
+
*all_shard_headers[v_shard], token)
|
| 588 |
+
except Exception as e:
|
| 589 |
+
log.append(f"[{prefix}] Layer {layer_idx}: ❌ 加载失败:{e}\n")
|
| 590 |
+
continue
|
| 591 |
+
|
| 592 |
+
if W_q is None or W_k is None or W_v is None:
|
| 593 |
+
log.append(f"[{prefix}] Layer {layer_idx}: ⚠️ tensor 为 None\n")
|
| 594 |
+
continue
|
| 595 |
+
|
| 596 |
+
try:
|
| 597 |
+
n_q, n_kv, d_head = infer_gqa_params(W_q, W_k, config_params)
|
| 598 |
+
except ValueError as e:
|
| 599 |
+
log.append(f"[{prefix}] Layer {layer_idx}: ❌ GQA 推断失败:{e}\n")
|
| 600 |
del W_q, W_k, W_v
|
| 601 |
+
continue
|
| 602 |
|
| 603 |
+
records, layer_log = analyze_layer_heads(
|
| 604 |
+
W_q, W_k, W_v,
|
| 605 |
+
prefix, # 传入原始前缀
|
| 606 |
+
layer_idx, # ★ 传入原始层号,函数内不做任何变换
|
| 607 |
+
n_q, n_kv, d_head,
|
| 608 |
+
)
|
| 609 |
+
all_records.extend(records)
|
| 610 |
+
log.append(layer_log)
|
| 611 |
+
del W_q, W_k, W_v
|
| 612 |
|
| 613 |
+
# ── 汇总 ─────────────────────────────────────
|
| 614 |
if not all_records:
|
| 615 |
+
return "".join(log) + "\n❌ 未获得任何有效结果\n", None
|
| 616 |
|
| 617 |
df = pd.DataFrame(all_records)
|
| 618 |
|
| 619 |
+
def stat(arr, name):
|
| 620 |
+
return (f" {name:<14}"
|
| 621 |
+
f" Median={np.median(arr):.6f}"
|
| 622 |
+
f" Mean={np.mean(arr):.6f}"
|
| 623 |
+
f" Min={np.min(arr):.6f}"
|
| 624 |
+
f" Max={np.max(arr):.6f}\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 625 |
|
| 626 |
+
summary = [f"\n{'═'*80}\n📊 汇总 — {model_id} 层 {start_layer}~{end_layer}\n{'═'*80}\n"]
|
|
|
|
| 627 |
|
| 628 |
+
# 按 prefix 分组汇总
|
| 629 |
+
for pfx in df["prefix"].unique():
|
| 630 |
+
pdf = df[df["prefix"] == pfx]
|
| 631 |
summary.append(
|
| 632 |
+
f"\n▶ {pfx}\n"
|
| 633 |
+
f" 记录:{len(pdf)} 条,"
|
| 634 |
+
f"层:{sorted(pdf['layer'].unique())}\n"
|
| 635 |
)
|
| 636 |
summary += [
|
| 637 |
+
" 【第一定律 Pearson r → 1】\n",
|
| 638 |
+
stat(pdf["pearson_QK"].values, "Q-K:"),
|
| 639 |
+
stat(pdf["pearson_QV"].values, "Q-V:"),
|
| 640 |
+
stat(pdf["pearson_KV"].values, "K-V:"),
|
| 641 |
+
" 【第二定律 SSR → 0】\n",
|
| 642 |
+
stat(pdf["ssr_QK"].values, "Q-K:"),
|
| 643 |
+
stat(pdf["ssr_QV"].values, "Q-V:"),
|
| 644 |
+
stat(pdf["ssr_KV"].values, "K-V:"),
|
| 645 |
+
" 【第四定律 cosU 输出子空间】\n",
|
| 646 |
+
stat(pdf["cosU_QK"].values, "cosU Q-K:"),
|
| 647 |
+
stat(pdf["cosU_QV"].values, "cosU Q-V:"),
|
| 648 |
+
stat(pdf["cosU_KV"].values, "cosU K-V:"),
|
| 649 |
+
" 【第五定律 cosV 输入子空间】\n",
|
| 650 |
+
stat(pdf["cosV_QK"].values, "cosV Q-K:"),
|
| 651 |
+
stat(pdf["cosV_QV"].values, "cosV Q-V:"),
|
| 652 |
+
stat(pdf["cosV_KV"].values, "cosV K-V:"),
|
| 653 |
+
" 【第三定律 条件数】\n",
|
| 654 |
+
stat(pdf["cond_Q"].values, "cond Q:"),
|
| 655 |
+
stat(pdf["cond_K"].values, "cond K:"),
|
| 656 |
+
stat(pdf["cond_V"].values, "cond V:"),
|
| 657 |
]
|
| 658 |
|
| 659 |
summary.append(f"\n⚡ 理论极值:Pearson→1, SSR→0, cosU(QV)<1/√d_head\n{'═'*80}\n")
|
| 660 |
+
log.extend(summary)
|
| 661 |
|
| 662 |
+
return "".join(log), df
|
| 663 |
|
| 664 |
|
| 665 |
# ─────────────────────────────────────────────
|
|
|
|
| 673 |
**Mathematical Foundations of Large Language Models (MF-LLM)**
|
| 674 |
|
| 675 |
通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
|
| 676 |
+
按 safetensors 原始层号分析,支持混合模态模型(视觉/音频/语言同时输出)。
|
| 677 |
|
| 678 |
| 定律 | 指标 | 理论极值 |
|
| 679 |
|------|------|---------|
|
|
|
|
| 699 |
placeholder="hf_xxxxxxxxxxxxxxxx",
|
| 700 |
type="password"
|
| 701 |
)
|
| 702 |
+
with gr.Row():
|
| 703 |
+
start_layer_input = gr.Number(
|
| 704 |
+
label="起始层号(原始层号,含)",
|
| 705 |
+
value=0, minimum=0, maximum=999, precision=0
|
| 706 |
+
)
|
| 707 |
+
end_layer_input = gr.Number(
|
| 708 |
+
label="结束层号(原始层号,含)",
|
| 709 |
+
value=5, minimum=0, maximum=999, precision=0
|
| 710 |
+
)
|
| 711 |
analyze_btn = gr.Button("🚀 开始分析", variant="primary")
|
| 712 |
|
| 713 |
with gr.Column(scale=1):
|
| 714 |
gr.Markdown("""
|
| 715 |
### ✅ 推荐模型
|
| 716 |
```
|
| 717 |
+
google/gemma-4-e2b
|
| 718 |
+
google/gemma-4-31b-it
|
| 719 |
Qwen/Qwen2.5-14B-Instruct
|
| 720 |
meta-llama/Llama-3-8B
|
| 721 |
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
|
| 722 |
```
|
| 723 |
+
### 层号说明
|
| 724 |
+
- 层号 = safetensors key 中 `layers.{N}` 的 **N**
|
| 725 |
+
- **不按组件重排**,原始值直接输出
|
| 726 |
+
- 混合模态模型(如 Gemma-4):
|
| 727 |
+
- `layers.0~11` 同时含 audio/vision/text 层
|
| 728 |
+
- 全部输出,按前缀区分组件
|
| 729 |
+
|
| 730 |
+
### 示例:Gemma-4-E2B
|
| 731 |
+
| 组件 | 层范围 |
|
| 732 |
+
|------|--------|
|
| 733 |
+
| audio_tower | 0~11 |
|
| 734 |
+
| language_model | 0~34 |
|
| 735 |
+
| vision_tower | 0~15 |
|
| 736 |
""")
|
| 737 |
|
| 738 |
log_output = gr.Textbox(
|
| 739 |
label="分析日志",
|
| 740 |
+
lines=40, max_lines=300
|
| 741 |
)
|
| 742 |
table_output = gr.Dataframe(
|
| 743 |
label="逐头全指标结果表",
|
| 744 |
headers=[
|
| 745 |
+
"prefix","layer","kv_head","q_head",
|
| 746 |
"pearson_QK","spearman_QK","pearson_QV","pearson_KV",
|
| 747 |
"ssr_QK","ssr_QV","ssr_KV",
|
| 748 |
"cosU_QK","cosU_QV","cosU_KV",
|
|
|
|
| 758 |
|
| 759 |
analyze_btn.click(
|
| 760 |
fn=analyze_model,
|
| 761 |
+
inputs=[model_input, token_input, start_layer_input, end_layer_input],
|
| 762 |
outputs=[log_output, table_output]
|
| 763 |
)
|
| 764 |
|