Spaces:
Running
def build_tab_analyze():
    """Build the analysis tab ("📊 分析") of the Gradio UI.

    Creates the model-id / token / layer-range inputs, the sidebar, the log
    box and the per-head metrics table, then wires the analyze button to
    ``run_analysis``.

    NOTE(review): the source this was restored from had all indentation
    collapsed, so the container nesting below is reconstructed from the
    component order -- confirm the intended layout.
    NOTE(review): presumably must be called inside an active ``gr.Blocks``
    context -- confirm against the caller.
    """
    with gr.Tab("📊 分析"):
        # The Markdown text is kept flush-left so the string content matches
        # the source exactly.
        gr.Markdown("""
**第二步:选择层范围,计算王氏五定律全指标**
层号 = safetensors key 中 `layers.{N}` 的原始 N,K=V 共享层自动处理。
""")
        with gr.Row():
            # Left column: all user inputs plus the trigger button.
            with gr.Column(scale=3):
                model_id_input = gr.Textbox(
                    label="HuggingFace 模型 ID",
                    placeholder="google/gemma-4-e2b",
                    value="google/gemma-4-e2b"
                )
                token_input = gr.Textbox(
                    label="HF Access Token(公开模型可留空)",
                    type="password"
                )
                with gr.Row():
                    # Inclusive layer range; precision=0 makes these integers.
                    start_input = gr.Number(
                        label="起始层号(含)",
                        value=0, minimum=0, maximum=9999, precision=0
                    )
                    end_input = gr.Number(
                        label="结束层号(含)",
                        value=5, minimum=0, maximum=9999, precision=0
                    )
                analyze_btn = gr.Button("🚀 开始分析", variant="primary")
            # Right column: static help text.
            with gr.Column(scale=1):
                gr.Markdown(SIDEBAR_MD)
        analyze_log = gr.Textbox(
            label="分析日志(逐头详情)",
            lines=35, max_lines=300
        )
        analyze_table = gr.Dataframe(
            label="逐头全指标结果表",
            headers=[
                "prefix", "layer", "kv_head", "q_head", "kv_shared",
                "pearson_QK", "spearman_QK", "pearson_QV", "pearson_KV",
                "ssr_QK", "ssr_QV", "ssr_KV",
                "cosU_QK", "cosU_QV", "cosU_KV",
                "cosV_QK", "cosV_QV", "cosV_KV",
                "alpha_QK", "alpha_QV", "alpha_KV",
                "alpha_res_QK", "alpha_res_QV", "alpha_res_KV",
                "sigma_max_Q", "sigma_min_Q",
                "sigma_max_K", "sigma_min_K",
                "sigma_max_V", "sigma_min_V",
                "cond_Q", "cond_K", "cond_V",
                "head_dim", "d_model", "n_q_heads", "n_kv_heads",
            ]
        )
        # run_analysis receives the four inputs in this order and fills the
        # log box and the results table.
        analyze_btn.click(
            fn=run_analysis,
            inputs=[model_id_input, token_input, start_input, end_input],
            outputs=[analyze_log, analyze_table]
        )
- app.py +1 -1
- core/metrics.py +110 -127
- ui/tab_analyze.py +49 -35
- ui/tab_inspect.py +37 -9
|
@@ -11,7 +11,7 @@ from ui.tab_analyze import build_tab_analyze
|
|
| 11 |
|
| 12 |
with gr.Blocks(
|
| 13 |
title="Wang's Five Laws — LLM Spectral Analyzer",
|
| 14 |
-
theme=gr.themes.Soft()
|
| 15 |
) as demo:
|
| 16 |
|
| 17 |
gr.Markdown("""
|
|
|
|
| 11 |
|
| 12 |
with gr.Blocks(
|
| 13 |
title="Wang's Five Laws — LLM Spectral Analyzer",
|
| 14 |
+
# theme=gr.themes.Soft()
|
| 15 |
) as demo:
|
| 16 |
|
| 17 |
gr.Markdown("""
|
|
@@ -1,32 +1,21 @@
|
|
| 1 |
# core/metrics.py
|
| 2 |
-
"""
|
| 3 |
-
王氏五定律全部指标计算
|
| 4 |
-
输入:Q/K/V weight tensors
|
| 5 |
-
输出:结构化指标字典
|
| 6 |
-
"""
|
| 7 |
-
|
| 8 |
import torch
|
| 9 |
import numpy as np
|
| 10 |
from scipy.stats import spearmanr
|
| 11 |
from core.layer_profile import LayerProfile
|
| 12 |
|
| 13 |
|
| 14 |
-
# ─────────────────────────────────────────────
|
| 15 |
-
# 基础指标
|
| 16 |
-
# ─────────────────────────────────────────────
|
| 17 |
-
|
| 18 |
def pearson(a: torch.Tensor, b: torch.Tensor) -> float:
|
| 19 |
am, bm = a - a.mean(), b - b.mean()
|
| 20 |
den = torch.norm(am) * torch.norm(bm)
|
| 21 |
return float(torch.dot(am, bm) / den) if den > 1e-10 else 0.0
|
| 22 |
|
| 23 |
|
| 24 |
-
def
|
| 25 |
return float(spearmanr(a.numpy(), b.numpy())[0])
|
| 26 |
|
| 27 |
|
| 28 |
def ssr(a: torch.Tensor, b: torch.Tensor) -> float:
|
| 29 |
-
"""Spectral Shape Residual(第二定律核心)"""
|
| 30 |
n = min(a.shape[0], b.shape[0])
|
| 31 |
an = a[:n] / (torch.norm(a[:n]) + 1e-10)
|
| 32 |
bn = b[:n] / (torch.norm(b[:n]) + 1e-10)
|
|
@@ -34,38 +23,48 @@ def ssr(a: torch.Tensor, b: torch.Tensor) -> float:
|
|
| 34 |
|
| 35 |
|
| 36 |
def svr(a: torch.Tensor, b: torch.Tensor) -> tuple[float, float]:
|
| 37 |
-
"""
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
if den < 1e-10:
|
| 42 |
return 1.0, 0.0
|
| 43 |
-
alpha
|
| 44 |
-
|
| 45 |
-
return float(alpha),
|
| 46 |
|
| 47 |
|
| 48 |
def cos_U(U_a: torch.Tensor, U_b: torch.Tensor) -> float:
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
c = min(U_a.shape[1], U_b.shape[1])
|
| 52 |
Ua = U_a[:r, :c] / (torch.norm(U_a[:r, :c], dim=0, keepdim=True) + 1e-10)
|
| 53 |
Ub = U_b[:r, :c] / (torch.norm(U_b[:r, :c], dim=0, keepdim=True) + 1e-10)
|
| 54 |
return float(torch.diag(torch.abs(Ua.T @ Ub)).mean())
|
| 55 |
|
| 56 |
|
| 57 |
def cos_V(Vt_a: torch.Tensor, Vt_b: torch.Tensor) -> float:
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
c = min(Vt_a.shape[1], Vt_b.shape[1])
|
| 61 |
Va = Vt_a[:r, :c] / (torch.norm(Vt_a[:r, :c], dim=1, keepdim=True) + 1e-10)
|
| 62 |
Vb = Vt_b[:r, :c] / (torch.norm(Vt_b[:r, :c], dim=1, keepdim=True) + 1e-10)
|
| 63 |
return float(torch.abs((Va * Vb).sum(dim=1)).mean())
|
| 64 |
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
def analyze_layer(
|
| 71 |
W_q: torch.Tensor,
|
|
@@ -73,12 +72,7 @@ def analyze_layer(
|
|
| 73 |
W_v: torch.Tensor,
|
| 74 |
profile: LayerProfile,
|
| 75 |
) -> tuple[list[dict], str]:
|
| 76 |
-
"""
|
| 77 |
-
对一个 LayerProfile 做逐头全指标分析。
|
| 78 |
-
返回 (records列表, 日志字符串)
|
| 79 |
|
| 80 |
-
K=V 共享层:KV 指标直接填理论值,不重复计算
|
| 81 |
-
"""
|
| 82 |
n_q = profile.n_q_heads
|
| 83 |
n_kv = profile.n_kv_heads
|
| 84 |
d_head = profile.head_dim
|
|
@@ -88,7 +82,6 @@ def analyze_layer(
|
|
| 88 |
records: list[dict] = []
|
| 89 |
lines: list[str] = []
|
| 90 |
|
| 91 |
-
# 日志头
|
| 92 |
kv_tag = " [K=V共享]" if kv_shared else ""
|
| 93 |
lines.append(
|
| 94 |
f"\n{'─'*80}\n"
|
|
@@ -111,9 +104,11 @@ def analyze_layer(
|
|
| 111 |
U_k, s_k, Vt_k = torch.linalg.svd(k_t, full_matrices=False)
|
| 112 |
U_v, s_v, Vt_v = torch.linalg.svd(v_t, full_matrices=False)
|
| 113 |
|
|
|
|
|
|
|
|
|
|
| 114 |
# KV 指标
|
| 115 |
if kv_shared:
|
| 116 |
-
# W_v = W_k → 理论值
|
| 117 |
ssr_kv = 0.0
|
| 118 |
pkv = 1.0
|
| 119 |
cosU_KV = 1.0
|
|
@@ -121,11 +116,9 @@ def analyze_layer(
|
|
| 121 |
alpha_kv = 1.0
|
| 122 |
res_kv = 0.0
|
| 123 |
else:
|
|
|
|
| 124 |
ssr_kv = ssr(s_k, s_v)
|
| 125 |
-
pkv = pearson(
|
| 126 |
-
s_k[:min(len(s_k), len(s_v))],
|
| 127 |
-
s_v[:min(len(s_k), len(s_v))]
|
| 128 |
-
)
|
| 129 |
cosU_KV = cos_U(U_k, U_v)
|
| 130 |
cosV_KV = cos_V(Vt_k, Vt_v)
|
| 131 |
alpha_kv, res_kv = svr(s_k, s_v)
|
|
@@ -135,81 +128,72 @@ def analyze_layer(
|
|
| 135 |
q_t = W_q[h * d_head:(h + 1) * d_head, :]
|
| 136 |
U_q, s_q, Vt_q = torch.linalg.svd(q_t, full_matrices=False)
|
| 137 |
|
|
|
|
|
|
|
| 138 |
nqk = min(len(s_q), len(s_k))
|
| 139 |
nqv = min(len(s_q), len(s_v))
|
| 140 |
|
| 141 |
-
# QK
|
| 142 |
-
pqk
|
| 143 |
-
spqk
|
| 144 |
-
ssr_qk
|
| 145 |
a_qk, r_qk = svr(s_q, s_k)
|
| 146 |
-
cU_QK
|
| 147 |
-
cV_QK
|
| 148 |
|
| 149 |
-
# QV
|
| 150 |
-
pqv
|
| 151 |
-
ssr_qv
|
| 152 |
a_qv, r_qv = svr(s_q, s_v)
|
| 153 |
-
cU_QV
|
| 154 |
-
cV_QV
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
smnv = float(s_v[s_v > 1e-10].min()) if (s_v > 1e-10).any() else 0.
|
| 163 |
-
|
| 164 |
-
rec = {
|
| 165 |
-
# 位置
|
| 166 |
-
"prefix": profile.prefix,
|
| 167 |
-
"layer": profile.layer_idx,
|
| 168 |
-
"kv_head": kv_h,
|
| 169 |
-
"q_head": h,
|
| 170 |
-
"kv_shared": kv_shared,
|
| 171 |
# 第一定律
|
| 172 |
-
"pearson_QK":
|
| 173 |
-
"spearman_QK":
|
| 174 |
-
"pearson_QV":
|
| 175 |
-
"pearson_KV":
|
| 176 |
# 第二定律
|
| 177 |
-
"ssr_QK":
|
| 178 |
-
"ssr_QV":
|
| 179 |
-
"ssr_KV":
|
| 180 |
# 第四定律
|
| 181 |
-
"cosU_QK":
|
| 182 |
-
"cosU_QV":
|
| 183 |
-
"cosU_KV":
|
| 184 |
# 第五定律
|
| 185 |
-
"cosV_QK":
|
| 186 |
-
"cosV_QV":
|
| 187 |
-
"cosV_KV":
|
| 188 |
-
# 尺度
|
| 189 |
-
"alpha_QK":
|
| 190 |
-
"alpha_QV":
|
| 191 |
-
"alpha_KV":
|
| 192 |
-
"alpha_res_QK":
|
| 193 |
-
"alpha_res_QV":
|
| 194 |
-
"alpha_res_KV":
|
| 195 |
-
# 奇异值范围
|
| 196 |
-
"sigma_max_Q":
|
| 197 |
-
"sigma_min_Q":
|
| 198 |
-
"sigma_max_K":
|
| 199 |
-
"sigma_min_K":
|
| 200 |
-
"sigma_max_V":
|
| 201 |
-
"sigma_min_V":
|
| 202 |
-
|
| 203 |
-
"
|
| 204 |
-
"
|
| 205 |
-
"cond_V": round(smxv / (smnv + 1e-10), 2),
|
| 206 |
# 维度信息
|
| 207 |
-
"head_dim":
|
| 208 |
-
"d_model":
|
| 209 |
-
"n_q_heads":
|
| 210 |
-
"n_kv_heads":
|
| 211 |
-
}
|
| 212 |
-
records.append(rec)
|
| 213 |
|
| 214 |
lines.append(
|
| 215 |
f" {kv_h:>3d} {h:>3d} │"
|
|
@@ -223,12 +207,7 @@ def analyze_layer(
|
|
| 223 |
return records, "".join(lines)
|
| 224 |
|
| 225 |
|
| 226 |
-
# ─────────────────────────────────────────────
|
| 227 |
-
# 全局汇总统计
|
| 228 |
-
# ─────────────────────────────────────────────
|
| 229 |
-
|
| 230 |
def summarize_records(records: list[dict], model_id: str) -> str:
|
| 231 |
-
"""生成全局汇总文本"""
|
| 232 |
if not records:
|
| 233 |
return "❌ 无记录\n"
|
| 234 |
|
|
@@ -253,36 +232,40 @@ def summarize_records(records: list[dict], model_id: str) -> str:
|
|
| 253 |
]
|
| 254 |
|
| 255 |
for pfx in sorted(df["prefix"].unique()):
|
| 256 |
-
pdf
|
| 257 |
-
|
| 258 |
-
real_kv
|
| 259 |
|
| 260 |
lines.append(
|
| 261 |
f"\n▶ {pfx}\n"
|
| 262 |
f" 记录:{len(pdf)} 条,"
|
| 263 |
f"层:{sorted(pdf['layer'].unique())}\n"
|
| 264 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
lines += [
|
| 266 |
" 【第一定律 Pearson r → 1】\n",
|
| 267 |
-
stat(pdf["pearson_QK"].values,
|
| 268 |
-
stat(pdf["pearson_QV"].values,
|
| 269 |
-
stat(
|
| 270 |
" 【第二定律 SSR → 0】\n",
|
| 271 |
-
stat(pdf["ssr_QK"].values,
|
| 272 |
-
stat(pdf["ssr_QV"].values,
|
| 273 |
-
stat(
|
| 274 |
" 【第四定律 cosU 输出子空间】\n",
|
| 275 |
-
stat(pdf["cosU_QK"].values,
|
| 276 |
-
stat(pdf["cosU_QV"].values,
|
| 277 |
-
stat(
|
| 278 |
" 【第五定律 cosV 输入子空间】\n",
|
| 279 |
-
stat(pdf["cosV_QK"].values,
|
| 280 |
-
stat(pdf["cosV_QV"].values,
|
| 281 |
-
stat(
|
| 282 |
-
" 【第三定律 条件数】\n",
|
| 283 |
-
stat(pdf["cond_Q"].values,
|
| 284 |
-
stat(pdf["cond_K"].values,
|
| 285 |
-
stat(pdf["cond_V"].values,
|
| 286 |
]
|
| 287 |
|
| 288 |
lines.append(
|
|
|
|
| 1 |
# core/metrics.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import torch
|
| 3 |
import numpy as np
|
| 4 |
from scipy.stats import spearmanr
|
| 5 |
from core.layer_profile import LayerProfile
|
| 6 |
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
def pearson(a: torch.Tensor, b: torch.Tensor) -> float:
    """Return the Pearson correlation coefficient of two 1-D tensors.

    The inputs are truncated to their common length (the same convention
    ``ssr`` and ``svr`` use), so callers no longer have to pre-slice them.
    Integer inputs are promoted to the default float dtype because
    ``Tensor.mean`` is not defined for integer tensors.

    Args:
        a: 1-D tensor of values.
        b: 1-D tensor of values.

    Returns:
        The correlation clamped to ``[-1.0, 1.0]``, or ``0.0`` when it is
        undefined (empty input or a (near-)constant vector).
    """
    n = min(a.shape[0], b.shape[0])
    if n == 0:
        return 0.0

    # torch.dot needs both operands in one floating dtype.
    dtype = torch.promote_types(a.dtype, b.dtype)
    if not dtype.is_floating_point:
        dtype = torch.get_default_dtype()
    x = a[:n].to(dtype)
    y = b[:n].to(dtype)

    xm, ym = x - x.mean(), y - y.mean()
    den = torch.norm(xm) * torch.norm(ym)
    # "not >" (rather than "<=") also rejects a NaN denominator.
    if not den > 1e-10:
        return 0.0

    r = float(torch.dot(xm, ym) / den)
    # Floating-point rounding can push |r| marginally past 1.
    return max(-1.0, min(1.0, r))
|
| 12 |
|
| 13 |
|
| 14 |
+
def spearman_r(a: torch.Tensor, b: torch.Tensor) -> float:
    """Return the Spearman rank correlation of two 1-D tensors.

    The inputs are truncated to their common length. They are detached,
    moved to CPU and widened to float64 before the NumPy conversion, so
    tensors that track gradients, live on another device, or use a dtype
    NumPy lacks (e.g. bfloat16) are accepted; widening does not change the
    ranks.

    Args:
        a: 1-D tensor of values.
        b: 1-D tensor of values.

    Returns:
        The rank correlation, or ``0.0`` when it is undefined (fewer than
        two samples, or a constant input for which SciPy yields NaN). This
        matches the ``0.0`` fallback of ``pearson``.
    """
    n = min(a.shape[0], b.shape[0])
    if n < 2:
        return 0.0

    x = a[:n].detach().cpu().to(torch.float64).numpy()
    y = b[:n].detach().cpu().to(torch.float64).numpy()
    rho = float(spearmanr(x, y)[0])
    # NaN is the only float that is not equal to itself.
    return 0.0 if rho != rho else rho
|
| 16 |
|
| 17 |
|
| 18 |
def ssr(a: torch.Tensor, b: torch.Tensor) -> float:
|
|
|
|
| 19 |
n = min(a.shape[0], b.shape[0])
|
| 20 |
an = a[:n] / (torch.norm(a[:n]) + 1e-10)
|
| 21 |
bn = b[:n] / (torch.norm(b[:n]) + 1e-10)
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def svr(a: torch.Tensor, b: torch.Tensor) -> tuple[float, float]:
    """Fit ``a ~= alpha * b`` by least squares and report the fit quality.

    ``alpha = argmin ||a - alpha * b||^2 = <a, b> / <b, b>``, computed on the
    inputs truncated to their common length.

    Args:
        a: 1-D tensor (target).
        b: 1-D tensor (regressor).

    Returns:
        ``(alpha, residual)`` where ``residual = mean((a - alpha * b) ** 2)``.
        When ``<b, b>`` is below ``1e-10`` the scale is undetermined and
        ``alpha`` falls back to ``1.0``; the residual is still evaluated with
        that ``alpha`` instead of being reported as a perfect ``0.0``.
        Empty input yields ``(1.0, 0.0)``.
    """
    n = min(a.shape[0], b.shape[0])
    if n == 0:
        return 1.0, 0.0

    sa, sb = a[:n], b[:n]
    den = torch.dot(sb, sb)
    if den < 1e-10:
        # b is (numerically) zero: every alpha fits equally well.
        alpha = 1.0
    else:
        alpha = float(torch.dot(sa, sb) / den)

    residual = float(torch.mean((sa - alpha * sb) ** 2))
    return alpha, residual
|
| 39 |
|
| 40 |
|
| 41 |
def cos_U(U_a: torch.Tensor, U_b: torch.Tensor) -> float:
    """Return the mean absolute cosine between matching columns of two matrices.

    Both matrices are cropped to their common ``(rows, cols)`` shape, every
    column is re-normalised to unit length, and column ``j`` of ``U_a`` is
    compared with column ``j`` of ``U_b``. The absolute value makes the
    measure independent of each vector's sign.

    Args:
        U_a: 2-D tensor whose columns are the vectors to compare.
        U_b: 2-D tensor whose columns are the vectors to compare.

    Returns:
        The average of ``|cos|`` over the common columns, or ``0.0`` when the
        common shape is empty.
    """
    r = min(U_a.shape[0], U_b.shape[0])
    c = min(U_a.shape[1], U_b.shape[1])
    if r == 0 or c == 0:
        return 0.0

    Ua = U_a[:r, :c]
    Ub = U_b[:r, :c]
    Ua = Ua / (torch.norm(Ua, dim=0, keepdim=True) + 1e-10)
    Ub = Ub / (torch.norm(Ub, dim=0, keepdim=True) + 1e-10)
    # Column-wise dot products are exactly the diagonal of Ua.T @ Ub, without
    # building the full (c x c) product.
    return float(torch.abs((Ua * Ub).sum(dim=0)).mean())
|
| 47 |
|
| 48 |
|
| 49 |
def cos_V(Vt_a: torch.Tensor, Vt_b: torch.Tensor) -> float:
    """Return the mean absolute cosine between matching rows of two matrices.

    Both matrices are cropped to their common ``(rows, cols)`` shape, every
    row is re-normalised to unit length, and row ``i`` of ``Vt_a`` is compared
    with row ``i`` of ``Vt_b``. The absolute value makes the measure
    independent of each vector's sign.

    Args:
        Vt_a: 2-D tensor whose rows are the vectors to compare.
        Vt_b: 2-D tensor whose rows are the vectors to compare.

    Returns:
        The average of ``|cos|`` over the common rows, or ``0.0`` when the
        common shape is empty.
    """
    r = min(Vt_a.shape[0], Vt_b.shape[0])
    c = min(Vt_a.shape[1], Vt_b.shape[1])
    if r == 0 or c == 0:
        return 0.0

    Va = Vt_a[:r, :c]
    Vb = Vt_b[:r, :c]
    Va = Va / (torch.norm(Va, dim=1, keepdim=True) + 1e-10)
    Vb = Vb / (torch.norm(Vb, dim=1, keepdim=True) + 1e-10)
    return float(torch.abs((Va * Vb).sum(dim=1)).mean())
|
| 55 |
|
| 56 |
|
| 57 |
+
def sigma_stats(s: torch.Tensor) -> tuple[float, float, float]:
    """Summarise a vector of singular values.

    Args:
        s: tensor of singular values.

    Returns:
        ``(sigma_max, sigma_min, cond)``. ``sigma_min`` is the smallest value
        strictly above ``1e-10`` (``0.0`` if there is none), so numerically
        zero singular values do not inflate the condition number.
        ``cond = sigma_max / (sigma_min + 1e-10)``. An empty tensor yields
        ``(0.0, 0.0, 0.0)`` instead of raising.
    """
    if s.numel() == 0:
        # Tensor.max() raises on an empty tensor.
        return 0.0, 0.0, 0.0

    s_max = float(s.max())
    valid = s[s > 1e-10]
    s_min = float(valid.min()) if valid.numel() > 0 else 0.0
    # The epsilon keeps the division defined when no value passed the filter.
    cond = s_max / (s_min + 1e-10)
    return s_max, s_min, cond
|
| 67 |
+
|
| 68 |
|
| 69 |
def analyze_layer(
|
| 70 |
W_q: torch.Tensor,
|
|
|
|
| 72 |
W_v: torch.Tensor,
|
| 73 |
profile: LayerProfile,
|
| 74 |
) -> tuple[list[dict], str]:
|
|
|
|
|
|
|
|
|
|
| 75 |
|
|
|
|
|
|
|
| 76 |
n_q = profile.n_q_heads
|
| 77 |
n_kv = profile.n_kv_heads
|
| 78 |
d_head = profile.head_dim
|
|
|
|
| 82 |
records: list[dict] = []
|
| 83 |
lines: list[str] = []
|
| 84 |
|
|
|
|
| 85 |
kv_tag = " [K=V共享]" if kv_shared else ""
|
| 86 |
lines.append(
|
| 87 |
f"\n{'─'*80}\n"
|
|
|
|
| 104 |
U_k, s_k, Vt_k = torch.linalg.svd(k_t, full_matrices=False)
|
| 105 |
U_v, s_v, Vt_v = torch.linalg.svd(v_t, full_matrices=False)
|
| 106 |
|
| 107 |
+
smxk, smnk, cond_k = sigma_stats(s_k)
|
| 108 |
+
smxv, smnv, cond_v = sigma_stats(s_v)
|
| 109 |
+
|
| 110 |
# KV 指标
|
| 111 |
if kv_shared:
|
|
|
|
| 112 |
ssr_kv = 0.0
|
| 113 |
pkv = 1.0
|
| 114 |
cosU_KV = 1.0
|
|
|
|
| 116 |
alpha_kv = 1.0
|
| 117 |
res_kv = 0.0
|
| 118 |
else:
|
| 119 |
+
n_kv_sv = min(len(s_k), len(s_v))
|
| 120 |
ssr_kv = ssr(s_k, s_v)
|
| 121 |
+
pkv = pearson(s_k[:n_kv_sv], s_v[:n_kv_sv])
|
|
|
|
|
|
|
|
|
|
| 122 |
cosU_KV = cos_U(U_k, U_v)
|
| 123 |
cosV_KV = cos_V(Vt_k, Vt_v)
|
| 124 |
alpha_kv, res_kv = svr(s_k, s_v)
|
|
|
|
| 128 |
q_t = W_q[h * d_head:(h + 1) * d_head, :]
|
| 129 |
U_q, s_q, Vt_q = torch.linalg.svd(q_t, full_matrices=False)
|
| 130 |
|
| 131 |
+
smxq, smnq, cond_q = sigma_stats(s_q)
|
| 132 |
+
|
| 133 |
nqk = min(len(s_q), len(s_k))
|
| 134 |
nqv = min(len(s_q), len(s_v))
|
| 135 |
|
| 136 |
+
# QK
|
| 137 |
+
pqk = pearson(s_q[:nqk], s_k[:nqk])
|
| 138 |
+
spqk = spearman_r(s_q[:nqk], s_k[:nqk])
|
| 139 |
+
ssr_qk = ssr(s_q, s_k)
|
| 140 |
a_qk, r_qk = svr(s_q, s_k)
|
| 141 |
+
cU_QK = cos_U(U_q, U_k)
|
| 142 |
+
cV_QK = cos_V(Vt_q, Vt_k)
|
| 143 |
|
| 144 |
+
# QV
|
| 145 |
+
pqv = pearson(s_q[:nqv], s_v[:nqv])
|
| 146 |
+
ssr_qv = ssr(s_q, s_v)
|
| 147 |
a_qv, r_qv = svr(s_q, s_v)
|
| 148 |
+
cU_QV = cos_U(U_q, U_v)
|
| 149 |
+
cV_QV = cos_V(Vt_q, Vt_v)
|
| 150 |
+
|
| 151 |
+
records.append({
|
| 152 |
+
"prefix": profile.prefix,
|
| 153 |
+
"layer": profile.layer_idx,
|
| 154 |
+
"kv_head": kv_h,
|
| 155 |
+
"q_head": h,
|
| 156 |
+
"kv_shared": kv_shared,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
# 第一定律
|
| 158 |
+
"pearson_QK": round(pqk, 6),
|
| 159 |
+
"spearman_QK": round(spqk, 6),
|
| 160 |
+
"pearson_QV": round(pqv, 6),
|
| 161 |
+
"pearson_KV": round(pkv, 6),
|
| 162 |
# 第二定律
|
| 163 |
+
"ssr_QK": round(ssr_qk, 8),
|
| 164 |
+
"ssr_QV": round(ssr_qv, 8),
|
| 165 |
+
"ssr_KV": round(ssr_kv, 8),
|
| 166 |
# 第四定律
|
| 167 |
+
"cosU_QK": round(cU_QK, 6),
|
| 168 |
+
"cosU_QV": round(cU_QV, 6),
|
| 169 |
+
"cosU_KV": round(cosU_KV, 6),
|
| 170 |
# 第五定律
|
| 171 |
+
"cosV_QK": round(cV_QK, 6),
|
| 172 |
+
"cosV_QV": round(cV_QV, 6),
|
| 173 |
+
"cosV_KV": round(cosV_KV, 6),
|
| 174 |
+
# 尺度因子 + 最小二乘残差
|
| 175 |
+
"alpha_QK": round(a_qk, 4),
|
| 176 |
+
"alpha_QV": round(a_qv, 4),
|
| 177 |
+
"alpha_KV": round(alpha_kv,4),
|
| 178 |
+
"alpha_res_QK": round(r_qk, 6),
|
| 179 |
+
"alpha_res_QV": round(r_qv, 6),
|
| 180 |
+
"alpha_res_KV": round(res_kv, 6),
|
| 181 |
+
# 第三定律:奇异值范围 + 条件数
|
| 182 |
+
"sigma_max_Q": round(smxq, 4),
|
| 183 |
+
"sigma_min_Q": round(smnq, 4),
|
| 184 |
+
"sigma_max_K": round(smxk, 4),
|
| 185 |
+
"sigma_min_K": round(smnk, 4),
|
| 186 |
+
"sigma_max_V": round(smxv, 4),
|
| 187 |
+
"sigma_min_V": round(smnv, 4),
|
| 188 |
+
"cond_Q": round(cond_q, 2),
|
| 189 |
+
"cond_K": round(cond_k, 2),
|
| 190 |
+
"cond_V": round(cond_v, 2),
|
|
|
|
| 191 |
# 维度信息
|
| 192 |
+
"head_dim": d_head,
|
| 193 |
+
"d_model": profile.d_model,
|
| 194 |
+
"n_q_heads": n_q,
|
| 195 |
+
"n_kv_heads": n_kv,
|
| 196 |
+
})
|
|
|
|
| 197 |
|
| 198 |
lines.append(
|
| 199 |
f" {kv_h:>3d} {h:>3d} │"
|
|
|
|
| 207 |
return records, "".join(lines)
|
| 208 |
|
| 209 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
def summarize_records(records: list[dict], model_id: str) -> str:
|
|
|
|
| 211 |
if not records:
|
| 212 |
return "❌ 无记录\n"
|
| 213 |
|
|
|
|
| 232 |
]
|
| 233 |
|
| 234 |
for pfx in sorted(df["prefix"].unique()):
|
| 235 |
+
pdf = df[df["prefix"] == pfx]
|
| 236 |
+
real_kv = pdf[~pdf["kv_shared"]]
|
| 237 |
+
kv_df = real_kv if len(real_kv) > 0 else pdf
|
| 238 |
|
| 239 |
lines.append(
|
| 240 |
f"\n▶ {pfx}\n"
|
| 241 |
f" 记录:{len(pdf)} 条,"
|
| 242 |
f"层:{sorted(pdf['layer'].unique())}\n"
|
| 243 |
)
|
| 244 |
+
if pdf["kv_shared"].any():
|
| 245 |
+
n_shared = pdf[pdf["kv_shared"]]["layer"].nunique()
|
| 246 |
+
lines.append(f" ⚠️ 含 {n_shared} 个 K=V共享层,KV指标为理论值\n")
|
| 247 |
+
|
| 248 |
lines += [
|
| 249 |
" 【第一定律 Pearson r → 1】\n",
|
| 250 |
+
stat(pdf["pearson_QK"].values, "Q-K:"),
|
| 251 |
+
stat(pdf["pearson_QV"].values, "Q-V:"),
|
| 252 |
+
stat(kv_df["pearson_KV"].values, "K-V(实):"),
|
| 253 |
" 【第二定律 SSR → 0】\n",
|
| 254 |
+
stat(pdf["ssr_QK"].values, "Q-K:"),
|
| 255 |
+
stat(pdf["ssr_QV"].values, "Q-V:"),
|
| 256 |
+
stat(kv_df["ssr_KV"].values, "K-V(实):"),
|
| 257 |
" 【第四定律 cosU 输出子空间】\n",
|
| 258 |
+
stat(pdf["cosU_QK"].values, "cosU Q-K:"),
|
| 259 |
+
stat(pdf["cosU_QV"].values, "cosU Q-V:"),
|
| 260 |
+
stat(kv_df["cosU_KV"].values, "cosU K-V:"),
|
| 261 |
" 【第五定律 cosV 输入子空间】\n",
|
| 262 |
+
stat(pdf["cosV_QK"].values, "cosV Q-K:"),
|
| 263 |
+
stat(pdf["cosV_QV"].values, "cosV Q-V:"),
|
| 264 |
+
stat(kv_df["cosV_KV"].values, "cosV K-V:"),
|
| 265 |
+
" 【第三定律 条件数(sigma_min 已过滤零值)】\n",
|
| 266 |
+
stat(pdf["cond_Q"].values, "cond Q:"),
|
| 267 |
+
stat(pdf["cond_K"].values, "cond K:"),
|
| 268 |
+
stat(pdf["cond_V"].values, "cond V:"),
|
| 269 |
]
|
| 270 |
|
| 271 |
lines.append(
|
|
@@ -26,6 +26,39 @@ from core.layer_profile import (
|
|
| 26 |
from core.metrics import analyze_layer, summarize_records
|
| 27 |
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
def run_analysis(
|
| 30 |
model_id: str,
|
| 31 |
hf_token: str,
|
|
@@ -186,16 +219,11 @@ def run_analysis(
|
|
| 186 |
# Tab2 UI 组件
|
| 187 |
# ─────────────────────────────────────────────
|
| 188 |
|
| 189 |
-
def build_tab_analyze(
|
| 190 |
-
shared_model_id: gr.Textbox = None,
|
| 191 |
-
shared_token: gr.Textbox = None,
|
| 192 |
-
):
|
| 193 |
with gr.Tab("📊 分析"):
|
| 194 |
gr.Markdown("""
|
| 195 |
**第二步:选择层范围,计算王氏五定律全指标**
|
| 196 |
-
|
| 197 |
-
- 所有组件(language/vision/audio)同时分析
|
| 198 |
-
- K=V 共享层自动标注,KV 指标填理论值
|
| 199 |
""")
|
| 200 |
|
| 201 |
with gr.Row():
|
|
@@ -205,37 +233,23 @@ def build_tab_analyze(
|
|
| 205 |
placeholder="google/gemma-4-e2b",
|
| 206 |
value="google/gemma-4-e2b"
|
| 207 |
)
|
| 208 |
-
with gr.Column(scale=2):
|
| 209 |
token_input = gr.Textbox(
|
| 210 |
-
label="HF Access Token",
|
| 211 |
type="password"
|
| 212 |
)
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
with gr.Row():
|
| 228 |
-
gr.Markdown("""
|
| 229 |
-
### 层号参考
|
| 230 |
-
| 模型 | 组件 | 层范围 |
|
| 231 |
-
|------|------|--------|
|
| 232 |
-
| Gemma-4-E2B | language | 0~34 |
|
| 233 |
-
| Gemma-4-E2B | vision | 0~15 |
|
| 234 |
-
| Gemma-4-31B | language(local) | 0~59(非5的倍数+5) |
|
| 235 |
-
| Gemma-4-31B | language(global) | 5,11,17...59 |
|
| 236 |
-
| Qwen2.5-14B | language | 0~47 |
|
| 237 |
-
| LLaMA-3-8B | language | 0~31 |
|
| 238 |
-
""")
|
| 239 |
|
| 240 |
analyze_log = gr.Textbox(
|
| 241 |
label="分析日志(逐头详情)",
|
|
|
|
| 26 |
from core.metrics import analyze_layer, summarize_records
|
| 27 |
|
| 28 |
|
| 29 |
+
SIDEBAR_MD = """
|
| 30 |
+
### ✅ 推荐模型
|
| 31 |
+
google/gemma-4-e2b
|
| 32 |
+
google/gemma-4-e4b-it
|
| 33 |
+
google/gemma-4-31b-it
|
| 34 |
+
Qwen/Qwen2.5-14B-Instruct
|
| 35 |
+
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
|
| 36 |
+
meta-llama/Meta-Llama-3-8B
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
### 层号说明
|
| 40 |
+
- 层号 = safetensors key 中 `layers.{N}` 的 **N**
|
| 41 |
+
- **不按组件重排**,原始值直接输出
|
| 42 |
+
- 混合模态模型(如 Gemma-4):
|
| 43 |
+
- `layers.0~11` 同时含 audio/vision/text 层
|
| 44 |
+
- 全部输出,按前缀区分组件
|
| 45 |
+
|
| 46 |
+
### 示例:Gemma-4-E2B
|
| 47 |
+
| 组件 | 层范围 |
|
| 48 |
+
|------|--------|
|
| 49 |
+
| audio_tower | 0~11 |
|
| 50 |
+
| language_model | 0~34 |
|
| 51 |
+
| vision_tower | 0~15 |
|
| 52 |
+
|
| 53 |
+
### 示例:Gemma-4-31B
|
| 54 |
+
| 组件 | 层范围 |
|
| 55 |
+
|------|--------|
|
| 56 |
+
| language(局部层) | 0~59 |
|
| 57 |
+
| language(全局层) | 5,11,17...59 |
|
| 58 |
+
| vision_tower | 0~26 |
|
| 59 |
+
"""
|
| 60 |
+
|
| 61 |
+
|
| 62 |
def run_analysis(
|
| 63 |
model_id: str,
|
| 64 |
hf_token: str,
|
|
|
|
| 219 |
# Tab2 UI 组件
|
| 220 |
# ─────────────────────────────────────────────
|
| 221 |
|
| 222 |
+
def build_tab_analyze():
|
|
|
|
|
|
|
|
|
|
| 223 |
with gr.Tab("📊 分析"):
|
| 224 |
gr.Markdown("""
|
| 225 |
**第二步:选择层范围,计算王氏五定律全指标**
|
| 226 |
+
层号 = safetensors key 中 `layers.{N}` 的原始 N,K=V 共享层自动处理。
|
|
|
|
|
|
|
| 227 |
""")
|
| 228 |
|
| 229 |
with gr.Row():
|
|
|
|
| 233 |
placeholder="google/gemma-4-e2b",
|
| 234 |
value="google/gemma-4-e2b"
|
| 235 |
)
|
|
|
|
| 236 |
token_input = gr.Textbox(
|
| 237 |
+
label="HF Access Token(公开模型可留空)",
|
| 238 |
type="password"
|
| 239 |
)
|
| 240 |
+
with gr.Row():
|
| 241 |
+
start_input = gr.Number(
|
| 242 |
+
label="起始层号(含)",
|
| 243 |
+
value=0, minimum=0, maximum=9999, precision=0
|
| 244 |
+
)
|
| 245 |
+
end_input = gr.Number(
|
| 246 |
+
label="结束层号(含)",
|
| 247 |
+
value=5, minimum=0, maximum=9999, precision=0
|
| 248 |
+
)
|
| 249 |
+
analyze_btn = gr.Button("🚀 开始分析", variant="primary")
|
| 250 |
+
|
| 251 |
+
with gr.Column(scale=1):
|
| 252 |
+
gr.Markdown(SIDEBAR_MD)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
|
| 254 |
analyze_log = gr.Textbox(
|
| 255 |
label="分析日志(逐头详情)",
|
|
@@ -22,6 +22,38 @@ from core.layer_profile import (
|
|
| 22 |
extract_config_params,
|
| 23 |
)
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
def inspect_model(
|
| 27 |
model_id: str,
|
|
@@ -134,11 +166,8 @@ def inspect_model(
|
|
| 134 |
def build_tab_inspect():
|
| 135 |
with gr.Tab("🔬 结构探测"):
|
| 136 |
gr.Markdown("""
|
| 137 |
-
**第一步:先探测模型结构**
|
| 138 |
-
|
| 139 |
-
- 自动推断 head_dim(支持异构层,如 Gemma-4-31B 局部/全局层)
|
| 140 |
-
- 自动检测 K=V 共享层
|
| 141 |
-
- 结果供「分析」Tab 使用
|
| 142 |
""")
|
| 143 |
|
| 144 |
with gr.Row():
|
|
@@ -148,15 +177,14 @@ def build_tab_inspect():
|
|
| 148 |
placeholder="google/gemma-4-e2b",
|
| 149 |
value="google/gemma-4-e2b"
|
| 150 |
)
|
| 151 |
-
with gr.Column(scale=2):
|
| 152 |
inspect_token = gr.Textbox(
|
| 153 |
label="HF Access Token(公开模型可留空)",
|
| 154 |
type="password"
|
| 155 |
)
|
|
|
|
|
|
|
| 156 |
with gr.Column(scale=1):
|
| 157 |
-
|
| 158 |
-
"🔍 探测结构", variant="secondary", size="lg"
|
| 159 |
-
)
|
| 160 |
|
| 161 |
inspect_log = gr.Textbox(
|
| 162 |
label="结构探测日志",
|
|
|
|
| 22 |
extract_config_params,
|
| 23 |
)
|
| 24 |
|
| 25 |
+
SIDEBAR_MD = """
|
| 26 |
+
### ✅ 推荐模型
|
| 27 |
+
google/gemma-4-e2b
|
| 28 |
+
google/gemma-4-e4b-it
|
| 29 |
+
google/gemma-4-31b-it
|
| 30 |
+
Qwen/Qwen2.5-14B-Instruct
|
| 31 |
+
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
|
| 32 |
+
meta-llama/Meta-Llama-3-8B
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
### 层号说明
|
| 36 |
+
- 层号 = safetensors key 中 `layers.{N}` 的 **N**
|
| 37 |
+
- **不按组件重排**,原始值直接输出
|
| 38 |
+
- 混合模态模型(如 Gemma-4):
|
| 39 |
+
- `layers.0~11` 同时含 audio/vision/text 层
|
| 40 |
+
- 全部输出,按前缀区分组件
|
| 41 |
+
|
| 42 |
+
### 示例:Gemma-4-E2B
|
| 43 |
+
| 组件 | 层范围 |
|
| 44 |
+
|------|--------|
|
| 45 |
+
| audio_tower | 0~11 |
|
| 46 |
+
| language_model | 0~34 |
|
| 47 |
+
| vision_tower | 0~15 |
|
| 48 |
+
|
| 49 |
+
### 示例:Gemma-4-31B
|
| 50 |
+
| 组件 | 层范围 |
|
| 51 |
+
|------|--------|
|
| 52 |
+
| language(局部层) | 0~59 |
|
| 53 |
+
| language(全局层) | 5,11,17...59 |
|
| 54 |
+
| vision_tower | 0~26 |
|
| 55 |
+
"""
|
| 56 |
+
|
| 57 |
|
| 58 |
def inspect_model(
|
| 59 |
model_id: str,
|
|
|
|
| 166 |
def build_tab_inspect():
|
| 167 |
with gr.Tab("🔬 结构探测"):
|
| 168 |
gr.Markdown("""
|
| 169 |
+
**第一步:先探测模型结构**,自动识别组件、head_dim、K=V共享层。
|
| 170 |
+
结果供「分析」Tab 使用。
|
|
|
|
|
|
|
|
|
|
| 171 |
""")
|
| 172 |
|
| 173 |
with gr.Row():
|
|
|
|
| 177 |
placeholder="google/gemma-4-e2b",
|
| 178 |
value="google/gemma-4-e2b"
|
| 179 |
)
|
|
|
|
| 180 |
inspect_token = gr.Textbox(
|
| 181 |
label="HF Access Token(公开模型可留空)",
|
| 182 |
type="password"
|
| 183 |
)
|
| 184 |
+
inspect_btn = gr.Button("🔍 探测结构", variant="secondary")
|
| 185 |
+
|
| 186 |
with gr.Column(scale=1):
|
| 187 |
+
gr.Markdown(SIDEBAR_MD)
|
|
|
|
|
|
|
| 188 |
|
| 189 |
inspect_log = gr.Textbox(
|
| 190 |
label="结构探测日志",
|