Spaces:
Running
Running
Alex W. committed on
Commit ·
dbcd9e0
1
Parent(s): fe55b90
核心变化:整层 SVD → 逐头 SVD。
Browse files
| 之前(错误) | 现在(正确) |
|---|---|
| 整个 W_q (4096×4096) | 每个 Q 头 (128×4096) |
| 整个 W_k (4096×4096) | 每个 K 头 (128×4096) |
| 1 次 SVD | n_q_heads 次 SVD |
| — | 每个 Q 头 vs 其对应 K 头 |
GQA 分组逻辑
n_q_heads=32, n_kv_heads=8, group_size=4
KV头0 → Q头 0,1,2,3
KV头1 → Q头 4,5,6,7
KV头2 → Q头 8,9,10,11
...
KV头7 → Q头 28,29,30,31
app.py
CHANGED
|
@@ -4,11 +4,12 @@ import struct
|
|
| 4 |
import json
|
| 5 |
import numpy as np
|
| 6 |
import torch
|
| 7 |
-
from scipy import
|
| 8 |
from huggingface_hub import list_repo_files
|
|
|
|
| 9 |
|
| 10 |
# ─────────────────────────────────────────────
|
| 11 |
-
#
|
| 12 |
# ─────────────────────────────────────────────
|
| 13 |
|
| 14 |
DTYPE_MAP = {
|
|
@@ -18,25 +19,32 @@ DTYPE_MAP = {
|
|
| 18 |
"F64": (torch.float64, 8),
|
| 19 |
"I32": (torch.int32, 4),
|
| 20 |
"I64": (torch.int64, 8),
|
|
|
|
|
|
|
| 21 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def get_file_url(model_id: str, filename: str) -> str:
|
| 24 |
-
"""生成 HuggingFace 直链 URL"""
|
| 25 |
return f"https://huggingface.co/{model_id}/resolve/main/{filename}"
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
只读取 safetensors 文件头部(几KB),
|
| 30 |
-
获取所有 tensor 的 offset、dtype、shape
|
| 31 |
-
"""
|
| 32 |
headers = {"Authorization": f"Bearer {token}"} if token else {}
|
| 33 |
-
|
| 34 |
-
# 第一步:读前 8 bytes → 获取 header_size
|
| 35 |
r = requests.get(url, headers={**headers, "Range": "bytes=0-7"}, timeout=30)
|
| 36 |
r.raise_for_status()
|
| 37 |
header_size = struct.unpack("<Q", r.content)[0]
|
| 38 |
-
|
| 39 |
-
# 第二步:读 header JSON
|
| 40 |
r = requests.get(
|
| 41 |
url,
|
| 42 |
headers={**headers, "Range": f"bytes=8-{8 + header_size - 1}"},
|
|
@@ -45,346 +53,661 @@ def read_safetensors_header(url: str, token: str = None) -> dict:
|
|
| 45 |
r.raise_for_status()
|
| 46 |
return json.loads(r.content), header_size
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
if tensor_name not in header:
|
| 55 |
return None
|
| 56 |
-
|
| 57 |
-
info = header[tensor_name]
|
| 58 |
dtype_str = info["dtype"]
|
| 59 |
-
shape
|
| 60 |
-
offsets
|
| 61 |
-
|
| 62 |
if dtype_str not in DTYPE_MAP:
|
| 63 |
-
raise ValueError(f"
|
| 64 |
-
|
|
|
|
|
|
|
| 65 |
torch_dtype, _ = DTYPE_MAP[dtype_str]
|
| 66 |
-
|
| 67 |
-
# 计算文件中的绝对字节位置
|
| 68 |
-
# safetensors 文件布局:8字节(header_size) + header_size字节(header) + 数据区
|
| 69 |
abs_start = 8 + header_size + offsets[0]
|
| 70 |
abs_end = 8 + header_size + offsets[1] - 1
|
| 71 |
-
|
| 72 |
req_headers = {"Range": f"bytes={abs_start}-{abs_end}"}
|
| 73 |
if token:
|
| 74 |
req_headers["Authorization"] = f"Bearer {token}"
|
| 75 |
-
|
| 76 |
r = requests.get(url, headers=req_headers, timeout=120)
|
| 77 |
r.raise_for_status()
|
| 78 |
-
|
| 79 |
-
# 转换为 tensor(BF16 需特殊处理)
|
| 80 |
raw = r.content
|
|
|
|
| 81 |
if torch_dtype == torch.bfloat16:
|
| 82 |
tensor = torch.frombuffer(bytearray(raw), dtype=torch.int16).view(torch.bfloat16)
|
| 83 |
else:
|
| 84 |
tensor = torch.frombuffer(bytearray(raw), dtype=torch_dtype)
|
| 85 |
-
|
| 86 |
-
return tensor.reshape(shape).float() # 统一转 float32 做 SVD
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
# ─────────────────────────────────────────────
|
| 91 |
|
| 92 |
def get_safetensor_files(model_id: str, token: str = None) -> list:
|
| 93 |
-
"""列出模型 repo 中的所有 .safetensors 文件"""
|
| 94 |
kwargs = {"token": token} if token else {}
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
| 98 |
|
| 99 |
-
def find_index_file(model_id: str, token: str = None):
|
| 100 |
-
"""检查是否有 model.safetensors.index.json(分片模型)"""
|
| 101 |
url = f"https://huggingface.co/{model_id}/resolve/main/model.safetensors.index.json"
|
| 102 |
headers = {"Authorization": f"Bearer {token}"} if token else {}
|
| 103 |
r = requests.get(url, headers=headers, timeout=15)
|
| 104 |
-
if r.status_code == 200
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
# ─────────────────────────────────────────────
|
| 109 |
-
#
|
| 110 |
# ─────────────────────────────────────────────
|
| 111 |
|
| 112 |
-
def
|
| 113 |
-
"""
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
# ─────────────────────────────────────────────
|
| 131 |
-
#
|
| 132 |
# ─────────────────────────────────────────────
|
| 133 |
|
| 134 |
-
def
|
| 135 |
"""
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
if not model_id.strip():
|
| 142 |
-
return "❌ 请输入模型 ID
|
| 143 |
-
|
| 144 |
-
token
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
try:
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
if index_data:
|
| 159 |
-
weight_map = index_data
|
| 160 |
-
log_lines.append(
|
|
|
|
|
|
|
| 161 |
else:
|
| 162 |
-
# 单文件模型
|
| 163 |
sf_files = get_safetensor_files(model_id, token)
|
| 164 |
if not sf_files:
|
| 165 |
-
return "❌ 未找到 .safetensors 文件
|
| 166 |
-
weight_map =
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
first_shard = None
|
| 175 |
if index_data:
|
| 176 |
-
first_shard =
|
| 177 |
else:
|
| 178 |
first_shard = sf_files[0]
|
| 179 |
-
|
| 180 |
first_url = get_file_url(model_id, first_shard)
|
| 181 |
first_header, first_hsize = read_safetensors_header(first_url, token)
|
| 182 |
shard_headers[first_shard] = (first_header, first_hsize)
|
| 183 |
-
|
| 184 |
-
# 自动检测 Q/K key 命名模式
|
| 185 |
all_keys = list(first_header.keys())
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
# Range Request 只下载 Q 和 K tensor
|
| 255 |
-
q_url = get_file_url(model_id, q_shard)
|
| 256 |
-
k_url = get_file_url(model_id, k_shard)
|
| 257 |
-
|
| 258 |
-
q_header, q_hsize = shard_headers[q_shard]
|
| 259 |
-
k_header, k_hsize = shard_headers[k_shard]
|
| 260 |
-
|
| 261 |
-
W_q = load_tensor_remote(q_url, q_key, q_header, q_hsize, token)
|
| 262 |
-
W_k = load_tensor_remote(k_url, k_key, k_header, k_hsize, token)
|
| 263 |
-
|
| 264 |
-
if W_q is None or W_k is None:
|
| 265 |
-
log_lines.append(f"Layer {layer_idx}: ⚠️ tensor 读取失败\n")
|
| 266 |
-
break
|
| 267 |
-
|
| 268 |
-
r, ssr = compute_svd_metrics(W_q, W_k)
|
| 269 |
-
pearson_list.append(r)
|
| 270 |
-
ssr_list.append(ssr)
|
| 271 |
-
results.append({
|
| 272 |
-
"Layer": layer_idx,
|
| 273 |
-
"Pearson_r": round(r, 6),
|
| 274 |
-
"SSR": round(ssr, 6)
|
| 275 |
-
})
|
| 276 |
-
|
| 277 |
-
log_lines.append(
|
| 278 |
-
f"Layer {layer_idx:3d} | Q shape: {list(W_q.shape)} "
|
| 279 |
-
f"| Pearson r = {r:.4f} | SSR = {ssr:.6f}\n"
|
| 280 |
)
|
| 281 |
-
|
| 282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
del W_q, W_k
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
f"
|
| 290 |
-
f"
|
| 291 |
-
f"
|
| 292 |
-
f"
|
| 293 |
-
f"
|
| 294 |
-
f"
|
| 295 |
-
f"
|
| 296 |
-
f" Min: {np.min(pearson_list):.4f} "
|
| 297 |
-
f" Max: {np.max(pearson_list):.4f}\n\n"
|
| 298 |
-
f"【第二定律 - 谱形状保真 SSR】\n"
|
| 299 |
-
f" Median: {np.median(ssr_list):.6f} "
|
| 300 |
-
f" Mean: {np.mean(ssr_list):.6f}\n"
|
| 301 |
-
f" Min: {np.min(ssr_list):.6f} "
|
| 302 |
-
f" Max: {np.max(ssr_list):.6f}\n\n"
|
| 303 |
-
f"⚡ 理论值:Pearson r → 1,SSR → 0\n"
|
| 304 |
-
f"{'='*50}\n"
|
| 305 |
)
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
#
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
return "".join(log_lines), df
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
return "❌ 401 未授权:该模型需要 HF Token,请填写 Access Token", None
|
| 317 |
-
elif e.response.status_code == 403:
|
| 318 |
-
return "❌ 403 禁止访问:请确认已在 HF 接受该模型的使用协议", None
|
| 319 |
-
elif e.response.status_code == 404:
|
| 320 |
-
return f"❌ 404 未找到:模型 {model_id} 不存在或文件路径错误", None
|
| 321 |
-
else:
|
| 322 |
-
return f"❌ HTTP 错误:{e}", None
|
| 323 |
-
except Exception as e:
|
| 324 |
-
return f"❌ 错误:{str(e)}", None
|
| 325 |
|
| 326 |
# ─────────────────────────────────────────────
|
| 327 |
# Gradio UI
|
| 328 |
# ─────────────────────────────────────────────
|
| 329 |
|
| 330 |
with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
|
|
|
| 331 |
gr.Markdown("""
|
| 332 |
# 🔬 Wang's Five Laws — LLM Spectral Analyzer
|
| 333 |
**Mathematical Foundations of Large Language Models (MF-LLM)**
|
| 334 |
-
|
| 335 |
-
通过 HTTP Range Request 直接读取
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
[](https://doi.org/10.5281/zenodo.19707844)
|
|
|
|
| 341 |
""")
|
| 342 |
-
|
| 343 |
with gr.Row():
|
| 344 |
with gr.Column(scale=2):
|
| 345 |
model_input = gr.Textbox(
|
| 346 |
label="HuggingFace 模型 ID",
|
| 347 |
-
placeholder="
|
| 348 |
value="Qwen/Qwen2.5-14B-Instruct"
|
| 349 |
)
|
| 350 |
token_input = gr.Textbox(
|
| 351 |
label="HF Access Token(公开模型可留空)",
|
| 352 |
-
placeholder="
|
| 353 |
type="password"
|
| 354 |
)
|
| 355 |
max_layers_input = gr.Slider(
|
| 356 |
label="最大分析层数",
|
| 357 |
-
minimum=1, maximum=100, value=
|
| 358 |
)
|
| 359 |
analyze_btn = gr.Button("🚀 开始分析", variant="primary")
|
| 360 |
-
|
| 361 |
with gr.Column(scale=1):
|
| 362 |
gr.Markdown("""
|
| 363 |
-
###
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
""")
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
|
|
|
|
|
|
| 388 |
analyze_btn.click(
|
| 389 |
fn=analyze_model,
|
| 390 |
inputs=[model_input, token_input, max_layers_input],
|
|
|
|
| 4 |
import json
|
| 5 |
import numpy as np
|
| 6 |
import torch
|
| 7 |
+
from scipy.stats import pearsonr, spearmanr
|
| 8 |
from huggingface_hub import list_repo_files
|
| 9 |
+
import pandas as pd
|
| 10 |
|
| 11 |
# ─────────────────────────────────────────────
|
| 12 |
+
# dtype 映射
|
| 13 |
# ─────────────────────────────────────────────
|
| 14 |
|
| 15 |
DTYPE_MAP = {
|
|
|
|
| 19 |
"F64": (torch.float64, 8),
|
| 20 |
"I32": (torch.int32, 4),
|
| 21 |
"I64": (torch.int64, 8),
|
| 22 |
+
"I8": (torch.int8, 1),
|
| 23 |
+
"U8": (torch.uint8, 1),
|
| 24 |
}
|
| 25 |
+
try:
|
| 26 |
+
DTYPE_MAP["F8_E4M3"] = (torch.float8_e4m3fn, 1)
|
| 27 |
+
DTYPE_MAP["F8_E5M2"] = (torch.float8_e5m2, 1)
|
| 28 |
+
except AttributeError:
|
| 29 |
+
pass
|
| 30 |
+
|
| 31 |
+
UNSUPPORTED_SVD_DTYPES = {"I8", "U8", "I32", "I64", "F8_E4M3", "F8_E5M2"}
|
| 32 |
+
QUANTIZED_KEY_SIGNATURES = ["qweight", "qzeros", "scales", "g_idx", "packed_weight"]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# ─────────────────────────────────────────────
|
| 36 |
+
# 工具函数
|
| 37 |
+
# ─────────────────────────────────────────────
|
| 38 |
|
| 39 |
def get_file_url(model_id: str, filename: str) -> str:
    """Build the HuggingFace "resolve/main" URL for a file in a model repo.

    Args:
        model_id: Repository id such as ``"org/model"``. Surrounding
            whitespace is stripped, because the UI caller only checks that
            the stripped value is non-empty and passes the raw text on.
        filename: Path of the file inside the repository; may contain
            ``/`` for sub-folders.

    Returns:
        The URL string, with both path components percent-encoded so that
        characters such as spaces or ``#`` cannot truncate or corrupt it.
    """
    from urllib.parse import quote

    # "/" must survive in both parts: it separates org/model and sub-folders.
    repo = quote(model_id.strip(), safe="/")
    path = quote(filename, safe="/")
    return f"https://huggingface.co/{repo}/resolve/main/{path}"
|
| 41 |
|
| 42 |
+
|
| 43 |
+
def read_safetensors_header(url: str, token: str = None) -> tuple[dict, int]:
|
|
|
|
|
|
|
|
|
|
| 44 |
headers = {"Authorization": f"Bearer {token}"} if token else {}
|
|
|
|
|
|
|
| 45 |
r = requests.get(url, headers={**headers, "Range": "bytes=0-7"}, timeout=30)
|
| 46 |
r.raise_for_status()
|
| 47 |
header_size = struct.unpack("<Q", r.content)[0]
|
|
|
|
|
|
|
| 48 |
r = requests.get(
|
| 49 |
url,
|
| 50 |
headers={**headers, "Range": f"bytes=8-{8 + header_size - 1}"},
|
|
|
|
| 53 |
r.raise_for_status()
|
| 54 |
return json.loads(r.content), header_size
|
| 55 |
|
| 56 |
+
|
| 57 |
+
def load_tensor_remote(
    url: str, tensor_name: str,
    header: dict, header_size: int,
    token: str = None
) -> torch.Tensor | None:
    """Download one tensor from a remote safetensors file via an HTTP Range request.

    Args:
        url: URL of the safetensors file.
        tensor_name: Key of the tensor in the parsed header.
        header: Parsed safetensors header (tensor name -> dtype/shape/data_offsets).
        header_size: Byte length of the JSON header, as read from the file.
        token: Optional bearer token sent in the Authorization header.

    Returns:
        The tensor reshaped to its declared shape and converted to float32,
        or ``None`` when ``tensor_name`` is not present in ``header``.

    Raises:
        ValueError: If the dtype is unknown, is listed as unsupported for
            SVD, or the server returned a payload whose length does not
            match the header's ``data_offsets``.
        requests.exceptions.HTTPError: If the HTTP request fails.
    """
    if tensor_name not in header:
        return None

    info = header[tensor_name]
    dtype_str = info["dtype"]
    shape = info["shape"]
    offsets = info["data_offsets"]

    if dtype_str not in DTYPE_MAP:
        raise ValueError(f"未知 dtype: {dtype_str}")
    if dtype_str in UNSUPPORTED_SVD_DTYPES:
        raise ValueError(f"dtype={dtype_str} 为量化格式,无法 SVD")

    torch_dtype, _ = DTYPE_MAP[dtype_str]

    # data_offsets are relative to the data area, which starts after the
    # 8-byte length prefix and the JSON header; Range ends are inclusive.
    expected_bytes = offsets[1] - offsets[0]
    abs_start = 8 + header_size + offsets[0]
    abs_end = 8 + header_size + offsets[1] - 1

    req_headers = {"Range": f"bytes={abs_start}-{abs_end}"}
    if token:
        req_headers["Authorization"] = f"Bearer {token}"

    r = requests.get(url, headers=req_headers, timeout=120)
    r.raise_for_status()
    raw = r.content

    # A server or proxy that ignores the Range header, or a truncated
    # transfer, would otherwise surface as an opaque reshape error.
    if len(raw) != expected_bytes:
        raise ValueError(
            f"tensor {tensor_name} 数据长度不符:期望 {expected_bytes} 字节,"
            f"实际 {len(raw)} 字节"
        )

    # bytearray() gives frombuffer a writable buffer of its own.
    if torch_dtype == torch.bfloat16:
        # Read the 2-byte words as int16, then reinterpret the bits as bfloat16.
        tensor = torch.frombuffer(bytearray(raw), dtype=torch.int16).view(torch.bfloat16)
    else:
        tensor = torch.frombuffer(bytearray(raw), dtype=torch_dtype)

    return tensor.reshape(shape).float()
|
| 92 |
+
|
|
|
|
| 93 |
|
| 94 |
def get_safetensor_files(model_id: str, token: str = None) -> list:
    """Return the sorted names of all ``.safetensors`` files in a model repo.

    Args:
        model_id: Repository id passed to ``list_repo_files``.
        token: Optional access token; forwarded only when truthy.

    Returns:
        A sorted list of repository file names ending in ``.safetensors``.
    """
    auth_kwargs = {}
    if token:
        auth_kwargs["token"] = token

    names = [
        name
        for name in list_repo_files(model_id, **auth_kwargs)
        if name.endswith(".safetensors")
    ]
    names.sort()
    return names
|
| 100 |
+
|
| 101 |
|
| 102 |
+
def find_index_file(model_id: str, token: str = None) -> dict | None:
    """Fetch ``model.safetensors.index.json`` for a model, if it exists.

    Args:
        model_id: Repository id used to build the URL.
        token: Optional bearer token sent in the Authorization header.

    Returns:
        The decoded JSON body when the server answers 200, otherwise ``None``.
    """
    index_url = f"https://huggingface.co/{model_id}/resolve/main/model.safetensors.index.json"

    auth_headers = {}
    if token:
        auth_headers["Authorization"] = f"Bearer {token}"

    response = requests.get(index_url, headers=auth_headers, timeout=15)
    if response.status_code != 200:
        return None
    return response.json()
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def _http_error_msg(e: "requests.exceptions.HTTPError", model_id: str) -> str:
    """Turn an HTTP error into the user-facing message shown in the UI.

    Args:
        e: The raised HTTP error; its ``response.status_code`` selects the text.
        model_id: Repository id interpolated into the 403 and 404 messages.

    Returns:
        A message for 401, 403 or 404, a generic message carrying the status
        code for anything else, or a generic message without a code when the
        error has no response attached.
    """
    # Compare against None explicitly: a requests.Response is falsy for any
    # 4xx/5xx status, so a plain truthiness test would misfire here.
    response = getattr(e, "response", None)
    if response is None:
        return f"❌ HTTP 错误:{e}"

    code = response.status_code
    if code == 401:
        return "❌ 401 未授权:请填写有效的 HF Access Token"
    if code == 403:
        return f"❌ 403 禁止访问:请先接受 {model_id} 的使用协议"
    if code == 404:
        return f"❌ 404 未找到:模型 {model_id} 不存在"
    return f"❌ HTTP {code}:{e}"
|
| 115 |
+
|
| 116 |
|
| 117 |
# ─────────────────────────────────────────────
|
| 118 |
+
# 量化三重检测
|
| 119 |
# ─────────────────────────────────────────────
|
| 120 |
|
| 121 |
+
def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
    """Run three best-effort checks for quantized weights before analysis.

    The checks are: (1) quantization fields in ``config.json``, (2) keywords
    in the model id and the repository file list, (3) tensor-key signatures
    and dtypes in the first shard's safetensors header.

    Args:
        model_id: Repository id to inspect.
        token: Optional access token for gated or private repositories.

    Returns:
        ``(is_blocked, message)``. ``is_blocked`` is True when a check decides
        the model cannot be analysed; ``message`` is either the blocking
        reason or the accumulated warnings / success note.
    """
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    warnings = []

    # Check 1: config.json. Failures here only produce a warning.
    try:
        r = requests.get(
            f"https://huggingface.co/{model_id}/resolve/main/config.json",
            headers=headers, timeout=15
        )
        if r.status_code == 200:
            cfg = r.json()
            # The field may be present but null; treat that like "absent".
            qcfg = cfg.get("quantization_config")
            if not isinstance(qcfg, dict):
                qcfg = {}
            # Take the first non-empty *string*. A non-string value (for
            # example a dict under "quantization") used to crash .lower()
            # and was then misreported as an unreadable config.json.
            qt = ""
            for value in (qcfg.get("quant_type"),
                          qcfg.get("quant_method"),
                          cfg.get("quantization")):
                if isinstance(value, str) and value:
                    qt = value.lower()
                    break
            if "gptq" in qt:
                bits = qcfg.get("bits", "?")
                return True, (f"❌ 检测到 GPTQ {bits}bit 量化\n"
                              f" 请改用原始 BF16 版本。")
            if "awq" in qt:
                return True, "❌ 检测到 AWQ 量化,请改用原始 BF16 版本。"
            if "bitsandbytes" in qt or "bnb" in qt:
                warnings.append("⚠️ 检测到 bitsandbytes 量化,结果可能失真")
    except Exception:
        # Deliberately broad: this probe must never abort the analysis.
        warnings.append("⚠️ 无法读取 config.json")

    # Check 2: keywords in the model id, then the repository file list.
    mid_lower = model_id.lower()
    for kw in ["gptq", "awq", "gguf"]:
        if kw in mid_lower:
            return True, f"❌ 模型名含 '{kw.upper()}',为量化版本,请使用原始 BF16 版本。"

    try:
        all_files = list(list_repo_files(model_id, token=token))
        if any(f.endswith(".gguf") for f in all_files):
            return True, "❌ 检测到 .gguf 文件,不支持该格式。"
        if not any(f.endswith(".safetensors") for f in all_files):
            return True, "❌ 未找到 .safetensors 文件,仅支持 safetensors 格式。"
    except Exception as e:
        warnings.append(f"⚠️ 文件列表检测失败:{e}")

    # Check 3: key signatures and dtypes in the first shard's header.
    try:
        index_data = find_index_file(model_id, token)
        if index_data:
            first_shard = sorted(set(index_data["weight_map"].values()))[0]
        else:
            sf = get_safetensor_files(model_id, token)
            first_shard = sf[0]
        hdr, _ = read_safetensors_header(get_file_url(model_id, first_shard), token)
        all_keys = list(hdr.keys())
        bad_keys = [k for k in all_keys
                    if any(sig in k for sig in QUANTIZED_KEY_SIGNATURES)]
        if bad_keys:
            return True, (f"❌ 检测到量化 key:{bad_keys[:3]}\n"
                          f" 请使用原始 BF16 版本。")
        # Only the first 20 entries are sampled.
        dtypes = {hdr[k].get("dtype", "") for k in all_keys[:20]}
        good = dtypes - UNSUPPORTED_SVD_DTYPES
        # Entries without a dtype (e.g. the metadata entry) yield "".
        good.discard("")
        if good:
            warnings.append(f"✅ 权重格式:{good}")
    except Exception as e:
        warnings.append(f"⚠️ header 检测失败:{e}")

    msg = "\n".join(warnings) if warnings else "✅ 未检测到量化,可以正常分析"
    return False, msg
|
| 187 |
+
|
| 188 |
|
| 189 |
# ─────────────────────────────────────────────
|
| 190 |
+
# GQA 参数自动推断
|
| 191 |
# ─────────────────────────────────────────────
|
| 192 |
|
| 193 |
+
def infer_gqa_params(W_q: torch.Tensor, W_k: torch.Tensor, config: dict | None) -> tuple[int,int,int]:
|
| 194 |
"""
|
| 195 |
+
自动推断:
|
| 196 |
+
- n_q_heads : Q 头数量
|
| 197 |
+
- n_kv_heads : KV 头数量(GQA)
|
| 198 |
+
- d_head : 每个头的维度
|
| 199 |
+
|
| 200 |
+
权重 shape 约定(最常见):
|
| 201 |
+
W_q : (n_q_heads * d_head, d_model) → shape[0] = n_q * d_h
|
| 202 |
+
W_k : (n_kv_heads * d_head, d_model) → shape[0] = n_kv * d_h
|
| 203 |
+
|
| 204 |
+
d_head 优先从 config.json 读取,其次用常见默认值猜测。
|
| 205 |
"""
|
| 206 |
+
q_rows, d_model = W_q.shape[0], W_q.shape[1]
|
| 207 |
+
k_rows = W_k.shape[0]
|
| 208 |
+
|
| 209 |
+
# 从 config.json 读取 d_head
|
| 210 |
+
d_head = None
|
| 211 |
+
if config:
|
| 212 |
+
d_head = (
|
| 213 |
+
config.get("head_dim") or
|
| 214 |
+
config.get("kv_channels") or
|
| 215 |
+
config.get("hidden_size", 0) // config.get("num_attention_heads", 1)
|
| 216 |
+
)
|
| 217 |
+
if d_head == 0:
|
| 218 |
+
d_head = None
|
| 219 |
+
|
| 220 |
+
# 如果 config 没给,用常见值探测(64, 80, 96, 128, 256)
|
| 221 |
+
if not d_head:
|
| 222 |
+
for candidate in [256, 128, 96, 80, 64]:
|
| 223 |
+
if q_rows % candidate == 0 and k_rows % candidate == 0:
|
| 224 |
+
d_head = candidate
|
| 225 |
+
break
|
| 226 |
+
|
| 227 |
+
if not d_head:
|
| 228 |
+
raise ValueError(
|
| 229 |
+
f"无法推断 d_head:W_q.shape={W_q.shape}, W_k.shape={W_k.shape}\n"
|
| 230 |
+
f"请在 config.json 中确认 head_dim 字段。"
|
| 231 |
+
)
|
| 232 |
+
|
| 233 |
+
n_q_heads = q_rows // d_head
|
| 234 |
+
n_kv_heads = k_rows // d_head
|
| 235 |
+
|
| 236 |
+
if n_q_heads % n_kv_heads != 0:
|
| 237 |
+
raise ValueError(
|
| 238 |
+
f"n_q_heads={n_q_heads} 不能被 n_kv_heads={n_kv_heads} 整除,"
|
| 239 |
+
f"请检查 d_head 推断是否正确。"
|
| 240 |
+
)
|
| 241 |
+
|
| 242 |
+
return n_q_heads, n_kv_heads, d_head
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
# ─────────────────────────────────────────────
|
| 246 |
+
# 逐头 SVD 指标计算
|
| 247 |
+
# ─────────────────────────────────────────────
|
| 248 |
+
|
| 249 |
+
def compute_pearson_corr_torch(s_q: torch.Tensor, s_k: torch.Tensor) -> float:
    """Return the Pearson correlation between two singular-value vectors.

    Both inputs are truncated to their common length, in line with the other
    spectrum metrics in this file that truncate to ``min_len``.

    Args:
        s_q: 1-D tensor of singular values for the Q head.
        s_k: 1-D tensor of singular values for the K head.

    Returns:
        The correlation coefficient, or ``nan`` when it is undefined:
        fewer than two common points, or either vector constant.
    """
    n = min(s_q.shape[0], s_k.shape[0])
    sq = s_q[:n].detach().cpu().numpy()
    sk = s_k[:n].detach().cpu().numpy()

    # pearsonr raises on fewer than 2 points and warns on constant input;
    # report both as "undefined" instead.
    if n < 2 or sq.max() == sq.min() or sk.max() == sk.min():
        return float("nan")

    r, _ = pearsonr(sq, sk)
    return float(r)
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def compute_singular_value_ratio(
    s_q: torch.Tensor, s_k: torch.Tensor
) -> tuple[float, float]:
    """Estimate the scale factor between two spectra and its relative residual.

    ``alpha`` is the median of the element-wise ratio ``s_q / s_k``. The
    residual is ``mean|s_q - alpha * s_k| / mean(s_q)``. Both inputs are cut
    to their common length first, and a 1e-10 term guards each division.

    Returns:
        ``(alpha, residual)`` as Python floats.
    """
    common = min(s_q.shape[0], s_k.shape[0])
    top_q, top_k = s_q[:common], s_k[:common]

    alpha = float(torch.median(top_q / (top_k + 1e-10)))

    mean_abs_dev = torch.abs(top_q - alpha * top_k).mean()
    residual = float(mean_abs_dev / (top_q.mean() + 1e-10))
    return alpha, residual
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def compute_left_vector_alignment(
    U_q: torch.Tensor, U_k: torch.Tensor
) -> float:
    """Return the mean absolute inner product of paired left singular vectors.

    Column ``i`` of ``U_q`` is paired with column ``i`` of ``U_k``; only the
    columns both matrices have are compared. Computes
    ``mean_i |<u_q_i, u_k_i>|``.
    """
    shared_cols = min(U_q.shape[1], U_k.shape[1])
    column_dots = torch.sum(U_q[:, :shared_cols] * U_k[:, :shared_cols], dim=0)
    return float(column_dots.abs().mean())
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
def compute_covariance_alignment(
    W_q: torch.Tensor, W_k: torch.Tensor, alpha: float
) -> float:
    """Return the relative Frobenius error between the scaled Gram matrices.

    Computes ``||W_q W_q^T - alpha^2 W_k W_k^T||_F / ||W_k W_k^T||_F``,
    with 1e-10 added to the denominator.
    """
    gram_q = torch.matmul(W_q, W_q.T)
    gram_k = torch.matmul(W_k, W_k.T)

    mismatch = torch.norm(gram_q - (alpha ** 2) * gram_k, p='fro')
    reference = torch.norm(gram_k, p='fro') + 1e-10
    return float(mismatch / reference)
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
def compute_ssr(s_q: torch.Tensor, s_k: torch.Tensor) -> float:
    """Return the mean absolute difference between two L2-normalised spectra.

    Both spectra are cut to their common length and each is divided by its
    own L2 norm (plus 1e-10) before comparison:
    ``SSR = mean_i |s_q_i/||s_q|| - s_k_i/||s_k|||``.
    """
    common = min(s_q.shape[0], s_k.shape[0])

    unit_spectra = []
    for spectrum in (s_q, s_k):
        values = spectrum[:common].cpu().numpy()
        unit_spectra.append(values / (np.linalg.norm(values) + 1e-10))

    gap = np.abs(unit_spectra[0] - unit_spectra[1])
    return float(np.mean(gap))
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
def analyze_layer_heads(
    W_q: torch.Tensor,
    W_k: torch.Tensor,
    layer_idx: int,
    n_q_heads: int,
    n_kv_heads: int,
    d_head: int,
) -> tuple[list[dict], str]:
    """Compare every Q head with the K head of its GQA group for one layer.

    Each KV head serves ``group_size = n_q_heads // n_kv_heads`` consecutive
    Q heads. Head ``i`` is taken as rows ``[i*d_head, (i+1)*d_head)`` of the
    weight. For every (K head, Q head) pair the function computes Pearson
    and Spearman correlation of the singular values, the scale factor alpha
    with its residual, left-singular-vector alignment, covariance alignment
    error and SSR.

    Returns:
        ``(records, log_text)``: one dict of rounded metrics per Q head, and
        a formatted text table for the same rows.
    """
    group_size = n_q_heads // n_kv_heads

    def head_rows(weight: torch.Tensor, index: int) -> torch.Tensor:
        # Row block of `weight` belonging to head `index`.
        return weight[index * d_head : (index + 1) * d_head, :]

    records = []
    log_lines = [
        (
            f"\n{'─'*70}\n"
            f"Layer {layer_idx:3d} "
            f"[n_q={n_q_heads}, n_kv={n_kv_heads}, "
            f"group={group_size}, d_head={d_head}]\n"
            f"{'─'*70}\n"
        ),
        (
            f" {'KV头':>4} {'Q头':>4} "
            f"{'Pearson':>8} {'Spearman':>9} "
            f"{'α':>7} {'α残差':>8} "
            f"{'cos(Uq,Uk)':>10} {'协方差误差':>10} {'SSR':>10}\n"
        ),
    ]

    for kv_h in range(n_kv_heads):
        # The K head is decomposed once and reused for its whole Q group.
        k_tensor = head_rows(W_k, kv_h)
        U_k, s_k, _ = torch.linalg.svd(k_tensor, full_matrices=False)

        first_q = kv_h * group_size
        for h_idx in range(first_q, first_q + group_size):
            q_tensor = head_rows(W_q, h_idx)
            U_q, s_q, _ = torch.linalg.svd(q_tensor, full_matrices=False)

            # Correlations are taken over the common prefix of both spectra.
            min_len = min(s_q.shape[0], s_k.shape[0])
            sq_common = s_q[:min_len]
            sk_common = s_k[:min_len]

            pearson_r = compute_pearson_corr_torch(sq_common, sk_common)
            spearman_r, _ = spearmanr(
                sq_common.cpu().numpy(),
                sk_common.cpu().numpy()
            )
            alpha, alpha_res = compute_singular_value_ratio(s_q, s_k)
            cos_u = compute_left_vector_alignment(U_q, U_k)
            cov_err = compute_covariance_alignment(q_tensor, k_tensor, alpha)
            ssr = compute_ssr(s_q, s_k)

            record = {
                "Layer": layer_idx,
                "KV_head": kv_h,
                "Q_head": h_idx,
                "Pearson_r": round(pearson_r, 6),
                "Spearman_r": round(float(spearman_r), 6),
                "Alpha": round(alpha, 4),
                "Alpha_res": round(alpha_res, 6),
                "cos_Uq_Uk": round(cos_u, 6),
                "Cov_err": round(cov_err, 6),
                "SSR": round(ssr, 6),
            }
            records.append(record)

            # The table row uses the unrounded values.
            log_lines.append(
                f" KV={kv_h:>3d} Q={h_idx:>3d} "
                f"{pearson_r:>+8.4f} {float(spearman_r):>+9.4f} "
                f"{alpha:>7.4f} {alpha_res:>8.2e} "
                f"{cos_u:>10.4f} {cov_err:>10.4f} {ssr:>10.6f}\n"
            )

    return records, "".join(log_lines)
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
# ─────────────────────────────────────────────
|
| 402 |
+
# 主分析函数
|
| 403 |
+
# ─────────────────────────────────────────────
|
| 404 |
+
|
| 405 |
+
def analyze_model(
|
| 406 |
+
model_id: str,
|
| 407 |
+
hf_token: str,
|
| 408 |
+
max_layers: int,
|
| 409 |
+
progress=gr.Progress()
|
| 410 |
+
):
|
| 411 |
if not model_id.strip():
|
| 412 |
+
return "❌ 请输入模型 ID", None
|
| 413 |
+
|
| 414 |
+
token = hf_token.strip() or None
|
| 415 |
+
log_lines = [f"🔍 分析模型:{model_id}\n{'═'*70}\n"]
|
| 416 |
+
all_records: list[dict] = []
|
| 417 |
+
|
| 418 |
+
# ── 量化检测 ─────────────────────────────────
|
| 419 |
+
progress(0.02, desc="量化检测...")
|
| 420 |
+
is_blocked, quant_msg = check_quantization(model_id, token)
|
| 421 |
+
log_lines.append(f"【量化检测】\n{quant_msg}\n{'─'*70}\n")
|
| 422 |
+
if is_blocked:
|
| 423 |
+
return "".join(log_lines), None
|
| 424 |
+
|
| 425 |
+
# ── 读取 config.json(用于推断 d_head)────────
|
| 426 |
+
config = None
|
| 427 |
try:
|
| 428 |
+
r = requests.get(
|
| 429 |
+
f"https://huggingface.co/{model_id}/resolve/main/config.json",
|
| 430 |
+
headers={"Authorization": f"Bearer {token}"} if token else {},
|
| 431 |
+
timeout=15
|
| 432 |
+
)
|
| 433 |
+
if r.status_code == 200:
|
| 434 |
+
config = r.json()
|
| 435 |
+
log_lines.append(
|
| 436 |
+
f"📋 config.json:\n"
|
| 437 |
+
f" hidden_size = {config.get('hidden_size')}\n"
|
| 438 |
+
f" num_attention_heads = {config.get('num_attention_heads')}\n"
|
| 439 |
+
f" num_key_value_heads = {config.get('num_key_value_heads')}\n"
|
| 440 |
+
f" head_dim = {config.get('head_dim')}\n"
|
| 441 |
+
f"{'─'*70}\n"
|
| 442 |
+
)
|
| 443 |
+
except Exception:
|
| 444 |
+
log_lines.append("⚠️ 无法读取 config.json,将从 weight shape 自动推断\n")
|
| 445 |
+
|
| 446 |
+
# ── 获取分片索引 ─────────────────────────────
|
| 447 |
+
progress(0.05, desc="读取模型索引...")
|
| 448 |
+
try:
|
| 449 |
+
index_data = find_index_file(model_id, token)
|
| 450 |
+
shard_headers: dict[str, tuple[dict, int]] = {}
|
| 451 |
+
|
| 452 |
if index_data:
|
| 453 |
+
weight_map = index_data["weight_map"]
|
| 454 |
+
log_lines.append(
|
| 455 |
+
f"📦 分片模型,共 {len(set(weight_map.values()))} 个 shard\n"
|
| 456 |
+
)
|
| 457 |
else:
|
|
|
|
| 458 |
sf_files = get_safetensor_files(model_id, token)
|
| 459 |
if not sf_files:
|
| 460 |
+
return "❌ 未找到 .safetensors 文件", None
|
| 461 |
+
weight_map = None
|
| 462 |
+
log_lines.append(f"📦 单文件:{sf_files}\n")
|
| 463 |
+
except requests.exceptions.HTTPError as e:
|
| 464 |
+
return _http_error_msg(e, model_id), None
|
| 465 |
+
|
| 466 |
+
# ── 探测第一个 shard,识别 Q/K key 命名 ──────
|
| 467 |
+
progress(0.08, desc="识别层结构...")
|
| 468 |
+
try:
|
|
|
|
| 469 |
if index_data:
|
| 470 |
+
first_shard = sorted(set(index_data["weight_map"].values()))[0]
|
| 471 |
else:
|
| 472 |
first_shard = sf_files[0]
|
| 473 |
+
|
| 474 |
first_url = get_file_url(model_id, first_shard)
|
| 475 |
first_header, first_hsize = read_safetensors_header(first_url, token)
|
| 476 |
shard_headers[first_shard] = (first_header, first_hsize)
|
|
|
|
|
|
|
| 477 |
all_keys = list(first_header.keys())
|
| 478 |
+
except Exception as e:
|
| 479 |
+
return f"❌ 读取 shard header 失败:{e}", None
|
| 480 |
+
|
| 481 |
+
# 识别 Q/K key 命名规则
|
| 482 |
+
q_candidates = [k for k in all_keys if any(
|
| 483 |
+
p in k for p in ["q_proj.weight", "query.weight", "q.weight", "wq.weight"]
|
| 484 |
+
)]
|
| 485 |
+
if not q_candidates:
|
| 486 |
+
sample = "\n".join(all_keys[:30])
|
| 487 |
+
return f"⚠️ 无法识别 Q/K key,前 30 个 key:\n{sample}", None
|
| 488 |
+
|
| 489 |
+
sample_q = q_candidates[0]
|
| 490 |
+
if "q_proj" in sample_q: q_suffix, k_suffix = "self_attn.q_proj.weight", "self_attn.k_proj.weight"
|
| 491 |
+
elif "query" in sample_q: q_suffix, k_suffix = "attention.query.weight", "attention.key.weight"
|
| 492 |
+
elif "wq" in sample_q: q_suffix, k_suffix = "attention.wq.weight", "attention.wk.weight"
|
| 493 |
+
else:
|
| 494 |
+
q_suffix = sample_q.split("layers.0.")[-1]
|
| 495 |
+
k_suffix = q_suffix.replace("q.", "k.")
|
| 496 |
+
|
| 497 |
+
log_lines.append(f"🔑 Q suffix:{q_suffix}\n")
|
| 498 |
+
log_lines.append(f"🔑 K suffix:{k_suffix}\n")
|
| 499 |
+
log_lines.append(f"{'═'*70}\n")
|
| 500 |
+
|
| 501 |
+
# ── 辅助:查找 key 所在 shard ────────────────
|
| 502 |
+
def get_shard_for_key(key: str) -> str | None:
    """Return the name of the shard file that holds tensor ``key``.

    When ``index_data`` is truthy, the answer comes from its ``weight_map``
    (``None`` if the key is not listed). Otherwise every file in
    ``sf_files`` is checked in order; a file's header is fetched on first
    use and cached in ``shard_headers`` so it is read at most once.

    Returns ``None`` when no shard contains the key.
    """
    if not index_data:
        for shard_name in sf_files:
            # Fetch and cache the header only for shards not seen yet.
            if shard_name not in shard_headers:
                shard_url = get_file_url(model_id, shard_name)
                header, header_size = read_safetensors_header(shard_url, token)
                shard_headers[shard_name] = (header, header_size)
            tensor_table = shard_headers[shard_name][0]
            if key in tensor_table:
                return shard_name
        return None
    return index_data["weight_map"].get(key)
|
| 512 |
+
|
| 513 |
+
# ── 逐层分析 ─────────────────────────────────
|
| 514 |
+
gqa_inferred = False # 只打印一次 GQA 信息
|
| 515 |
+
|
| 516 |
+
for layer_idx in range(int(max_layers)):
|
| 517 |
+
progress(
|
| 518 |
+
0.10 + 0.85 * layer_idx / int(max_layers),
|
| 519 |
+
desc=f"第 {layer_idx} 层..."
|
| 520 |
+
)
|
| 521 |
+
|
| 522 |
+
q_key = f"model.layers.{layer_idx}.{q_suffix}"
|
| 523 |
+
k_key = f"model.layers.{layer_idx}.{k_suffix}"
|
| 524 |
+
|
| 525 |
+
q_shard = get_shard_for_key(q_key)
|
| 526 |
+
k_shard = get_shard_for_key(k_key)
|
| 527 |
+
|
| 528 |
+
if q_shard is None or k_shard is None:
|
| 529 |
+
log_lines.append(f"\nLayer {layer_idx}: Q/K 未找到,分析结束(共 {layer_idx} 层)\n")
|
| 530 |
+
break
|
| 531 |
+
|
| 532 |
+
for shard in {q_shard, k_shard}:
|
| 533 |
+
if shard not in shard_headers:
|
| 534 |
+
h, hs = read_safetensors_header(get_file_url(model_id, shard), token)
|
| 535 |
+
shard_headers[shard] = (h, hs)
|
| 536 |
+
|
| 537 |
+
try:
|
| 538 |
+
W_q = load_tensor_remote(
|
| 539 |
+
get_file_url(model_id, q_shard), q_key,
|
| 540 |
+
*shard_headers[q_shard], token
|
| 541 |
+
)
|
| 542 |
+
W_k = load_tensor_remote(
|
| 543 |
+
get_file_url(model_id, k_shard), k_key,
|
| 544 |
+
*shard_headers[k_shard], token
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 545 |
)
|
| 546 |
+
except ValueError as e:
|
| 547 |
+
log_lines.append(f"Layer {layer_idx}: ⚠️ 跳过({e})\n")
|
| 548 |
+
continue
|
| 549 |
+
|
| 550 |
+
if W_q is None or W_k is None:
|
| 551 |
+
log_lines.append(f"Layer {layer_idx}: ⚠️ tensor 为 None,跳过\n")
|
| 552 |
+
continue
|
| 553 |
+
|
| 554 |
+
# ── GQA 参数推断(只做一次,后续复用)───
|
| 555 |
+
try:
|
| 556 |
+
n_q_heads, n_kv_heads, d_head = infer_gqa_params(W_q, W_k, config)
|
| 557 |
+
except ValueError as e:
|
| 558 |
+
log_lines.append(f"Layer {layer_idx}: ❌ GQA 推断失败:{e}\n")
|
| 559 |
del W_q, W_k
|
| 560 |
+
continue
|
| 561 |
+
|
| 562 |
+
if not gqa_inferred:
|
| 563 |
+
group_size = n_q_heads // n_kv_heads
|
| 564 |
+
log_lines.append(
|
| 565 |
+
f"🧠 GQA 结构:n_q_heads={n_q_heads}, "
|
| 566 |
+
f"n_kv_heads={n_kv_heads}, "
|
| 567 |
+
f"group_size={group_size}, "
|
| 568 |
+
f"d_head={d_head}\n"
|
| 569 |
+
f" W_q shape: {list(W_q.shape)}, "
|
| 570 |
+
f"W_k shape: {list(W_k.shape)}\n"
|
| 571 |
+
f"{'═'*70}\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 572 |
)
|
| 573 |
+
gqa_inferred = True
|
| 574 |
+
|
| 575 |
+
# ── 逐头计算 ────────────────────────────
|
| 576 |
+
records, layer_log = analyze_layer_heads(
|
| 577 |
+
W_q, W_k, layer_idx,
|
| 578 |
+
n_q_heads, n_kv_heads, d_head
|
| 579 |
+
)
|
| 580 |
+
all_records.extend(records)
|
| 581 |
+
log_lines.append(layer_log)
|
| 582 |
+
|
| 583 |
+
del W_q, W_k # 立即释放内存
|
| 584 |
+
|
| 585 |
+
# ── 全局汇总统计 ─────────────────────────────
|
| 586 |
+
if all_records:
|
| 587 |
+
df = pd.DataFrame(all_records)
|
| 588 |
+
|
| 589 |
+
pearson_vals = df["Pearson_r"].values
|
| 590 |
+
spearman_vals = df["Spearman_r"].values
|
| 591 |
+
ssr_vals = df["SSR"].values
|
| 592 |
+
cos_vals = df["cos_Uq_Uk"].values
|
| 593 |
+
cov_vals = df["Cov_err"].values
|
| 594 |
+
|
| 595 |
+
summary = (
|
| 596 |
+
f"\n{'═'*70}\n"
|
| 597 |
+
f"📊 王氏五定律全局汇总 — {model_id}\n"
|
| 598 |
+
f"{'═'*70}\n"
|
| 599 |
+
f"总分析:{len(df['Layer'].unique())} 层 × "
|
| 600 |
+
f"每层 {df.groupby('Layer').size().iloc[0]} 个 Q 头 "
|
| 601 |
+
f"= {len(all_records)} 条记录\n\n"
|
| 602 |
+
|
| 603 |
+
f"【第一定律 — Pearson r(→ 1)】\n"
|
| 604 |
+
f" Median={np.median(pearson_vals):.6f} "
|
| 605 |
+
f"Mean={np.mean(pearson_vals):.6f} "
|
| 606 |
+
f"Min={np.min(pearson_vals):.6f} "
|
| 607 |
+
f"Max={np.max(pearson_vals):.6f}\n\n"
|
| 608 |
+
|
| 609 |
+
f"【第一定律 — Spearman r(→ 1)】\n"
|
| 610 |
+
f" Median={np.median(spearman_vals):.6f} "
|
| 611 |
+
f"Mean={np.mean(spearman_vals):.6f}\n\n"
|
| 612 |
+
|
| 613 |
+
f"【第二定律 — SSR(→ 0)】\n"
|
| 614 |
+
f" Median={np.median(ssr_vals):.8f} "
|
| 615 |
+
f"Mean={np.mean(ssr_vals):.8f} "
|
| 616 |
+
f"Min={np.min(ssr_vals):.8f} "
|
| 617 |
+
f"Max={np.max(ssr_vals):.8f}\n\n"
|
| 618 |
+
|
| 619 |
+
f"【第四定律 — cos(Uq,Uk) 输出子空间对齐】\n"
|
| 620 |
+
f" Median={np.median(cos_vals):.6f} "
|
| 621 |
+
f"Mean={np.mean(cos_vals):.6f} "
|
| 622 |
+
f"(随机基准 ≈ 1/√d_head)\n\n"
|
| 623 |
+
|
| 624 |
+
f"【协方差对齐误差(越小越好)】\n"
|
| 625 |
+
f" Median={np.median(cov_vals):.6f} "
|
| 626 |
+
f"Mean={np.mean(cov_vals):.6f}\n"
|
| 627 |
+
|
| 628 |
+
f"{'═'*70}\n"
|
| 629 |
+
)
|
| 630 |
+
log_lines.append(summary)
|
| 631 |
+
|
| 632 |
return "".join(log_lines), df
|
| 633 |
+
else:
|
| 634 |
+
return "".join(log_lines) + "\n❌ 未获得任何有效结果\n", None
|
| 635 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 636 |
|
| 637 |
# ─────────────────────────────────────────────
|
| 638 |
# Gradio UI
|
| 639 |
# ─────────────────────────────────────────────
|
| 640 |
|
| 641 |
with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
| 642 |
+
|
| 643 |
gr.Markdown("""
|
| 644 |
# 🔬 Wang's Five Laws — LLM Spectral Analyzer
|
| 645 |
**Mathematical Foundations of Large Language Models (MF-LLM)**
|
| 646 |
+
|
| 647 |
+
通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
|
| 648 |
+
支持 GQA(Grouped Query Attention):对每个 Q 头分别与其对应 K 头做 SVD 分析。
|
| 649 |
+
|
| 650 |
+
| 定律 | 指标 | 理论极值 |
|
| 651 |
+
|------|------|---------|
|
| 652 |
+
| 第一定律 | Pearson r / Spearman r | → 1 |
|
| 653 |
+
| 第二定律 | SSR | → 0 |
|
| 654 |
+
| 第四定律 | cos(Uq, Uk) | ≈ 1/√d_head(随机正交)|
|
| 655 |
+
|
| 656 |
[](https://doi.org/10.5281/zenodo.19707844)
|
| 657 |
+
[](https://hal.science/hal-05609398)
|
| 658 |
""")
|
| 659 |
+
|
| 660 |
with gr.Row():
|
| 661 |
with gr.Column(scale=2):
|
| 662 |
model_input = gr.Textbox(
|
| 663 |
label="HuggingFace 模型 ID",
|
| 664 |
+
placeholder="Qwen/Qwen2.5-14B-Instruct",
|
| 665 |
value="Qwen/Qwen2.5-14B-Instruct"
|
| 666 |
)
|
| 667 |
token_input = gr.Textbox(
|
| 668 |
label="HF Access Token(公开模型可留空)",
|
| 669 |
+
placeholder="hf_xxxxxxxxxxxxxxxx",
|
| 670 |
type="password"
|
| 671 |
)
|
| 672 |
max_layers_input = gr.Slider(
|
| 673 |
label="最大分析层数",
|
| 674 |
+
minimum=1, maximum=100, value=4, step=1
|
| 675 |
)
|
| 676 |
analyze_btn = gr.Button("🚀 开始分析", variant="primary")
|
| 677 |
+
|
| 678 |
with gr.Column(scale=1):
|
| 679 |
gr.Markdown("""
|
| 680 |
+
### ✅ 推荐模型
|
| 681 |
+
```
|
| 682 |
+
Qwen/Qwen2.5-7B-Instruct (GQA 8Q/2K)
|
| 683 |
+
meta-llama/Llama-3.2-1B (GQA)
|
| 684 |
+
google/gemma-2-2b (MHA)
|
| 685 |
+
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
|
| 686 |
+
```
|
| 687 |
+
### GQA 典型结构
|
| 688 |
+
| 模型 | Q头 | KV头 | 每组 |
|
| 689 |
+
|------|-----|------|------|
|
| 690 |
+
| Qwen2.5-7B | 28 | 4 | 7 |
|
| 691 |
+
| LLaMA-3-8B | 32 | 8 | 4 |
|
| 692 |
+
| Qwen2.5-14B | 40 | 8 | 5 |
|
| 693 |
+
| Gemma-2-2B | 8 | 4 | 2 |
|
| 694 |
""")
|
| 695 |
+
|
| 696 |
+
log_output = gr.Textbox(
|
| 697 |
+
label="分析日志(逐头详情)",
|
| 698 |
+
lines=35, max_lines=80
|
| 699 |
+
)
|
| 700 |
+
|
| 701 |
+
table_output = gr.Dataframe(
|
| 702 |
+
label="逐头结果表",
|
| 703 |
+
headers=[
|
| 704 |
+
"Layer","KV_head","Q_head",
|
| 705 |
+
"Pearson_r","Spearman_r",
|
| 706 |
+
"Alpha","Alpha_res",
|
| 707 |
+
"cos_Uq_Uk","Cov_err","SSR"
|
| 708 |
+
]
|
| 709 |
+
)
|
| 710 |
+
|
| 711 |
analyze_btn.click(
|
| 712 |
fn=analyze_model,
|
| 713 |
inputs=[model_input, token_input, max_layers_input],
|