Alex W. committed on
Commit
dbcd9e0
·
1 Parent(s): fe55b90

核心变化:整层 SVD → 逐头 SVD。

Browse files

之前(错误) | 现在(正确)
整个 W_q (4096×4096) | 每个 Q 头 (128×4096)
整个 W_k (4096×4096) | 每个 K 头 (128×4096)
1 次 SVD | n_q_heads 次 SVD
(无逐头对应) | 每个 Q 头 vs 其对应 K 头

GQA 分组逻辑
n_q_heads=32, n_kv_heads=8, group_size=4

KV头0 → Q头 0,1,2,3
KV头1 → Q头 4,5,6,7
KV头2 → Q头 8,9,10,11
...
KV头7 → Q头 28,29,30,31

Files changed (1) hide show
  1. app.py +595 -272
app.py CHANGED
@@ -4,11 +4,12 @@ import struct
4
  import json
5
  import numpy as np
6
  import torch
7
- from scipy import stats
8
  from huggingface_hub import list_repo_files
 
9
 
10
  # ─────────────────────────────────────────────
11
- # 核心:HTTP Range Request 读取单个 tensor
12
  # ─────────────────────────────────────────────
13
 
14
  DTYPE_MAP = {
@@ -18,25 +19,32 @@ DTYPE_MAP = {
18
  "F64": (torch.float64, 8),
19
  "I32": (torch.int32, 4),
20
  "I64": (torch.int64, 8),
 
 
21
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  def get_file_url(model_id: str, filename: str) -> str:
24
- """生成 HuggingFace 直链 URL"""
25
  return f"https://huggingface.co/{model_id}/resolve/main/{filename}"
26
 
27
- def read_safetensors_header(url: str, token: str = None) -> dict:
28
- """
29
- 只读取 safetensors 文件头部(几KB),
30
- 获取所有 tensor 的 offset、dtype、shape
31
- """
32
  headers = {"Authorization": f"Bearer {token}"} if token else {}
33
-
34
- # 第一步:读前 8 bytes → 获取 header_size
35
  r = requests.get(url, headers={**headers, "Range": "bytes=0-7"}, timeout=30)
36
  r.raise_for_status()
37
  header_size = struct.unpack("<Q", r.content)[0]
38
-
39
- # 第二步:读 header JSON
40
  r = requests.get(
41
  url,
42
  headers={**headers, "Range": f"bytes=8-{8 + header_size - 1}"},
@@ -45,346 +53,661 @@ def read_safetensors_header(url: str, token: str = None) -> dict:
45
  r.raise_for_status()
46
  return json.loads(r.content), header_size
47
 
48
- def load_tensor_remote(url: str, tensor_name: str, header: dict,
49
- header_size: int, token: str = None) -> torch.Tensor:
50
- """
51
- 只下载指定 tensor 的字节数据(Range Request),
52
- 完全不缓存整个文件
53
- """
54
  if tensor_name not in header:
55
  return None
56
-
57
- info = header[tensor_name]
58
  dtype_str = info["dtype"]
59
- shape = info["shape"]
60
- offsets = info["data_offsets"] # [start, end] 相对于数据区
61
-
62
  if dtype_str not in DTYPE_MAP:
63
- raise ValueError(f"不支持的 dtype: {dtype_str}")
64
-
 
 
65
  torch_dtype, _ = DTYPE_MAP[dtype_str]
66
-
67
- # 计算文件中的绝对字节位置
68
- # safetensors 文件布局:8字节(header_size) + header_size字节(header) + 数据区
69
  abs_start = 8 + header_size + offsets[0]
70
  abs_end = 8 + header_size + offsets[1] - 1
71
-
72
  req_headers = {"Range": f"bytes={abs_start}-{abs_end}"}
73
  if token:
74
  req_headers["Authorization"] = f"Bearer {token}"
75
-
76
  r = requests.get(url, headers=req_headers, timeout=120)
77
  r.raise_for_status()
78
-
79
- # 转换为 tensor(BF16 需特殊处理)
80
  raw = r.content
 
81
  if torch_dtype == torch.bfloat16:
82
  tensor = torch.frombuffer(bytearray(raw), dtype=torch.int16).view(torch.bfloat16)
83
  else:
84
  tensor = torch.frombuffer(bytearray(raw), dtype=torch_dtype)
85
-
86
- return tensor.reshape(shape).float() # 统一转 float32 做 SVD
87
 
88
- # ─────────────────────────────────────────────
89
- # 查找模型的 safetensors 文件列表
90
- # ─────────────────────────────────────────────
91
 
92
  def get_safetensor_files(model_id: str, token: str = None) -> list:
93
- """列出模型 repo 中的所有 .safetensors 文件"""
94
  kwargs = {"token": token} if token else {}
95
- all_files = list(list_repo_files(model_id, **kwargs))
96
- sf_files = [f for f in all_files if f.endswith(".safetensors")]
97
- return sorted(sf_files)
 
 
98
 
99
- def find_index_file(model_id: str, token: str = None):
100
- """检查是否有 model.safetensors.index.json(分片模型)"""
101
  url = f"https://huggingface.co/{model_id}/resolve/main/model.safetensors.index.json"
102
  headers = {"Authorization": f"Bearer {token}"} if token else {}
103
  r = requests.get(url, headers=headers, timeout=15)
104
- if r.status_code == 200:
105
- return r.json()
106
- return None
 
 
 
 
 
 
 
107
 
108
  # ─────────────────────────────────────────────
109
- # 王氏五定律计算核心
110
  # ─────────────────────────────────────────────
111
 
112
- def compute_svd_metrics(W_q: torch.Tensor, W_k: torch.Tensor):
113
- """对一层的 Q/K 矩阵计算 SVD,返回 Pearson r 和 SSR"""
114
- _, sq, _ = torch.linalg.svd(W_q, full_matrices=False)
115
- _, sk, _ = torch.linalg.svd(W_k, full_matrices=False)
116
-
117
- sq = sq.numpy()
118
- sk = sk.numpy()
119
-
120
- # 第一定律:Pearson r
121
- r, _ = stats.pearsonr(sq, sk)
122
-
123
- # 第二定律:SSR(谱形状残差)
124
- sq_norm = sq / (np.linalg.norm(sq) + 1e-10)
125
- sk_norm = sk / (np.linalg.norm(sk) + 1e-10)
126
- ssr = np.mean(np.abs(sq_norm - sk_norm))
127
-
128
- return float(r), float(ssr)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  # ─────────────────────────────────────────────
131
- # 主分析函数:扫描所有层
132
  # ─────────────────────────────────────────────
133
 
134
- def analyze_model(model_id: str, hf_token: str, max_layers: int, progress=gr.Progress()):
135
  """
136
- 主函数
137
- 1. 找到所有 safetensors 文件
138
- 2. 逐层用 Range Request 读取 Q/K tensor
139
- 3. 计算 SVD,输出 Pearson r 和 SSR
 
 
 
 
 
 
140
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  if not model_id.strip():
142
- return "❌ 请输入模型 ID,例如:Qwen/Qwen2.5-14B-Instruct", None
143
-
144
- token = hf_token.strip() if hf_token.strip() else None
145
- results = []
146
- log_lines = [f"🔍 分析模型:{model_id}\n"]
147
-
 
 
 
 
 
 
 
 
 
148
  try:
149
- # Step 1: 获取 tensor 名称 → 文件的映射
150
- progress(0.05, desc="读取模型索引...")
151
-
152
- # 尝试分片索引
153
- index_data = find_index_file(model_id, token)
154
-
155
- # 收集所有 shard 的 header
156
- shard_headers = {} # filename → (header_dict, header_size)
157
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  if index_data:
159
- weight_map = index_data.get("weight_map", {})
160
- log_lines.append(f"📦 分片模型,共 {len(set(weight_map.values()))} 个 shard 文件\n")
 
 
161
  else:
162
- # 单文件模型
163
  sf_files = get_safetensor_files(model_id, token)
164
  if not sf_files:
165
- return "❌ 未找到 .safetensors 文件,请检查模型 ID 或 token", None
166
- weight_map = {}
167
- for f in sf_files:
168
- log_lines.append(f"📦 单文件模型:{f}\n")
169
-
170
- # Step 2: 检测层数和 Q/K key 命名规则
171
- progress(0.1, desc="检层结构...")
172
-
173
- # 先读第一个 shard 来探测 key 命名
174
- first_shard = None
175
  if index_data:
176
- first_shard = list(set(index_data["weight_map"].values()))[0]
177
  else:
178
  first_shard = sf_files[0]
179
-
180
  first_url = get_file_url(model_id, first_shard)
181
  first_header, first_hsize = read_safetensors_header(first_url, token)
182
  shard_headers[first_shard] = (first_header, first_hsize)
183
-
184
- # 自动检测 Q/K key 命名模式
185
  all_keys = list(first_header.keys())
186
- q_keys_sample = [k for k in all_keys if any(
187
- p in k for p in ["q_proj.weight", "query.weight", "q.weight", "wq.weight"]
188
- )]
189
-
190
- if not q_keys_sample:
191
- # 展示所有 key 供用户参考
192
- sample_keys = "\n".join(all_keys[:30])
193
- return f"⚠️ 无法自动识别 Q/K key,前30个 key:\n{sample_keys}", None
194
-
195
- # 判断命名模式
196
- sample_q = q_keys_sample[0]
197
- if "q_proj" in sample_q:
198
- q_pattern = "self_attn.q_proj.weight"
199
- k_pattern = "self_attn.k_proj.weight"
200
- elif "query" in sample_q:
201
- q_pattern = "attention.query.weight"
202
- k_pattern = "attention.key.weight"
203
- else:
204
- q_pattern = sample_q.split(".")[-3] + ".q.weight"
205
- k_pattern = sample_q.split(".")[-3] + ".k.weight"
206
-
207
- log_lines.append(f"🔑 Q key 模式:{q_pattern}\n")
208
- log_lines.append(f"🔑 K key 模式:{k_pattern}\n\n")
209
-
210
- # Step 3: 逐层计算
211
- max_layers = int(max_layers)
212
- layer_idx = 0
213
- pearson_list = []
214
- ssr_list = []
215
-
216
- while layer_idx < max_layers:
217
- progress(0.1 + 0.85 * layer_idx / max_layers,
218
- desc=f"处理第 {layer_idx} 层...")
219
-
220
- # 构建 key 名称(支持常见命名方式)
221
- q_key = f"model.layers.{layer_idx}.{q_pattern}"
222
- k_key = f"model.layers.{layer_idx}.{k_pattern}"
223
-
224
- # 找到对应的 shard
225
- def get_shard_for_key(key):
226
- if index_data:
227
- return index_data["weight_map"].get(key)
228
- else:
229
- # 遍历所有 shard header 查找
230
- for sf in sf_files:
231
- if sf not in shard_headers:
232
- url = get_file_url(model_id, sf)
233
- h, hs = read_safetensors_header(url, token)
234
- shard_headers[sf] = (h, hs)
235
- h, _ = shard_headers[sf]
236
- if key in h:
237
- return sf
238
- return None
239
-
240
- q_shard = get_shard_for_key(q_key)
241
- k_shard = get_shard_for_key(k_key)
242
-
243
- if q_shard is None or k_shard is None:
244
- log_lines.append(f"Layer {layer_idx}: ⚠️ 未找到 Q/K,停止\n")
245
- break
246
-
247
- # 加载对应 shard 的 header
248
- for shard in [q_shard, k_shard]:
249
- if shard not in shard_headers:
250
- url = get_file_url(model_id, shard)
251
- h, hs = read_safetensors_header(url, token)
252
- shard_headers[shard] = (h, hs)
253
-
254
- # Range Request 只下载 Q 和 K tensor
255
- q_url = get_file_url(model_id, q_shard)
256
- k_url = get_file_url(model_id, k_shard)
257
-
258
- q_header, q_hsize = shard_headers[q_shard]
259
- k_header, k_hsize = shard_headers[k_shard]
260
-
261
- W_q = load_tensor_remote(q_url, q_key, q_header, q_hsize, token)
262
- W_k = load_tensor_remote(k_url, k_key, k_header, k_hsize, token)
263
-
264
- if W_q is None or W_k is None:
265
- log_lines.append(f"Layer {layer_idx}: ⚠️ tensor 读取失败\n")
266
- break
267
-
268
- r, ssr = compute_svd_metrics(W_q, W_k)
269
- pearson_list.append(r)
270
- ssr_list.append(ssr)
271
- results.append({
272
- "Layer": layer_idx,
273
- "Pearson_r": round(r, 6),
274
- "SSR": round(ssr, 6)
275
- })
276
-
277
- log_lines.append(
278
- f"Layer {layer_idx:3d} | Q shape: {list(W_q.shape)} "
279
- f"| Pearson r = {r:.4f} | SSR = {ssr:.6f}\n"
280
  )
281
-
282
- # 释放内存
 
 
 
 
 
 
 
 
 
 
 
283
  del W_q, W_k
284
- layer_idx += 1
285
-
286
- # Step 4: 汇总统计
287
- if pearson_list:
288
- summary = (
289
- f"\n{'='*50}\n"
290
- f"📊 王氏五定律分析结果 — {model_id}\n"
291
- f"{'='*50}\n"
292
- f"总层数分析: {len(pearson_list)}\n\n"
293
- f"【第一定律 - 谱线性对齐 Pearson r】\n"
294
- f" Median: {np.median(pearson_list):.4f} "
295
- f" Mean: {np.mean(pearson_list):.4f}\n"
296
- f" Min: {np.min(pearson_list):.4f} "
297
- f" Max: {np.max(pearson_list):.4f}\n\n"
298
- f"【第二定律 - 谱形状保真 SSR】\n"
299
- f" Median: {np.median(ssr_list):.6f} "
300
- f" Mean: {np.mean(ssr_list):.6f}\n"
301
- f" Min: {np.min(ssr_list):.6f} "
302
- f" Max: {np.max(ssr_list):.6f}\n\n"
303
- f"⚡ 理论值:Pearson r → 1,SSR → 0\n"
304
- f"{'='*50}\n"
305
  )
306
- log_lines.append(summary)
307
-
308
- # 生成图表数据
309
- import pandas as pd
310
- df = pd.DataFrame(results)
311
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  return "".join(log_lines), df
313
-
314
- except requests.exceptions.HTTPError as e:
315
- if e.response.status_code == 401:
316
- return "❌ 401 未授权:该模型需要 HF Token,请填写 Access Token", None
317
- elif e.response.status_code == 403:
318
- return "❌ 403 禁止访问:请确认已在 HF 接受该模型的使用协议", None
319
- elif e.response.status_code == 404:
320
- return f"❌ 404 未找到:模型 {model_id} 不存在或文件路径错误", None
321
- else:
322
- return f"❌ HTTP 错误:{e}", None
323
- except Exception as e:
324
- return f"❌ 错误:{str(e)}", None
325
 
326
  # ─────────────────────────────────────────────
327
  # Gradio UI
328
  # ─────────────────────────────────────────────
329
 
330
  with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
 
331
  gr.Markdown("""
332
  # 🔬 Wang's Five Laws — LLM Spectral Analyzer
333
  **Mathematical Foundations of Large Language Models (MF-LLM)**
334
-
335
- 通过 HTTP Range Request 直接读取 HuggingFace 模型的 Q/K 权重 tensor
336
- **无需下载完整模型**,计算王氏五定律的核心指标
337
- - 📐 **第一定律**:Pearson r → 1(谱线性对齐)
338
- - 📏 **第二定律**:SSR 0(谱形状保真)
339
-
 
 
 
 
340
  [![DOI](https://img.shields.io/badge/DOI-10.5281%2Fzenodo.19707844-blue)](https://doi.org/10.5281/zenodo.19707844)
 
341
  """)
342
-
343
  with gr.Row():
344
  with gr.Column(scale=2):
345
  model_input = gr.Textbox(
346
  label="HuggingFace 模型 ID",
347
- placeholder="例如:Qwen/Qwen2.5-14B-Instruct",
348
  value="Qwen/Qwen2.5-14B-Instruct"
349
  )
350
  token_input = gr.Textbox(
351
  label="HF Access Token(公开模型可留空)",
352
- placeholder="hf_xxxxxxxxxxxx",
353
  type="password"
354
  )
355
  max_layers_input = gr.Slider(
356
  label="最大分析层数",
357
- minimum=1, maximum=100, value=32, step=1
358
  )
359
  analyze_btn = gr.Button("🚀 开始分析", variant="primary")
360
-
361
  with gr.Column(scale=1):
362
  gr.Markdown("""
363
- ### 💡 快速测试模型
364
- - `meta-llama/Llama-3.2-1B`
365
- - `Qwen/Qwen2.5-7B-Instruct`
366
- - `google/gemma-2-2b`
367
- - `deepseek-ai/DeepSeek-R1-Distill-Qwen-14B`
368
-
369
- ### ⚙️ 运行环境
370
- - CPU Only(无 GPU)
371
- - 每层约 5-30 秒(取决于网速和矩阵大小)
372
- - **零缓存**:仅下载 Q/K tensor 字节
 
 
 
 
373
  """)
374
-
375
- with gr.Row():
376
- log_output = gr.Textbox(
377
- label="分析日志",
378
- lines=25,
379
- max_lines=50
380
- )
381
-
382
- with gr.Row():
383
- table_output = gr.Dataframe(
384
- label="逐层结果(Pearson r & SSR)",
385
- headers=["Layer", "Pearson_r", "SSR"]
386
- )
387
-
 
 
388
  analyze_btn.click(
389
  fn=analyze_model,
390
  inputs=[model_input, token_input, max_layers_input],
 
4
  import json
5
  import numpy as np
6
  import torch
7
+ from scipy.stats import pearsonr, spearmanr
8
  from huggingface_hub import list_repo_files
9
+ import pandas as pd
10
 
11
  # ─────────────────────────────────────────────
12
+ # dtype 映射
13
  # ─────────────────────────────────────────────
14
 
15
  DTYPE_MAP = {
 
19
  "F64": (torch.float64, 8),
20
  "I32": (torch.int32, 4),
21
  "I64": (torch.int64, 8),
22
+ "I8": (torch.int8, 1),
23
+ "U8": (torch.uint8, 1),
24
  }
25
+ try:
26
+ DTYPE_MAP["F8_E4M3"] = (torch.float8_e4m3fn, 1)
27
+ DTYPE_MAP["F8_E5M2"] = (torch.float8_e5m2, 1)
28
+ except AttributeError:
29
+ pass
30
+
31
+ UNSUPPORTED_SVD_DTYPES = {"I8", "U8", "I32", "I64", "F8_E4M3", "F8_E5M2"}
32
+ QUANTIZED_KEY_SIGNATURES = ["qweight", "qzeros", "scales", "g_idx", "packed_weight"]
33
+
34
+
35
+ # ─────────────────────────────────────────────
36
+ # 工具函数
37
+ # ─────────────────────────────────────────────
38
 
39
def get_file_url(model_id: str, filename: str) -> str:
    """Build the HuggingFace ``resolve/main`` URL for one file of a model repo."""
    host = "https://huggingface.co"
    return f"{host}/{model_id}/resolve/main/{filename}"
41
 
42
+
43
+ def read_safetensors_header(url: str, token: str = None) -> tuple[dict, int]:
 
 
 
44
  headers = {"Authorization": f"Bearer {token}"} if token else {}
 
 
45
  r = requests.get(url, headers={**headers, "Range": "bytes=0-7"}, timeout=30)
46
  r.raise_for_status()
47
  header_size = struct.unpack("<Q", r.content)[0]
 
 
48
  r = requests.get(
49
  url,
50
  headers={**headers, "Range": f"bytes=8-{8 + header_size - 1}"},
 
53
  r.raise_for_status()
54
  return json.loads(r.content), header_size
55
 
56
+
57
+ def load_tensor_remote(
58
+ url: str, tensor_name: str,
59
+ header: dict, header_size: int,
60
+ token: str = None
61
+ ) -> torch.Tensor | None:
62
  if tensor_name not in header:
63
  return None
64
+ info = header[tensor_name]
 
65
  dtype_str = info["dtype"]
66
+ shape = info["shape"]
67
+ offsets = info["data_offsets"]
68
+
69
  if dtype_str not in DTYPE_MAP:
70
+ raise ValueError(f"未知 dtype: {dtype_str}")
71
+ if dtype_str in UNSUPPORTED_SVD_DTYPES:
72
+ raise ValueError(f"dtype={dtype_str} 为量化格式,无法 SVD")
73
+
74
  torch_dtype, _ = DTYPE_MAP[dtype_str]
 
 
 
75
  abs_start = 8 + header_size + offsets[0]
76
  abs_end = 8 + header_size + offsets[1] - 1
77
+
78
  req_headers = {"Range": f"bytes={abs_start}-{abs_end}"}
79
  if token:
80
  req_headers["Authorization"] = f"Bearer {token}"
81
+
82
  r = requests.get(url, headers=req_headers, timeout=120)
83
  r.raise_for_status()
 
 
84
  raw = r.content
85
+
86
  if torch_dtype == torch.bfloat16:
87
  tensor = torch.frombuffer(bytearray(raw), dtype=torch.int16).view(torch.bfloat16)
88
  else:
89
  tensor = torch.frombuffer(bytearray(raw), dtype=torch_dtype)
 
 
90
 
91
+ return tensor.reshape(shape).float()
92
+
 
93
 
94
def get_safetensor_files(model_id: str, token: str = None) -> list:
    """Return the sorted names of all ``.safetensors`` files in the model repo.

    The token is forwarded to ``list_repo_files`` only when it is non-empty.
    """
    extra = {}
    if token:
        extra["token"] = token
    names = [
        name
        for name in list_repo_files(model_id, **extra)
        if name.endswith(".safetensors")
    ]
    names.sort()
    return names
100
+
101
 
102
def find_index_file(model_id: str, token: str = None) -> dict | None:
    """Fetch ``model.safetensors.index.json`` for a model repo.

    Returns the parsed JSON when the server answers 200, and ``None`` for any
    other status code.
    """
    auth = {}
    if token:
        auth["Authorization"] = f"Bearer {token}"
    index_url = f"https://huggingface.co/{model_id}/resolve/main/model.safetensors.index.json"
    response = requests.get(index_url, headers=auth, timeout=15)
    if response.status_code != 200:
        return None
    return response.json()
107
+
108
+
109
def _http_error_msg(e: requests.exceptions.HTTPError, model_id: str) -> str:
    """Turn an HTTP error into the user-facing message shown in the log.

    401, 403 and 404 get dedicated messages; any other status falls back to a
    generic message containing the status code and the exception text.
    """
    status = e.response.status_code
    known_messages = {
        401: "❌ 401 未授权:请填写有效的 HF Access Token",
        403: f"❌ 403 禁止访问:请先接受 {model_id} 的使用协议",
        404: f"❌ 404 未找到:模型 {model_id} 不存在",
    }
    if status in known_messages:
        return known_messages[status]
    return f"❌ HTTP {status}:{e}"
115
+
116
 
117
  # ─────────────────────────────────────────────
118
+ # 量化三重检测
119
  # ─────────────────────────────────────────────
120
 
121
def check_quantization(model_id: str, token: str = None) -> tuple[bool, str]:
    """Run three best-effort checks for quantized weights before analysis.

    The checks are: (1) quantization fields in ``config.json``, (2) keywords in
    the model id and file extensions in the repo listing, (3) tensor key names
    and dtypes in the first shard's safetensors header. A failing check is
    recorded as a warning instead of aborting.

    Args:
        model_id: HuggingFace model repo id.
        token: Optional access token.

    Returns:
        ``(is_blocked, message)``. ``is_blocked`` is True when the model must
        not be analysed (GPTQ/AWQ/GGUF, no safetensors files, or quantized
        tensor keys); ``message`` explains the decision or lists the warnings.
    """
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    warnings = []

    # Check 1: config.json
    try:
        r = requests.get(
            f"https://huggingface.co/{model_id}/resolve/main/config.json",
            headers=headers, timeout=15
        )
        if r.status_code == 200:
            cfg = r.json()
            # The key may be present with a null value; fall back to an empty dict.
            qcfg = cfg.get("quantization_config") or {}
            if not isinstance(qcfg, dict):
                qcfg = {}
            raw_qt = (qcfg.get("quant_type") or
                      qcfg.get("quant_method") or
                      cfg.get("quantization") or "")
            # A non-string value (e.g. a nested dict) has no .lower(); treat it
            # as "no method name" instead of misreporting config.json as unreadable.
            qt = raw_qt.lower() if isinstance(raw_qt, str) else ""
            if "gptq" in qt:
                bits = qcfg.get("bits", "?")
                return True, (f"❌ 检测到 GPTQ {bits}bit 量化\n"
                              f" 请改用原始 BF16 版本。")
            if "awq" in qt:
                return True, "❌ 检测到 AWQ 量化,请改用原始 BF16 版本。"
            if "bitsandbytes" in qt or "bnb" in qt:
                warnings.append("⚠️ 检测到 bitsandbytes 量化,结果可能失真")
    except Exception:
        # Best effort: the remaining checks still run.
        warnings.append("⚠️ 无法读取 config.json")

    # Check 2: keywords in the model id / file extensions in the repo
    mid_lower = model_id.lower()
    for kw in ["gptq", "awq", "gguf"]:
        if kw in mid_lower:
            return True, f"❌ 模型名含 '{kw.upper()}',为量化版本,请使用原始 BF16 版本。"

    try:
        all_files = list(list_repo_files(model_id, token=token))
        if any(f.endswith(".gguf") for f in all_files):
            return True, "❌ 检测到 .gguf 文件,不支持该格式。"
        if not any(f.endswith(".safetensors") for f in all_files):
            return True, "❌ 未找到 .safetensors 文件,仅支持 safetensors 格式。"
    except Exception as e:
        warnings.append(f"⚠️ 文件列表检测失败:{e}")

    # Check 3: key signatures and dtypes in the first shard's header
    try:
        index_data = find_index_file(model_id, token)
        if index_data:
            first_shard = sorted(set(index_data["weight_map"].values()))[0]
        else:
            sf = get_safetensor_files(model_id, token)
            first_shard = sf[0]
        hdr, _ = read_safetensors_header(get_file_url(model_id, first_shard), token)
        # "__metadata__" is a free-form string map, not a tensor entry; keeping
        # it would put an empty dtype into the set reported below.
        tensor_keys = [k for k in hdr if k != "__metadata__"]
        bad_keys = [k for k in tensor_keys
                    if any(sig in k for sig in QUANTIZED_KEY_SIGNATURES)]
        if bad_keys:
            return True, (f"❌ 检测到量化 key:{bad_keys[:3]}\n"
                          f" 请使用原始 BF16 版本。")
        dtypes = {hdr[k].get("dtype", "") for k in tensor_keys[:20]}
        dtypes.discard("")
        good = dtypes - UNSUPPORTED_SVD_DTYPES
        if good:
            warnings.append(f"✅ 权重格式:{good}")
        elif dtypes:
            # Every sampled tensor has a dtype that load_tensor_remote rejects.
            warnings.append(f"⚠️ 前 20 个 tensor 的 dtype 均不支持 SVD:{dtypes}")
    except Exception as e:
        warnings.append(f"⚠️ header 检测失败:{e}")

    msg = "\n".join(warnings) if warnings else "✅ 未检测到量化,可以正常分析"
    return False, msg
187
+
188
 
189
  # ─────────────────────────────────────────────
190
+ # GQA 参数自动推断
191
  # ─────────────────────────────────────────────
192
 
193
+ def infer_gqa_params(W_q: torch.Tensor, W_k: torch.Tensor, config: dict | None) -> tuple[int,int,int]:
194
  """
195
+ 自动推断
196
+ - n_q_heads : Q 头数量
197
+ - n_kv_heads : KV 头数量(GQA)
198
+ - d_head : 每个头的维度
199
+
200
+ 权重 shape 约定(最常见):
201
+ W_q : (n_q_heads * d_head, d_model) → shape[0] = n_q * d_h
202
+ W_k : (n_kv_heads * d_head, d_model) → shape[0] = n_kv * d_h
203
+
204
+ d_head 优先从 config.json 读取,其次用常见默认值猜测。
205
  """
206
+ q_rows, d_model = W_q.shape[0], W_q.shape[1]
207
+ k_rows = W_k.shape[0]
208
+
209
+ # 从 config.json 读取 d_head
210
+ d_head = None
211
+ if config:
212
+ d_head = (
213
+ config.get("head_dim") or
214
+ config.get("kv_channels") or
215
+ config.get("hidden_size", 0) // config.get("num_attention_heads", 1)
216
+ )
217
+ if d_head == 0:
218
+ d_head = None
219
+
220
+ # 如果 config 没给,用常见值探测(64, 80, 96, 128, 256)
221
+ if not d_head:
222
+ for candidate in [256, 128, 96, 80, 64]:
223
+ if q_rows % candidate == 0 and k_rows % candidate == 0:
224
+ d_head = candidate
225
+ break
226
+
227
+ if not d_head:
228
+ raise ValueError(
229
+ f"无法推断 d_head:W_q.shape={W_q.shape}, W_k.shape={W_k.shape}\n"
230
+ f"请在 config.json 中确认 head_dim 字段。"
231
+ )
232
+
233
+ n_q_heads = q_rows // d_head
234
+ n_kv_heads = k_rows // d_head
235
+
236
+ if n_q_heads % n_kv_heads != 0:
237
+ raise ValueError(
238
+ f"n_q_heads={n_q_heads} 不能被 n_kv_heads={n_kv_heads} 整除,"
239
+ f"请检查 d_head 推断是否正确。"
240
+ )
241
+
242
+ return n_q_heads, n_kv_heads, d_head
243
+
244
+
245
+ # ─────────────────────────────────────────────
246
+ # 逐头 SVD 指标计算
247
+ # ─────────────────────────────────────────────
248
+
249
def compute_pearson_corr_torch(s_q: torch.Tensor, s_k: torch.Tensor) -> float:
    """Return the Pearson correlation coefficient of two singular-value vectors.

    Both tensors are moved to CPU and converted to NumPy before calling
    ``scipy.stats.pearsonr``; the p-value is discarded.
    """
    q_values, k_values = (t.cpu().numpy() for t in (s_q, s_k))
    coefficient, _p_value = pearsonr(q_values, k_values)
    return float(coefficient)
254
+
255
+
256
def compute_singular_value_ratio(
    s_q: torch.Tensor, s_k: torch.Tensor
) -> tuple[float, float]:
    """Estimate the scale factor between two spectra and the relative misfit.

    Both spectra are truncated to the shorter length, then:
        alpha    = median(s_q / (s_k + 1e-10))
        residual = mean|s_q - alpha * s_k| / (mean(s_q) + 1e-10)

    NOTE(review): per the PyTorch docs, ``Tensor.median`` returns the lower of
    the two middle values for even-length input rather than their average.

    Returns:
        ``(alpha, residual)`` as Python floats.
    """
    n = min(s_q.shape[0], s_k.shape[0])
    q_vals, k_vals = s_q[:n], s_k[:n]
    alpha = float((q_vals / (k_vals + 1e-10)).median())
    mean_abs_dev = (q_vals - alpha * k_vals).abs().mean()
    residual = float(mean_abs_dev / (q_vals.mean() + 1e-10))
    return alpha, residual
270
+
271
+
272
def compute_left_vector_alignment(
    U_q: torch.Tensor, U_k: torch.Tensor
) -> float:
    """Mean absolute inner product between matching columns of ``U_q`` and ``U_k``.

    Both matrices are truncated to the smaller column count, then
    ``mean_i |<u_q_i, u_k_i>|`` is returned; the absolute value makes the
    score independent of each column's sign.
    """
    n_cols = min(U_q.shape[1], U_k.shape[1])
    left_q = U_q[:, :n_cols]
    left_k = U_k[:, :n_cols]
    column_dots = torch.sum(left_q * left_k, dim=0)
    return float(torch.abs(column_dots).mean())
284
+
285
+
286
def compute_covariance_alignment(
    W_q: torch.Tensor, W_k: torch.Tensor, alpha: float
) -> float:
    """Relative Frobenius error between the Gram matrices of ``W_q`` and ``W_k``.

    err = ||W_q W_q^T - alpha^2 * W_k W_k^T||_F / (||W_k W_k^T||_F + 1e-10)
    """
    gram_q = torch.matmul(W_q, W_q.T)
    gram_k = torch.matmul(W_k, W_k.T)
    numerator = torch.norm(gram_q - (alpha ** 2) * gram_k, p='fro')
    denominator = torch.norm(gram_k, p='fro') + 1e-10
    return float(numerator / denominator)
298
+
299
+
300
def compute_ssr(s_q: torch.Tensor, s_k: torch.Tensor) -> float:
    """Normalized spectral shape residual between two singular-value vectors.

    Both spectra are truncated to the shorter length, each is divided by its
    L2 norm (plus 1e-10), and the mean absolute element-wise difference of the
    normalized spectra is returned.
    """
    n = min(s_q.shape[0], s_k.shape[0])
    spectrum_q = s_q[:n].cpu().numpy()
    spectrum_k = s_k[:n].cpu().numpy()
    unit_q = spectrum_q / (np.linalg.norm(spectrum_q) + 1e-10)
    unit_k = spectrum_k / (np.linalg.norm(spectrum_k) + 1e-10)
    gap = np.abs(unit_q - unit_k)
    return float(np.mean(gap))
311
+
312
+
313
def analyze_layer_heads(
    W_q: torch.Tensor,
    W_k: torch.Tensor,
    layer_idx: int,
    n_q_heads: int,
    n_kv_heads: int,
    d_head: int,
) -> tuple[list[dict], str]:
    """Compute per-head spectral metrics for one layer under GQA grouping.

    Each KV head serves ``group_size = n_q_heads // n_kv_heads`` consecutive
    Q heads. Head matrices are taken as ``d_head``-row slices of ``W_q`` and
    ``W_k``; every Q head is compared with the K head of its group.

    Returns:
        ``(records, log_text)``: one metrics dict per Q head, plus a formatted
        table for the log.
    """
    group_size = n_q_heads // n_kv_heads
    rows: list[dict] = []
    report: list[str] = [
        (
            f"\n{'─'*70}\n"
            f"Layer {layer_idx:3d} "
            f"[n_q={n_q_heads}, n_kv={n_kv_heads}, "
            f"group={group_size}, d_head={d_head}]\n"
            f"{'─'*70}\n"
        ),
        (
            f" {'KV头':>4} {'Q头':>4} "
            f"{'Pearson':>8} {'Spearman':>9} "
            f"{'α':>7} {'α残差':>8} "
            f"{'cos(Uq,Uk)':>10} {'协方差误差':>10} {'SSR':>10}\n"
        ),
    ]

    for kv_h in range(n_kv_heads):
        # The K head is decomposed once and reused for every Q head of its group.
        k_head = W_k[kv_h * d_head : (kv_h + 1) * d_head, :]
        U_k, s_k, _ = torch.linalg.svd(k_head, full_matrices=False)

        first_q = kv_h * group_size
        for h_idx in range(first_q, first_q + group_size):
            q_head = W_q[h_idx * d_head : (h_idx + 1) * d_head, :]
            U_q, s_q, _ = torch.linalg.svd(q_head, full_matrices=False)

            # Correlations are computed on spectra truncated to a common length.
            common = min(s_q.shape[0], s_k.shape[0])
            sq_common = s_q[:common]
            sk_common = s_k[:common]

            # Pearson and Spearman correlation of the two spectra.
            pearson_r = compute_pearson_corr_torch(sq_common, sk_common)
            rho, _ = spearmanr(sq_common.cpu().numpy(), sk_common.cpu().numpy())
            spearman_r = float(rho)

            # Scale factor, its residual, and the remaining alignment metrics.
            alpha, alpha_res = compute_singular_value_ratio(s_q, s_k)
            cos_u = compute_left_vector_alignment(U_q, U_k)
            cov_err = compute_covariance_alignment(q_head, k_head, alpha)
            ssr = compute_ssr(s_q, s_k)

            rows.append({
                "Layer": layer_idx,
                "KV_head": kv_h,
                "Q_head": h_idx,
                "Pearson_r": round(pearson_r, 6),
                "Spearman_r": round(spearman_r, 6),
                "Alpha": round(alpha, 4),
                "Alpha_res": round(alpha_res, 6),
                "cos_Uq_Uk": round(cos_u, 6),
                "Cov_err": round(cov_err, 6),
                "SSR": round(ssr, 6),
            })

            report.append(
                f" KV={kv_h:>3d} Q={h_idx:>3d} "
                f"{pearson_r:>+8.4f} {spearman_r:>+9.4f} "
                f"{alpha:>7.4f} {alpha_res:>8.2e} "
                f"{cos_u:>10.4f} {cov_err:>10.4f} {ssr:>10.6f}\n"
            )

    return rows, "".join(report)
399
+
400
+
401
+ # ─────────────────────────────────────────────
402
+ # 主分析函数
403
+ # ─────────────────────────────────────────────
404
+
405
+ def analyze_model(
406
+ model_id: str,
407
+ hf_token: str,
408
+ max_layers: int,
409
+ progress=gr.Progress()
410
+ ):
411
  if not model_id.strip():
412
+ return "❌ 请输入模型 ID", None
413
+
414
+ token = hf_token.strip() or None
415
+ log_lines = [f"🔍 分析模型:{model_id}\n{'═'*70}\n"]
416
+ all_records: list[dict] = []
417
+
418
+ # ── 量化检测 ─────────────────────────────────
419
+ progress(0.02, desc="量化检测...")
420
+ is_blocked, quant_msg = check_quantization(model_id, token)
421
+ log_lines.append(f"【量化检测】\n{quant_msg}\n{'─'*70}\n")
422
+ if is_blocked:
423
+ return "".join(log_lines), None
424
+
425
+ # ── 读取 config.json(用于推断 d_head)────────
426
+ config = None
427
  try:
428
+ r = requests.get(
429
+ f"https://huggingface.co/{model_id}/resolve/main/config.json",
430
+ headers={"Authorization": f"Bearer {token}"} if token else {},
431
+ timeout=15
432
+ )
433
+ if r.status_code == 200:
434
+ config = r.json()
435
+ log_lines.append(
436
+ f"📋 config.json:\n"
437
+ f" hidden_size = {config.get('hidden_size')}\n"
438
+ f" num_attention_heads = {config.get('num_attention_heads')}\n"
439
+ f" num_key_value_heads = {config.get('num_key_value_heads')}\n"
440
+ f" head_dim = {config.get('head_dim')}\n"
441
+ f"{'─'*70}\n"
442
+ )
443
+ except Exception:
444
+ log_lines.append("⚠️ 无法读取 config.json,将从 weight shape 自动推断\n")
445
+
446
+ # ── 获取分片索引 ─────────────────────────────
447
+ progress(0.05, desc="读取模型索引...")
448
+ try:
449
+ index_data = find_index_file(model_id, token)
450
+ shard_headers: dict[str, tuple[dict, int]] = {}
451
+
452
  if index_data:
453
+ weight_map = index_data["weight_map"]
454
+ log_lines.append(
455
+ f"📦 分片模型,共 {len(set(weight_map.values()))} 个 shard\n"
456
+ )
457
  else:
 
458
  sf_files = get_safetensor_files(model_id, token)
459
  if not sf_files:
460
+ return "❌ 未找到 .safetensors 文件", None
461
+ weight_map = None
462
+ log_lines.append(f"📦 单文件:{sf_files}\n")
463
+ except requests.exceptions.HTTPError as e:
464
+ return _http_error_msg(e, model_id), None
465
+
466
+ # ── 探测第一个 shard,识别 Q/K key 命名 ──────
467
+ progress(0.08, desc="识别层结构...")
468
+ try:
 
469
  if index_data:
470
+ first_shard = sorted(set(index_data["weight_map"].values()))[0]
471
  else:
472
  first_shard = sf_files[0]
473
+
474
  first_url = get_file_url(model_id, first_shard)
475
  first_header, first_hsize = read_safetensors_header(first_url, token)
476
  shard_headers[first_shard] = (first_header, first_hsize)
 
 
477
  all_keys = list(first_header.keys())
478
+ except Exception as e:
479
+ return f"❌ 读取 shard header 失败:{e}", None
480
+
481
+ # 识别 Q/K key 命名规则
482
+ q_candidates = [k for k in all_keys if any(
483
+ p in k for p in ["q_proj.weight", "query.weight", "q.weight", "wq.weight"]
484
+ )]
485
+ if not q_candidates:
486
+ sample = "\n".join(all_keys[:30])
487
+ return f"⚠️ 无法识别 Q/K key,前 30 个 key:\n{sample}", None
488
+
489
+ sample_q = q_candidates[0]
490
+ if "q_proj" in sample_q: q_suffix, k_suffix = "self_attn.q_proj.weight", "self_attn.k_proj.weight"
491
+ elif "query" in sample_q: q_suffix, k_suffix = "attention.query.weight", "attention.key.weight"
492
+ elif "wq" in sample_q: q_suffix, k_suffix = "attention.wq.weight", "attention.wk.weight"
493
+ else:
494
+ q_suffix = sample_q.split("layers.0.")[-1]
495
+ k_suffix = q_suffix.replace("q.", "k.")
496
+
497
+ log_lines.append(f"🔑 Q suffix:{q_suffix}\n")
498
+ log_lines.append(f"🔑 K suffix:{k_suffix}\n")
499
+ log_lines.append(f"{'═'*70}\n")
500
+
501
+ # ── 辅助:查找 key 所在 shard ────────────────
502
+ def get_shard_for_key(key: str) -> str | None:
503
+ if index_data:
504
+ return index_data["weight_map"].get(key)
505
+ for sf in sf_files:
506
+ if sf not in shard_headers:
507
+ h, hs = read_safetensors_header(get_file_url(model_id, sf), token)
508
+ shard_headers[sf] = (h, hs)
509
+ if key in shard_headers[sf][0]:
510
+ return sf
511
+ return None
512
+
513
+ # ── 逐层分析 ─────────────────────────────────
514
+ gqa_inferred = False # 只打印一次 GQA 信息
515
+
516
+ for layer_idx in range(int(max_layers)):
517
+ progress(
518
+ 0.10 + 0.85 * layer_idx / int(max_layers),
519
+ desc=f"第 {layer_idx} 层..."
520
+ )
521
+
522
+ q_key = f"model.layers.{layer_idx}.{q_suffix}"
523
+ k_key = f"model.layers.{layer_idx}.{k_suffix}"
524
+
525
+ q_shard = get_shard_for_key(q_key)
526
+ k_shard = get_shard_for_key(k_key)
527
+
528
+ if q_shard is None or k_shard is None:
529
+ log_lines.append(f"\nLayer {layer_idx}: Q/K 未找到,分析结束(共 {layer_idx} 层)\n")
530
+ break
531
+
532
+ for shard in {q_shard, k_shard}:
533
+ if shard not in shard_headers:
534
+ h, hs = read_safetensors_header(get_file_url(model_id, shard), token)
535
+ shard_headers[shard] = (h, hs)
536
+
537
+ try:
538
+ W_q = load_tensor_remote(
539
+ get_file_url(model_id, q_shard), q_key,
540
+ *shard_headers[q_shard], token
541
+ )
542
+ W_k = load_tensor_remote(
543
+ get_file_url(model_id, k_shard), k_key,
544
+ *shard_headers[k_shard], token
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
545
  )
546
+ except ValueError as e:
547
+ log_lines.append(f"Layer {layer_idx}: ⚠️ 跳过({e})\n")
548
+ continue
549
+
550
+ if W_q is None or W_k is None:
551
+ log_lines.append(f"Layer {layer_idx}: ⚠️ tensor 为 None,跳过\n")
552
+ continue
553
+
554
+ # ── GQA 参数推断(只做一次,后续复用)───
555
+ try:
556
+ n_q_heads, n_kv_heads, d_head = infer_gqa_params(W_q, W_k, config)
557
+ except ValueError as e:
558
+ log_lines.append(f"Layer {layer_idx}: ❌ GQA 推断失败:{e}\n")
559
  del W_q, W_k
560
+ continue
561
+
562
+ if not gqa_inferred:
563
+ group_size = n_q_heads // n_kv_heads
564
+ log_lines.append(
565
+ f"🧠 GQA 结构:n_q_heads={n_q_heads}, "
566
+ f"n_kv_heads={n_kv_heads}, "
567
+ f"group_size={group_size}, "
568
+ f"d_head={d_head}\n"
569
+ f" W_q shape: {list(W_q.shape)}, "
570
+ f"W_k shape: {list(W_k.shape)}\n"
571
+ f"{'═'*70}\n"
 
 
 
 
 
 
 
 
 
572
  )
573
+ gqa_inferred = True
574
+
575
+ # ── 逐头计算 ────────────────────────────
576
+ records, layer_log = analyze_layer_heads(
577
+ W_q, W_k, layer_idx,
578
+ n_q_heads, n_kv_heads, d_head
579
+ )
580
+ all_records.extend(records)
581
+ log_lines.append(layer_log)
582
+
583
+ del W_q, W_k # 立即释放内存
584
+
585
+ # ── 全局汇总统计 ─────────────────────────────
586
+ if all_records:
587
+ df = pd.DataFrame(all_records)
588
+
589
+ pearson_vals = df["Pearson_r"].values
590
+ spearman_vals = df["Spearman_r"].values
591
+ ssr_vals = df["SSR"].values
592
+ cos_vals = df["cos_Uq_Uk"].values
593
+ cov_vals = df["Cov_err"].values
594
+
595
+ summary = (
596
+ f"\n{'═'*70}\n"
597
+ f"📊 王氏五定律全局汇总 — {model_id}\n"
598
+ f"{'═'*70}\n"
599
+ f"总分析:{len(df['Layer'].unique())} 层 × "
600
+ f"每层 {df.groupby('Layer').size().iloc[0]} 个 Q 头 "
601
+ f"= {len(all_records)} 条记录\n\n"
602
+
603
+ f"【第一定律 — Pearson r(→ 1)】\n"
604
+ f" Median={np.median(pearson_vals):.6f} "
605
+ f"Mean={np.mean(pearson_vals):.6f} "
606
+ f"Min={np.min(pearson_vals):.6f} "
607
+ f"Max={np.max(pearson_vals):.6f}\n\n"
608
+
609
+ f"【第一定律 — Spearman r(→ 1)】\n"
610
+ f" Median={np.median(spearman_vals):.6f} "
611
+ f"Mean={np.mean(spearman_vals):.6f}\n\n"
612
+
613
+ f"【第二定律 — SSR(→ 0)】\n"
614
+ f" Median={np.median(ssr_vals):.8f} "
615
+ f"Mean={np.mean(ssr_vals):.8f} "
616
+ f"Min={np.min(ssr_vals):.8f} "
617
+ f"Max={np.max(ssr_vals):.8f}\n\n"
618
+
619
+ f"【第四定律 — cos(Uq,Uk) 输出子空间对齐】\n"
620
+ f" Median={np.median(cos_vals):.6f} "
621
+ f"Mean={np.mean(cos_vals):.6f} "
622
+ f"(随机基准 ≈ 1/√d_head)\n\n"
623
+
624
+ f"【协方差对齐误差(越小越好)】\n"
625
+ f" Median={np.median(cov_vals):.6f} "
626
+ f"Mean={np.mean(cov_vals):.6f}\n"
627
+
628
+ f"{'═'*70}\n"
629
+ )
630
+ log_lines.append(summary)
631
+
632
  return "".join(log_lines), df
633
+ else:
634
+ return "".join(log_lines) + "\n❌ 未获得任何有效结果\n", None
635
+
 
 
 
 
 
 
 
 
 
636
 
637
  # ─────────────────────────────────────────────
638
  # Gradio UI
639
  # ─────────────────────────────────────────────
640
 
641
  with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
642
+
643
  gr.Markdown("""
644
  # 🔬 Wang's Five Laws — LLM Spectral Analyzer
645
  **Mathematical Foundations of Large Language Models (MF-LLM)**
646
+
647
+ 通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
648
+ 支持 GQA(Grouped Query Attention)对每个 Q 头分别与其对应 K 头做 SVD 分析。
649
+
650
+ | 定律 | 指标 | 理论极值 |
651
+ |------|------|---------|
652
+ | 第一定律 | Pearson r / Spearman r | → 1 |
653
+ | 第二定律 | SSR | → 0 |
654
+ | 第四定律 | cos(Uq, Uk) | ≈ 1/√d_head(随机正交)|
655
+
656
  [![DOI](https://img.shields.io/badge/DOI-10.5281%2Fzenodo.19707844-blue)](https://doi.org/10.5281/zenodo.19707844)
657
+ [![HAL](https://img.shields.io/badge/HAL-hal--05609398-red)](https://hal.science/hal-05609398)
658
  """)
659
+
660
  with gr.Row():
661
  with gr.Column(scale=2):
662
  model_input = gr.Textbox(
663
  label="HuggingFace 模型 ID",
664
+ placeholder="Qwen/Qwen2.5-14B-Instruct",
665
  value="Qwen/Qwen2.5-14B-Instruct"
666
  )
667
  token_input = gr.Textbox(
668
  label="HF Access Token(公开模型可留空)",
669
+ placeholder="hf_xxxxxxxxxxxxxxxx",
670
  type="password"
671
  )
672
  max_layers_input = gr.Slider(
673
  label="最大分析层数",
674
+ minimum=1, maximum=100, value=4, step=1
675
  )
676
  analyze_btn = gr.Button("🚀 开始分析", variant="primary")
677
+
678
  with gr.Column(scale=1):
679
  gr.Markdown("""
680
+ ### 推荐模型
681
+ ```
682
+ Qwen/Qwen2.5-7B-Instruct (GQA 28Q/4KV)
683
+ meta-llama/Llama-3.2-1B (GQA)
684
+ google/gemma-2-2b (GQA)
685
+ deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
686
+ ```
687
+ ### GQA 典型结构
688
+ | 模型 | Q头 | KV头 | 每组 |
689
+ |------|-----|------|------|
690
+ | Qwen2.5-7B | 28 | 4 | 7 |
691
+ | LLaMA-3-8B | 32 | 8 | 4 |
692
+ | Qwen2.5-14B | 40 | 8 | 5 |
693
+ | Gemma-2-2B | 8 | 4 | 2 |
694
  """)
695
+
696
+ log_output = gr.Textbox(
697
+ label="分析日志(逐头详情)",
698
+ lines=35, max_lines=80
699
+ )
700
+
701
+ table_output = gr.Dataframe(
702
+ label="逐头结果表",
703
+ headers=[
704
+ "Layer","KV_head","Q_head",
705
+ "Pearson_r","Spearman_r",
706
+ "Alpha","Alpha_res",
707
+ "cos_Uq_Uk","Cov_err","SSR"
708
+ ]
709
+ )
710
+
711
  analyze_btn.click(
712
  fn=analyze_model,
713
  inputs=[model_input, token_input, max_layers_input],