Spaces:
Running
Running
Alex W. committed on
Commit ·
0d5efff
1
Parent(s): 9ae44df
直接打印 safetensors 的原始 key 结构
Browse files
在分析之前,先加一个结构探测功能,把所有含 layers. 的 key 按层号归组打印出来,让数据说话。
app.py
CHANGED
|
@@ -662,12 +662,181 @@ def analyze_model(
|
|
| 662 |
return "".join(log), df
|
| 663 |
|
| 664 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 665 |
# ─────────────────────────────────────────────
|
| 666 |
# Gradio UI
|
| 667 |
# ─────────────────────────────────────────────
|
| 668 |
|
| 669 |
with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
| 670 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 671 |
gr.Markdown("""
|
| 672 |
# 🔬 Wang's Five Laws — LLM Spectral Analyzer
|
| 673 |
**Mathematical Foundations of Large Language Models (MF-LLM)**
|
|
|
|
| 662 |
return "".join(log), df
|
| 663 |
|
| 664 |
|
| 665 |
+
def _collect_layer_entries(all_shard_headers):
    """Group tensor keys that contain ``layers.{N}.`` by their layer number.

    Args:
        all_shard_headers: Mapping of shard file name to a ``(header, size)``
            pair, where ``header`` maps tensor keys to their info dicts.

    Returns:
        ``{layer_idx: [(prefix, suffix, shape, dtype), ...]}`` where ``prefix``
        is the key text before the first ``layers.{N}.`` match and ``suffix``
        is the text after it.
    """
    # Compile once instead of re-resolving the pattern for every key.
    layer_key_re = re.compile(r'layers\.(\d+)\.')
    layer_entries: dict[int, list] = {}

    for header, _ in all_shard_headers.values():
        for key, info in header.items():
            m = layer_key_re.search(key)
            if not m:
                # Keys without a layer number are not part of this report.
                continue
            layer_entries.setdefault(int(m.group(1)), []).append((
                key[:m.start()],
                key[m.end():],
                info.get("shape", []),
                info.get("dtype", "?"),
            ))

    return layer_entries


def _format_layer_entries(layer_entries):
    """Render the grouped layer entries as a list of log lines.

    Layers are listed in ascending order; inside each layer the entries are
    grouped by prefix, and both prefixes and entries are sorted.
    """
    lines = [
        f"📊 共发现层号:{sorted(layer_entries.keys())}\n",
        f"{'─'*80}\n",
    ]

    for layer_idx in sorted(layer_entries):
        entries = layer_entries[layer_idx]

        # Group this layer's keys by the component prefix preceding "layers.N."
        by_prefix: dict[str, list] = {}
        for prefix, suffix, shape, dtype in entries:
            by_prefix.setdefault(prefix, []).append((suffix, shape, dtype))

        lines.append(f"\n【Layer {layer_idx}】— 共 {len(entries)} 个 key,"
                     f"涉及 {len(by_prefix)} 个组件前缀\n")

        for prefix, items in sorted(by_prefix.items()):
            lines.append(f" 前缀: '{prefix}'\n")
            lines.extend(
                f" {suffix:<50} {str(shape):<20} {dtype}\n"
                for suffix, shape, dtype in sorted(items)
            )

    return lines


def inspect_model_structure(
    model_id: str,
    hf_token: str,
    progress=gr.Progress()
) -> str:
    """Print the raw key structure of a model without running any analysis.

    Reads the header of every safetensors shard of the model, collects the
    keys that contain ``layers.{N}.`` and reports them grouped by layer
    number and by key prefix, so the user can see what each layer holds.

    Args:
        model_id: Model identifier; surrounding whitespace is ignored.
        hf_token: Access token; empty, blank or ``None`` means no token.
        progress: Gradio progress tracker; not used by this function's body.

    Returns:
        The report text. If the file list cannot be obtained, a single error
        message is returned instead. Shards whose header cannot be read are
        reported as warnings and skipped.
    """
    # Tolerate None from the UI and whitespace pasted around either value.
    model_id = (model_id or "").strip()
    token = (hf_token or "").strip() or None
    log = [f"🔬 结构探测:{model_id}\n{'═'*80}\n"]

    # Resolve the shard list: from the index file when there is one,
    # otherwise from the file listing.
    try:
        index_data = find_index_file(model_id, token)
        shard_files = (
            sorted(set(index_data["weight_map"].values()))
            if index_data else get_safetensor_files(model_id, token)
        )
    except Exception as e:
        return f"❌ 获取文件列表失败:{e}"

    # Read every shard header; a failing shard is logged and skipped so the
    # remaining shards can still be reported.
    all_shard_headers = {}
    for sf in shard_files:
        try:
            h, hs = read_safetensors_header(get_file_url(model_id, sf), token)
            all_shard_headers[sf] = (h, hs)
        except Exception as e:
            log.append(f"⚠️ {sf}:{e}\n")

    layer_entries = _collect_layer_entries(all_shard_headers)
    if not layer_entries:
        return "".join(log) + "⚠️ 未找到任何含 layers.{N}. 的 key\n"

    log.extend(_format_layer_entries(layer_entries))

    log.append(f"\n{'═'*80}\n")
    log.append("📌 说明:\n")
    log.append(" - 如果每层只有一个前缀 → 该层属于单一组件\n")
    log.append(" - 如果每层有多个前缀 → 不同组件恰好共用同一层号(独立权重,不混合)\n")
    log.append(" - 层号只是 key 名里的数字,不代表物理上是同一层\n")

    return "".join(log)
|
| 745 |
+
|
| 746 |
# ─────────────────────────────────────────────
|
| 747 |
# Gradio UI
|
| 748 |
# ─────────────────────────────────────────────
|
| 749 |
|
| 750 |
with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
| 751 |
|
| 752 |
+
with gr.Tabs():
|
| 753 |
+
|
| 754 |
+
# ── Tab 1:结构探测 ────────────────────────
|
| 755 |
+
with gr.Tab("🔬 结构探测"):
|
| 756 |
+
gr.Markdown("""
|
| 757 |
+
**先运行这个**,看清模型的原始 key 结构,
|
| 758 |
+
再决定分析哪些层号。
|
| 759 |
+
""")
|
| 760 |
+
with gr.Row():
|
| 761 |
+
inspect_model_input = gr.Textbox(
|
| 762 |
+
label="模型 ID",
|
| 763 |
+
value="google/gemma-4-e2b"
|
| 764 |
+
)
|
| 765 |
+
inspect_token_input = gr.Textbox(
|
| 766 |
+
label="HF Token",
|
| 767 |
+
type="password"
|
| 768 |
+
)
|
| 769 |
+
inspect_btn = gr.Button("🔍 探测结构", variant="secondary")
|
| 770 |
+
inspect_output = gr.Textbox(
|
| 771 |
+
label="原始结构",
|
| 772 |
+
lines=50, max_lines=200
|
| 773 |
+
)
|
| 774 |
+
inspect_btn.click(
|
| 775 |
+
fn=inspect_model_structure,
|
| 776 |
+
inputs=[inspect_model_input, inspect_token_input],
|
| 777 |
+
outputs=[inspect_output]
|
| 778 |
+
)
|
| 779 |
+
|
| 780 |
+
# ── Tab 2:分析 ───────────────────────────
|
| 781 |
+
with gr.Tab("📊 分析"):
|
| 782 |
+
with gr.Row():
|
| 783 |
+
with gr.Column(scale=2):
|
| 784 |
+
model_input = gr.Textbox(
|
| 785 |
+
label="HuggingFace 模型 ID",
|
| 786 |
+
value="google/gemma-4-e2b"
|
| 787 |
+
)
|
| 788 |
+
token_input = gr.Textbox(
|
| 789 |
+
label="HF Access Token",
|
| 790 |
+
type="password"
|
| 791 |
+
)
|
| 792 |
+
with gr.Row():
|
| 793 |
+
start_layer_input = gr.Number(
|
| 794 |
+
label="起始层号(含)",
|
| 795 |
+
value=0, minimum=0, maximum=999, precision=0
|
| 796 |
+
)
|
| 797 |
+
end_layer_input = gr.Number(
|
| 798 |
+
label="结束层号(含)",
|
| 799 |
+
value=5, minimum=0, maximum=999, precision=0
|
| 800 |
+
)
|
| 801 |
+
analyze_btn = gr.Button("🚀 开始分析", variant="primary")
|
| 802 |
+
|
| 803 |
+
with gr.Column(scale=1):
|
| 804 |
+
gr.Markdown("""
|
| 805 |
+
### 层号说明
|
| 806 |
+
层号 = safetensors key 中 `layers.{N}` 的 **N**
|
| 807 |
+
|
| 808 |
+
**先用「结构探测」Tab 确认实际层号分布**
|
| 809 |
+
|
| 810 |
+
### Gemma-4-E2B 待确认:
|
| 811 |
+
- audio/vision/language 是否共用层号?
|
| 812 |
+
- 还是各自独立编号?
|
| 813 |
+
""")
|
| 814 |
+
|
| 815 |
+
log_output = gr.Textbox(label="分析日志", lines=40, max_lines=300)
|
| 816 |
+
table_output = gr.Dataframe(
|
| 817 |
+
label="逐头全指标结果表",
|
| 818 |
+
headers=[
|
| 819 |
+
"prefix","layer","kv_head","q_head",
|
| 820 |
+
"pearson_QK","spearman_QK","pearson_QV","pearson_KV",
|
| 821 |
+
"ssr_QK","ssr_QV","ssr_KV",
|
| 822 |
+
"cosU_QK","cosU_QV","cosU_KV",
|
| 823 |
+
"cosV_QK","cosV_QV","cosV_KV",
|
| 824 |
+
"alpha_QK","alpha_QV","alpha_KV",
|
| 825 |
+
"alpha_res_QK","alpha_res_QV","alpha_res_KV",
|
| 826 |
+
"sigma_max_Q","sigma_min_Q",
|
| 827 |
+
"sigma_max_K","sigma_min_K",
|
| 828 |
+
"sigma_max_V","sigma_min_V",
|
| 829 |
+
"cond_Q","cond_K","cond_V",
|
| 830 |
+
]
|
| 831 |
+
)
|
| 832 |
+
|
| 833 |
+
analyze_btn.click(
|
| 834 |
+
fn=analyze_model,
|
| 835 |
+
inputs=[model_input, token_input,
|
| 836 |
+
start_layer_input, end_layer_input],
|
| 837 |
+
outputs=[log_output, table_output]
|
| 838 |
+
)
|
| 839 |
+
|
| 840 |
gr.Markdown("""
|
| 841 |
# 🔬 Wang's Five Laws — LLM Spectral Analyzer
|
| 842 |
**Mathematical Foundations of Large Language Models (MF-LLM)**
|