Spaces:
Running
Running
Alex W. committed on
Commit ·
d3336ac
1
Parent(s): 0d5efff
change to tab ui
Browse files
app.py
CHANGED
|
@@ -143,17 +143,53 @@ def extract_config_params(config: dict) -> dict:
|
|
| 143 |
# ─────────────────────────────────────────────
|
| 144 |
|
| 145 |
def _classify_qkv_suffix(suffix: str) -> str | None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
if not suffix.endswith(".weight"):
|
| 147 |
return None
|
| 148 |
-
|
| 149 |
s = suffix.lower()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
if any(e in s for e in excludes):
|
| 151 |
return None
|
|
|
|
|
|
|
| 152 |
if any(p in s for p in ["q_proj", "wq", "query", "q_a", "q_b"]):
|
| 153 |
return "q"
|
| 154 |
-
if any(p in s for p in ["k_proj", "wk",
|
|
|
|
|
|
|
|
|
|
| 155 |
return "k"
|
| 156 |
-
if any(p in s for p in ["v_proj", "wv", "value",
|
| 157 |
return "v"
|
| 158 |
return None
|
| 159 |
|
|
@@ -779,6 +815,24 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
|
| 779 |
|
| 780 |
# ── Tab 2:分析 ───────────────────────────
|
| 781 |
with gr.Tab("📊 分析"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 782 |
with gr.Row():
|
| 783 |
with gr.Column(scale=2):
|
| 784 |
model_input = gr.Textbox(
|
|
@@ -802,14 +856,27 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
|
| 802 |
|
| 803 |
with gr.Column(scale=1):
|
| 804 |
gr.Markdown("""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 805 |
### 层号说明
|
| 806 |
-
层号 = safetensors key 中 `layers.{N}` 的 **N**
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 813 |
""")
|
| 814 |
|
| 815 |
log_output = gr.Textbox(label="分析日志", lines=40, max_lines=300)
|
|
@@ -837,99 +904,7 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
|
|
| 837 |
outputs=[log_output, table_output]
|
| 838 |
)
|
| 839 |
|
| 840 |
-
|
| 841 |
-
# 🔬 Wang's Five Laws — LLM Spectral Analyzer
|
| 842 |
-
**Mathematical Foundations of Large Language Models (MF-LLM)**
|
| 843 |
-
|
| 844 |
-
通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
|
| 845 |
-
按 safetensors 原始层号分析,支持混合模态模型(视觉/音频/语言同时输出)。
|
| 846 |
-
|
| 847 |
-
| 定律 | 指标 | 理论极值 |
|
| 848 |
-
|------|------|---------|
|
| 849 |
-
| 第一定律 | Pearson r | → 1 |
|
| 850 |
-
| 第二定律 | SSR | → 0 |
|
| 851 |
-
| 第三定律 | 条件数 κ | 越小越好 |
|
| 852 |
-
| 第四定律 | cosU(Uq,Uv) | < 1/√d_head(超正交) |
|
| 853 |
-
| 第五定律 | cosV | ≈ 1/√d_model(随机正交) |
|
| 854 |
-
|
| 855 |
-
[](https://doi.org/10.5281/zenodo.19707844)
|
| 856 |
-
[](https://hal.science/hal-05609398)
|
| 857 |
-
""")
|
| 858 |
-
|
| 859 |
-
with gr.Row():
|
| 860 |
-
with gr.Column(scale=2):
|
| 861 |
-
model_input = gr.Textbox(
|
| 862 |
-
label="HuggingFace 模型 ID",
|
| 863 |
-
placeholder="google/gemma-4-e2b",
|
| 864 |
-
value="google/gemma-4-e2b"
|
| 865 |
-
)
|
| 866 |
-
token_input = gr.Textbox(
|
| 867 |
-
label="HF Access Token(公开模型可留空)",
|
| 868 |
-
placeholder="hf_xxxxxxxxxxxxxxxx",
|
| 869 |
-
type="password"
|
| 870 |
-
)
|
| 871 |
-
with gr.Row():
|
| 872 |
-
start_layer_input = gr.Number(
|
| 873 |
-
label="起始层号(原始层号,含)",
|
| 874 |
-
value=0, minimum=0, maximum=999, precision=0
|
| 875 |
-
)
|
| 876 |
-
end_layer_input = gr.Number(
|
| 877 |
-
label="结束层号(原始层号,含)",
|
| 878 |
-
value=5, minimum=0, maximum=999, precision=0
|
| 879 |
-
)
|
| 880 |
-
analyze_btn = gr.Button("🚀 开始分析", variant="primary")
|
| 881 |
-
|
| 882 |
-
with gr.Column(scale=1):
|
| 883 |
-
gr.Markdown("""
|
| 884 |
-
### ✅ 推荐模型
|
| 885 |
-
```
|
| 886 |
-
google/gemma-4-e2b
|
| 887 |
-
google/gemma-4-31b-it
|
| 888 |
-
Qwen/Qwen2.5-14B-Instruct
|
| 889 |
-
meta-llama/Llama-3-8B
|
| 890 |
-
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
|
| 891 |
-
```
|
| 892 |
-
### 层号说明
|
| 893 |
-
- 层号 = safetensors key 中 `layers.{N}` 的 **N**
|
| 894 |
-
- **不按组件重排**,原始值直接输出
|
| 895 |
-
- 混合模态模型(如 Gemma-4):
|
| 896 |
-
- `layers.0~11` 同时含 audio/vision/text 层
|
| 897 |
-
- 全部输出,按前缀区分组件
|
| 898 |
-
|
| 899 |
-
### 示例:Gemma-4-E2B
|
| 900 |
-
| 组件 | 层范围 |
|
| 901 |
-
|------|--------|
|
| 902 |
-
| audio_tower | 0~11 |
|
| 903 |
-
| language_model | 0~34 |
|
| 904 |
-
| vision_tower | 0~15 |
|
| 905 |
-
""")
|
| 906 |
-
|
| 907 |
-
log_output = gr.Textbox(
|
| 908 |
-
label="分析日志",
|
| 909 |
-
lines=40, max_lines=300
|
| 910 |
-
)
|
| 911 |
-
table_output = gr.Dataframe(
|
| 912 |
-
label="逐头全指标结果表",
|
| 913 |
-
headers=[
|
| 914 |
-
"prefix","layer","kv_head","q_head",
|
| 915 |
-
"pearson_QK","spearman_QK","pearson_QV","pearson_KV",
|
| 916 |
-
"ssr_QK","ssr_QV","ssr_KV",
|
| 917 |
-
"cosU_QK","cosU_QV","cosU_KV",
|
| 918 |
-
"cosV_QK","cosV_QV","cosV_KV",
|
| 919 |
-
"alpha_QK","alpha_QV","alpha_KV",
|
| 920 |
-
"alpha_res_QK","alpha_res_QV","alpha_res_KV",
|
| 921 |
-
"sigma_max_Q","sigma_min_Q",
|
| 922 |
-
"sigma_max_K","sigma_min_K",
|
| 923 |
-
"sigma_max_V","sigma_min_V",
|
| 924 |
-
"cond_Q","cond_K","cond_V",
|
| 925 |
-
]
|
| 926 |
-
)
|
| 927 |
-
|
| 928 |
-
analyze_btn.click(
|
| 929 |
-
fn=analyze_model,
|
| 930 |
-
inputs=[model_input, token_input, start_layer_input, end_layer_input],
|
| 931 |
-
outputs=[log_output, table_output]
|
| 932 |
-
)
|
| 933 |
|
| 934 |
if __name__ == "__main__":
|
| 935 |
demo.launch()
|
|
|
|
| 143 |
# ─────────────────────────────────────────────
|
| 144 |
|
| 145 |
def _classify_qkv_suffix(suffix: str) -> str | None:
|
| 146 |
+
"""
|
| 147 |
+
layers.{N}. 之后的后缀 → 'q'/'k'/'v'/None
|
| 148 |
+
|
| 149 |
+
支持格式:
|
| 150 |
+
标准: self_attn.q_proj.weight
|
| 151 |
+
嵌套: self_attn.q_proj.linear.weight (audio/vision tower)
|
| 152 |
+
|
| 153 |
+
Gemma-4 实测后缀:
|
| 154 |
+
audio: self_attn.q_proj.linear.weight [1024, 1024]
|
| 155 |
+
audio: self_attn.k_proj.linear.weight [1024, 1024]
|
| 156 |
+
audio: self_attn.v_proj.linear.weight [1024, 1024]
|
| 157 |
+
vision: self_attn.q_proj.linear.weight [768, 768]
|
| 158 |
+
vision: self_attn.k_proj.linear.weight [768, 768]
|
| 159 |
+
vision: self_attn.v_proj.linear.weight [768, 768]
|
| 160 |
+
text: self_attn.q_proj.weight [2048, 1536]
|
| 161 |
+
text: self_attn.k_proj.weight [256, 1536]
|
| 162 |
+
text: self_attn.v_proj.weight [256, 1536]
|
| 163 |
+
"""
|
| 164 |
if not suffix.endswith(".weight"):
|
| 165 |
return None
|
| 166 |
+
|
| 167 |
s = suffix.lower()
|
| 168 |
+
|
| 169 |
+
# 精确排除非QKV权重
|
| 170 |
+
excludes = [
|
| 171 |
+
"norm", "rope", "embed", "lm_head", "layernorm", "ln_",
|
| 172 |
+
"o_proj", "out_proj", # 输出投影
|
| 173 |
+
"post", "relative", # audio tower 特有
|
| 174 |
+
"per_dim", "scalar", # audio tower 特有
|
| 175 |
+
"gate_proj", "up_proj", "down_proj", # FFN
|
| 176 |
+
"ffw_layer", # audio FFN
|
| 177 |
+
"depthwise", "conv", # audio conv
|
| 178 |
+
"linear_start", "linear_end", # audio conv
|
| 179 |
+
"per_layer", # language model 特有
|
| 180 |
+
]
|
| 181 |
if any(e in s for e in excludes):
|
| 182 |
return None
|
| 183 |
+
|
| 184 |
+
# Q/K/V 匹配
|
| 185 |
if any(p in s for p in ["q_proj", "wq", "query", "q_a", "q_b"]):
|
| 186 |
return "q"
|
| 187 |
+
if any(p in s for p in ["k_proj", "wk", "k_a", "k_b"]):
|
| 188 |
+
# 排除 k_norm(已在上面 norm 过滤,但双重保险)
|
| 189 |
+
if "k_norm" in s:
|
| 190 |
+
return None
|
| 191 |
return "k"
|
| 192 |
+
if any(p in s for p in ["v_proj", "wv", "value", "v_a", "v_b"]):
|
| 193 |
return "v"
|
| 194 |
return None
|
| 195 |
|
|
|
|
| 815 |
|
| 816 |
# ── Tab 2:分析 ───────────────────────────
|
| 817 |
with gr.Tab("📊 分析"):
|
| 818 |
+
gr.Markdown("""
|
| 819 |
+
# 🔬 Wang's Five Laws — LLM Spectral Analyzer
|
| 820 |
+
**Mathematical Foundations of Large Language Models (MF-LLM)**
|
| 821 |
+
|
| 822 |
+
通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
|
| 823 |
+
按 safetensors 原始层号分析,支持混合模态模型(视觉/音频/语言同时输出)。
|
| 824 |
+
|
| 825 |
+
| 定律 | 指标 | 理论极值 |
|
| 826 |
+
|------|------|---------|
|
| 827 |
+
| 第一定律 | Pearson r | → 1 |
|
| 828 |
+
| 第二定律 | SSR | → 0 |
|
| 829 |
+
| 第三定律 | 条件数 κ | 越小越好 |
|
| 830 |
+
| 第四定律 | cosU(Uq,Uv) | < 1/√d_head(超正交) |
|
| 831 |
+
| 第五定律 | cosV | ≈ 1/√d_model(随机正交) |
|
| 832 |
+
|
| 833 |
+
[](https://doi.org/10.5281/zenodo.19707844)
|
| 834 |
+
[](https://hal.science/hal-05609398)
|
| 835 |
+
""")
|
| 836 |
with gr.Row():
|
| 837 |
with gr.Column(scale=2):
|
| 838 |
model_input = gr.Textbox(
|
|
|
|
| 856 |
|
| 857 |
with gr.Column(scale=1):
|
| 858 |
gr.Markdown("""
|
| 859 |
+
### ✅ 推荐模型
|
| 860 |
+
```
|
| 861 |
+
google/gemma-4-e2b
|
| 862 |
+
google/gemma-4-31b-it
|
| 863 |
+
Qwen/Qwen2.5-14B-Instruct
|
| 864 |
+
meta-llama/Llama-3-8B
|
| 865 |
+
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
|
| 866 |
+
```
|
| 867 |
### 层号说明
|
| 868 |
+
- 层号 = safetensors key 中 `layers.{N}` 的 **N**
|
| 869 |
+
- **不按组件重排**,原始值直接输出
|
| 870 |
+
- 混合模态模型(如 Gemma-4):
|
| 871 |
+
- `layers.0~11` 同时含 audio/vision/text 层
|
| 872 |
+
- 全部输出,按前缀区分组件
|
| 873 |
+
|
| 874 |
+
### 示例:Gemma-4-E2B
|
| 875 |
+
| 组件 | 层范围 |
|
| 876 |
+
|------|--------|
|
| 877 |
+
| audio_tower | 0~11 |
|
| 878 |
+
| language_model | 0~34 |
|
| 879 |
+
| vision_tower | 0~15 |
|
| 880 |
""")
|
| 881 |
|
| 882 |
log_output = gr.Textbox(label="分析日志", lines=40, max_lines=300)
|
|
|
|
| 904 |
outputs=[log_output, table_output]
|
| 905 |
)
|
| 906 |
|
| 907 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 908 |
|
| 909 |
if __name__ == "__main__":
|
| 910 |
demo.launch()
|