Alex W. committed on
Commit
d3336ac
·
1 Parent(s): 0d5efff

change to tab ui

Browse files
Files changed (1) hide show
  1. app.py +78 -103
app.py CHANGED
@@ -143,17 +143,53 @@ def extract_config_params(config: dict) -> dict:
143
  # ─────────────────────────────────────────────
144
 
145
  def _classify_qkv_suffix(suffix: str) -> str | None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  if not suffix.endswith(".weight"):
147
  return None
148
- excludes = ["norm", "rope", "embed", "lm_head", "layernorm", "ln_"]
149
  s = suffix.lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  if any(e in s for e in excludes):
151
  return None
 
 
152
  if any(p in s for p in ["q_proj", "wq", "query", "q_a", "q_b"]):
153
  return "q"
154
- if any(p in s for p in ["k_proj", "wk", "key", "k_a", "k_b"]):
 
 
 
155
  return "k"
156
- if any(p in s for p in ["v_proj", "wv", "value", "v_a", "v_b"]):
157
  return "v"
158
  return None
159
 
@@ -779,6 +815,24 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
779
 
780
  # ── Tab 2:分析 ───────────────────────────
781
  with gr.Tab("📊 分析"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
782
  with gr.Row():
783
  with gr.Column(scale=2):
784
  model_input = gr.Textbox(
@@ -802,14 +856,27 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
802
 
803
  with gr.Column(scale=1):
804
  gr.Markdown("""
 
 
 
 
 
 
 
 
805
  ### 层号说明
806
- 层号 = safetensors key 中 `layers.{N}` 的 **N**
807
-
808
- **先用「结构探测」Tab 确认实际层号分布**
809
-
810
- ### Gemma-4-E2B 待确认:
811
- - audio/vision/language 是否共用层号?
812
- - 还是各自独立编号?
 
 
 
 
 
813
  """)
814
 
815
  log_output = gr.Textbox(label="分析日志", lines=40, max_lines=300)
@@ -837,99 +904,7 @@ with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
837
  outputs=[log_output, table_output]
838
  )
839
 
840
- gr.Markdown("""
841
- # 🔬 Wang's Five Laws — LLM Spectral Analyzer
842
- **Mathematical Foundations of Large Language Models (MF-LLM)**
843
-
844
- 通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
845
- 按 safetensors 原始层号分析,支持混合模态模型(视觉/音频/语言同时输出)。
846
-
847
- | 定律 | 指标 | 理论极值 |
848
- |------|------|---------|
849
- | 第一定律 | Pearson r | → 1 |
850
- | 第二定律 | SSR | → 0 |
851
- | 第三定律 | 条件数 κ | 越小越好 |
852
- | 第四定律 | cosU(Uq,Uv) | < 1/√d_head(超正交) |
853
- | 第五定律 | cosV | ≈ 1/√d_model(随机正交) |
854
-
855
- [![DOI](https://img.shields.io/badge/DOI-10.5281%2Fzenodo.19707844-blue)](https://doi.org/10.5281/zenodo.19707844)
856
- [![HAL](https://img.shields.io/badge/HAL-hal--05609398-red)](https://hal.science/hal-05609398)
857
- """)
858
-
859
- with gr.Row():
860
- with gr.Column(scale=2):
861
- model_input = gr.Textbox(
862
- label="HuggingFace 模型 ID",
863
- placeholder="google/gemma-4-e2b",
864
- value="google/gemma-4-e2b"
865
- )
866
- token_input = gr.Textbox(
867
- label="HF Access Token(公开模型可留空)",
868
- placeholder="hf_xxxxxxxxxxxxxxxx",
869
- type="password"
870
- )
871
- with gr.Row():
872
- start_layer_input = gr.Number(
873
- label="起始层号(原始层号,含)",
874
- value=0, minimum=0, maximum=999, precision=0
875
- )
876
- end_layer_input = gr.Number(
877
- label="结束层号(原始层号,含)",
878
- value=5, minimum=0, maximum=999, precision=0
879
- )
880
- analyze_btn = gr.Button("🚀 开始分析", variant="primary")
881
-
882
- with gr.Column(scale=1):
883
- gr.Markdown("""
884
- ### ✅ 推荐模型
885
- ```
886
- google/gemma-4-e2b
887
- google/gemma-4-31b-it
888
- Qwen/Qwen2.5-14B-Instruct
889
- meta-llama/Llama-3-8B
890
- deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
891
- ```
892
- ### 层号说明
893
- - 层号 = safetensors key 中 `layers.{N}` 的 **N**
894
- - **不按组件重排**,原始值直接输出
895
- - 混合模态模型(如 Gemma-4):
896
- - `layers.0~11` 同时含 audio/vision/text 层
897
- - 全部输出,按前缀区分组件
898
-
899
- ### 示例:Gemma-4-E2B
900
- | 组件 | 层范围 |
901
- |------|--------|
902
- | audio_tower | 0~11 |
903
- | language_model | 0~34 |
904
- | vision_tower | 0~15 |
905
- """)
906
-
907
- log_output = gr.Textbox(
908
- label="分析日志",
909
- lines=40, max_lines=300
910
- )
911
- table_output = gr.Dataframe(
912
- label="逐头全指标结果表",
913
- headers=[
914
- "prefix","layer","kv_head","q_head",
915
- "pearson_QK","spearman_QK","pearson_QV","pearson_KV",
916
- "ssr_QK","ssr_QV","ssr_KV",
917
- "cosU_QK","cosU_QV","cosU_KV",
918
- "cosV_QK","cosV_QV","cosV_KV",
919
- "alpha_QK","alpha_QV","alpha_KV",
920
- "alpha_res_QK","alpha_res_QV","alpha_res_KV",
921
- "sigma_max_Q","sigma_min_Q",
922
- "sigma_max_K","sigma_min_K",
923
- "sigma_max_V","sigma_min_V",
924
- "cond_Q","cond_K","cond_V",
925
- ]
926
- )
927
-
928
- analyze_btn.click(
929
- fn=analyze_model,
930
- inputs=[model_input, token_input, start_layer_input, end_layer_input],
931
- outputs=[log_output, table_output]
932
- )
933
 
934
  if __name__ == "__main__":
935
  demo.launch()
 
143
  # ─────────────────────────────────────────────
144
 
145
  def _classify_qkv_suffix(suffix: str) -> str | None:
146
+ """
147
+ layers.{N}. 之后的后缀 → 'q'/'k'/'v'/None
148
+
149
+ 支持格式:
150
+ 标准: self_attn.q_proj.weight
151
+ 嵌套: self_attn.q_proj.linear.weight (audio/vision tower)
152
+
153
+ Gemma-4 实测后缀:
154
+ audio: self_attn.q_proj.linear.weight [1024, 1024]
155
+ audio: self_attn.k_proj.linear.weight [1024, 1024]
156
+ audio: self_attn.v_proj.linear.weight [1024, 1024]
157
+ vision: self_attn.q_proj.linear.weight [768, 768]
158
+ vision: self_attn.k_proj.linear.weight [768, 768]
159
+ vision: self_attn.v_proj.linear.weight [768, 768]
160
+ text: self_attn.q_proj.weight [2048, 1536]
161
+ text: self_attn.k_proj.weight [256, 1536]
162
+ text: self_attn.v_proj.weight [256, 1536]
163
+ """
164
  if not suffix.endswith(".weight"):
165
  return None
166
+
167
  s = suffix.lower()
168
+
169
+ # 精确排除非QKV权重
170
+ excludes = [
171
+ "norm", "rope", "embed", "lm_head", "layernorm", "ln_",
172
+ "o_proj", "out_proj", # 输出投影
173
+ "post", "relative", # audio tower 特有
174
+ "per_dim", "scalar", # audio tower 特有
175
+ "gate_proj", "up_proj", "down_proj", # FFN
176
+ "ffw_layer", # audio FFN
177
+ "depthwise", "conv", # audio conv
178
+ "linear_start", "linear_end", # audio conv
179
+ "per_layer", # language model 特有
180
+ ]
181
  if any(e in s for e in excludes):
182
  return None
183
+
184
+ # Q/K/V 匹配
185
  if any(p in s for p in ["q_proj", "wq", "query", "q_a", "q_b"]):
186
  return "q"
187
+ if any(p in s for p in ["k_proj", "wk", "k_a", "k_b"]):
188
+ # 排除 k_norm(已在上面 norm 过滤,但双重保险)
189
+ if "k_norm" in s:
190
+ return None
191
  return "k"
192
+ if any(p in s for p in ["v_proj", "wv", "value", "v_a", "v_b"]):
193
  return "v"
194
  return None
195
 
 
815
 
816
  # ── Tab 2:分析 ───────────────────────────
817
  with gr.Tab("📊 分析"):
818
+ gr.Markdown("""
819
+ # 🔬 Wang's Five Laws — LLM Spectral Analyzer
820
+ **Mathematical Foundations of Large Language Models (MF-LLM)**
821
+
822
+ 通过 **HTTP Range Request** 直接读取 HF 权重,**无需下载整个模型**。
823
+ 按 safetensors 原始层号分析,支持混合模态模型(视觉/音频/语言同时输出)。
824
+
825
+ | 定律 | 指标 | 理论极值 |
826
+ |------|------|---------|
827
+ | 第一定律 | Pearson r | → 1 |
828
+ | 第二定律 | SSR | → 0 |
829
+ | 第三定律 | 条件数 κ | 越小越好 |
830
+ | 第四定律 | cosU(Uq,Uv) | < 1/√d_head(超正交) |
831
+ | 第五定律 | cosV | ≈ 1/√d_model(随机正交) |
832
+
833
+ [![DOI](https://img.shields.io/badge/DOI-10.5281%2Fzenodo.19707844-blue)](https://doi.org/10.5281/zenodo.19707844)
834
+ [![HAL](https://img.shields.io/badge/HAL-hal--05609398-red)](https://hal.science/hal-05609398)
835
+ """)
836
  with gr.Row():
837
  with gr.Column(scale=2):
838
  model_input = gr.Textbox(
 
856
 
857
  with gr.Column(scale=1):
858
  gr.Markdown("""
859
+ ### ✅ 推荐模型
860
+ ```
861
+ google/gemma-4-e2b
862
+ google/gemma-4-31b-it
863
+ Qwen/Qwen2.5-14B-Instruct
864
+ meta-llama/Llama-3-8B
865
+ deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
866
+ ```
867
  ### 层号说明
868
+ - 层号 = safetensors key 中 `layers.{N}` 的 **N**
869
+ - **不按组件重排**,原始值直接输出
870
+ - 混合模态模型(如 Gemma-4):
871
+ - `layers.0~11` 同时含 audio/vision/text 层
872
+ - 全部输出,按前缀区分组件
873
+
874
+ ### 示例:Gemma-4-E2B
875
+ | 组件 | 层范围 |
876
+ |------|--------|
877
+ | audio_tower | 0~11 |
878
+ | language_model | 0~34 |
879
+ | vision_tower | 0~15 |
880
  """)
881
 
882
  log_output = gr.Textbox(label="分析日志", lines=40, max_lines=300)
 
904
  outputs=[log_output, table_output]
905
  )
906
 
907
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
908
 
909
  if __name__ == "__main__":
910
  demo.launch()