Alex W. committed on
Commit
0d5efff
·
1 Parent(s): 9ae44df

直接打印 safetensors 的原始 key 结构

Browse files

在分析之前,先加一个结构探测功能,把所有含 layers. 的 key 按层号归组打印出来,让数据说话。

Files changed (1) hide show
  1. app.py +169 -0
app.py CHANGED
@@ -662,12 +662,181 @@ def analyze_model(
662
  return "".join(log), df
663
 
664
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
665
  # ─────────────────────────────────────────────
666
  # Gradio UI
667
  # ─────────────────────────────────────────────
668
 
669
  with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
670
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
671
  gr.Markdown("""
672
  # 🔬 Wang's Five Laws — LLM Spectral Analyzer
673
  **Mathematical Foundations of Large Language Models (MF-LLM)**
 
662
  return "".join(log), df
663
 
664
 
665
def _group_keys_by_layer(headers) -> dict[int, list]:
    """Group tensor keys by the layer number embedded in their name.

    Args:
        headers: Iterable of header mappings (tensor key -> info mapping),
            one per successfully read shard.

    Returns:
        Mapping of layer index to a list of ``(prefix, suffix, shape, dtype)``
        tuples, where ``prefix`` / ``suffix`` are the parts of the key before
        and after the first ``layers.<N>.`` segment. Keys that contain no
        such segment are skipped.
    """
    # Compiled once so the per-key loop does not repeat the pattern lookup.
    layer_pattern = re.compile(r'layers\.(\d+)\.')
    grouped: dict[int, list] = {}

    for header in headers:
        for key, info in header.items():
            match = layer_pattern.search(key)
            if not match:
                continue
            grouped.setdefault(int(match.group(1)), []).append((
                key[:match.start()],
                key[match.end():],
                info.get("shape", []),
                info.get("dtype", "?"),
            ))

    return grouped


def _render_layer_structure(layer_entries: dict[int, list]) -> list[str]:
    """Render the per-layer key listing as a list of log fragments.

    Layers are listed in ascending order; within a layer the keys are grouped
    by prefix (sorted), and within a prefix sorted by ``(suffix, shape, dtype)``.
    """
    layer_ids = sorted(layer_entries)
    lines = [
        f"📊 共发现层号:{layer_ids}\n",
        f"{'─'*80}\n",
    ]

    for layer_idx in layer_ids:
        entries = layer_entries[layer_idx]

        # Several components may reuse the same layer number, so split the
        # layer's keys by whatever precedes "layers.<N>." in the key.
        by_prefix: dict[str, list] = {}
        for prefix, suffix, shape, dtype in entries:
            by_prefix.setdefault(prefix, []).append((suffix, shape, dtype))

        lines.append(f"\n【Layer {layer_idx}】— 共 {len(entries)} 个 key,"
                     f"涉及 {len(by_prefix)} 个组件前缀\n")

        for prefix, items in sorted(by_prefix.items()):
            lines.append(f" 前缀: '{prefix}'\n")
            for suffix, shape, dtype in sorted(items):
                lines.append(f" {suffix:<50} {str(shape):<20} {dtype}\n")

    return lines


def inspect_model_structure(
    model_id: str,
    hf_token: str,
    progress=gr.Progress()
) -> str:
    """Print the raw safetensors key structure of a model, without analysis.

    Every key containing ``layers.<N>.`` is grouped by layer number so the
    user can see exactly what each layer index contains.

    Args:
        model_id: Model identifier passed through to the file-listing and
            header-reading helpers.
        hf_token: Access token; blank or missing values are treated as
            "no token" (``None``).
        progress: Progress tracker supplied by the UI framework; not used
            by this function's body.

    Returns:
        The report text. If the shard list cannot be obtained, a single
        error line is returned instead. Shards whose header cannot be read
        are reported as warning lines and otherwise skipped.
    """
    # Tolerate a missing token (None) as well as a blank one.
    token = (hf_token or "").strip() or None
    log = [f"🔬 结构探测:{model_id}\n{'═'*80}\n"]

    # Resolve the shard list: prefer the index file's weight map, otherwise
    # fall back to listing the safetensors files.
    try:
        index_data = find_index_file(model_id, token)
        if index_data:
            shard_files = sorted(set(index_data["weight_map"].values()))
        else:
            shard_files = get_safetensor_files(model_id, token)
    except Exception as e:
        return f"❌ 获取文件列表失败:{e}"

    # Read every shard header; a failing shard is logged and skipped so the
    # remaining shards are still reported.
    headers = []
    for sf in shard_files:
        try:
            # The second element of the returned pair is not needed here.
            header, _ = read_safetensors_header(get_file_url(model_id, sf), token)
        except Exception as e:
            log.append(f"⚠️ {sf}:{e}\n")
        else:
            headers.append(header)

    layer_entries = _group_keys_by_layer(headers)
    if not layer_entries:
        return "".join(log) + "⚠️ 未找到任何含 layers.{N}. 的 key\n"

    log.extend(_render_layer_structure(layer_entries))

    # Closing legend explaining how to read the prefixes.
    log.append(f"\n{'═'*80}\n")
    log.append("📌 说明:\n")
    log.append(" - 如果每层只有一个前缀 → 该层属于单一组件\n")
    log.append(" - 如果每层有多个前缀 → 不同组件恰好共用同一层号(独立权重,不混合)\n")
    log.append(" - 层号只是 key 名里的数字,不代表物理上是同一层\n")

    return "".join(log)
745
+
746
  # ─────────────────────────────────────────────
747
  # Gradio UI
748
  # ─────────────────────────────────────────────
749
 
750
  with gr.Blocks(title="Wang's Five Laws — LLM Spectral Analyzer") as demo:
751
 
752
+ with gr.Tabs():
753
+
754
+ # ── Tab 1:结构探测 ────────────────────────
755
+ with gr.Tab("🔬 结构探测"):
756
+ gr.Markdown("""
757
+ **先运行这个**,看清模型的原始 key 结构,
758
+ 再决定分析哪些层号。
759
+ """)
760
+ with gr.Row():
761
+ inspect_model_input = gr.Textbox(
762
+ label="模型 ID",
763
+ value="google/gemma-4-e2b"
764
+ )
765
+ inspect_token_input = gr.Textbox(
766
+ label="HF Token",
767
+ type="password"
768
+ )
769
+ inspect_btn = gr.Button("🔍 探测结构", variant="secondary")
770
+ inspect_output = gr.Textbox(
771
+ label="原始结构",
772
+ lines=50, max_lines=200
773
+ )
774
+ inspect_btn.click(
775
+ fn=inspect_model_structure,
776
+ inputs=[inspect_model_input, inspect_token_input],
777
+ outputs=[inspect_output]
778
+ )
779
+
780
+ # ── Tab 2:分析 ───────────────────────────
781
+ with gr.Tab("📊 分析"):
782
+ with gr.Row():
783
+ with gr.Column(scale=2):
784
+ model_input = gr.Textbox(
785
+ label="HuggingFace 模型 ID",
786
+ value="google/gemma-4-e2b"
787
+ )
788
+ token_input = gr.Textbox(
789
+ label="HF Access Token",
790
+ type="password"
791
+ )
792
+ with gr.Row():
793
+ start_layer_input = gr.Number(
794
+ label="起始层号(含)",
795
+ value=0, minimum=0, maximum=999, precision=0
796
+ )
797
+ end_layer_input = gr.Number(
798
+ label="结束层号(含)",
799
+ value=5, minimum=0, maximum=999, precision=0
800
+ )
801
+ analyze_btn = gr.Button("🚀 开始分析", variant="primary")
802
+
803
+ with gr.Column(scale=1):
804
+ gr.Markdown("""
805
+ ### 层号说明
806
+ 层号 = safetensors key 中 `layers.{N}` 的 **N**
807
+
808
+ **先用「结构探测」Tab 确认实际层号分布**
809
+
810
+ ### Gemma-4-E2B 待确认:
811
+ - audio/vision/language 是否共用层号?
812
+ - 还是各自独立编号?
813
+ """)
814
+
815
+ log_output = gr.Textbox(label="分析日志", lines=40, max_lines=300)
816
+ table_output = gr.Dataframe(
817
+ label="逐头全指标结果表",
818
+ headers=[
819
+ "prefix","layer","kv_head","q_head",
820
+ "pearson_QK","spearman_QK","pearson_QV","pearson_KV",
821
+ "ssr_QK","ssr_QV","ssr_KV",
822
+ "cosU_QK","cosU_QV","cosU_KV",
823
+ "cosV_QK","cosV_QV","cosV_KV",
824
+ "alpha_QK","alpha_QV","alpha_KV",
825
+ "alpha_res_QK","alpha_res_QV","alpha_res_KV",
826
+ "sigma_max_Q","sigma_min_Q",
827
+ "sigma_max_K","sigma_min_K",
828
+ "sigma_max_V","sigma_min_V",
829
+ "cond_Q","cond_K","cond_V",
830
+ ]
831
+ )
832
+
833
+ analyze_btn.click(
834
+ fn=analyze_model,
835
+ inputs=[model_input, token_input,
836
+ start_layer_input, end_layer_input],
837
+ outputs=[log_output, table_output]
838
+ )
839
+
840
  gr.Markdown("""
841
  # 🔬 Wang's Five Laws — LLM Spectral Analyzer
842
  **Mathematical Foundations of Large Language Models (MF-LLM)**