Alex W. committed on
Commit
667e8f1
·
1 Parent(s): b342230

debug for gemma-4-31b-it layer 0, KV head 0

Browse files
Files changed (1) hide show
  1. core/metrics.py +40 -13
core/metrics.py CHANGED
@@ -82,9 +82,17 @@ def analyze_layer(
82
  records: list[dict] = []
83
  lines: list[str] = []
84
 
85
- kv_tag = " [K=V共享]" if kv_shared else ""
86
  lines.append(
87
  f"\n{'─'*80}\n"
 
 
 
 
 
 
 
 
88
  f"[{profile.prefix}] Layer {profile.layer_idx:3d}{kv_tag} "
89
  f"n_q={n_q} n_kv={n_kv} group={group} "
90
  f"d_head={d_head}({profile.head_dim_source})\n"
@@ -107,6 +115,16 @@ def analyze_layer(
107
  smxk, smnk, cond_k = sigma_stats(s_k)
108
  smxv, smnv, cond_v = sigma_stats(s_v)
109
 
 
 
 
 
 
 
 
 
 
 
110
  # KV 指标
111
  if kv_shared:
112
  ssr_kv = 0.0
@@ -116,11 +134,11 @@ def analyze_layer(
116
  alpha_kv = 1.0
117
  res_kv = 0.0
118
  else:
119
- n_kv_sv = min(len(s_k), len(s_v))
120
- ssr_kv = ssr(s_k, s_v)
121
- pkv = pearson(s_k[:n_kv_sv], s_v[:n_kv_sv])
122
- cosU_KV = cos_U(U_k, U_v)
123
- cosV_KV = cos_V(Vt_k, Vt_v)
124
  alpha_kv, res_kv = svr(s_k, s_v)
125
 
126
  for q_off in range(group):
@@ -130,6 +148,13 @@ def analyze_layer(
130
 
131
  smxq, smnq, cond_q = sigma_stats(s_q)
132
 
 
 
 
 
 
 
 
133
  nqk = min(len(s_q), len(s_k))
134
  nqv = min(len(s_q), len(s_v))
135
 
@@ -148,37 +173,40 @@ def analyze_layer(
148
  cU_QV = cos_U(U_q, U_v)
149
  cV_QV = cos_V(Vt_q, Vt_v)
150
 
 
 
 
 
 
 
 
 
 
151
  records.append({
152
  "prefix": profile.prefix,
153
  "layer": profile.layer_idx,
154
  "kv_head": kv_h,
155
  "q_head": h,
156
  "kv_shared": kv_shared,
157
- # 第一定律
158
  "pearson_QK": round(pqk, 6),
159
  "spearman_QK": round(spqk, 6),
160
  "pearson_QV": round(pqv, 6),
161
  "pearson_KV": round(pkv, 6),
162
- # 第二定律
163
  "ssr_QK": round(ssr_qk, 8),
164
  "ssr_QV": round(ssr_qv, 8),
165
  "ssr_KV": round(ssr_kv, 8),
166
- # 第四定律
167
  "cosU_QK": round(cU_QK, 6),
168
  "cosU_QV": round(cU_QV, 6),
169
  "cosU_KV": round(cosU_KV, 6),
170
- # 第五定律
171
  "cosV_QK": round(cV_QK, 6),
172
  "cosV_QV": round(cV_QV, 6),
173
  "cosV_KV": round(cosV_KV, 6),
174
- # 尺度因子 + 最小二乘残差
175
  "alpha_QK": round(a_qk, 4),
176
  "alpha_QV": round(a_qv, 4),
177
  "alpha_KV": round(alpha_kv,4),
178
  "alpha_res_QK": round(r_qk, 6),
179
  "alpha_res_QV": round(r_qv, 6),
180
  "alpha_res_KV": round(res_kv, 6),
181
- # 第三定律:奇异值范围 + 条件数
182
  "sigma_max_Q": round(smxq, 4),
183
  "sigma_min_Q": round(smnq, 4),
184
  "sigma_max_K": round(smxk, 4),
@@ -188,7 +216,6 @@ def analyze_layer(
188
  "cond_Q": round(cond_q, 2),
189
  "cond_K": round(cond_k, 2),
190
  "cond_V": round(cond_v, 2),
191
- # 维度信息
192
  "head_dim": d_head,
193
  "d_model": profile.d_model,
194
  "n_q_heads": n_q,
 
82
  records: list[dict] = []
83
  lines: list[str] = []
84
 
85
+ # ── 调试:打印整体 shape ──────────────────────
86
  lines.append(
87
  f"\n{'─'*80}\n"
88
+ f"[DEBUG] W_q={list(W_q.shape)} W_k={list(W_k.shape)} "
89
+ f"W_v={list(W_v.shape)}\n"
90
+ f"[DEBUG] n_q={n_q} n_kv={n_kv} group={group} "
91
+ f"d_head={d_head} source={profile.head_dim_source}\n"
92
+ )
93
+
94
+ kv_tag = " [K=V共享]" if kv_shared else ""
95
+ lines.append(
96
  f"[{profile.prefix}] Layer {profile.layer_idx:3d}{kv_tag} "
97
  f"n_q={n_q} n_kv={n_kv} group={group} "
98
  f"d_head={d_head}({profile.head_dim_source})\n"
 
115
  smxk, smnk, cond_k = sigma_stats(s_k)
116
  smxv, smnv, cond_v = sigma_stats(s_v)
117
 
118
+ # ── 调试:打印每个 KV 头的切片和奇异值 ──────
119
+ lines.append(
120
+ f"[DEBUG] KV头{kv_h}: "
121
+ f"k_t={list(k_t.shape)} "
122
+ f"s_k前5={s_k[:5].tolist()}\n"
123
+ f"[DEBUG] KV头{kv_h}: "
124
+ f"v_t={list(v_t.shape)} "
125
+ f"s_v前5={s_v[:5].tolist()}\n"
126
+ )
127
+
128
  # KV 指标
129
  if kv_shared:
130
  ssr_kv = 0.0
 
134
  alpha_kv = 1.0
135
  res_kv = 0.0
136
  else:
137
+ n_kv_sv = min(len(s_k), len(s_v))
138
+ ssr_kv = ssr(s_k, s_v)
139
+ pkv = pearson(s_k[:n_kv_sv], s_v[:n_kv_sv])
140
+ cosU_KV = cos_U(U_k, U_v)
141
+ cosV_KV = cos_V(Vt_k, Vt_v)
142
  alpha_kv, res_kv = svr(s_k, s_v)
143
 
144
  for q_off in range(group):
 
148
 
149
  smxq, smnq, cond_q = sigma_stats(s_q)
150
 
151
+ # ── 调试:打印每个 Q 头的切片和奇异值 ────
152
+ lines.append(
153
+ f"[DEBUG] Q头{h}: "
154
+ f"q_t={list(q_t.shape)} "
155
+ f"s_q前5={s_q[:5].tolist()}\n"
156
+ )
157
+
158
  nqk = min(len(s_q), len(s_k))
159
  nqv = min(len(s_q), len(s_v))
160
 
 
173
  cU_QV = cos_U(U_q, U_v)
174
  cV_QV = cos_V(Vt_q, Vt_v)
175
 
176
+ # ── 调试:打印关键指标 ────────────────────
177
+ lines.append(
178
+ f"[DEBUG] Q头{h}: "
179
+ f"pearson={pqk:+.4f} "
180
+ f"alpha_QK={a_qk:.4f} "
181
+ f"s_q[0]={s_q[0]:.4f} "
182
+ f"s_k[0]={s_k[0]:.4f}\n"
183
+ )
184
+
185
  records.append({
186
  "prefix": profile.prefix,
187
  "layer": profile.layer_idx,
188
  "kv_head": kv_h,
189
  "q_head": h,
190
  "kv_shared": kv_shared,
 
191
  "pearson_QK": round(pqk, 6),
192
  "spearman_QK": round(spqk, 6),
193
  "pearson_QV": round(pqv, 6),
194
  "pearson_KV": round(pkv, 6),
 
195
  "ssr_QK": round(ssr_qk, 8),
196
  "ssr_QV": round(ssr_qv, 8),
197
  "ssr_KV": round(ssr_kv, 8),
 
198
  "cosU_QK": round(cU_QK, 6),
199
  "cosU_QV": round(cU_QV, 6),
200
  "cosU_KV": round(cosU_KV, 6),
 
201
  "cosV_QK": round(cV_QK, 6),
202
  "cosV_QV": round(cV_QV, 6),
203
  "cosV_KV": round(cosV_KV, 6),
 
204
  "alpha_QK": round(a_qk, 4),
205
  "alpha_QV": round(a_qv, 4),
206
  "alpha_KV": round(alpha_kv,4),
207
  "alpha_res_QK": round(r_qk, 6),
208
  "alpha_res_QV": round(r_qv, 6),
209
  "alpha_res_KV": round(res_kv, 6),
 
210
  "sigma_max_Q": round(smxq, 4),
211
  "sigma_min_Q": round(smnq, 4),
212
  "sigma_max_K": round(smxk, 4),
 
216
  "cond_Q": round(cond_q, 2),
217
  "cond_K": round(cond_k, 2),
218
  "cond_V": round(cond_v, 2),
 
219
  "head_dim": d_head,
220
  "d_model": profile.d_model,
221
  "n_q_heads": n_q,