han1823123123 committed
Commit 8044666 · verified · 1 Parent(s): 227c672

Upload training_logs/mlc__seed42.json with huggingface_hub

Files changed (1): training_logs/mlc__seed42.json (+108 -0)
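For reference, an upload that produces a single-file commit like this one typically goes through HfApi.upload_file from huggingface_hub. A minimal sketch under that assumption; "your-username/your-repo" is a placeholder, since the actual repository id is not shown on this page:

from huggingface_hub import HfApi

api = HfApi()

# Upload the local log into the repo's training_logs/ directory.
# The repo_id below is a placeholder, not the actual repository.
api.upload_file(
    path_or_fileobj="training_logs/mlc__seed42.json",
    path_in_repo="training_logs/mlc__seed42.json",
    repo_id="your-username/your-repo",
    commit_message="Upload training_logs/mlc__seed42.json with huggingface_hub",
)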
training_logs/mlc__seed42.json ADDED
@@ -0,0 +1,108 @@
+ {
+   "loss": [
+     105877.6015625,
+     4052.52197265625,
+     3356.784423828125,
+     3147.585205078125,
+     2988.67578125,
+     2866.823486328125,
+     2799.510009765625,
+     2778.681640625,
+     2712.95751953125,
+     2702.340576171875,
+     2706.49755859375,
+     2665.997314453125,
+     2611.984375,
+     2568.7568359375,
+     2636.53759765625,
+     2595.935546875,
+     2539.989013671875,
+     2571.783447265625,
+     2591.739990234375,
+     2576.46923828125,
+     2507.060546875,
+     2484.958251953125
+   ],
+   "l0": [
+     500.0,
+     493.721923828125,
+     495.013671875,
+     493.9873046875,
+     494.047119140625,
+     493.771728515625,
+     494.263671875,
+     493.904052734375,
+     493.064453125,
+     492.825927734375,
+     491.211181640625,
+     491.83544921875,
+     490.74462890625,
+     490.76708984375,
+     491.23779296875,
+     490.3310546875,
+     490.146484375,
+     490.1123046875,
+     489.404296875,
+     489.390380859375,
+     490.21435546875,
+     487.871337890625
+   ],
+   "steps_logged": [
+     0,
+     200,
+     400,
+     600,
+     800,
+     1000,
+     1200,
+     1400,
+     1600,
+     1800,
+     2000,
+     2200,
+     2400,
+     2600,
+     2800,
+     3000,
+     3200,
+     3400,
+     3600,
+     3800,
+     4000,
+     4200
+   ],
+   "final_step": 4200,
+   "converged": true,
+   "plateau_last": 0.01707623112428004,
+   "elapsed_s": 772.8916091918945,
+   "row": 4,
+   "arch_id": "mlc",
+   "arch": "mlc",
+   "group": 1,
+   "src_class": "MultiLayerCrosscoder",
+   "src_module": "src.architectures.mlc",
+   "T": null,
+   "T_max": null,
+   "t_sample": null,
+   "n_layers": 5,
+   "k_win": 500,
+   "k_pos": 100,
+   "shifts": null,
+   "alpha": null,
+   "gamma": null,
+   "n_scales": null,
+   "seed": 42,
+   "d_in": 2304,
+   "d_sae": 18432,
+   "subject_model": "google/gemma-2-2b",
+   "anchor_layer": 12,
+   "mlc_layers": [
+     10,
+     11,
+     12,
+     13,
+     14
+   ],
+   "phase": "phase7_unification",
+   "run_id": "mlc__seed42"
+ }
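The log above records a run in which the loss falls from roughly 1.06e5 at step 0 to about 2485 at step 4200, while l0 stays near the configured k_win of 500. To pull the file back down and inspect it, a minimal sketch using hf_hub_download (again with a placeholder repo id; the field names match the JSON above):

import json
from huggingface_hub import hf_hub_download

# The repo_id below is a placeholder, not the actual repository.
path = hf_hub_download(
    repo_id="your-username/your-repo",
    filename="training_logs/mlc__seed42.json",
)

with open(path) as f:
    log = json.load(f)

# Each logged step pairs a training loss with an L0 sparsity reading.
for step, loss, l0 in zip(log["steps_logged"], log["loss"], log["l0"]):
    print(f"step {step:>5}: loss={loss:10.1f}  l0={l0:6.1f}")

print("converged:", log["converged"], "| final_step:", log["final_step"])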