han1823123123 commited on
Commit
da2748e
·
verified ·
1 Parent(s): 6a4dd79

Upload training_logs/phase5b_subseq_h8__seed1.json with huggingface_hub

Browse files
training_logs/phase5b_subseq_h8__seed1.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 121757.96875,
4
+ 50702.26953125,
5
+ 47850.796875,
6
+ 47004.0859375,
7
+ 46143.71484375,
8
+ 45698.10546875,
9
+ 45365.57421875,
10
+ 45648.64453125,
11
+ 45268.5234375,
12
+ 45107.01953125,
13
+ 44737.34375,
14
+ 45042.4375,
15
+ 45003.08984375,
16
+ 44817.30078125,
17
+ 44881.87109375,
18
+ 44662.80859375
19
+ ],
20
+ "l0": [
21
+ 500.0,
22
+ 495.823486328125,
23
+ 495.682861328125,
24
+ 495.972900390625,
25
+ 495.685302734375,
26
+ 495.725341796875,
27
+ 496.040283203125,
28
+ 497.181884765625,
29
+ 496.80126953125,
30
+ 496.099853515625,
31
+ 494.629638671875,
32
+ 495.306396484375,
33
+ 494.967041015625,
34
+ 495.694580078125,
35
+ 495.9208984375,
36
+ 494.676025390625
37
+ ],
38
+ "steps_logged": [
39
+ 0,
40
+ 200,
41
+ 400,
42
+ 600,
43
+ 800,
44
+ 1000,
45
+ 1200,
46
+ 1400,
47
+ 1600,
48
+ 1800,
49
+ 2000,
50
+ 2200,
51
+ 2400,
52
+ 2600,
53
+ 2800,
54
+ 3000
55
+ ],
56
+ "final_step": 3000,
57
+ "converged": true,
58
+ "plateau_last": 0.007604562277863013,
59
+ "elapsed_s": 5567.742310523987,
60
+ "T_max": 10,
61
+ "t_sample": 5,
62
+ "shifts": [
63
+ 1,
64
+ 2,
65
+ 5
66
+ ],
67
+ "matryoshka_h_size": 3686,
68
+ "row": 13,
69
+ "arch_id": "phase5b_subseq_h8",
70
+ "arch": "phase5b_subseq_h8",
71
+ "group": 2,
72
+ "src_class": "SubseqH8",
73
+ "src_module": "src.architectures.phase5b_subseq_sampling_txcdr",
74
+ "T": null,
75
+ "n_layers": null,
76
+ "k_win": 500,
77
+ "k_pos": 100,
78
+ "alpha": null,
79
+ "gamma": null,
80
+ "n_scales": null,
81
+ "seed": 1,
82
+ "d_in": 2304,
83
+ "d_sae": 18432,
84
+ "subject_model": "google/gemma-2-2b",
85
+ "anchor_layer": 12,
86
+ "mlc_layers": [
87
+ 10,
88
+ 11,
89
+ 12,
90
+ 13,
91
+ 14
92
+ ],
93
+ "phase": "phase7_unification",
94
+ "run_id": "phase5b_subseq_h8__seed1"
95
+ }