han1823123123 committed on
Commit 573dcfe · verified · 1 Parent(s): 290cef7

Upload training_logs/phase5b_subseq_track2__seed2.json with huggingface_hub

training_logs/phase5b_subseq_track2__seed2.json ADDED
@@ -0,0 +1,96 @@
+ {
+   "loss": [
+     16269.009765625,
+     6297.28369140625,
+     5702.6279296875,
+     5396.48388671875,
+     5268.8984375,
+     5168.9423828125,
+     5113.98583984375,
+     5084.68798828125,
+     5014.6572265625,
+     5003.88134765625,
+     4960.1376953125,
+     4923.68505859375,
+     4949.88916015625,
+     4905.4580078125,
+     4894.71142578125,
+     4865.13916015625,
+     4836.7626953125,
+     4881.08349609375
+   ],
+   "l0": [
+     500.0,
+     497.01904296875,
+     496.993896484375,
+     496.538330078125,
+     496.166748046875,
+     497.13623046875,
+     496.052001953125,
+     496.489501953125,
+     495.185546875,
+     495.975341796875,
+     496.177734375,
+     494.714599609375,
+     495.918701171875,
+     495.106201171875,
+     495.29248046875,
+     495.2666015625,
+     494.379150390625,
+     496.396484375
+   ],
+   "steps_logged": [
+     0,
+     200,
+     400,
+     600,
+     800,
+     1000,
+     1200,
+     1400,
+     1600,
+     1800,
+     2000,
+     2200,
+     2400,
+     2600,
+     2800,
+     3000,
+     3200,
+     3400
+   ],
+   "final_step": 3400,
+   "converged": true,
+   "plateau_last": 0.0188753812595845,
+   "elapsed_s": 2169.8539748191833,
+   "T_max": 10,
+   "t_sample": 5,
+   "row": 12,
+   "arch_id": "phase5b_subseq_track2",
+   "arch": "phase5b_subseq_track2",
+   "group": 2,
+   "src_class": "SubseqTXCBareAntidead",
+   "src_module": "src.architectures.phase5b_subseq_sampling_txcdr",
+   "T": null,
+   "n_layers": null,
+   "k_win": 500,
+   "k_pos": 100,
+   "shifts": null,
+   "alpha": null,
+   "gamma": null,
+   "n_scales": null,
+   "seed": 2,
+   "d_in": 2304,
+   "d_sae": 18432,
+   "subject_model": "google/gemma-2-2b",
+   "anchor_layer": 12,
+   "mlc_layers": [
+     10,
+     11,
+     12,
+     13,
+     14
+   ],
+   "phase": "phase7_unification",
+   "run_id": "phase5b_subseq_track2__seed2"
+ }
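
Since the commit message says the file was uploaded with huggingface_hub, here is a minimal sketch of how this log could be fetched back and inspected with the same library. The repo ID is not shown on this page, so REPO_ID below is a placeholder, and repo_type="model" is an assumption (use "dataset" if the file lives in a dataset repo).

    import json

    from huggingface_hub import hf_hub_download

    REPO_ID = "<user>/<repo>"  # placeholder; the hosting repo is not named on this page
    path = hf_hub_download(
        repo_id=REPO_ID,
        filename="training_logs/phase5b_subseq_track2__seed2.json",
        repo_type="model",  # assumption; switch to "dataset" for a dataset repo
    )

    with open(path) as f:
        log = json.load(f)

    # The three series are aligned: one loss and one l0 reading per logged step.
    assert len(log["loss"]) == len(log["l0"]) == len(log["steps_logged"])
    print(
        f"run {log['run_id']}: final step {log['final_step']}, "
        f"final loss {log['loss'][-1]:.1f}, final l0 {log['l0'][-1]:.1f}"
    )

For this file, the printed summary would reflect the values recorded above (final step 3400, final loss 4881.1, final l0 496.4).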