han1823123123 commited on
Commit
cecfeb5
·
verified ·
1 Parent(s): a37bf07

Upload training_logs/txc_contrastive_h8_t2_kpos20_shifts2__seed2.json with huggingface_hub

Browse files
training_logs/txc_contrastive_h8_t2_kpos20_shifts2__seed2.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 88813.3515625,
4
+ 33919.0859375,
5
+ 28151.41015625,
6
+ 26574.19140625,
7
+ 25957.111328125,
8
+ 25535.26171875,
9
+ 24991.052734375,
10
+ 24703.8984375,
11
+ 24675.486328125,
12
+ 24477.263671875,
13
+ 24300.0625,
14
+ 24341.671875,
15
+ 24063.431640625,
16
+ 24102.466796875,
17
+ 24171.744140625,
18
+ 24071.5390625
19
+ ],
20
+ "l0": [
21
+ 40.0,
22
+ 40.0,
23
+ 40.0,
24
+ 40.0,
25
+ 40.0,
26
+ 40.0,
27
+ 40.0,
28
+ 40.0,
29
+ 40.0,
30
+ 40.0,
31
+ 40.0,
32
+ 40.0,
33
+ 40.0,
34
+ 40.0,
35
+ 40.0,
36
+ 40.0
37
+ ],
38
+ "steps_logged": [
39
+ 0,
40
+ 200,
41
+ 400,
42
+ 600,
43
+ 800,
44
+ 1000,
45
+ 1200,
46
+ 1400,
47
+ 1600,
48
+ 1800,
49
+ 2000,
50
+ 2200,
51
+ 2400,
52
+ 2600,
53
+ 2800,
54
+ 3000
55
+ ],
56
+ "final_step": 3000,
57
+ "converged": true,
58
+ "plateau_last": 0.01946369211085733,
59
+ "elapsed_s": 1918.7778370380402,
60
+ "shifts": [
61
+ 2
62
+ ],
63
+ "matryoshka_h_size": 3686,
64
+ "alpha": 1.0,
65
+ "arch_id": "txc_contrastive_h8_t2_kpos20_shifts2",
66
+ "src_class": "TXCContrastiveMergeH8",
67
+ "src_module": "src.architectures.txc_contrastive_merge_h8",
68
+ "T": 2,
69
+ "k_pos": 20,
70
+ "k_win": 40,
71
+ "d_sae": 18432,
72
+ "d_in": 2304,
73
+ "subject_model": "google/gemma-2-2b",
74
+ "anchor_layer": 12,
75
+ "seed": 2
76
+ }