han1823123123 commited on
Commit
6dc0645
·
verified ·
1 Parent(s): b192e89

Upload training_logs/hill_z_mdms_t5_shifts1_2_ns3__seed42.json with huggingface_hub

Browse files
training_logs/hill_z_mdms_t5_shifts1_2_ns3__seed42.json ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 78768.8203125,
4
+ 34225.9140625,
5
+ 30923.470703125,
6
+ 29198.923828125,
7
+ 27962.841796875,
8
+ 27335.552734375,
9
+ 26706.693359375,
10
+ 26376.677734375,
11
+ 25936.65234375,
12
+ 25821.076171875,
13
+ 25533.013671875,
14
+ 25545.28125,
15
+ 25231.853515625,
16
+ 25142.685546875,
17
+ 25127.41015625,
18
+ 24968.033203125,
19
+ 24896.845703125,
20
+ 24879.998046875,
21
+ 24770.884765625,
22
+ 24766.1015625
23
+ ],
24
+ "l0": [
25
+ 500.0,
26
+ 500.0,
27
+ 500.0,
28
+ 500.0,
29
+ 500.0,
30
+ 500.0,
31
+ 500.0,
32
+ 500.0,
33
+ 500.0,
34
+ 500.0,
35
+ 500.0,
36
+ 500.0,
37
+ 500.0,
38
+ 500.0,
39
+ 500.0,
40
+ 500.0,
41
+ 500.0,
42
+ 500.0,
43
+ 500.0,
44
+ 500.0
45
+ ],
46
+ "steps_logged": [
47
+ 0,
48
+ 200,
49
+ 400,
50
+ 600,
51
+ 800,
52
+ 1000,
53
+ 1200,
54
+ 1400,
55
+ 1600,
56
+ 1800,
57
+ 2000,
58
+ 2200,
59
+ 2400,
60
+ 2600,
61
+ 2800,
62
+ 3000,
63
+ 3200,
64
+ 3400,
65
+ 3600,
66
+ 3800
67
+ ],
68
+ "final_step": 3800,
69
+ "converged": true,
70
+ "plateau_last": 0.018157500603503293,
71
+ "elapsed_s": 2577.0604038238525,
72
+ "T": 5,
73
+ "shifts": [
74
+ 1,
75
+ 2
76
+ ],
77
+ "n_contr_scales": 3,
78
+ "gamma": 0.5,
79
+ "matryoshka_h_size": 3686,
80
+ "alpha": 1.0,
81
+ "src_class": "TXCBareMDxMSContrastiveAntidead",
82
+ "n_seqs_used": 24000,
83
+ "ctx_used": 64,
84
+ "ctx_slice_direction": "last",
85
+ "row": 705,
86
+ "arch_id": "hill_z_mdms_t5_shifts1_2_ns3",
87
+ "arch": "hill_z_mdms_t5_shifts1_2_ns3",
88
+ "group": 99,
89
+ "src_module": "src.architectures.txc_bare_md_ms_contrastive_antidead",
90
+ "T_max": null,
91
+ "t_sample": null,
92
+ "n_layers": null,
93
+ "k_win": 500,
94
+ "k_pos": 100,
95
+ "n_scales": 3,
96
+ "seed": 42,
97
+ "d_in": 2304,
98
+ "d_sae": 18432,
99
+ "subject_model": "google/gemma-2-2b",
100
+ "anchor_layer": 12,
101
+ "mlc_layers": [
102
+ 10,
103
+ 11,
104
+ 12,
105
+ 13,
106
+ 14
107
+ ],
108
+ "phase": "phase7_unification",
109
+ "run_id": "hill_z_mdms_t5_shifts1_2_ns3__seed42"
110
+ }