han1823123123 commited on
Commit
14a0eb3
·
verified ·
1 Parent(s): 75dc506

Upload training_logs/mlc_contrastive_alpha100_batchtopk__seed1.json with huggingface_hub

Browse files
training_logs/mlc_contrastive_alpha100_batchtopk__seed1.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 113.1244125366211,
4
+ 15.078754425048828,
5
+ 13.990482330322266,
6
+ 13.722837448120117,
7
+ 13.43752384185791,
8
+ 13.301484107971191,
9
+ 13.143375396728516,
10
+ 13.06921100616455,
11
+ 12.994958877563477,
12
+ 12.895394325256348,
13
+ 12.889880180358887,
14
+ 12.839088439941406,
15
+ 12.78973388671875,
16
+ 12.753411293029785,
17
+ 12.682119369506836,
18
+ 12.63943099975586
19
+ ],
20
+ "l0": [
21
+ 500.0,
22
+ 490.70361328125,
23
+ 488.095703125,
24
+ 490.61767578125,
25
+ 487.789794921875,
26
+ 490.43359375,
27
+ 488.31787109375,
28
+ 489.744873046875,
29
+ 490.695556640625,
30
+ 489.99658203125,
31
+ 490.54736328125,
32
+ 490.084228515625,
33
+ 491.463623046875,
34
+ 490.50732421875,
35
+ 488.458740234375,
36
+ 489.27197265625
37
+ ],
38
+ "steps_logged": [
39
+ 0,
40
+ 200,
41
+ 400,
42
+ 600,
43
+ 800,
44
+ 1000,
45
+ 1200,
46
+ 1400,
47
+ 1600,
48
+ 1800,
49
+ 2000,
50
+ 2200,
51
+ 2400,
52
+ 2600,
53
+ 2800,
54
+ 3000
55
+ ],
56
+ "final_step": 3000,
57
+ "converged": true,
58
+ "plateau_last": 0.019833510861077906,
59
+ "elapsed_s": 1356.9366526603699,
60
+ "h": 3686,
61
+ "alpha": 1.0,
62
+ "row": 5,
63
+ "arch_id": "mlc_contrastive_alpha100_batchtopk",
64
+ "arch": "mlc_contrastive_alpha100_batchtopk",
65
+ "group": 1,
66
+ "src_class": "MLCContrastive",
67
+ "src_module": "src.architectures.mlc_contrastive",
68
+ "T": null,
69
+ "T_max": null,
70
+ "t_sample": null,
71
+ "n_layers": 5,
72
+ "k_win": 500,
73
+ "k_pos": 100,
74
+ "shifts": [
75
+ 1
76
+ ],
77
+ "gamma": null,
78
+ "n_scales": null,
79
+ "seed": 1,
80
+ "d_in": 2304,
81
+ "d_sae": 18432,
82
+ "subject_model": "google/gemma-2-2b",
83
+ "anchor_layer": 12,
84
+ "mlc_layers": [
85
+ 10,
86
+ 11,
87
+ 12,
88
+ 13,
89
+ 14
90
+ ],
91
+ "phase": "phase7_unification",
92
+ "run_id": "mlc_contrastive_alpha100_batchtopk__seed1"
93
+ }