han1823123123 committed
Commit 6b0adc1 · verified · 1 Parent(s): b6f0bc0

Upload training_logs/mlc_contrastive_alpha100_batchtopk__seed2.json with huggingface_hub

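The commit message indicates the file was pushed with the huggingface_hub client. A minimal sketch of such an upload, assuming a Python environment with huggingface_hub installed; the repo_id placeholder is hypothetical, since the target repository is not shown on this page:

    from huggingface_hub import HfApi

    api = HfApi()
    # Upload the local JSON log into the repo under the same path.
    # repo_id is a placeholder; substitute the actual repository.
    api.upload_file(
        path_or_fileobj="training_logs/mlc_contrastive_alpha100_batchtopk__seed2.json",
        path_in_repo="training_logs/mlc_contrastive_alpha100_batchtopk__seed2.json",
        repo_id="<user-or-org>/<repo>",
        commit_message="Upload training_logs/mlc_contrastive_alpha100_batchtopk__seed2.json with huggingface_hub",
    )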
training_logs/mlc_contrastive_alpha100_batchtopk__seed2.json ADDED
@@ -0,0 +1,96 @@
+{
+  "loss": [
+    107.99402618408203,
+    15.079160690307617,
+    14.035333633422852,
+    13.666914939880371,
+    13.447431564331055,
+    13.35392951965332,
+    13.23121452331543,
+    13.078819274902344,
+    13.063368797302246,
+    12.917702674865723,
+    12.902910232543945,
+    12.78940200805664,
+    12.790445327758789,
+    12.754850387573242,
+    12.731706619262695,
+    12.691265106201172,
+    12.628910064697266
+  ],
+  "l0": [
+    500.0,
+    490.47412109375,
+    489.669189453125,
+    490.13525390625,
+    489.85009765625,
+    489.4111328125,
+    490.627685546875,
+    490.576416015625,
+    490.9169921875,
+    488.2041015625,
+    487.806884765625,
+    489.274169921875,
+    487.725830078125,
+    490.3369140625,
+    489.71533203125,
+    489.957763671875,
+    488.9111328125
+  ],
+  "steps_logged": [
+    0,
+    200,
+    400,
+    600,
+    800,
+    1000,
+    1200,
+    1400,
+    1600,
+    1800,
+    2000,
+    2200,
+    2400,
+    2600,
+    2800,
+    3000,
+    3200
+  ],
+  "final_step": 3200,
+  "converged": true,
+  "plateau_last": 0.017837624495921,
+  "elapsed_s": 1503.8217902183533,
+  "h": 3686,
+  "alpha": 1.0,
+  "row": 5,
+  "arch_id": "mlc_contrastive_alpha100_batchtopk",
+  "arch": "mlc_contrastive_alpha100_batchtopk",
+  "group": 1,
+  "src_class": "MLCContrastive",
+  "src_module": "src.architectures.mlc_contrastive",
+  "T": null,
+  "T_max": null,
+  "t_sample": null,
+  "n_layers": 5,
+  "k_win": 500,
+  "k_pos": 100,
+  "shifts": [
+    1
+  ],
+  "gamma": null,
+  "n_scales": null,
+  "seed": 2,
+  "d_in": 2304,
+  "d_sae": 18432,
+  "subject_model": "google/gemma-2-2b",
+  "anchor_layer": 12,
+  "mlc_layers": [
+    10,
+    11,
+    12,
+    13,
+    14
+  ],
+  "phase": "phase7_unification",
+  "run_id": "mlc_contrastive_alpha100_batchtopk__seed2"
+}
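The log records training loss and L0 sparsity every 200 steps up to step 3200. A minimal sketch of how one might fetch and plot these curves; the repo_id is again a hypothetical placeholder, and the use of matplotlib is an assumption:

    import json

    import matplotlib.pyplot as plt
    from huggingface_hub import hf_hub_download

    # Download the log from the repo (repo_id is a placeholder).
    path = hf_hub_download(
        repo_id="<user-or-org>/<repo>",
        filename="training_logs/mlc_contrastive_alpha100_batchtopk__seed2.json",
    )
    with open(path) as f:
        log = json.load(f)

    # Loss and L0 are logged every 200 steps; plot both against steps_logged.
    fig, (ax_loss, ax_l0) = plt.subplots(1, 2, figsize=(10, 4))
    ax_loss.plot(log["steps_logged"], log["loss"])
    ax_loss.set(xlabel="step", ylabel="loss")
    ax_l0.plot(log["steps_logged"], log["l0"])
    ax_l0.set(xlabel="step", ylabel="L0")
    fig.suptitle(log["run_id"])
    plt.show()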