han1823123123 committed on
Commit
4cf9bf9
·
verified ·
1 Parent(s): e577140

Upload training_logs/mlc_contrastive_alpha100_batchtopk__seed42.json with huggingface_hub

Browse files
training_logs/mlc_contrastive_alpha100_batchtopk__seed42.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{
  "loss": [
    103.3500747680664,
    15.071441650390625,
    13.9940185546875,
    13.655973434448242,
    13.48550033569336,
    13.236801147460938,
    13.230867385864258,
    13.081604957580566,
    13.022639274597168,
    12.89256477355957,
    12.859413146972656,
    12.847274780273438,
    12.828615188598633,
    12.750104904174805,
    12.706785202026367,
    12.704126358032227
  ],
  "l0": [
    500.0,
    489.761474609375,
    488.7451171875,
    490.169921875,
    491.069580078125,
    489.6865234375,
    491.538330078125,
    489.5478515625,
    488.531494140625,
    488.84521484375,
    489.150146484375,
    489.55224609375,
    489.5205078125,
    488.846923828125,
    489.205322265625,
    489.912109375
  ],
  "steps_logged": [
    0,
    200,
    400,
    600,
    800,
    1000,
    1200,
    1400,
    1600,
    1800,
    2000,
    2200,
    2400,
    2600,
    2800,
    3000
  ],
  "final_step": 3000,
  "converged": true,
  "plateau_last": 0.01920785080930411,
  "elapsed_s": 1343.2238459587097,
  "h": 3686,
  "alpha": 1.0,
  "row": 5,
  "arch_id": "mlc_contrastive_alpha100_batchtopk",
  "arch": "mlc_contrastive_alpha100_batchtopk",
  "group": 1,
  "src_class": "MLCContrastive",
  "src_module": "src.architectures.mlc_contrastive",
  "T": null,
  "T_max": null,
  "t_sample": null,
  "n_layers": 5,
  "k_win": 500,
  "k_pos": 100,
  "shifts": [
    1
  ],
  "gamma": null,
  "n_scales": null,
  "seed": 42,
  "d_in": 2304,
  "d_sae": 18432,
  "subject_model": "google/gemma-2-2b",
  "anchor_layer": 12,
  "mlc_layers": [
    10,
    11,
    12,
    13,
    14
  ],
  "phase": "phase7_unification",
  "run_id": "mlc_contrastive_alpha100_batchtopk__seed42"
}