han1823123123 commited on
Commit
2d5cbd4
·
verified ·
1 Parent(s): f150b6f

Upload training_logs/phase57_partB_h8_bare_multidistance_t3__seed2.json with huggingface_hub

Browse files
training_logs/phase57_partB_h8_bare_multidistance_t3__seed2.json CHANGED
@@ -1,57 +1,53 @@
1
  {
2
  "loss": [
3
- 72676.125,
4
- 19830.833984375,
5
- 17857.2578125,
6
- 16698.130859375,
7
- 15952.2470703125,
8
- 15408.072265625,
9
- 15181.0322265625,
10
- 14807.0703125,
11
- 14492.705078125,
12
- 14233.9755859375,
13
- 14187.5849609375,
14
- 13957.34765625,
15
- 13824.751953125,
16
- 13828.958984375,
17
- 13795.6298828125,
18
- 13654.6923828125,
19
- 13631.7744140625,
20
- 13567.30078125,
21
- 13504.404296875,
22
- 13397.8583984375,
23
- 13347.6279296875,
24
- 13280.4404296875,
25
- 13308.0673828125,
26
- 13261.564453125,
27
- 13365.0224609375
28
  ],
29
  "l0": [
30
  500.0,
31
- 497.744140625,
32
- 497.7060546875,
33
- 497.090087890625,
34
- 496.636962890625,
35
- 496.312255859375,
36
- 496.17578125,
37
- 495.980224609375,
38
- 495.760986328125,
39
- 495.501953125,
40
- 496.220703125,
41
- 496.025390625,
42
- 495.618896484375,
43
- 496.352294921875,
44
- 496.019775390625,
45
- 495.375,
46
- 494.937255859375,
47
- 495.63671875,
48
- 496.15966796875,
49
- 495.461669921875,
50
- 495.29443359375,
51
- 495.432373046875,
52
- 495.56103515625,
53
- 495.46630859375,
54
- 495.434814453125
55
  ],
56
  "steps_logged": [
57
  0,
@@ -76,31 +72,32 @@
76
  3800,
77
  4000,
78
  4200,
79
- 4400,
80
- 4600,
81
- 4800
82
  ],
83
- "final_step": 4800,
84
  "converged": true,
85
- "plateau_last": 0.017611828974805156,
86
- "elapsed_s": 1645.8710408210754,
 
87
  "shifts": [
88
  1
89
  ],
90
  "matryoshka_h_size": 3686,
91
- "alpha": null,
 
 
 
 
92
  "row": 30,
93
  "arch_id": "phase57_partB_h8_bare_multidistance_t3",
94
  "arch": "phase57_partB_h8_bare_multidistance_t3",
95
  "group": 4,
96
- "src_class": "TXCBareMultiDistanceContrastiveAntidead",
97
  "src_module": "src.architectures.txc_bare_multidistance_contrastive_antidead",
98
- "T": 3,
99
  "T_max": null,
100
  "t_sample": null,
101
  "n_layers": null,
102
  "k_win": 500,
103
- "k_pos": 167,
104
  "gamma": null,
105
  "n_scales": null,
106
  "seed": 2,
 
1
  {
2
  "loss": [
3
+ 54574.34765625,
4
+ 20413.5234375,
5
+ 18312.55859375,
6
+ 17239.552734375,
7
+ 16312.90625,
8
+ 15910.951171875,
9
+ 15386.853515625,
10
+ 15253.4287109375,
11
+ 14965.2236328125,
12
+ 14752.197265625,
13
+ 14490.404296875,
14
+ 14306.2822265625,
15
+ 14087.99609375,
16
+ 14143.869140625,
17
+ 14067.3046875,
18
+ 13899.822265625,
19
+ 13875.208984375,
20
+ 13754.6025390625,
21
+ 13788.361328125,
22
+ 13815.8779296875,
23
+ 13602.0908203125,
24
+ 13644.73828125,
25
+ 13564.2587890625
 
 
26
  ],
27
  "l0": [
28
  500.0,
29
+ 500.0,
30
+ 500.0,
31
+ 500.0,
32
+ 500.0,
33
+ 500.0,
34
+ 500.0,
35
+ 500.0,
36
+ 500.0,
37
+ 500.0,
38
+ 500.0,
39
+ 500.0,
40
+ 500.0,
41
+ 500.0,
42
+ 500.0,
43
+ 500.0,
44
+ 500.0,
45
+ 500.0,
46
+ 500.0,
47
+ 500.0,
48
+ 500.0,
49
+ 500.0,
50
+ 500.0
 
 
51
  ],
52
  "steps_logged": [
53
  0,
 
72
  3800,
73
  4000,
74
  4200,
75
+ 4400
 
 
76
  ],
77
+ "final_step": 4400,
78
  "converged": true,
79
+ "plateau_last": 0.019005808995296743,
80
+ "elapsed_s": 1067.413980960846,
81
+ "T": 3,
82
  "shifts": [
83
  1
84
  ],
85
  "matryoshka_h_size": 3686,
86
+ "alpha": 1.0,
87
+ "src_class": "TXCBareMultiDistanceContrastiveAntidead",
88
+ "n_seqs_used": 24000,
89
+ "ctx_used": 64,
90
+ "ctx_slice_direction": "last",
91
  "row": 30,
92
  "arch_id": "phase57_partB_h8_bare_multidistance_t3",
93
  "arch": "phase57_partB_h8_bare_multidistance_t3",
94
  "group": 4,
 
95
  "src_module": "src.architectures.txc_bare_multidistance_contrastive_antidead",
 
96
  "T_max": null,
97
  "t_sample": null,
98
  "n_layers": null,
99
  "k_win": 500,
100
+ "k_pos": 166,
101
  "gamma": null,
102
  "n_scales": null,
103
  "seed": 2,