ferxalb committed on
Commit
ce75f33
·
verified ·
1 Parent(s): f59a9ee

Upload matex-privacy-sentinel-v0.1 trained on Modal L40S

Browse files
Files changed (2) hide show
  1. finetune_summary.json +141 -89
  2. model.safetensors +1 -1
finetune_summary.json CHANGED
@@ -6,131 +6,183 @@
6
  },
7
  "base_checkpoint": "/root/.opf/privacy_filter",
8
  "batch_size": 4,
9
- "best_epoch": 4,
10
- "best_metric": 0.22034703597205768,
11
  "best_metric_name": "validation_loss",
12
  "checkpoint_category_version": "v2",
13
  "device": "cuda",
14
- "elapsed_s": 1707.967649188,
15
  "encoding": "o200k_base",
16
  "epoch_metrics": [
17
  {
18
- "elapsed_s": 217.39479903,
19
  "epoch": 1,
20
- "optimizer_steps": 3038,
21
- "train_batches": 3038,
22
- "train_loss": 0.6062319690959177,
23
- "train_token_accuracy": 0.8512172545619088,
24
- "train_tokens": 1453065,
25
- "validation_batches": 169,
26
- "validation_loss": 0.2926192293667525,
27
- "validation_token_accuracy": 0.9352956475523843,
28
- "validation_tokens": 88433
29
  },
30
  {
31
- "elapsed_s": 213.544190871,
32
  "epoch": 2,
33
- "optimizer_steps": 3038,
34
- "train_batches": 3038,
35
- "train_loss": 0.2367756585074477,
36
- "train_token_accuracy": 0.9442185999938062,
37
- "train_tokens": 1453065,
38
- "validation_batches": 169,
39
- "validation_loss": 0.23744411616401195,
40
- "validation_token_accuracy": 0.9441950403130053,
41
- "validation_tokens": 88433
42
  },
43
  {
44
- "elapsed_s": 213.38700962000001,
45
  "epoch": 3,
46
- "optimizer_steps": 3038,
47
- "train_batches": 3038,
48
- "train_loss": 0.17531355934431897,
49
- "train_token_accuracy": 0.9586143771957896,
50
- "train_tokens": 1453065,
51
- "validation_batches": 169,
52
- "validation_loss": 0.2293024442145412,
53
- "validation_token_accuracy": 0.9511494577815973,
54
- "validation_tokens": 88433
55
  },
56
  {
57
- "elapsed_s": 213.449483773,
58
  "epoch": 4,
59
- "optimizer_steps": 3038,
60
- "train_batches": 3038,
61
- "train_loss": 0.13766411499792147,
62
- "train_token_accuracy": 0.9675279495411423,
63
- "train_tokens": 1453065,
64
- "validation_batches": 169,
65
- "validation_loss": 0.22034703597205768,
66
- "validation_token_accuracy": 0.9541008447072925,
67
- "validation_tokens": 88433
68
  },
69
  {
70
- "elapsed_s": 211.39228578199993,
71
  "epoch": 5,
72
- "optimizer_steps": 3038,
73
- "train_batches": 3038,
74
- "train_loss": 0.10729076129519516,
75
- "train_token_accuracy": 0.9746652765017394,
76
- "train_tokens": 1453065,
77
- "validation_batches": 169,
78
- "validation_loss": 0.23614277132286401,
79
- "validation_token_accuracy": 0.9555256521886626,
80
- "validation_tokens": 88433
81
  },
82
  {
83
- "elapsed_s": 211.30701109300003,
84
  "epoch": 6,
85
- "optimizer_steps": 3038,
86
- "train_batches": 3038,
87
- "train_loss": 0.08785577328875824,
88
- "train_token_accuracy": 0.9804427193552938,
89
- "train_tokens": 1453065,
90
- "validation_batches": 169,
91
- "validation_loss": 0.27111270338299504,
92
- "validation_token_accuracy": 0.9552881842751009,
93
- "validation_tokens": 88433
94
  },
95
  {
96
- "elapsed_s": 211.223892794,
97
  "epoch": 7,
98
- "optimizer_steps": 3038,
99
- "train_batches": 3038,
100
- "train_loss": 0.07146611258642582,
101
- "train_token_accuracy": 0.9836793261141105,
102
- "train_tokens": 1453065,
103
- "validation_batches": 169,
104
- "validation_loss": 0.27473266072740865,
105
- "validation_token_accuracy": 0.9579116393201633,
106
- "validation_tokens": 88433
107
  },
108
  {
109
- "elapsed_s": 209.94307118100005,
110
  "epoch": 8,
111
- "optimizer_steps": 3038,
112
- "train_batches": 3038,
113
- "train_loss": 0.05768033069594669,
114
- "train_token_accuracy": 0.9867569585668914,
115
- "train_tokens": 1453065,
116
- "validation_batches": 169,
117
- "validation_loss": 0.30543249755483665,
118
- "validation_token_accuracy": 0.9566903757647032,
119
- "validation_tokens": 88433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  }
121
  ],
122
- "epochs": 8,
123
- "generated_at_unix": 1777747058.003828,
124
  "grad_accum_steps": 1,
125
  "label_space_json_path": "/__modal/volumes/vo-1GyZvIAPD7wnYD3s0C2QXS/dataset/configs/custom_label_space.json",
126
  "label_space_source": "label-space-json",
127
  "learning_rate": 1e-05,
128
  "max_grad_norm": 1.0,
129
  "num_output_labels": 97,
130
- "num_train_examples": 12150,
131
- "num_train_windows": 12150,
132
- "num_validation_examples": 675,
133
- "num_validation_windows": 675,
134
  "output_checkpoint_dir": "/__modal/volumes/vo-XIgHS4JYPE8ddRlAcjQTwm/matex-privacy-sentinel-v0.1",
135
  "output_head_reinitialized": true,
136
  "output_head_rows_copied": 97,
 
6
  },
7
  "base_checkpoint": "/root/.opf/privacy_filter",
8
  "batch_size": 4,
9
+ "best_epoch": 3,
10
+ "best_metric": 0.10880099386333249,
11
  "best_metric_name": "validation_loss",
12
  "checkpoint_category_version": "v2",
13
  "device": "cuda",
14
+ "elapsed_s": 4243.969272867,
15
  "encoding": "o200k_base",
16
  "epoch_metrics": [
17
  {
18
+ "elapsed_s": 357.860645665,
19
  "epoch": 1,
20
+ "optimizer_steps": 4838,
21
+ "train_batches": 4838,
22
+ "train_loss": 0.4229886383246393,
23
+ "train_token_accuracy": 0.897370439303403,
24
+ "train_tokens": 2511294,
25
+ "validation_batches": 269,
26
+ "validation_loss": 0.1392858392746236,
27
+ "validation_token_accuracy": 0.9636879159602899,
28
+ "validation_tokens": 135079
29
  },
30
  {
31
+ "elapsed_s": 353.5939458099999,
32
  "epoch": 2,
33
+ "optimizer_steps": 4838,
34
+ "train_batches": 4838,
35
+ "train_loss": 0.13250755364892783,
36
+ "train_token_accuracy": 0.9689693839112425,
37
+ "train_tokens": 2511294,
38
+ "validation_batches": 269,
39
+ "validation_loss": 0.1134884088340716,
40
+ "validation_token_accuracy": 0.9712168434767803,
41
+ "validation_tokens": 135079
42
  },
43
  {
44
+ "elapsed_s": 360.95667415800006,
45
  "epoch": 3,
46
+ "optimizer_steps": 4838,
47
+ "train_batches": 4838,
48
+ "train_loss": 0.09536114751181902,
49
+ "train_token_accuracy": 0.9772459934997655,
50
+ "train_tokens": 2511294,
51
+ "validation_batches": 269,
52
+ "validation_loss": 0.10880099386333249,
53
+ "validation_token_accuracy": 0.9737042767565647,
54
+ "validation_tokens": 135079
55
  },
56
  {
57
+ "elapsed_s": 351.6680030719999,
58
  "epoch": 4,
59
+ "optimizer_steps": 4838,
60
+ "train_batches": 4838,
61
+ "train_loss": 0.07531491173454984,
62
+ "train_token_accuracy": 0.9822832372474111,
63
+ "train_tokens": 2511294,
64
+ "validation_batches": 269,
65
+ "validation_loss": 0.11094144339923677,
66
+ "validation_token_accuracy": 0.9750664426002561,
67
+ "validation_tokens": 135079
68
  },
69
  {
70
+ "elapsed_s": 352.70093192599984,
71
  "epoch": 5,
72
+ "optimizer_steps": 4838,
73
+ "train_batches": 4838,
74
+ "train_loss": 0.05978634332790929,
75
+ "train_token_accuracy": 0.98627082293033,
76
+ "train_tokens": 2511294,
77
+ "validation_batches": 269,
78
+ "validation_loss": 0.10941513350480378,
79
+ "validation_token_accuracy": 0.9764730268953723,
80
+ "validation_tokens": 135079
81
  },
82
  {
83
+ "elapsed_s": 352.3479713060001,
84
  "epoch": 6,
85
+ "optimizer_steps": 4838,
86
+ "train_batches": 4838,
87
+ "train_loss": 0.04700873986023811,
88
+ "train_token_accuracy": 0.9891004398529204,
89
+ "train_tokens": 2511294,
90
+ "validation_batches": 269,
91
+ "validation_loss": 0.12030725566000045,
92
+ "validation_token_accuracy": 0.9774576359019537,
93
+ "validation_tokens": 135079
94
  },
95
  {
96
+ "elapsed_s": 350.5779395559998,
97
  "epoch": 7,
98
+ "optimizer_steps": 4838,
99
+ "train_batches": 4838,
100
+ "train_loss": 0.037927263294671304,
101
+ "train_token_accuracy": 0.9911675016943456,
102
+ "train_tokens": 2511294,
103
+ "validation_batches": 269,
104
+ "validation_loss": 0.1220005202920702,
105
+ "validation_token_accuracy": 0.9779832542438128,
106
+ "validation_tokens": 135079
107
  },
108
  {
109
+ "elapsed_s": 352.3893561149998,
110
  "epoch": 8,
111
+ "optimizer_steps": 4838,
112
+ "train_batches": 4838,
113
+ "train_loss": 0.030487817015155995,
114
+ "train_token_accuracy": 0.9926989830740646,
115
+ "train_tokens": 2511294,
116
+ "validation_batches": 269,
117
+ "validation_loss": 0.124172292954461,
118
+ "validation_token_accuracy": 0.9795156908179657,
119
+ "validation_tokens": 135079
120
+ },
121
+ {
122
+ "elapsed_s": 351.60098334099985,
123
+ "epoch": 9,
124
+ "optimizer_steps": 4838,
125
+ "train_batches": 4838,
126
+ "train_loss": 0.026497954957400774,
127
+ "train_token_accuracy": 0.9938023982855054,
128
+ "train_tokens": 2511294,
129
+ "validation_batches": 269,
130
+ "validation_loss": 0.12726946480100174,
131
+ "validation_token_accuracy": 0.9797229769246145,
132
+ "validation_tokens": 135079
133
+ },
134
+ {
135
+ "elapsed_s": 351.41817523500004,
136
+ "epoch": 10,
137
+ "optimizer_steps": 4838,
138
+ "train_batches": 4838,
139
+ "train_loss": 0.02143179943208533,
140
+ "train_token_accuracy": 0.9948285624861127,
141
+ "train_tokens": 2511294,
142
+ "validation_batches": 269,
143
+ "validation_loss": 0.13001580377020153,
144
+ "validation_token_accuracy": 0.980448478297885,
145
+ "validation_tokens": 135079
146
+ },
147
+ {
148
+ "elapsed_s": 351.210082735,
149
+ "epoch": 11,
150
+ "optimizer_steps": 4838,
151
+ "train_batches": 4838,
152
+ "train_loss": 0.018372399961305817,
153
+ "train_token_accuracy": 0.9957006228661399,
154
+ "train_tokens": 2511294,
155
+ "validation_batches": 269,
156
+ "validation_loss": 0.13454472581949917,
157
+ "validation_token_accuracy": 0.9798858445798385,
158
+ "validation_tokens": 135079
159
+ },
160
+ {
161
+ "elapsed_s": 350.00989479400005,
162
+ "epoch": 12,
163
+ "optimizer_steps": 4838,
164
+ "train_batches": 4838,
165
+ "train_loss": 0.015427773917966436,
166
+ "train_token_accuracy": 0.9963250021701959,
167
+ "train_tokens": 2511294,
168
+ "validation_batches": 269,
169
+ "validation_loss": 0.1662057355638326,
170
+ "validation_token_accuracy": 0.9783386018552107,
171
+ "validation_tokens": 135079
172
  }
173
  ],
174
+ "epochs": 12,
175
+ "generated_at_unix": 1777765877.1481903,
176
  "grad_accum_steps": 1,
177
  "label_space_json_path": "/__modal/volumes/vo-1GyZvIAPD7wnYD3s0C2QXS/dataset/configs/custom_label_space.json",
178
  "label_space_source": "label-space-json",
179
  "learning_rate": 1e-05,
180
  "max_grad_norm": 1.0,
181
  "num_output_labels": 97,
182
+ "num_train_examples": 19350,
183
+ "num_train_windows": 19350,
184
+ "num_validation_examples": 1075,
185
+ "num_validation_windows": 1075,
186
  "output_checkpoint_dir": "/__modal/volumes/vo-XIgHS4JYPE8ddRlAcjQTwm/matex-privacy-sentinel-v0.1",
187
  "output_head_reinitialized": true,
188
  "output_head_rows_copied": 97,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8005133a9d82548fced788b3716a41284eedde86fb701a767be52c41e5453013
3
  size 2799065896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d906893be08e9c6af7a9091787d4f42797aa7b9d9cb1bea67deaf16ca102d927
3
  size 2799065896