File size: 9,196 Bytes
7acb24f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
{
  "checkpoint": "/models/privacy-filter",
  "output_dir": "/workspace/data/checkpoints/ko_pii_hf_ddp_v6_lora",
  "label_space_json": "/workspace/data/generated/ko_pii_opf_v4/label_space.json",
  "token_labels": [
    "O",
    "B-private_person",
    "I-private_person",
    "E-private_person",
    "S-private_person",
    "B-personal_handle",
    "I-personal_handle",
    "E-personal_handle",
    "S-personal_handle",
    "B-private_phone",
    "I-private_phone",
    "E-private_phone",
    "S-private_phone",
    "B-private_email",
    "I-private_email",
    "E-private_email",
    "S-private_email",
    "B-private_address",
    "I-private_address",
    "E-private_address",
    "S-private_address",
    "B-private_date",
    "I-private_date",
    "E-private_date",
    "S-private_date",
    "B-private_url",
    "I-private_url",
    "E-private_url",
    "S-private_url",
    "B-account_number",
    "I-account_number",
    "E-account_number",
    "S-account_number",
    "B-ip_address",
    "I-ip_address",
    "E-ip_address",
    "S-ip_address"
  ],
  "classifier_remap": {
    "exact_rows_copied": 29,
    "fallback_rows_copied": 8,
    "random_rows_kept": 0
  },
  "lora": {
    "r": 16,
    "alpha": 32,
    "dropout": 0.05,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj"
    ],
    "trainable_params": 613541,
    "total_params": 1400102970,
    "trainable_pct": 0.0438
  },
  "train_dataset": {
    "split": "train",
    "records": 41197,
    "tokens": 650201,
    "spans": 11465,
    "spans_without_token_overlap": 0,
    "truncated_examples": 0,
    "max_tokens": 63,
    "records_per_path": {
      "/workspace/data/generated/ko_pii_opf_v4/train.jsonl": 41197
    }
  },
  "validation_dataset": {
    "split": "validation",
    "records": 2227,
    "tokens": 34272,
    "spans": 520,
    "spans_without_token_overlap": 0,
    "truncated_examples": 0,
    "max_tokens": 45,
    "records_per_path": {
      "/workspace/data/generated/ko_pii_opf_v4/validation.jsonl": 2227
    }
  },
  "test_dataset": {
    "split": "test",
    "records": 2252,
    "tokens": 34553,
    "spans": 542,
    "spans_without_token_overlap": 0,
    "truncated_examples": 0,
    "max_tokens": 48,
    "records_per_path": {
      "/workspace/data/generated/ko_pii_opf_v4/test.jsonl": 2252
    }
  },
  "train_metrics": {
    "train_runtime": 2753.5271,
    "train_samples_per_second": 149.615,
    "train_steps_per_second": 1.169,
    "total_flos": 1.291714856788951e+17,
    "train_loss": 0.07032274794504509,
    "epoch": 10.0
  },
  "validation_metrics": {
    "eval_loss": 0.09874702990055084,
    "eval_token_accuracy": 0.9907504668534081,
    "eval_span_precision": 0.8582677165354331,
    "eval_span_recall": 0.8384615384615385,
    "eval_span_f1": 0.8482490272373542,
    "eval_gold_spans": 520.0,
    "eval_pred_spans": 508.0,
    "eval_class_account_number_precision": 0.9736842105263158,
    "eval_class_account_number_recall": 0.9823008849557522,
    "eval_class_account_number_f1": 0.9779735682819383,
    "eval_class_account_number_gold_spans": 113.0,
    "eval_class_account_number_pred_spans": 114.0,
    "eval_class_ip_address_precision": 1.0,
    "eval_class_ip_address_recall": 1.0,
    "eval_class_ip_address_f1": 1.0,
    "eval_class_ip_address_gold_spans": 4.0,
    "eval_class_ip_address_pred_spans": 4.0,
    "eval_class_personal_handle_precision": 0.8571428571428571,
    "eval_class_personal_handle_recall": 0.8571428571428571,
    "eval_class_personal_handle_f1": 0.8571428571428571,
    "eval_class_personal_handle_gold_spans": 28.0,
    "eval_class_personal_handle_pred_spans": 28.0,
    "eval_class_private_address_precision": 0.7619047619047619,
    "eval_class_private_address_recall": 0.6666666666666666,
    "eval_class_private_address_f1": 0.7111111111111111,
    "eval_class_private_address_gold_spans": 48.0,
    "eval_class_private_address_pred_spans": 42.0,
    "eval_class_private_date_precision": 1.0,
    "eval_class_private_date_recall": 1.0,
    "eval_class_private_date_f1": 1.0,
    "eval_class_private_date_gold_spans": 33.0,
    "eval_class_private_date_pred_spans": 33.0,
    "eval_class_private_email_precision": 0.926829268292683,
    "eval_class_private_email_recall": 0.9743589743589743,
    "eval_class_private_email_f1": 0.9500000000000001,
    "eval_class_private_email_gold_spans": 39.0,
    "eval_class_private_email_pred_spans": 41.0,
    "eval_class_private_person_precision": 0.6710526315789473,
    "eval_class_private_person_recall": 0.6257668711656442,
    "eval_class_private_person_f1": 0.6476190476190476,
    "eval_class_private_person_gold_spans": 163.0,
    "eval_class_private_person_pred_spans": 152.0,
    "eval_class_private_phone_precision": 1.0,
    "eval_class_private_phone_recall": 1.0,
    "eval_class_private_phone_f1": 1.0,
    "eval_class_private_phone_gold_spans": 69.0,
    "eval_class_private_phone_pred_spans": 69.0,
    "eval_class_private_url_precision": 0.92,
    "eval_class_private_url_recall": 1.0,
    "eval_class_private_url_f1": 0.9583333333333334,
    "eval_class_private_url_gold_spans": 23.0,
    "eval_class_private_url_pred_spans": 25.0,
    "eval_runtime": 7.0151,
    "eval_samples_per_second": 317.46,
    "eval_steps_per_second": 2.566,
    "epoch": 10.0
  },
  "test_metrics": {
    "test_loss": 0.08586616814136505,
    "test_token_accuracy": 0.9924174456631841,
    "test_span_precision": 0.9009708737864077,
    "test_span_recall": 0.8560885608856088,
    "test_span_f1": 0.8779564806054873,
    "test_gold_spans": 542.0,
    "test_pred_spans": 515.0,
    "test_class_account_number_precision": 0.9752066115702479,
    "test_class_account_number_recall": 0.9833333333333333,
    "test_class_account_number_f1": 0.979253112033195,
    "test_class_account_number_gold_spans": 120.0,
    "test_class_account_number_pred_spans": 121.0,
    "test_class_ip_address_precision": 1.0,
    "test_class_ip_address_recall": 1.0,
    "test_class_ip_address_f1": 1.0,
    "test_class_ip_address_gold_spans": 9.0,
    "test_class_ip_address_pred_spans": 9.0,
    "test_class_personal_handle_precision": 0.9743589743589743,
    "test_class_personal_handle_recall": 0.9743589743589743,
    "test_class_personal_handle_f1": 0.9743589743589743,
    "test_class_personal_handle_gold_spans": 39.0,
    "test_class_personal_handle_pred_spans": 39.0,
    "test_class_private_address_precision": 0.8275862068965517,
    "test_class_private_address_recall": 0.7384615384615385,
    "test_class_private_address_f1": 0.7804878048780489,
    "test_class_private_address_gold_spans": 65.0,
    "test_class_private_address_pred_spans": 58.0,
    "test_class_private_date_precision": 0.9166666666666666,
    "test_class_private_date_recall": 0.88,
    "test_class_private_date_f1": 0.8979591836734694,
    "test_class_private_date_gold_spans": 25.0,
    "test_class_private_date_pred_spans": 24.0,
    "test_class_private_email_precision": 1.0,
    "test_class_private_email_recall": 1.0,
    "test_class_private_email_f1": 1.0,
    "test_class_private_email_gold_spans": 38.0,
    "test_class_private_email_pred_spans": 38.0,
    "test_class_private_person_precision": 0.7348484848484849,
    "test_class_private_person_recall": 0.6381578947368421,
    "test_class_private_person_f1": 0.6830985915492959,
    "test_class_private_person_gold_spans": 152.0,
    "test_class_private_person_pred_spans": 132.0,
    "test_class_private_phone_precision": 1.0,
    "test_class_private_phone_recall": 1.0,
    "test_class_private_phone_f1": 1.0,
    "test_class_private_phone_gold_spans": 76.0,
    "test_class_private_phone_pred_spans": 76.0,
    "test_class_private_url_precision": 1.0,
    "test_class_private_url_recall": 1.0,
    "test_class_private_url_f1": 1.0,
    "test_class_private_url_gold_spans": 18.0,
    "test_class_private_url_pred_spans": 18.0,
    "test_runtime": 6.4275,
    "test_samples_per_second": 350.37,
    "test_steps_per_second": 2.8,
    "epoch": 10.0
  },
  "args": {
    "train_dataset": [
      "/workspace/data/generated/ko_pii_opf_v4/train.jsonl"
    ],
    "validation_dataset": "/workspace/data/generated/ko_pii_opf_v4/validation.jsonl",
    "test_dataset": "/workspace/data/generated/ko_pii_opf_v4/test.jsonl",
    "label_space_json": "/workspace/data/generated/ko_pii_opf_v4/label_space.json",
    "checkpoint": "/models/privacy-filter",
    "output_dir": "/workspace/data/checkpoints/ko_pii_hf_ddp_v6_lora",
    "max_length": 512,
    "epochs": 10.0,
    "early_stopping_patience": 3,
    "per_device_train_batch_size": 64,
    "per_device_eval_batch_size": 64,
    "gradient_accumulation_steps": 1,
    "learning_rate": 0.0005,
    "lr_scheduler_type": "cosine",
    "weight_decay": 0.01,
    "warmup_ratio": 0.1,
    "max_grad_norm": 1.0,
    "logging_steps": 25,
    "save_total_limit": 2,
    "dataloader_num_workers": 4,
    "seed": 42,
    "overwrite_output": true,
    "resume_from_checkpoint": null,
    "max_train_examples": null,
    "max_validation_examples": null,
    "max_test_examples": null,
    "use_lora": true,
    "lora_r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "lora_target_modules": "q_proj,k_proj,v_proj,o_proj"
  }
}