| { |
| "checkpoint": "/models/privacy-filter", |
| "output_dir": "/workspace/data/checkpoints/ko_pii_hf_ddp_v6_lora", |
| "label_space_json": "/workspace/data/generated/ko_pii_opf_v4/label_space.json", |
| "token_labels": [ |
| "O", |
| "B-private_person", |
| "I-private_person", |
| "E-private_person", |
| "S-private_person", |
| "B-personal_handle", |
| "I-personal_handle", |
| "E-personal_handle", |
| "S-personal_handle", |
| "B-private_phone", |
| "I-private_phone", |
| "E-private_phone", |
| "S-private_phone", |
| "B-private_email", |
| "I-private_email", |
| "E-private_email", |
| "S-private_email", |
| "B-private_address", |
| "I-private_address", |
| "E-private_address", |
| "S-private_address", |
| "B-private_date", |
| "I-private_date", |
| "E-private_date", |
| "S-private_date", |
| "B-private_url", |
| "I-private_url", |
| "E-private_url", |
| "S-private_url", |
| "B-account_number", |
| "I-account_number", |
| "E-account_number", |
| "S-account_number", |
| "B-ip_address", |
| "I-ip_address", |
| "E-ip_address", |
| "S-ip_address" |
| ], |
| "classifier_remap": { |
| "exact_rows_copied": 29, |
| "fallback_rows_copied": 8, |
| "random_rows_kept": 0 |
| }, |
| "lora": { |
| "r": 16, |
| "alpha": 32, |
| "dropout": 0.05, |
| "target_modules": [ |
| "q_proj", |
| "k_proj", |
| "v_proj", |
| "o_proj" |
| ], |
| "trainable_params": 613541, |
| "total_params": 1400102970, |
| "trainable_pct": 0.0438 |
| }, |
| "train_dataset": { |
| "split": "train", |
| "records": 41197, |
| "tokens": 650201, |
| "spans": 11465, |
| "spans_without_token_overlap": 0, |
| "truncated_examples": 0, |
| "max_tokens": 63, |
| "records_per_path": { |
| "/workspace/data/generated/ko_pii_opf_v4/train.jsonl": 41197 |
| } |
| }, |
| "validation_dataset": { |
| "split": "validation", |
| "records": 2227, |
| "tokens": 34272, |
| "spans": 520, |
| "spans_without_token_overlap": 0, |
| "truncated_examples": 0, |
| "max_tokens": 45, |
| "records_per_path": { |
| "/workspace/data/generated/ko_pii_opf_v4/validation.jsonl": 2227 |
| } |
| }, |
| "test_dataset": { |
| "split": "test", |
| "records": 2252, |
| "tokens": 34553, |
| "spans": 542, |
| "spans_without_token_overlap": 0, |
| "truncated_examples": 0, |
| "max_tokens": 48, |
| "records_per_path": { |
| "/workspace/data/generated/ko_pii_opf_v4/test.jsonl": 2252 |
| } |
| }, |
| "train_metrics": { |
| "train_runtime": 2753.5271, |
| "train_samples_per_second": 149.615, |
| "train_steps_per_second": 1.169, |
| "total_flos": 1.291714856788951e+17, |
| "train_loss": 0.07032274794504509, |
| "epoch": 10.0 |
| }, |
| "validation_metrics": { |
| "eval_loss": 0.09874702990055084, |
| "eval_token_accuracy": 0.9907504668534081, |
| "eval_span_precision": 0.8582677165354331, |
| "eval_span_recall": 0.8384615384615385, |
| "eval_span_f1": 0.8482490272373542, |
| "eval_gold_spans": 520.0, |
| "eval_pred_spans": 508.0, |
| "eval_class_account_number_precision": 0.9736842105263158, |
| "eval_class_account_number_recall": 0.9823008849557522, |
| "eval_class_account_number_f1": 0.9779735682819383, |
| "eval_class_account_number_gold_spans": 113.0, |
| "eval_class_account_number_pred_spans": 114.0, |
| "eval_class_ip_address_precision": 1.0, |
| "eval_class_ip_address_recall": 1.0, |
| "eval_class_ip_address_f1": 1.0, |
| "eval_class_ip_address_gold_spans": 4.0, |
| "eval_class_ip_address_pred_spans": 4.0, |
| "eval_class_personal_handle_precision": 0.8571428571428571, |
| "eval_class_personal_handle_recall": 0.8571428571428571, |
| "eval_class_personal_handle_f1": 0.8571428571428571, |
| "eval_class_personal_handle_gold_spans": 28.0, |
| "eval_class_personal_handle_pred_spans": 28.0, |
| "eval_class_private_address_precision": 0.7619047619047619, |
| "eval_class_private_address_recall": 0.6666666666666666, |
| "eval_class_private_address_f1": 0.7111111111111111, |
| "eval_class_private_address_gold_spans": 48.0, |
| "eval_class_private_address_pred_spans": 42.0, |
| "eval_class_private_date_precision": 1.0, |
| "eval_class_private_date_recall": 1.0, |
| "eval_class_private_date_f1": 1.0, |
| "eval_class_private_date_gold_spans": 33.0, |
| "eval_class_private_date_pred_spans": 33.0, |
| "eval_class_private_email_precision": 0.926829268292683, |
| "eval_class_private_email_recall": 0.9743589743589743, |
| "eval_class_private_email_f1": 0.9500000000000001, |
| "eval_class_private_email_gold_spans": 39.0, |
| "eval_class_private_email_pred_spans": 41.0, |
| "eval_class_private_person_precision": 0.6710526315789473, |
| "eval_class_private_person_recall": 0.6257668711656442, |
| "eval_class_private_person_f1": 0.6476190476190476, |
| "eval_class_private_person_gold_spans": 163.0, |
| "eval_class_private_person_pred_spans": 152.0, |
| "eval_class_private_phone_precision": 1.0, |
| "eval_class_private_phone_recall": 1.0, |
| "eval_class_private_phone_f1": 1.0, |
| "eval_class_private_phone_gold_spans": 69.0, |
| "eval_class_private_phone_pred_spans": 69.0, |
| "eval_class_private_url_precision": 0.92, |
| "eval_class_private_url_recall": 1.0, |
| "eval_class_private_url_f1": 0.9583333333333334, |
| "eval_class_private_url_gold_spans": 23.0, |
| "eval_class_private_url_pred_spans": 25.0, |
| "eval_runtime": 7.0151, |
| "eval_samples_per_second": 317.46, |
| "eval_steps_per_second": 2.566, |
| "epoch": 10.0 |
| }, |
| "test_metrics": { |
| "test_loss": 0.08586616814136505, |
| "test_token_accuracy": 0.9924174456631841, |
| "test_span_precision": 0.9009708737864077, |
| "test_span_recall": 0.8560885608856088, |
| "test_span_f1": 0.8779564806054873, |
| "test_gold_spans": 542.0, |
| "test_pred_spans": 515.0, |
| "test_class_account_number_precision": 0.9752066115702479, |
| "test_class_account_number_recall": 0.9833333333333333, |
| "test_class_account_number_f1": 0.979253112033195, |
| "test_class_account_number_gold_spans": 120.0, |
| "test_class_account_number_pred_spans": 121.0, |
| "test_class_ip_address_precision": 1.0, |
| "test_class_ip_address_recall": 1.0, |
| "test_class_ip_address_f1": 1.0, |
| "test_class_ip_address_gold_spans": 9.0, |
| "test_class_ip_address_pred_spans": 9.0, |
| "test_class_personal_handle_precision": 0.9743589743589743, |
| "test_class_personal_handle_recall": 0.9743589743589743, |
| "test_class_personal_handle_f1": 0.9743589743589743, |
| "test_class_personal_handle_gold_spans": 39.0, |
| "test_class_personal_handle_pred_spans": 39.0, |
| "test_class_private_address_precision": 0.8275862068965517, |
| "test_class_private_address_recall": 0.7384615384615385, |
| "test_class_private_address_f1": 0.7804878048780489, |
| "test_class_private_address_gold_spans": 65.0, |
| "test_class_private_address_pred_spans": 58.0, |
| "test_class_private_date_precision": 0.9166666666666666, |
| "test_class_private_date_recall": 0.88, |
| "test_class_private_date_f1": 0.8979591836734694, |
| "test_class_private_date_gold_spans": 25.0, |
| "test_class_private_date_pred_spans": 24.0, |
| "test_class_private_email_precision": 1.0, |
| "test_class_private_email_recall": 1.0, |
| "test_class_private_email_f1": 1.0, |
| "test_class_private_email_gold_spans": 38.0, |
| "test_class_private_email_pred_spans": 38.0, |
| "test_class_private_person_precision": 0.7348484848484849, |
| "test_class_private_person_recall": 0.6381578947368421, |
| "test_class_private_person_f1": 0.6830985915492959, |
| "test_class_private_person_gold_spans": 152.0, |
| "test_class_private_person_pred_spans": 132.0, |
| "test_class_private_phone_precision": 1.0, |
| "test_class_private_phone_recall": 1.0, |
| "test_class_private_phone_f1": 1.0, |
| "test_class_private_phone_gold_spans": 76.0, |
| "test_class_private_phone_pred_spans": 76.0, |
| "test_class_private_url_precision": 1.0, |
| "test_class_private_url_recall": 1.0, |
| "test_class_private_url_f1": 1.0, |
| "test_class_private_url_gold_spans": 18.0, |
| "test_class_private_url_pred_spans": 18.0, |
| "test_runtime": 6.4275, |
| "test_samples_per_second": 350.37, |
| "test_steps_per_second": 2.8, |
| "epoch": 10.0 |
| }, |
| "args": { |
| "train_dataset": [ |
| "/workspace/data/generated/ko_pii_opf_v4/train.jsonl" |
| ], |
| "validation_dataset": "/workspace/data/generated/ko_pii_opf_v4/validation.jsonl", |
| "test_dataset": "/workspace/data/generated/ko_pii_opf_v4/test.jsonl", |
| "label_space_json": "/workspace/data/generated/ko_pii_opf_v4/label_space.json", |
| "checkpoint": "/models/privacy-filter", |
| "output_dir": "/workspace/data/checkpoints/ko_pii_hf_ddp_v6_lora", |
| "max_length": 512, |
| "epochs": 10.0, |
| "early_stopping_patience": 3, |
| "per_device_train_batch_size": 64, |
| "per_device_eval_batch_size": 64, |
| "gradient_accumulation_steps": 1, |
| "learning_rate": 0.0005, |
| "lr_scheduler_type": "cosine", |
| "weight_decay": 0.01, |
| "warmup_ratio": 0.1, |
| "max_grad_norm": 1.0, |
| "logging_steps": 25, |
| "save_total_limit": 2, |
| "dataloader_num_workers": 4, |
| "seed": 42, |
| "overwrite_output": true, |
| "resume_from_checkpoint": null, |
| "max_train_examples": null, |
| "max_validation_examples": null, |
| "max_test_examples": null, |
| "use_lora": true, |
| "lora_r": 16, |
| "lora_alpha": 32, |
| "lora_dropout": 0.05, |
| "lora_target_modules": "q_proj,k_proj,v_proj,o_proj" |
| } |
| } |