| { |
| "alias_map": { |
| "private_nuban_account": "account_number" |
| }, |
| "copied_pretrained_token_labels": [ |
| "O", |
| "B-account_number", |
| "I-account_number", |
| "E-account_number", |
| "S-account_number", |
| "B-private_address", |
| "I-private_address", |
| "E-private_address", |
| "S-private_address", |
| "B-private_email", |
| "I-private_email", |
| "E-private_email", |
| "S-private_email", |
| "B-private_person", |
| "I-private_person", |
| "E-private_person", |
| "S-private_person", |
| "B-private_phone", |
| "I-private_phone", |
| "E-private_phone", |
| "S-private_phone", |
| "B-private_url", |
| "I-private_url", |
| "E-private_url", |
| "S-private_url", |
| "B-private_date", |
| "I-private_date", |
| "E-private_date", |
| "S-private_date", |
| "B-secret", |
| "I-secret", |
| "E-secret", |
| "S-secret" |
| ], |
| "dataset_root": "/naija-privacy-filter/data/dataset", |
| "dataset_summary": { |
| "challenge": { |
| "domain_counts": { |
| "application_form": 25, |
| "compliance_review": 25, |
| "education_record": 25, |
| "employee_record": 25, |
| "health_record": 25, |
| "insurance_record": 25, |
| "kyc_update": 25, |
| "legal_document": 25, |
| "payroll_note": 25, |
| "support_ticket": 25 |
| }, |
| "label_counts": {}, |
| "num_empty_examples": 250, |
| "num_examples": 250, |
| "num_skipped_truncated": 0, |
| "path": "/naija-privacy-filter/data/dataset/challenge.jsonl" |
| }, |
| "test": { |
| "domain_counts": { |
| "application_form": 12, |
| "compliance_review": 13, |
| "education_record": 12, |
| "employee_record": 11, |
| "health_record": 20, |
| "insurance_record": 6, |
| "kyc_update": 9, |
| "legal_document": 12, |
| "ocr_bank_contact_card": 20, |
| "ocr_identity_document": 20, |
| "payroll_note": 5, |
| "short_context": 85, |
| "support_ticket": 11 |
| }, |
| "label_counts": { |
| "account_number": 74, |
| "private_address": 61, |
| "private_bvn": 58, |
| "private_date": 71, |
| "private_drivers_license_number": 28, |
| "private_email": 78, |
| "private_nin": 69, |
| "private_passport_number": 64, |
| "private_person": 141, |
| "private_phone": 70, |
| "private_url": 36, |
| "private_voters_card_number": 17, |
| "secret": 35 |
| }, |
| "num_empty_examples": 46, |
| "num_examples": 236, |
| "num_skipped_truncated": 0, |
| "path": "/naija-privacy-filter/data/dataset/test.jsonl" |
| }, |
| "train": { |
| "domain_counts": { |
| "application_form": 113, |
| "compliance_review": 123, |
| "education_record": 105, |
| "employee_record": 92, |
| "health_record": 145, |
| "insurance_record": 80, |
| "kyc_update": 129, |
| "legal_document": 109, |
| "ocr_bank_contact_card": 84, |
| "ocr_bank_form": 347, |
| "ocr_document_processing": 1088, |
| "ocr_education_certificate": 89, |
| "ocr_employee_record": 80, |
| "ocr_identity_document": 337, |
| "ocr_support_routing": 181, |
| "payroll_note": 91, |
| "short_context": 4685, |
| "support_ticket": 122 |
| }, |
| "label_counts": { |
| "account_number": 1570, |
| "private_address": 1455, |
| "private_bvn": 1520, |
| "private_date": 1168, |
| "private_drivers_license_number": 611, |
| "private_email": 1043, |
| "private_nin": 2140, |
| "private_passport_number": 964, |
| "private_person": 2608, |
| "private_phone": 1098, |
| "private_url": 430, |
| "private_voters_card_number": 480, |
| "secret": 333 |
| }, |
| "num_empty_examples": 2320, |
| "num_examples": 8000, |
| "num_skipped_truncated": 0, |
| "path": "/naija-privacy-filter/data/dataset/train.jsonl" |
| }, |
| "validation": { |
| "domain_counts": { |
| "application_form": 7, |
| "compliance_review": 12, |
| "education_record": 9, |
| "employee_record": 6, |
| "health_record": 18, |
| "insurance_record": 11, |
| "kyc_update": 13, |
| "legal_document": 9, |
| "ocr_bank_contact_card": 20, |
| "ocr_support_routing": 20, |
| "payroll_note": 13, |
| "short_context": 87, |
| "support_ticket": 17 |
| }, |
| "label_counts": { |
| "account_number": 68, |
| "private_address": 65, |
| "private_bvn": 61, |
| "private_date": 46, |
| "private_drivers_license_number": 28, |
| "private_email": 81, |
| "private_nin": 50, |
| "private_passport_number": 79, |
| "private_person": 120, |
| "private_phone": 73, |
| "private_url": 40, |
| "private_voters_card_number": 25, |
| "secret": 40 |
| }, |
| "num_empty_examples": 56, |
| "num_examples": 242, |
| "num_skipped_truncated": 0, |
| "path": "/naija-privacy-filter/data/dataset/validation.jsonl" |
| } |
| }, |
| "device": "cuda", |
| "dropped_labels": [], |
| "early_stopping_patience": 3, |
| "epochs": 12, |
| "eval_batch_size": 64, |
| "grad_accum_steps": 1, |
| "learning_rate": 0.0002, |
| "lora": { |
| "alpha": 32, |
| "dropout": 0.05, |
| "r": 16, |
| "target_modules": [ |
| "q_proj", |
| "k_proj", |
| "v_proj", |
| "o_proj" |
| ] |
| }, |
| "max_grad_norm": 1.0, |
| "max_length": 512, |
| "model_id": "openai/privacy-filter", |
| "output_dir": "/naija-privacy-filter/artifacts/run", |
| "precision": "bf16", |
| "seed": 1337, |
| "skip_truncated_spans": true, |
| "span_labels": [ |
| "account_number", |
| "private_address", |
| "private_email", |
| "private_person", |
| "private_phone", |
| "private_url", |
| "private_date", |
| "secret", |
| "private_nin", |
| "private_bvn", |
| "private_passport_number", |
| "private_drivers_license_number", |
| "private_voters_card_number" |
| ], |
| "span_postprocess": true, |
| "token_label_names": [ |
| "O", |
| "B-account_number", |
| "I-account_number", |
| "E-account_number", |
| "S-account_number", |
| "B-private_address", |
| "I-private_address", |
| "E-private_address", |
| "S-private_address", |
| "B-private_email", |
| "I-private_email", |
| "E-private_email", |
| "S-private_email", |
| "B-private_person", |
| "I-private_person", |
| "E-private_person", |
| "S-private_person", |
| "B-private_phone", |
| "I-private_phone", |
| "E-private_phone", |
| "S-private_phone", |
| "B-private_url", |
| "I-private_url", |
| "E-private_url", |
| "S-private_url", |
| "B-private_date", |
| "I-private_date", |
| "E-private_date", |
| "S-private_date", |
| "B-secret", |
| "I-secret", |
| "E-secret", |
| "S-secret", |
| "B-private_nin", |
| "I-private_nin", |
| "E-private_nin", |
| "S-private_nin", |
| "B-private_bvn", |
| "I-private_bvn", |
| "E-private_bvn", |
| "S-private_bvn", |
| "B-private_passport_number", |
| "I-private_passport_number", |
| "E-private_passport_number", |
| "S-private_passport_number", |
| "B-private_drivers_license_number", |
| "I-private_drivers_license_number", |
| "E-private_drivers_license_number", |
| "S-private_drivers_license_number", |
| "B-private_voters_card_number", |
| "I-private_voters_card_number", |
| "E-private_voters_card_number", |
| "S-private_voters_card_number" |
| ], |
| "train_batch_size": 32, |
| "warmup_ratio": 0.06, |
| "weight_decay": 0.01 |
| } |
|
|