privacy-filter-nigeria / run_config.json
iamSamurai's picture
Publish Naija privacy-filter LoRA adapter
dce66cd verified
{
"alias_map": {
"private_nuban_account": "account_number"
},
"copied_pretrained_token_labels": [
"O",
"B-account_number",
"I-account_number",
"E-account_number",
"S-account_number",
"B-private_address",
"I-private_address",
"E-private_address",
"S-private_address",
"B-private_email",
"I-private_email",
"E-private_email",
"S-private_email",
"B-private_person",
"I-private_person",
"E-private_person",
"S-private_person",
"B-private_phone",
"I-private_phone",
"E-private_phone",
"S-private_phone",
"B-private_url",
"I-private_url",
"E-private_url",
"S-private_url",
"B-private_date",
"I-private_date",
"E-private_date",
"S-private_date",
"B-secret",
"I-secret",
"E-secret",
"S-secret"
],
"dataset_root": "/naija-privacy-filter/data/dataset",
"dataset_summary": {
"challenge": {
"domain_counts": {
"application_form": 25,
"compliance_review": 25,
"education_record": 25,
"employee_record": 25,
"health_record": 25,
"insurance_record": 25,
"kyc_update": 25,
"legal_document": 25,
"payroll_note": 25,
"support_ticket": 25
},
"label_counts": {},
"num_empty_examples": 250,
"num_examples": 250,
"num_skipped_truncated": 0,
"path": "/naija-privacy-filter/data/dataset/challenge.jsonl"
},
"test": {
"domain_counts": {
"application_form": 12,
"compliance_review": 13,
"education_record": 12,
"employee_record": 11,
"health_record": 20,
"insurance_record": 6,
"kyc_update": 9,
"legal_document": 12,
"ocr_bank_contact_card": 20,
"ocr_identity_document": 20,
"payroll_note": 5,
"short_context": 85,
"support_ticket": 11
},
"label_counts": {
"account_number": 74,
"private_address": 61,
"private_bvn": 58,
"private_date": 71,
"private_drivers_license_number": 28,
"private_email": 78,
"private_nin": 69,
"private_passport_number": 64,
"private_person": 141,
"private_phone": 70,
"private_url": 36,
"private_voters_card_number": 17,
"secret": 35
},
"num_empty_examples": 46,
"num_examples": 236,
"num_skipped_truncated": 0,
"path": "/naija-privacy-filter/data/dataset/test.jsonl"
},
"train": {
"domain_counts": {
"application_form": 113,
"compliance_review": 123,
"education_record": 105,
"employee_record": 92,
"health_record": 145,
"insurance_record": 80,
"kyc_update": 129,
"legal_document": 109,
"ocr_bank_contact_card": 84,
"ocr_bank_form": 347,
"ocr_document_processing": 1088,
"ocr_education_certificate": 89,
"ocr_employee_record": 80,
"ocr_identity_document": 337,
"ocr_support_routing": 181,
"payroll_note": 91,
"short_context": 4685,
"support_ticket": 122
},
"label_counts": {
"account_number": 1570,
"private_address": 1455,
"private_bvn": 1520,
"private_date": 1168,
"private_drivers_license_number": 611,
"private_email": 1043,
"private_nin": 2140,
"private_passport_number": 964,
"private_person": 2608,
"private_phone": 1098,
"private_url": 430,
"private_voters_card_number": 480,
"secret": 333
},
"num_empty_examples": 2320,
"num_examples": 8000,
"num_skipped_truncated": 0,
"path": "/naija-privacy-filter/data/dataset/train.jsonl"
},
"validation": {
"domain_counts": {
"application_form": 7,
"compliance_review": 12,
"education_record": 9,
"employee_record": 6,
"health_record": 18,
"insurance_record": 11,
"kyc_update": 13,
"legal_document": 9,
"ocr_bank_contact_card": 20,
"ocr_support_routing": 20,
"payroll_note": 13,
"short_context": 87,
"support_ticket": 17
},
"label_counts": {
"account_number": 68,
"private_address": 65,
"private_bvn": 61,
"private_date": 46,
"private_drivers_license_number": 28,
"private_email": 81,
"private_nin": 50,
"private_passport_number": 79,
"private_person": 120,
"private_phone": 73,
"private_url": 40,
"private_voters_card_number": 25,
"secret": 40
},
"num_empty_examples": 56,
"num_examples": 242,
"num_skipped_truncated": 0,
"path": "/naija-privacy-filter/data/dataset/validation.jsonl"
}
},
"device": "cuda",
"dropped_labels": [],
"early_stopping_patience": 3,
"epochs": 12,
"eval_batch_size": 64,
"grad_accum_steps": 1,
"learning_rate": 0.0002,
"lora": {
"alpha": 32,
"dropout": 0.05,
"r": 16,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj"
]
},
"max_grad_norm": 1.0,
"max_length": 512,
"model_id": "openai/privacy-filter",
"output_dir": "/naija-privacy-filter/artifacts/run",
"precision": "bf16",
"seed": 1337,
"skip_truncated_spans": true,
"span_labels": [
"account_number",
"private_address",
"private_email",
"private_person",
"private_phone",
"private_url",
"private_date",
"secret",
"private_nin",
"private_bvn",
"private_passport_number",
"private_drivers_license_number",
"private_voters_card_number"
],
"span_postprocess": true,
"token_label_names": [
"O",
"B-account_number",
"I-account_number",
"E-account_number",
"S-account_number",
"B-private_address",
"I-private_address",
"E-private_address",
"S-private_address",
"B-private_email",
"I-private_email",
"E-private_email",
"S-private_email",
"B-private_person",
"I-private_person",
"E-private_person",
"S-private_person",
"B-private_phone",
"I-private_phone",
"E-private_phone",
"S-private_phone",
"B-private_url",
"I-private_url",
"E-private_url",
"S-private_url",
"B-private_date",
"I-private_date",
"E-private_date",
"S-private_date",
"B-secret",
"I-secret",
"E-secret",
"S-secret",
"B-private_nin",
"I-private_nin",
"E-private_nin",
"S-private_nin",
"B-private_bvn",
"I-private_bvn",
"E-private_bvn",
"S-private_bvn",
"B-private_passport_number",
"I-private_passport_number",
"E-private_passport_number",
"S-private_passport_number",
"B-private_drivers_license_number",
"I-private_drivers_license_number",
"E-private_drivers_license_number",
"S-private_drivers_license_number",
"B-private_voters_card_number",
"I-private_voters_card_number",
"E-private_voters_card_number",
"S-private_voters_card_number"
],
"train_batch_size": 32,
"warmup_ratio": 0.06,
"weight_decay": 0.01
}