Spaces:
Sleeping
Sleeping
Add phone, IBAN, password, and DB connection PII patterns
Browse files
New patterns:
- Phone numbers (US and international formats)
- IBAN (international bank account numbers)
- Password (labeled patterns like "password: X")
- Database connection strings with credentials
Detection rate: 14/15 test cases
Only physical addresses are skipped (high false-positive risk)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
- src/security/__init__.py +13 -0
- static/index.html +14 -1
src/security/__init__.py
CHANGED
|
@@ -73,6 +73,19 @@ PII_PATTERNS = {
|
|
| 73 |
"medical_dob": r"(DOB|date\s+of\s+birth|d\.o\.b\.?)[\s:]+\d{1,2}[/\-]\d{1,2}[/\-]\d{2,4}",
|
| 74 |
# Medical Records - MRN patterns
|
| 75 |
"medical_mrn": r"(MRN|medical\s+record|patient\s+id|patient\s+number)[\s:#\-]+\d{4,}",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
}
|
| 77 |
|
| 78 |
def detect_pii(prompt: str) -> dict:
|
|
|
|
| 73 |
"medical_dob": r"(DOB|date\s+of\s+birth|d\.o\.b\.?)[\s:]+\d{1,2}[/\-]\d{1,2}[/\-]\d{2,4}",
|
| 74 |
# Medical Records - MRN patterns
|
| 75 |
"medical_mrn": r"(MRN|medical\s+record|patient\s+id|patient\s+number)[\s:#\-]+\d{4,}",
|
| 76 |
+
|
| 77 |
+
# Phone Numbers - US formats
|
| 78 |
+
"phone_us": r"\b(\+?1[\s\-.]?)?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}\b",
|
| 79 |
+
# Phone Numbers - International with + prefix
|
| 80 |
+
"phone_intl": r"\+\d{1,3}[\s\-.]?\d{2,4}[\s\-.]?\d{3,4}[\s\-.]?\d{3,4}\b",
|
| 81 |
+
|
| 82 |
+
# IBAN - International Bank Account Number
|
| 83 |
+
"iban": r"\b[A-Z]{2}\d{2}[\s]?[A-Z0-9]{4}[\s]?[A-Z0-9]{4}[\s]?[A-Z0-9]{4}[\s]?[A-Z0-9]{0,14}\b",
|
| 84 |
+
|
| 85 |
+
# Password - Labeled patterns
|
| 86 |
+
"password_labeled": r"(password|passwd|pwd|pass)[\s:=]+\S+",
|
| 87 |
+
# Database connection strings with credentials
|
| 88 |
+
"db_connection": r"(postgres|mysql|mongodb|redis)://[^:]+:[^@]+@",
|
| 89 |
}
|
| 90 |
|
| 91 |
def detect_pii(prompt: str) -> dict:
|
static/index.html
CHANGED
|
@@ -490,7 +490,20 @@
|
|
| 490 |
// Medical Records - DOB patterns
|
| 491 |
medicalDOB: new RegExp('(DOB|date\\s+of\\s+birth|d\\.o\\.b\\.?)[\\s:]+\\d{1,2}[/\\-]\\d{1,2}[/\\-]\\d{2,4}', 'i'),
|
| 492 |
// Medical Records - MRN patterns
|
| 493 |
-
medicalMRN: new RegExp('(MRN|medical\\s+record|patient\\s+id|patient\\s+number)[\\s:#\\-]+\\d{4,}', 'i')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
};
|
| 495 |
|
| 496 |
let detected = [];
|
|
|
|
| 490 |
// Medical Records - DOB patterns
|
| 491 |
medicalDOB: new RegExp('(DOB|date\\s+of\\s+birth|d\\.o\\.b\\.?)[\\s:]+\\d{1,2}[/\\-]\\d{1,2}[/\\-]\\d{2,4}', 'i'),
|
| 492 |
// Medical Records - MRN patterns
|
| 493 |
+
medicalMRN: new RegExp('(MRN|medical\\s+record|patient\\s+id|patient\\s+number)[\\s:#\\-]+\\d{4,}', 'i'),
|
| 494 |
+
|
| 495 |
+
// Phone Numbers - US formats
|
| 496 |
+
phoneUS: new RegExp('\\b(\\+?1[\\s\\-.]?)?\\(?\\d{3}\\)?[\\s\\-.]?\\d{3}[\\s\\-.]?\\d{4}\\b'),
|
| 497 |
+
// Phone Numbers - International with + prefix
|
| 498 |
+
phoneIntl: new RegExp('\\+\\d{1,3}[\\s\\-.]?\\d{2,4}[\\s\\-.]?\\d{3,4}[\\s\\-.]?\\d{3,4}\\b'),
|
| 499 |
+
|
| 500 |
+
// IBAN - International Bank Account Number
|
| 501 |
+
iban: new RegExp('\\b[A-Z]{2}\\d{2}[\\s]?[A-Z0-9]{4}[\\s]?[A-Z0-9]{4}[\\s]?[A-Z0-9]{4}[\\s]?[A-Z0-9]{0,14}\\b'),
|
| 502 |
+
|
| 503 |
+
// Password - Labeled patterns
|
| 504 |
+
passwordLabeled: new RegExp('(password|passwd|pwd|pass)[\\s:=]+\\S+', 'i'),
|
| 505 |
+
// Database connection strings with credentials
|
| 506 |
+
dbConnection: new RegExp('(postgres|mysql|mongodb|redis)://[^:]+:[^@]+@', 'i')
|
| 507 |
};
|
| 508 |
|
| 509 |
let detected = [];
|