Spaces:
Running
Running
fix(v4.3): compliance.py — bug report fixes (10 issues)
Browse files- compliance.py +65 -10
compliance.py
CHANGED
|
@@ -245,12 +245,55 @@ def _get_context(text, keyword, window=100):
|
|
| 245 |
return context
|
| 246 |
|
| 247 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
def check_compliance(text):
|
| 249 |
-
"""Check contract text against
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
text_lower = text.lower()
|
| 251 |
results = {}
|
| 252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
for reg_name, reg_data in REGULATIONS.items():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
checks = []
|
| 255 |
for req_name, req_data in reg_data["requirements"].items():
|
| 256 |
matched = False
|
|
@@ -259,15 +302,27 @@ def check_compliance(text):
|
|
| 259 |
context_snippets = []
|
| 260 |
|
| 261 |
for kw in req_data["keywords"]:
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
|
| 272 |
if matched and not negated:
|
| 273 |
status = "PASS"
|
|
|
|
| 245 |
return context
|
| 246 |
|
| 247 |
|
| 248 |
+
# FIX v4.3: Regulation applicability gates — only apply regulations relevant to the contract type.
#
# Maps a regulation name to a compiled, case-insensitive pattern of trigger
# terms. check_compliance() searches the contract text with each pattern and
# only evaluates that regulation's requirements when the pattern matches;
# regulations without an entry here (GDPR, CCPA) are always evaluated.
#
# Short acronyms and terms that commonly occur inside longer words are guarded
# with (?<!\w) / (?!\w) so that e.g. "phi" does not fire on "philosophy",
# "securities" does not fire on "insecurities", and "sec filing" does not fire
# on "parsec filing". Deliberately open-ended stems such as "investment advis"
# (adviser / advisor / advisory) are left unanchored on the right.
_REGULATION_GATES = {
    "SOX": re.compile(
        r'financial\s+statement|internal\s+control|audit\s+committee|public\s+company|(?<!\w)sec\s+filing|pcaob|sarbanes',
        re.IGNORECASE,
    ),
    "HIPAA": re.compile(
        r'protected\s+health|(?<!\w)phi(?!\w)|health\s+information|medical\s+record|business\s+associate\s+agreement|(?<!\w)baa(?!\w)|hipaa',
        re.IGNORECASE,
    ),
    "FINRA": re.compile(
        r'(?<!\w)securities|broker[\-\s]?dealer|investment\s+advis|financial\s+industry|(?<!\w)finra(?!\w)|registered\s+representative',
        re.IGNORECASE,
    ),
}
|
| 263 |
+
|
| 264 |
+
|
| 265 |
def check_compliance(text):
|
| 266 |
+
"""Check contract text against applicable regulatory frameworks with negation handling.
|
| 267 |
+
|
| 268 |
+
FIX v4.3:
|
| 269 |
+
- Regulation applicability gates: SOX/HIPAA/FINRA only checked if contract contains relevant terms
|
| 270 |
+
- Whole-word keyword matching: prevents substring false positives (e.g. "SAR" in "Year 3")
|
| 271 |
+
- GDPR and CCPA always checked (broadly applicable)
|
| 272 |
+
"""
|
| 273 |
text_lower = text.lower()
|
| 274 |
results = {}
|
| 275 |
|
| 276 |
+
# FIX v4.3: Determine which regulations apply to this contract
|
| 277 |
+
applicable_regs = {"GDPR", "CCPA"} # Always check these
|
| 278 |
+
for reg_name, gate_pattern in _REGULATION_GATES.items():
|
| 279 |
+
if gate_pattern.search(text):
|
| 280 |
+
applicable_regs.add(reg_name)
|
| 281 |
+
|
| 282 |
for reg_name, reg_data in REGULATIONS.items():
|
| 283 |
+
# FIX v4.3: Skip regulations that don't apply to this contract
|
| 284 |
+
if reg_name not in applicable_regs:
|
| 285 |
+
# Still include in results but mark as not applicable
|
| 286 |
+
results[reg_name] = {
|
| 287 |
+
"description": reg_data["description"],
|
| 288 |
+
"compliance_rate": -1, # -1 = not applicable
|
| 289 |
+
"checks": [],
|
| 290 |
+
"overall_status": "NOT_APPLICABLE",
|
| 291 |
+
"negated_count": 0,
|
| 292 |
+
"ambiguous_count": 0,
|
| 293 |
+
"note": f"{reg_name} does not appear applicable to this contract type.",
|
| 294 |
+
}
|
| 295 |
+
continue
|
| 296 |
+
|
| 297 |
checks = []
|
| 298 |
for req_name, req_data in reg_data["requirements"].items():
|
| 299 |
matched = False
|
|
|
|
| 302 |
context_snippets = []
|
| 303 |
|
| 304 |
for kw in req_data["keywords"]:
|
| 305 |
+
# FIX v4.3: Use whole-word matching to prevent substring false positives
|
| 306 |
+
# e.g., "SAR" should not match "Year 3" tokenised fragments
|
| 307 |
+
kw_lower = kw.lower()
|
| 308 |
+
if len(kw_lower) <= 4:
|
| 309 |
+
# Short keywords (SAR, DPO, PHI, BAA) — require word boundaries
|
| 310 |
+
pattern = re.compile(r'\b' + re.escape(kw_lower) + r'\b', re.IGNORECASE)
|
| 311 |
+
if not pattern.search(text_lower):
|
| 312 |
+
continue
|
| 313 |
+
else:
|
| 314 |
+
# Longer keywords — substring is OK
|
| 315 |
+
if kw_lower not in text_lower:
|
| 316 |
+
continue
|
| 317 |
+
|
| 318 |
+
matched_keywords.append(kw)
|
| 319 |
+
if _check_negation(text_lower, kw):
|
| 320 |
+
negated = True
|
| 321 |
+
else:
|
| 322 |
+
matched = True
|
| 323 |
+
ctx = _get_context(text, kw)
|
| 324 |
+
if ctx:
|
| 325 |
+
context_snippets.append(ctx)
|
| 326 |
|
| 327 |
if matched and not negated:
|
| 328 |
status = "PASS"
|