gaurv007 committed on
Commit
7e95182
·
verified ·
1 Parent(s): 30580c9

fix(v4.3): compliance.py — bug report fixes (10 issues)

Browse files
Files changed (1) hide show
  1. compliance.py +65 -10
compliance.py CHANGED
@@ -245,12 +245,55 @@ def _get_context(text, keyword, window=100):
245
  return context
246
 
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  def check_compliance(text):
249
- """Check contract text against all regulatory frameworks with negation handling."""
 
 
 
 
 
 
250
  text_lower = text.lower()
251
  results = {}
252
 
 
 
 
 
 
 
253
  for reg_name, reg_data in REGULATIONS.items():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  checks = []
255
  for req_name, req_data in reg_data["requirements"].items():
256
  matched = False
@@ -259,15 +302,27 @@ def check_compliance(text):
259
  context_snippets = []
260
 
261
  for kw in req_data["keywords"]:
262
- if kw.lower() in text_lower:
263
- matched_keywords.append(kw)
264
- if _check_negation(text_lower, kw):
265
- negated = True
266
- else:
267
- matched = True
268
- ctx = _get_context(text, kw)
269
- if ctx:
270
- context_snippets.append(ctx)
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
  if matched and not negated:
273
  status = "PASS"
 
245
  return context
246
 
247
 
248
+ # FIX v4.3: Regulation applicability gates — only apply regulations relevant to the contract type
249
+ _REGULATION_GATES = {
250
+ "SOX": re.compile(
251
+ r'financial\s+statement|internal\s+control|audit\s+committee|public\s+company|sec\s+filing|pcaob|sarbanes',
252
+ re.IGNORECASE
253
+ ),
254
+ "HIPAA": re.compile(
255
+ r'protected\s+health|(?<!\w)phi(?!\w)|health\s+information|medical\s+record|business\s+associate\s+agreement|(?<!\w)baa(?!\w)|hipaa',
256
+ re.IGNORECASE
257
+ ),
258
+ "FINRA": re.compile(
259
+ r'securities|broker[\-\s]?dealer|investment\s+advis|financial\s+industry|(?<!\w)finra(?!\w)|registered\s+representative',
260
+ re.IGNORECASE
261
+ ),
262
+ }
263
+
264
+
265
  def check_compliance(text):
266
+ """Check contract text against applicable regulatory frameworks with negation handling.
267
+
268
+ FIX v4.3:
269
+ - Regulation applicability gates: SOX/HIPAA/FINRA only checked if contract contains relevant terms
270
+ - Whole-word matching for short keywords (<= 4 chars): prevents substring false positives (e.g. "SAR" inside "necessary"); longer keywords still use substring matching
271
+ - GDPR and CCPA always checked (broadly applicable)
272
+ """
273
  text_lower = text.lower()
274
  results = {}
275
 
276
+ # FIX v4.3: Determine which regulations apply to this contract
277
+ applicable_regs = {"GDPR", "CCPA"} # Always check these
278
+ for reg_name, gate_pattern in _REGULATION_GATES.items():
279
+ if gate_pattern.search(text):
280
+ applicable_regs.add(reg_name)
281
+
282
  for reg_name, reg_data in REGULATIONS.items():
283
+ # FIX v4.3: Skip regulations that don't apply to this contract
284
+ if reg_name not in applicable_regs:
285
+ # Still include in results but mark as not applicable
286
+ results[reg_name] = {
287
+ "description": reg_data["description"],
288
+ "compliance_rate": -1, # -1 = not applicable
289
+ "checks": [],
290
+ "overall_status": "NOT_APPLICABLE",
291
+ "negated_count": 0,
292
+ "ambiguous_count": 0,
293
+ "note": f"{reg_name} does not appear applicable to this contract type.",
294
+ }
295
+ continue
296
+
297
  checks = []
298
  for req_name, req_data in reg_data["requirements"].items():
299
  matched = False
 
302
  context_snippets = []
303
 
304
  for kw in req_data["keywords"]:
305
+ # FIX v4.3: Use whole-word matching to prevent substring false positives
306
+ # e.g., "SAR" should not match inside a longer word such as "necessary"
307
+ kw_lower = kw.lower()
308
+ if len(kw_lower) <= 4:
309
+ # Short keywords (SAR, DPO, PHI, BAA) — require word boundaries
310
+ pattern = re.compile(r'\b' + re.escape(kw_lower) + r'\b', re.IGNORECASE)
311
+ if not pattern.search(text_lower):
312
+ continue
313
+ else:
314
+ # Longer keywords — substring is OK
315
+ if kw_lower not in text_lower:
316
+ continue
317
+
318
+ matched_keywords.append(kw)
319
+ if _check_negation(text_lower, kw):
320
+ negated = True
321
+ else:
322
+ matched = True
323
+ ctx = _get_context(text, kw)
324
+ if ctx:
325
+ context_snippets.append(ctx)
326
 
327
  if matched and not negated:
328
  status = "PASS"