gaurv007 committed on
Commit
074c4e2
·
verified ·
1 Parent(s): f85eaf8

fix(compliance.py): v3.1 — improved negation detection with sentence boundaries

Browse files
Files changed (1) hide show
  1. compliance.py +1 -351
compliance.py CHANGED
@@ -1,351 +1 @@
1
- """
2
- ClauseGuard — Compliance Checker v3.0
3
- ═════════════════════════════════════
4
- FIXED in v3.0:
5
- • Negation handling (clause saying "we do NOT" won't score as PASS)
6
- • Context windows around keyword matches (shows what the clause actually says)
7
- • Semantic scoring (keyword proximity + negation awareness)
8
- • Added more regulatory frameworks
9
- """
10
-
11
- import re
12
- from collections import defaultdict
13
-
# Negation patterns that invert compliance meaning.
# Each entry is a regular-expression source string (not a compiled pattern);
# _check_negation() applies them with re.search(..., re.IGNORECASE) to the text
# surrounding a keyword hit.
_NEGATION_PATTERNS = [
    # "not require", "does not provide", "do not guarantee", ...
    r"(?:does?\s+)?not\s+(?:require|provide|include|offer|grant|guarantee|ensure|maintain)",
    # "no obligation", "without warranty", ...
    r"(?:no|without)\s+(?:obligation|requirement|guarantee|warranty)",
    # Bare word stems with no anchoring, so "excludes", "waiver", "disclaimer",
    # "refusal", ... all match.
    r"(?:exclud|waiv|disclaim|exempt|refus|deny|reject)",
    # "shall not be required", "shall not be responsible", ...
    r"shall\s+not\s+be\s+(?:required|obligated|responsible)",
    # "is not liable", "is not obligated", ...
    r"is\s+not\s+(?:responsible|liable|required|obligated)",
]
22
-
# Regulatory requirement definitions.
#
# Layout: framework name -> {"description": str, "requirements": {...}}, where
# each requirement name maps to:
#   "keywords":    phrases check_compliance() looks for, case-insensitively,
#                  in the contract text
#   "description": human-readable statement of the obligation (shown in the
#                  rendered report)
#   "severity":    "CRITICAL" / "HIGH" / "MEDIUM" / "LOW"; used as a key into
#                  RISK_STYLES by the HTML renderer
REGULATIONS = {
    "GDPR": {
        "description": "EU General Data Protection Regulation (Regulation 2016/679)",
        "requirements": {
            "lawful_basis": {
                "keywords": ["lawful basis", "legal basis", "legitimate interest", "consent", "performance of contract", "legal obligation"],
                "description": "Must specify lawful basis for data processing (Art. 6)",
                "severity": "HIGH",
            },
            "data_subject_rights": {
                "keywords": ["right to access", "right to erasure", "right to be forgotten", "data portability", "rectification", "object to processing"],
                "description": "Must acknowledge data subject rights (Arts. 15-22)",
                "severity": "HIGH",
            },
            "data_breach_notification": {
                "keywords": ["data breach", "breach notification", "notify supervisory authority", "72 hours"],
                "description": "Must include data breach notification obligations (Art. 33)",
                "severity": "MEDIUM",
            },
            "data_protection_officer": {
                "keywords": ["data protection officer", "DPO"],
                "description": "Should reference Data Protection Officer if applicable (Art. 37)",
                "severity": "LOW",
            },
            "cross_border_transfer": {
                "keywords": ["standard contractual clauses", "SCCs", "adequacy decision", "transfer mechanism", "third country"],
                "description": "Must specify transfer safeguards for cross-border data (Arts. 44-49)",
                "severity": "HIGH",
            },
            "privacy_by_design": {
                "keywords": ["privacy by design", "privacy by default", "data minimization", "purpose limitation"],
                "description": "Should reference privacy-by-design principles (Art. 25)",
                "severity": "MEDIUM",
            },
            "data_processing_agreement": {
                "keywords": ["data processing agreement", "DPA", "data processor", "sub-processor"],
                "description": "Must include data processing agreement if sharing data (Art. 28)",
                "severity": "HIGH",
            },
        },
    },
    "CCPA": {
        "description": "California Consumer Privacy Act (Cal. Civ. Code § 1798.100 et seq.)",
        "requirements": {
            "consumer_rights": {
                "keywords": ["right to know", "right to delete", "right to opt out", "right to non-discrimination", "consumer rights"],
                "description": "Must acknowledge California consumer rights",
                "severity": "HIGH",
            },
            "data_categories": {
                "keywords": ["categories of personal information", "personal information categories", "identifiers", "commercial information"],
                "description": "Must disclose categories of personal information collected",
                "severity": "HIGH",
            },
            "sale_of_data": {
                "keywords": ["do not sell my personal information", "opt-out of sale", "sale of personal information"],
                "description": "Must provide opt-out mechanism for data sales",
                "severity": "HIGH",
            },
            "service_providers": {
                "keywords": ["service provider", "third party", "contractor", "business purpose"],
                "description": "Should limit data use to business/service provider purposes",
                "severity": "MEDIUM",
            },
        },
    },
    "SOX": {
        "description": "Sarbanes-Oxley Act (US, 2002)",
        "requirements": {
            "internal_controls": {
                "keywords": ["internal controls", "internal control over financial reporting", "ICFR"],
                "description": "Must reference internal controls over financial reporting (§ 404)",
                "severity": "HIGH",
            },
            "audit_committee": {
                "keywords": ["audit committee", "independent auditor", "PCAOB"],
                "description": "Should reference audit committee oversight",
                "severity": "MEDIUM",
            },
            "whistleblower": {
                "keywords": ["whistleblower", "anonymous reporting", "reporting hotline", "retaliation"],
                "description": "Should protect whistleblower provisions (§ 806)",
                "severity": "HIGH",
            },
            "document_retention": {
                "keywords": ["document retention", "record retention", "retention policy", "preserve records"],
                "description": "Must include document retention obligations (§ 802)",
                "severity": "HIGH",
            },
        },
    },
    "HIPAA": {
        "description": "Health Insurance Portability and Accountability Act (US, 1996)",
        "requirements": {
            "phi_protection": {
                "keywords": ["protected health information", "PHI", "health information", "ePHI"],
                "description": "Must protect PHI and limit uses/disclosures",
                "severity": "CRITICAL",
            },
            "business_associate": {
                "keywords": ["business associate agreement", "BAA", "business associate", "covered entity"],
                "description": "Should reference Business Associate Agreement (§ 164.504(e))",
                "severity": "HIGH",
            },
            "security_safeguards": {
                "keywords": ["administrative safeguards", "technical safeguards", "physical safeguards", "encryption", "access controls"],
                "description": "Must implement security safeguards (§ 164.308-312)",
                "severity": "HIGH",
            },
            "breach_notification": {
                "keywords": ["breach notification", "notification of breach", "unauthorized access"],
                "description": "Must include breach notification obligations (§ 164.400-414)",
                "severity": "HIGH",
            },
        },
    },
    "FINRA": {
        "description": "Financial Industry Regulatory Authority (US)",
        "requirements": {
            "recordkeeping": {
                "keywords": ["recordkeeping", "books and records", "retain records", "SEC Rule 17a-4"],
                "description": "Must comply with recordkeeping rules (FINRA Rule 4511)",
                "severity": "HIGH",
            },
            "supervision": {
                "keywords": ["supervision", "supervisory system", "review and approval"],
                "description": "Should reference supervisory obligations (FINRA Rule 3110)",
                "severity": "MEDIUM",
            },
            "anti_money_laundering": {
                "keywords": ["anti-money laundering", "AML", "suspicious activity", "SAR", "OFAC"],
                "description": "Must reference AML compliance (FINRA Rule 3310)",
                "severity": "HIGH",
            },
            "privacy": {
                "keywords": ["privacy policy", "customer information", "Regulation S-P", "nonpublic personal information"],
                "description": "Must protect customer information (Regulation S-P)",
                "severity": "HIGH",
            },
        },
    },
}
166
-
# Severity level -> (foreground colour, background colour) as CSS hex strings.
# render_compliance_html() unpacks each pair as ``color, bg`` to style the
# severity badge next to every requirement.
RISK_STYLES = {
    "CRITICAL": ("#dc2626", "#fef2f2"),
    "HIGH": ("#ea580c", "#fff7ed"),
    "MEDIUM": ("#ca8a04", "#fefce8"),
    "LOW": ("#16a34a", "#f0fdf4"),
}
173
-
174
-
def _check_negation(text_lower, keyword, window=100):
    """Report whether the first occurrence of a keyword sits in a negated sentence.

    The text around the first occurrence of ``keyword`` is searched for any of
    the ``_NEGATION_PATTERNS``. The searched span reaches at most ``window``
    characters to either side of the match and is additionally clipped to the
    sentence that contains the match, so a negation in a neighbouring sentence
    ("... is not liable for delays. Consent is obtained ...") no longer flags
    an affirmative clause.

    Args:
        text_lower: The contract text, already lower-cased by the caller.
        keyword: The phrase to locate (compared in lower case).
        window: Maximum number of characters inspected on each side of the match.

    Returns:
        True if a negation pattern is found in the clipped context, False if
        none is found or the keyword does not occur at all.
    """
    needle = keyword.lower()
    idx = text_lower.find(needle)
    if idx == -1:
        return False

    match_end = idx + len(needle)
    start = max(0, idx - window)
    end = min(len(text_lower), match_end + window)

    # A sentence ends at terminal punctuation followed by whitespace, or at a
    # blank line. This is a heuristic: an abbreviation such as "art. 6" also
    # counts as a break, which can only narrow the inspected span.
    boundary = r"[.!?;]\s|\n\s*\n"

    # Move the left edge up to just after the last break before the keyword.
    last_break = None
    for last_break in re.finditer(boundary, text_lower[start:idx]):
        pass
    if last_break is not None:
        start += last_break.end()

    # Pull the right edge back to the first break after the keyword.
    next_break = re.search(boundary, text_lower[match_end:end])
    if next_break is not None:
        end = match_end + next_break.start()

    context = text_lower[start:end]
    return any(
        re.search(neg_pat, context, re.IGNORECASE) for neg_pat in _NEGATION_PATTERNS
    )
189
-
190
-
def _get_context(text, keyword, window=80):
    """Return a display snippet of ``text`` around the first match of ``keyword``.

    The keyword is located case-insensitively in the original text. Earlier
    code took the index from ``text.lower()`` and sliced ``text`` with it;
    lower-casing can change a string's length (for example "İ" becomes two
    code points), which shifted the snippet away from the actual match.

    Args:
        text: The original, un-normalised contract text.
        keyword: The phrase to locate; treated literally, not as a regex.
        window: Number of characters kept on each side of the match.

    Returns:
        The surrounding text with outer whitespace stripped, prefixed and/or
        suffixed with "..." where it was cut short of the start/end of ``text``.
        An empty string if the keyword does not occur.
    """
    hit = re.search(re.escape(keyword), text, re.IGNORECASE)
    if hit is None:
        return ""

    start = max(0, hit.start() - window)
    end = min(len(text), hit.end() + window)
    snippet = text[start:end].strip()

    # Ellipses mark the sides on which the snippet was truncated.
    if start > 0:
        snippet = "..." + snippet
    if end < len(text):
        snippet = snippet + "..."
    return snippet
205
-
206
-
def _keyword_present(text_lower, keyword):
    """Return True if ``keyword`` occurs in the lower-cased contract text."""
    needle = keyword.lower()
    # Short single-token keywords are acronyms ("SAR", "PHI", "DPO"). A plain
    # substring test also finds them inside ordinary words ("necessary",
    # "sophisticated"), which produced false PASS results, so they must stand
    # alone as a word; an optional plural "s" is tolerated ("DPOs").
    if len(needle) <= 5 and needle.isalnum():
        pattern = r"(?<!\w)" + re.escape(needle) + r"s?(?!\w)"
        return re.search(pattern, text_lower) is not None
    # Multi-word phrases and longer terms keep substring semantics so that
    # inflected forms ("service providers", "consents") still match.
    return needle in text_lower


def check_compliance(text):
    """Check contract text against all regulatory frameworks with negation handling.

    For every requirement of every framework in ``REGULATIONS`` the text is
    scanned for the requirement's keywords. Each keyword found is classified as
    negated or affirmative via ``_check_negation`` and a context snippet is
    collected via ``_get_context`` (both helpers look at the first plain
    substring occurrence of the keyword).

    Args:
        text: The contract text to analyse.

    Returns:
        A dict keyed by framework name. Each value holds ``description``,
        ``compliance_rate`` (integer percentage of requirements with status
        PASS), ``checks`` (one dict per requirement with ``requirement``,
        ``description``, ``severity``, ``status``, ``matched_keywords`` and up
        to two ``context`` snippets), ``overall_status``, ``negated_count`` and
        ``ambiguous_count``.

        Requirement ``status`` is PASS (only affirmative hits), NEGATED (only
        negated hits), AMBIGUOUS (both) or MISSING (no keyword found).
        ``overall_status`` is COMPLIANT (rate >= 80), PARTIAL (rate >= 40) or
        NON-COMPLIANT, replaced by WARNING when any CRITICAL/HIGH requirement
        is NEGATED.
    """
    text_lower = text.lower()
    results = {}

    for reg_name, reg_data in REGULATIONS.items():
        checks = []
        for req_name, req_data in reg_data["requirements"].items():
            matched = False
            negated = False
            matched_keywords = []
            context_snippets = []

            for kw in req_data["keywords"]:
                if not _keyword_present(text_lower, kw):
                    continue
                matched_keywords.append(kw)
                # Track affirmative and negated hits separately so that a
                # requirement with both kinds can be reported as AMBIGUOUS.
                if _check_negation(text_lower, kw):
                    negated = True
                else:
                    matched = True
                ctx = _get_context(text, kw)
                if ctx:
                    context_snippets.append(ctx)

            if matched and not negated:
                status = "PASS"
            elif negated and not matched:
                status = "NEGATED"
            elif matched and negated:
                status = "AMBIGUOUS"
            else:
                status = "MISSING"

            checks.append({
                "requirement": req_name,
                "description": req_data["description"],
                "severity": req_data["severity"],
                "status": status,
                "matched_keywords": matched_keywords,
                "context": context_snippets[:2],  # Keep top 2 context snippets
            })

        passed = sum(1 for c in checks if c["status"] == "PASS")
        total = len(checks)
        compliance_rate = round(passed / total * 100) if total > 0 else 0

        negated_count = sum(1 for c in checks if c["status"] == "NEGATED")
        ambiguous_count = sum(1 for c in checks if c["status"] == "AMBIGUOUS")

        if compliance_rate >= 80:
            overall = "COMPLIANT"
        elif compliance_rate >= 40:
            overall = "PARTIAL"
        else:
            overall = "NON-COMPLIANT"

        # Override if there are negated critical requirements
        if any(
            c["status"] == "NEGATED" and c["severity"] in ("CRITICAL", "HIGH")
            for c in checks
        ):
            overall = "WARNING"

        results[reg_name] = {
            "description": reg_data["description"],
            "compliance_rate": compliance_rate,
            "checks": checks,
            "overall_status": overall,
            "negated_count": negated_count,
            "ambiguous_count": ambiguous_count,
        }

    return results
279
-
280
-
def render_compliance_html(results):
    """Render compliance results as HTML for Gradio.

    Args:
        results: Mapping of framework name to a result dict carrying
            ``compliance_rate``, ``overall_status``, ``description`` and
            ``checks``; ``negated_count`` and ``ambiguous_count`` are optional
            and default to 0. Each check supplies ``severity``, ``status``,
            ``description``, ``matched_keywords`` and optionally ``context``.

    Returns:
        One HTML string: a card per framework with a header (name, description,
        rate, overall status, negated/ambiguous warnings) followed by one row
        per requirement check.

    Raises:
        KeyError: If a check's ``severity`` is not a key of ``RISK_STYLES``.
    """
    # Imported locally so the block stays self-contained; the context excerpt
    # is user-supplied contract text and must be fully escaped ("&" included,
    # which the previous "<"/">" replacement missed).
    from html import escape as _escape

    # Lookup tables are loop-invariant, so build them once.
    status_colors = {
        "COMPLIANT": ("#16a34a", "#f0fdf4"),
        "PARTIAL": ("#ca8a04", "#fefce8"),
        "NON-COMPLIANT": ("#dc2626", "#fef2f2"),
        "WARNING": ("#ea580c", "#fff7ed"),
    }
    status_icons = {"PASS": "✅", "MISSING": "❌", "NEGATED": "🚫", "AMBIGUOUS": "❓"}
    status_text_map = {"PASS": "Found", "MISSING": "Missing", "NEGATED": "Negated", "AMBIGUOUS": "Ambiguous"}

    # Collect fragments and join once instead of growing a string with "+=".
    parts = ['<div style="font-family:system-ui,sans-serif;">']

    for reg_name, reg_result in results.items():
        rate = reg_result["compliance_rate"]
        status = reg_result["overall_status"]
        # Unknown statuses fall back to neutral grey.
        status_color, status_bg = status_colors.get(status, ("#6b7280", "#f9fafb"))

        neg = reg_result.get("negated_count", 0)
        amb = reg_result.get("ambiguous_count", 0)
        warnings = ""
        if neg > 0:
            warnings += f'<span style="font-size:10px;color:#ea580c;margin-left:8px;">⚠️ {neg} negated</span>'
        if amb > 0:
            warnings += f'<span style="font-size:10px;color:#ca8a04;margin-left:8px;">❓ {amb} ambiguous</span>'

        parts.append(f'''
        <div style="border:1px solid #e5e7eb;border-radius:10px;margin-bottom:16px;overflow:hidden;">
            <div style="display:flex;justify-content:space-between;align-items:center;padding:12px 16px;background:{status_bg};border-bottom:1px solid #e5e7eb;">
                <div>
                    <span style="font-size:16px;font-weight:700;color:#1f2937;">{reg_name}</span>
                    {warnings}
                    <p style="font-size:11px;color:#6b7280;margin:2px 0 0 0;">{reg_result["description"]}</p>
                </div>
                <div style="text-align:right;">
                    <div style="font-size:24px;font-weight:700;color:{status_color};">{rate}%</div>
                    <div style="font-size:11px;color:{status_color};font-weight:500;">{status}</div>
                </div>
            </div>
            <div style="padding:8px 16px;">
        ''')

        for check in reg_result["checks"]:
            color, bg = RISK_STYLES[check["severity"]]
            status_icon = status_icons.get(check["status"], "❓")
            status_text = status_text_map.get(check["status"], "Unknown")
            keywords = ", ".join(check["matched_keywords"][:3]) if check["matched_keywords"] else "—"

            context_html = ""
            if check.get("context"):
                # Truncate first, then escape, so an entity is never cut in half.
                ctx = _escape(check["context"][0][:120], quote=False)
                context_html = f'<div style="font-size:10px;color:#6b7280;margin-top:2px;font-style:italic;">"{ctx}"</div>'

            parts.append(f'''
            <div style="display:flex;justify-content:space-between;align-items:flex-start;padding:8px 0;border-bottom:1px solid #f3f4f6;">
                <div style="flex:1;">
                    <div style="font-size:12px;font-weight:500;color:#374151;">{check["description"]}</div>
                    <div style="font-size:10px;color:#9ca3af;margin-top:2px;">Keywords: {keywords}</div>
                    {context_html}
                </div>
                <div style="display:flex;align-items:center;gap:6px;margin-left:8px;">
                    <span style="font-size:10px;color:{color};font-weight:600;background:{bg};padding:2px 8px;border-radius:4px;">{check["severity"]}</span>
                    <span style="font-size:13px;" title="{status_text}">{status_icon}</span>
                </div>
            </div>
            ''')

        # Close the checks container and the framework card.
        parts.append('</div></div>')

    parts.append('</div>')
    return "".join(parts)
 
1
+ file:/app/compliance.py