gaurv007 committed on
Commit
464c655
·
verified ·
1 Parent(s): a90501a

v3.0: Upload actual compliance.py content

Browse files
Files changed (1) hide show
  1. compliance.py +351 -1
compliance.py CHANGED
@@ -1 +1,351 @@
1
- /app/clauseguard/compliance.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ClauseGuard — Compliance Checker v3.0
3
+ ═════════════════════════════════════
4
+ FIXED in v3.0:
5
+ • Negation handling (clause saying "we do NOT" won't score as PASS)
6
+ • Context windows around keyword matches (shows what the clause actually says)
7
+ • Semantic scoring (keyword proximity + negation awareness)
8
+ • Added more regulatory frameworks
9
+ """
10
+
11
+ import re
12
+ from collections import defaultdict
13
+
14
+ # Negation patterns that invert compliance meaning
15
+ _NEGATION_PATTERNS = [
16
+ r"(?:does?\s+)?not\s+(?:require|provide|include|offer|grant|guarantee|ensure|maintain)",
17
+ r"(?:no|without)\s+(?:obligation|requirement|guarantee|warranty)",
18
+ r"(?:exclud|waiv|disclaim|exempt|refus|deny|reject)",
19
+ r"shall\s+not\s+be\s+(?:required|obligated|responsible)",
20
+ r"is\s+not\s+(?:responsible|liable|required|obligated)",
21
+ ]
22
+
23
# Regulatory requirement definitions.
#
# Layout: framework name -> {"description": str, "requirements": {...}}, where
# each requirement maps its identifier to:
#   "keywords"    - phrases whose presence in a contract counts as evidence
#   "description" - human-readable statement of the requirement
#   "severity"    - one of the keys of RISK_STYLES (CRITICAL / HIGH / MEDIUM / LOW)
REGULATIONS = {
    "GDPR": {
        "description": "EU General Data Protection Regulation (Regulation 2016/679)",
        "requirements": {
            "lawful_basis": {
                "keywords": ["lawful basis", "legal basis", "legitimate interest", "consent", "performance of contract", "legal obligation"],
                "description": "Must specify lawful basis for data processing (Art. 6)",
                "severity": "HIGH",
            },
            "data_subject_rights": {
                "keywords": ["right to access", "right to erasure", "right to be forgotten", "data portability", "rectification", "object to processing"],
                "description": "Must acknowledge data subject rights (Arts. 15-22)",
                "severity": "HIGH",
            },
            "data_breach_notification": {
                "keywords": ["data breach", "breach notification", "notify supervisory authority", "72 hours"],
                "description": "Must include data breach notification obligations (Art. 33)",
                "severity": "MEDIUM",
            },
            "data_protection_officer": {
                "keywords": ["data protection officer", "DPO"],
                "description": "Should reference Data Protection Officer if applicable (Art. 37)",
                "severity": "LOW",
            },
            "cross_border_transfer": {
                "keywords": ["standard contractual clauses", "SCCs", "adequacy decision", "transfer mechanism", "third country"],
                "description": "Must specify transfer safeguards for cross-border data (Arts. 44-49)",
                "severity": "HIGH",
            },
            "privacy_by_design": {
                "keywords": ["privacy by design", "privacy by default", "data minimization", "purpose limitation"],
                "description": "Should reference privacy-by-design principles (Art. 25)",
                "severity": "MEDIUM",
            },
            "data_processing_agreement": {
                "keywords": ["data processing agreement", "DPA", "data processor", "sub-processor"],
                "description": "Must include data processing agreement if sharing data (Art. 28)",
                "severity": "HIGH",
            },
        },
    },
    "CCPA": {
        "description": "California Consumer Privacy Act (Cal. Civ. Code § 1798.100 et seq.)",
        "requirements": {
            "consumer_rights": {
                "keywords": ["right to know", "right to delete", "right to opt out", "right to non-discrimination", "consumer rights"],
                "description": "Must acknowledge California consumer rights",
                "severity": "HIGH",
            },
            "data_categories": {
                "keywords": ["categories of personal information", "personal information categories", "identifiers", "commercial information"],
                "description": "Must disclose categories of personal information collected",
                "severity": "HIGH",
            },
            "sale_of_data": {
                "keywords": ["do not sell my personal information", "opt-out of sale", "sale of personal information"],
                "description": "Must provide opt-out mechanism for data sales",
                "severity": "HIGH",
            },
            "service_providers": {
                "keywords": ["service provider", "third party", "contractor", "business purpose"],
                "description": "Should limit data use to business/service provider purposes",
                "severity": "MEDIUM",
            },
        },
    },
    "SOX": {
        "description": "Sarbanes-Oxley Act (US, 2002)",
        "requirements": {
            "internal_controls": {
                "keywords": ["internal controls", "internal control over financial reporting", "ICFR"],
                "description": "Must reference internal controls over financial reporting (§ 404)",
                "severity": "HIGH",
            },
            "audit_committee": {
                "keywords": ["audit committee", "independent auditor", "PCAOB"],
                "description": "Should reference audit committee oversight",
                "severity": "MEDIUM",
            },
            "whistleblower": {
                "keywords": ["whistleblower", "anonymous reporting", "reporting hotline", "retaliation"],
                "description": "Should protect whistleblower provisions (§ 806)",
                "severity": "HIGH",
            },
            "document_retention": {
                "keywords": ["document retention", "record retention", "retention policy", "preserve records"],
                "description": "Must include document retention obligations (§ 802)",
                "severity": "HIGH",
            },
        },
    },
    "HIPAA": {
        "description": "Health Insurance Portability and Accountability Act (US, 1996)",
        "requirements": {
            "phi_protection": {
                "keywords": ["protected health information", "PHI", "health information", "ePHI"],
                "description": "Must protect PHI and limit uses/disclosures",
                "severity": "CRITICAL",
            },
            "business_associate": {
                "keywords": ["business associate agreement", "BAA", "business associate", "covered entity"],
                "description": "Should reference Business Associate Agreement (§ 164.504(e))",
                "severity": "HIGH",
            },
            "security_safeguards": {
                "keywords": ["administrative safeguards", "technical safeguards", "physical safeguards", "encryption", "access controls"],
                "description": "Must implement security safeguards (§ 164.308-312)",
                "severity": "HIGH",
            },
            "breach_notification": {
                "keywords": ["breach notification", "notification of breach", "unauthorized access"],
                "description": "Must include breach notification obligations (§ 164.400-414)",
                "severity": "HIGH",
            },
        },
    },
    "FINRA": {
        "description": "Financial Industry Regulatory Authority (US)",
        "requirements": {
            "recordkeeping": {
                "keywords": ["recordkeeping", "books and records", "retain records", "SEC Rule 17a-4"],
                "description": "Must comply with recordkeeping rules (FINRA Rule 4511)",
                "severity": "HIGH",
            },
            "supervision": {
                "keywords": ["supervision", "supervisory system", "review and approval"],
                "description": "Should reference supervisory obligations (FINRA Rule 3110)",
                "severity": "MEDIUM",
            },
            "anti_money_laundering": {
                "keywords": ["anti-money laundering", "AML", "suspicious activity", "SAR", "OFAC"],
                "description": "Must reference AML compliance (FINRA Rule 3310)",
                "severity": "HIGH",
            },
            "privacy": {
                "keywords": ["privacy policy", "customer information", "Regulation S-P", "nonpublic personal information"],
                "description": "Must protect customer information (Regulation S-P)",
                "severity": "HIGH",
            },
        },
    },
}
166
+
167
# Severity label -> pair of CSS colours. ``render_compliance_html`` unpacks the
# pair as (text colour, background colour) for the severity badge.
RISK_STYLES = {
    "CRITICAL": ("#dc2626", "#fef2f2"),
    "HIGH": ("#ea580c", "#fff7ed"),
    "MEDIUM": ("#ca8a04", "#fefce8"),
    "LOW": ("#16a34a", "#f0fdf4"),
}
173
+
174
+
175
+ def _check_negation(text_lower, keyword, window=100):
176
+ """Check if a keyword match is negated by nearby negation words."""
177
+ idx = text_lower.find(keyword.lower())
178
+ if idx == -1:
179
+ return False
180
+ # Get context window around the match
181
+ start = max(0, idx - window)
182
+ end = min(len(text_lower), idx + len(keyword) + window)
183
+ context = text_lower[start:end]
184
+
185
+ for neg_pat in _NEGATION_PATTERNS:
186
+ if re.search(neg_pat, context, re.IGNORECASE):
187
+ return True
188
+ return False
189
+
190
+
191
+ def _get_context(text, keyword, window=80):
192
+ """Extract context around a keyword match."""
193
+ text_lower = text.lower()
194
+ idx = text_lower.find(keyword.lower())
195
+ if idx == -1:
196
+ return ""
197
+ start = max(0, idx - window)
198
+ end = min(len(text), idx + len(keyword) + window)
199
+ context = text[start:end].strip()
200
+ if start > 0:
201
+ context = "..." + context
202
+ if end < len(text):
203
+ context = context + "..."
204
+ return context
205
+
206
+
207
def _keyword_present(text_lower, keyword):
    """Return True when ``keyword`` occurs in ``text_lower`` as a term, not a word fragment.

    Plain substring tests make short acronyms fire inside unrelated words
    ("SAR" in "necessary", "DPO" in "endpoint", "AML" in "seamless"). Single-token
    keywords with two or more capitals are treated as acronyms and must match as
    a whole word (an optional plural "s" is allowed). Every other keyword only
    has to start on a word boundary, so inflected forms such as
    "service providers" still count.
    """
    needle = re.escape(keyword.lower())
    is_acronym = " " not in keyword and sum(ch.isupper() for ch in keyword) >= 2
    if is_acronym:
        pattern = rf"(?<!\w){needle}s?(?!\w)"
    else:
        pattern = rf"(?<!\w){needle}"
    return re.search(pattern, text_lower) is not None


def check_compliance(text):
    """Check contract text against all regulatory frameworks with negation handling.

    Args:
        text: Contract text to analyse. ``None`` or "" is treated as an empty
            document, so every requirement is reported as MISSING.

    Returns:
        A dict keyed by framework name (the keys of ``REGULATIONS``). Each value
        holds:
            "description":     the framework description,
            "compliance_rate": rounded percentage of requirements with status PASS,
            "checks":          one dict per requirement with "requirement",
                               "description", "severity", "status",
                               "matched_keywords" and "context" (at most two
                               snippets),
            "overall_status":  COMPLIANT (>= 80), PARTIAL (>= 40) or
                               NON-COMPLIANT, replaced by WARNING when a
                               CRITICAL or HIGH requirement is NEGATED,
            "negated_count" / "ambiguous_count": number of checks with that status.

        A check's status is PASS when only un-negated keywords were found,
        NEGATED when only negated ones were, AMBIGUOUS when both kinds were,
        and MISSING when no keyword was found.
    """
    text = text or ""
    text_lower = text.lower()
    results = {}

    for reg_name, reg_data in REGULATIONS.items():
        checks = []
        for req_name, req_data in reg_data["requirements"].items():
            matched = False
            negated = False
            matched_keywords = []
            context_snippets = []

            for kw in req_data["keywords"]:
                if not _keyword_present(text_lower, kw):
                    continue
                matched_keywords.append(kw)
                # A keyword counts either as negated evidence or as positive
                # evidence, never both.
                if _check_negation(text_lower, kw):
                    negated = True
                else:
                    matched = True
                ctx = _get_context(text, kw)
                if ctx:
                    context_snippets.append(ctx)

            if matched and negated:
                status = "AMBIGUOUS"
            elif matched:
                status = "PASS"
            elif negated:
                status = "NEGATED"
            else:
                status = "MISSING"

            checks.append({
                "requirement": req_name,
                "description": req_data["description"],
                "severity": req_data["severity"],
                "status": status,
                "matched_keywords": matched_keywords,
                "context": context_snippets[:2],  # Keep top 2 context snippets
            })

        passed = sum(1 for c in checks if c["status"] == "PASS")
        total = len(checks)
        compliance_rate = round(passed / total * 100) if total > 0 else 0

        negated_count = sum(1 for c in checks if c["status"] == "NEGATED")
        ambiguous_count = sum(1 for c in checks if c["status"] == "AMBIGUOUS")

        if compliance_rate >= 80:
            overall = "COMPLIANT"
        elif compliance_rate >= 40:
            overall = "PARTIAL"
        else:
            overall = "NON-COMPLIANT"

        # An explicitly negated CRITICAL or HIGH requirement overrides the
        # percentage-based verdict.
        if any(c["status"] == "NEGATED" and c["severity"] in ("CRITICAL", "HIGH") for c in checks):
            overall = "WARNING"

        results[reg_name] = {
            "description": reg_data["description"],
            "compliance_rate": compliance_rate,
            "checks": checks,
            "overall_status": overall,
            "negated_count": negated_count,
            "ambiguous_count": ambiguous_count,
        }

    return results
279
+
280
+
281
def render_compliance_html(results):
    """Render compliance results as HTML for Gradio.

    Args:
        results: Mapping of framework name to a result dict carrying
            "description", "compliance_rate", "overall_status" and "checks",
            plus optional "negated_count" / "ambiguous_count" (default 0).
            Each check needs "description", "severity", "status" and
            "matched_keywords"; "context" is optional.

    Returns:
        One HTML string: an outer wrapper ``<div>`` containing a card per
        framework. All interpolated text is HTML-escaped, because context
        snippets are quoted straight from the analysed document.
    """
    from html import escape  # function-scope import keeps this block self-contained

    status_colors = {
        "COMPLIANT": ("#16a34a", "#f0fdf4"),
        "PARTIAL": ("#ca8a04", "#fefce8"),
        "NON-COMPLIANT": ("#dc2626", "#fef2f2"),
        "WARNING": ("#ea580c", "#fff7ed"),
    }
    neutral_colors = ("#6b7280", "#f9fafb")
    status_icons = {"PASS": "✅", "MISSING": "❌", "NEGATED": "🚫", "AMBIGUOUS": "❓"}
    status_text_map = {"PASS": "Found", "MISSING": "Missing", "NEGATED": "Negated", "AMBIGUOUS": "Ambiguous"}

    parts = ['<div style="font-family:system-ui,sans-serif;">']

    for reg_name, reg_result in results.items():
        rate = reg_result["compliance_rate"]
        status = reg_result["overall_status"]
        status_color, status_bg = status_colors.get(status, neutral_colors)

        neg = reg_result.get("negated_count", 0)
        amb = reg_result.get("ambiguous_count", 0)
        warnings = ""
        if neg > 0:
            warnings += f'<span style="font-size:10px;color:#ea580c;margin-left:8px;">⚠️ {neg} negated</span>'
        if amb > 0:
            warnings += f'<span style="font-size:10px;color:#ca8a04;margin-left:8px;">❓ {amb} ambiguous</span>'

        reg_title = escape(str(reg_name))
        reg_description = escape(str(reg_result["description"]))
        status_label = escape(str(status))

        parts.append(f'''
        <div style="border:1px solid #e5e7eb;border-radius:10px;margin-bottom:16px;overflow:hidden;">
        <div style="display:flex;justify-content:space-between;align-items:center;padding:12px 16px;background:{status_bg};border-bottom:1px solid #e5e7eb;">
        <div>
        <span style="font-size:16px;font-weight:700;color:#1f2937;">{reg_title}</span>
        {warnings}
        <p style="font-size:11px;color:#6b7280;margin:2px 0 0 0;">{reg_description}</p>
        </div>
        <div style="text-align:right;">
        <div style="font-size:24px;font-weight:700;color:{status_color};">{rate}%</div>
        <div style="font-size:11px;color:{status_color};font-weight:500;">{status_label}</div>
        </div>
        </div>
        <div style="padding:8px 16px;">
        ''')

        for check in reg_result["checks"]:
            # Unknown severities get a neutral badge instead of raising KeyError.
            color, bg = RISK_STYLES.get(check["severity"], neutral_colors)
            status_icon = status_icons.get(check["status"], "❓")
            status_text = status_text_map.get(check["status"], "Unknown")
            if check["matched_keywords"]:
                keywords = escape(", ".join(check["matched_keywords"][:3]))
            else:
                keywords = "—"

            context_html = ""
            if check.get("context"):
                # Truncate first, then escape, so an entity is never cut in half.
                ctx = escape(check["context"][0][:120])
                context_html = f'<div style="font-size:10px;color:#6b7280;margin-top:2px;font-style:italic;">"{ctx}"</div>'

            check_description = escape(str(check["description"]))
            severity_label = escape(str(check["severity"]))

            parts.append(f'''
            <div style="display:flex;justify-content:space-between;align-items:flex-start;padding:8px 0;border-bottom:1px solid #f3f4f6;">
            <div style="flex:1;">
            <div style="font-size:12px;font-weight:500;color:#374151;">{check_description}</div>
            <div style="font-size:10px;color:#9ca3af;margin-top:2px;">Keywords: {keywords}</div>
            {context_html}
            </div>
            <div style="display:flex;align-items:center;gap:6px;margin-left:8px;">
            <span style="font-size:10px;color:{color};font-weight:600;background:{bg};padding:2px 8px;border-radius:4px;">{severity_label}</span>
            <span style="font-size:13px;" title="{status_text}">{status_icon}</span>
            </div>
            </div>
            ''')

        parts.append('</div></div>')

    parts.append('</div>')
    return "".join(parts)