gaurv007 committed on
Commit
50f2675
·
verified ·
1 Parent(s): ef9dc08

fix: upload actual compliance.py content with all v4.1 fixes

Browse files
Files changed (1) hide show
  1. compliance.py +386 -1
compliance.py CHANGED
@@ -1 +1,386 @@
1
- file:/app/compliance.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ClauseGuard — Compliance Checker v3.1
3
+ ═════════════════════════════════════
4
+ FIXED in v3.1:
5
+ • FIX: Expanded negation window from 100 to 200 chars to catch cross-sentence negation
6
+ • FIX: Added sentence-boundary-aware negation detection
7
+ • FIX: Improved context extraction with sentence boundaries
8
+ • FIX: Added AMBIGUOUS handling for mixed positive/negative signals
9
+ """
10
+
11
+ import re
12
+ from collections import defaultdict
13
+
14
+ # Negation patterns that invert compliance meaning
15
+ _NEGATION_PATTERNS = [
16
+ r"(?:does?\s+)?not\s+(?:require|provide|include|offer|grant|guarantee|ensure|maintain|comply|adhere|support|acknowledge)",
17
+ r"(?:no|without)\s+(?:obligation|requirement|guarantee|warranty|commitment|responsibility|duty)",
18
+ r"(?:exclud|waiv|disclaim|exempt|refus|deny|reject|eliminat|remov|revok)",
19
+ r"shall\s+not\s+be\s+(?:required|obligated|responsible|liable|bound|subject)",
20
+ r"is\s+not\s+(?:responsible|liable|required|obligated|bound|subject)",
21
+ r"expressly\s+(?:disclaim|exclud|waiv|reject)",
22
+ r"to\s+the\s+(?:maximum|fullest)\s+extent\s+(?:permitted|allowed).*(?:disclaim|exclud|waiv)",
23
+ r"notwithstanding.*(?:shall\s+not|does\s+not|is\s+not)",
24
+ ]
25
+
26
# Catalogue of regulatory frameworks and the requirements checked for each.
#
# Shape:
#   REGULATIONS[framework] = {
#       "description": human-readable framework name,
#       "requirements": {
#           requirement_id: {
#               "keywords":    phrases searched for in the contract text,
#               "description": what the requirement demands,
#               "severity":    one of CRITICAL / HIGH / MEDIUM / LOW,
#           },
#       },
#   }
REGULATIONS = {
    # ------------------------------------------------------------------ GDPR
    "GDPR": {
        "description": "EU General Data Protection Regulation (Regulation 2016/679)",
        "requirements": {
            "lawful_basis": {
                "keywords": [
                    "lawful basis",
                    "legal basis",
                    "legitimate interest",
                    "consent",
                    "performance of contract",
                    "legal obligation",
                ],
                "description": "Must specify lawful basis for data processing (Art. 6)",
                "severity": "HIGH",
            },
            "data_subject_rights": {
                "keywords": [
                    "right to access",
                    "right to erasure",
                    "right to be forgotten",
                    "data portability",
                    "rectification",
                    "object to processing",
                ],
                "description": "Must acknowledge data subject rights (Arts. 15-22)",
                "severity": "HIGH",
            },
            "data_breach_notification": {
                "keywords": [
                    "data breach",
                    "breach notification",
                    "notify supervisory authority",
                    "72 hours",
                ],
                "description": "Must include data breach notification obligations (Art. 33)",
                "severity": "MEDIUM",
            },
            "data_protection_officer": {
                "keywords": ["data protection officer", "DPO"],
                "description": "Should reference Data Protection Officer if applicable (Art. 37)",
                "severity": "LOW",
            },
            "cross_border_transfer": {
                "keywords": [
                    "standard contractual clauses",
                    "SCCs",
                    "adequacy decision",
                    "transfer mechanism",
                    "third country",
                ],
                "description": "Must specify transfer safeguards for cross-border data (Arts. 44-49)",
                "severity": "HIGH",
            },
            "privacy_by_design": {
                "keywords": [
                    "privacy by design",
                    "privacy by default",
                    "data minimization",
                    "purpose limitation",
                ],
                "description": "Should reference privacy-by-design principles (Art. 25)",
                "severity": "MEDIUM",
            },
            "data_processing_agreement": {
                "keywords": [
                    "data processing agreement",
                    "DPA",
                    "data processor",
                    "sub-processor",
                ],
                "description": "Must include data processing agreement if sharing data (Art. 28)",
                "severity": "HIGH",
            },
        },
    },
    # ------------------------------------------------------------------ CCPA
    "CCPA": {
        "description": "California Consumer Privacy Act (Cal. Civ. Code § 1798.100 et seq.)",
        "requirements": {
            "consumer_rights": {
                "keywords": [
                    "right to know",
                    "right to delete",
                    "right to opt out",
                    "right to non-discrimination",
                    "consumer rights",
                ],
                "description": "Must acknowledge California consumer rights",
                "severity": "HIGH",
            },
            "data_categories": {
                "keywords": [
                    "categories of personal information",
                    "personal information categories",
                    "identifiers",
                    "commercial information",
                ],
                "description": "Must disclose categories of personal information collected",
                "severity": "HIGH",
            },
            "sale_of_data": {
                "keywords": [
                    "do not sell my personal information",
                    "opt-out of sale",
                    "sale of personal information",
                ],
                "description": "Must provide opt-out mechanism for data sales",
                "severity": "HIGH",
            },
            "service_providers": {
                "keywords": [
                    "service provider",
                    "third party",
                    "contractor",
                    "business purpose",
                ],
                "description": "Should limit data use to business/service provider purposes",
                "severity": "MEDIUM",
            },
        },
    },
    # ------------------------------------------------------------------- SOX
    "SOX": {
        "description": "Sarbanes-Oxley Act (US, 2002)",
        "requirements": {
            "internal_controls": {
                "keywords": [
                    "internal controls",
                    "internal control over financial reporting",
                    "ICFR",
                ],
                "description": "Must reference internal controls over financial reporting (§ 404)",
                "severity": "HIGH",
            },
            "audit_committee": {
                "keywords": ["audit committee", "independent auditor", "PCAOB"],
                "description": "Should reference audit committee oversight",
                "severity": "MEDIUM",
            },
            "whistleblower": {
                "keywords": [
                    "whistleblower",
                    "anonymous reporting",
                    "reporting hotline",
                    "retaliation",
                ],
                "description": "Should protect whistleblower provisions (§ 806)",
                "severity": "HIGH",
            },
            "document_retention": {
                "keywords": [
                    "document retention",
                    "record retention",
                    "retention policy",
                    "preserve records",
                ],
                "description": "Must include document retention obligations (§ 802)",
                "severity": "HIGH",
            },
        },
    },
    # ----------------------------------------------------------------- HIPAA
    "HIPAA": {
        "description": "Health Insurance Portability and Accountability Act (US, 1996)",
        "requirements": {
            "phi_protection": {
                "keywords": [
                    "protected health information",
                    "PHI",
                    "health information",
                    "ePHI",
                ],
                "description": "Must protect PHI and limit uses/disclosures",
                "severity": "CRITICAL",
            },
            "business_associate": {
                "keywords": [
                    "business associate agreement",
                    "BAA",
                    "business associate",
                    "covered entity",
                ],
                "description": "Should reference Business Associate Agreement (§ 164.504(e))",
                "severity": "HIGH",
            },
            "security_safeguards": {
                "keywords": [
                    "administrative safeguards",
                    "technical safeguards",
                    "physical safeguards",
                    "encryption",
                    "access controls",
                ],
                "description": "Must implement security safeguards (§ 164.308-312)",
                "severity": "HIGH",
            },
            "breach_notification": {
                "keywords": [
                    "breach notification",
                    "notification of breach",
                    "unauthorized access",
                ],
                "description": "Must include breach notification obligations (§ 164.400-414)",
                "severity": "HIGH",
            },
        },
    },
    # ----------------------------------------------------------------- FINRA
    "FINRA": {
        "description": "Financial Industry Regulatory Authority (US)",
        "requirements": {
            "recordkeeping": {
                "keywords": [
                    "recordkeeping",
                    "books and records",
                    "retain records",
                    "SEC Rule 17a-4",
                ],
                "description": "Must comply with recordkeeping rules (FINRA Rule 4511)",
                "severity": "HIGH",
            },
            "supervision": {
                "keywords": ["supervision", "supervisory system", "review and approval"],
                "description": "Should reference supervisory obligations (FINRA Rule 3110)",
                "severity": "MEDIUM",
            },
            "anti_money_laundering": {
                "keywords": [
                    "anti-money laundering",
                    "AML",
                    "suspicious activity",
                    "SAR",
                    "OFAC",
                ],
                "description": "Must reference AML compliance (FINRA Rule 3310)",
                "severity": "HIGH",
            },
            "privacy": {
                "keywords": [
                    "privacy policy",
                    "customer information",
                    "Regulation S-P",
                    "nonpublic personal information",
                ],
                "description": "Must protect customer information (Regulation S-P)",
                "severity": "HIGH",
            },
        },
    },
}
169
+
170
# Badge colours per severity level, stored as (text colour, background colour).
# render_compliance_html unpacks each pair into the CSS `color` and
# `background` of the severity badge.
RISK_STYLES = {
    "CRITICAL": ("#dc2626", "#fef2f2"),  # red
    "HIGH": ("#ea580c", "#fff7ed"),  # orange
    "MEDIUM": ("#ca8a04", "#fefce8"),  # amber
    "LOW": ("#16a34a", "#f0fdf4"),  # green
}
176
+
177
+
178
+ def _get_sentence_containing(text_lower, keyword_lower, start_idx):
179
+ """FIX v3.1: Extract the full sentence containing the keyword match."""
180
+ # Find sentence boundaries around the match
181
+ # Look backward for sentence start
182
+ sent_start = start_idx
183
+ for i in range(start_idx - 1, max(0, start_idx - 500), -1):
184
+ if text_lower[i] in '.!?' and i < start_idx - 2:
185
+ sent_start = i + 1
186
+ break
187
+ else:
188
+ sent_start = max(0, start_idx - 500)
189
+
190
+ # Look forward for sentence end
191
+ sent_end = start_idx + len(keyword_lower)
192
+ for i in range(sent_end, min(len(text_lower), sent_end + 500)):
193
+ if text_lower[i] in '.!?':
194
+ sent_end = i + 1
195
+ break
196
+ else:
197
+ sent_end = min(len(text_lower), sent_end + 500)
198
+
199
+ return text_lower[sent_start:sent_end].strip()
200
+
201
+
202
+ def _check_negation(text_lower, keyword, window=200):
203
+ """FIX v3.1: Check if a keyword match is negated — uses sentence-aware window."""
204
+ idx = text_lower.find(keyword.lower())
205
+ if idx == -1:
206
+ return False
207
+
208
+ # Get sentence-aware context (more accurate than fixed window)
209
+ sentence = _get_sentence_containing(text_lower, keyword.lower(), idx)
210
+
211
+ # Also get a wider window for cross-sentence negation
212
+ start = max(0, idx - window)
213
+ end = min(len(text_lower), idx + len(keyword) + window)
214
+ wider_context = text_lower[start:end]
215
+
216
+ # Check sentence first (higher confidence)
217
+ for neg_pat in _NEGATION_PATTERNS:
218
+ if re.search(neg_pat, sentence, re.IGNORECASE):
219
+ return True
220
+
221
+ # Then check wider window (lower confidence, still relevant)
222
+ for neg_pat in _NEGATION_PATTERNS[:4]: # Only strong negation patterns for wider window
223
+ if re.search(neg_pat, wider_context, re.IGNORECASE):
224
+ return True
225
+
226
+ return False
227
+
228
+
229
+ def _get_context(text, keyword, window=100):
230
+ """Extract context around a keyword match with sentence boundaries."""
231
+ text_lower = text.lower()
232
+ idx = text_lower.find(keyword.lower())
233
+ if idx == -1:
234
+ return ""
235
+ start = max(0, idx - window)
236
+ end = min(len(text), idx + len(keyword) + window)
237
+ context = text[start:end].strip()
238
+ if start > 0:
239
+ context = "..." + context
240
+ if end < len(text):
241
+ context = context + "..."
242
+ return context
243
+
244
+
245
def _keyword_present(text_lower, keyword):
    """Return True if *keyword* occurs in *text_lower* as a word, not a fragment.

    The match must not start in the middle of a word, so "SAR" no longer hits
    inside "necessary" nor "AML" inside "seamless". All-uppercase keywords
    (acronyms such as "PHI") must also end at a word boundary, optionally
    after a plural "s", so "PHI" does not hit "philosophy". Other keywords may
    be followed by more letters so inflections still match
    ("data breach" in "data breaches").
    """
    pattern = r"(?<!\w)" + re.escape(keyword.lower())
    if keyword.isupper():
        pattern += r"s?(?!\w)"
    return re.search(pattern, text_lower) is not None


def check_compliance(text):
    """Check contract text against every framework in ``REGULATIONS``.

    For each requirement, every keyword found in the text is classified as
    affirmed or negated (via ``_check_negation``). The requirement's status is:

    * ``PASS``      - at least one affirmed keyword, none negated
    * ``NEGATED``   - only negated keywords
    * ``AMBIGUOUS`` - both affirmed and negated keywords
    * ``MISSING``   - no keyword found

    Args:
        text: Contract text. ``None`` is treated as empty text.

    Returns:
        A dict keyed by framework name. Each value has ``description``,
        ``compliance_rate`` (percentage of requirements with status PASS),
        ``checks`` (one dict per requirement with ``requirement``,
        ``description``, ``severity``, ``status``, ``matched_keywords`` and up
        to two ``context`` snippets), ``overall_status``, ``negated_count``
        and ``ambiguous_count``.
    """
    text = text or ""  # tolerate None from an empty input widget
    text_lower = text.lower()
    results = {}

    for reg_name, reg_data in REGULATIONS.items():
        checks = []
        for req_name, req_data in reg_data["requirements"].items():
            matched = False
            negated = False
            matched_keywords = []
            context_snippets = []

            for kw in req_data["keywords"]:
                # Word-aware test instead of a raw substring test, so short
                # acronyms do not match inside unrelated words.
                if not _keyword_present(text_lower, kw):
                    continue
                matched_keywords.append(kw)
                if _check_negation(text_lower, kw):
                    negated = True
                else:
                    matched = True
                ctx = _get_context(text, kw)
                if ctx:
                    context_snippets.append(ctx)

            if matched and negated:
                status = "AMBIGUOUS"
            elif matched:
                status = "PASS"
            elif negated:
                status = "NEGATED"
            else:
                status = "MISSING"

            checks.append({
                "requirement": req_name,
                "description": req_data["description"],
                "severity": req_data["severity"],
                "status": status,
                "matched_keywords": matched_keywords,
                "context": context_snippets[:2],
            })

        total = len(checks)
        passed = sum(1 for c in checks if c["status"] == "PASS")
        compliance_rate = round(passed / total * 100) if total > 0 else 0

        negated_count = sum(1 for c in checks if c["status"] == "NEGATED")
        ambiguous_count = sum(1 for c in checks if c["status"] == "AMBIGUOUS")

        if compliance_rate >= 80:
            overall = "COMPLIANT"
        elif compliance_rate >= 40:
            overall = "PARTIAL"
        else:
            overall = "NON-COMPLIANT"

        # A negated CRITICAL/HIGH requirement overrides the rate-based verdict,
        # whatever that verdict was (including NON-COMPLIANT).
        if any(
            c["status"] == "NEGATED" and c["severity"] in ("CRITICAL", "HIGH")
            for c in checks
        ):
            overall = "WARNING"

        results[reg_name] = {
            "description": reg_data["description"],
            "compliance_rate": compliance_rate,
            "checks": checks,
            "overall_status": overall,
            "negated_count": negated_count,
            "ambiguous_count": ambiguous_count,
        }

    return results
314
+
315
+
316
def render_compliance_html(results):
    """Render compliance results as an HTML fragment for Gradio.

    Args:
        results: Mapping of framework name to a result dict in the shape
            produced by ``check_compliance``. Each needs ``compliance_rate``,
            ``overall_status``, ``description`` and ``checks``;
            ``negated_count`` and ``ambiguous_count`` are optional.

    Returns:
        One HTML string: a wrapper ``<div>`` holding one card per framework,
        each with a header (name, warnings, rate, status) and one row per
        requirement check.
    """
    # Imported locally so this function carries its own dependency and the
    # module-level import block is left untouched.
    from html import escape

    # Lookup tables are loop-invariant, so build them once.
    status_colors = {
        "COMPLIANT": ("#16a34a", "#f0fdf4"),
        "PARTIAL": ("#ca8a04", "#fefce8"),
        "NON-COMPLIANT": ("#dc2626", "#fef2f2"),
        "WARNING": ("#ea580c", "#fff7ed"),
    }
    status_icons = {"PASS": "✅", "MISSING": "❌", "NEGATED": "🚫", "AMBIGUOUS": "❓"}
    status_text_map = {"PASS": "Found", "MISSING": "Missing", "NEGATED": "Negated", "AMBIGUOUS": "Ambiguous"}
    neutral_style = ("#6b7280", "#f9fafb")  # grey, for unknown status/severity

    # Collect fragments and join once rather than repeatedly growing a string.
    parts = ['<div style="font-family:system-ui,sans-serif;">']

    for reg_name, reg_result in results.items():
        rate = reg_result["compliance_rate"]
        status = reg_result["overall_status"]
        status_color, status_bg = status_colors.get(status, neutral_style)

        neg = reg_result.get("negated_count", 0)
        amb = reg_result.get("ambiguous_count", 0)
        warnings = ""
        if neg > 0:
            warnings += f'<span style="font-size:10px;color:#ea580c;margin-left:8px;">⚠️ {neg} negated</span>'
        if amb > 0:
            warnings += f'<span style="font-size:10px;color:#ca8a04;margin-left:8px;">❓ {amb} ambiguous</span>'

        # Escape all interpolated text so a stray "&", "<" or quote cannot
        # break the markup.
        safe_name = escape(str(reg_name))
        safe_desc = escape(str(reg_result["description"]))
        safe_status = escape(str(status))

        parts.append(f'''
<div style="border:1px solid #e5e7eb;border-radius:10px;margin-bottom:16px;overflow:hidden;">
  <div style="display:flex;justify-content:space-between;align-items:center;padding:12px 16px;background:{status_bg};border-bottom:1px solid #e5e7eb;">
    <div>
      <span style="font-size:16px;font-weight:700;color:#1f2937;">{safe_name}</span>
      {warnings}
      <p style="font-size:11px;color:#6b7280;margin:2px 0 0 0;">{safe_desc}</p>
    </div>
    <div style="text-align:right;">
      <div style="font-size:24px;font-weight:700;color:{status_color};">{rate}%</div>
      <div style="font-size:11px;color:{status_color};font-weight:500;">{safe_status}</div>
    </div>
  </div>
  <div style="padding:8px 16px;">
''')

        for check in reg_result["checks"]:
            # An unrecognised severity gets the neutral style, not a KeyError.
            color, bg = RISK_STYLES.get(check["severity"], neutral_style)
            status_icon = status_icons.get(check["status"], "❓")
            status_text = status_text_map.get(check["status"], "Unknown")
            if check["matched_keywords"]:
                keywords = escape(", ".join(check["matched_keywords"][:3]))
            else:
                keywords = "—"

            context_html = ""
            if check.get("context"):
                # The snippet is raw contract text (untrusted): truncate it,
                # then escape &, <, > and quotes.
                ctx = escape(check["context"][0][:120])
                context_html = f'<div style="font-size:10px;color:#6b7280;margin-top:2px;font-style:italic;">"{ctx}"</div>'

            safe_check_desc = escape(str(check["description"]))
            safe_severity = escape(str(check["severity"]))

            parts.append(f'''
<div style="display:flex;justify-content:space-between;align-items:flex-start;padding:8px 0;border-bottom:1px solid #f3f4f6;">
  <div style="flex:1;">
    <div style="font-size:12px;font-weight:500;color:#374151;">{safe_check_desc}</div>
    <div style="font-size:10px;color:#9ca3af;margin-top:2px;">Keywords: {keywords}</div>
    {context_html}
  </div>
  <div style="display:flex;align-items:center;gap:6px;margin-left:8px;">
    <span style="font-size:10px;color:{color};font-weight:600;background:{bg};padding:2px 8px;border-radius:4px;">{safe_severity}</span>
    <span style="font-size:13px;" title="{status_text}">{status_icon}</span>
  </div>
</div>
''')

        # Close the check list and the framework card.
        parts.append('</div></div>')

    parts.append('</div>')
    return "".join(parts)