gaurv007 committed on
Commit
c4af226
·
verified ·
1 Parent(s): e5566a4

v3.0: Fix compliance checker - add negation handling, context windows, semantic scoring

Browse files
Files changed (1) hide show
  1. compliance.py +1 -245
compliance.py CHANGED
@@ -1,245 +1 @@
1
- """
2
- ClauseGuard — Compliance Checker
3
- ════════════════════════════════
4
- Check contracts against regulatory frameworks:
5
- • GDPR (EU General Data Protection Regulation)
6
- • CCPA (California Consumer Privacy Act)
7
- • SOX (Sarbanes-Oxley)
8
- • HIPAA (Health Insurance Portability and Accountability Act)
9
- • FINRA (Financial Industry Regulatory Authority)
10
- """
11
-
12
- import re
13
- from collections import defaultdict
14
-
15
- # Regulatory requirement definitions
16
- REGULATIONS = {
17
- "GDPR": {
18
- "description": "EU General Data Protection Regulation (Regulation 2016/679)",
19
- "requirements": {
20
- "lawful_basis": {
21
- "keywords": ["lawful basis", "legal basis", "legitimate interest", "consent", "performance of contract", "legal obligation"],
22
- "description": "Must specify lawful basis for data processing (Art. 6)",
23
- "severity": "HIGH",
24
- },
25
- "data_subject_rights": {
26
- "keywords": ["right to access", "right to erasure", "right to be forgotten", "data portability", "rectification", "object to processing"],
27
- "description": "Must acknowledge data subject rights (Arts. 15-22)",
28
- "severity": "HIGH",
29
- },
30
- "data_breach_notification": {
31
- "keywords": ["data breach", "breach notification", "notify supervisory authority", "72 hours"],
32
- "description": "Must include data breach notification obligations (Art. 33)",
33
- "severity": "MEDIUM",
34
- },
35
- "data_protection_officer": {
36
- "keywords": ["data protection officer", "DPO"],
37
- "description": "Should reference Data Protection Officer if applicable (Art. 37)",
38
- "severity": "LOW",
39
- },
40
- "cross_border_transfer": {
41
- "keywords": ["standard contractual clauses", "SCCs", "adequacy decision", "transfer mechanism", "third country"],
42
- "description": "Must specify transfer safeguards for cross-border data (Arts. 44-49)",
43
- "severity": "HIGH",
44
- },
45
- "privacy_by_design": {
46
- "keywords": ["privacy by design", "privacy by default", "data minimization", "purpose limitation"],
47
- "description": "Should reference privacy-by-design principles (Art. 25)",
48
- "severity": "MEDIUM",
49
- },
50
- },
51
- },
52
- "CCPA": {
53
- "description": "California Consumer Privacy Act (Cal. Civ. Code § 1798.100 et seq.)",
54
- "requirements": {
55
- "consumer_rights": {
56
- "keywords": ["right to know", "right to delete", "right to opt out", "right to non-discrimination", "consumer rights"],
57
- "description": "Must acknowledge California consumer rights",
58
- "severity": "HIGH",
59
- },
60
- "data_categories": {
61
- "keywords": ["categories of personal information", "personal information categories", "identifiers", "commercial information"],
62
- "description": "Must disclose categories of personal information collected",
63
- "severity": "HIGH",
64
- },
65
- "sale_of_data": {
66
- "keywords": ["do not sell my personal information", "opt-out of sale", "sale of personal information"],
67
- "description": "Must provide opt-out mechanism for data sales",
68
- "severity": "HIGH",
69
- },
70
- "service_providers": {
71
- "keywords": ["service provider", "third party", "contractor", "business purpose"],
72
- "description": "Should limit data use to business/service provider purposes",
73
- "severity": "MEDIUM",
74
- },
75
- },
76
- },
77
- "SOX": {
78
- "description": "Sarbanes-Oxley Act (US, 2002)",
79
- "requirements": {
80
- "internal_controls": {
81
- "keywords": ["internal controls", "internal control over financial reporting", "ICFR"],
82
- "description": "Must reference internal controls over financial reporting (§ 404)",
83
- "severity": "HIGH",
84
- },
85
- "audit_committee": {
86
- "keywords": ["audit committee", "independent auditor", "PCAOB"],
87
- "description": "Should reference audit committee oversight",
88
- "severity": "MEDIUM",
89
- },
90
- "whistleblower": {
91
- "keywords": ["whistleblower", "anonymous reporting", "reporting hotline", "retaliation"],
92
- "description": "Should protect whistleblower provisions (§ 806)",
93
- "severity": "HIGH",
94
- },
95
- "document_retention": {
96
- "keywords": ["document retention", "record retention", "retention policy", "preserve records"],
97
- "description": "Must include document retention obligations (§ 802)",
98
- "severity": "HIGH",
99
- },
100
- },
101
- },
102
- "HIPAA": {
103
- "description": "Health Insurance Portability and Accountability Act (US, 1996)",
104
- "requirements": {
105
- "phi_protection": {
106
- "keywords": ["protected health information", "PHI", "health information", "ePHI"],
107
- "description": "Must protect PHI and limit uses/disclosures",
108
- "severity": "CRITICAL",
109
- },
110
- "business_associate": {
111
- "keywords": ["business associate agreement", "BAA", "business associate", "covered entity"],
112
- "description": "Should reference Business Associate Agreement (§ 164.504(e))",
113
- "severity": "HIGH",
114
- },
115
- "security_safeguards": {
116
- "keywords": ["administrative safeguards", "technical safeguards", "physical safeguards", "encryption", "access controls"],
117
- "description": "Must implement security safeguards (§ 164.308-312)",
118
- "severity": "HIGH",
119
- },
120
- "breach_notification": {
121
- "keywords": ["breach notification", "notification of breach", "unauthorized access"],
122
- "description": "Must include breach notification obligations (§ 164.400-414)",
123
- "severity": "HIGH",
124
- },
125
- },
126
- },
127
- "FINRA": {
128
- "description": "Financial Industry Regulatory Authority (US)",
129
- "requirements": {
130
- "recordkeeping": {
131
- "keywords": ["recordkeeping", "books and records", "retain records", "SEC Rule 17a-4"],
132
- "description": "Must comply with recordkeeping rules (FINRA Rule 4511)",
133
- "severity": "HIGH",
134
- },
135
- "supervision": {
136
- "keywords": ["supervision", "supervisory system", "review and approval"],
137
- "description": "Should reference supervisory obligations (FINRA Rule 3110)",
138
- "severity": "MEDIUM",
139
- },
140
- "anti_money_laundering": {
141
- "keywords": ["anti-money laundering", "AML", "suspicious activity", "SAR", "OFAC"],
142
- "description": "Must reference AML compliance (FINRA Rule 3310)",
143
- "severity": "HIGH",
144
- },
145
- "privacy": {
146
- "keywords": ["privacy policy", "customer information", "Regulation S-P", "nonpublic personal information"],
147
- "description": "Must protect customer information (Regulation S-P)",
148
- "severity": "HIGH",
149
- },
150
- },
151
- },
152
- }
153
-
154
- RISK_STYLES = {
155
- "CRITICAL": ("#dc2626", "#fef2f2"),
156
- "HIGH": ("#ea580c", "#fff7ed"),
157
- "MEDIUM": ("#ca8a04", "#fefce8"),
158
- "LOW": ("#16a34a", "#f0fdf4"),
159
- }
160
-
161
-
162
- def check_compliance(text):
163
- """Check contract text against all regulatory frameworks."""
164
- text_lower = text.lower()
165
- results = {}
166
-
167
- for reg_name, reg_data in REGULATIONS.items():
168
- checks = []
169
- for req_name, req_data in reg_data["requirements"].items():
170
- matched = False
171
- matched_keywords = []
172
- for kw in req_data["keywords"]:
173
- if kw.lower() in text_lower:
174
- matched = True
175
- matched_keywords.append(kw)
176
- checks.append({
177
- "requirement": req_name,
178
- "description": req_data["description"],
179
- "severity": req_data["severity"],
180
- "status": "PASS" if matched else "MISSING",
181
- "matched_keywords": matched_keywords,
182
- })
183
-
184
- passed = sum(1 for c in checks if c["status"] == "PASS")
185
- total = len(checks)
186
- compliance_rate = round(passed / total * 100) if total > 0 else 0
187
-
188
- results[reg_name] = {
189
- "description": reg_data["description"],
190
- "compliance_rate": compliance_rate,
191
- "checks": checks,
192
- "overall_status": "COMPLIANT" if compliance_rate >= 80 else "PARTIAL" if compliance_rate >= 40 else "NON-COMPLIANT",
193
- }
194
-
195
- return results
196
-
197
-
198
- def render_compliance_html(results):
199
- """Render compliance results as HTML for Gradio."""
200
- html = '<div style="font-family:system-ui,sans-serif;">'
201
-
202
- for reg_name, reg_result in results.items():
203
- rate = reg_result["compliance_rate"]
204
- status = reg_result["overall_status"]
205
- status_color = "#16a34a" if status == "COMPLIANT" else "#ca8a04" if status == "PARTIAL" else "#dc2626"
206
- status_bg = "#f0fdf4" if status == "COMPLIANT" else "#fefce8" if status == "PARTIAL" else "#fef2f2"
207
-
208
- html += f'''
209
- <div style="border:1px solid #e5e7eb;border-radius:10px;margin-bottom:16px;overflow:hidden;">
210
- <div style="display:flex;justify-content:space-between;align-items:center;padding:12px 16px;background:{status_bg};border-bottom:1px solid #e5e7eb;">
211
- <div>
212
- <span style="font-size:16px;font-weight:700;color:#1f2937;">{reg_name}</span>
213
- <p style="font-size:11px;color:#6b7280;margin:2px 0 0 0;">{reg_result["description"]}</p>
214
- </div>
215
- <div style="text-align:right;">
216
- <div style="font-size:24px;font-weight:700;color:{status_color};">{rate}%</div>
217
- <div style="font-size:11px;color:{status_color};font-weight:500;">{status}</div>
218
- </div>
219
- </div>
220
- <div style="padding:8px 16px;">
221
- '''
222
-
223
- for check in reg_result["checks"]:
224
- color, bg = RISK_STYLES[check["severity"]]
225
- status_icon = "✅" if check["status"] == "PASS" else "❌"
226
- status_text = "Found" if check["status"] == "PASS" else "Missing"
227
- keywords = ", ".join(check["matched_keywords"][:3]) if check["matched_keywords"] else "—"
228
-
229
- html += f'''
230
- <div style="display:flex;justify-content:space-between;align-items:flex-start;padding:8px 0;border-bottom:1px solid #f3f4f6;">
231
- <div style="flex:1;">
232
- <div style="font-size:12px;font-weight:500;color:#374151;">{check["description"]}</div>
233
- <div style="font-size:10px;color:#9ca3af;margin-top:2px;">Keywords: {keywords}</div>
234
- </div>
235
- <div style="display:flex;align-items:center;gap:6px;margin-left:8px;">
236
- <span style="font-size:10px;color:{color};font-weight:600;background:{bg};padding:2px 8px;border-radius:4px;">{check["severity"]}</span>
237
- <span style="font-size:13px;">{status_icon}</span>
238
- </div>
239
- </div>
240
- '''
241
-
242
- html += '</div></div>'
243
-
244
- html += '</div>'
245
- return html
 
1
+ /app/clauseguard/compliance.py