"""
ClauseGuard — Compliance Checker v3.1
─────────────────────────────────────
FIXED in v3.1:
• FIX: Expanded negation window from 100 to 200 chars to catch cross-sentence negation
• FIX: Added sentence-boundary-aware negation detection
• FIX: Improved context extraction with sentence boundaries
• FIX: Added AMBIGUOUS handling for mixed positive/negative signals
"""
import html
import re
from collections import defaultdict
| # Negation patterns that invert compliance meaning | |
| _NEGATION_PATTERNS = [ | |
| r"(?:does?\s+)?not\s+(?:require|provide|include|offer|grant|guarantee|ensure|maintain|comply|adhere|support|acknowledge)", | |
| r"(?:no|without)\s+(?:obligation|requirement|guarantee|warranty|commitment|responsibility|duty)", | |
| r"(?:exclud|waiv|disclaim|exempt|refus|deny|reject|eliminat|remov|revok)", | |
| r"shall\s+not\s+be\s+(?:required|obligated|responsible|liable|bound|subject)", | |
| r"is\s+not\s+(?:responsible|liable|required|obligated|bound|subject)", | |
| r"expressly\s+(?:disclaim|exclud|waiv|reject)", | |
| r"to\s+the\s+(?:maximum|fullest)\s+extent\s+(?:permitted|allowed).*(?:disclaim|exclud|waiv)", | |
| r"notwithstanding.*(?:shall\s+not|does\s+not|is\s+not)", | |
| ] | |
| # FIX v4.2: Pre-compile negation patterns at module level | |
| _NEGATION_PATTERNS_COMPILED = [re.compile(p, re.IGNORECASE) for p in _NEGATION_PATTERNS] | |
# Regulatory requirement definitions.
# Shape: {regulation: {"description": str,
#                      "requirements": {requirement_id: {"keywords": [str, ...],
#                                                        "description": str,
#                                                        "severity": str}}}}
# Every "severity" value used here is a key of RISK_STYLES.
REGULATIONS = {
    "GDPR": {
        "description": "EU General Data Protection Regulation (Regulation 2016/679)",
        "requirements": {
            "lawful_basis": {
                "keywords": [
                    "lawful basis",
                    "legal basis",
                    "legitimate interest",
                    "consent",
                    "performance of contract",
                    "legal obligation",
                ],
                "description": "Must specify lawful basis for data processing (Art. 6)",
                "severity": "HIGH",
            },
            "data_subject_rights": {
                "keywords": [
                    "right to access",
                    "right to erasure",
                    "right to be forgotten",
                    "data portability",
                    "rectification",
                    "object to processing",
                ],
                "description": "Must acknowledge data subject rights (Arts. 15-22)",
                "severity": "HIGH",
            },
            "data_breach_notification": {
                "keywords": ["data breach", "breach notification", "notify supervisory authority", "72 hours"],
                "description": "Must include data breach notification obligations (Art. 33)",
                "severity": "MEDIUM",
            },
            "data_protection_officer": {
                "keywords": ["data protection officer", "DPO"],
                "description": "Should reference Data Protection Officer if applicable (Art. 37)",
                "severity": "LOW",
            },
            "cross_border_transfer": {
                "keywords": [
                    "standard contractual clauses",
                    "SCCs",
                    "adequacy decision",
                    "transfer mechanism",
                    "third country",
                ],
                "description": "Must specify transfer safeguards for cross-border data (Arts. 44-49)",
                "severity": "HIGH",
            },
            "privacy_by_design": {
                "keywords": ["privacy by design", "privacy by default", "data minimization", "purpose limitation"],
                "description": "Should reference privacy-by-design principles (Art. 25)",
                "severity": "MEDIUM",
            },
            "data_processing_agreement": {
                "keywords": ["data processing agreement", "DPA", "data processor", "sub-processor"],
                "description": "Must include data processing agreement if sharing data (Art. 28)",
                "severity": "HIGH",
            },
        },
    },
    "CCPA": {
        "description": "California Consumer Privacy Act (Cal. Civ. Code § 1798.100 et seq.)",
        "requirements": {
            "consumer_rights": {
                "keywords": [
                    "right to know",
                    "right to delete",
                    "right to opt out",
                    "right to non-discrimination",
                    "consumer rights",
                ],
                "description": "Must acknowledge California consumer rights",
                "severity": "HIGH",
            },
            "data_categories": {
                "keywords": [
                    "categories of personal information",
                    "personal information categories",
                    "identifiers",
                    "commercial information",
                ],
                "description": "Must disclose categories of personal information collected",
                "severity": "HIGH",
            },
            "sale_of_data": {
                "keywords": [
                    "do not sell my personal information",
                    "opt-out of sale",
                    "sale of personal information",
                ],
                "description": "Must provide opt-out mechanism for data sales",
                "severity": "HIGH",
            },
            "service_providers": {
                "keywords": ["service provider", "third party", "contractor", "business purpose"],
                "description": "Should limit data use to business/service provider purposes",
                "severity": "MEDIUM",
            },
        },
    },
    "SOX": {
        "description": "Sarbanes-Oxley Act (US, 2002)",
        "requirements": {
            "internal_controls": {
                "keywords": ["internal controls", "internal control over financial reporting", "ICFR"],
                "description": "Must reference internal controls over financial reporting (§ 404)",
                "severity": "HIGH",
            },
            "audit_committee": {
                "keywords": ["audit committee", "independent auditor", "PCAOB"],
                "description": "Should reference audit committee oversight",
                "severity": "MEDIUM",
            },
            "whistleblower": {
                "keywords": ["whistleblower", "anonymous reporting", "reporting hotline", "retaliation"],
                "description": "Should protect whistleblower provisions (§ 806)",
                "severity": "HIGH",
            },
            "document_retention": {
                "keywords": ["document retention", "record retention", "retention policy", "preserve records"],
                "description": "Must include document retention obligations (§ 802)",
                "severity": "HIGH",
            },
        },
    },
    "HIPAA": {
        "description": "Health Insurance Portability and Accountability Act (US, 1996)",
        "requirements": {
            "phi_protection": {
                "keywords": ["protected health information", "PHI", "health information", "ePHI"],
                "description": "Must protect PHI and limit uses/disclosures",
                "severity": "CRITICAL",
            },
            "business_associate": {
                "keywords": ["business associate agreement", "BAA", "business associate", "covered entity"],
                "description": "Should reference Business Associate Agreement (§ 164.504(e))",
                "severity": "HIGH",
            },
            "security_safeguards": {
                "keywords": [
                    "administrative safeguards",
                    "technical safeguards",
                    "physical safeguards",
                    "encryption",
                    "access controls",
                ],
                "description": "Must implement security safeguards (§ 164.308-312)",
                "severity": "HIGH",
            },
            "breach_notification": {
                "keywords": ["breach notification", "notification of breach", "unauthorized access"],
                "description": "Must include breach notification obligations (§ 164.400-414)",
                "severity": "HIGH",
            },
        },
    },
    "FINRA": {
        "description": "Financial Industry Regulatory Authority (US)",
        "requirements": {
            "recordkeeping": {
                "keywords": ["recordkeeping", "books and records", "retain records", "SEC Rule 17a-4"],
                "description": "Must comply with recordkeeping rules (FINRA Rule 4511)",
                "severity": "HIGH",
            },
            "supervision": {
                "keywords": ["supervision", "supervisory system", "review and approval"],
                "description": "Should reference supervisory obligations (FINRA Rule 3110)",
                "severity": "MEDIUM",
            },
            "anti_money_laundering": {
                "keywords": ["anti-money laundering", "AML", "suspicious activity", "SAR", "OFAC"],
                "description": "Must reference AML compliance (FINRA Rule 3310)",
                "severity": "HIGH",
            },
            "privacy": {
                "keywords": [
                    "privacy policy",
                    "customer information",
                    "Regulation S-P",
                    "nonpublic personal information",
                ],
                "description": "Must protect customer information (Regulation S-P)",
                "severity": "HIGH",
            },
        },
    },
}
# Severity -> (foreground colour, background colour) used for the severity
# badge in the rendered HTML report.
RISK_STYLES = {
    "CRITICAL": ("#dc2626", "#fef2f2"),
    "HIGH": ("#ea580c", "#fff7ed"),
    "MEDIUM": ("#ca8a04", "#fefce8"),
    "LOW": ("#16a34a", "#f0fdf4"),
}
| def _get_sentence_containing(text_lower, keyword_lower, start_idx): | |
| """FIX v3.1: Extract the full sentence containing the keyword match.""" | |
| # Find sentence boundaries around the match | |
| # Look backward for sentence start | |
| sent_start = start_idx | |
| for i in range(start_idx - 1, max(0, start_idx - 500), -1): | |
| if text_lower[i] in '.!?' and i < start_idx - 2: | |
| sent_start = i + 1 | |
| break | |
| else: | |
| sent_start = max(0, start_idx - 500) | |
| # Look forward for sentence end | |
| sent_end = start_idx + len(keyword_lower) | |
| for i in range(sent_end, min(len(text_lower), sent_end + 500)): | |
| if text_lower[i] in '.!?': | |
| sent_end = i + 1 | |
| break | |
| else: | |
| sent_end = min(len(text_lower), sent_end + 500) | |
| return text_lower[sent_start:sent_end].strip() | |
def _check_negation(text_lower, keyword, window=200):
    """Return True when the first occurrence of *keyword* appears negated.

    Args:
        text_lower: Lower-cased contract text.
        keyword: Keyword to locate (lower-cased here before searching).
        window: Characters inspected on each side of the match for
            cross-sentence negation.

    Returns:
        True if any negation pattern matches the sentence containing the
        keyword, or if one of the first four negation patterns matches the
        wider window around it; False otherwise, including when the keyword is
        absent.
    """
    keyword_lower = keyword.lower()

    # Prefer a whole-word occurrence (optionally plural) so that short keywords
    # such as "sar" or "dpo" are not located inside "necessary" / "endpoint",
    # which would judge negation around unrelated text. Fall back to a plain
    # substring search so every keyword present in the text is still evaluated.
    whole_word = None
    if keyword_lower:
        whole_word = re.search(
            r"(?<!\w)" + re.escape(keyword_lower) + r"(?:e?s)?(?!\w)", text_lower
        )
    idx = whole_word.start() if whole_word else text_lower.find(keyword_lower)
    if idx == -1:
        return False

    # Sentence-level context: highest confidence, so every pattern applies.
    sentence = _get_sentence_containing(text_lower, keyword_lower, idx)
    if any(neg_pat.search(sentence) for neg_pat in _NEGATION_PATTERNS_COMPILED):
        return True

    # Wider fixed window for cross-sentence negation: lower confidence, so only
    # the first four patterns are applied.
    # NOTE(review): that subset includes the bare-stem pattern ("remov",
    # "reject", ...), which matches easily in a window this wide - confirm it
    # is meant to be treated as a strong pattern.
    start = max(0, idx - window)
    end = min(len(text_lower), idx + len(keyword_lower) + window)
    wider_context = text_lower[start:end]
    return any(
        neg_pat.search(wider_context) for neg_pat in _NEGATION_PATTERNS_COMPILED[:4]
    )
| def _get_context(text, keyword, window=100): | |
| """Extract context around a keyword match with sentence boundaries.""" | |
| text_lower = text.lower() | |
| idx = text_lower.find(keyword.lower()) | |
| if idx == -1: | |
| return "" | |
| start = max(0, idx - window) | |
| end = min(len(text), idx + len(keyword) + window) | |
| context = text[start:end].strip() | |
| if start > 0: | |
| context = "..." + context | |
| if end < len(text): | |
| context = context + "..." | |
| return context | |
def _keyword_present(text_lower, keyword):
    """Return True when *keyword* occurs in *text_lower* as a whole word/phrase.

    A trailing plural ("s"/"es") is tolerated. Requiring word boundaries stops
    short acronyms from matching inside unrelated words (e.g. "DPO" in
    "endpoint", "SAR" in "necessary", "AML" in "seamless").
    """
    pattern = r"(?<!\w)" + re.escape(keyword.lower()) + r"(?:e?s)?(?!\w)"
    return re.search(pattern, text_lower) is not None


def check_compliance(text):
    """Check contract text against every framework in REGULATIONS.

    Each requirement gets one of four statuses:
        PASS       - at least one keyword found, none of them negated
        NEGATED    - keywords found, all of them negated
        AMBIGUOUS  - some keywords negated and some not
        MISSING    - no keyword found

    Args:
        text: Contract text; None or "" is treated as an empty contract.

    Returns:
        dict mapping regulation name to a dict with keys "description",
        "compliance_rate" (percentage of PASS checks, rounded), "checks"
        (one dict per requirement), "overall_status", "negated_count" and
        "ambiguous_count".
    """
    text = text or ""
    text_lower = text.lower()
    results = {}

    for reg_name, reg_data in REGULATIONS.items():
        checks = []
        for req_name, req_data in reg_data["requirements"].items():
            matched = False
            negated = False
            matched_keywords = []
            context_snippets = []

            for kw in req_data["keywords"]:
                if not _keyword_present(text_lower, kw):
                    continue
                matched_keywords.append(kw)
                if _check_negation(text_lower, kw):
                    negated = True
                else:
                    matched = True
                ctx = _get_context(text, kw)
                if ctx:
                    context_snippets.append(ctx)

            if matched and negated:
                status = "AMBIGUOUS"
            elif matched:
                status = "PASS"
            elif negated:
                status = "NEGATED"
            else:
                status = "MISSING"

            checks.append({
                "requirement": req_name,
                "description": req_data["description"],
                "severity": req_data["severity"],
                "status": status,
                "matched_keywords": matched_keywords,
                "context": context_snippets[:2],  # keep the report compact
            })

        total = len(checks)
        passed = sum(1 for c in checks if c["status"] == "PASS")
        compliance_rate = round(passed / total * 100) if total > 0 else 0
        negated_count = sum(1 for c in checks if c["status"] == "NEGATED")
        ambiguous_count = sum(1 for c in checks if c["status"] == "AMBIGUOUS")

        if compliance_rate >= 80:
            overall = "COMPLIANT"
        elif compliance_rate >= 40:
            overall = "PARTIAL"
        else:
            overall = "NON-COMPLIANT"

        # An explicitly negated high-impact requirement overrides the rate-based
        # verdict, whatever the pass rate is.
        if any(
            c["status"] == "NEGATED" and c["severity"] in ("CRITICAL", "HIGH")
            for c in checks
        ):
            overall = "WARNING"

        results[reg_name] = {
            "description": reg_data["description"],
            "compliance_rate": compliance_rate,
            "checks": checks,
            "overall_status": overall,
            "negated_count": negated_count,
            "ambiguous_count": ambiguous_count,
        }

    return results
def render_compliance_html(results):
    """Render compliance results as an HTML fragment for Gradio.

    Args:
        results: Mapping of regulation name to a result dict with the keys
            "description", "compliance_rate", "overall_status" and "checks",
            plus optional "negated_count" / "ambiguous_count" (the structure
            produced by check_compliance).

    Returns:
        A single HTML string. All text taken from *results* - including
        context snippets, which originate from the analysed contract - is
        HTML-escaped before being interpolated.
    """
    status_colors = {
        "COMPLIANT": ("#16a34a", "#f0fdf4"),
        "PARTIAL": ("#ca8a04", "#fefce8"),
        "NON-COMPLIANT": ("#dc2626", "#fef2f2"),
        "WARNING": ("#ea580c", "#fff7ed"),
    }
    status_icons = {"PASS": "✅", "MISSING": "❌", "NEGATED": "🚫", "AMBIGUOUS": "❓"}
    status_text_map = {
        "PASS": "Found",
        "MISSING": "Missing",
        "NEGATED": "Negated",
        "AMBIGUOUS": "Ambiguous",
    }
    # Neutral grey used for an unknown overall status or an unknown severity.
    fallback_style = ("#6b7280", "#f9fafb")

    # Collect fragments and join once instead of repeated string concatenation.
    parts = ['<div style="font-family:system-ui,sans-serif;">']

    for reg_name, reg_result in results.items():
        rate = reg_result["compliance_rate"]
        status = reg_result["overall_status"]
        status_color, status_bg = status_colors.get(status, fallback_style)

        neg = reg_result.get("negated_count", 0)
        amb = reg_result.get("ambiguous_count", 0)
        warnings = ""
        if neg > 0:
            warnings += f'<span style="font-size:10px;color:#ea580c;margin-left:8px;">⚠️ {neg} negated</span>'
        if amb > 0:
            warnings += f'<span style="font-size:10px;color:#ca8a04;margin-left:8px;">❓ {amb} ambiguous</span>'

        safe_name = html.escape(str(reg_name))
        safe_description = html.escape(str(reg_result["description"]))
        safe_status = html.escape(str(status))

        parts.append(f'''
<div style="border:1px solid #e5e7eb;border-radius:10px;margin-bottom:16px;overflow:hidden;">
<div style="display:flex;justify-content:space-between;align-items:center;padding:12px 16px;background:{status_bg};border-bottom:1px solid #e5e7eb;">
<div>
<span style="font-size:16px;font-weight:700;color:#1f2937;">{safe_name}</span>
{warnings}
<p style="font-size:11px;color:#6b7280;margin:2px 0 0 0;">{safe_description}</p>
</div>
<div style="text-align:right;">
<div style="font-size:24px;font-weight:700;color:{status_color};">{rate}%</div>
<div style="font-size:11px;color:{status_color};font-weight:500;">{safe_status}</div>
</div>
</div>
<div style="padding:8px 16px;">
''')

        for check in reg_result["checks"]:
            color, bg = RISK_STYLES.get(check["severity"], fallback_style)
            status_icon = status_icons.get(check["status"], "❓")
            status_text = status_text_map.get(check["status"], "Unknown")

            if check["matched_keywords"]:
                keywords = html.escape(", ".join(check["matched_keywords"][:3]))
            else:
                keywords = "—"

            context_html = ""
            if check.get("context"):
                # Truncate first, then escape: the snippet comes from the
                # contract text and must never be emitted as raw markup.
                ctx = html.escape(check["context"][0][:120])
                context_html = (
                    '<div style="font-size:10px;color:#6b7280;margin-top:2px;'
                    f'font-style:italic;">"{ctx}"</div>'
                )

            safe_check_description = html.escape(str(check["description"]))
            safe_severity = html.escape(str(check["severity"]))

            parts.append(f'''
<div style="display:flex;justify-content:space-between;align-items:flex-start;padding:8px 0;border-bottom:1px solid #f3f4f6;">
<div style="flex:1;">
<div style="font-size:12px;font-weight:500;color:#374151;">{safe_check_description}</div>
<div style="font-size:10px;color:#9ca3af;margin-top:2px;">Keywords: {keywords}</div>
{context_html}
</div>
<div style="display:flex;align-items:center;gap:6px;margin-left:8px;">
<span style="font-size:10px;color:{color};font-weight:600;background:{bg};padding:2px 8px;border-radius:4px;">{safe_severity}</span>
<span style="font-size:13px;" title="{status_text}">{status_icon}</span>
</div>
</div>
''')

        parts.append('</div></div>')

    parts.append('</div>')
    return "".join(parts)