"""
ClauseGuard — Compliance Checker v3.0
═════════════════════════════════════
FIXED in v3.0:
  • Negation handling (clause saying "we do NOT" won't score as PASS)
  • Context windows around keyword matches (shows what the clause actually says)
  • Semantic scoring (keyword proximity + negation awareness)
  • Added more regulatory frameworks
"""

import re
from collections import defaultdict
from functools import lru_cache
from html import escape

# Negation patterns that invert compliance meaning
_NEGATION_PATTERNS = [
    r"(?:does?\s+)?not\s+(?:require|provide|include|offer|grant|guarantee|ensure|maintain)",
    r"(?:no|without)\s+(?:obligation|requirement|guarantee|warranty)",
    r"(?:exclud|waiv|disclaim|exempt|refus|deny|reject)",
    r"shall\s+not\s+be\s+(?:required|obligated|responsible)",
    r"is\s+not\s+(?:responsible|liable|required|obligated)",
]

# All negation patterns folded into one precompiled alternation so that a
# context window is scanned once instead of once per pattern.
_NEGATION_RE = re.compile(
    "|".join(f"(?:{pattern})" for pattern in _NEGATION_PATTERNS),
    re.IGNORECASE,
)

# Characters inspected on each side of a keyword match.
_NEGATION_WINDOW = 100
_CONTEXT_WINDOW = 80

# Regulatory requirement definitions
REGULATIONS = {
    "GDPR": {
        "description": "EU General Data Protection Regulation (Regulation 2016/679)",
        "requirements": {
            "lawful_basis": {
                "keywords": [
                    "lawful basis",
                    "legal basis",
                    "legitimate interest",
                    "consent",
                    "performance of contract",
                    "legal obligation",
                ],
                "description": "Must specify lawful basis for data processing (Art. 6)",
                "severity": "HIGH",
            },
            "data_subject_rights": {
                "keywords": [
                    "right to access",
                    "right to erasure",
                    "right to be forgotten",
                    "data portability",
                    "rectification",
                    "object to processing",
                ],
                "description": "Must acknowledge data subject rights (Arts. 15-22)",
                "severity": "HIGH",
            },
            "data_breach_notification": {
                "keywords": [
                    "data breach",
                    "breach notification",
                    "notify supervisory authority",
                    "72 hours",
                ],
                "description": "Must include data breach notification obligations (Art. 33)",
                "severity": "MEDIUM",
            },
            "data_protection_officer": {
                "keywords": ["data protection officer", "DPO"],
                "description": "Should reference Data Protection Officer if applicable (Art. 37)",
                "severity": "LOW",
            },
            "cross_border_transfer": {
                "keywords": [
                    "standard contractual clauses",
                    "SCCs",
                    "adequacy decision",
                    "transfer mechanism",
                    "third country",
                ],
                "description": "Must specify transfer safeguards for cross-border data (Arts. 44-49)",
                "severity": "HIGH",
            },
            "privacy_by_design": {
                "keywords": [
                    "privacy by design",
                    "privacy by default",
                    "data minimization",
                    "purpose limitation",
                ],
                "description": "Should reference privacy-by-design principles (Art. 25)",
                "severity": "MEDIUM",
            },
            "data_processing_agreement": {
                "keywords": [
                    "data processing agreement",
                    "DPA",
                    "data processor",
                    "sub-processor",
                ],
                "description": "Must include data processing agreement if sharing data (Art. 28)",
                "severity": "HIGH",
            },
        },
    },
    "CCPA": {
        "description": "California Consumer Privacy Act (Cal. Civ. Code § 1798.100 et seq.)",
        "requirements": {
            "consumer_rights": {
                "keywords": [
                    "right to know",
                    "right to delete",
                    "right to opt out",
                    "right to non-discrimination",
                    "consumer rights",
                ],
                "description": "Must acknowledge California consumer rights",
                "severity": "HIGH",
            },
            "data_categories": {
                "keywords": [
                    "categories of personal information",
                    "personal information categories",
                    "identifiers",
                    "commercial information",
                ],
                "description": "Must disclose categories of personal information collected",
                "severity": "HIGH",
            },
            "sale_of_data": {
                "keywords": [
                    "do not sell my personal information",
                    "opt-out of sale",
                    "sale of personal information",
                ],
                "description": "Must provide opt-out mechanism for data sales",
                "severity": "HIGH",
            },
            "service_providers": {
                "keywords": [
                    "service provider",
                    "third party",
                    "contractor",
                    "business purpose",
                ],
                "description": "Should limit data use to business/service provider purposes",
                "severity": "MEDIUM",
            },
        },
    },
    "SOX": {
        "description": "Sarbanes-Oxley Act (US, 2002)",
        "requirements": {
            "internal_controls": {
                "keywords": [
                    "internal controls",
                    "internal control over financial reporting",
                    "ICFR",
                ],
                "description": "Must reference internal controls over financial reporting (§ 404)",
                "severity": "HIGH",
            },
            "audit_committee": {
                "keywords": ["audit committee", "independent auditor", "PCAOB"],
                "description": "Should reference audit committee oversight",
                "severity": "MEDIUM",
            },
            "whistleblower": {
                "keywords": [
                    "whistleblower",
                    "anonymous reporting",
                    "reporting hotline",
                    "retaliation",
                ],
                "description": "Should protect whistleblower provisions (§ 806)",
                "severity": "HIGH",
            },
            "document_retention": {
                "keywords": [
                    "document retention",
                    "record retention",
                    "retention policy",
                    "preserve records",
                ],
                "description": "Must include document retention obligations (§ 802)",
                "severity": "HIGH",
            },
        },
    },
    "HIPAA": {
        "description": "Health Insurance Portability and Accountability Act (US, 1996)",
        "requirements": {
            "phi_protection": {
                "keywords": [
                    "protected health information",
                    "PHI",
                    "health information",
                    "ePHI",
                ],
                "description": "Must protect PHI and limit uses/disclosures",
                "severity": "CRITICAL",
            },
            "business_associate": {
                "keywords": [
                    "business associate agreement",
                    "BAA",
                    "business associate",
                    "covered entity",
                ],
                "description": "Should reference Business Associate Agreement (§ 164.504(e))",
                "severity": "HIGH",
            },
            "security_safeguards": {
                "keywords": [
                    "administrative safeguards",
                    "technical safeguards",
                    "physical safeguards",
                    "encryption",
                    "access controls",
                ],
                "description": "Must implement security safeguards (§ 164.308-312)",
                "severity": "HIGH",
            },
            "breach_notification": {
                "keywords": [
                    "breach notification",
                    "notification of breach",
                    "unauthorized access",
                ],
                "description": "Must include breach notification obligations (§ 164.400-414)",
                "severity": "HIGH",
            },
        },
    },
    "FINRA": {
        "description": "Financial Industry Regulatory Authority (US)",
        "requirements": {
            "recordkeeping": {
                "keywords": [
                    "recordkeeping",
                    "books and records",
                    "retain records",
                    "SEC Rule 17a-4",
                ],
                "description": "Must comply with recordkeeping rules (FINRA Rule 4511)",
                "severity": "HIGH",
            },
            "supervision": {
                "keywords": ["supervision", "supervisory system", "review and approval"],
                "description": "Should reference supervisory obligations (FINRA Rule 3110)",
                "severity": "MEDIUM",
            },
            "anti_money_laundering": {
                "keywords": [
                    "anti-money laundering",
                    "AML",
                    "suspicious activity",
                    "SAR",
                    "OFAC",
                ],
                "description": "Must reference AML compliance (FINRA Rule 3310)",
                "severity": "HIGH",
            },
            "privacy": {
                "keywords": [
                    "privacy policy",
                    "customer information",
                    "Regulation S-P",
                    "nonpublic personal information",
                ],
                "description": "Must protect customer information (Regulation S-P)",
                "severity": "HIGH",
            },
        },
    },
}

# severity -> (foreground colour, background colour)
RISK_STYLES = {
    "CRITICAL": ("#dc2626", "#fef2f2"),
    "HIGH": ("#ea580c", "#fff7ed"),
    "MEDIUM": ("#ca8a04", "#fefce8"),
    "LOW": ("#16a34a", "#f0fdf4"),
}

# overall status -> (foreground colour, background colour)
_STATUS_STYLES = {
    "COMPLIANT": ("#16a34a", "#f0fdf4"),
    "PARTIAL": ("#ca8a04", "#fefce8"),
    "NON-COMPLIANT": ("#dc2626", "#fef2f2"),
    "WARNING": ("#ea580c", "#fff7ed"),
}
_DEFAULT_STYLE = ("#6b7280", "#f9fafb")

_STATUS_ICONS = {"PASS": "✅", "MISSING": "❌", "NEGATED": "🚫", "AMBIGUOUS": "❓"}
_STATUS_TEXT = {
    "PASS": "Found",
    "MISSING": "Missing",
    "NEGATED": "Negated",
    "AMBIGUOUS": "Ambiguous",
}


@lru_cache(maxsize=256)
def _keyword_pattern(keyword):
    """Return the compiled, case-insensitive search pattern for *keyword*.

    * Any run of whitespace in the text matches a space in the keyword, so a
      phrase broken across lines is still found.
    * All-uppercase keywords (acronyms such as ``SAR`` or ``PHI``) must stand
      alone as a word, optionally with a plural ``s``. Without this, ``SAR``
      matched "necessary" and ``AML`` matched "seamless".
    * Every other keyword keeps plain substring semantics, so inflected forms
      ("consents", "identifiers") continue to match.
    """
    body = r"\s+".join(re.escape(part) for part in keyword.split())
    if keyword.isupper():
        body = rf"(?<!\w){body}s?(?!\w)"
    return re.compile(body, re.IGNORECASE)


def _is_negated(text, start, end, window):
    """Return True if a negation pattern occurs within *window* chars of text[start:end]."""
    lo = max(0, start - window)
    hi = min(len(text), end + window)
    return _NEGATION_RE.search(text[lo:hi]) is not None


def _snippet(text, start, end, window):
    """Return text[start:end] with *window* chars of context, ellipsised where cut."""
    lo = max(0, start - window)
    hi = min(len(text), end + window)
    snippet = text[lo:hi].strip()
    if lo > 0:
        snippet = "..." + snippet
    if hi < len(text):
        snippet = snippet + "..."
    return snippet


def _check_negation(text_lower, keyword, window=100):
    """Check if a keyword match is negated by nearby negation words.

    Every occurrence of *keyword* is inspected, not only the first one.

    Args:
        text_lower: Text to search (matching is case-insensitive, so any
            casing is accepted).
        keyword: Keyword or phrase to look for.
        window: Number of characters examined on each side of a match.

    Returns:
        True if at least one occurrence has a negation pattern inside its
        window; False if there is none or the keyword does not occur.
    """
    return any(
        _is_negated(text_lower, match.start(), match.end(), window)
        for match in _keyword_pattern(keyword).finditer(text_lower)
    )


def _get_context(text, keyword, window=80):
    """Extract context around the first match of *keyword* in *text*.

    Args:
        text: Original (un-lowercased) text.
        keyword: Keyword or phrase to look for.
        window: Number of characters of context kept on each side.

    Returns:
        The stripped snippet, prefixed/suffixed with "..." where the text was
        cut, or "" when the keyword does not occur.
    """
    match = _keyword_pattern(keyword).search(text)
    if match is None:
        return ""
    return _snippet(text, match.start(), match.end(), window)


def _evaluate_requirement(text, req_name, req_data):
    """Score one requirement against *text* and return its check record."""
    affirmed = False
    negated = False
    matched_keywords = []
    context_snippets = []

    for kw in req_data["keywords"]:
        occurrences = list(_keyword_pattern(kw).finditer(text))
        if not occurrences:
            continue
        matched_keywords.append(kw)

        negated_hits = [
            m for m in occurrences
            if _is_negated(text, m.start(), m.end(), _NEGATION_WINDOW)
        ]
        if negated_hits:
            negated = True
        if len(negated_hits) < len(occurrences):
            affirmed = True

        # Show the occurrence that explains the verdict: a negated one when
        # there is any, otherwise the first mention.
        shown = negated_hits[0] if negated_hits else occurrences[0]
        snippet = _snippet(text, shown.start(), shown.end(), _CONTEXT_WINDOW)
        if snippet:
            context_snippets.append(snippet)

    if affirmed and negated:
        status = "AMBIGUOUS"
    elif affirmed:
        status = "PASS"
    elif negated:
        status = "NEGATED"
    else:
        status = "MISSING"

    return {
        "requirement": req_name,
        "description": req_data["description"],
        "severity": req_data["severity"],
        "status": status,
        "matched_keywords": matched_keywords,
        "context": context_snippets[:2],  # Keep top 2 context snippets
    }


def _overall_status(checks, compliance_rate):
    """Derive the framework-level verdict from its checks and pass rate."""
    # A negated CRITICAL/HIGH requirement overrides the rate-based verdict.
    if any(
        c["status"] == "NEGATED" and c["severity"] in ("CRITICAL", "HIGH")
        for c in checks
    ):
        return "WARNING"
    if compliance_rate >= 80:
        return "COMPLIANT"
    if compliance_rate >= 40:
        return "PARTIAL"
    return "NON-COMPLIANT"


def check_compliance(text):
    """Check contract text against all regulatory frameworks with negation handling.

    Args:
        text: Contract text. ``None`` or "" is treated as an empty document.

    Returns:
        A dict keyed by framework name (the keys of ``REGULATIONS``). Each
        value holds ``description``, ``compliance_rate`` (integer percentage
        of requirements with status PASS), ``checks`` (one record per
        requirement with ``requirement``, ``description``, ``severity``,
        ``status``, ``matched_keywords`` and up to two ``context`` snippets),
        ``overall_status``, ``negated_count`` and ``ambiguous_count``.

        A requirement is PASS when a keyword occurs un-negated and nothing is
        negated, NEGATED when every occurrence is negated, AMBIGUOUS when both
        kinds occur, and MISSING when no keyword occurs.
    """
    text = text or ""
    results = {}

    for reg_name, reg_data in REGULATIONS.items():
        checks = [
            _evaluate_requirement(text, req_name, req_data)
            for req_name, req_data in reg_data["requirements"].items()
        ]

        passed = sum(1 for c in checks if c["status"] == "PASS")
        total = len(checks)
        compliance_rate = round(passed / total * 100) if total > 0 else 0

        results[reg_name] = {
            "description": reg_data["description"],
            "compliance_rate": compliance_rate,
            "checks": checks,
            "overall_status": _overall_status(checks, compliance_rate),
            "negated_count": sum(1 for c in checks if c["status"] == "NEGATED"),
            "ambiguous_count": sum(1 for c in checks if c["status"] == "AMBIGUOUS"),
        }

    return results


def _render_check(check):
    """Render one requirement check as an HTML row."""
    # Unknown severities fall back to a neutral style instead of raising.
    color, bg = RISK_STYLES.get(check["severity"], _DEFAULT_STYLE)
    status_icon = _STATUS_ICONS.get(check["status"], "❓")
    status_text = _STATUS_TEXT.get(check["status"], "Unknown")

    matched = check["matched_keywords"]
    keywords = ", ".join(escape(str(kw)) for kw in matched[:3]) if matched else "—"

    context_html = ""
    if check.get("context"):
        # The snippet is raw contract text: truncate first, then escape.
        ctx = escape(check["context"][0][:120])
        context_html = (
            '<div style="font-size:12px;color:#6b7280;font-style:italic;'
            f'margin-top:2px;">"{ctx}"</div>'
        )

    return (
        '<div style="display:flex;justify-content:space-between;'
        f'align-items:center;padding:8px 10px;margin-top:6px;background:{bg};'
        f'border-left:3px solid {color};border-radius:4px;">'
        "<div>"
        f'<div style="font-size:13px;">{escape(str(check["description"]))}</div>'
        f'<div style="font-size:12px;color:#6b7280;">Keywords: {keywords}</div>'
        f"{context_html}"
        "</div>"
        '<div style="white-space:nowrap;margin-left:12px;">'
        f'<span style="color:{color};font-weight:600;font-size:12px;">'
        f'{escape(str(check["severity"]))}</span> '
        f'<span title="{escape(status_text)}">{status_icon}</span>'
        "</div>"
        "</div>"
    )


def _render_regulation(reg_name, reg_result):
    """Render one framework's summary header and its check rows as an HTML card."""
    rate = reg_result["compliance_rate"]
    status = reg_result["overall_status"]
    status_color, status_bg = _STATUS_STYLES.get(status, _DEFAULT_STYLE)

    neg = reg_result.get("negated_count", 0)
    amb = reg_result.get("ambiguous_count", 0)
    warnings = ""
    if neg > 0:
        warnings += (
            '<span style="font-size:12px;color:#ea580c;margin-left:6px;">'
            f"⚠️ {neg} negated</span>"
        )
    if amb > 0:
        warnings += (
            '<span style="font-size:12px;color:#ca8a04;margin-left:6px;">'
            f"❓ {amb} ambiguous</span>"
        )

    rows = "".join(_render_check(check) for check in reg_result["checks"])

    return (
        '<div style="border:1px solid #e5e7eb;border-radius:8px;'
        'padding:12px;margin-bottom:16px;">'
        '<div style="display:flex;justify-content:space-between;align-items:center;">'
        "<div>"
        f'<div style="font-weight:700;font-size:16px;">'
        f"{escape(str(reg_name))} {warnings}</div>"
        f'<div style="font-size:12px;color:#6b7280;">'
        f'{escape(str(reg_result["description"]))}</div>'
        "</div>"
        '<div style="text-align:right;">'
        f'<div style="font-size:20px;font-weight:700;color:{status_color};">{rate}%</div>'
        f'<span style="background:{status_bg};color:{status_color};'
        f'padding:2px 8px;border-radius:4px;font-size:12px;">{escape(str(status))}</span>'
        "</div>"
        "</div>"
        f"{rows}"
        "</div>"
    )


def render_compliance_html(results):
    """Render compliance results as HTML for Gradio.

    Args:
        results: Mapping in the shape returned by ``check_compliance``.

    Returns:
        One HTML string: a wrapper element containing a card per framework
        (name, warning counters, description, rate, overall status) followed
        by one row per requirement check. All text taken from *results*,
        including contract snippets, is HTML-escaped.
    """
    # NOTE(review): the inline styles/tags below were rebuilt around the
    # original text order and colour tables; confirm the look against the
    # deployed UI.
    parts = ['<div style="font-family:system-ui,sans-serif;">']
    parts.extend(
        _render_regulation(reg_name, reg_result)
        for reg_name, reg_result in results.items()
    )
    parts.append("</div>")
    return "".join(parts)