Spaces:

gaurv007
/

ClauseGuard

Sleeping

App Files Files Community

gaurv007 committed 15 days ago

Commit

464c655

verified ·

1 Parent(s): a90501a

v3.0: Upload actual compliance.py content

Browse files

Files changed (1) hide show

compliance.py +351 -1

compliance.py CHANGED Viewed

	@@ -1 +1,351 @@
1	- ~~/app/clauseguard/compliance.py~~

+"""
+ClauseGuard — Compliance Checker v3.0
+═════════════════════════════════════
+FIXED in v3.0:
+  • Negation handling (clause saying "we do NOT" won't score as PASS)
+  • Context windows around keyword matches (shows what the clause actually says)
+  • Semantic scoring (keyword proximity + negation awareness)
+  • Added more regulatory frameworks
+"""
+import re
+from collections import defaultdict
# Regex fragments signalling that a clause denies, excludes or disclaims an
# obligation. They are kept as plain strings (not compiled patterns) because
# the negation check passes each one to re.search() together with
# re.IGNORECASE, which is not permitted for pre-compiled patterns.
_NEGATION_PATTERNS = [
    # "not provide", "do not require", "does not guarantee", "cannot ensure".
    # No leading \b on purpose, so the "not" inside "cannot" still counts.
    r"(?:do(?:es)?\s+)?not\s+(?:require|provide|include|offer|grant|guarantee|ensure|maintain)",
    # "no obligation", "without warranty"; \b keeps words that merely end in
    # "no" (e.g. "casino obligation") from matching.
    r"\b(?:no|without)\s+(?:obligation|requirement|guarantee|warranty)",
    # Word stems: excludes/excluding, waive/waiver, disclaims, exempt,
    # refuse/refusal, deny, reject(s/ed).
    r"(?:exclud|waiv|disclaim|exempt|refus|deny|reject)",
    # "shall not be liable", "will not be responsible".
    r"\b(?:shall|will)\s+not\s+be\s+(?:required|obligated|responsible|liable)",
    # "is not liable", "are not responsible".
    r"\b(?:is|are)\s+not\s+(?:responsible|liable|required|obligated)",
]
# Regulatory requirement definitions.
#
# Schema:
#   REGULATIONS[framework] = {
#       "description": human-readable name of the framework,
#       "requirements": {
#           requirement_id: {
#               "keywords":    phrases whose presence in a contract is taken as
#                              evidence that the requirement is addressed,
#               "description": what the requirement demands,
#               "severity":    "CRITICAL" | "HIGH" | "MEDIUM" | "LOW",
#           },
#       },
#   }
#
# Keyword lists include common spelling variants (British "minimisation",
# unhyphenated "subprocessor", hyphenated "opt-out") so that a contract is not
# reported as missing a requirement purely because of orthography.
REGULATIONS = {
    "GDPR": {
        "description": "EU General Data Protection Regulation (Regulation 2016/679)",
        "requirements": {
            "lawful_basis": {
                "keywords": ["lawful basis", "legal basis", "legitimate interest", "consent", "performance of contract", "legal obligation"],
                "description": "Must specify lawful basis for data processing (Art. 6)",
                "severity": "HIGH",
            },
            "data_subject_rights": {
                "keywords": ["right to access", "right to erasure", "right to be forgotten", "data portability", "rectification", "object to processing"],
                "description": "Must acknowledge data subject rights (Arts. 15-22)",
                "severity": "HIGH",
            },
            "data_breach_notification": {
                "keywords": ["data breach", "breach notification", "notify supervisory authority", "72 hours"],
                "description": "Must include data breach notification obligations (Art. 33)",
                "severity": "MEDIUM",
            },
            "data_protection_officer": {
                "keywords": ["data protection officer", "DPO"],
                "description": "Should reference Data Protection Officer if applicable (Art. 37)",
                "severity": "LOW",
            },
            "cross_border_transfer": {
                "keywords": ["standard contractual clauses", "SCCs", "adequacy decision", "transfer mechanism", "third country"],
                "description": "Must specify transfer safeguards for cross-border data (Arts. 44-49)",
                "severity": "HIGH",
            },
            "privacy_by_design": {
                # "data minimisation" is the spelling used in the regulation's
                # own English text; US-drafted contracts use "minimization".
                "keywords": ["privacy by design", "privacy by default", "data minimization", "data minimisation", "purpose limitation"],
                "description": "Should reference privacy-by-design principles (Art. 25)",
                "severity": "MEDIUM",
            },
            "data_processing_agreement": {
                "keywords": ["data processing agreement", "DPA", "data processor", "sub-processor", "subprocessor"],
                "description": "Must include data processing agreement if sharing data (Art. 28)",
                "severity": "HIGH",
            },
        },
    },
    "CCPA": {
        "description": "California Consumer Privacy Act (Cal. Civ. Code § 1798.100 et seq.)",
        "requirements": {
            "consumer_rights": {
                "keywords": ["right to know", "right to delete", "right to opt out", "right to opt-out", "right to non-discrimination", "consumer rights"],
                "description": "Must acknowledge California consumer rights",
                "severity": "HIGH",
            },
            "data_categories": {
                "keywords": ["categories of personal information", "personal information categories", "identifiers", "commercial information"],
                "description": "Must disclose categories of personal information collected",
                "severity": "HIGH",
            },
            "sale_of_data": {
                "keywords": ["do not sell my personal information", "opt-out of sale", "sale of personal information"],
                "description": "Must provide opt-out mechanism for data sales",
                "severity": "HIGH",
            },
            "service_providers": {
                "keywords": ["service provider", "third party", "contractor", "business purpose"],
                "description": "Should limit data use to business/service provider purposes",
                "severity": "MEDIUM",
            },
        },
    },
    "SOX": {
        "description": "Sarbanes-Oxley Act (US, 2002)",
        "requirements": {
            "internal_controls": {
                "keywords": ["internal controls", "internal control over financial reporting", "ICFR"],
                "description": "Must reference internal controls over financial reporting (§ 404)",
                "severity": "HIGH",
            },
            "audit_committee": {
                "keywords": ["audit committee", "independent auditor", "PCAOB"],
                "description": "Should reference audit committee oversight",
                "severity": "MEDIUM",
            },
            "whistleblower": {
                "keywords": ["whistleblower", "anonymous reporting", "reporting hotline", "retaliation"],
                "description": "Should protect whistleblower provisions (§ 806)",
                "severity": "HIGH",
            },
            "document_retention": {
                "keywords": ["document retention", "record retention", "retention policy", "preserve records"],
                "description": "Must include document retention obligations (§ 802)",
                "severity": "HIGH",
            },
        },
    },
    "HIPAA": {
        "description": "Health Insurance Portability and Accountability Act (US, 1996)",
        "requirements": {
            "phi_protection": {
                "keywords": ["protected health information", "PHI", "health information", "ePHI"],
                "description": "Must protect PHI and limit uses/disclosures",
                "severity": "CRITICAL",
            },
            "business_associate": {
                "keywords": ["business associate agreement", "BAA", "business associate", "covered entity"],
                "description": "Should reference Business Associate Agreement (§ 164.504(e))",
                "severity": "HIGH",
            },
            "security_safeguards": {
                "keywords": ["administrative safeguards", "technical safeguards", "physical safeguards", "encryption", "access controls"],
                "description": "Must implement security safeguards (§ 164.308-312)",
                "severity": "HIGH",
            },
            "breach_notification": {
                "keywords": ["breach notification", "notification of breach", "unauthorized access"],
                "description": "Must include breach notification obligations (§ 164.400-414)",
                "severity": "HIGH",
            },
        },
    },
    "FINRA": {
        "description": "Financial Industry Regulatory Authority (US)",
        "requirements": {
            "recordkeeping": {
                "keywords": ["recordkeeping", "books and records", "retain records", "SEC Rule 17a-4"],
                "description": "Must comply with recordkeeping rules (FINRA Rule 4511)",
                "severity": "HIGH",
            },
            "supervision": {
                "keywords": ["supervision", "supervisory system", "review and approval"],
                "description": "Should reference supervisory obligations (FINRA Rule 3110)",
                "severity": "MEDIUM",
            },
            "anti_money_laundering": {
                "keywords": ["anti-money laundering", "AML", "suspicious activity", "SAR", "OFAC"],
                "description": "Must reference AML compliance (FINRA Rule 3310)",
                "severity": "HIGH",
            },
            "privacy": {
                "keywords": ["privacy policy", "customer information", "Regulation S-P", "nonpublic personal information"],
                "description": "Must protect customer information (Regulation S-P)",
                "severity": "HIGH",
            },
        },
    },
}
# Severity label -> (text colour, background colour) hex pair used for the
# severity badge in the rendered HTML report.
RISK_STYLES = dict(
    CRITICAL=("#dc2626", "#fef2f2"),
    HIGH=("#ea580c", "#fff7ed"),
    MEDIUM=("#ca8a04", "#fefce8"),
    LOW=("#16a34a", "#f0fdf4"),
)
def _keyword_regex(keyword):
    """Compile the case-insensitive pattern used to locate *keyword* in text.

    Matching rules:
      * A match must begin at a word boundary, so "AML" is not found inside
        "seamless" and "SAR" is not found inside "necessary".
      * Acronym-like keywords (a single token with two or more capitals, e.g.
        "SAR", "PHI", "SCCs") must also end at a word boundary, optionally
        after a plural/possessive "s", so "SAR" is not found in "Sarbanes".
      * All other keywords keep prefix matching, so inflected forms such as
        "data breaches" or "consented" are still found.
      * Whitespace inside a phrase matches any run of whitespace, so a phrase
        wrapped across lines is still found.
    """
    words = keyword.split()
    body = r"\s+".join(re.escape(word) for word in words)
    acronym_like = len(words) == 1 and sum(ch.isupper() for ch in keyword) >= 2
    tail = r"(?:'?s)?(?!\w)" if acronym_like else ""
    return re.compile(r"(?<!\w)" + body + tail, re.IGNORECASE)


def _negation_in_window(text, start, end, window):
    """Return True if any negation pattern occurs within *window* characters
    on either side of the span ``text[start:end]``."""
    lo = max(0, start - window)
    hi = min(len(text), end + window)
    segment = text[lo:hi]
    return any(re.search(pattern, segment, re.IGNORECASE) for pattern in _NEGATION_PATTERNS)


def _snippet(text, start, end, window):
    """Return ``text[start:end]`` plus *window* characters of context on each
    side, stripped, with "..." marking any side that was truncated."""
    lo = max(0, start - window)
    hi = min(len(text), end + window)
    snippet = text[lo:hi].strip()
    if lo > 0:
        snippet = "..." + snippet
    if hi < len(text):
        snippet = snippet + "..."
    return snippet


def _check_negation(text_lower, keyword, window=100):
    """Check if a keyword match is negated by nearby negation words.

    Args:
        text_lower: Text to search (matching is case-insensitive, so the text
            does not actually have to be lower-cased).
        keyword: Keyword or phrase to look for.
        window: Number of characters inspected on each side of a match.

    Returns:
        True if at least one occurrence of *keyword* has a negation pattern
        inside its window; False if the keyword is absent or never negated.
    """
    return any(
        _negation_in_window(text_lower, hit.start(), hit.end(), window)
        for hit in _keyword_regex(keyword).finditer(text_lower)
    )


def _get_context(text, keyword, window=80):
    """Extract context around the first occurrence of a keyword.

    The keyword is located directly in *text* (case-insensitively) rather than
    in a lower-cased copy, because lower-casing can change a string's length
    and would then yield offsets that do not line up with *text*.

    Returns:
        The surrounding snippet, or "" when the keyword does not occur.
    """
    hit = _keyword_regex(keyword).search(text)
    if hit is None:
        return ""
    return _snippet(text, hit.start(), hit.end(), window)


def _evaluate_requirement(text, req_name, req_data):
    """Build the check record for one requirement of one framework."""
    affirmed = False  # some occurrence of some keyword has no negation nearby
    negated = False   # some occurrence of some keyword has a negation nearby
    matched_keywords = []
    context_snippets = []
    for kw in req_data["keywords"]:
        first_hit = None
        first_negated_hit = None
        # Every occurrence is inspected: a neutral first mention must not hide
        # a later clause that disclaims the same obligation.
        for hit in _keyword_regex(kw).finditer(text):
            if first_hit is None:
                first_hit = hit
            if _negation_in_window(text, hit.start(), hit.end(), 100):
                negated = True
                if first_negated_hit is None:
                    first_negated_hit = hit
            else:
                affirmed = True
        if first_hit is None:
            continue
        matched_keywords.append(kw)
        # Prefer showing the negated occurrence, since that is the passage a
        # reviewer needs to read to judge a NEGATED/AMBIGUOUS result.
        shown = first_hit if first_negated_hit is None else first_negated_hit
        snippet = _snippet(text, shown.start(), shown.end(), 80)
        if snippet:
            context_snippets.append(snippet)

    if affirmed and negated:
        status = "AMBIGUOUS"
    elif affirmed:
        status = "PASS"
    elif negated:
        status = "NEGATED"
    else:
        status = "MISSING"

    return {
        "requirement": req_name,
        "description": req_data["description"],
        "severity": req_data["severity"],
        "status": status,
        "matched_keywords": matched_keywords,
        "context": context_snippets[:2],  # Keep top 2 context snippets
    }


def check_compliance(text):
    """Check contract text against all regulatory frameworks with negation handling.

    Args:
        text: Contract text. ``None`` or "" is treated as an empty contract.

    Returns:
        A dict keyed by framework name. Each value contains:
          * "description": the framework description,
          * "compliance_rate": percentage (int) of requirements with status PASS,
          * "checks": one record per requirement with "requirement",
            "description", "severity", "status" (PASS / NEGATED / AMBIGUOUS /
            MISSING), "matched_keywords" and up to two "context" snippets,
          * "overall_status": COMPLIANT (rate >= 80), PARTIAL (rate >= 40) or
            NON-COMPLIANT, replaced by WARNING when any CRITICAL or HIGH
            requirement is NEGATED,
          * "negated_count" and "ambiguous_count".
    """
    text = text or ""
    results = {}
    for reg_name, reg_data in REGULATIONS.items():
        checks = [
            _evaluate_requirement(text, req_name, req_data)
            for req_name, req_data in reg_data["requirements"].items()
        ]
        statuses = [check["status"] for check in checks]
        total = len(checks)
        compliance_rate = round(statuses.count("PASS") / total * 100) if total > 0 else 0

        if compliance_rate >= 80:
            overall = "COMPLIANT"
        elif compliance_rate >= 40:
            overall = "PARTIAL"
        else:
            overall = "NON-COMPLIANT"
        # Override if there are negated critical requirements
        if any(c["status"] == "NEGATED" and c["severity"] in ("CRITICAL", "HIGH") for c in checks):
            overall = "WARNING"

        results[reg_name] = {
            "description": reg_data["description"],
            "compliance_rate": compliance_rate,
            "checks": checks,
            "overall_status": overall,
            "negated_count": statuses.count("NEGATED"),
            "ambiguous_count": statuses.count("AMBIGUOUS"),
        }
    return results
def render_compliance_html(results):
    """Render compliance results as HTML for Gradio.

    Args:
        results: Mapping of framework name to a result dict with the keys
            "description", "compliance_rate", "overall_status", "checks" and,
            optionally, "negated_count" / "ambiguous_count". Each entry of
            "checks" needs "description", "severity", "status" and
            "matched_keywords", and may carry a list of "context" snippets.

    Returns:
        A single HTML string: one card per framework with a header (name,
        rate, overall status, negated/ambiguous counters) followed by one row
        per check.

    Every interpolated value is HTML-escaped. The context snippets are copied
    from the analysed contract and must never be emitted as raw markup.
    """
    # Imported locally under its own name so the function is self-contained
    # and no local variable shadows the ``html`` module.
    from html import escape

    # Lookup tables are loop-invariant, so they are built once.
    status_colors = {
        "COMPLIANT": ("#16a34a", "#f0fdf4"),
        "PARTIAL": ("#ca8a04", "#fefce8"),
        "NON-COMPLIANT": ("#dc2626", "#fef2f2"),
        "WARNING": ("#ea580c", "#fff7ed"),
    }
    status_icons = {"PASS": "✅", "MISSING": "❌", "NEGATED": "🚫", "AMBIGUOUS": "❓"}
    status_text_map = {"PASS": "Found", "MISSING": "Missing", "NEGATED": "Negated", "AMBIGUOUS": "Ambiguous"}
    neutral_colors = ("#6b7280", "#f9fafb")

    parts = ['<div style="font-family:system-ui,sans-serif;">']
    for reg_name, reg_result in results.items():
        status = reg_result["overall_status"]
        status_color, status_bg = status_colors.get(status, neutral_colors)
        neg = reg_result.get("negated_count", 0)
        amb = reg_result.get("ambiguous_count", 0)
        warnings = ""
        if neg > 0:
            warnings += f'<span style="font-size:10px;color:#ea580c;margin-left:8px;">⚠️ {neg} negated</span>'
        if amb > 0:
            warnings += f'<span style="font-size:10px;color:#ca8a04;margin-left:8px;">❓ {amb} ambiguous</span>'

        safe_name = escape(str(reg_name))
        safe_description = escape(str(reg_result["description"]))
        safe_rate = escape(str(reg_result["compliance_rate"]))
        safe_status = escape(str(status))
        parts.append(f'''
        <div style="border:1px solid #e5e7eb;border-radius:10px;margin-bottom:16px;overflow:hidden;">
          <div style="display:flex;justify-content:space-between;align-items:center;padding:12px 16px;background:{status_bg};border-bottom:1px solid #e5e7eb;">
            <div>
              <span style="font-size:16px;font-weight:700;color:#1f2937;">{safe_name}</span>
              {warnings}
              <p style="font-size:11px;color:#6b7280;margin:2px 0 0 0;">{safe_description}</p>
            </div>
            <div style="text-align:right;">
              <div style="font-size:24px;font-weight:700;color:{status_color};">{safe_rate}%</div>
              <div style="font-size:11px;color:{status_color};font-weight:500;">{safe_status}</div>
            </div>
          </div>
          <div style="padding:8px 16px;">
        ''')

        for check in reg_result["checks"]:
            # Unknown severities fall back to a neutral badge instead of
            # aborting the whole report with a KeyError.
            color, bg = RISK_STYLES.get(check["severity"], neutral_colors)
            status_icon = status_icons.get(check["status"], "❓")
            status_text = status_text_map.get(check["status"], "Unknown")
            matched = check["matched_keywords"]
            keywords = escape(", ".join(matched[:3])) if matched else "—"
            context_html = ""
            if check.get("context"):
                # Truncate first, then escape, so an entity is never cut in half.
                ctx = escape(check["context"][0][:120])
                context_html = f'<div style="font-size:10px;color:#6b7280;margin-top:2px;font-style:italic;">"{ctx}"</div>'
            safe_check_description = escape(str(check["description"]))
            safe_severity = escape(str(check["severity"]))
            parts.append(f'''
            <div style="display:flex;justify-content:space-between;align-items:flex-start;padding:8px 0;border-bottom:1px solid #f3f4f6;">
              <div style="flex:1;">
                <div style="font-size:12px;font-weight:500;color:#374151;">{safe_check_description}</div>
                <div style="font-size:10px;color:#9ca3af;margin-top:2px;">Keywords: {keywords}</div>
                {context_html}
              </div>
              <div style="display:flex;align-items:center;gap:6px;margin-left:8px;">
                <span style="font-size:10px;color:{color};font-weight:600;background:{bg};padding:2px 8px;border-radius:4px;">{safe_severity}</span>
                <span style="font-size:13px;" title="{status_text}">{status_icon}</span>
              </div>
            </div>
            ''')
        parts.append('</div></div>')
    parts.append('</div>')
    return "".join(parts)