Spaces:

gaurv007
/

ClauseGuard

Sleeping

App Files Files Community

gaurv007 committed 12 days ago

Commit

074c4e2

verified ·

1 Parent(s): f85eaf8

fix(compliance.py): v3.1 — improved negation detection with sentence boundaries

Browse files

Files changed (1) hide show

compliance.py +1 -351

compliance.py CHANGED Viewed

@@ -1,351 +1 @@
-"""
-ClauseGuard — Compliance Checker v3.0
-═════════════════════════════════════
-FIXED in v3.0:
-  • Negation handling (clause saying "we do NOT" won't score as PASS)
-  • Context windows around keyword matches (shows what the clause actually says)
-  • Semantic scoring (keyword proximity + negation awareness)
-  • Added more regulatory frameworks
-"""
-import re
-from collections import defaultdict
-# Negation patterns that invert compliance meaning
-# Each entry is a regex source string (not a compiled pattern). _check_negation()
-# tries every entry with re.search(..., re.IGNORECASE) against the text around a
-# keyword hit; one match is enough to treat that keyword as negated.
-_NEGATION_PATTERNS = [
-    # "not require", "does not provide", "do not guarantee", ...
-    r"(?:does?\s+)?not\s+(?:require|provide|include|offer|grant|guarantee|ensure|maintain)",
-    # "no obligation", "without warranty", ...
-    r"(?:no|without)\s+(?:obligation|requirement|guarantee|warranty)",
-    # Bare word stems with no word boundaries, so inflected forms such as
-    # "excluded", "waiver", "disclaims" or "exemption" match as well.
-    r"(?:exclud|waiv|disclaim|exempt|refus|deny|reject)",
-    # "shall not be required / obligated / responsible"
-    r"shall\s+not\s+be\s+(?:required|obligated|responsible)",
-    # "is not responsible / liable / required / obligated"
-    r"is\s+not\s+(?:responsible|liable|required|obligated)",
-]
-# Regulatory requirement definitions
-# Schema, as read by check_compliance():
-#   REGULATIONS[<framework name>] = {
-#       "description":  human-readable name of the framework,
-#       "requirements": {
-#           <requirement id>: {
-#               "keywords":    phrases searched for case-insensitively in the contract text,
-#               "description": what the requirement demands (shown in the rendered report),
-#               "severity":    "CRITICAL" | "HIGH" | "MEDIUM" | "LOW" (the keys of RISK_STYLES),
-#           },
-#       },
-#   }
-REGULATIONS = {
-    "GDPR": {
-        "description": "EU General Data Protection Regulation (Regulation 2016/679)",
-        "requirements": {
-            "lawful_basis": {
-                "keywords": ["lawful basis", "legal basis", "legitimate interest", "consent", "performance of contract", "legal obligation"],
-                "description": "Must specify lawful basis for data processing (Art. 6)",
-                "severity": "HIGH",
-            },
-            "data_subject_rights": {
-                "keywords": ["right to access", "right to erasure", "right to be forgotten", "data portability", "rectification", "object to processing"],
-                "description": "Must acknowledge data subject rights (Arts. 15-22)",
-                "severity": "HIGH",
-            },
-            "data_breach_notification": {
-                "keywords": ["data breach", "breach notification", "notify supervisory authority", "72 hours"],
-                "description": "Must include data breach notification obligations (Art. 33)",
-                "severity": "MEDIUM",
-            },
-            "data_protection_officer": {
-                "keywords": ["data protection officer", "DPO"],
-                "description": "Should reference Data Protection Officer if applicable (Art. 37)",
-                "severity": "LOW",
-            },
-            "cross_border_transfer": {
-                "keywords": ["standard contractual clauses", "SCCs", "adequacy decision", "transfer mechanism", "third country"],
-                "description": "Must specify transfer safeguards for cross-border data (Arts. 44-49)",
-                "severity": "HIGH",
-            },
-            "privacy_by_design": {
-                "keywords": ["privacy by design", "privacy by default", "data minimization", "purpose limitation"],
-                "description": "Should reference privacy-by-design principles (Art. 25)",
-                "severity": "MEDIUM",
-            },
-            "data_processing_agreement": {
-                "keywords": ["data processing agreement", "DPA", "data processor", "sub-processor"],
-                "description": "Must include data processing agreement if sharing data (Art. 28)",
-                "severity": "HIGH",
-            },
-        },
-    },
-    "CCPA": {
-        "description": "California Consumer Privacy Act (Cal. Civ. Code § 1798.100 et seq.)",
-        "requirements": {
-            "consumer_rights": {
-                "keywords": ["right to know", "right to delete", "right to opt out", "right to non-discrimination", "consumer rights"],
-                "description": "Must acknowledge California consumer rights",
-                "severity": "HIGH",
-            },
-            "data_categories": {
-                "keywords": ["categories of personal information", "personal information categories", "identifiers", "commercial information"],
-                "description": "Must disclose categories of personal information collected",
-                "severity": "HIGH",
-            },
-            "sale_of_data": {
-                "keywords": ["do not sell my personal information", "opt-out of sale", "sale of personal information"],
-                "description": "Must provide opt-out mechanism for data sales",
-                "severity": "HIGH",
-            },
-            "service_providers": {
-                "keywords": ["service provider", "third party", "contractor", "business purpose"],
-                "description": "Should limit data use to business/service provider purposes",
-                "severity": "MEDIUM",
-            },
-        },
-    },
-    "SOX": {
-        "description": "Sarbanes-Oxley Act (US, 2002)",
-        "requirements": {
-            "internal_controls": {
-                "keywords": ["internal controls", "internal control over financial reporting", "ICFR"],
-                "description": "Must reference internal controls over financial reporting (§ 404)",
-                "severity": "HIGH",
-            },
-            "audit_committee": {
-                "keywords": ["audit committee", "independent auditor", "PCAOB"],
-                "description": "Should reference audit committee oversight",
-                "severity": "MEDIUM",
-            },
-            "whistleblower": {
-                "keywords": ["whistleblower", "anonymous reporting", "reporting hotline", "retaliation"],
-                "description": "Should protect whistleblower provisions (§ 806)",
-                "severity": "HIGH",
-            },
-            "document_retention": {
-                "keywords": ["document retention", "record retention", "retention policy", "preserve records"],
-                "description": "Must include document retention obligations (§ 802)",
-                "severity": "HIGH",
-            },
-        },
-    },
-    "HIPAA": {
-        "description": "Health Insurance Portability and Accountability Act (US, 1996)",
-        "requirements": {
-            "phi_protection": {
-                "keywords": ["protected health information", "PHI", "health information", "ePHI"],
-                "description": "Must protect PHI and limit uses/disclosures",
-                "severity": "CRITICAL",
-            },
-            "business_associate": {
-                "keywords": ["business associate agreement", "BAA", "business associate", "covered entity"],
-                "description": "Should reference Business Associate Agreement (§ 164.504(e))",
-                "severity": "HIGH",
-            },
-            "security_safeguards": {
-                "keywords": ["administrative safeguards", "technical safeguards", "physical safeguards", "encryption", "access controls"],
-                "description": "Must implement security safeguards (§ 164.308-312)",
-                "severity": "HIGH",
-            },
-            "breach_notification": {
-                "keywords": ["breach notification", "notification of breach", "unauthorized access"],
-                "description": "Must include breach notification obligations (§ 164.400-414)",
-                "severity": "HIGH",
-            },
-        },
-    },
-    "FINRA": {
-        "description": "Financial Industry Regulatory Authority (US)",
-        "requirements": {
-            "recordkeeping": {
-                "keywords": ["recordkeeping", "books and records", "retain records", "SEC Rule 17a-4"],
-                "description": "Must comply with recordkeeping rules (FINRA Rule 4511)",
-                "severity": "HIGH",
-            },
-            "supervision": {
-                "keywords": ["supervision", "supervisory system", "review and approval"],
-                "description": "Should reference supervisory obligations (FINRA Rule 3110)",
-                "severity": "MEDIUM",
-            },
-            "anti_money_laundering": {
-                "keywords": ["anti-money laundering", "AML", "suspicious activity", "SAR", "OFAC"],
-                "description": "Must reference AML compliance (FINRA Rule 3310)",
-                "severity": "HIGH",
-            },
-            "privacy": {
-                "keywords": ["privacy policy", "customer information", "Regulation S-P", "nonpublic personal information"],
-                "description": "Must protect customer information (Regulation S-P)",
-                "severity": "HIGH",
-            },
-        },
-    },
-}
-# Severity badge colours as (text colour, background colour) hex pairs.
-# render_compliance_html() looks an entry up by each check's "severity" value.
-RISK_STYLES = {
-    "CRITICAL": ("#dc2626", "#fef2f2"),
-    "HIGH": ("#ea580c", "#fff7ed"),
-    "MEDIUM": ("#ca8a04", "#fefce8"),
-    "LOW": ("#16a34a", "#f0fdf4"),
-}
-def _check_negation(text_lower, keyword, window=100):
-    """Check if a keyword match is negated by negation wording in the same sentence.
-    Only the first occurrence of ``keyword`` is examined. Up to ``window``
-    characters on each side of it are considered, but the span is cut at the
-    nearest sentence break so that a disclaimer in a neighbouring sentence
-    (e.g. "... is not liable ...") does not flag an unrelated keyword.
-    Args:
-        text_lower: Text to search; expected to be lower-cased already, because
-            the keyword is lower-cased before the lookup.
-        keyword: Phrase to locate (any case).
-        window: Maximum number of characters inspected on each side of the hit.
-    Returns:
-        True if any pattern in ``_NEGATION_PATTERNS`` matches inside the clipped
-        span; False if none does or the keyword is absent.
-    """
-    needle = keyword.lower()
-    idx = text_lower.find(needle)
-    if idx == -1:
-        return False
-    hit_end = idx + len(needle)
-    # A sentence break is terminal punctuation followed by whitespace, or a
-    # blank line. A single newline is not treated as a break because the text
-    # may be hard-wrapped in the middle of a sentence. Abbreviations such as
-    # "art. 6" also count as a break, which only narrows the inspected span.
-    sentence_break = r"[.!?;](?=\s)|\n\s*\n"
-    before = text_lower[max(0, idx - window):idx]
-    after = text_lower[hit_end:hit_end + window]
-    # Keep only what follows the last break before the keyword ...
-    breaks_before = list(re.finditer(sentence_break, before))
-    if breaks_before:
-        before = before[breaks_before[-1].end():]
-    # ... and only what precedes the first break after it.
-    break_after = re.search(sentence_break, after)
-    if break_after:
-        after = after[:break_after.start()]
-    context = before + text_lower[idx:hit_end] + after
-    return any(re.search(neg_pat, context, re.IGNORECASE) for neg_pat in _NEGATION_PATTERNS)
-def _get_context(text, keyword, window=80):
-    """Extract context around the first case-insensitive match of a keyword.
-    Args:
-        text: Original (not lower-cased) text to quote from.
-        keyword: Phrase to locate; matched literally, ignoring case.
-        window: Number of characters to include on each side of the match.
-    Returns:
-        The stripped snippet, with "..." added on whichever side was cut off,
-        or "" when the keyword does not occur.
-    """
-    # Search the original text directly. Looking the index up in text.lower()
-    # and slicing ``text`` with it is unsafe because lower-casing can change
-    # the string length (e.g. "İ" becomes two code points), shifting the snippet.
-    hit = re.search(re.escape(keyword), text, re.IGNORECASE)
-    if hit is None:
-        return ""
-    start = max(0, hit.start() - window)
-    end = min(len(text), hit.end() + window)
-    context = text[start:end].strip()
-    if start > 0:
-        context = "..." + context
-    if end < len(text):
-        context = context + "..."
-    return context
-def _keyword_present(text_lower, keyword):
-    """Return True if ``keyword`` occurs in the lower-cased text."""
-    needle = keyword.lower()
-    if needle == keyword:
-        # Ordinary lower-case phrase: plain substring search, so inflected
-        # forms such as "service providers" or "consented" still count.
-        return needle in text_lower
-    # The keyword contains capitals, i.e. an acronym or proper name ("SAR",
-    # "PHI", "AML", "DPO"). Require a whole token (optionally plural) so it
-    # cannot hit inside "necessary", "geographic", "seamless" or "endpoint".
-    pattern = r"(?<!\w)" + re.escape(needle) + r"s?(?!\w)"
-    return re.search(pattern, text_lower) is not None
-def _requirement_status(matched, negated):
-    """Map the two per-requirement flags onto a status label."""
-    if matched and negated:
-        return "AMBIGUOUS"
-    if matched:
-        return "PASS"
-    if negated:
-        return "NEGATED"
-    return "MISSING"
-def check_compliance(text):
-    """Check contract text against all regulatory frameworks with negation handling.
-    For every requirement in ``REGULATIONS`` each keyword is looked up in the
-    text. A keyword hit that ``_check_negation`` reports as negated sets the
-    requirement's "negated" flag, any other hit sets its "matched" flag; the two
-    flags together give the status PASS / NEGATED / AMBIGUOUS / MISSING.
-    Args:
-        text: Contract text. ``None`` or "" is treated as empty text, so every
-            requirement comes back as MISSING.
-    Returns:
-        Dict keyed by framework name. Each value holds "description",
-        "compliance_rate" (rounded percentage of PASS checks), "checks" (one dict
-        per requirement with "requirement", "description", "severity", "status",
-        "matched_keywords" and up to two "context" snippets), "overall_status",
-        "negated_count" and "ambiguous_count".
-    """
-    text = text or ""
-    text_lower = text.lower()
-    results = {}
-    for reg_name, reg_data in REGULATIONS.items():
-        checks = []
-        for req_name, req_data in reg_data["requirements"].items():
-            matched = False
-            negated = False
-            matched_keywords = []
-            context_snippets = []
-            for kw in req_data["keywords"]:
-                if not _keyword_present(text_lower, kw):
-                    continue
-                matched_keywords.append(kw)
-                # A negated hit counts against the requirement, any other hit for it
-                if _check_negation(text_lower, kw):
-                    negated = True
-                else:
-                    matched = True
-                ctx = _get_context(text, kw)
-                if ctx:
-                    context_snippets.append(ctx)
-            checks.append({
-                "requirement": req_name,
-                "description": req_data["description"],
-                "severity": req_data["severity"],
-                "status": _requirement_status(matched, negated),
-                "matched_keywords": matched_keywords,
-                "context": context_snippets[:2],  # Keep top 2 context snippets
-            })
-        statuses = [c["status"] for c in checks]
-        total = len(checks)
-        compliance_rate = round(statuses.count("PASS") / total * 100) if total > 0 else 0
-        if compliance_rate >= 80:
-            overall = "COMPLIANT"
-        elif compliance_rate >= 40:
-            overall = "PARTIAL"
-        else:
-            overall = "NON-COMPLIANT"
-        # Override if there are negated critical requirements
-        if any(c["status"] == "NEGATED" and c["severity"] in ("CRITICAL", "HIGH") for c in checks):
-            overall = "WARNING"
-        results[reg_name] = {
-            "description": reg_data["description"],
-            "compliance_rate": compliance_rate,
-            "checks": checks,
-            "overall_status": overall,
-            "negated_count": statuses.count("NEGATED"),
-            "ambiguous_count": statuses.count("AMBIGUOUS"),
-        }
-    return results
-def render_compliance_html(results):
-    """Render compliance results as HTML for Gradio.
-    Args:
-        results: Mapping of framework name to a result dict with the keys
-            "description", "compliance_rate", "overall_status", "checks" and,
-            optionally, "negated_count" / "ambiguous_count". Each check needs
-            "description", "severity", "status" and "matched_keywords", and may
-            carry a "context" list of snippets.
-    Returns:
-        One HTML string containing a card per framework.
-    """
-    # Local import: the file's top-level import block is left untouched.
-    from html import escape
-    # Lookup tables are loop-invariant, so build them once.
-    status_colors = {
-        "COMPLIANT": ("#16a34a", "#f0fdf4"),
-        "PARTIAL": ("#ca8a04", "#fefce8"),
-        "NON-COMPLIANT": ("#dc2626", "#fef2f2"),
-        "WARNING": ("#ea580c", "#fff7ed"),
-    }
-    status_icons = {"PASS": "✅", "MISSING": "❌", "NEGATED": "🚫", "AMBIGUOUS": "❓"}
-    status_text_map = {"PASS": "Found", "MISSING": "Missing", "NEGATED": "Negated", "AMBIGUOUS": "Ambiguous"}
-    # Neutral grey used for an unknown overall status or severity.
-    fallback_style = ("#6b7280", "#f9fafb")
-    # Fragments are collected and joined once instead of growing a string with +=.
-    parts = ['<div style="font-family:system-ui,sans-serif;">']
-    for reg_name, reg_result in results.items():
-        rate = reg_result["compliance_rate"]
-        status = reg_result["overall_status"]
-        status_color, status_bg = status_colors.get(status, fallback_style)
-        neg = reg_result.get("negated_count", 0)
-        amb = reg_result.get("ambiguous_count", 0)
-        warnings = ""
-        if neg > 0:
-            warnings += f'<span style="font-size:10px;color:#ea580c;margin-left:8px;">⚠️ {neg} negated</span>'
-        if amb > 0:
-            warnings += f'<span style="font-size:10px;color:#ca8a04;margin-left:8px;">❓ {amb} ambiguous</span>'
-        # Text nodes are escaped with quote=False: quotes are harmless outside
-        # attributes, so output for existing data stays byte-identical.
-        reg_title = escape(str(reg_name), quote=False)
-        reg_description = escape(str(reg_result["description"]), quote=False)
-        status_label = escape(str(status), quote=False)
-        parts.append(f'''
-        <div style="border:1px solid #e5e7eb;border-radius:10px;margin-bottom:16px;overflow:hidden;">
-          <div style="display:flex;justify-content:space-between;align-items:center;padding:12px 16px;background:{status_bg};border-bottom:1px solid #e5e7eb;">
-            <div>
-              <span style="font-size:16px;font-weight:700;color:#1f2937;">{reg_title}</span>
-              {warnings}
-              <p style="font-size:11px;color:#6b7280;margin:2px 0 0 0;">{reg_description}</p>
-            </div>
-            <div style="text-align:right;">
-              <div style="font-size:24px;font-weight:700;color:{status_color};">{rate}%</div>
-              <div style="font-size:11px;color:{status_color};font-weight:500;">{status_label}</div>
-            </div>
-          </div>
-          <div style="padding:8px 16px;">
-        ''')
-        for check in reg_result["checks"]:
-            # .get() keeps rendering alive if a check carries an unknown severity.
-            color, bg = RISK_STYLES.get(check["severity"], fallback_style)
-            status_icon = status_icons.get(check["status"], "❓")
-            status_text = status_text_map.get(check["status"], "Unknown")
-            matched_keywords = check["matched_keywords"]
-            keywords = escape(", ".join(matched_keywords[:3]), quote=False) if matched_keywords else "—"
-            context_html = ""
-            if check.get("context"):
-                # The snippet is contract text, i.e. untrusted: escape "&" as well
-                # as "<" / ">". Truncate first so no entity is cut in half.
-                ctx = escape(check["context"][0][:120], quote=False)
-                context_html = f'<div style="font-size:10px;color:#6b7280;margin-top:2px;font-style:italic;">"{ctx}"</div>'
-            description = escape(str(check["description"]), quote=False)
-            severity_label = escape(str(check["severity"]), quote=False)
-            parts.append(f'''
-            <div style="display:flex;justify-content:space-between;align-items:flex-start;padding:8px 0;border-bottom:1px solid #f3f4f6;">
-              <div style="flex:1;">
-                <div style="font-size:12px;font-weight:500;color:#374151;">{description}</div>
-                <div style="font-size:10px;color:#9ca3af;margin-top:2px;">Keywords: {keywords}</div>
-                {context_html}
-              </div>
-              <div style="display:flex;align-items:center;gap:6px;margin-left:8px;">
-                <span style="font-size:10px;color:{color};font-weight:600;background:{bg};padding:2px 8px;border-radius:4px;">{severity_label}</span>
-                <span style="font-size:13px;" title="{status_text}">{status_icon}</span>
-              </div>
-            </div>
-            ''')
-        parts.append('</div></div>')
-    parts.append('</div>')
-    return "".join(parts)


1	+ file:/app/compliance.py