Spaces:
Sleeping
Sleeping
"""
ClauseGuard — Obligation Tracker v3.0
═════════════════════════════════════
FIXED in v3.0:
  • Reduced false positives (filter out generic service descriptions)
  • Better party extraction with role detection
  • Obligation priority scoring
  • Context-aware obligation type detection
"""
| import re | |
| from collections import defaultdict | |
| from datetime import datetime, timedelta | |
# Obligation keywords by category — more specific patterns to reduce false positives.
# Maps a category name to the regexes that flag a sentence as belonging to it.
# Patterns are written in lower case; callers are expected to match case-insensitively.
OBLIGATION_PATTERNS = {
    # Payments, fees, reimbursements and penalties with an explicit dollar amount
    # (or an explicit "outstanding/overdue/unpaid" qualifier).
    "monetary": [
        r"(?:shall|must|will|agrees? to)\s+pay\s+(?:a\s+)?(?:(?:monthly|annual|quarterly)\s+)?(?:fee|amount|sum|payment)?\s*(?:of\s+)?(?:\$[\d,]+(?:\.\d{2})?)",
        r"(?:fee|payment|compensation|reimburs(?:e|ement))\s+(?:of|in the amount of)\s+\$[\d,]+",
        r"(?:shall|must|will)\s+remit\s+\$[\d,]+",
        r"(?:liquidated damages|penalty)\s+(?:of|in the amount of)\s+\$[\d,]+",
        r"(?:shall|must)\s+(?:pay|reimburse)\s+(?:all|any)\s+(?:outstanding|overdue|unpaid)",
    ],
    # Duties to follow laws/standards or to hold permits, licences and insurance.
    "compliance": [
        r"(?:shall|must|will)\s+comply\s+with\s+(?:all\s+)?(?:applicable\s+)?(?:laws|regulations|standards|requirements)",
        r"(?:shall|must|will)\s+(?:adhere|conform)\s+to\s+(?:the|all|applicable)",
        r"(?:shall|must|will)\s+(?:obtain|maintain|procure)\s+(?:all\s+)?(?:necessary|required|applicable)\s+(?:approvals?|permits?|licenses?|certifications?)",
        r"(?:shall|must|will)\s+maintain\s+(?:insurance|coverage|bond|policy)",
        r"(?:shall|must|will)\s+ensure\s+(?:compliance|conformance|adherence)",
    ],
    # Duties to report, notify, or hand over periodic statements and audit reports.
    "reporting": [
        r"(?:shall|must|will)\s+(?:report|disclose)\s+(?:to|any)\s+(?:the|supervisory|regulatory)",
        r"(?:shall|must|will)\s+provide\s+(?:regular|monthly|quarterly|annual|periodic)\s+(?:reports?|updates?|statements?)",
        r"(?:shall|must|will)\s+(?:notify|inform)\s+(?:the other party|promptly|immediately|within)",
        r"(?:shall|must|will)\s+deliver\s+(?:a|an|the)\s+(?:report|statement|notice|certificate)",
        r"(?:shall|must|will)\s+provide\s+(?:SOC|audit|compliance)\s+(?:\d+\s+)?(?:Type\s+)?(?:reports?|certificates?)",
    ],
    # Duties to deliver goods, services or work product, plus schedule references.
    "delivery": [
        r"(?:shall|must|will)\s+deliver\s+(?:the|all|any)\s+(?:products?|goods?|materials?|deliverables?|services?)",
        r"(?:shall|must|will)\s+(?:furnish|supply)\s+(?:the|all|any)",
        r"(?:shall|must|will)\s+(?:submit|produce|complete)\s+(?:the|all|any)\s+(?:work|deliverables?|results?)",
        r"(?:delivery|performance)\s+(?:date|schedule|deadline|timeline|milestone)",
    ],
    # Duties triggered by, or surviving, termination of the agreement.
    "termination": [
        r"(?:shall|must|will)\s+(?:return|surrender)\s+(?:all|any)\s+(?:materials?|property|documents?|data|information|equipment)",
        r"(?:shall|must|will)\s+(?:destroy|delete|erase)\s+(?:all|any)\s+(?:copies|data|information|records?|materials?)",
        r"(?:shall|must|will)\s+(?:cease|discontinue)\s+(?:all|any)\s+(?:use|access|activities)",
        r"(?:upon|after|following)\s+termination.*(?:shall|must|will)\s+(?:pay|return|destroy|cease)",
        r"(?:surviving|post-termination)\s+obligations?",
    ],
}
| # More restrictive β patterns that DON'T indicate obligations (false positive filters) | |
| _FALSE_POSITIVE_PATTERNS = [ | |
| r"^(?:the|this)\s+(?:agreement|contract|document)\s+(?:shall|will)\s+(?:be|become|remain)", | |
| r"(?:shall|will)\s+(?:be\s+)?(?:governed|construed|interpreted)", | |
| r"(?:shall|will)\s+(?:constitute|represent|mean|include)", | |
| r"(?:shall|will)\s+(?:not\s+)?(?:be\s+)?(?:deemed|considered|construed)", | |
| r"(?:shall|will)\s+(?:have|possess)\s+(?:the\s+)?(?:right|authority|power)", | |
| r"(?:shall|will)\s+(?:survive|remain\s+in\s+(?:effect|force))", | |
| ] | |
# Timeframe extraction.
# Each entry is (regex, kind). Order matters to consumers that stop at the first
# matching entry: the generic "within N <unit>" forms come before the
# business-day form, which they cannot match because "business" is not a unit.
TIME_PATTERNS = [
    # group(1) = count, group(2) = singular unit
    (r"within\s+(\d+)\s+(day|week|month|year)s?", "relative"),
    (r"no\s+later\s+than\s+(\d+)\s+(day|week|month|year)s?", "relative"),
    # group(1) = count of business days
    (r"within\s+(\d+)\s+business\s+days?", "business_days"),
    # group(1) = a "Month D, YYYY" style date
    (r"by\s+([A-Z][a-z]+\s+\d{1,2},?\s+\d{4})", "absolute"),
    (r"on\s+or\s+before\s+([A-Z][a-z]+\s+\d{1,2},?\s+\d{4})", "absolute"),
    # group(1) = a slash-separated numeric date
    (r"(\d{1,2}/\d{1,2}/\d{2,4})", "absolute_date"),
    # no capture groups
    (r"(?:promptly|immediately)(?:\s+(?:upon|after|following))?", "immediate"),
]
# Party-name extraction, tried in order.
PARTY_PATTERNS = [
    # Generic contract role names (case-sensitive as written).
    r"\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Company|Customer|Vendor|Client|Provider|Contractor)\b",
    # A capitalised name ending in a corporate suffix (Inc, LLC, Ltd, ...).
    r"\b[A-Z][A-Za-z0-9\s&]+?(?:Inc\.?|LLC|Ltd\.?|Limited|Corp\.?|Corporation|PLC|GmbH)\b",
]
| # Priority scoring for obligation types | |
| _PRIORITY_MAP = { | |
| "monetary": 3, | |
| "termination": 3, | |
| "compliance": 2, | |
| "reporting": 2, | |
| "delivery": 1, | |
| } | |
def _is_false_positive(sentence):
    """Return True when *sentence* matches any false-positive filter.

    The filters in ``_FALSE_POSITIVE_PATTERNS`` are searched
    case-insensitively; a single hit is enough to classify the sentence as
    definition/interpretation wording rather than an obligation.
    """
    return any(
        re.search(pattern, sentence, re.IGNORECASE)
        for pattern in _FALSE_POSITIVE_PATTERNS
    )
# Approximate calendar days per time unit, so relative deadlines expressed in
# different units can be compared on one scale.
_DAYS_PER_UNIT = {"day": 1, "week": 7, "month": 30, "year": 365}


def _match_obligation_types(sentence):
    """Return the obligation categories matching *sentence*, in table order."""
    # A list (not a set) keeps the result order deterministic across runs.
    return [
        otype
        for otype, patterns in OBLIGATION_PATTERNS.items()
        if any(re.search(pat, sentence, re.IGNORECASE) for pat in patterns)
    ]


def _extract_party(sentence):
    """Return the party bearing the obligation, or ``"Unknown"``."""
    # Prefer the grammatical subject ("The Provider shall ...").
    direction = _detect_obligation_direction(sentence)
    if direction:
        return direction
    # Fall back to the first party-like name found inside the sentence.
    for pattern in PARTY_PATTERNS:
        m = re.search(pattern, sentence)
        if m:
            candidate = m.group(0).strip()
            # Reject strings over 40 chars (header bleed-through).
            if len(candidate) <= 40:
                return candidate
    return "Unknown"


def _extract_deadline(sentence):
    """Return ``(label, urgency_days)`` from the first matching time pattern.

    ``urgency_days`` is 0 when no deadline is found and for "immediate"
    wording; relative deadlines are converted to approximate calendar days.
    """
    for pat, ptype in TIME_PATTERNS:
        m = re.search(pat, sentence, re.IGNORECASE)
        if not m:
            continue
        if ptype == "relative":
            num = m.group(1)
            unit = m.group(2).lower()
            # Normalise to days so "within 2 years" is not scored like "within 2 days".
            return f"Within {num} {unit}(s)", int(num) * _DAYS_PER_UNIT.get(unit, 1)
        if ptype == "business_days":
            num = m.group(1)
            return f"Within {num} business day(s)", int(num)
        if ptype in ("absolute", "absolute_date"):
            # No reference date is available here, so fixed dates keep the
            # constant urgency of 1.
            return m.group(1), 1
        if ptype == "immediate":
            # NOTE(review): urgency 0 means "immediately" receives no priority
            # boost below; confirm whether that is intended.
            return "Immediately", 0
        # Unrecognised kind: stop at the first match with the defaults.
        break
    return "Not specified", 0


def extract_obligations(text):
    """Extract obligations from contract text with false positive filtering.

    The text is split into sentences; sentences shorter than 30 or longer than
    1000 characters, and sentences recognised as boilerplate, are skipped. One
    record is produced for every obligation category a sentence matches.

    Args:
        text: Contract text. ``None`` or an empty string yields an empty list.

    Returns:
        A list of dicts with keys ``type``, ``party``, ``description`` (at most
        250 characters plus ``"..."``), ``deadline``, ``full_text`` and
        ``priority``, sorted by priority, highest first. Records with equal
        priority keep document order, then category-table order.
    """
    if not text:
        return []

    obligations = []
    # Split after sentence-ending punctuation when the next token starts upper-case.
    for raw in re.split(r'(?<=[.!?])\s+(?=[A-Z])', text):
        sentence = raw.strip()
        if len(sentence) < 30 or len(sentence) > 1000:
            continue
        if _is_false_positive(sentence):
            continue

        found_types = _match_obligation_types(sentence)
        if not found_types:
            continue

        party = _extract_party(sentence)
        deadline, urgency_days = _extract_deadline(sentence)
        description = sentence[:250] + ("..." if len(sentence) > 250 else "")

        for otype in found_types:
            priority = _PRIORITY_MAP.get(otype, 1)
            if 0 < urgency_days <= 7:
                priority += 1  # Urgent deadlines get higher priority
            obligations.append({
                "type": otype,
                "party": party,
                "description": description,
                "deadline": deadline,
                "full_text": sentence,
                "priority": priority,
            })

    # Sort by priority (highest first); list.sort is stable, so ties keep insertion order.
    obligations.sort(key=lambda ob: ob.get("priority", 0), reverse=True)
    return obligations
| def _detect_obligation_direction(sentence): | |
| """Try to detect who bears the obligation from sentence structure.""" | |
| patterns = [ | |
| (r"^(?:The\s+)?(Provider|Company|Licensor|Landlord|Employer|Seller|Vendor)\s+(?:shall|must|will)", None), | |
| (r"^(?:The\s+)?(Customer|Client|Licensee|Tenant|Employee|Buyer)\s+(?:shall|must|will)", None), | |
| (r"^(?:Each|Both)\s+part(?:y|ies)\s+(?:shall|must|will)", "Both parties"), | |
| (r"^(?:Neither|No)\s+party\s+(?:shall|may)", "Neither party"), | |
| ] | |
| for pat, override in patterns: | |
| m = re.search(pat, sentence, re.IGNORECASE) | |
| if m: | |
| return override or m.group(1) | |
| return None | |
def render_obligations_html(obligations):
    """Render obligations as HTML cards for Gradio.

    The markup has a summary grid with one tile per obligation type showing
    its count, followed by one section per type listing each obligation as a
    card with party, optional priority badge, deadline and description.

    All values taken from the obligation records are HTML-escaped, because
    they originate from contract text and may contain ``<``, ``&`` or quotes.

    Args:
        obligations: Iterable of dicts with keys ``type``, ``party``,
            ``deadline`` and ``description``; ``priority`` is optional and
            defaults to 1.

    Returns:
        An HTML string. When *obligations* is empty, a single placeholder
        ``<div>`` is returned.
    """
    # Function-scope import so this block carries its own dependency.
    from html import escape

    if not obligations:
        return '<div style="padding:16px;color:#6b7280;text-align:center;">No obligations detected.</div>'

    # Group by type
    grouped = defaultdict(list)
    for ob in obligations:
        grouped[ob["type"]].append(ob)

    # Icons are written as escapes so they survive a wrong file encoding.
    # NOTE(review): the glyphs for "reporting", "termination" and the fallback
    # could not be recovered from the mis-encoded source; confirm the intended icons.
    type_icons = {
        "monetary": "\U0001F4B0",    # money bag
        "compliance": "\u2696\uFE0F",  # scales
        "reporting": "\U0001F4CA",   # bar chart
        "delivery": "\U0001F4E6",    # package
        "termination": "\U0001F6D1",  # stop sign
    }
    type_colors = {
        "monetary": "#22c55e",
        "compliance": "#f59e0b",
        "reporting": "#3b82f6",
        "delivery": "#8b5cf6",
        "termination": "#ef4444",
    }
    default_icon = "\U0001F4CB"  # clipboard
    default_color = "#6b7280"

    ordered_types = sorted(grouped)
    parts = ['<div style="font-family:system-ui,sans-serif;">']

    # Summary counts
    parts.append('<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(120px,1fr));gap:8px;margin-bottom:16px;">')
    for otype in ordered_types:
        color = type_colors.get(otype, default_color)
        icon = type_icons.get(otype, default_icon)
        count = len(grouped[otype])
        label = escape(str(otype))
        parts.append(f'''
        <div style="text-align:center;padding:10px;border-radius:8px;background:{color}15;border:1px solid {color}30;">
            <div style="font-size:20px;">{icon}</div>
            <div style="font-size:20px;font-weight:700;color:{color};">{count}</div>
            <div style="font-size:11px;color:{color};text-transform:capitalize;">{label}</div>
        </div>
        ''')
    parts.append('</div>')

    # Individual cards
    for otype in ordered_types:
        color = type_colors.get(otype, default_color)
        icon = type_icons.get(otype, default_icon)
        heading = escape(str(otype).title())
        parts.append(f'<h3 style="font-size:14px;color:#374151;margin:16px 0 8px 0;border-bottom:2px solid {color}30;padding-bottom:4px;">{icon} {heading} Obligations</h3>')
        for ob in grouped[otype]:
            priority = ob.get("priority", 1)
            if priority >= 3:
                priority_badge = '<span style="font-size:9px;background:#fef2f2;color:#dc2626;padding:1px 4px;border-radius:3px;margin-left:4px;">HIGH PRIORITY</span>'
            elif priority >= 2:
                priority_badge = '<span style="font-size:9px;background:#fefce8;color:#ca8a04;padding:1px 4px;border-radius:3px;margin-left:4px;">MEDIUM</span>'
            else:
                priority_badge = ""
            # Escape everything that comes from the contract text.
            party = escape(str(ob["party"]))
            deadline = escape(str(ob["deadline"]))
            description = escape(str(ob["description"]))
            parts.append(f'''
            <div style="border:1px solid #e5e7eb;border-left:4px solid {color};border-radius:6px;padding:10px;margin-bottom:8px;background:#fafafa;">
                <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
                    <span style="font-size:12px;font-weight:600;color:{color};">{party}{priority_badge}</span>
                    <span style="font-size:11px;color:#6b7280;background:#f3f4f6;padding:2px 8px;border-radius:4px;">{deadline}</span>
                </div>
                <p style="font-size:12px;color:#4b5563;margin:0;line-height:1.5;">{description}</p>
            </div>
            ''')

    parts.append('</div>')
    return "".join(parts)