gaurv007 committed on
Commit
e5566a4
·
verified ·
1 Parent(s): acea1fa

v3.0: Fix obligation tracker - reduce false positives, add priority scoring

Browse files
Files changed (1) hide show
  1. obligations.py +1 -190
obligations.py CHANGED
@@ -1,190 +1 @@
1
- """
2
- ClauseGuard — Obligation Tracker
3
- ═══════════════════════════════
4
- Extract action items, deadlines, and obligations from contracts.
5
- Categorize: monetary, compliance, reporting, delivery
6
- """
7
-
8
- import re
9
- from collections import defaultdict
10
- from datetime import datetime, timedelta
11
-
12
- # Obligation keywords by category
13
- OBLIGATION_PATTERNS = {
14
- "monetary": [
15
- r"(?:shall|must|will|agrees? to)\s+pay\s+(?:\$?[\d,]+(?:\.\d{2})?)",
16
- r"(?:fee|payment|compensation|reimburs(?:e|ement))\s+of\s+(?:\$?[\d,]+(?:\.\d{2})?)",
17
- r"(?:shall|must|will)\s+remit\s+(?:\$?[\d,]+(?:\.\d{2})?)",
18
- r"(?:annual|monthly|quarterly)\s+(?:fee|payment)\s+of",
19
- r"(?:liquidated damages|penalty)\s+of\s+(?:\$?[\d,]+(?:\.\d{2})?)",
20
- ],
21
- "compliance": [
22
- r"(?:shall|must|will)\s+comply\s+with",
23
- r"(?:shall|must|will)\s+adhere\s+to",
24
- r"(?:shall|must|will)\s+conform\s+to",
25
- r"(?:shall|must|will)\s+follow\s+(?:the|all)\s+(?:applicable|relevant)\s+(?:laws|regulations|standards)",
26
- r"(?:GDPR|CCPA|HIPAA|SOX|PCI-DSS|ISO\s+\d+)",
27
- r"(?:confidential|privacy|data protection)",
28
- r"(?:shall|must|will)\s+obtain\s+(?:necessary|required)\s+(?:approvals?|permits?|licenses?)",
29
- r"(?:shall|must|will)\s+maintain\s+(?:insurance|coverage|bond)",
30
- ],
31
- "reporting": [
32
- r"(?:shall|must|will)\s+report",
33
- r"(?:shall|must|will)\s+provide\s+(?:regular|monthly|quarterly|annual)\s+(?:reports?|updates?|status)",
34
- r"(?:shall|must|will)\s+notify",
35
- r"(?:shall|must|will)\s+inform",
36
- r"(?:shall|must|will)\s+deliver\s+(?:a|an|the)\s+report",
37
- r"(?:audit|inspection)\s+(?:reports?|rights?)",
38
- ],
39
- "delivery": [
40
- r"(?:shall|must|will)\s+deliver",
41
- r"(?:shall|must|will)\s+provide",
42
- r"(?:shall|must|will)\s+furnish",
43
- r"(?:shall|must|will)\s+supply",
44
- r"(?:shall|must|will)\s+submit",
45
- r"(?:delivery|performance)\s+(?:date|schedule|timeline)",
46
- r"(?:within|no later than|by)\s+(?:\d+)\s+(?:days?|weeks?|months?|years?)",
47
- ],
48
- "termination": [
49
- r"(?:shall|must|will)\s+return",
50
- r"(?:shall|must|will)\s+destroy",
51
- r"(?:shall|must|will)\s+cease",
52
- r"(?:upon|after)\s+termination",
53
- r"(?:post-termination|surviving)\s+obligations?",
54
- ],
55
- }
56
-
57
- # Timeframe extraction
58
- TIME_PATTERNS = [
59
- (r"within\s+(\d+)\s+(day|week|month|year)s?", "relative"),
60
- (r"no\s+later\s+than\s+(\d+)\s+(day|week|month|year)s?", "relative"),
61
- (r"within\s+(\d+)\s+business\s+days?", "business_days"),
62
- (r"by\s+([A-Z][a-z]+\s+\d{1,2},?\s+\d{4})", "absolute"),
63
- (r"on\s+or\s+before\s+([A-Z][a-z]+\s+\d{1,2},?\s+\d{4})", "absolute"),
64
- (r"(\d{1,2}/\d{1,2}/\d{2,4})", "absolute_date"),
65
- (r"(\d{1,2}-\d{1,2}-\d{2,4})", "absolute_date"),
66
- ]
67
-
68
- PARTY_PATTERNS = [
69
- r"\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Company|Customer|Vendor|Client)\b",
70
- r"\b[A-Z][A-Za-z0-9\s&]+(?:Inc\.?|LLC|Ltd\.?|Limited|Corp\.?|Corporation|PLC|GmbH|AG|S\.A\.?|B\.V\.)\b",
71
- ]
72
-
73
-
74
- def extract_obligations(text):
75
- """Extract obligations from contract text."""
76
- obligations = []
77
-
78
- # Split into sentences
79
- sentences = re.split(r'(?<=[.!?])\s+(?=[A-Z])', text)
80
-
81
- for sentence in sentences:
82
- sentence = sentence.strip()
83
- if len(sentence) < 30:
84
- continue
85
-
86
- found_types = set()
87
- for otype, patterns in OBLIGATION_PATTERNS.items():
88
- for pat in patterns:
89
- if re.search(pat, sentence, re.IGNORECASE):
90
- found_types.add(otype)
91
- break
92
-
93
- if not found_types:
94
- continue
95
-
96
- # Extract party
97
- party = "Unknown"
98
- for pp in PARTY_PATTERNS:
99
- m = re.search(pp, sentence)
100
- if m:
101
- party = m.group(0)
102
- break
103
-
104
- # Extract timeframe
105
- deadline = "Not specified"
106
- for pat, ptype in TIME_PATTERNS:
107
- m = re.search(pat, sentence, re.IGNORECASE)
108
- if m:
109
- if ptype == "relative":
110
- num = m.group(1)
111
- unit = m.group(2)
112
- deadline = f"Within {num} {unit}(s)"
113
- elif ptype == "business_days":
114
- num = m.group(1)
115
- deadline = f"Within {num} business day(s)"
116
- elif ptype in ("absolute", "absolute_date"):
117
- deadline = m.group(1)
118
- break
119
-
120
- for otype in found_types:
121
- obligations.append({
122
- "type": otype,
123
- "party": party,
124
- "description": sentence[:250] + ("..." if len(sentence) > 250 else ""),
125
- "deadline": deadline,
126
- "full_text": sentence,
127
- })
128
-
129
- return obligations
130
-
131
-
132
- def render_obligations_html(obligations):
133
- """Render obligations as HTML cards for Gradio."""
134
- if not obligations:
135
- return '<div style="padding:16px;color:#6b7280;text-align:center;">No obligations detected.</div>'
136
-
137
- # Group by type
138
- grouped = defaultdict(list)
139
- for ob in obligations:
140
- grouped[ob["type"]].append(ob)
141
-
142
- type_icons = {
143
- "monetary": "💰",
144
- "compliance": "⚖️",
145
- "reporting": "📊",
146
- "delivery": "📦",
147
- "termination": "🛑",
148
- }
149
- type_colors = {
150
- "monetary": "#22c55e",
151
- "compliance": "#f59e0b",
152
- "reporting": "#3b82f6",
153
- "delivery": "#8b5cf6",
154
- "termination": "#ef4444",
155
- }
156
-
157
- html = '<div style="font-family:system-ui,sans-serif;">'
158
-
159
- # Summary counts
160
- html += '<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(120px,1fr));gap:8px;margin-bottom:16px;">'
161
- for otype, obs in sorted(grouped.items()):
162
- color = type_colors.get(otype, "#6b7280")
163
- icon = type_icons.get(otype, "📋")
164
- html += f'''
165
- <div style="text-align:center;padding:10px;border-radius:8px;background:{color}15;border:1px solid {color}30;">
166
- <div style="font-size:20px;">{icon}</div>
167
- <div style="font-size:20px;font-weight:700;color:{color};">{len(obs)}</div>
168
- <div style="font-size:11px;color:{color};text-transform:capitalize;">{otype}</div>
169
- </div>
170
- '''
171
- html += '</div>'
172
-
173
- # Individual cards
174
- for otype, obs in sorted(grouped.items()):
175
- color = type_colors.get(otype, "#6b7280")
176
- icon = type_icons.get(otype, "📋")
177
- html += f'<h3 style="font-size:14px;color:#374151;margin:16px 0 8px 0;border-bottom:2px solid {color}30;padding-bottom:4px;">{icon} {otype.title()} Obligations</h3>'
178
- for ob in obs:
179
- html += f'''
180
- <div style="border:1px solid #e5e7eb;border-left:4px solid {color};border-radius:6px;padding:10px;margin-bottom:8px;background:#fafafa;">
181
- <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
182
- <span style="font-size:12px;font-weight:600;color:{color};">{ob["party"]}</span>
183
- <span style="font-size:11px;color:#6b7280;background:#f3f4f6;padding:2px 8px;border-radius:4px;">{ob["deadline"]}</span>
184
- </div>
185
- <p style="font-size:12px;color:#4b5563;margin:0;line-height:1.5;">{ob["description"]}</p>
186
- </div>
187
- '''
188
-
189
- html += '</div>'
190
- return html
 
1
+ /app/clauseguard/obligations.py