gaurv007 committed on
Commit
bec2e2c
·
verified ·
1 Parent(s): 464c655

v3.0: Upload actual obligations.py content

Browse files
Files changed (1) hide show
  1. obligations.py +259 -1
obligations.py CHANGED
@@ -1 +1,259 @@
1
- /app/clauseguard/obligations.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ClauseGuard — Obligation Tracker v3.0
3
+ ═════════════════════════════════════
4
+ FIXED in v3.0:
5
+ • Reduced false positives (filter out generic service descriptions)
6
+ • Better party extraction with role detection
7
+ • Obligation priority scoring
8
+ • Context-aware obligation type detection
9
+ """
10
+
11
+ import re
12
+ from collections import defaultdict
13
+ from datetime import datetime, timedelta
14
+
15
# ---------------------------------------------------------------------------
# Pattern tables
#
# Every entry below is a raw regex string applied to one sentence at a time
# with ``re.search``. The patterns are deliberately narrow (modal verb +
# specific verb + specific object) to keep false positives down.
# ---------------------------------------------------------------------------

# Category name -> regexes. A sentence belongs to a category as soon as any
# one of that category's regexes matches it.
OBLIGATION_PATTERNS = {
    # Payments, fees, penalties: mostly require an explicit dollar amount.
    "monetary": [
        r"(?:shall|must|will|agrees? to)\s+pay\s+(?:a\s+)?(?:(?:monthly|annual|quarterly)\s+)?(?:fee|amount|sum|payment)?\s*(?:of\s+)?(?:\$[\d,]+(?:\.\d{2})?)",
        r"(?:fee|payment|compensation|reimburs(?:e|ement))\s+(?:of|in the amount of)\s+\$[\d,]+",
        r"(?:shall|must|will)\s+remit\s+\$[\d,]+",
        r"(?:liquidated damages|penalty)\s+(?:of|in the amount of)\s+\$[\d,]+",
        r"(?:shall|must)\s+(?:pay|reimburse)\s+(?:all|any)\s+(?:outstanding|overdue|unpaid)",
    ],
    # Legal/regulatory compliance, permits, insurance.
    "compliance": [
        r"(?:shall|must|will)\s+comply\s+with\s+(?:all\s+)?(?:applicable\s+)?(?:laws|regulations|standards|requirements)",
        r"(?:shall|must|will)\s+(?:adhere|conform)\s+to\s+(?:the|all|applicable)",
        r"(?:shall|must|will)\s+(?:obtain|maintain|procure)\s+(?:all\s+)?(?:necessary|required|applicable)\s+(?:approvals?|permits?|licenses?|certifications?)",
        r"(?:shall|must|will)\s+maintain\s+(?:insurance|coverage|bond|policy)",
        r"(?:shall|must|will)\s+ensure\s+(?:compliance|conformance|adherence)",
    ],
    # Reports, notices, disclosures, audit certificates.
    "reporting": [
        r"(?:shall|must|will)\s+(?:report|disclose)\s+(?:to|any)\s+(?:the|supervisory|regulatory)",
        r"(?:shall|must|will)\s+provide\s+(?:regular|monthly|quarterly|annual|periodic)\s+(?:reports?|updates?|statements?)",
        r"(?:shall|must|will)\s+(?:notify|inform)\s+(?:the other party|promptly|immediately|within)",
        r"(?:shall|must|will)\s+deliver\s+(?:a|an|the)\s+(?:report|statement|notice|certificate)",
        r"(?:shall|must|will)\s+provide\s+(?:SOC|audit|compliance)\s+(?:\d+\s+)?(?:Type\s+)?(?:reports?|certificates?)",
    ],
    # Handing over goods, services, or work product.
    "delivery": [
        r"(?:shall|must|will)\s+deliver\s+(?:the|all|any)\s+(?:products?|goods?|materials?|deliverables?|services?)",
        r"(?:shall|must|will)\s+(?:furnish|supply)\s+(?:the|all|any)",
        r"(?:shall|must|will)\s+(?:submit|produce|complete)\s+(?:the|all|any)\s+(?:work|deliverables?|results?)",
        r"(?:delivery|performance)\s+(?:date|schedule|deadline|timeline|milestone)",
    ],
    # Duties triggered by, or surviving, the end of the agreement.
    "termination": [
        r"(?:shall|must|will)\s+(?:return|surrender)\s+(?:all|any)\s+(?:materials?|property|documents?|data|information|equipment)",
        r"(?:shall|must|will)\s+(?:destroy|delete|erase)\s+(?:all|any)\s+(?:copies|data|information|records?|materials?)",
        r"(?:shall|must|will)\s+(?:cease|discontinue)\s+(?:all|any)\s+(?:use|access|activities)",
        r"(?:upon|after|following)\s+termination.*(?:shall|must|will)\s+(?:pay|return|destroy|cease)",
        r"(?:surviving|post-termination)\s+obligations?",
    ],
}

# Sentences matching any of these are definitional or interpretive boilerplate
# ("shall be governed by", "shall mean", ...) rather than duties, and are
# dropped before category matching.
_FALSE_POSITIVE_PATTERNS = [
    r"^(?:the|this)\s+(?:agreement|contract|document)\s+(?:shall|will)\s+(?:be|become|remain)",
    r"(?:shall|will)\s+(?:be\s+)?(?:governed|construed|interpreted)",
    r"(?:shall|will)\s+(?:constitute|represent|mean|include)",
    r"(?:shall|will)\s+(?:not\s+)?(?:be\s+)?(?:deemed|considered|construed)",
    r"(?:shall|will)\s+(?:have|possess)\s+(?:the\s+)?(?:right|authority|power)",
    r"(?:shall|will)\s+(?:survive|remain\s+in\s+(?:effect|force))",
]

# (regex, tag) pairs for deadline extraction. They are tried in list order and
# the first match wins; the tag tells the caller how to read the capture
# groups (count + unit, count only, a date string, or no groups at all).
TIME_PATTERNS = [
    (r"within\s+(\d+)\s+(day|week|month|year)s?", "relative"),
    (r"no\s+later\s+than\s+(\d+)\s+(day|week|month|year)s?", "relative"),
    (r"within\s+(\d+)\s+business\s+days?", "business_days"),
    (r"by\s+([A-Z][a-z]+\s+\d{1,2},?\s+\d{4})", "absolute"),
    (r"on\s+or\s+before\s+([A-Z][a-z]+\s+\d{1,2},?\s+\d{4})", "absolute"),
    (r"(\d{1,2}/\d{1,2}/\d{2,4})", "absolute_date"),
    (r"(?:promptly|immediately)(?:\s+(?:upon|after|following))?", "immediate"),
]

# Party detection: first a fixed vocabulary of contractual role names, then a
# capitalised name ending in a corporate suffix (Inc, LLC, Ltd, ...).
PARTY_PATTERNS = [
    r"\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Company|Customer|Vendor|Client|Provider|Contractor)\b",
    r"\b[A-Z][A-Za-z0-9\s&]+?(?:Inc\.?|LLC|Ltd\.?|Limited|Corp\.?|Corporation|PLC|GmbH)\b",
]

# Base priority score per obligation category; larger means more important.
_PRIORITY_MAP = dict(
    monetary=3,
    termination=3,
    compliance=2,
    reporting=2,
    delivery=1,
)
87
+
88
+
89
def _is_false_positive(sentence):
    """Tell whether *sentence* is boilerplate rather than a real obligation.

    The sentence is tested, case-insensitively, against every regex in
    ``_FALSE_POSITIVE_PATTERNS`` (definitions, governing-law and
    interpretation clauses, grants of rights, survival clauses).

    Returns:
        ``True`` if at least one pattern is found anywhere in the sentence,
        otherwise ``False``.
    """
    return any(
        re.search(pattern, sentence, re.IGNORECASE)
        for pattern in _FALSE_POSITIVE_PATTERNS
    )
95
+
96
+
97
def _extract_deadline(sentence):
    """Find the first deadline expression in *sentence*.

    ``TIME_PATTERNS`` is scanned in list order and the first regex that
    matches decides the result.

    Returns:
        A ``(label, is_urgent)`` tuple. ``label`` is the human-readable
        deadline (``"Not specified"`` when nothing matches). ``is_urgent`` is
        ``True`` when the deadline falls within roughly one week, is an
        explicit calendar date, or is worded as "promptly"/"immediately".
    """
    # Approximate day counts so that deadlines in different units compare
    # on the same scale ("within 2 years" must not look like "within 2 days").
    days_per_unit = {"day": 1, "week": 7, "month": 30, "year": 365}

    for pattern, kind in TIME_PATTERNS:
        match = re.search(pattern, sentence, re.IGNORECASE)
        if not match:
            continue

        if kind == "relative":
            amount, unit = match.group(1), match.group(2)
            # The regex is case-insensitive, so normalise the unit for lookup
            # but keep the matched spelling in the label.
            days = int(amount) * days_per_unit.get(unit.lower(), 1)
            return f"Within {amount} {unit}(s)", 0 < days <= 7
        if kind == "business_days":
            amount = match.group(1)
            return f"Within {amount} business day(s)", 0 < int(amount) <= 7
        if kind in ("absolute", "absolute_date"):
            # Explicit dates are always treated as urgent; the date itself is
            # not parsed or compared against today.
            return match.group(1), True
        if kind == "immediate":
            # "promptly"/"immediately" is the tightest possible deadline.
            return "Immediately", True

        # Unrecognised tag: the first matching pattern still ends the search.
        break

    return "Not specified", False


def extract_obligations(text):
    """Extract obligations from contract text with false positive filtering.

    The text is split into sentences. Sentences shorter than 30 or longer
    than 1000 characters, and sentences recognised as boilerplate by
    ``_is_false_positive``, are skipped. Each remaining sentence is matched
    against ``OBLIGATION_PATTERNS``; one result is produced per matching
    category.

    Args:
        text: Full contract text. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of dicts sorted by ``priority`` (highest first), each with:

        * ``type``: category key from ``OBLIGATION_PATTERNS``.
        * ``party``: obligated party, or ``"Unknown"``.
        * ``description``: the sentence, truncated to 250 chars plus ``"..."``.
        * ``deadline``: deadline label, or ``"Not specified"``.
        * ``full_text``: the untruncated sentence.
        * ``priority``: base score from ``_PRIORITY_MAP`` (default 1), plus 1
          when the deadline is urgent.
    """
    obligations = []
    if not text:
        return obligations

    # Split on sentence-ending punctuation followed by whitespace and a capital.
    sentences = re.split(r'(?<=[.!?])\s+(?=[A-Z])', text)

    for raw_sentence in sentences:
        sentence = raw_sentence.strip()
        if len(sentence) < 30 or len(sentence) > 1000:
            continue

        if _is_false_positive(sentence):
            continue

        # A list in table order (not a set) keeps the output order stable
        # across runs when several categories share the same priority.
        matched_types = [
            otype
            for otype, patterns in OBLIGATION_PATTERNS.items()
            if any(re.search(pat, sentence, re.IGNORECASE) for pat in patterns)
        ]
        if not matched_types:
            continue

        # Prefer the grammatical subject ("The Provider shall ..."); otherwise
        # fall back to the first party name mentioned anywhere in the sentence.
        party = _detect_obligation_direction(sentence)
        if not party:
            party = "Unknown"
            for party_pattern in PARTY_PATTERNS:
                found = re.search(party_pattern, sentence)
                if found:
                    party = found.group(0).strip()
                    break

        deadline, is_urgent = _extract_deadline(sentence)
        description = sentence[:250] + ("..." if len(sentence) > 250 else "")

        for otype in matched_types:
            priority = _PRIORITY_MAP.get(otype, 1)
            if is_urgent:
                priority += 1  # Urgent deadlines get higher priority

            obligations.append({
                "type": otype,
                "party": party,
                "description": description,
                "deadline": deadline,
                "full_text": sentence,
                "priority": priority,
            })

    # Stable sort: equal-priority items keep their order of appearance.
    obligations.sort(key=lambda ob: ob.get("priority", 0), reverse=True)

    return obligations
177
+
178
+
179
def _detect_obligation_direction(sentence):
    """Identify the obligated party from how the sentence opens.

    Only the very start of *sentence* is examined (all patterns are anchored
    with ``^``) and matching is case-insensitive.

    Returns:
        * the role word exactly as written in the sentence, when it opens
          with an optional "The", a known role name and shall/must/will;
        * ``"Both parties"`` for "Each party ..." / "Both parties ...";
        * ``"Neither party"`` for "Neither party ..." / "No party ...";
        * ``None`` when none of these openings is present.
    """
    # Openings whose subject is a named role; the captured word is returned.
    role_openers = (
        r"^(?:The\s+)?(Provider|Company|Licensor|Landlord|Employer|Seller|Vendor)\s+(?:shall|must|will)",
        r"^(?:The\s+)?(Customer|Client|Licensee|Tenant|Employee|Buyer)\s+(?:shall|must|will)",
    )
    for opener in role_openers:
        hit = re.search(opener, sentence, re.IGNORECASE)
        if hit is not None:
            return hit.group(1)

    # Collective openings map to a fixed label instead of captured text.
    collective_openers = (
        (r"^(?:Each|Both)\s+part(?:y|ies)\s+(?:shall|must|will)", "Both parties"),
        (r"^(?:Neither|No)\s+party\s+(?:shall|may)", "Neither party"),
    )
    for opener, label in collective_openers:
        if re.search(opener, sentence, re.IGNORECASE) is not None:
            return label

    return None
192
+
193
+
194
def render_obligations_html(obligations):
    """Render obligations as HTML cards for Gradio.

    The output is a summary grid (one tile per obligation type with its
    count) followed by one section per type containing a card for each
    obligation. Types are ordered alphabetically.

    Party, deadline and description come from the analysed contract text,
    so they are HTML-escaped before being placed in the markup.

    Args:
        obligations: Iterable of dicts as produced by ``extract_obligations``.
            ``type`` is required; ``party``, ``deadline``, ``description``
            and ``priority`` fall back to defaults when absent.

    Returns:
        An HTML string. When *obligations* is empty, a single placeholder
        ``<div>`` reading "No obligations detected.".
    """
    # Imported locally so the block carries its own dependency.
    from html import escape

    if not obligations:
        return '<div style="padding:16px;color:#6b7280;text-align:center;">No obligations detected.</div>'

    # Group by type
    grouped = defaultdict(list)
    for ob in obligations:
        grouped[ob["type"]].append(ob)
    ordered_groups = sorted(grouped.items())

    type_icons = {
        "monetary": "💰",
        "compliance": "⚖️",
        "reporting": "📊",
        "delivery": "📦",
        "termination": "🛑",
    }
    type_colors = {
        "monetary": "#22c55e",
        "compliance": "#f59e0b",
        "reporting": "#3b82f6",
        "delivery": "#8b5cf6",
        "termination": "#ef4444",
    }

    # Fragments are collected and joined once at the end.
    parts = ['<div style="font-family:system-ui,sans-serif;">']

    # Summary counts
    parts.append('<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(120px,1fr));gap:8px;margin-bottom:16px;">')
    for otype, obs in ordered_groups:
        color = type_colors.get(otype, "#6b7280")
        icon = type_icons.get(otype, "📋")
        parts.append(f'''
        <div style="text-align:center;padding:10px;border-radius:8px;background:{color}15;border:1px solid {color}30;">
            <div style="font-size:20px;">{icon}</div>
            <div style="font-size:20px;font-weight:700;color:{color};">{len(obs)}</div>
            <div style="font-size:11px;color:{color};text-transform:capitalize;">{escape(str(otype))}</div>
        </div>
        ''')
    parts.append('</div>')

    # Individual cards
    for otype, obs in ordered_groups:
        color = type_colors.get(otype, "#6b7280")
        icon = type_icons.get(otype, "📋")
        heading = escape(str(otype).title())
        parts.append(f'<h3 style="font-size:14px;color:#374151;margin:16px 0 8px 0;border-bottom:2px solid {color}30;padding-bottom:4px;">{icon} {heading} Obligations</h3>')
        for ob in obs:
            priority = ob.get("priority", 1)
            # The badge is trusted static markup, so it is not escaped.
            priority_badge = ""
            if priority >= 3:
                priority_badge = '<span style="font-size:9px;background:#fef2f2;color:#dc2626;padding:1px 4px;border-radius:3px;margin-left:4px;">HIGH PRIORITY</span>'
            elif priority >= 2:
                priority_badge = '<span style="font-size:9px;background:#fefce8;color:#ca8a04;padding:1px 4px;border-radius:3px;margin-left:4px;">MEDIUM</span>'

            # str() first: escape() only accepts strings.
            party = escape(str(ob.get("party", "Unknown")))
            deadline = escape(str(ob.get("deadline", "Not specified")))
            description = escape(str(ob.get("description", "")))

            parts.append(f'''
            <div style="border:1px solid #e5e7eb;border-left:4px solid {color};border-radius:6px;padding:10px;margin-bottom:8px;background:#fafafa;">
                <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
                    <span style="font-size:12px;font-weight:600;color:{color};">{party}{priority_badge}</span>
                    <span style="font-size:11px;color:#6b7280;background:#f3f4f6;padding:2px 8px;border-radius:4px;">{deadline}</span>
                </div>
                <p style="font-size:12px;color:#4b5563;margin:0;line-height:1.5;">{description}</p>
            </div>
            ''')

    parts.append('</div>')
    return "".join(parts)