Spaces:
Running
Running
fix(v4.3): obligations.py — bug report fixes (10 issues)
Browse files- obligations.py +20 -0
obligations.py
CHANGED
|
@@ -192,6 +192,26 @@ def extract_obligations(text):
|
|
| 192 |
# Sort by priority (highest first)
|
| 193 |
obligations.sort(key=lambda x: x.get("priority", 0), reverse=True)
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
return obligations
|
| 196 |
|
| 197 |
|
|
|
|
| 192 |
# Sort by priority (highest first)
|
| 193 |
obligations.sort(key=lambda x: x.get("priority", 0), reverse=True)
|
| 194 |
|
| 195 |
+
# FIX v4.3: Deduplicate obligations — same text producing multiple types
|
| 196 |
+
# Keep the more specific type (termination > compliance > reporting > delivery > monetary > any other type)
|
| 197 |
+
_TYPE_PRIORITY = {"termination": 1, "compliance": 2, "reporting": 3, "delivery": 4, "monetary": 5}
|
| 198 |
+
seen_texts = {}
|
| 199 |
+
deduped = []
|
| 200 |
+
for ob in obligations:
|
| 201 |
+
# Dedup key: hash of (first 80 chars of the description) concatenated with the full party string
|
| 202 |
+
key = hash(ob["description"][:80] + ob["party"])
|
| 203 |
+
type_pri = _TYPE_PRIORITY.get(ob["type"], 99)
|
| 204 |
+
if key not in seen_texts:
|
| 205 |
+
seen_texts[key] = (type_pri, len(deduped))
|
| 206 |
+
deduped.append(ob)
|
| 207 |
+
else:
|
| 208 |
+
existing_pri, existing_idx = seen_texts[key]
|
| 209 |
+
if type_pri < existing_pri:
|
| 210 |
+
# This type is more specific — replace
|
| 211 |
+
deduped[existing_idx] = ob
|
| 212 |
+
seen_texts[key] = (type_pri, existing_idx)
|
| 213 |
+
obligations = deduped
|
| 214 |
+
|
| 215 |
return obligations
|
| 216 |
|
| 217 |
|