Spaces:
Running
Running
fix(v4.3.1): app.py — Run 4 delta fixes (A-E)
Browse files
app.py
CHANGED
|
@@ -294,6 +294,56 @@ DESC_MAP.update({
|
|
| 294 |
|
| 295 |
RISK_WEIGHTS = {"CRITICAL": 40, "HIGH": 20, "MEDIUM": 10, "LOW": 3}
|
| 296 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
RISK_STYLES = {
|
| 298 |
"CRITICAL": ("#dc2626", "#fef2f2", "⚠️"),
|
| 299 |
"HIGH": ("#ea580c", "#fff7ed", "⚡"),
|
|
@@ -617,16 +667,29 @@ _LABEL_GUARDRAILS = {
|
|
| 617 |
r'uncapped|unlimited.{0,10}liabilit|no.{0,10}(limit|cap).{0,10}liabilit',
|
| 618 |
re.IGNORECASE
|
| 619 |
),
|
| 620 |
-
# FIX v4.3: ROFR fires on "right, title, and interest" in IP clauses — require ROFR-specific phrases
|
| 621 |
"ROFR/ROFO/ROFN": re.compile(
|
| 622 |
r'right\s+of\s+first\s+(?:refusal|offer|negotiation)|ROFR|ROFO|ROFN',
|
| 623 |
re.IGNORECASE
|
| 624 |
),
|
| 625 |
-
# FIX v4.3: Renewal Term fires on "twelve (12) months" in liability caps — require renewal-specific phrases
|
| 626 |
"Renewal Term": re.compile(
|
| 627 |
r'renew(?:al)?|successive\s+term|auto(?:matic(?:ally)?)?\s*[\-\s]?renew|non[\-\s]?renewal',
|
| 628 |
re.IGNORECASE
|
| 629 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 630 |
}
|
| 631 |
|
| 632 |
# FIX v4.3: Exclusion patterns — even if guardrail passes, exclude if contra-indicators present
|
|
@@ -639,12 +702,24 @@ _LABEL_EXCLUSIONS = {
|
|
| 639 |
r'limitation\s+of\s+liabilit|shall\s+not\s+be\s+liable|indemnif|hold\s+harmless|defend\s+and',
|
| 640 |
re.IGNORECASE
|
| 641 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 642 |
}
|
| 643 |
|
| 644 |
-
# FIX v4.3: Minimum confidence thresholds per label
|
| 645 |
_LABEL_MIN_CONFIDENCE = {
|
| 646 |
"ROFR/ROFO/ROFN": 0.65,
|
| 647 |
"Renewal Term": 0.70,
|
|
|
|
|
|
|
| 648 |
}
|
| 649 |
|
| 650 |
def _apply_guardrails(label, text, confidence):
|
|
@@ -712,6 +787,7 @@ def classify_cuad(clause_text):
|
|
| 712 |
label, conf = _apply_guardrails(label, clause_text, conf)
|
| 713 |
if not (label == "Other" and conf < 0.3):
|
| 714 |
risk = RISK_MAP.get(label, "LOW")
|
|
|
|
| 715 |
results.append({
|
| 716 |
"label": label,
|
| 717 |
"confidence": round(conf, 3),
|
|
@@ -735,6 +811,7 @@ def classify_cuad(clause_text):
|
|
| 735 |
# Only add if different from primary
|
| 736 |
if not results or results[0]["label"] != label2:
|
| 737 |
risk2 = RISK_MAP.get(label2, "LOW")
|
|
|
|
| 738 |
results.append({
|
| 739 |
"label": label2,
|
| 740 |
"confidence": round(conf2, 3),
|
|
@@ -821,6 +898,7 @@ def classify_cuad_batch(clauses, batch_size=8):
|
|
| 821 |
label, conf = _apply_guardrails(label, original_text, conf)
|
| 822 |
if not (label == "Other" and conf < 0.3):
|
| 823 |
risk = RISK_MAP.get(label, "LOW")
|
|
|
|
| 824 |
results.append({
|
| 825 |
"label": label,
|
| 826 |
"confidence": round(conf, 3),
|
|
@@ -842,6 +920,7 @@ def classify_cuad_batch(clauses, batch_size=8):
|
|
| 842 |
if not (label2 == "Other" and conf2 < 0.3):
|
| 843 |
if not results or results[0]["label"] != label2:
|
| 844 |
risk2 = RISK_MAP.get(label2, "LOW")
|
|
|
|
| 845 |
results.append({
|
| 846 |
"label": label2,
|
| 847 |
"confidence": round(conf2, 3),
|
|
|
|
| 294 |
|
| 295 |
RISK_WEIGHTS = {"CRITICAL": 40, "HIGH": 20, "MEDIUM": 10, "LOW": 3}
|
| 296 |
|
| 297 |
+
# FIX v4.3.1: Content-based severity refinement
|
| 298 |
+
# Default RISK_MAP assigns severity by label alone. _refine_severity() downgrades severity
|
| 299 |
+
# when the clause text contains mitigating language (caps, carve-outs, time limits).
|
| 300 |
+
# Structure: clause label -> {target severity level -> compiled pattern}.
# _refine_severity() searches the clause text with each pattern and may lower
# the label's default risk to the matching target level (never raises it).
_SEVERITY_MITIGATORS = {
    "IP Ownership Assignment": {
        # Downgrade from CRITICAL to HIGH if pre-existing IP is carved out.
        # The separator is optional so the closed spelling "preexisting" counts too.
        "HIGH": re.compile(r'pre[\-\s]?existing|background\s+ip|prior\s+(?:ip|intellectual)', re.IGNORECASE),
        # Downgrade to MEDIUM if both carve-out AND license-back exist
        "MEDIUM": re.compile(r'(?:pre[\-\s]?existing|background\s+ip).*(?:license|retain)', re.IGNORECASE | re.DOTALL),
    },
    "Limitation of liability": {
        # Downgrade from CRITICAL to HIGH if there's any cap
        "HIGH": re.compile(r'shall\s+not\s+exceed|aggregate.{0,20}(?:not\s+exceed|limited\s+to)|cap(?:ped)?\s+at', re.IGNORECASE),
        # Downgrade to MEDIUM if there's a reasonable cap AND exceptions for gross negligence
        "MEDIUM": re.compile(r'(?:shall\s+not\s+exceed|limited\s+to).{0,80}(?:gross\s+negligence|willful|fraud)', re.IGNORECASE | re.DOTALL),
    },
    "Termination for Convenience": {
        # Downgrade from CRITICAL to HIGH if there's a notice period.
        # The optional ")" after the digits accepts the usual drafting form
        # "thirty (30) days' prior written notice" as well as plain "30 days".
        "HIGH": re.compile(r'\d+\)?\s+(?:day|month|week)s?.{0,20}(?:prior|advance|written)\s+notice', re.IGNORECASE),
        # Downgrade to MEDIUM if mutual termination right
        "MEDIUM": re.compile(r'either\s+party\s+may\s+terminat', re.IGNORECASE),
    },
    "Non-Compete": {
        # Downgrade from HIGH to MEDIUM if time-limited
        "MEDIUM": re.compile(r'(?:period\s+of|for)\s+(?:\d+|one|two|three|six|twelve)\s+(?:\(\d+\)\s+)?(?:month|year)', re.IGNORECASE),
    },
    "Arbitration": {
        # Downgrade from CRITICAL to HIGH if opt-out is available
        "HIGH": re.compile(r'opt[\-\s]?out|may\s+elect|small\s+claims', re.IGNORECASE),
    },
}
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
def _refine_severity(label, text, default_risk):
    """FIX v4.3.1: Refine severity based on clause content, not just label.

    Looks up the mitigating patterns registered for ``label`` in
    ``_SEVERITY_MITIGATORS`` and returns the lowest severity level whose
    pattern is found in ``text``, provided that level is strictly below
    ``default_risk``. Severity is only ever downgraded, never upgraded.

    Args:
        label: Clause label used as the key into ``_SEVERITY_MITIGATORS``.
        text: Clause text to search. ``None`` or an empty string is treated
            as containing no mitigating language.
        default_risk: Severity assigned by label alone ("CRITICAL", "HIGH",
            "MEDIUM" or "LOW"). An unrecognised value is returned unchanged.

    Returns:
        The downgraded severity level, or ``default_risk`` when no mitigator
        applies.
    """
    mitigators = _SEVERITY_MITIGATORS.get(label)
    # No patterns for this label, or nothing to search: keep the default.
    # Guarding on text also avoids a TypeError from pattern.search(None).
    if not mitigators or not text:
        return default_risk

    level_order = {"CRITICAL": 4, "HIGH": 3, "MEDIUM": 2, "LOW": 1}
    default_rank = level_order.get(default_risk, 0)

    # Check from lowest severity up — return the lowest matching level
    for level in ("LOW", "MEDIUM", "HIGH"):
        # Only downgrade, never upgrade: once a level is not strictly below
        # the default, no later (higher) level can be either.
        if level_order[level] >= default_rank:
            break
        pattern = mitigators.get(level)
        if pattern and pattern.search(text):
            return level

    return default_risk
|
| 346 |
+
|
| 347 |
RISK_STYLES = {
|
| 348 |
"CRITICAL": ("#dc2626", "#fef2f2", "⚠️"),
|
| 349 |
"HIGH": ("#ea580c", "#fff7ed", "⚡"),
|
|
|
|
| 667 |
r'uncapped|unlimited.{0,10}liabilit|no.{0,10}(limit|cap).{0,10}liabilit',
|
| 668 |
re.IGNORECASE
|
| 669 |
),
|
|
|
|
| 670 |
"ROFR/ROFO/ROFN": re.compile(
|
| 671 |
r'right\s+of\s+first\s+(?:refusal|offer|negotiation)|ROFR|ROFO|ROFN',
|
| 672 |
re.IGNORECASE
|
| 673 |
),
|
|
|
|
| 674 |
"Renewal Term": re.compile(
|
| 675 |
r'renew(?:al)?|successive\s+term|auto(?:matic(?:ally)?)?\s*[\-\s]?renew|non[\-\s]?renewal',
|
| 676 |
re.IGNORECASE
|
| 677 |
),
|
| 678 |
+
# FIX v4.3.1: Revenue/Profit Sharing fires on IP assignment "rights for value" language
|
| 679 |
+
"Revenue/Profit Sharing": re.compile(
|
| 680 |
+
r'revenue\s+shar|profit\s+shar|revenue\s+split|percentage\s+of\s+revenue|royalt(?:y|ies)|gross\s+profit',
|
| 681 |
+
re.IGNORECASE
|
| 682 |
+
),
|
| 683 |
+
# FIX v4.3.1: Minimum Commitment fires on fee schedules — require explicit minimum language
|
| 684 |
+
"Minimum Commitment": re.compile(
|
| 685 |
+
r'minimum\s+(?:purchase|order|spend|volume|commitment)|take[\-\s]or[\-\s]pay|minimum\s+annual',
|
| 686 |
+
re.IGNORECASE
|
| 687 |
+
),
|
| 688 |
+
# FIX v4.3.1: Non-Disparagement fires on arbitration/class-waiver language
|
| 689 |
+
"Non-Disparagement": re.compile(
|
| 690 |
+
r'disparag|defam|false\s+statement|negative\s+statement|social\s+media|reputat',
|
| 691 |
+
re.IGNORECASE
|
| 692 |
+
),
|
| 693 |
}
|
| 694 |
|
| 695 |
# FIX v4.3: Exclusion patterns — even if guardrail passes, exclude if contra-indicators present
|
|
|
|
| 702 |
r'limitation\s+of\s+liabilit|shall\s+not\s+be\s+liable|indemnif|hold\s+harmless|defend\s+and',
|
| 703 |
re.IGNORECASE
|
| 704 |
),
|
| 705 |
+
# FIX v4.3.1: Revenue/Profit Sharing must NOT fire on IP assignment or license grant clauses
|
| 706 |
+
"Revenue/Profit Sharing": re.compile(
|
| 707 |
+
r'assigns?\s+to|irrevocab(?:ly|le)\s+assign|work[\-\s](?:made\s+)?for[\-\s]hire|license\s+to\s+access|license\s+grant|non[\-\s]exclusive\s+license',
|
| 708 |
+
re.IGNORECASE
|
| 709 |
+
),
|
| 710 |
+
# FIX v4.3.1: Non-Disparagement must NOT fire on arbitration/dispute sections
|
| 711 |
+
"Non-Disparagement": re.compile(
|
| 712 |
+
r'arbitrat|(?<!\w)aaa(?!\w)|(?<!\w)jams(?!\w)|class\s+action|collective\s+(?:proceeding|action)|waives?\s+any\s+right\s+to\s+participate|binding\s+arbitration',
|
| 713 |
+
re.IGNORECASE
|
| 714 |
+
),
|
| 715 |
}
|
| 716 |
|
| 717 |
+
# FIX v4.3: Minimum confidence thresholds per label
|
| 718 |
_LABEL_MIN_CONFIDENCE = {
|
| 719 |
"ROFR/ROFO/ROFN": 0.65,
|
| 720 |
"Renewal Term": 0.70,
|
| 721 |
+
"Revenue/Profit Sharing": 0.65, # FIX v4.3.1
|
| 722 |
+
"Minimum Commitment": 0.65, # FIX v4.3.1
|
| 723 |
}
|
| 724 |
|
| 725 |
def _apply_guardrails(label, text, confidence):
|
|
|
|
| 787 |
label, conf = _apply_guardrails(label, clause_text, conf)
|
| 788 |
if not (label == "Other" and conf < 0.3):
|
| 789 |
risk = RISK_MAP.get(label, "LOW")
|
| 790 |
+
risk = _refine_severity(label, clause_text, risk)
|
| 791 |
results.append({
|
| 792 |
"label": label,
|
| 793 |
"confidence": round(conf, 3),
|
|
|
|
| 811 |
# Only add if different from primary
|
| 812 |
if not results or results[0]["label"] != label2:
|
| 813 |
risk2 = RISK_MAP.get(label2, "LOW")
|
| 814 |
+
risk2 = _refine_severity(label2, clause_text, risk2)
|
| 815 |
results.append({
|
| 816 |
"label": label2,
|
| 817 |
"confidence": round(conf2, 3),
|
|
|
|
| 898 |
label, conf = _apply_guardrails(label, original_text, conf)
|
| 899 |
if not (label == "Other" and conf < 0.3):
|
| 900 |
risk = RISK_MAP.get(label, "LOW")
|
| 901 |
+
risk = _refine_severity(label, original_text, risk)
|
| 902 |
results.append({
|
| 903 |
"label": label,
|
| 904 |
"confidence": round(conf, 3),
|
|
|
|
| 920 |
if not (label2 == "Other" and conf2 < 0.3):
|
| 921 |
if not results or results[0]["label"] != label2:
|
| 922 |
risk2 = RISK_MAP.get(label2, "LOW")
|
| 923 |
+
risk2 = _refine_severity(label2, original_text, risk2)
|
| 924 |
results.append({
|
| 925 |
"label": label2,
|
| 926 |
"confidence": round(conf2, 3),
|