gaurv007 committed on
Commit
ccf342b
·
verified ·
1 Parent(s): 5cd7d1f

fix(v4.3.1): app.py — Run 4 delta fixes (A-E)

Browse files
Files changed (1) hide show
  1. app.py +82 -3
app.py CHANGED
@@ -294,6 +294,56 @@ DESC_MAP.update({
294
 
295
  RISK_WEIGHTS = {"CRITICAL": 40, "HIGH": 20, "MEDIUM": 10, "LOW": 3}
296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  RISK_STYLES = {
298
  "CRITICAL": ("#dc2626", "#fef2f2", "⚠️"),
299
  "HIGH": ("#ea580c", "#fff7ed", "⚡"),
@@ -617,16 +667,29 @@ _LABEL_GUARDRAILS = {
617
  r'uncapped|unlimited.{0,10}liabilit|no.{0,10}(limit|cap).{0,10}liabilit',
618
  re.IGNORECASE
619
  ),
620
- # FIX v4.3: ROFR fires on "right, title, and interest" in IP clauses — require ROFR-specific phrases
621
  "ROFR/ROFO/ROFN": re.compile(
622
  r'right\s+of\s+first\s+(?:refusal|offer|negotiation)|ROFR|ROFO|ROFN',
623
  re.IGNORECASE
624
  ),
625
- # FIX v4.3: Renewal Term fires on "twelve (12) months" in liability caps — require renewal-specific phrases
626
  "Renewal Term": re.compile(
627
  r'renew(?:al)?|successive\s+term|auto(?:matic(?:ally)?)?\s*[\-\s]?renew|non[\-\s]?renewal',
628
  re.IGNORECASE
629
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
630
  }
631
 
632
  # FIX v4.3: Exclusion patterns — even if guardrail passes, exclude if contra-indicators present
@@ -639,12 +702,24 @@ _LABEL_EXCLUSIONS = {
639
  r'limitation\s+of\s+liabilit|shall\s+not\s+be\s+liable|indemnif|hold\s+harmless|defend\s+and',
640
  re.IGNORECASE
641
  ),
 
 
 
 
 
 
 
 
 
 
642
  }
643
 
644
- # FIX v4.3: Minimum confidence thresholds per label (overrides the per-class _CUAD_THRESHOLDS)
645
  _LABEL_MIN_CONFIDENCE = {
646
  "ROFR/ROFO/ROFN": 0.65,
647
  "Renewal Term": 0.70,
 
 
648
  }
649
 
650
  def _apply_guardrails(label, text, confidence):
@@ -712,6 +787,7 @@ def classify_cuad(clause_text):
712
  label, conf = _apply_guardrails(label, clause_text, conf)
713
  if not (label == "Other" and conf < 0.3):
714
  risk = RISK_MAP.get(label, "LOW")
 
715
  results.append({
716
  "label": label,
717
  "confidence": round(conf, 3),
@@ -735,6 +811,7 @@ def classify_cuad(clause_text):
735
  # Only add if different from primary
736
  if not results or results[0]["label"] != label2:
737
  risk2 = RISK_MAP.get(label2, "LOW")
 
738
  results.append({
739
  "label": label2,
740
  "confidence": round(conf2, 3),
@@ -821,6 +898,7 @@ def classify_cuad_batch(clauses, batch_size=8):
821
  label, conf = _apply_guardrails(label, original_text, conf)
822
  if not (label == "Other" and conf < 0.3):
823
  risk = RISK_MAP.get(label, "LOW")
 
824
  results.append({
825
  "label": label,
826
  "confidence": round(conf, 3),
@@ -842,6 +920,7 @@ def classify_cuad_batch(clauses, batch_size=8):
842
  if not (label2 == "Other" and conf2 < 0.3):
843
  if not results or results[0]["label"] != label2:
844
  risk2 = RISK_MAP.get(label2, "LOW")
 
845
  results.append({
846
  "label": label2,
847
  "confidence": round(conf2, 3),
 
294
 
295
  RISK_WEIGHTS = {"CRITICAL": 40, "HIGH": 20, "MEDIUM": 10, "LOW": 3}
296
 
297
+ # FIX v4.3.1: Content-based severity refinement
298
+ # Default RISK_MAP assigns severity by label alone. This function downgrades severity
299
+ # when the clause text contains mitigating language (caps, carve-outs, time limits).
300
+ _SEVERITY_MITIGATORS = {
301
+ "IP Ownership Assignment": {
302
+ # Downgrade from CRITICAL to HIGH if pre-existing IP is carved out
303
+ "HIGH": re.compile(r'pre[\-\s]existing|background\s+ip|prior\s+(?:ip|intellectual)', re.IGNORECASE),
304
+ # Downgrade to MEDIUM if both carve-out AND license-back exist
305
+ "MEDIUM": re.compile(r'(?:pre[\-\s]existing|background\s+ip).*(?:license|retain)', re.IGNORECASE | re.DOTALL),
306
+ },
307
+ "Limitation of liability": {
308
+ # Downgrade from CRITICAL to HIGH if there's any cap
309
+ "HIGH": re.compile(r'shall\s+not\s+exceed|aggregate.{0,20}(?:not\s+exceed|limited\s+to)|cap(?:ped)?\s+at', re.IGNORECASE),
310
+ # Downgrade to MEDIUM if there's a reasonable cap AND exceptions for gross negligence
311
+ "MEDIUM": re.compile(r'(?:shall\s+not\s+exceed|limited\s+to).{0,80}(?:gross\s+negligence|willful|fraud)', re.IGNORECASE | re.DOTALL),
312
+ },
313
+ "Termination for Convenience": {
314
+ # Downgrade from CRITICAL to HIGH if there's a notice period
315
+ "HIGH": re.compile(r'(?:\d+)\s+(?:day|month|week)s?.{0,20}(?:prior|advance|written)\s+notice', re.IGNORECASE),
316
+ # Downgrade to MEDIUM if mutual termination right
317
+ "MEDIUM": re.compile(r'either\s+party\s+may\s+terminat', re.IGNORECASE),
318
+ },
319
+ "Non-Compete": {
320
+ # Downgrade from HIGH to MEDIUM if time-limited
321
+ "MEDIUM": re.compile(r'(?:period\s+of|for)\s+(?:\d+|one|two|three|six|twelve)\s+(?:\(\d+\)\s+)?(?:month|year)', re.IGNORECASE),
322
+ },
323
+ "Arbitration": {
324
+ # Downgrade from CRITICAL to HIGH if opt-out is available
325
+ "HIGH": re.compile(r'opt[\-\s]?out|may\s+elect|small\s+claims', re.IGNORECASE),
326
+ },
327
+ }
328
+
329
+
330
+ def _refine_severity(label, text, default_risk):
331
+ """FIX v4.3.1: Refine severity based on clause content, not just label."""
332
+ mitigators = _SEVERITY_MITIGATORS.get(label)
333
+ if not mitigators:
334
+ return default_risk
335
+
336
+ # Check from lowest severity up — return the lowest matching level
337
+ for level in ["MEDIUM", "HIGH"]:
338
+ pattern = mitigators.get(level)
339
+ if pattern and pattern.search(text):
340
+ # Only downgrade, never upgrade
341
+ level_order = {"CRITICAL": 4, "HIGH": 3, "MEDIUM": 2, "LOW": 1}
342
+ if level_order.get(level, 0) < level_order.get(default_risk, 0):
343
+ return level
344
+
345
+ return default_risk
346
+
347
  RISK_STYLES = {
348
  "CRITICAL": ("#dc2626", "#fef2f2", "⚠️"),
349
  "HIGH": ("#ea580c", "#fff7ed", "⚡"),
 
667
  r'uncapped|unlimited.{0,10}liabilit|no.{0,10}(limit|cap).{0,10}liabilit',
668
  re.IGNORECASE
669
  ),
 
670
  "ROFR/ROFO/ROFN": re.compile(
671
  r'right\s+of\s+first\s+(?:refusal|offer|negotiation)|ROFR|ROFO|ROFN',
672
  re.IGNORECASE
673
  ),
 
674
  "Renewal Term": re.compile(
675
  r'renew(?:al)?|successive\s+term|auto(?:matic(?:ally)?)?\s*[\-\s]?renew|non[\-\s]?renewal',
676
  re.IGNORECASE
677
  ),
678
+ # FIX v4.3.1: Revenue/Profit Sharing fires on IP assignment "rights for value" language
679
+ "Revenue/Profit Sharing": re.compile(
680
+ r'revenue\s+shar|profit\s+shar|revenue\s+split|percentage\s+of\s+revenue|royalt(?:y|ies)|gross\s+profit',
681
+ re.IGNORECASE
682
+ ),
683
+ # FIX v4.3.1: Minimum Commitment fires on fee schedules — require explicit minimum language
684
+ "Minimum Commitment": re.compile(
685
+ r'minimum\s+(?:purchase|order|spend|volume|commitment)|take[\-\s]or[\-\s]pay|minimum\s+annual',
686
+ re.IGNORECASE
687
+ ),
688
+ # FIX v4.3.1: Non-Disparagement fires on arbitration/class-waiver language
689
+ "Non-Disparagement": re.compile(
690
+ r'disparag|defam|false\s+statement|negative\s+statement|social\s+media|reputat',
691
+ re.IGNORECASE
692
+ ),
693
  }
694
 
695
  # FIX v4.3: Exclusion patterns — even if guardrail passes, exclude if contra-indicators present
 
702
  r'limitation\s+of\s+liabilit|shall\s+not\s+be\s+liable|indemnif|hold\s+harmless|defend\s+and',
703
  re.IGNORECASE
704
  ),
705
+ # FIX v4.3.1: Revenue/Profit Sharing must NOT fire on IP assignment or license grant clauses
706
+ "Revenue/Profit Sharing": re.compile(
707
+ r'assigns?\s+to|irrevocab(?:ly|le)\s+assign|work[\-\s](?:made\s+)?for[\-\s]hire|license\s+to\s+access|license\s+grant|non[\-\s]exclusive\s+license',
708
+ re.IGNORECASE
709
+ ),
710
+ # FIX v4.3.1: Non-Disparagement must NOT fire on arbitration/dispute sections
711
+ "Non-Disparagement": re.compile(
712
+ r'arbitrat|(?<!\w)aaa(?!\w)|(?<!\w)jams(?!\w)|class\s+action|collective\s+(?:proceeding|action)|waives?\s+any\s+right\s+to\s+participate|binding\s+arbitration',
713
+ re.IGNORECASE
714
+ ),
715
  }
716
 
717
+ # FIX v4.3: Minimum confidence thresholds per label
718
  _LABEL_MIN_CONFIDENCE = {
719
  "ROFR/ROFO/ROFN": 0.65,
720
  "Renewal Term": 0.70,
721
+ "Revenue/Profit Sharing": 0.65, # FIX v4.3.1
722
+ "Minimum Commitment": 0.65, # FIX v4.3.1
723
  }
724
 
725
  def _apply_guardrails(label, text, confidence):
 
787
  label, conf = _apply_guardrails(label, clause_text, conf)
788
  if not (label == "Other" and conf < 0.3):
789
  risk = RISK_MAP.get(label, "LOW")
790
+ risk = _refine_severity(label, clause_text, risk)
791
  results.append({
792
  "label": label,
793
  "confidence": round(conf, 3),
 
811
  # Only add if different from primary
812
  if not results or results[0]["label"] != label2:
813
  risk2 = RISK_MAP.get(label2, "LOW")
814
+ risk2 = _refine_severity(label2, clause_text, risk2)
815
  results.append({
816
  "label": label2,
817
  "confidence": round(conf2, 3),
 
898
  label, conf = _apply_guardrails(label, original_text, conf)
899
  if not (label == "Other" and conf < 0.3):
900
  risk = RISK_MAP.get(label, "LOW")
901
+ risk = _refine_severity(label, original_text, risk)
902
  results.append({
903
  "label": label,
904
  "confidence": round(conf, 3),
 
920
  if not (label2 == "Other" and conf2 < 0.3):
921
  if not results or results[0]["label"] != label2:
922
  risk2 = RISK_MAP.get(label2, "LOW")
923
+ risk2 = _refine_severity(label2, original_text, risk2)
924
  results.append({
925
  "label": label2,
926
  "confidence": round(conf2, 3),