gaurv007 committed on
Commit
30580c9
·
verified ·
1 Parent(s): 423d2a9

fix(v4.3): redlining.py — bug report fixes (10 issues)

Browse files
Files changed (1) hide show
  1. redlining.py +72 -22
redlining.py CHANGED
@@ -408,24 +408,51 @@ Write the refined safer clause (adapt the template to this specific contract's c
408
 
409
 
410
  # ═══════════════════════════════════════════════════════════════════════
411
- # PUBLIC API
412
  # ═══════════════════════════════════════════════════════════════════════
413
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  def generate_redlines(analysis_result, use_llm=True):
415
  """
416
  Generate redline suggestions for all flagged clauses in the analysis.
417
-
418
- Returns list of redline suggestions:
419
- [{
420
- "original_text": str,
421
- "clause_label": str,
422
- "risk_level": str,
423
- "safe_alternative": str,
424
- "legal_basis": str,
425
- "consumer_standard": str,
426
- "tier": "template" | "llm_refined",
427
- "confidence": str,
428
- }]
429
  """
430
  if analysis_result is None:
431
  return []
@@ -434,23 +461,40 @@ def generate_redlines(analysis_result, use_llm=True):
434
  if not clauses:
435
  return []
436
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  redlines = []
438
- seen_labels = set() # Deduplicate by label
439
 
440
  # Sort by risk level: CRITICAL first
441
  risk_order = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}
442
- sorted_clauses = sorted(clauses, key=lambda c: risk_order.get(c.get("risk", "LOW"), 3))
 
 
 
443
 
444
- for clause in sorted_clauses:
445
- label = clause.get("label", "")
446
  risk = clause.get("risk", "LOW")
447
  text = clause.get("text", "")
448
 
449
- # Skip LOW risk and already-seen labels
450
- if risk == "LOW" or label in seen_labels:
451
- continue
452
- seen_labels.add(label)
453
-
454
  # Find matching templates (Tier 1 + Tier 2)
455
  matches = _find_similar_templates(label, text)
456
  if not matches:
@@ -458,6 +502,12 @@ def generate_redlines(analysis_result, use_llm=True):
458
 
459
  best_key, best_template, score = matches[0]
460
 
 
 
 
 
 
 
461
  # Tier 3: Try LLM refinement if enabled
462
  refined_text = None
463
  tier = "template"
 
408
 
409
 
410
  # ═══════════════════════════════════════════════════════════════════════
411
+ # FIX v4.3: Keyword validation — ensure original clause matches the label
412
  # ═══════════════════════════════════════════════════════════════════════
413
 
414
+ _LABEL_KEYWORDS = {
415
+ "Limitation of liability": ["liable", "liability", "damages", "limitation of liability", "in no event"],
416
+ "Uncapped Liability": ["uncapped", "unlimited", "no limit", "no cap"],
417
+ "Governing Law": ["governed by", "governing law", "jurisdiction", "laws of"],
418
+ "Termination for Convenience": ["terminat", "cancel", "convenience", "without cause"],
419
+ "Non-Compete": ["non-compete", "not compete", "competition restriction"],
420
+ "No-Solicit of Employees": ["solicit", "recruit", "induce", "encourage", "employee"],
421
+ "No-Solicit of Customers": ["solicit", "customer", "client", "divert"],
422
+ "Non-Disparagement": ["disparag", "defam", "negative", "derogatory"],
423
+ "Arbitration": ["arbitrat", "binding arbitration", "waive", "class action"],
424
+ "IP Ownership Assignment": ["intellectual property", "ip", "assign", "work for hire", "ownership"],
425
+ "Indemnification": ["indemnif", "hold harmless", "defend"],
426
+ "Confidentiality": ["confidential", "non-disclosure", "nda"],
427
+ "Exclusivity": ["exclusive", "exclusivity"],
428
+ "Anti-Assignment": ["assign", "transfer", "without consent"],
429
+ "Content removal": ["remove", "delete", "content"],
430
+ "Unilateral change": ["modify", "change", "amend", "sole discretion"],
431
+ "Unilateral termination": ["terminat", "suspend", "at any time"],
432
+ "Liquidated Damages": ["liquidated", "pre-determined", "stipulated"],
433
+ "Choice of law": ["governed by", "laws of", "choice of law"],
434
+ "Jurisdiction": ["jurisdiction", "courts of", "exclusive jurisdiction"],
435
+ "Contract by using": ["by using", "continued use", "acceptance"],
436
+ }
437
+
438
+
439
+ def _validate_clause_match(label, clause_text):
440
+ """FIX v4.3: Validate that a clause actually contains content relevant to its label."""
441
+ keywords = _LABEL_KEYWORDS.get(label, [])
442
+ if not keywords:
443
+ return True # No keywords defined — allow match
444
+ text_lower = clause_text.lower()
445
+ return any(kw in text_lower for kw in keywords)
446
+
447
+
448
  def generate_redlines(analysis_result, use_llm=True):
449
  """
450
  Generate redline suggestions for all flagged clauses in the analysis.
451
+
452
+ FIX v4.3:
453
+ - Validates original clause matches label keywords before showing
454
+ - Deduplicates by suggested text (catches template mapping bugs)
455
+ - Picks the BEST clause for each label (highest confidence + keyword match)
 
 
 
 
 
 
 
456
  """
457
  if analysis_result is None:
458
  return []
 
461
  if not clauses:
462
  return []
463
 
464
+ # FIX v4.3: Group clauses by label and pick the best match for each
465
+ label_clauses = {}
466
+ for clause in clauses:
467
+ label = clause.get("label", "")
468
+ risk = clause.get("risk", "LOW")
469
+ text = clause.get("text", "")
470
+ confidence = clause.get("confidence", 0) or 0
471
+
472
+ if risk == "LOW":
473
+ continue
474
+
475
+ # Validate that the clause text actually matches the label
476
+ if not _validate_clause_match(label, text):
477
+ continue
478
+
479
+ # Keep the highest-confidence match for each label
480
+ if label not in label_clauses or confidence > (label_clauses[label].get("confidence", 0) or 0):
481
+ label_clauses[label] = clause
482
+
483
  redlines = []
484
+ seen_alternatives = set() # FIX v4.3: Dedup by suggested text
485
 
486
  # Sort by risk level: CRITICAL first
487
  risk_order = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}
488
+ sorted_labels = sorted(
489
+ label_clauses.keys(),
490
+ key=lambda l: risk_order.get(label_clauses[l].get("risk", "LOW"), 3)
491
+ )
492
 
493
+ for label in sorted_labels:
494
+ clause = label_clauses[label]
495
  risk = clause.get("risk", "LOW")
496
  text = clause.get("text", "")
497
 
 
 
 
 
 
498
  # Find matching templates (Tier 1 + Tier 2)
499
  matches = _find_similar_templates(label, text)
500
  if not matches:
 
502
 
503
  best_key, best_template, score = matches[0]
504
 
505
+ # FIX v4.3: Dedup — skip if this template's alternative was already used
506
+ alt_fingerprint = best_template["safe_alternative"][:120]
507
+ if alt_fingerprint in seen_alternatives:
508
+ continue
509
+ seen_alternatives.add(alt_fingerprint)
510
+
511
  # Tier 3: Try LLM refinement if enabled
512
  refined_text = None
513
  tier = "template"