Spaces:
Sleeping
Sleeping
fix(v4.3): redlining.py — bug report fixes (10 issues)
Browse files- redlining.py +72 -22
redlining.py
CHANGED
|
@@ -408,24 +408,51 @@ Write the refined safer clause (adapt the template to this specific contract's c
|
|
| 408 |
|
| 409 |
|
| 410 |
# ═══════════════════════════════════════════════════════════════════════
|
| 411 |
-
#
|
| 412 |
# ═══════════════════════════════════════════════════════════════════════
|
| 413 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 414 |
def generate_redlines(analysis_result, use_llm=True):
|
| 415 |
"""
|
| 416 |
Generate redline suggestions for all flagged clauses in the analysis.
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
"risk_level": str,
|
| 423 |
-
"safe_alternative": str,
|
| 424 |
-
"legal_basis": str,
|
| 425 |
-
"consumer_standard": str,
|
| 426 |
-
"tier": "template" | "llm_refined",
|
| 427 |
-
"confidence": str,
|
| 428 |
-
}]
|
| 429 |
"""
|
| 430 |
if analysis_result is None:
|
| 431 |
return []
|
|
@@ -434,23 +461,40 @@ def generate_redlines(analysis_result, use_llm=True):
|
|
| 434 |
if not clauses:
|
| 435 |
return []
|
| 436 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 437 |
redlines = []
|
| 438 |
-
|
| 439 |
|
| 440 |
# Sort by risk level: CRITICAL first
|
| 441 |
risk_order = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}
|
| 442 |
-
|
|
|
|
|
|
|
|
|
|
| 443 |
|
| 444 |
-
for
|
| 445 |
-
|
| 446 |
risk = clause.get("risk", "LOW")
|
| 447 |
text = clause.get("text", "")
|
| 448 |
|
| 449 |
-
# Skip LOW risk and already-seen labels
|
| 450 |
-
if risk == "LOW" or label in seen_labels:
|
| 451 |
-
continue
|
| 452 |
-
seen_labels.add(label)
|
| 453 |
-
|
| 454 |
# Find matching templates (Tier 1 + Tier 2)
|
| 455 |
matches = _find_similar_templates(label, text)
|
| 456 |
if not matches:
|
|
@@ -458,6 +502,12 @@ def generate_redlines(analysis_result, use_llm=True):
|
|
| 458 |
|
| 459 |
best_key, best_template, score = matches[0]
|
| 460 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 461 |
# Tier 3: Try LLM refinement if enabled
|
| 462 |
refined_text = None
|
| 463 |
tier = "template"
|
|
|
|
| 408 |
|
| 409 |
|
| 410 |
# ═══════════════════════════════════════════════════════════════════════
|
| 411 |
+
# FIX v4.3: Keyword validation — ensure original clause matches the label
|
| 412 |
# ═══════════════════════════════════════════════════════════════════════
|
| 413 |
|
| 414 |
+
_LABEL_KEYWORDS = {
|
| 415 |
+
"Limitation of liability": ["liable", "liability", "damages", "limitation of liability", "in no event"],
|
| 416 |
+
"Uncapped Liability": ["uncapped", "unlimited", "no limit", "no cap"],
|
| 417 |
+
"Governing Law": ["governed by", "governing law", "jurisdiction", "laws of"],
|
| 418 |
+
"Termination for Convenience": ["terminat", "cancel", "convenience", "without cause"],
|
| 419 |
+
"Non-Compete": ["non-compete", "not compete", "competition restriction"],
|
| 420 |
+
"No-Solicit of Employees": ["solicit", "recruit", "induce", "encourage", "employee"],
|
| 421 |
+
"No-Solicit of Customers": ["solicit", "customer", "client", "divert"],
|
| 422 |
+
"Non-Disparagement": ["disparag", "defam", "negative", "derogatory"],
|
| 423 |
+
"Arbitration": ["arbitrat", "binding arbitration", "waive", "class action"],
|
| 424 |
+
"IP Ownership Assignment": ["intellectual property", "ip", "assign", "work for hire", "ownership"],
|
| 425 |
+
"Indemnification": ["indemnif", "hold harmless", "defend"],
|
| 426 |
+
"Confidentiality": ["confidential", "non-disclosure", "nda"],
|
| 427 |
+
"Exclusivity": ["exclusive", "exclusivity"],
|
| 428 |
+
"Anti-Assignment": ["assign", "transfer", "without consent"],
|
| 429 |
+
"Content removal": ["remove", "delete", "content"],
|
| 430 |
+
"Unilateral change": ["modify", "change", "amend", "sole discretion"],
|
| 431 |
+
"Unilateral termination": ["terminat", "suspend", "at any time"],
|
| 432 |
+
"Liquidated Damages": ["liquidated", "pre-determined", "stipulated"],
|
| 433 |
+
"Choice of law": ["governed by", "laws of", "choice of law"],
|
| 434 |
+
"Jurisdiction": ["jurisdiction", "courts of", "exclusive jurisdiction"],
|
| 435 |
+
"Contract by using": ["by using", "continued use", "acceptance"],
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
|
| 439 |
+
def _validate_clause_match(label, clause_text):
|
| 440 |
+
"""FIX v4.3: Validate that a clause actually contains content relevant to its label."""
|
| 441 |
+
keywords = _LABEL_KEYWORDS.get(label, [])
|
| 442 |
+
if not keywords:
|
| 443 |
+
return True # No keywords defined — allow match
|
| 444 |
+
text_lower = clause_text.lower()
|
| 445 |
+
return any(kw in text_lower for kw in keywords)
|
| 446 |
+
|
| 447 |
+
|
| 448 |
def generate_redlines(analysis_result, use_llm=True):
|
| 449 |
"""
|
| 450 |
Generate redline suggestions for all flagged clauses in the analysis.
|
| 451 |
+
|
| 452 |
+
FIX v4.3:
|
| 453 |
+
- Validates original clause matches label keywords before showing
|
| 454 |
+
- Deduplicates by suggested text (catches template mapping bugs)
|
| 455 |
+
- Picks the BEST clause for each label (highest confidence + keyword match)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 456 |
"""
|
| 457 |
if analysis_result is None:
|
| 458 |
return []
|
|
|
|
| 461 |
if not clauses:
|
| 462 |
return []
|
| 463 |
|
| 464 |
+
# FIX v4.3: Group clauses by label and pick the best match for each
|
| 465 |
+
label_clauses = {}
|
| 466 |
+
for clause in clauses:
|
| 467 |
+
label = clause.get("label", "")
|
| 468 |
+
risk = clause.get("risk", "LOW")
|
| 469 |
+
text = clause.get("text", "")
|
| 470 |
+
confidence = clause.get("confidence", 0) or 0
|
| 471 |
+
|
| 472 |
+
if risk == "LOW":
|
| 473 |
+
continue
|
| 474 |
+
|
| 475 |
+
# Validate that the clause text actually matches the label
|
| 476 |
+
if not _validate_clause_match(label, text):
|
| 477 |
+
continue
|
| 478 |
+
|
| 479 |
+
# Keep the highest-confidence match for each label
|
| 480 |
+
if label not in label_clauses or confidence > (label_clauses[label].get("confidence", 0) or 0):
|
| 481 |
+
label_clauses[label] = clause
|
| 482 |
+
|
| 483 |
redlines = []
|
| 484 |
+
seen_alternatives = set() # FIX v4.3: Dedup by suggested text
|
| 485 |
|
| 486 |
# Sort by risk level: CRITICAL first
|
| 487 |
risk_order = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}
|
| 488 |
+
sorted_labels = sorted(
|
| 489 |
+
label_clauses.keys(),
|
| 490 |
+
key=lambda l: risk_order.get(label_clauses[l].get("risk", "LOW"), 3)
|
| 491 |
+
)
|
| 492 |
|
| 493 |
+
for label in sorted_labels:
|
| 494 |
+
clause = label_clauses[label]
|
| 495 |
risk = clause.get("risk", "LOW")
|
| 496 |
text = clause.get("text", "")
|
| 497 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
# Find matching templates (Tier 1 + Tier 2)
|
| 499 |
matches = _find_similar_templates(label, text)
|
| 500 |
if not matches:
|
|
|
|
| 502 |
|
| 503 |
best_key, best_template, score = matches[0]
|
| 504 |
|
| 505 |
+
# FIX v4.3: Dedup — skip if this template's alternative was already used
|
| 506 |
+
alt_fingerprint = best_template["safe_alternative"][:120]
|
| 507 |
+
if alt_fingerprint in seen_alternatives:
|
| 508 |
+
continue
|
| 509 |
+
seen_alternatives.add(alt_fingerprint)
|
| 510 |
+
|
| 511 |
# Tier 3: Try LLM refinement if enabled
|
| 512 |
refined_text = None
|
| 513 |
tier = "template"
|