Spaces:

gaurv007
/

ClauseGuard

Sleeping

App Files Files Community

gaurv007 committed 12 days ago

Commit

30580c9

verified ·

1 Parent(s): 423d2a9

fix(v4.3): redlining.py — bug report fixes (10 issues)

Browse files

Files changed (1) hide show

redlining.py +72 -22

redlining.py CHANGED Viewed

@@ -408,24 +408,51 @@ Write the refined safer clause (adapt the template to this specific contract's c
 # ═══════════════════════════════════════════════════════════════════════
-# PUBLIC API
 # ═══════════════════════════════════════════════════════════════════════
 def generate_redlines(analysis_result, use_llm=True):
     """
     Generate redline suggestions for all flagged clauses in the analysis.
-    Returns list of redline suggestions:
-    [{
-        "original_text": str,
-        "clause_label": str,
-        "risk_level": str,
-        "safe_alternative": str,
-        "legal_basis": str,
-        "consumer_standard": str,
-        "tier": "template" | "llm_refined",
-        "confidence": str,
-    }]
     """
     if analysis_result is None:
         return []
@@ -434,23 +461,40 @@ def generate_redlines(analysis_result, use_llm=True):
     if not clauses:
         return []
     redlines = []
-    seen_labels = set()  # Deduplicate by label
     # Sort by risk level: CRITICAL first
     risk_order = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}
-    sorted_clauses = sorted(clauses, key=lambda c: risk_order.get(c.get("risk", "LOW"), 3))
-    for clause in sorted_clauses:
-        label = clause.get("label", "")
         risk = clause.get("risk", "LOW")
         text = clause.get("text", "")
-        # Skip LOW risk and already-seen labels
-        if risk == "LOW" or label in seen_labels:
-            continue
-        seen_labels.add(label)
         # Find matching templates (Tier 1 + Tier 2)
         matches = _find_similar_templates(label, text)
         if not matches:
@@ -458,6 +502,12 @@ def generate_redlines(analysis_result, use_llm=True):
         best_key, best_template, score = matches[0]
         # Tier 3: Try LLM refinement if enabled
         refined_text = None
         tier = "template"

 # ═══════════════════════════════════════════════════════════════════════
+# FIX v4.3: Keyword validation — ensure original clause matches the label
 # ═══════════════════════════════════════════════════════════════════════
+_LABEL_KEYWORDS = {
+    "Limitation of liability": ["liable", "liability", "damages", "limitation of liability", "in no event"],
+    "Uncapped Liability": ["uncapped", "unlimited", "no limit", "no cap"],
+    "Governing Law": ["governed by", "governing law", "jurisdiction", "laws of"],
+    "Termination for Convenience": ["terminat", "cancel", "convenience", "without cause"],
+    "Non-Compete": ["non-compete", "not compete", "competition restriction"],
+    "No-Solicit of Employees": ["solicit", "recruit", "induce", "encourage", "employee"],
+    "No-Solicit of Customers": ["solicit", "customer", "client", "divert"],
+    "Non-Disparagement": ["disparag", "defam", "negative", "derogatory"],
+    "Arbitration": ["arbitrat", "binding arbitration", "waive", "class action"],
+    "IP Ownership Assignment": ["intellectual property", "ip", "assign", "work for hire", "ownership"],
+    "Indemnification": ["indemnif", "hold harmless", "defend"],
+    "Confidentiality": ["confidential", "non-disclosure", "nda"],
+    "Exclusivity": ["exclusive", "exclusivity"],
+    "Anti-Assignment": ["assign", "transfer", "without consent"],
+    "Content removal": ["remove", "delete", "content"],
+    "Unilateral change": ["modify", "change", "amend", "sole discretion"],
+    "Unilateral termination": ["terminat", "suspend", "at any time"],
+    "Liquidated Damages": ["liquidated", "pre-determined", "stipulated"],
+    "Choice of law": ["governed by", "laws of", "choice of law"],
+    "Jurisdiction": ["jurisdiction", "courts of", "exclusive jurisdiction"],
+    "Contract by using": ["by using", "continued use", "acceptance"],
+}
+def _validate_clause_match(label, clause_text):
+    """FIX v4.3: Validate that a clause actually contains content relevant to its label."""
+    keywords = _LABEL_KEYWORDS.get(label, [])
+    if not keywords:
+        return True  # No keywords defined — allow match
+    text_lower = clause_text.lower()
+    return any(kw in text_lower for kw in keywords)
 def generate_redlines(analysis_result, use_llm=True):
     """
     Generate redline suggestions for all flagged clauses in the analysis.
+    FIX v4.3:
+      - Validates original clause matches label keywords before showing
+      - Deduplicates by suggested text (catches template mapping bugs)
+      - Picks the BEST clause for each label (highest confidence + keyword match)
     """
     if analysis_result is None:
         return []
     if not clauses:
         return []
+    # FIX v4.3: Group clauses by label and pick the best match for each
+    label_clauses = {}
+    for clause in clauses:
+        label = clause.get("label", "")
+        risk = clause.get("risk", "LOW")
+        text = clause.get("text", "")
+        confidence = clause.get("confidence", 0) or 0
+        if risk == "LOW":
+            continue
+        # Validate that the clause text actually matches the label
+        if not _validate_clause_match(label, text):
+            continue
+        # Keep the highest-confidence match for each label
+        if label not in label_clauses or confidence > (label_clauses[label].get("confidence", 0) or 0):
+            label_clauses[label] = clause
     redlines = []
+    seen_alternatives = set()  # FIX v4.3: Dedup by suggested text
     # Sort by risk level: CRITICAL first
     risk_order = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}
+    sorted_labels = sorted(
+        label_clauses.keys(),
+        key=lambda l: risk_order.get(label_clauses[l].get("risk", "LOW"), 3)
+    )
+    for label in sorted_labels:
+        clause = label_clauses[label]
         risk = clause.get("risk", "LOW")
         text = clause.get("text", "")
         # Find matching templates (Tier 1 + Tier 2)
         matches = _find_similar_templates(label, text)
         if not matches:
         best_key, best_template, score = matches[0]
+        # FIX v4.3: Dedup — skip if this template's alternative was already used
+        alt_fingerprint = best_template["safe_alternative"][:120]
+        if alt_fingerprint in seen_alternatives:
+            continue
+        seen_alternatives.add(alt_fingerprint)
         # Tier 3: Try LLM refinement if enabled
         refined_text = None
         tier = "template"