Spaces:
Sleeping
Sleeping
Wire trained Legal-BERT model (76% F1) into live demo — replaces regex
Browse files- app.py +79 -40
- requirements.txt +3 -0
app.py
CHANGED
|
@@ -1,33 +1,74 @@
|
|
| 1 |
"""
|
| 2 |
ClauseGuard — AI Fine Print Scanner
|
|
|
|
| 3 |
"""
|
| 4 |
|
| 5 |
import gradio as gr
|
| 6 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
-
LABELS = [
|
| 10 |
-
("Limitation of liability", "HIGH", "Company avoids responsibility for damages or losses."),
|
| 11 |
-
("Unilateral termination", "HIGH", "They can close your account without reason."),
|
| 12 |
-
("Unilateral change", "MEDIUM", "Terms can change without your consent."),
|
| 13 |
-
("Content removal", "MEDIUM", "Your content can be deleted without notice."),
|
| 14 |
-
("Contract by using", "LOW", "You agree just by visiting or using the site."),
|
| 15 |
-
("Choice of law", "MEDIUM", "Foreign law applies instead of your local protections."),
|
| 16 |
-
("Jurisdiction", "MEDIUM", "Disputes handled in their preferred court, not yours."),
|
| 17 |
-
("Arbitration", "HIGH", "You waive your right to sue in court."),
|
| 18 |
-
]
|
| 19 |
-
|
| 20 |
PATTERNS = {
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
}
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
def split_clauses(text):
|
| 32 |
text = re.sub(r'\n{2,}', '\n', text.strip())
|
| 33 |
parts = re.split(r'(?<=[.!?])\s+(?=[A-Z0-9(])|(?:\n)(?=\d+[.)]\s|\([a-z]\)\s)', text)
|
|
@@ -45,17 +86,11 @@ def analyze(text):
|
|
| 45 |
sev_counts = {"HIGH": 0, "MEDIUM": 0, "LOW": 0}
|
| 46 |
|
| 47 |
for clause in clauses:
|
| 48 |
-
|
| 49 |
-
hits = []
|
| 50 |
-
for lid, pats in PATTERNS.items():
|
| 51 |
-
for p in pats:
|
| 52 |
-
if re.search(p, clause_lower):
|
| 53 |
-
name, sev, desc = LABELS[lid]
|
| 54 |
-
hits.append({"name": name, "severity": sev, "desc": desc})
|
| 55 |
-
sev_counts[sev] += 1
|
| 56 |
-
break
|
| 57 |
if hits:
|
| 58 |
flagged.append({"text": clause, "hits": hits})
|
|
|
|
|
|
|
| 59 |
|
| 60 |
total = len(clauses)
|
| 61 |
risk = min(100, round((sev_counts["HIGH"] * 20 + sev_counts["MEDIUM"] * 10 + sev_counts["LOW"] * 5) / max(1, total) * 100))
|
|
@@ -66,7 +101,9 @@ def analyze(text):
|
|
| 66 |
elif risk >= 10: grade = "B"
|
| 67 |
else: grade = "A"
|
| 68 |
|
| 69 |
-
|
|
|
|
|
|
|
| 70 |
summary = f"""<div style="font-family:system-ui,sans-serif;">
|
| 71 |
<div style="border:1px solid #e4e4e7;border-radius:8px;padding:20px;margin-bottom:16px;">
|
| 72 |
<div style="display:flex;justify-content:space-between;align-items:baseline;">
|
|
@@ -80,27 +117,28 @@ def analyze(text):
|
|
| 80 |
'background:#f0fdf4;color:#15803d;'
|
| 81 |
}">Grade {grade}</span>
|
| 82 |
</div>
|
| 83 |
-
<p style="margin-top:8px;font-size:12px;color:#a1a1aa;">{total} clauses
|
| 84 |
</div>"""
|
| 85 |
|
| 86 |
if not flagged:
|
| 87 |
summary += '<div style="border:1px solid #e4e4e7;border-radius:8px;padding:24px;text-align:center;"><p style="font-size:14px;color:#71717a;">No unfair clauses found.</p></div>'
|
| 88 |
else:
|
| 89 |
-
for
|
| 90 |
max_sev = max(item["hits"], key=lambda h: {"HIGH":3,"MEDIUM":2,"LOW":1}[h["severity"]])["severity"]
|
| 91 |
-
|
| 92 |
|
| 93 |
tags = ""
|
| 94 |
for h in item["hits"]:
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
|
|
|
| 99 |
|
| 100 |
descs = "".join(f'<p style="font-size:12px;color:#71717a;margin-top:4px;">{h["desc"]}</p>' for h in item["hits"])
|
| 101 |
-
preview = item["text"][:
|
| 102 |
|
| 103 |
-
summary += f'''<div style="border:1px solid #e4e4e7;border-left:3px solid {
|
| 104 |
<p style="font-size:13px;color:#3f3f46;line-height:1.6;">{preview}</p>
|
| 105 |
<div style="margin-top:8px;">{tags}</div>
|
| 106 |
{descs}
|
|
@@ -109,6 +147,7 @@ def analyze(text):
|
|
| 109 |
summary += "</div>"
|
| 110 |
return summary, ""
|
| 111 |
|
|
|
|
| 112 |
SPOTIFY = """By using the Spotify Service, you agree to be bound by these Terms of Use.
|
| 113 |
|
| 114 |
Spotify may, in its sole discretion, modify or update these Terms of Service at any time without prior notice. Your continued use of the Service after any such changes constitutes your acceptance of the new Terms of Service.
|
|
@@ -153,7 +192,7 @@ with demo:
|
|
| 153 |
scan_btn.click(fn=analyze, inputs=[text_input], outputs=[results_html, hidden])
|
| 154 |
clear_btn.click(fn=lambda: ("", "", ""), outputs=[text_input, results_html, hidden])
|
| 155 |
|
| 156 |
-
gr.HTML('<p style="font-family:system-ui,sans-serif;font-size:11px;color:#a1a1aa;text-align:center;padding:16px 0;border-top:1px solid #f4f4f5;margin-top:16px;">Not legal advice.
|
| 157 |
|
| 158 |
if __name__ == "__main__":
|
| 159 |
demo.launch()
|
|
|
|
| 1 |
"""
|
| 2 |
ClauseGuard — AI Fine Print Scanner
|
| 3 |
+
Uses Legal-BERT fine-tuned on CLAUDETTE/LexGLUE unfair_tos (8 categories).
|
| 4 |
"""
|
| 5 |
|
| 6 |
import gradio as gr
|
| 7 |
import re
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
# ─── Load ML model ───
|
| 11 |
+
MODEL_ID = "gaurv007/clauseguard-legal-bert"

# Try to build the Hugging Face text-classification pipeline once at import
# time. Any failure (transformers not installed, model unavailable, ...) is
# reported on stdout and leaves ml_pipeline as None, which classify_ml treats
# as "use the regex classifier instead".
try:
    from transformers import pipeline

    ml_pipeline = pipeline("text-classification", model=MODEL_ID, top_k=None, device=-1)
except Exception as e:
    ml_pipeline = None
    print(f"Model load failed ({e}), using regex fallback")
else:
    print(f"Loaded model: {MODEL_ID}")
|
| 20 |
+
|
| 21 |
+
# ─── Label metadata ───
|
| 22 |
+
# Category name -> (severity, plain-language explanation).
# The keys double as the label strings accepted from the ML model
# (classify_ml keeps a prediction only when its label is a key here) and as
# the keys of PATTERNS for the regex fallback.
LABELS = {
    "Limitation of liability": (
        "HIGH",
        "Company avoids responsibility for damages or losses.",
    ),
    "Unilateral termination": (
        "HIGH",
        "They can close your account without reason.",
    ),
    "Unilateral change": (
        "MEDIUM",
        "Terms can change without your consent.",
    ),
    "Content removal": (
        "MEDIUM",
        "Your content can be deleted without notice.",
    ),
    "Contract by using": (
        "LOW",
        "You agree just by visiting or using the site.",
    ),
    "Choice of law": (
        "MEDIUM",
        "Foreign law applies instead of your local protections.",
    ),
    "Jurisdiction": (
        "MEDIUM",
        "Disputes handled in their preferred court, not yours.",
    ),
    "Arbitration": (
        "HIGH",
        "You waive your right to sue in court.",
    ),
}
|
| 32 |
|
| 33 |
+
# ─── Regex fallback ───
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
PATTERNS = {
|
| 35 |
+
"Limitation of liability": [r"not liable", r"shall not be (liable|responsible)", r"in no event.*liable", r"limitation of liability", r"without warranty", r"disclaim"],
|
| 36 |
+
"Unilateral termination": [r"terminat.*at any time", r"suspend.*account.*without", r"we may (terminat|suspend|discontinu)", r"right to (terminat|suspend)"],
|
| 37 |
+
"Unilateral change": [r"sole discretion", r"reserves? the right to (modify|change|update|amend)", r"at any time.*without (prior )?notice", r"we may (modify|change|update)"],
|
| 38 |
+
"Content removal": [r"remove.*content.*without", r"right to remove", r"we may.*remove"],
|
| 39 |
+
"Contract by using": [r"by (using|accessing).*you agree", r"continued use.*constitutes? acceptance"],
|
| 40 |
+
"Choice of law": [r"governed by.*laws? of", r"shall be governed", r"laws of the state of"],
|
| 41 |
+
"Jurisdiction": [r"exclusive jurisdiction", r"courts? of.*(california|delaware|new york|ireland|england)", r"submit to.*jurisdiction"],
|
| 42 |
+
"Arbitration": [r"arbitrat", r"binding arbitration", r"waive.*right.*court", r"class action waiver"],
|
| 43 |
}
|
| 44 |
|
| 45 |
+
def classify_ml(text):
    """Classify one clause with the Legal-BERT pipeline, falling back to regex.

    Args:
        text: The clause text to classify.

    Returns:
        A list of hit dicts with keys "name", "severity", "desc" and
        "confidence" (the model score rounded to two decimals). Only
        predictions scoring above 0.5 whose label is a key of LABELS are
        kept. When the pipeline was never loaded, or inference fails for any
        reason, the result of classify_regex(text) is returned instead.
    """
    # None is the sentinel left behind when the model could not be loaded.
    if ml_pipeline is None:
        return classify_regex(text)
    try:
        preds = ml_pipeline(text, truncation=True, max_length=512)
        # The pipeline may hand back either a flat list of {"label", "score"}
        # dicts or that list wrapped in an outer per-input list.
        scores = preds[0] if isinstance(preds[0], list) else preds
        results = []
        for p in scores:
            if p["score"] > 0.5 and p["label"] in LABELS:
                sev, desc = LABELS[p["label"]]
                results.append({
                    "name": p["label"],
                    "severity": sev,
                    "desc": desc,
                    "confidence": round(p["score"], 2),
                })
        return results
    except Exception as e:
        # Best-effort: keep the demo working, but report the failure instead
        # of swallowing it, so a broken model is not silently replaced by
        # regex results.
        print(f"ML inference failed ({e}), using regex fallback")
        return classify_regex(text)
|
| 59 |
+
|
| 60 |
+
def classify_regex(text):
    """Flag clause categories by regex when the ML model is unavailable.

    The clause is lower-cased and each category in PATTERNS is tested in
    order; a category is reported at most once, as soon as any of its
    patterns matches.

    Args:
        text: The clause text to classify.

    Returns:
        A list of hit dicts with keys "name", "severity", "desc" and
        "confidence"; the confidence is the fixed value 0.7 for every hit.
    """
    haystack = text.lower()
    matches = []
    for category, regexes in PATTERNS.items():
        if not any(re.search(rx, haystack) for rx in regexes):
            continue
        severity, explanation = LABELS[category]
        matches.append({"name": category, "severity": severity, "desc": explanation, "confidence": 0.7})
    return matches
|
| 71 |
+
|
| 72 |
def split_clauses(text):
|
| 73 |
text = re.sub(r'\n{2,}', '\n', text.strip())
|
| 74 |
parts = re.split(r'(?<=[.!?])\s+(?=[A-Z0-9(])|(?:\n)(?=\d+[.)]\s|\([a-z]\)\s)', text)
|
|
|
|
| 86 |
sev_counts = {"HIGH": 0, "MEDIUM": 0, "LOW": 0}
|
| 87 |
|
| 88 |
for clause in clauses:
|
| 89 |
+
hits = classify_ml(clause)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
if hits:
|
| 91 |
flagged.append({"text": clause, "hits": hits})
|
| 92 |
+
for h in hits:
|
| 93 |
+
sev_counts[h["severity"]] += 1
|
| 94 |
|
| 95 |
total = len(clauses)
|
| 96 |
risk = min(100, round((sev_counts["HIGH"] * 20 + sev_counts["MEDIUM"] * 10 + sev_counts["LOW"] * 5) / max(1, total) * 100))
|
|
|
|
| 101 |
elif risk >= 10: grade = "B"
|
| 102 |
else: grade = "A"
|
| 103 |
|
| 104 |
+
engine = "Legal-BERT" if ml_pipeline else "Pattern matching"
|
| 105 |
+
|
| 106 |
+
# Build HTML
|
| 107 |
summary = f"""<div style="font-family:system-ui,sans-serif;">
|
| 108 |
<div style="border:1px solid #e4e4e7;border-radius:8px;padding:20px;margin-bottom:16px;">
|
| 109 |
<div style="display:flex;justify-content:space-between;align-items:baseline;">
|
|
|
|
| 117 |
'background:#f0fdf4;color:#15803d;'
|
| 118 |
}">Grade {grade}</span>
|
| 119 |
</div>
|
| 120 |
+
<p style="margin-top:8px;font-size:12px;color:#a1a1aa;">{total} clauses · {len(flagged)} flagged · {sev_counts['HIGH']} high · {sev_counts['MEDIUM']} medium · {sev_counts['LOW']} low · Engine: {engine}</p>
|
| 121 |
</div>"""
|
| 122 |
|
| 123 |
if not flagged:
|
| 124 |
summary += '<div style="border:1px solid #e4e4e7;border-radius:8px;padding:24px;text-align:center;"><p style="font-size:14px;color:#71717a;">No unfair clauses found.</p></div>'
|
| 125 |
else:
|
| 126 |
+
for item in flagged:
|
| 127 |
max_sev = max(item["hits"], key=lambda h: {"HIGH":3,"MEDIUM":2,"LOW":1}[h["severity"]])["severity"]
|
| 128 |
+
border = {"HIGH":"#fca5a5","MEDIUM":"#fcd34d","LOW":"#93c5fd"}[max_sev]
|
| 129 |
|
| 130 |
tags = ""
|
| 131 |
for h in item["hits"]:
|
| 132 |
+
ts = {"HIGH":"background:#fef2f2;color:#b91c1c;border:1px solid #fecaca;",
|
| 133 |
+
"MEDIUM":"background:#fffbeb;color:#a16207;border:1px solid #fde68a;",
|
| 134 |
+
"LOW":"background:#eff6ff;color:#1d4ed8;border:1px solid #bfdbfe;"}[h["severity"]]
|
| 135 |
+
conf = f' ({h["confidence"]})' if h.get("confidence") and ml_pipeline else ""
|
| 136 |
+
tags += f'<span style="{ts}font-size:11px;font-weight:500;padding:1px 8px;border-radius:3px;margin-right:4px;">{h["name"]}{conf}</span>'
|
| 137 |
|
| 138 |
descs = "".join(f'<p style="font-size:12px;color:#71717a;margin-top:4px;">{h["desc"]}</p>' for h in item["hits"])
|
| 139 |
+
preview = item["text"][:200] + ("..." if len(item["text"]) > 200 else "")
|
| 140 |
|
| 141 |
+
summary += f'''<div style="border:1px solid #e4e4e7;border-left:3px solid {border};border-radius:8px;padding:14px;margin-bottom:8px;">
|
| 142 |
<p style="font-size:13px;color:#3f3f46;line-height:1.6;">{preview}</p>
|
| 143 |
<div style="margin-top:8px;">{tags}</div>
|
| 144 |
{descs}
|
|
|
|
| 147 |
summary += "</div>"
|
| 148 |
return summary, ""
|
| 149 |
|
| 150 |
+
|
| 151 |
SPOTIFY = """By using the Spotify Service, you agree to be bound by these Terms of Use.
|
| 152 |
|
| 153 |
Spotify may, in its sole discretion, modify or update these Terms of Service at any time without prior notice. Your continued use of the Service after any such changes constitutes your acceptance of the new Terms of Service.
|
|
|
|
| 192 |
scan_btn.click(fn=analyze, inputs=[text_input], outputs=[results_html, hidden])
|
| 193 |
clear_btn.click(fn=lambda: ("", "", ""), outputs=[text_input, results_html, hidden])
|
| 194 |
|
| 195 |
+
gr.HTML('<p style="font-family:system-ui,sans-serif;font-size:11px;color:#a1a1aa;text-align:center;padding:16px 0;border-top:1px solid #f4f4f5;margin-top:16px;">Not legal advice. Model: Legal-BERT fine-tuned on CLAUDETTE. <a href="https://huggingface.co/gaurv007/clauseguard-legal-bert" style="color:#71717a;">Model</a> · <a href="https://huggingface.co/datasets/coastalcph/lex_glue" style="color:#71717a;">Dataset</a></p>')
|
| 196 |
|
| 197 |
if __name__ == "__main__":
|
| 198 |
demo.launch()
|
requirements.txt
CHANGED
|
@@ -1 +1,4 @@
|
|
| 1 |
gradio>=5.0
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
gradio>=5.0
|
| 2 |
+
transformers>=5.0
|
| 3 |
+
torch
|
| 4 |
+
numpy
|