File size: 10,223 Bytes
3ca2e60
94c4c90
f81766f
3ca2e60
 
 
 
f81766f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ca2e60
f81766f
94c4c90
f81766f
 
 
 
 
 
 
 
3ca2e60
 
f81766f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94c4c90
3ca2e60
94c4c90
 
3ca2e60
94c4c90
 
 
3ca2e60
94c4c90
 
 
3ca2e60
94c4c90
 
3ca2e60
94c4c90
f81766f
94c4c90
 
f81766f
 
94c4c90
 
 
 
 
 
 
 
 
 
f81766f
 
 
94c4c90
 
 
 
 
 
 
 
 
 
 
 
 
f81766f
94c4c90
 
 
 
 
f81766f
94c4c90
f81766f
3ca2e60
94c4c90
 
f81766f
 
 
 
 
3ca2e60
94c4c90
f81766f
3ca2e60
f81766f
94c4c90
 
 
 
3ca2e60
94c4c90
 
3ca2e60
f81766f
94c4c90
3ca2e60
94c4c90
3ca2e60
94c4c90
3ca2e60
94c4c90
3ca2e60
94c4c90
3ca2e60
94c4c90
3ca2e60
94c4c90
3ca2e60
94c4c90
3ca2e60
 
 
94c4c90
3ca2e60
 
 
94c4c90
3ca2e60
94c4c90
3ca2e60
 
94c4c90
 
3ca2e60
 
94c4c90
3ca2e60
94c4c90
 
 
 
3ca2e60
94c4c90
 
 
 
 
 
f81766f
3ca2e60
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
"""
ClauseGuard — AI Fine Print Scanner
Uses Legal-BERT fine-tuned on CLAUDETTE/LexGLUE unfair_tos (8 categories).
"""

import html
import re

import gradio as gr
import numpy as np

# ─── Load ML model ───
# Hugging Face Hub id of the fine-tuned classifier.
MODEL_ID = "gaurv007/clauseguard-legal-bert"
# Stays None when loading fails; the classifiers below then use regex matching.
ml_pipeline = None

try:
    # Imported lazily so the app still starts when transformers is unavailable.
    from transformers import pipeline
    # top_k=None requests a score for every label; device=-1 selects the CPU
    # (per the transformers pipeline documentation).
    ml_pipeline = pipeline("text-classification", model=MODEL_ID, top_k=None, device=-1)
    print(f"Loaded model: {MODEL_ID}")
except Exception as e:
    # Deliberately broad: any import/download/initialisation failure degrades
    # to the regex fallback instead of preventing the app from starting.
    print(f"Model load failed ({e}), using regex fallback")

# ─── Label metadata ───
# Category name -> (severity, plain-language explanation shown to the user).
# Severity is one of "HIGH", "MEDIUM" or "LOW".
LABELS = {
    "Limitation of liability": (
        "HIGH",
        "Company avoids responsibility for damages or losses.",
    ),
    "Unilateral termination": (
        "HIGH",
        "They can close your account without reason.",
    ),
    "Unilateral change": (
        "MEDIUM",
        "Terms can change without your consent.",
    ),
    "Content removal": (
        "MEDIUM",
        "Your content can be deleted without notice.",
    ),
    "Contract by using": (
        "LOW",
        "You agree just by visiting or using the site.",
    ),
    "Choice of law": (
        "MEDIUM",
        "Foreign law applies instead of your local protections.",
    ),
    "Jurisdiction": (
        "MEDIUM",
        "Disputes handled in their preferred court, not yours.",
    ),
    "Arbitration": (
        "HIGH",
        "You waive your right to sue in court.",
    ),
}

# ─── Regex fallback ───
# Category name -> regexes tried in order against the lower-cased clause text.
# A category is reported as soon as any one of its patterns matches.
PATTERNS = {
    "Limitation of liability": [
        r"not liable",
        r"shall not be (liable|responsible)",
        r"in no event.*liable",
        r"limitation of liability",
        r"without warranty",
        r"disclaim",
    ],
    "Unilateral termination": [
        r"terminat.*at any time",
        r"suspend.*account.*without",
        r"we may (terminat|suspend|discontinu)",
        r"right to (terminat|suspend)",
    ],
    "Unilateral change": [
        r"sole discretion",
        r"reserves? the right to (modify|change|update|amend)",
        r"at any time.*without (prior )?notice",
        r"we may (modify|change|update)",
    ],
    "Content removal": [
        r"remove.*content.*without",
        r"right to remove",
        r"we may.*remove",
    ],
    "Contract by using": [
        r"by (using|accessing).*you agree",
        r"continued use.*constitutes? acceptance",
    ],
    "Choice of law": [
        r"governed by.*laws? of",
        r"shall be governed",
        r"laws of the state of",
    ],
    "Jurisdiction": [
        r"exclusive jurisdiction",
        r"courts? of.*(california|delaware|new york|ireland|england)",
        r"submit to.*jurisdiction",
    ],
    "Arbitration": [
        r"arbitrat",
        r"binding arbitration",
        r"waive.*right.*court",
        r"class action waiver",
    ],
}

def classify_ml(text):
    """Classify one clause with the Legal-BERT pipeline, falling back to regex.

    Args:
        text: Clause text to classify.

    Returns:
        A list of hit dicts with keys ``name``, ``severity``, ``desc`` and
        ``confidence`` (score rounded to two decimals), one for every label
        known to ``LABELS`` whose score is above 0.5. When the pipeline is not
        loaded, or inference fails for any reason, the result of
        ``classify_regex(text)`` is returned instead.
    """
    if ml_pipeline is None:
        return classify_regex(text)
    try:
        preds = ml_pipeline(text, truncation=True, max_length=512)
        # The pipeline may return either a flat list of {label, score} dicts or
        # that list wrapped in an outer per-input list; accept both shapes.
        scores = preds[0] if isinstance(preds[0], list) else preds
        results = []
        for p in scores:
            if p["score"] > 0.5 and p["label"] in LABELS:
                sev, desc = LABELS[p["label"]]
                results.append({
                    "name": p["label"],
                    "severity": sev,
                    "desc": desc,
                    "confidence": round(p["score"], 2),
                })
        return results
    except Exception as exc:
        # Best-effort by design: one failing clause must not abort the whole
        # scan. Report the failure instead of hiding it, mirroring the message
        # printed when the model fails to load.
        print(f"Model inference failed ({exc}), using regex fallback")
        return classify_regex(text)

def classify_regex(text):
    """Classify a clause by keyword patterns; used when the model is unavailable.

    The text is lower-cased and checked against every category in ``PATTERNS``.
    Each category contributes at most one hit, however many of its patterns
    match, and every hit carries a fixed confidence of 0.7.

    Returns:
        A list of hit dicts with keys ``name``, ``severity``, ``desc`` and
        ``confidence``, in ``PATTERNS`` order.
    """
    lowered = text.lower()
    hits = []
    for category, regexes in PATTERNS.items():
        # any() stops at the first matching pattern.
        if not any(re.search(rx, lowered) for rx in regexes):
            continue
        severity, explanation = LABELS[category]
        hits.append({
            "name": category,
            "severity": severity,
            "desc": explanation,
            "confidence": 0.7,
        })
    return hits

def split_clauses(text):
    """Break a document into candidate clauses.

    Runs of two or more newlines are collapsed to one. The text is then cut
    (a) at whitespace that follows ``.``, ``!`` or ``?`` and precedes an
    uppercase letter, a digit or ``(``, and (b) at a newline that precedes a
    list marker such as ``1. ``, ``2) `` or ``(a) ``. Pieces that are 30
    characters or shorter after stripping are discarded.

    Returns:
        The surviving pieces, stripped, in document order.
    """
    normalized = re.sub(r'\n{2,}', '\n', text.strip())
    boundary = r'(?<=[.!?])\s+(?=[A-Z0-9(])|(?:\n)(?=\d+[.)]\s|\([a-z]\)\s)'
    clauses = []
    for piece in re.split(boundary, normalized):
        trimmed = piece.strip()
        if len(trimmed) > 30:
            clauses.append(trimmed)
    return clauses

# Severity ordering and inline-CSS fragments used when rendering flagged clauses.
_SEVERITY_RANK = {"HIGH": 3, "MEDIUM": 2, "LOW": 1}
_CARD_BORDER = {"HIGH": "#fca5a5", "MEDIUM": "#fcd34d", "LOW": "#93c5fd"}
_TAG_STYLE = {
    "HIGH": "background:#fef2f2;color:#b91c1c;border:1px solid #fecaca;",
    "MEDIUM": "background:#fffbeb;color:#a16207;border:1px solid #fde68a;",
    "LOW": "background:#eff6ff;color:#1d4ed8;border:1px solid #bfdbfe;",
}


def _grade_for_risk(risk):
    """Map a 0-100 risk score to a letter grade ("A" lowest risk, "F" highest)."""
    if risk >= 60:
        return "F"
    if risk >= 40:
        return "D"
    if risk >= 20:
        return "C"
    if risk >= 10:
        return "B"
    return "A"


def _render_flagged_clause(item, show_confidence):
    """Render one flagged clause (``{"text", "hits"}``) as an HTML card."""
    worst = max(item["hits"], key=lambda h: _SEVERITY_RANK[h["severity"]])["severity"]
    border = _CARD_BORDER[worst]

    tag_parts = []
    for h in item["hits"]:
        conf = f' ({h["confidence"]})' if h.get("confidence") and show_confidence else ""
        tag_parts.append(
            f'<span style="{_TAG_STYLE[h["severity"]]}font-size:11px;font-weight:500;'
            f'padding:1px 8px;border-radius:3px;margin-right:4px;">{h["name"]}{conf}</span>'
        )
    tags = "".join(tag_parts)

    descs = "".join(
        f'<p style="font-size:12px;color:#71717a;margin-top:4px;">{h["desc"]}</p>'
        for h in item["hits"]
    )

    # The clause is user-supplied text: truncate first, then escape, so that
    # pasted markup cannot inject HTML and no entity is cut in half.
    raw = item["text"]
    preview = html.escape(raw[:200] + ("..." if len(raw) > 200 else ""))

    return f'''<div style="border:1px solid #e4e4e7;border-left:3px solid {border};border-radius:8px;padding:14px;margin-bottom:8px;">
    <p style="font-size:13px;color:#3f3f46;line-height:1.6;">{preview}</p>
    <div style="margin-top:8px;">{tags}</div>
    {descs}
  </div>'''


def analyze(text):
    """Scan a document and return an HTML risk report.

    The text is split into clauses, each clause is classified, and the hits
    are summarised as a 0-100 risk score, a letter grade and one card per
    flagged clause.

    Args:
        text: The pasted document. ``None``, empty, or fewer than 50
            characters after stripping yields an empty report.

    Returns:
        A ``(report_html, "")`` tuple. The second element is always an empty
        string; the UI binds it to a hidden component. ``("", "")`` is
        returned when there is nothing to analyse.
    """
    if not text or len(text.strip()) < 50:
        return "", ""

    clauses = split_clauses(text)
    if not clauses:
        return "", ""

    flagged = []
    sev_counts = {"HIGH": 0, "MEDIUM": 0, "LOW": 0}
    for clause in clauses:
        hits = classify_ml(clause)
        if hits:
            flagged.append({"text": clause, "hits": hits})
            for h in hits:
                sev_counts[h["severity"]] += 1

    total = len(clauses)
    # NOTE(review): formula kept as-is. With these weights a single HIGH hit
    # already saturates the score at 100 for any document of 20 clauses or
    # fewer -- confirm that this scaling is intended.
    weighted = sev_counts["HIGH"] * 20 + sev_counts["MEDIUM"] * 10 + sev_counts["LOW"] * 5
    risk = min(100, round(weighted / max(1, total) * 100))
    grade = _grade_for_risk(risk)

    model_loaded = bool(ml_pipeline)
    engine = "Legal-BERT" if model_loaded else "Pattern matching"

    if grade in ("F", "D"):
        grade_style = "background:#fef2f2;color:#b91c1c;"
    elif grade == "C":
        grade_style = "background:#fffbeb;color:#a16207;"
    else:
        grade_style = "background:#f0fdf4;color:#15803d;"

    # &middot; is used for the separators so the markup stays pure ASCII and
    # cannot be garbled by an encoding mismatch.
    stats = (
        f"{total} clauses &middot; {len(flagged)} flagged &middot; "
        f"{sev_counts['HIGH']} high &middot; {sev_counts['MEDIUM']} medium &middot; "
        f"{sev_counts['LOW']} low &middot; Engine: {engine}"
    )

    parts = [f"""<div style="font-family:system-ui,sans-serif;">
  <div style="border:1px solid #e4e4e7;border-radius:8px;padding:20px;margin-bottom:16px;">
    <div style="display:flex;justify-content:space-between;align-items:baseline;">
      <div>
        <span style="font-size:32px;font-weight:600;">{risk}</span>
        <span style="font-size:13px;color:#a1a1aa;">/100 risk</span>
      </div>
      <span style="font-size:13px;font-weight:500;padding:2px 10px;border-radius:4px;{grade_style}">Grade {grade}</span>
    </div>
    <p style="margin-top:8px;font-size:12px;color:#a1a1aa;">{stats}</p>
  </div>"""]

    if not flagged:
        parts.append('<div style="border:1px solid #e4e4e7;border-radius:8px;padding:24px;text-align:center;"><p style="font-size:14px;color:#71717a;">No unfair clauses found.</p></div>')
    else:
        parts.extend(_render_flagged_clause(item, model_loaded) for item in flagged)

    parts.append("</div>")
    return "".join(parts), ""


# Sample terms-of-use text offered as a one-click example in the UI.
SPOTIFY = """By using the Spotify Service, you agree to be bound by these Terms of Use.

Spotify may, in its sole discretion, modify or update these Terms of Service at any time without prior notice. Your continued use of the Service after any such changes constitutes your acceptance of the new Terms of Service.

In no event will Spotify be liable for any indirect, incidental, special, consequential, or punitive damages, or any loss of profits or revenues, whether incurred directly or indirectly.

Spotify reserves the right to remove or disable access to any User Content for any reason, without prior notice.

Spotify may terminate your account or suspend your access at any time, with or without cause, with or without notice, effective immediately.

These Terms will be governed by and construed in accordance with the laws of the State of New York.

Any dispute shall be finally settled by arbitration in New York County."""

# Sample residential-lease text offered as a one-click example in the UI.
RENTAL = """The Landlord reserves the right to enter the premises at any time without prior notice for inspection or any other purpose deemed necessary in their sole discretion.

The Landlord shall not be liable for any damage to the Tenant's personal property, whether caused by water leaks, fire, theft, or any other cause, including the Landlord's own negligence.

The Landlord may terminate this lease at any time with only 7 days written notice, for any reason or no reason at all.

Any disputes arising from this lease agreement shall be resolved exclusively in the courts of the Landlord's choosing, and the Tenant waives the right to a jury trial.

The Landlord reserves the right to modify the terms of this lease at any time. Continued occupancy constitutes acceptance of the new terms."""

# Gradio UI: input and actions in the left column, rendered report on the right.
demo = gr.Blocks(title="ClauseGuard")

with demo:
    # Page header.
    gr.HTML('<div style="font-family:system-ui,sans-serif;padding:16px 0;"><h1 style="font-size:20px;font-weight:600;margin:0;">ClauseGuard</h1><p style="font-size:13px;color:#a1a1aa;margin-top:2px;">Paste a Terms of Service, contract, or lease. Get a risk breakdown.</p></div>')

    with gr.Row():
        with gr.Column(scale=1):
            text_input = gr.Textbox(label="Document text", placeholder="Paste here...", lines=14, max_lines=40)
            with gr.Row():
                scan_btn = gr.Button("Scan", variant="primary")
                clear_btn = gr.Button("Clear", variant="secondary")
            gr.Examples(examples=[[SPOTIFY], [RENTAL]], inputs=[text_input], label="Examples")

        with gr.Column(scale=1):
            results_html = gr.HTML(label="Results")
            # Receives the second element of analyze()'s return tuple; never shown.
            hidden = gr.HTML(visible=False)

    scan_btn.click(fn=analyze, inputs=[text_input], outputs=[results_html, hidden])
    # Clear resets the input box and both outputs to empty strings.
    clear_btn.click(fn=lambda: ("", "", ""), outputs=[text_input, results_html, hidden])

    # Footer. The separator is written as &middot; so the markup stays ASCII
    # and cannot be garbled by an encoding mismatch.
    gr.HTML('<p style="font-family:system-ui,sans-serif;font-size:11px;color:#a1a1aa;text-align:center;padding:16px 0;border-top:1px solid #f4f4f5;margin-top:16px;">Not legal advice. Model: Legal-BERT fine-tuned on CLAUDETTE. <a href="https://huggingface.co/gaurv007/clauseguard-legal-bert" style="color:#71717a;">Model</a> &middot; <a href="https://huggingface.co/datasets/coastalcph/lex_glue" style="color:#71717a;">Dataset</a></p>')

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()