"""
ClauseGuard — AI Fine Print Scanner

Uses Legal-BERT fine-tuned on CLAUDETTE/LexGLUE unfair_tos (8 categories).
"""

import re

import gradio as gr
import numpy as np

# ─── Load ML model ───
MODEL_ID = "gaurv007/clauseguard-legal-bert"
ml_pipeline = None
try:
    # Imported lazily so the app still starts (with the regex fallback) when
    # transformers is missing or the model cannot be loaded.
    from transformers import pipeline

    ml_pipeline = pipeline("text-classification", model=MODEL_ID, top_k=None, device=-1)
    print(f"Loaded model: {MODEL_ID}")
except Exception as e:
    print(f"Model load failed ({e}), using regex fallback")

# ─── Label metadata ───
# Maps each category name to (severity, plain-language description).
LABELS = {
    "Limitation of liability": ("HIGH", "Company avoids responsibility for damages or losses."),
    "Unilateral termination": ("HIGH", "They can close your account without reason."),
    "Unilateral change": ("MEDIUM", "Terms can change without your consent."),
    "Content removal": ("MEDIUM", "Your content can be deleted without notice."),
    "Contract by using": ("LOW", "You agree just by visiting or using the site."),
    "Choice of law": ("MEDIUM", "Foreign law applies instead of your local protections."),
    "Jurisdiction": ("MEDIUM", "Disputes handled in their preferred court, not yours."),
    "Arbitration": ("HIGH", "You waive your right to sue in court."),
}

# ─── Regex fallback ───
# Patterns are matched against lower-cased text, so they are written in lower case.
PATTERNS = {
    "Limitation of liability": [
        r"not liable",
        r"shall not be (liable|responsible)",
        r"in no event.*liable",
        r"limitation of liability",
        r"without warranty",
        r"disclaim",
    ],
    "Unilateral termination": [
        r"terminat.*at any time",
        r"suspend.*account.*without",
        r"we may (terminat|suspend|discontinu)",
        r"right to (terminat|suspend)",
    ],
    "Unilateral change": [
        r"sole discretion",
        r"reserves? the right to (modify|change|update|amend)",
        r"at any time.*without (prior )?notice",
        r"we may (modify|change|update)",
    ],
    "Content removal": [
        r"remove.*content.*without",
        r"right to remove",
        r"we may.*remove",
    ],
    "Contract by using": [
        r"by (using|accessing).*you agree",
        r"continued use.*constitutes? acceptance",
    ],
    "Choice of law": [
        r"governed by.*laws? of",
        r"shall be governed",
        r"laws of the state of",
    ],
    "Jurisdiction": [
        r"exclusive jurisdiction",
        r"courts? of.*(california|delaware|new york|ireland|england)",
        r"submit to.*jurisdiction",
    ],
    "Arbitration": [
        r"arbitrat",
        r"binding arbitration",
        r"waive.*right.*court",
        r"class action waiver",
    ],
}


def classify_ml(text):
    """Classify using the trained Legal-BERT model.

    Falls back to ``classify_regex`` when the model was not loaded or when
    inference raises.

    Args:
        text: A single clause to classify.

    Returns:
        A list of dicts with keys ``name``, ``severity``, ``desc`` and
        ``confidence``, one per label scoring above 0.5 that is also a key of
        ``LABELS``.
    """
    if not ml_pipeline:
        return classify_regex(text)
    try:
        preds = ml_pipeline(text, truncation=True, max_length=512)
        # The pipeline output may be nested one level (one list per input);
        # unwrap it so we always iterate over the per-label score dicts.
        scores = preds[0] if isinstance(preds[0], list) else preds
        results = []
        for p in scores:
            if p["score"] > 0.5 and p["label"] in LABELS:
                sev, desc = LABELS[p["label"]]
                results.append({
                    "name": p["label"],
                    "severity": sev,
                    "desc": desc,
                    "confidence": round(p["score"], 2),
                })
        return results
    except Exception as e:
        # Best-effort: keep serving results via regex, but do not hide the
        # failure (same reporting style as the model-load fallback above).
        print(f"ML classification failed ({e}), using regex fallback")
        return classify_regex(text)


def classify_regex(text):
    """Fallback regex classifier.

    Args:
        text: A single clause to classify.

    Returns:
        A list of dicts with keys ``name``, ``severity``, ``desc`` and
        ``confidence`` (fixed at 0.7), at most one per category in
        ``PATTERNS``, in ``PATTERNS`` order.
    """
    text_lower = text.lower()
    results = []
    for name, pats in PATTERNS.items():
        # One hit per category is enough; any() stops at the first match.
        if any(re.search(p, text_lower) for p in pats):
            sev, desc = LABELS[name]
            results.append({"name": name, "severity": sev, "desc": desc, "confidence": 0.7})
    return results


def split_clauses(text, min_length=30):
    """Split a document into clause-sized chunks.

    Runs of blank lines are collapsed to a single newline, then the text is
    split after sentence-ending punctuation followed by whitespace and an
    uppercase letter, digit or "(", and at newlines that precede a numbered
    ("1." / "1)") or lettered ("(a)") item.

    Args:
        text: The raw document text.
        min_length: Chunks whose stripped length is not greater than this are
            dropped. Defaults to 30.

    Returns:
        A list of stripped clause strings.
    """
    text = re.sub(r'\n{2,}', '\n', text.strip())
    parts = re.split(r'(?<=[.!?])\s+(?=[A-Z0-9(])|(?:\n)(?=\d+[.)]\s|\([a-z]\)\s)', text)
    stripped = (c.strip() for c in parts)
    return [c for c in stripped if len(c) > min_length]


def analyze(text):
    """Scan *text* clause by clause and compute a risk score and grade.

    Returns a pair of empty strings when *text* is empty, shorter than 50
    characters after stripping, or yields no clauses.

    NOTE(review): the HTML rendering and the final return statement follow
    this point in the file and are not reproduced here — confirm the full
    return shape against the rest of the function.
    """
    if not text or len(text.strip()) < 50:
        return "", ""
    clauses = split_clauses(text)
    if not clauses:
        return "", ""
    flagged = []
    sev_counts = {"HIGH": 0, "MEDIUM": 0, "LOW": 0}
    for clause in clauses:
        hits = classify_ml(clause)
        if hits:
            flagged.append({"text": clause, "hits": hits})
            for h in hits:
                sev_counts[h["severity"]] += 1
    total = len(clauses)
    # Weighted hits (20/10/5 per HIGH/MEDIUM/LOW) per clause, scaled by 100
    # and capped at 100.
    risk = min(100, round((sev_counts["HIGH"] * 20 + sev_counts["MEDIUM"] * 10 + sev_counts["LOW"] * 5) / max(1, total) * 100))
    if risk >= 60:
        grade = "F"
    elif risk >= 40:
        grade = "D"
    elif risk >= 20:
        grade = "C"
    elif risk >= 10:
        grade = "B"
    else:
        grade = "A"
    engine = "Legal-BERT" if ml_pipeline else "Pattern matching"
    #
Build HTML summary = f"""
{total} clauses · {len(flagged)} flagged · {sev_counts['HIGH']} high · {sev_counts['MEDIUM']} medium · {sev_counts['LOW']} low · Engine: {engine}
No unfair clauses found.
{h["desc"]}
' for h in item["hits"]) preview = item["text"][:200] + ("..." if len(item["text"]) > 200 else "") summary += f'''{preview}
Paste a Terms of Service, contract, or lease. Get a risk breakdown.
Not legal advice. Model: Legal-BERT fine-tuned on CLAUDETTE. Model · Dataset
') if __name__ == "__main__": demo.launch()