Spaces:
Sleeping
Sleeping
Upload api/main.py
Browse files- api/main.py +529 -126
api/main.py
CHANGED
|
@@ -1,18 +1,28 @@
|
|
| 1 |
"""
|
| 2 |
-
ClauseGuard — FastAPI Backend
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import os
|
| 8 |
-
import time
|
| 9 |
import re
|
|
|
|
|
|
|
| 10 |
from contextlib import asynccontextmanager
|
| 11 |
from typing import Optional
|
|
|
|
|
|
|
| 12 |
|
| 13 |
import httpx
|
| 14 |
import numpy as np
|
| 15 |
-
from fastapi import FastAPI, HTTPException, Depends
|
| 16 |
from fastapi.middleware.cors import CORSMiddleware
|
| 17 |
from pydantic import BaseModel, Field
|
| 18 |
|
|
@@ -27,12 +37,50 @@ SUPABASE_SERVICE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY", "")
|
|
| 27 |
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
|
| 28 |
SAULLM_ENDPOINT = os.environ.get("SAULLM_ENDPOINT", "")
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
]
|
| 34 |
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
"Limitation of liability": "Company limits or excludes liability for losses, data breaches, or service failures.",
|
| 37 |
"Unilateral termination": "Company can terminate your account at any time without reason.",
|
| 38 |
"Unilateral change": "Company can change terms at any time without your consent.",
|
|
@@ -41,79 +89,93 @@ LABEL_DESCRIPTIONS = {
|
|
| 41 |
"Choice of law": "Governing law may differ from your country, reducing your legal protections.",
|
| 42 |
"Jurisdiction": "Disputes must be resolved in a jurisdiction that may disadvantage you.",
|
| 43 |
"Arbitration": "Forces disputes to arbitration instead of court. You waive your right to sue.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
}
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
}
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
"Jurisdiction": "EU Regulation 1215/2012 Art. 18 — consumer domicile prevails.",
|
| 57 |
-
"Choice of law": "EU Regulation 593/2008 Art. 6 — consumer protection of habitual residence.",
|
| 58 |
-
"Limitation of liability": "EU Directive 93/13/EEC Annex 1(a) — excluding statutory rights.",
|
| 59 |
-
"Unilateral termination": "EU Directive 93/13/EEC Annex 1(f)(g) — termination without notice.",
|
| 60 |
-
"Contract by using": "EU Directive 2011/83/EU Art. 8 — active consent required.",
|
| 61 |
-
}
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
def load_model():
|
| 67 |
-
global classifier
|
|
|
|
|
|
|
|
|
|
| 68 |
try:
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
| 78 |
except Exception as e:
|
| 79 |
-
print(f"
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
# Regex fallback table: label id -> patterns tried against the lower-cased clause.
PATTERNS = {
    0: [r"not liable", r"shall not be (liable|responsible)", r"in no event.*liable", r"limitation of liability", r"without warranty", r"disclaim"],
    1: [r"terminat.*at any time", r"suspend.*account.*without", r"we may (terminat|suspend|discontinu)", r"right to (terminat|suspend)"],
    2: [r"sole discretion", r"reserves? the right to (modify|change|update|amend)", r"at any time.*without (prior )?notice", r"we may (modify|change|update)"],
    3: [r"remove.*content.*without", r"right to remove", r"we may.*remove"],
    4: [r"by (using|accessing).*you agree", r"continued use.*constitutes? acceptance"],
    5: [r"governed by.*laws? of", r"shall be governed", r"laws of the state of"],
    6: [r"exclusive jurisdiction", r"courts? of.*(california|delaware|new york|ireland|england)", r"submit to.*jurisdiction"],
    7: [r"arbitrat", r"binding arbitration", r"waive.*right.*court", r"class action waiver"],
}


def classify_clause(text: str) -> list[dict]:
    """Classify one clause, preferring the ML classifier over the regex table.

    When ``classifier`` is truthy its predictions are filtered to scores above
    0.5 whose label is a key of ``LABEL_DESCRIPTIONS``. If the classifier is
    unavailable or raises, every entry of ``PATTERNS`` with at least one
    matching pattern yields a finding with a fixed confidence of 0.7.

    Returns a list of dicts with keys ``name``, ``severity``, ``description``
    and ``confidence``.
    """
    if classifier:
        try:
            raw = classifier(text, truncation=True, max_length=512)
            # The pipeline may return either a flat list or a list per input.
            predictions = raw[0] if isinstance(raw[0], list) else raw
            findings = []
            for pred in predictions:
                if pred["score"] > 0.5 and pred["label"] in LABEL_DESCRIPTIONS:
                    findings.append({
                        "name": pred["label"],
                        "severity": SEVERITY_MAP.get(pred["label"], "MEDIUM"),
                        "description": LABEL_DESCRIPTIONS.get(pred["label"], ""),
                        "confidence": round(pred["score"], 3),
                    })
            return findings
        except Exception:
            # Best-effort: any classifier failure falls through to the regexes.
            pass

    # Regex fallback
    lowered = text.lower()
    matches = []
    for label_id, patterns in PATTERNS.items():
        if any(re.search(pattern, lowered) for pattern in patterns):
            name = LABEL_NAMES[label_id]
            matches.append({
                "name": name,
                "severity": SEVERITY_MAP[name],
                "description": LABEL_DESCRIPTIONS[name],
                "confidence": 0.7,
            })
    return matches
|
| 117 |
|
| 118 |
# ─── Supabase helper ───
|
| 119 |
async def supabase_insert(table: str, data: dict):
|
|
@@ -138,9 +200,348 @@ async def supabase_query(table: str, params: dict, headers_extra: dict = {}):
|
|
| 138 |
)
|
| 139 |
return resp.json() if resp.status_code == 200 else []
|
| 140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
# ─── Models ───
|
| 142 |
class AnalyzeRequest(BaseModel):
|
| 143 |
-
|
| 144 |
source_url: Optional[str] = None
|
| 145 |
|
| 146 |
class AnalyzeResponse(BaseModel):
|
|
@@ -149,9 +550,17 @@ class AnalyzeResponse(BaseModel):
|
|
| 149 |
total_clauses: int
|
| 150 |
flagged_count: int
|
| 151 |
results: list[dict]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
model: str
|
| 153 |
latency_ms: int
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
class ExplainRequest(BaseModel):
|
| 156 |
clause: str = Field(..., min_length=10, max_length=2000)
|
| 157 |
category: str
|
|
@@ -169,73 +578,72 @@ async def lifespan(app: FastAPI):
|
|
| 169 |
load_model()
|
| 170 |
yield
|
| 171 |
|
| 172 |
-
app = FastAPI(title="ClauseGuard API", version="
|
| 173 |
|
| 174 |
app.add_middleware(
|
| 175 |
CORSMiddleware,
|
| 176 |
-
allow_origins=["https://clauseguardweb.netlify.app", "https://clauseguardweb.netlify.app", "chrome-extension://*", "http://localhost:3000"],
|
| 177 |
allow_credentials=True, allow_methods=["*"], allow_headers=["*"],
|
| 178 |
)
|
| 179 |
|
| 180 |
@app.get("/health")
|
| 181 |
async def health():
|
| 182 |
-
return {"status": "ok", "model": "ml" if
|
| 183 |
|
| 184 |
@app.post("/api/analyze", response_model=AnalyzeResponse)
|
| 185 |
async def analyze(req: AnalyzeRequest, user: Optional[dict] = Depends(get_current_user)):
|
| 186 |
start = time.time()
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
for
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
latency = int((time.time() - start) * 1000)
|
| 200 |
-
|
| 201 |
-
|
|
|
|
| 202 |
if user:
|
| 203 |
await supabase_insert("analyses", {
|
| 204 |
-
"user_id": user["id"], "source_url": req.source_url, "total_clauses":
|
| 205 |
-
"flagged_count": len(
|
|
|
|
|
|
|
| 206 |
})
|
| 207 |
-
|
| 208 |
-
return AnalyzeResponse(
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
@app.post("/api/explain", response_model=ExplainResponse)
|
| 213 |
async def explain(req: ExplainRequest, user: dict = Depends(require_auth)):
|
| 214 |
-
desc =
|
| 215 |
-
legal =
|
| 216 |
recommendation = "Review this clause carefully. Consider negotiating or seeking legal advice before agreeing."
|
| 217 |
-
|
| 218 |
-
# Try SaulLM-7B if endpoint configured
|
| 219 |
if SAULLM_ENDPOINT and HF_API_TOKEN:
|
| 220 |
try:
|
| 221 |
-
prompt = f"
|
| 222 |
-
|
| 223 |
-
Clause: "{req.clause}"
|
| 224 |
-
Category: {req.category}
|
| 225 |
-
|
| 226 |
-
Provide:
|
| 227 |
-
1. A plain-English explanation of why this is problematic
|
| 228 |
-
2. The specific legal basis (EU/US consumer protection law)
|
| 229 |
-
3. A practical recommendation for the consumer
|
| 230 |
-
|
| 231 |
-
Be concise. 3-4 sentences maximum per section."""
|
| 232 |
-
|
| 233 |
async with httpx.AsyncClient(timeout=30.0) as client:
|
| 234 |
-
resp = await client.post(
|
| 235 |
-
SAULLM_ENDPOINT,
|
| 236 |
-
json={"inputs": prompt, "parameters": {"max_new_tokens": 300, "temperature": 0.3}},
|
| 237 |
-
headers={"Authorization": f"Bearer {HF_API_TOKEN}"},
|
| 238 |
-
)
|
| 239 |
if resp.status_code == 200:
|
| 240 |
output = resp.json()
|
| 241 |
generated = output[0]["generated_text"] if isinstance(output, list) else output.get("generated_text", "")
|
|
@@ -245,18 +653,13 @@ Be concise. 3-4 sentences maximum per section."""
|
|
| 245 |
legal = parts[1] if len(parts) > 1 else legal
|
| 246 |
recommendation = parts[2] if len(parts) > 2 else recommendation
|
| 247 |
except Exception:
|
| 248 |
-
pass
|
| 249 |
-
|
| 250 |
-
return ExplainResponse(clause=req.clause, category=req.category,
|
| 251 |
-
explanation=desc, legal_basis=legal, recommendation=recommendation)
|
| 252 |
|
| 253 |
@app.get("/api/history")
|
| 254 |
async def history(user: dict = Depends(require_auth), limit: int = 20, offset: int = 0):
|
| 255 |
limit = min(limit, 100)
|
| 256 |
-
data = await supabase_query("analyses", {
|
| 257 |
-
"user_id": f"eq.{user['id']}", "select": "*",
|
| 258 |
-
"order": "created_at.desc", "limit": str(limit), "offset": str(offset),
|
| 259 |
-
})
|
| 260 |
return {"analyses": data, "limit": limit, "offset": offset}
|
| 261 |
|
| 262 |
if __name__ == "__main__":
|
|
|
|
| 1 |
"""
|
| 2 |
+
ClauseGuard — FastAPI Backend v2.0
|
| 3 |
+
══════════════════════════════════
|
| 4 |
+
Features:
|
| 5 |
+
• 41 CUAD clause categories via fine-tuned Legal-BERT
|
| 6 |
+
• 4-tier risk scoring (Critical / High / Medium / Low)
|
| 7 |
+
• Legal NER: parties, dates, monetary values, jurisdictions, defined terms
|
| 8 |
+
• NLI contradiction & missing-clause detection
|
| 9 |
+
• Contract comparison engine
|
| 10 |
+
• Obligation tracker
|
| 11 |
+
• Compliance checker (GDPR, CCPA, SOX, HIPAA, FINRA)
|
| 12 |
"""
|
| 13 |
|
| 14 |
import os
|
|
|
|
| 15 |
import re
|
| 16 |
+
import json
|
| 17 |
+
import time
|
| 18 |
from contextlib import asynccontextmanager
|
| 19 |
from typing import Optional
|
| 20 |
+
from collections import defaultdict
|
| 21 |
+
from datetime import datetime
|
| 22 |
|
| 23 |
import httpx
|
| 24 |
import numpy as np
|
| 25 |
+
from fastapi import FastAPI, HTTPException, Depends, Body
|
| 26 |
from fastapi.middleware.cors import CORSMiddleware
|
| 27 |
from pydantic import BaseModel, Field
|
| 28 |
|
|
|
|
| 37 |
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
|
| 38 |
SAULLM_ENDPOINT = os.environ.get("SAULLM_ENDPOINT", "")
|
| 39 |
|
| 40 |
+
# ─── CUAD Labels (41 categories) ───
|
| 41 |
+
# Class names for the CUAD clause classifier; classify_cuad() maps logit
# index i to CUAD_LABELS[i].
# NOTE(review): this list holds 40 names, while load_model() builds the
# classification head with num_labels=41, and classify_cuad() ignores any
# index past the end of the list. Confirm the count and ordering against the
# adapter's own label mapping.
CUAD_LABELS = [
    "Document Name", "Parties", "Agreement Date", "Effective Date",
    "Expiration Date", "Renewal Term", "Governing Law", "Most Favored Nation",
    "Non-Compete", "Exclusivity", "No-Solicit of Customers",
    "No-Solicit of Employees", "Non-Disparagement",
    "Termination for Convenience", "ROFR/ROFO/ROFN", "Change of Control",
    "Anti-Assignment", "Revenue/Profit Sharing", "Price Restriction",
    "Minimum Commitment", "Volume Restriction", "IP Ownership Assignment",
    "Joint IP Ownership", "License Grant", "Non-Transferable License",
    "Affiliate License-Licensor", "Affiliate License-Licensee",
    "Unlimited/All-You-Can-Eat License", "Irrevocable or Perpetual License",
    "Source Code Escrow", "Post-Termination Services", "Audit Rights",
    "Uncapped Liability", "Cap on Liability", "Liquidated Damages",
    "Warranty Duration", "Insurance", "Covenant Not to Sue",
    "Third Party Beneficiary", "Other"
]
|
| 57 |
|
| 58 |
+
# Risk tier (CRITICAL / HIGH / MEDIUM / LOW) per clause label. Holds both
# CUAD labels and the consumer-terms labels used by the regex fallback.
# Lookups go through .get(): classify_regex() defaults to "MEDIUM" and
# classify_cuad() to "LOW" for labels that have no entry here.
RISK_MAP = {
    "Uncapped Liability": "CRITICAL", "Arbitration": "CRITICAL",
    "IP Ownership Assignment": "CRITICAL", "Termination for Convenience": "CRITICAL",
    "Limitation of liability": "CRITICAL", "Unilateral termination": "CRITICAL",
    "Liquidated Damages": "CRITICAL",
    "Non-Compete": "HIGH", "Exclusivity": "HIGH", "Change of Control": "HIGH",
    "No-Solicit of Customers": "HIGH", "No-Solicit of Employees": "HIGH",
    "Unilateral change": "HIGH", "Content removal": "HIGH", "Anti-Assignment": "HIGH",
    "Governing Law": "MEDIUM", "Jurisdiction": "MEDIUM", "Choice of law": "MEDIUM",
    "Price Restriction": "MEDIUM", "Minimum Commitment": "MEDIUM",
    "Volume Restriction": "MEDIUM", "Non-Disparagement": "MEDIUM",
    "Most Favored Nation": "MEDIUM", "Revenue/Profit Sharing": "MEDIUM",
    "Warranty Duration": "MEDIUM",
    "Document Name": "LOW", "Parties": "LOW", "Agreement Date": "LOW",
    "Effective Date": "LOW", "Expiration Date": "LOW", "Renewal Term": "LOW",
    "Joint IP Ownership": "LOW", "License Grant": "LOW",
    "Non-Transferable License": "LOW", "Affiliate License-Licensor": "LOW",
    "Affiliate License-Licensee": "LOW", "Unlimited/All-You-Can-Eat License": "LOW",
    "Irrevocable or Perpetual License": "LOW", "Source Code Escrow": "LOW",
    "Post-Termination Services": "LOW", "Audit Rights": "LOW",
    "Cap on Liability": "LOW", "Insurance": "LOW",
    "Covenant Not to Sue": "LOW", "Third Party Beneficiary": "LOW",
    "Other": "LOW", "ROFR/ROFO/ROFN": "LOW", "Contract by using": "LOW",
}
|
| 82 |
+
|
| 83 |
+
DESC_MAP = {
|
| 84 |
"Limitation of liability": "Company limits or excludes liability for losses, data breaches, or service failures.",
|
| 85 |
"Unilateral termination": "Company can terminate your account at any time without reason.",
|
| 86 |
"Unilateral change": "Company can change terms at any time without your consent.",
|
|
|
|
| 89 |
"Choice of law": "Governing law may differ from your country, reducing your legal protections.",
|
| 90 |
"Jurisdiction": "Disputes must be resolved in a jurisdiction that may disadvantage you.",
|
| 91 |
"Arbitration": "Forces disputes to arbitration instead of court. You waive your right to sue.",
|
| 92 |
+
"Uncapped Liability": "No financial limit on damages the party may be liable for.",
|
| 93 |
+
"Cap on Liability": "Maximum financial liability is explicitly capped.",
|
| 94 |
+
"Non-Compete": "Restrictions on competing with the counter-party.",
|
| 95 |
+
"Exclusivity": "Obligation to deal exclusively with one party.",
|
| 96 |
+
"IP Ownership Assignment": "Intellectual property rights are transferred entirely.",
|
| 97 |
+
"Termination for Convenience": "Either party may terminate without cause or notice.",
|
| 98 |
+
"Governing Law": "Specifies which jurisdiction's laws apply.",
|
| 99 |
+
"Non-Disparagement": "Agreement not to speak negatively about the other party.",
|
| 100 |
+
"ROFR/ROFO/ROFN": "Right of First Refusal / Offer / Negotiation clause.",
|
| 101 |
+
"Change of Control": "Provisions triggered by ownership or control changes.",
|
| 102 |
+
"Anti-Assignment": "Restrictions on transferring contract rights to third parties.",
|
| 103 |
+
"Liquidated Damages": "Pre-determined damages amount for breach of contract.",
|
| 104 |
+
"Source Code Escrow": "Third-party holds source code for release under defined conditions.",
|
| 105 |
+
"Post-Termination Services": "Services to be provided after the contract ends.",
|
| 106 |
+
"Audit Rights": "Right to inspect records or verify compliance.",
|
| 107 |
+
"Warranty Duration": "Length of time warranties remain in effect.",
|
| 108 |
+
"Covenant Not to Sue": "Agreement not to bring legal action against a party.",
|
| 109 |
+
"Third Party Beneficiary": "Non-party who benefits from the contract terms.",
|
| 110 |
+
"Insurance": "Insurance coverage requirements.",
|
| 111 |
+
"Revenue/Profit Sharing": "Revenue or profit sharing arrangements between parties.",
|
| 112 |
+
"Price Restriction": "Restrictions on pricing or discounting.",
|
| 113 |
+
"Minimum Commitment": "Minimum purchase or usage commitment.",
|
| 114 |
+
"Volume Restriction": "Limits on volume of goods or services.",
|
| 115 |
+
"License Grant": "Permission to use intellectual property.",
|
| 116 |
+
"Non-Transferable License": "License that cannot be transferred to third parties.",
|
| 117 |
+
"Irrevocable or Perpetual License": "License that cannot be revoked or lasts indefinitely.",
|
| 118 |
+
"Unlimited/All-You-Can-Eat License": "License with no usage limits.",
|
| 119 |
}
|
| 120 |
|
| 121 |
+
# Points contributed by each finding of a given tier; summed by
# compute_risk_score() before normalising by the clause count.
RISK_WEIGHTS = {"CRITICAL": 40, "HIGH": 20, "MEDIUM": 10, "LOW": 3}
|
| 122 |
+
|
| 123 |
+
# ─── Regex patterns (fallback) ───
|
| 124 |
+
# Fallback classifier table: label -> regexes searched in the lower-cased
# clause text (see classify_regex). Patterns are deliberately loose substring
# matches; the short acronyms "nda" and "ip" are anchored with \b because as
# bare substrings they fire on ordinary words ("standard", "calendar",
# "equipment", "relationship").
REGEX_PATTERNS = {
    "Limitation of liability": [r"not liable", r"shall not be (liable|responsible)", r"in no event.*liable", r"limitation of liability", r"without warranty", r"disclaim"],
    "Unilateral termination": [r"terminat.*at any time", r"suspend.*account.*without", r"we may (terminat|suspend|discontinu)", r"right to (terminat|suspend)"],
    "Unilateral change": [r"sole discretion", r"reserves? the right to (modify|change|update|amend)", r"at any time.*without (prior )?notice", r"we may (modify|change|update)"],
    "Content removal": [r"remove.*content.*without", r"right to remove", r"we may.*remove"],
    "Contract by using": [r"by (using|accessing).*you agree", r"continued use.*constitutes? acceptance"],
    "Choice of law": [r"governed by.*laws? of", r"shall be governed", r"laws of the state of"],
    "Jurisdiction": [r"exclusive jurisdiction", r"courts? of.*(california|delaware|new york|ireland|england)", r"submit to.*jurisdiction"],
    "Arbitration": [r"arbitrat", r"binding arbitration", r"waive.*right.*court", r"class action waiver"],
    "Governing Law": [r"governed by", r"laws of", r"jurisdiction of"],
    "Termination for Convenience": [r"terminat.*for convenience", r"terminat.*without cause", r"terminat.*at any time"],
    "Non-Compete": [r"non-compete", r"shall not compete", r"competition"],
    "Exclusivity": [r"exclusive", r"exclusivity"],
    "IP Ownership Assignment": [r"assign.*intellectual property", r"ownership of.*\bip\b", r"all rights.*assign"],
    "Uncapped Liability": [r"unlimited liability", r"uncapped", r"no.*limit.*liability"],
    "Cap on Liability": [r"cap on liability", r"maximum liability", r"liability.*shall not exceed"],
    "Indemnification": [r"indemnif", r"hold harmless", r"defend"],
    "Confidentiality": [r"confidential", r"non-disclosure", r"\bnda\b"],
    "Force Majeure": [r"force majeure", r"act of god", r"beyond.*control"],
    "Penalties": [r"penalt", r"late fee", r"default charge", r"interest on overdue"],
}
|
| 145 |
|
| 146 |
+
# ─── Model Loading ───
|
| 147 |
+
# Handles for the CUAD classifier; assigned by load_model(). While either is
# None, classify_cuad() uses the regex fallback instead.
cuad_tokenizer = None
cuad_model = None
# Flipped to True only when the optional ML imports below all succeed.
_HAS_TORCH = False

# Optional ML stack: any failure while importing torch / transformers / peft
# leaves _HAS_TORCH False, and load_model() then returns without loading.
try:
    import torch
    from transformers import AutoTokenizer, AutoModelForSequenceClassification
    from peft import PeftModel
    _HAS_TORCH = True
except Exception:
    pass
|
| 158 |
|
| 159 |
def load_model():
    """Load the Legal-BERT base model plus the CUAD adapter into module globals.

    Does nothing except print a notice when the ML imports were unavailable.
    On any load failure the error is printed and both ``cuad_tokenizer`` and
    ``cuad_model`` are reset to ``None``.
    """
    global cuad_tokenizer, cuad_model, classifier

    if not _HAS_TORCH:
        print("[ClauseGuard] PyTorch not available")
        return

    base = "nlpaueb/legal-bert-base-uncased"
    adapter = "Mokshith31/legalbert-contract-clause-classification"
    try:
        print(f"[ClauseGuard] Loading CUAD classifier: {adapter}")
        tokenizer = AutoTokenizer.from_pretrained(base)
        backbone = AutoModelForSequenceClassification.from_pretrained(
            base, num_labels=41, ignore_mismatched_sizes=True
        )
        model = PeftModel.from_pretrained(backbone, adapter)
        model.eval()
    except Exception as e:
        print(f"[ClauseGuard] CUAD model load failed: {e}")
        cuad_tokenizer = None
        cuad_model = None
    else:
        # Publish only a fully initialised tokenizer/model pair.
        cuad_tokenizer = tokenizer
        cuad_model = model
        print("[ClauseGuard] CUAD model loaded successfully")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
# ─── Supabase helper ───
|
| 181 |
async def supabase_insert(table: str, data: dict):
|
|
|
|
| 200 |
)
|
| 201 |
return resp.json() if resp.status_code == 200 else []
|
| 202 |
|
| 203 |
+
# ─── Clause Processing ───
|
| 204 |
+
def split_clauses(text):
    """Split contract text into clause-sized chunks.

    Runs of three or more newlines are collapsed to a blank line, then the
    text is cut after sentence punctuation followed by an uppercase letter,
    digit or "(", and at blank lines that precede a numbered item, a "(a)"
    style item or an all-caps heading. Chunks of 30 characters or fewer
    (after stripping) are discarded.
    """
    normalised = re.sub(r'\n{3,}', '\n\n', text.strip())
    boundary = r'(?<=[.!?])\s+(?=[A-Z0-9(])|(?:\n\n)(?=\d+[.)]\s|\([a-z]\)\s|[A-Z][A-Z\s]{2,})'
    clauses = []
    for piece in re.split(boundary, normalised):
        piece = piece.strip()
        if len(piece) > 30:
            clauses.append(piece)
    return clauses
|
| 208 |
+
|
| 209 |
+
def classify_regex(text):
    """Classify a clause with the REGEX_PATTERNS fallback table.

    The text is lower-cased once; each label whose pattern list has at least
    one match contributes a single finding with a fixed confidence of 0.7.
    Risk comes from RISK_MAP (default "MEDIUM") and the description from
    DESC_MAP (default: the label itself). Findings follow table order.
    """
    lowered = text.lower()
    findings = []
    for label, patterns in REGEX_PATTERNS.items():
        # Stop at the first matching pattern; one hit per label is enough.
        if not any(re.search(pat, lowered) for pat in patterns):
            continue
        findings.append({
            "label": label,
            "confidence": 0.7,
            "risk": RISK_MAP.get(label, "MEDIUM"),
            "description": DESC_MAP.get(label, label),
        })
    return findings
|
| 227 |
+
|
| 228 |
+
def classify_cuad(clause_text, threshold=0.15):
    """Classify a clause with the CUAD model, falling back to regexes.

    Args:
        clause_text: The clause to classify.
        threshold: Minimum softmax probability for a label to be reported.
            Defaults to 0.15, the value previously hard-coded here.

    Returns:
        A list of findings (``label``, ``confidence``, ``risk``,
        ``description``) sorted by descending confidence. When no label
        clears the threshold the single most probable class is returned.
        When the model is not loaded, or inference raises, the result of
        ``classify_regex`` is returned instead.
    """
    if cuad_model is None or cuad_tokenizer is None:
        return classify_regex(clause_text)

    def _finding(label, prob):
        # Same record shape for thresholded hits and the arg-max fallback.
        return {
            "label": label,
            "confidence": round(float(prob), 3),
            "risk": RISK_MAP.get(label, "LOW"),
            "description": DESC_MAP.get(label, label),
        }

    try:
        inputs = cuad_tokenizer(clause_text, return_tensors="pt", truncation=True, max_length=256, padding=True)
        with torch.no_grad():
            logits = cuad_model(**inputs).logits
        probs = torch.softmax(logits, dim=-1)[0]

        # Indices beyond the label list have no name and are skipped here.
        results = [
            _finding(CUAD_LABELS[i], prob)
            for i, prob in enumerate(probs)
            if prob > threshold and i < len(CUAD_LABELS)
        ]
        results.sort(key=lambda x: x["confidence"], reverse=True)

        if not results:
            top_idx = int(probs.argmax())
            label = CUAD_LABELS[top_idx] if top_idx < len(CUAD_LABELS) else "Other"
            results.append(_finding(label, probs[top_idx]))
        return results
    except Exception as e:
        # Stay best-effort, but report the failure rather than hiding it.
        print(f"[ClauseGuard] CUAD inference failed, using regex fallback: {e}")
        return classify_regex(clause_text)
|
| 260 |
+
|
| 261 |
+
# ─── NER ───
|
| 262 |
+
# Ordered extraction rules: (compiled pattern, entity type, group whose text is
# reported). Order matters: when two matches share a start and a length, the
# earlier rule wins during de-duplication.
_ENTITY_RULES = tuple(
    (re.compile(pattern, flags), etype, group)
    for pattern, etype, flags, group in (
        # Dates
        (r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', "DATE", re.IGNORECASE, 0),
        (r'\b\d{1,2}/\d{1,2}/\d{2,4}\b', "DATE", re.IGNORECASE, 0),
        (r'\b\d{1,2}-\d{1,2}-\d{2,4}\b', "DATE", re.IGNORECASE, 0),
        (r'\b(?:Effective|Commencement|Expiration|Termination)\s+Date\b', "DATE_REF", re.IGNORECASE, 0),
        # Money. \d+ (not \d{1,3}) so "$1500" is not cut to "$150"; the unit
        # suffix ends with \b so that, under IGNORECASE, "M|B|K" cannot
        # swallow the first letter of the next word ("$1,000 by").
        (r'\$\d+(?:,\d{3})*(?:\.\d{2})?(?:\s*(?:million|billion|thousand|M|B|K)\b)?', "MONEY", re.IGNORECASE, 0),
        (r'\b\d+(?:,\d{3})*(?:\.\d{2})?\s*(?:USD|EUR|GBP|dollars|euros)', "MONEY", re.IGNORECASE, 0),
        # Parties. (?!\w) instead of a trailing \b: \b can never match between
        # the final "." of "Inc." / "Corp." and a following space.
        (r'\b[A-Z][A-Za-z0-9\s&]+(?:Inc\.|LLC|Ltd\.|Limited|Corp\.|Corporation|PLC|GmbH|AG|S\.A\.|B\.V\.)(?!\w)', "PARTY", 0, 0),
        (r'\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Company|Customer|Vendor|Client)\b', "PARTY_ROLE", 0, 0),
        # Jurisdictions
        (r'\b(?:State|Laws?) of [A-Z][a-zA-Z\s]+', "JURISDICTION", re.IGNORECASE, 0),
        (r'\b(?:California|Delaware|New York|Texas|Florida|England|Ireland|Germany|France|Singapore|Hong Kong)\b', "JURISDICTION", re.IGNORECASE, 0),
        # Defined terms: report the inner text, without quotes / parentheses.
        (r'"([A-Z][A-Z\s]+)"', "DEFINED_TERM", 0, 1),
        (r'\(([A-Z][A-Z\s]+)\)', "DEFINED_TERM", 0, 1),
    )
)


def extract_entities(text):
    """Extract dates, money, parties, jurisdictions and defined terms.

    Args:
        text: Raw contract text.

    Returns:
        A list of dicts with ``text``, ``type``, ``start`` and ``end``, in
        order of position and without overlaps: among overlapping matches the
        one starting first is kept, and for equal starts the longest. For
        DEFINED_TERM the offsets span the enclosing quotes or parentheses,
        while ``text`` holds only the term itself.
    """
    entities = [
        {"text": m.group(group), "type": etype, "start": m.start(), "end": m.end()}
        for pattern, etype, group in _ENTITY_RULES
        for m in pattern.finditer(text)
    ]

    # Deduplicate: earliest start first, longest match first on ties, then
    # drop anything that begins inside an entity that was already kept.
    entities.sort(key=lambda x: (x["start"], -(x["end"] - x["start"])))
    filtered = []
    last_end = -1
    for e in entities:
        if e["start"] >= last_end:
            filtered.append(e)
            last_end = e["end"]
    return filtered
|
| 310 |
+
|
| 311 |
+
# ─── Contradictions ───
|
| 312 |
+
# (group_a, group_b, explanation): a contradiction is reported when one finding
# carries a label from group_a and a different finding carries one from
# group_b. NOTE(review): the lower-case / regex-looking entries never equal a
# classifier label, so only the capitalised names can match here; they look
# like keyword hints for a text-based check, confirm intent.
CONTRADICTION_PAIRS = [
    (["Uncapped Liability", "unlimited liability"], ["Cap on Liability", "cap on liability"],
     "Liability cannot be both uncapped and capped simultaneously."),
    (["Governing Law"], ["Governing Law"],
     "Multiple governing law provisions detected — verify consistency."),
    (["Termination for Convenience", "terminat.*convenience"], ["Fixed Term", "fixed term"],
     "Contract has both fixed term and termination for convenience — review carefully."),
    (["IP Ownership Assignment", "assign.*ip"], ["Joint IP Ownership", "joint ownership"],
     "IP cannot be both fully assigned and jointly owned."),
]

# Labels whose absence from the whole document is reported as a MISSING issue.
_REQUIRED_CLAUSES = ("Governing Law", "Termination for Convenience", "Limitation of liability", "Arbitration")


def detect_contradictions(clause_results):
    """Report contradictory and missing clause types for a document.

    Args:
        clause_results: Iterable of finding dicts, each with a ``label`` key.

    Returns:
        A list of issue dicts (``type``, ``explanation``, ``severity``,
        ``clauses``): one "CONTRADICTION" per matching entry of
        CONTRADICTION_PAIRS, followed by one "MISSING" per required label
        that no finding carries.
    """
    # Count occurrences instead of collapsing to a set: a pair whose two
    # groups overlap (Governing Law vs. Governing Law) must only fire when
    # there are at least two separate findings, not on a single clause.
    label_counts = {}
    for cr in clause_results:
        label_counts[cr["label"]] = label_counts.get(cr["label"], 0) + 1

    issues = []
    for group_a, group_b, explanation in CONTRADICTION_PAIRS:
        set_a, set_b = set(group_a), set(group_b)
        hits_a = sum(label_counts.get(label, 0) for label in set_a)
        hits_b = sum(label_counts.get(label, 0) for label in set_b)
        shared = sum(label_counts.get(label, 0) for label in set_a & set_b)
        # Two distinct findings exist iff both sides are hit and the findings
        # involved number at least two once shared hits are counted once.
        if hits_a and hits_b and hits_a + hits_b - shared >= 2:
            issues.append({
                "type": "CONTRADICTION",
                "explanation": explanation,
                "severity": "HIGH",
                # dict.fromkeys de-duplicates while keeping a stable order.
                "clauses": list(dict.fromkeys(group_a + group_b)),
            })

    for cc in _REQUIRED_CLAUSES:
        if cc not in label_counts:
            issues.append({
                "type": "MISSING",
                "explanation": f"Critical clause '{cc}' not detected.",
                "severity": "MEDIUM",
                "clauses": [cc],
            })
    return issues
|
| 337 |
+
|
| 338 |
+
# ─── Risk Scoring ───
|
| 339 |
+
def compute_risk_score(clause_results, total_clauses, weights=None):
    """Aggregate clause findings into a 0-100 risk score and a letter grade.

    Args:
        clause_results: Iterable of finding dicts; ``risk`` is read from each
            (missing or unrecognised values are counted as "LOW").
        total_clauses: Number of clauses in the document, used to normalise.
        weights: Optional mapping of tier -> points per finding. Defaults to
            the module-level RISK_WEIGHTS.

    Returns:
        ``(score, grade, counts)``: ``score`` is an int capped at 100,
        ``grade`` is "A".."F" (no "E"), and ``counts`` maps each tier to the
        number of findings. ``(0, "A", counts)`` when ``total_clauses`` is 0.
    """
    if weights is None:
        weights = RISK_WEIGHTS

    sev_counts = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0}
    for cr in clause_results:
        sev = cr.get("risk", "LOW")
        if sev not in sev_counts:
            # An unexpected tier must not abort scoring with a KeyError.
            sev = "LOW"
        sev_counts[sev] += 1

    if total_clauses == 0:
        return 0, "A", sev_counts

    weighted = sum(count * weights.get(sev, 0) for sev, count in sev_counts.items())
    risk = min(100, round(weighted / max(1, total_clauses) * 10))

    # Grade bands, highest threshold first.
    for floor, letter in ((70, "F"), (50, "D"), (30, "C"), (15, "B")):
        if risk >= floor:
            return risk, letter, sev_counts
    return risk, "A", sev_counts
|
| 354 |
+
|
| 355 |
+
# ─── Obligations ───
|
| 356 |
+
# Obligation category -> regexes (searched case-insensitively, per sentence).
OBLIGATION_PATTERNS = {
    "monetary": [r"(?:shall|must|will|agrees? to)\s+pay\s+(?:\$?[\d,]+)", r"(?:fee|payment|compensation|reimburs(?:e|ement))\s+of\s+(?:\$?[\d,]+)", r"(?:shall|must|will)\s+remit\s+(?:\$?[\d,]+)", r"(?:annual|monthly|quarterly)\s+(?:fee|payment)\s+of", r"(?:liquidated damages|penalty)\s+of\s+(?:\$?[\d,]+)"],
    "compliance": [r"(?:shall|must|will)\s+comply\s+with", r"(?:shall|must|will)\s+adhere\s+to", r"(?:shall|must|will)\s+conform\s+to", r"(?:GDPR|CCPA|HIPAA|SOX|PCI-DSS|ISO\s+\d+)", r"(?:confidential|privacy|data protection)", r"(?:shall|must|will)\s+maintain\s+(?:insurance|coverage|bond)"],
    "reporting": [r"(?:shall|must|will)\s+report", r"(?:shall|must|will)\s+provide\s+(?:regular|monthly|quarterly|annual)\s+(?:reports?|updates?|status)", r"(?:shall|must|will)\s+notify", r"(?:shall|must|will)\s+inform"],
    "delivery": [r"(?:shall|must|will)\s+deliver", r"(?:shall|must|will)\s+provide", r"(?:shall|must|will)\s+furnish", r"(?:shall|must|will)\s+supply", r"(?:shall|must|will)\s+submit"],
    "termination": [r"(?:shall|must|will)\s+return", r"(?:shall|must|will)\s+destroy", r"(?:shall|must|will)\s+cease", r"(?:upon|after)\s+termination"],
}

# Tried in order, case-sensitively: role nouns first, then company names.
# The company pattern ends in (?!\w) rather than \b, because \b cannot match
# between the "." of "Inc." / "Ltd." / "Corp." and a following space.
_OBLIGATION_PARTY_PATTERNS = (
    r'\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Company|Customer|Vendor|Client)\b',
    r'\b[A-Z][A-Za-z0-9\s&]+(?:Inc\.|LLC|Ltd\.|Limited|Corp\.|Corporation|PLC|GmbH|AG|S\.A\.|B\.V\.)(?!\w)',
)

# Tried in order, case-insensitively; the first match is the deadline text.
_OBLIGATION_DEADLINE_PATTERNS = (
    r"within\s+(\d+)\s+(day|week|month|year)s?",
    r"no\s+later\s+than\s+(\d+)\s+(day|week|month|year)s?",
    r"within\s+(\d+)\s+business\s+days?",
    r"by\s+([A-Z][a-z]+\s+\d{1,2},?\s+\d{4})",
    r"on\s+or\s+before\s+([A-Z][a-z]+\s+\d{1,2},?\s+\d{4})",
)


def _first_match(patterns, sentence, flags, default):
    """Return the text of the first pattern that matches, else ``default``."""
    for pattern in patterns:
        m = re.search(pattern, sentence, flags)
        if m:
            return m.group(0)
    return default


def extract_obligations(text):
    """Find obligation sentences and tag them with type, party and deadline.

    The text is split after sentence punctuation that is followed by an
    uppercase letter; sentences under 30 characters are ignored. A sentence
    produces one record per OBLIGATION_PATTERNS category it matches, emitted
    in the table's key order so the output is deterministic.

    Returns:
        A list of dicts with ``type``, ``party`` ("Unknown" if none found),
        ``description`` (the sentence, truncated to 250 characters plus
        "...") and ``deadline`` ("Not specified" if none found).
    """
    obligations = []
    for sentence in re.split(r'(?<=[.!?])\s+(?=[A-Z])', text):
        sentence = sentence.strip()
        if len(sentence) < 30:
            continue

        found_types = [
            otype
            for otype, patterns in OBLIGATION_PATTERNS.items()
            if any(re.search(pat, sentence, re.IGNORECASE) for pat in patterns)
        ]
        if not found_types:
            continue

        party = _first_match(_OBLIGATION_PARTY_PATTERNS, sentence, 0, "Unknown")
        deadline = _first_match(_OBLIGATION_DEADLINE_PATTERNS, sentence, re.IGNORECASE, "Not specified")
        description = sentence[:250] + ("..." if len(sentence) > 250 else "")

        for otype in found_types:
            obligations.append({"type": otype, "party": party, "description": description, "deadline": deadline})
    return obligations
|
| 400 |
+
|
| 401 |
+
# ─── Compliance ───
|
| 402 |
+
# Per-regulation checklist. Every requirement lists the keywords whose
# presence in a document counts as evidence that the requirement is addressed.
REGULATIONS = {
    "GDPR": {
        "description": "EU General Data Protection Regulation (Regulation 2016/679)",
        "requirements": {
            "lawful_basis": {"keywords": ["lawful basis", "legal basis", "legitimate interest", "consent", "performance of contract", "legal obligation"], "description": "Must specify lawful basis for data processing (Art. 6)", "severity": "HIGH"},
            "data_subject_rights": {"keywords": ["right to access", "right to erasure", "right to be forgotten", "data portability", "rectification", "object to processing"], "description": "Must acknowledge data subject rights (Arts. 15-22)", "severity": "HIGH"},
            "data_breach_notification": {"keywords": ["data breach", "breach notification", "notify supervisory authority", "72 hours"], "description": "Must include data breach notification obligations (Art. 33)", "severity": "MEDIUM"},
            "cross_border_transfer": {"keywords": ["standard contractual clauses", "SCCs", "adequacy decision", "transfer mechanism", "third country"], "description": "Must specify transfer safeguards for cross-border data (Arts. 44-49)", "severity": "HIGH"},
        },
    },
    "CCPA": {
        "description": "California Consumer Privacy Act (Cal. Civ. Code § 1798.100 et seq.)",
        "requirements": {
            "consumer_rights": {"keywords": ["right to know", "right to delete", "right to opt out", "right to non-discrimination", "consumer rights"], "description": "Must acknowledge California consumer rights", "severity": "HIGH"},
            "data_categories": {"keywords": ["categories of personal information", "personal information categories", "identifiers", "commercial information"], "description": "Must disclose categories of personal information collected", "severity": "HIGH"},
            "sale_of_data": {"keywords": ["do not sell my personal information", "opt-out of sale", "sale of personal information"], "description": "Must provide opt-out mechanism for data sales", "severity": "HIGH"},
        },
    },
    "SOX": {
        "description": "Sarbanes-Oxley Act (US, 2002)",
        "requirements": {
            "internal_controls": {"keywords": ["internal controls", "internal control over financial reporting", "ICFR"], "description": "Must reference internal controls over financial reporting (§ 404)", "severity": "HIGH"},
            "whistleblower": {"keywords": ["whistleblower", "anonymous reporting", "reporting hotline", "retaliation"], "description": "Should protect whistleblower provisions (§ 806)", "severity": "HIGH"},
            "document_retention": {"keywords": ["document retention", "record retention", "retention policy", "preserve records"], "description": "Must include document retention obligations (§ 802)", "severity": "HIGH"},
        },
    },
    "HIPAA": {
        "description": "Health Insurance Portability and Accountability Act (US, 1996)",
        "requirements": {
            "phi_protection": {"keywords": ["protected health information", "PHI", "health information", "ePHI"], "description": "Must protect PHI and limit uses/disclosures", "severity": "CRITICAL"},
            "security_safeguards": {"keywords": ["administrative safeguards", "technical safeguards", "physical safeguards", "encryption", "access controls"], "description": "Must implement security safeguards (§ 164.308-312)", "severity": "HIGH"},
            "breach_notification": {"keywords": ["breach notification", "notification of breach", "unauthorized access"], "description": "Must include breach notification obligations (§ 164.400-414)", "severity": "HIGH"},
        },
    },
    "FINRA": {
        "description": "Financial Industry Regulatory Authority (US)",
        "requirements": {
            "recordkeeping": {"keywords": ["recordkeeping", "books and records", "retain records", "SEC Rule 17a-4"], "description": "Must comply with recordkeeping rules (FINRA Rule 4511)", "severity": "HIGH"},
            "anti_money_laundering": {"keywords": ["anti-money laundering", "AML", "suspicious activity", "SAR", "OFAC"], "description": "Must reference AML compliance (FINRA Rule 3310)", "severity": "HIGH"},
            "privacy": {"keywords": ["privacy policy", "customer information", "Regulation S-P", "nonpublic personal information"], "description": "Must protect customer information (Regulation S-P)", "severity": "HIGH"},
        },
    },
}


def _keyword_present(keyword, text_lower):
    """Report whether *keyword* occurs in the already lower-cased text.

    Keywords written with upper-case letters (acronyms and proper names such
    as "SAR", "PHI", "SCCs") must appear as a whole token; otherwise "SAR"
    would be found inside "necessary" and "PHI" inside "graphic". All-lower
    keywords keep plain substring matching so inflected forms still count.
    """
    needle = keyword.lower()
    if needle != keyword:
        return re.search(r"(?<!\w)" + re.escape(needle) + r"(?!\w)", text_lower) is not None
    return needle in text_lower


def check_compliance(text):
    """Run the keyword checklist in ``REGULATIONS`` against a document.

    This is a presence check only: a requirement is marked ``"PASS"`` when at
    least one of its keywords is found in the text, ``"MISSING"`` otherwise.

    Args:
        text: Document text. ``None`` is treated as an empty document.

    Returns:
        A dict keyed by regulation name. Each value holds ``"description"``,
        ``"compliance_rate"`` (percentage of requirements passed, rounded to
        an int), ``"checks"`` (one dict per requirement with its status and
        the keywords that matched) and ``"overall_status"``: ``"COMPLIANT"``
        at 80 or above, ``"PARTIAL"`` at 40 or above, else
        ``"NON-COMPLIANT"``.
    """
    text_lower = (text or "").lower()
    results = {}
    for reg_name, reg_data in REGULATIONS.items():
        checks = []
        for req_name, req_data in reg_data["requirements"].items():
            matched_keywords = [kw for kw in req_data["keywords"] if _keyword_present(kw, text_lower)]
            checks.append({
                "requirement": req_name,
                "description": req_data["description"],
                "severity": req_data["severity"],
                "status": "PASS" if matched_keywords else "MISSING",
                "matched_keywords": matched_keywords,
            })

        passed = sum(1 for c in checks if c["status"] == "PASS")
        total = len(checks)
        compliance_rate = round(passed / total * 100) if total > 0 else 0
        if compliance_rate >= 80:
            overall_status = "COMPLIANT"
        elif compliance_rate >= 40:
            overall_status = "PARTIAL"
        else:
            overall_status = "NON-COMPLIANT"
        results[reg_name] = {
            "description": reg_data["description"],
            "compliance_rate": compliance_rate,
            "checks": checks,
            "overall_status": overall_status,
        }
    return results
|
| 464 |
+
|
| 465 |
+
# ─── Comparison ───
|
| 466 |
+
from difflib import SequenceMatcher
|
| 467 |
+
|
| 468 |
+
def _normalize(text):
|
| 469 |
+
text = text.lower()
|
| 470 |
+
text = re.sub(r'[^a-z0-9\s]', ' ', text)
|
| 471 |
+
text = re.sub(r'\s+', ' ', text).strip()
|
| 472 |
+
return text
|
| 473 |
+
|
| 474 |
+
def _clause_type(text):
|
| 475 |
+
text_lower = text.lower()
|
| 476 |
+
type_keywords = {
|
| 477 |
+
"governing law": ["govern", "law", "jurisdiction"],
|
| 478 |
+
"termination": ["terminat", "cancel", "end"],
|
| 479 |
+
"indemnification": ["indemnif", "hold harmless"],
|
| 480 |
+
"confidentiality": ["confidential", "non-disclosure"],
|
| 481 |
+
"liability": ["liability", "liable", "damages"],
|
| 482 |
+
"payment": ["payment", "fee", "price", "compensat"],
|
| 483 |
+
"intellectual property": ["intellectual", "ip", "copyright", "patent"],
|
| 484 |
+
"warranty": ["warrant", "guarantee"],
|
| 485 |
+
"force majeure": ["force majeure", "act of god"],
|
| 486 |
+
"arbitration": ["arbitrat", "mediation"],
|
| 487 |
+
"assignment": ["assign", "transfer"],
|
| 488 |
+
"non-compete": ["compete", "competition"],
|
| 489 |
+
"renewal": ["renew", "extend"],
|
| 490 |
+
}
|
| 491 |
+
for ctype, keywords in type_keywords.items():
|
| 492 |
+
if any(kw in text_lower for kw in keywords):
|
| 493 |
+
return ctype
|
| 494 |
+
return "general"
|
| 495 |
+
|
| 496 |
+
def _clause_type_counts(clauses):
    """Count clauses per category; the ``"general"`` key is always present."""
    counts = {"general": 0}
    for clause in clauses:
        ctype = _clause_type(clause)
        counts[ctype] = counts.get(ctype, 0) + 1
    return counts


def compare_contracts(text_a, text_b):
    """Align the clauses of two contracts and summarise the differences.

    Each clause of contract A is paired greedily with the most similar
    still-unmatched clause of contract B, using ``SequenceMatcher.ratio`` on
    the normalised texts:

    * similarity >= 0.75: the pair is matched; it is reported as
      ``"modified"`` when the similarity is below 0.95.
    * 0.45 <= similarity < 0.75: reported as ``"partial"``. Neither clause is
      marked matched, so they also appear under removed/added.

    Args:
        text_a: Full text of contract A.
        text_b: Full text of contract B.

    Returns:
        A dict with the alignment score (matched A clauses divided by the
        larger clause count), clause counts, up to 50 added / removed /
        modified clauses (texts cut to 200 characters), a keyword-based risk
        comparison, and ``type_map_a`` / ``type_map_b`` giving the number of
        clauses per category in each contract.
    """
    clauses_a = split_clauses(text_a)
    clauses_b = split_clauses(text_b)

    # Normalise every clause once rather than once per (a, b) pair.
    norm_a = [_normalize(c) for c in clauses_a]
    norm_b = [_normalize(c) for c in clauses_b]

    matched_a = set()
    matched_b = set()
    modified = []
    for i, ca in enumerate(clauses_a):
        best_sim, best_j = 0, -1
        for j, nb in enumerate(norm_b):
            if j in matched_b:
                continue
            sim = SequenceMatcher(None, norm_a[i], nb).ratio()
            if sim > best_sim:
                best_sim = sim
                best_j = j

        if best_sim >= 0.75:
            matched_a.add(i)
            matched_b.add(best_j)
            if best_sim < 0.95:
                modified.append({"type": "modified", "similarity": round(best_sim, 3), "clause_a": ca[:200], "clause_b": clauses_b[best_j][:200], "clause_type": _clause_type(ca)})
        elif best_sim >= 0.45:
            # best_sim > 0 implies a candidate was found, so best_j is valid.
            modified.append({"type": "partial", "similarity": round(best_sim, 3), "clause_a": ca[:200], "clause_b": clauses_b[best_j][:200], "clause_type": _clause_type(ca)})

    removed = [c for i, c in enumerate(clauses_a) if i not in matched_a]
    added = [c for j, c in enumerate(clauses_b) if j not in matched_b]

    total_pairs = max(len(clauses_a), len(clauses_b))
    alignment = len(matched_a) / total_pairs if total_pairs > 0 else 0.0

    # Crude risk signal: how many distinct risk keywords each text mentions.
    risk_keywords = ["unlimited", "unilateral", "waive", "arbitration", "indemnif", "not liable", "no warranty", "sole discretion"]
    lower_a = text_a.lower()
    lower_b = text_b.lower()
    risk_a = sum(1 for kw in risk_keywords if kw in lower_a)
    risk_b = sum(1 for kw in risk_keywords if kw in lower_b)
    if risk_a > risk_b + 2:
        risk_delta, risk_winner = "Contract A is significantly riskier", "B"
    elif risk_b > risk_a + 2:
        risk_delta, risk_winner = "Contract B is significantly riskier", "A"
    else:
        risk_delta, risk_winner = "Similar risk profiles", "tie"

    return {
        "alignment_score": round(alignment, 3),
        "contract_a_clauses": len(clauses_a), "contract_b_clauses": len(clauses_b),
        "added_clauses": [{"text": c[:200], "type": _clause_type(c)} for c in added[:50]],
        "removed_clauses": [{"text": c[:200], "type": _clause_type(c)} for c in removed[:50]],
        "modified_clauses": modified[:50],
        "risk_delta": risk_delta, "risk_winner": risk_winner,
        # Previously a placeholder that always produced {"general": 0}.
        "type_map_a": _clause_type_counts(clauses_a),
        "type_map_b": _clause_type_counts(clauses_b),
    }
|
| 541 |
+
|
| 542 |
# ─── Models ───
|
| 543 |
class AnalyzeRequest(BaseModel):
    # Request body for POST /api/analyze.

    # Document text to analyse; required, and validation rejects values
    # shorter than 50 characters.
    text: str = Field(..., min_length=50)
    # Optional URL the text was taken from; the analyze handler stores it
    # alongside the saved analysis for authenticated users.
    source_url: Optional[str] = None
|
| 546 |
|
| 547 |
class AnalyzeResponse(BaseModel):
|
|
|
|
| 550 |
total_clauses: int
|
| 551 |
flagged_count: int
|
| 552 |
results: list[dict]
|
| 553 |
+
entities: list[dict]
|
| 554 |
+
contradictions: list[dict]
|
| 555 |
+
obligations: list[dict]
|
| 556 |
+
compliance: dict
|
| 557 |
model: str
|
| 558 |
latency_ms: int
|
| 559 |
|
| 560 |
+
class CompareRequest(BaseModel):
    # Request body for POST /api/compare: the two contract texts to diff.
    # Both are required and must be at least 50 characters long.
    text_a: str = Field(..., min_length=50)
    text_b: str = Field(..., min_length=50)
|
| 563 |
+
|
| 564 |
class ExplainRequest(BaseModel):
|
| 565 |
clause: str = Field(..., min_length=10, max_length=2000)
|
| 566 |
category: str
|
|
|
|
| 578 |
load_model()
|
| 579 |
yield
|
| 580 |
|
| 581 |
+
# Application instance; `lifespan` is passed in as the startup/shutdown hook.
app = FastAPI(title="ClauseGuard API", version="2.0.0", lifespan=lifespan)

# NOTE(review): the origin list repeats the Netlify URL and ends with "*".
# As far as Starlette's CORSMiddleware is documented, a "*" entry allows every
# origin, which would make the other entries redundant, and
# "chrome-extension://*" is compared literally rather than as a pattern
# (allow_origin_regex is the documented way to match a family of origins).
# Combining an allow-all origin with allow_credentials=True is worth a
# deliberate decision — confirm whether "*" is still needed before tightening.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://clauseguardweb.netlify.app", "https://clauseguardweb.netlify.app", "chrome-extension://*", "http://localhost:3000", "*"],
    allow_credentials=True, allow_methods=["*"], allow_headers=["*"],
)
|
| 588 |
|
| 589 |
@app.get("/health")
async def health():
    """Report service status, which classifier backend is active, and the API version."""
    # "ml" when a CUAD model object is loaded, otherwise the regex fallback.
    backend = "regex"
    if cuad_model:
        backend = "ml"
    return {"status": "ok", "model": backend, "version": "2.0.0"}
|
| 592 |
|
| 593 |
@app.post("/api/analyze", response_model=AnalyzeResponse)
async def analyze(req: AnalyzeRequest, user: Optional[dict] = Depends(get_current_user)):
    """Analyse a document and return clause findings plus derived reports.

    The text is split into clauses and each clause is classified; every
    prediction becomes one finding. Entities, contradictions, a risk score,
    obligations and a compliance report are then derived. When a user is
    resolved for the request, the analysis is also stored in the
    ``analyses`` table.

    Raises:
        HTTPException: 400 when no clauses are found in the text.
    """
    start = time.time()

    clauses = split_clauses(req.text)
    if not clauses:
        raise HTTPException(status_code=400, detail="No clauses detected in document")
    clause_total = len(clauses)

    # One finding per (clause, prediction); clauses with no predictions
    # contribute nothing.
    clause_results = [
        {"text": clause, "label": pred["label"], "confidence": pred["confidence"], "risk": pred["risk"], "description": pred["description"]}
        for clause in clauses
        for pred in (classify_cuad(clause) or ())
    ]

    entities = extract_entities(req.text)
    contradictions = detect_contradictions(clause_results)
    risk, grade, _sev_counts = compute_risk_score(clause_results, clause_total)
    obligations = extract_obligations(req.text)
    compliance = check_compliance(req.text)
    latency = int((time.time() - start) * 1000)

    results_for_db = []
    for finding in clause_results:
        category = {"name": finding["label"], "severity": finding["risk"], "confidence": finding["confidence"], "description": finding["description"]}
        results_for_db.append({"text": finding["text"], "categories": [category]})

    # A clause with several predictions is counted once.
    flagged_count = len({finding["text"] for finding in clause_results})

    if user:
        await supabase_insert("analyses", {
            "user_id": user["id"], "source_url": req.source_url, "total_clauses": clause_total,
            "flagged_count": flagged_count, "risk_score": risk, "grade": grade,
            "clauses": results_for_db, "entities": entities, "contradictions": contradictions,
            "obligations": obligations, "compliance": compliance,
        })

    backend = "ml" if cuad_model else "regex"
    return AnalyzeResponse(
        risk_score=risk, grade=grade, total_clauses=clause_total,
        flagged_count=flagged_count,
        results=results_for_db, entities=entities, contradictions=contradictions,
        obligations=obligations, compliance=compliance,
        model=backend, latency_ms=latency,
    )
|
| 631 |
+
|
| 632 |
+
@app.post("/api/compare")
async def compare(req: CompareRequest):
    """Compare the two submitted contract texts and return the report as-is."""
    return compare_contracts(req.text_a, req.text_b)
|
| 636 |
|
| 637 |
@app.post("/api/explain", response_model=ExplainResponse)
|
| 638 |
async def explain(req: ExplainRequest, user: dict = Depends(require_auth)):
|
| 639 |
+
desc = DESC_MAP.get(req.category, "Unknown category.")
|
| 640 |
+
legal = "Consult local consumer protection laws."
|
| 641 |
recommendation = "Review this clause carefully. Consider negotiating or seeking legal advice before agreeing."
|
|
|
|
|
|
|
| 642 |
if SAULLM_ENDPOINT and HF_API_TOKEN:
|
| 643 |
try:
|
| 644 |
+
prompt = f"You are a consumer protection legal analyst. Analyze this clause and explain why it may be unfair.\n\nClause: \"{req.clause}\"\nCategory: {req.category}\n\nProvide:\n1. A plain-English explanation\n2. The specific legal basis\n3. A practical recommendation\n\nBe concise. 3-4 sentences per section."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
async with httpx.AsyncClient(timeout=30.0) as client:
|
| 646 |
+
resp = await client.post(SAULLM_ENDPOINT, json={"inputs": prompt, "parameters": {"max_new_tokens": 300, "temperature": 0.3}}, headers={"Authorization": f"Bearer {HF_API_TOKEN}"})
|
|
|
|
|
|
|
|
|
|
|
|
|
| 647 |
if resp.status_code == 200:
|
| 648 |
output = resp.json()
|
| 649 |
generated = output[0]["generated_text"] if isinstance(output, list) else output.get("generated_text", "")
|
|
|
|
| 653 |
legal = parts[1] if len(parts) > 1 else legal
|
| 654 |
recommendation = parts[2] if len(parts) > 2 else recommendation
|
| 655 |
except Exception:
|
| 656 |
+
pass
|
| 657 |
+
return ExplainResponse(clause=req.clause, category=req.category, explanation=desc, legal_basis=legal, recommendation=recommendation)
|
|
|
|
|
|
|
| 658 |
|
| 659 |
@app.get("/api/history")
async def history(user: dict = Depends(require_auth), limit: int = 20, offset: int = 0):
    """Return the authenticated user's saved analyses, newest first.

    Args:
        user: Authenticated user resolved by ``require_auth``; its ``"id"``
            filters the query.
        limit: Page size, clamped to the range 0..100.
        offset: Number of rows to skip; negative values are treated as 0.

    Returns:
        A dict with the fetched rows under ``"analyses"`` and the effective
        ``"limit"`` and ``"offset"`` that were applied.
    """
    # Clamp both ends so a negative query parameter is never forwarded to
    # the database query.
    limit = max(0, min(limit, 100))
    offset = max(0, offset)
    data = await supabase_query("analyses", {"user_id": f"eq.{user['id']}", "select": "*", "order": "created_at.desc", "limit": str(limit), "offset": str(offset)})
    return {"analyses": data, "limit": limit, "offset": offset}
|
| 664 |
|
| 665 |
if __name__ == "__main__":
|