gaurv007 committed on
Commit
cdee1d9
·
verified ·
1 Parent(s): 04e44e4

ClauseGuard API — FastAPI + Legal-BERT from Hub

Browse files
Files changed (4) hide show
  1. Dockerfile +12 -0
  2. README.md +12 -5
  3. main.py +213 -0
  4. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12-slim

# Hugging Face Docker Spaces run the container as UID 1000. Create that user
# up front and give it a home directory so pip's user install and the Hub
# model cache both have somewhere writable to go.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    HF_HOME=/home/user/.cache/huggingface

WORKDIR /app

# Install dependencies before copying the code so this layer stays cached
# when only application files change.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=user . .

EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,17 @@
1
  ---
2
- title: Clauseguard Api
3
- emoji: 💻
4
- colorFrom: yellow
5
- colorTo: purple
6
  sdk: docker
 
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
1
  ---
2
+ title: ClauseGuard API
3
+ emoji: ⚙️
4
+ colorFrom: gray
5
+ colorTo: gray
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
  ---
10
 
11
+ ClauseGuard API — unfair clause detection powered by Legal-BERT.
12
+
13
+ - `GET /` — status
14
+ - `GET /health` — health check
15
+ - `GET /docs` — Swagger UI
16
+ - `POST /api/analyze` — classify clauses
17
+ - `POST /api/explain` — explain why a clause is unfair
main.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ClauseGuard API — HuggingFace Spaces Deployment
3
+ Loads Legal-BERT from Hub, serves clause classification.
4
+ """
5
+
6
+ import os
7
+ import time
8
+ import re
9
+ from contextlib import asynccontextmanager
10
+ from typing import Optional
11
+
12
+ import httpx
13
+ import numpy as np
14
+ from fastapi import FastAPI, HTTPException, Depends, Header
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+ from pydantic import BaseModel, Field
17
+
18
+ # ─── Config ───
19
# Deployment settings, read from the environment once at import time.
HUB_MODEL_ID = os.getenv("HUB_MODEL_ID", "gaurv007/clauseguard-legal-bert")
SUPABASE_URL = os.getenv("SUPABASE_URL", "")
SUPABASE_SERVICE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY", "")

# Category names; a category's position in this list is its numeric label id.
LABEL_NAMES = [
    "Limitation of liability",
    "Unilateral termination",
    "Unilateral change",
    "Content removal",
    "Contract by using",
    "Choice of law",
    "Jurisdiction",
    "Arbitration",
]

# Plain-language summary of each category.
LABEL_DESCRIPTIONS = {
    "Limitation of liability": "Company limits or excludes liability for losses, data breaches, or service failures.",
    "Unilateral termination": "Company can terminate your account at any time without reason.",
    "Unilateral change": "Company can change terms at any time without your consent.",
    "Content removal": "Company can delete your content without notice or justification.",
    "Contract by using": "You are bound to the contract simply by using the service.",
    "Choice of law": "Governing law may differ from your country, reducing your legal protections.",
    "Jurisdiction": "Disputes must be resolved in a jurisdiction that may disadvantage you.",
    "Arbitration": "Forces disputes to arbitration instead of court. You waive your right to sue.",
}

# Severity bucket ("HIGH" / "MEDIUM" / "LOW") assigned to each category.
SEVERITY_MAP = {
    "Limitation of liability": "HIGH",
    "Unilateral termination": "HIGH",
    "Arbitration": "HIGH",
    "Unilateral change": "MEDIUM",
    "Content removal": "MEDIUM",
    "Choice of law": "MEDIUM",
    "Jurisdiction": "MEDIUM",
    "Contract by using": "LOW",
}

# Legal reference text returned alongside an explanation for each category.
LEGAL_BASIS = {
    "Arbitration": "EU Directive 93/13/EEC Art. 3; CFPB arbitration rule (US).",
    "Unilateral change": "EU Directive 93/13/EEC Annex 1(j) — unilateral alteration.",
    "Content removal": "EU Digital Services Act Art. 17 — statement of reasons required.",
    "Jurisdiction": "EU Regulation 1215/2012 Art. 18 — consumer domicile prevails.",
    "Choice of law": "EU Regulation 593/2008 Art. 6 — consumer protection of habitual residence.",
    "Limitation of liability": "EU Directive 93/13/EEC Annex 1(a) — excluding statutory rights.",
    "Unilateral termination": "EU Directive 93/13/EEC Annex 1(f)(g) — termination without notice.",
    "Contract by using": "EU Directive 2011/83/EU Art. 8 — active consent required.",
}
55
+
56
+ # ─── ML Model ───
57
# Shared text-classification pipeline. It stays None until load_model()
# succeeds; callers treat None as "use the regex fallback".
classifier = None


def load_model():
    """Load the Hub text-classification pipeline into the global ``classifier``.

    Loading is best-effort. On any failure the error is printed and
    ``classifier`` is reset to ``None`` so that a previously loaded pipeline
    is never left in place after a failed (re)load.
    """
    global classifier
    try:
        # Imported lazily so a missing or broken transformers install only
        # disables the ML path instead of breaking module import.
        from transformers import pipeline

        print(f"Loading model from Hub: {HUB_MODEL_ID}")
        classifier = pipeline("text-classification", model=HUB_MODEL_ID, top_k=None, device=-1)
        print("Model loaded successfully")
    except Exception as e:
        classifier = None
        print(f"Model load failed: {e} — using regex fallback")
68
+
69
+ # ─── Regex fallback ───
70
# Regex fallback rules: label id -> patterns that flag that category.
# The keys are used as indexes into LABEL_NAMES.
PATTERNS = {
    0: [
        r"not liable",
        r"shall not be (liable|responsible)",
        r"in no event.*liable",
        r"limitation of liability",
        r"without warranty",
        r"disclaim",
    ],
    1: [
        r"terminat.*at any time",
        r"suspend.*account.*without",
        r"we may (terminat|suspend|discontinu)",
        r"right to (terminat|suspend)",
    ],
    2: [
        r"sole discretion",
        r"reserves? the right to (modify|change|update|amend)",
        r"at any time.*without (prior )?notice",
        r"we may (modify|change|update)",
    ],
    3: [
        r"remove.*content.*without",
        r"right to remove",
        r"we may.*remove",
    ],
    4: [
        r"by (using|accessing).*you agree",
        r"continued use.*constitutes? acceptance",
    ],
    5: [
        r"governed by.*laws? of",
        r"shall be governed",
        r"laws of the state of",
    ],
    6: [
        r"exclusive jurisdiction",
        r"courts? of.*(california|delaware|new york|ireland|england)",
        r"submit to.*jurisdiction",
    ],
    7: [
        r"arbitrat",
        r"binding arbitration",
        r"waive.*right.*court",
        r"class action waiver",
    ],
}
80
+
81
def _category_entry(name: str, confidence: float) -> dict:
    """Build one category record for the label *name*."""
    return {
        "name": name,
        "severity": SEVERITY_MAP.get(name, "MEDIUM"),
        "description": LABEL_DESCRIPTIONS.get(name, ""),
        "confidence": confidence,
    }


def _regex_categories(text: str) -> list[dict]:
    """Return one record per label whose patterns match *text* (case-insensitive)."""
    lowered = text.lower()
    return [
        _category_entry(LABEL_NAMES[label_id], 0.7)
        for label_id, patterns in PATTERNS.items()
        if any(re.search(pattern, lowered) for pattern in patterns)
    ]


def classify_clause(text: str) -> list[dict]:
    """Classify one clause and return the categories it was flagged for.

    When the ML pipeline is loaded it is used first: every prediction with a
    score above 0.5 whose label is a known category is returned (possibly an
    empty list). If the pipeline is not loaded, or inference raises, the
    regex patterns are used instead and each match gets a fixed confidence
    of 0.7.

    Args:
        text: The clause text to classify.

    Returns:
        A list of dicts with ``name``, ``severity``, ``description`` and
        ``confidence`` keys; empty when nothing was flagged.
    """
    if classifier is not None:
        try:
            preds = classifier(text, truncation=True, max_length=512)
            # The pipeline output may be nested one level (one list per input).
            items = preds[0] if isinstance(preds[0], list) else preds
            return [
                _category_entry(p["label"], round(p["score"], 3))
                for p in items
                if p["score"] > 0.5 and p["label"] in LABEL_DESCRIPTIONS
            ]
        except Exception as exc:
            # Best-effort: report the failure instead of hiding it, then
            # fall through to the regex rules.
            print(f"Model inference failed: {exc} — using regex fallback")

    return _regex_categories(text)
104
+
105
+ # ─── Auth (simplified for HF Spaces — no Supabase dependency required) ───
106
async def get_optional_user(authorization: Optional[str] = Header(None)) -> Optional[dict]:
    """Resolve the caller from the ``Authorization`` header, if one was sent.

    Placeholder implementation: the token is neither validated nor decoded,
    so the result is always ``None`` whatever the header contains.
    """
    user: Optional[dict] = None
    if authorization:
        # In production, validate the JWT here and fill ``user`` from its claims.
        pass
    return user
111
+
112
+ # ─── Supabase helpers ───
113
async def supabase_insert(table: str, data: dict):
    """Insert one row into a Supabase table through its REST endpoint.

    Does nothing when ``SUPABASE_URL`` or ``SUPABASE_SERVICE_KEY`` is unset.
    An HTTP error status is printed rather than raised, so a failed insert
    no longer goes unnoticed. Transport errors from httpx still propagate.

    Args:
        table: Name of the target table.
        data: Row to insert, sent as the JSON request body.
    """
    if not SUPABASE_URL or not SUPABASE_SERVICE_KEY:
        return

    # Tolerate a trailing slash in the configured base URL.
    endpoint = f"{SUPABASE_URL.rstrip('/')}/rest/v1/{table}"
    headers = {
        "apikey": SUPABASE_SERVICE_KEY,
        "Authorization": f"Bearer {SUPABASE_SERVICE_KEY}",
        "Content-Type": "application/json",
        "Prefer": "return=minimal",
    }
    async with httpx.AsyncClient() as client:
        response = await client.post(endpoint, json=data, headers=headers)

    if response.is_error:
        print(f"Supabase insert into {table} failed: HTTP {response.status_code}")
122
+
123
+ # ─── Models ───
124
class AnalyzeRequest(BaseModel):
    # Request body for POST /api/analyze.

    # Required; between 1 and 500 clause strings.
    clauses: list[str] = Field(min_length=1, max_length=500)
    # Optional; presumably the page the clauses came from (not read by the handler).
    source_url: Optional[str] = None
127
+
128
class AnalyzeResponse(BaseModel):
    # Response body for POST /api/analyze.

    risk_score: int      # overall score, capped at 100 by the handler
    grade: str           # letter grade derived from risk_score
    total_clauses: int   # number of clauses submitted
    flagged_count: int   # clauses with at least one category
    results: list[dict]  # one {"text", "categories"} entry per clause
    model: str           # "ml" or "regex"
    latency_ms: int      # handler processing time in milliseconds
136
+
137
class ExplainRequest(BaseModel):
    # Request body for POST /api/explain.

    # Required; clause text of 10 to 2000 characters.
    clause: str = Field(min_length=10, max_length=2000)
    # Category name to explain; looked up in the description tables.
    category: str
140
+
141
class ExplainResponse(BaseModel):
    # Response body for POST /api/explain.

    clause: str          # clause text echoed from the request
    category: str        # category echoed from the request
    explanation: str     # plain-language description of the category
    legal_basis: str     # legal reference for the category
    recommendation: str  # suggested next step for the reader
147
+
148
+ # ─── App ───
149
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: load the classifier once before serving."""
    # Startup phase: runs before the first request is handled.
    load_model()
    # Hand control to the running application; there is no shutdown work.
    yield
153
+
154
# The ASGI application; the lifespan hook loads the model at startup.
app = FastAPI(
    title="ClauseGuard API",
    description="AI-powered unfair clause detection. Send contract clauses, get risk scores.",
    version="1.0.0",
    lifespan=lifespan,
)

# Open CORS policy: any origin, method and header may call the API.
# Credentialed requests stay disabled because they must not be combined with
# wildcard origins; enabling them requires listing the allowed origins,
# methods and headers explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
168
+
169
@app.get("/")
async def root():
    """Return the service name, status, active classifier backend and docs path."""
    backend = "ml" if classifier else "regex"
    return dict(
        name="ClauseGuard API",
        status="running",
        model=backend,
        docs="/docs",
    )
177
+
178
@app.get("/health")
async def health():
    """Health check that also reports which classifier backend is active."""
    if classifier:
        backend = "ml"
    else:
        backend = "regex"
    return {"status": "ok", "model": backend}
181
+
182
# Points contributed by each flagged category, keyed by severity.
_SEVERITY_WEIGHTS = {"HIGH": 20, "MEDIUM": 10, "LOW": 5}

# (minimum risk score, grade), checked from worst to best; below 10 is "A".
_GRADE_FLOORS = ((60, "F"), (40, "D"), (20, "C"), (10, "B"))


def _risk_score(results: list[dict], total: int) -> int:
    """Return the 0-100 risk score for *results* over *total* clauses."""
    weighted = sum(
        _SEVERITY_WEIGHTS[category.get("severity", "LOW")]
        for result in results
        for category in result["categories"]
    )
    # max(1, total) guards the division; the score is capped at 100.
    return min(100, round(weighted / max(1, total) * 100))


def _grade_for(risk: int) -> str:
    """Map a risk score to its letter grade."""
    for floor, grade in _GRADE_FLOORS:
        if risk >= floor:
            return grade
    return "A"


@app.post("/api/analyze", response_model=AnalyzeResponse)
async def analyze(req: AnalyzeRequest):
    """Classify every submitted clause and summarise the overall risk.

    Each clause is run through ``classify_clause``. Every flagged category
    adds 20 (HIGH), 10 (MEDIUM) or 5 (LOW) points; the sum is divided by the
    number of clauses, multiplied by 100 and capped at 100. Grades are
    F (>= 60), D (>= 40), C (>= 20), B (>= 10), otherwise A.

    Args:
        req: Request body holding the clauses to analyse.

    Returns:
        AnalyzeResponse with the score, grade, per-clause results, the
        backend used ("ml" or "regex") and the processing time in ms.
    """
    # perf_counter is monotonic, so the latency cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    started = time.perf_counter()

    results = [{"text": clause, "categories": classify_clause(clause)} for clause in req.clauses]
    flagged_count = sum(1 for result in results if result["categories"])

    total = len(req.clauses)
    risk = _risk_score(results, total)
    grade = _grade_for(risk)
    latency = int((time.perf_counter() - started) * 1000)

    return AnalyzeResponse(
        risk_score=risk,
        grade=grade,
        total_clauses=total,
        flagged_count=flagged_count,
        results=results,
        model="ml" if classifier else "regex",
        latency_ms=latency,
    )
204
+
205
@app.post("/api/explain", response_model=ExplainResponse)
async def explain(req: ExplainRequest):
    """Explain why a clause of the given category is considered unfair.

    The description and legal basis are looked up by category name; an
    unrecognised category gets generic fallback text instead of an error.
    The recommendation is the same for every category.
    """
    category = req.category
    explanation = LABEL_DESCRIPTIONS.get(category, "Unknown category.")
    legal_basis = LEGAL_BASIS.get(category, "Consult local consumer protection laws.")
    recommendation = "Review this clause carefully. Consider negotiating or seeking legal advice."
    return ExplainResponse(
        clause=req.clause,
        category=category,
        explanation=explanation,
        legal_basis=legal_basis,
        recommendation=recommendation,
    )
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.136.0
2
+ uvicorn[standard]==0.46.0
3
+ pydantic==2.13.3
4
+ transformers==5.6.1
5
+ torch
6
+ numpy>=2.0.0
7
+ python-jose[cryptography]>=3.3.0
8
+ httpx>=0.28.0