Spaces:
Sleeping
Sleeping
fix: Header nav - Settings, Admin, Login/Signup visibility
#1
by gaurv007 - opened
- README.md +3 -23
- api/main.py +42 -100
- app.py +193 -664
- chatbot.py +5 -9
- compare.py +56 -72
- compliance.py +33 -148
- extension/background.js +14 -51
- extension/content.js +3 -13
- extension/manifest.json +3 -3
- extension/popup.js +9 -19
- extension/sidepanel.html +0 -4
- extension/sidepanel.js +7 -23
- extension/styles/content.css +0 -17
- ml/export_onnx_v2.py +0 -169
- obligations.py +8 -43
- redlining.py +22 -87
- requirements.txt +0 -1
- web/.env.example +1 -7
- web/app/api/analyze/route.ts +40 -121
- web/app/api/chat/route.ts +41 -74
- web/app/api/compare/route.ts +13 -46
- web/app/api/parse-upload/route.ts +7 -29
- web/app/api/redline/route.ts +11 -97
- web/app/api/subscribe/create/route.ts +1 -1
- web/app/auth/callback/route.ts +1 -6
- web/app/auth/login/page.tsx +6 -19
- web/app/auth/signup/page.tsx +2 -4
- web/app/dashboard-pages/analyze/loading.tsx +0 -50
- web/app/dashboard-pages/analyze/page.tsx +53 -73
- web/app/dashboard-pages/compare/loading.tsx +0 -22
- web/app/dashboard-pages/dashboard/loading.tsx +0 -45
- web/app/dashboard-pages/settings/loading.tsx +0 -23
- web/components/export-dropdown.tsx +0 -69
- web/components/extension-bridge.tsx +9 -13
- web/components/nav.tsx +33 -68
- web/lib/export-utils.ts +0 -454
- web/lib/supabase/client.ts +2 -2
- web/lib/supabase/schema.sql +13 -33
- web/lib/types.ts +0 -87
- web/package-lock.json +0 -0
- web/package.json +1 -1
- web/proxy.ts +1 -6
- web/tsconfig.json +6 -26
README.md
CHANGED
|
@@ -10,31 +10,11 @@ app_file: app.py
|
|
| 10 |
pinned: false
|
| 11 |
---
|
| 12 |
|
| 13 |
-
# 🛡️ ClauseGuard v4.
|
| 14 |
|
| 15 |
**ClauseGuard** is the most comprehensive open-source AI-powered legal contract analysis tool. It analyzes contracts using state-of-the-art legal NLP models and provides actionable risk assessments, Q&A chatbot, clause redlining, and OCR for scanned PDFs.
|
| 16 |
|
| 17 |
-
## 🆕 What's New in v4.
|
| 18 |
-
|
| 19 |
-
| Feature | Description |
|
| 20 |
-
|---------|-------------|
|
| 21 |
-
| **⚡ ONNX + INT8 Quantization** | CUAD classifier now supports ONNX Runtime with dynamic INT8 quantization — **2-4x faster inference on CPU**. New `ml/export_onnx_v2.py` handles the full merge→export→quantize pipeline. |
|
| 22 |
-
| **🎯 Better Embeddings** | Upgraded from `all-MiniLM-L6-v2` to `BAAI/bge-small-en-v1.5` — **+21% retrieval accuracy** on MTEB benchmarks, same 384-dim, same latency. Includes query instruction prefix for asymmetric retrieval. |
|
| 23 |
-
| **🚀 Batched Classification** | All clauses classified in a single batched forward pass (batch_size=8) instead of one-by-one — **2-3x throughput improvement**. |
|
| 24 |
-
| **🧵 CPU Thread Control** | `torch.set_num_threads(2)` prevents CPU thrashing under concurrent Gradio requests |
|
| 25 |
-
|
| 26 |
-
### Previous: v4.2
|
| 27 |
-
|
| 28 |
-
| Feature | Description |
|
| 29 |
-
|---------|-------------|
|
| 30 |
-
| **🔧 NLI Fix** | Fixed contradiction detection — now uses `CrossEncoder.predict()` instead of broken `pipeline("text-classification")` dict input. Contradictions actually work now. |
|
| 31 |
-
| **🔒 Thread Safety** | `BoundedCache` now uses `threading.RLock` to prevent race conditions under concurrent Gradio requests |
|
| 32 |
-
| **⚡ Pre-compiled Regex** | All regex patterns (clause classification, obligations, compliance negation) pre-compiled at module level — eliminates thousands of redundant compilations |
|
| 33 |
-
| **🔗 Extension Fix** | Chrome extension risk formula now matches backend (diminishing returns, not normalized by doc length). Fixed API_BASE URL. |
|
| 34 |
-
| **🏷️ Label Coverage** | Added missing regex-only labels (Indemnification, Confidentiality, Force Majeure, Penalties) to RISK_MAP and DESC_MAP |
|
| 35 |
-
| **🛡️ Security** | API CORS localhost origins now require explicit opt-in via `CORS_ALLOW_LOCALHOST=true` env var |
|
| 36 |
-
|
| 37 |
-
### Previous: v4.0
|
| 38 |
|
| 39 |
| Feature | Description |
|
| 40 |
|---------|-------------|
|
|
@@ -79,7 +59,7 @@ pinned: false
|
|
| 79 |
| Clause Classification | `Mokshith31/legalbert-contract-clause-classification` — LoRA adapter on `nlpaueb/legal-bert-base-uncased`, fine-tuned on CUAD 41-class taxonomy |
|
| 80 |
| Legal NER | `matterstack/legal-bert-ner` (ML) with regex fallback for 7 entity types |
|
| 81 |
| NLI | `cross-encoder/nli-deberta-v3-base` (semantic contradiction detection) |
|
| 82 |
-
| Embeddings | `
|
| 83 |
| LLM | `Qwen/Qwen2.5-7B-Instruct` via HF Inference API (chatbot + redlining) |
|
| 84 |
| OCR | `docTR` (fast_base + crnn_vgg16_bn) for scanned PDF text extraction |
|
| 85 |
| Compliance | Regulatory keyword matching across GDPR, CCPA, SOX, HIPAA, FINRA |
|
|
|
|
| 10 |
pinned: false
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# 🛡️ ClauseGuard v4.0 — World's Best Open-Source Legal Contract Analysis
|
| 14 |
|
| 15 |
**ClauseGuard** is the most comprehensive open-source AI-powered legal contract analysis tool. It analyzes contracts using state-of-the-art legal NLP models and provides actionable risk assessments, Q&A chatbot, clause redlining, and OCR for scanned PDFs.
|
| 16 |
|
| 17 |
+
## 🆕 What's New in v4.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
| Feature | Description |
|
| 20 |
|---------|-------------|
|
|
|
|
| 59 |
| Clause Classification | `Mokshith31/legalbert-contract-clause-classification` — LoRA adapter on `nlpaueb/legal-bert-base-uncased`, fine-tuned on CUAD 41-class taxonomy |
|
| 60 |
| Legal NER | `matterstack/legal-bert-ner` (ML) with regex fallback for 7 entity types |
|
| 61 |
| NLI | `cross-encoder/nli-deberta-v3-base` (semantic contradiction detection) |
|
| 62 |
+
| Embeddings | `sentence-transformers/all-MiniLM-L6-v2` (384-dim, RAG retrieval) |
|
| 63 |
| LLM | `Qwen/Qwen2.5-7B-Instruct` via HF Inference API (chatbot + redlining) |
|
| 64 |
| OCR | `docTR` (fast_base + crnn_vgg16_bn) for scanned PDF text extraction |
|
| 65 |
| Compliance | Regulatory keyword matching across GDPR, CCPA, SOX, HIPAA, FINRA |
|
api/main.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
"""
|
| 2 |
-
ClauseGuard — FastAPI Backend v4.
|
| 3 |
══════════════════════════════════
|
| 4 |
-
|
| 5 |
-
•
|
| 6 |
-
•
|
| 7 |
-
•
|
| 8 |
-
•
|
| 9 |
"""
|
| 10 |
|
| 11 |
import os
|
|
@@ -56,45 +56,25 @@ SUPABASE_URL = os.environ.get("SUPABASE_URL", "")
|
|
| 56 |
SUPABASE_SERVICE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY", "")
|
| 57 |
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
|
| 58 |
SAULLM_ENDPOINT = os.environ.get("SAULLM_ENDPOINT", "")
|
| 59 |
-
MAX_TEXT_LENGTH = int(os.environ.get("MAX_TEXT_LENGTH", "
|
| 60 |
|
| 61 |
-
# ───
|
| 62 |
-
_rate_limits
|
| 63 |
-
_rate_limits_last_cleanup: float = 0.0
|
| 64 |
RATE_LIMIT_REQUESTS = 30
|
| 65 |
-
RATE_LIMIT_WINDOW = 60
|
| 66 |
-
|
| 67 |
-
def _get_client_ip(request: Request) -> str:
|
| 68 |
-
"""Extract real client IP, handling reverse proxies."""
|
| 69 |
-
forwarded = request.headers.get("x-forwarded-for", "")
|
| 70 |
-
if forwarded:
|
| 71 |
-
return forwarded.split(",")[0].strip()
|
| 72 |
-
return request.client.host if request.client else "unknown"
|
| 73 |
|
| 74 |
def _check_rate_limit(client_ip: str) -> bool:
|
| 75 |
-
"""Sliding window rate limiter with periodic stale-IP cleanup."""
|
| 76 |
-
global _rate_limits_last_cleanup
|
| 77 |
now = time.time()
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
# Remove expired timestamps
|
| 90 |
-
_rate_limits[client_ip] = [
|
| 91 |
-
t for t in _rate_limits[client_ip] if now - t < RATE_LIMIT_WINDOW
|
| 92 |
-
]
|
| 93 |
-
|
| 94 |
-
if len(_rate_limits[client_ip]) >= RATE_LIMIT_REQUESTS:
|
| 95 |
-
return False
|
| 96 |
-
|
| 97 |
-
_rate_limits[client_ip].append(now)
|
| 98 |
return True
|
| 99 |
|
| 100 |
# ─── Supabase helper ───
|
|
@@ -136,27 +116,9 @@ async def supabase_query(table: str, params: dict, headers_extra: dict = {}):
|
|
| 136 |
except Exception:
|
| 137 |
return []
|
| 138 |
|
| 139 |
-
# ───
|
| 140 |
-
_rag_sessions: dict
|
| 141 |
_RAG_SESSION_MAX = 100
|
| 142 |
-
_RAG_SESSION_TTL = 3600 # 1 hour
|
| 143 |
-
|
| 144 |
-
def _cleanup_rag_sessions():
|
| 145 |
-
"""Remove expired RAG sessions."""
|
| 146 |
-
now = time.time()
|
| 147 |
-
expired = [sid for sid, s in _rag_sessions.items() if now - s.get("created_at", 0) > _RAG_SESSION_TTL]
|
| 148 |
-
for sid in expired:
|
| 149 |
-
del _rag_sessions[sid]
|
| 150 |
-
|
| 151 |
-
def _store_rag_session(session_id: str, data: dict):
|
| 152 |
-
"""Store a RAG session with TTL tracking."""
|
| 153 |
-
_cleanup_rag_sessions()
|
| 154 |
-
if len(_rag_sessions) >= _RAG_SESSION_MAX:
|
| 155 |
-
# Remove oldest session
|
| 156 |
-
oldest = min(_rag_sessions, key=lambda k: _rag_sessions[k].get("created_at", 0))
|
| 157 |
-
del _rag_sessions[oldest]
|
| 158 |
-
data["created_at"] = time.time()
|
| 159 |
-
_rag_sessions[session_id] = data
|
| 160 |
|
| 161 |
# ─── Request/Response Models ───
|
| 162 |
class AnalyzeRequest(BaseModel):
|
|
@@ -194,17 +156,13 @@ class RedlineRequest(BaseModel):
|
|
| 194 |
async def lifespan(app: FastAPI):
|
| 195 |
yield
|
| 196 |
|
| 197 |
-
app = FastAPI(title="ClauseGuard API", version="4.
|
| 198 |
|
| 199 |
-
# FIX v4.2: CORS origins configurable via env var; localhost only in dev
|
| 200 |
-
_extra_origins = os.environ.get("CORS_EXTRA_ORIGINS", "").split(",")
|
| 201 |
ALLOWED_ORIGINS = [
|
| 202 |
"https://clauseguardweb.netlify.app",
|
|
|
|
|
|
|
| 203 |
]
|
| 204 |
-
# Only add localhost origins if explicitly enabled via env
|
| 205 |
-
if os.environ.get("CORS_ALLOW_LOCALHOST", "").lower() == "true":
|
| 206 |
-
ALLOWED_ORIGINS.extend(["http://localhost:3000", "http://localhost:3001"])
|
| 207 |
-
ALLOWED_ORIGINS.extend([o.strip() for o in _extra_origins if o.strip()])
|
| 208 |
app.add_middleware(
|
| 209 |
CORSMiddleware,
|
| 210 |
allow_origins=ALLOWED_ORIGINS,
|
|
@@ -221,18 +179,17 @@ async def health():
|
|
| 221 |
return {
|
| 222 |
"status": "ok",
|
| 223 |
"model": model_status,
|
| 224 |
-
"version": "4.
|
| 225 |
"shared_modules": _SHARED_MODULES,
|
| 226 |
"ocr": ocr_status,
|
| 227 |
"features": ["analyze", "compare", "redline", "chat", "ocr"],
|
| 228 |
-
"rag_sessions_active": len(_rag_sessions),
|
| 229 |
}
|
| 230 |
|
| 231 |
@app.post("/api/analyze")
|
| 232 |
async def analyze(req: AnalyzeRequest, request: Request, user: Optional[dict] = Depends(get_current_user)):
|
| 233 |
-
client_ip =
|
| 234 |
if not _check_rate_limit(client_ip):
|
| 235 |
-
raise HTTPException(status_code=429, detail="Rate limit exceeded.
|
| 236 |
|
| 237 |
text = req.text
|
| 238 |
if not text and req.clauses:
|
|
@@ -240,10 +197,8 @@ async def analyze(req: AnalyzeRequest, request: Request, user: Optional[dict] =
|
|
| 240 |
|
| 241 |
if not text or len(text.strip()) < 50:
|
| 242 |
raise HTTPException(status_code=400, detail="Text too short (minimum 50 characters)")
|
| 243 |
-
|
| 244 |
-
# FIX v4.1: Input size validation
|
| 245 |
if len(text) > MAX_TEXT_LENGTH:
|
| 246 |
-
raise HTTPException(status_code=400, detail=f"Text too long (max {MAX_TEXT_LENGTH
|
| 247 |
|
| 248 |
start = time.time()
|
| 249 |
|
|
@@ -293,13 +248,16 @@ async def analyze(req: AnalyzeRequest, request: Request, user: Optional[dict] =
|
|
| 293 |
}],
|
| 294 |
})
|
| 295 |
|
| 296 |
-
# RAG indexing
|
| 297 |
session_id = None
|
| 298 |
try:
|
| 299 |
chunks, embeddings, _status = index_contract(text)
|
| 300 |
if chunks and embeddings is not None:
|
| 301 |
session_id = uuid.uuid4().hex[:12]
|
| 302 |
-
|
|
|
|
|
|
|
|
|
|
| 303 |
"chunks": chunks,
|
| 304 |
"embeddings": embeddings,
|
| 305 |
"analysis": {
|
|
@@ -309,7 +267,7 @@ async def analyze(req: AnalyzeRequest, request: Request, user: Optional[dict] =
|
|
| 309 |
"entities": entities[:30],
|
| 310 |
"contradictions": contradictions,
|
| 311 |
},
|
| 312 |
-
}
|
| 313 |
except Exception as e:
|
| 314 |
print(f"[API] RAG indexing error: {e}")
|
| 315 |
|
|
@@ -346,27 +304,20 @@ async def analyze(req: AnalyzeRequest, request: Request, user: Optional[dict] =
|
|
| 346 |
|
| 347 |
@app.post("/api/compare")
|
| 348 |
async def compare(req: CompareRequest, request: Request):
|
| 349 |
-
client_ip =
|
| 350 |
if not _check_rate_limit(client_ip):
|
| 351 |
raise HTTPException(status_code=429, detail="Rate limit exceeded.")
|
| 352 |
-
|
| 353 |
-
# FIX v4.1: Input size validation for comparison
|
| 354 |
-
if len(req.text_a) > MAX_TEXT_LENGTH or len(req.text_b) > MAX_TEXT_LENGTH:
|
| 355 |
-
raise HTTPException(status_code=400, detail=f"Text too long (max {MAX_TEXT_LENGTH // 1000}KB per contract)")
|
| 356 |
-
|
| 357 |
return compare_contracts(req.text_a, req.text_b)
|
| 358 |
|
| 359 |
@app.post("/api/redline")
|
| 360 |
async def redline(req: RedlineRequest, request: Request):
|
| 361 |
-
client_ip =
|
| 362 |
if not _check_rate_limit(client_ip):
|
| 363 |
raise HTTPException(status_code=429, detail="Rate limit exceeded.")
|
| 364 |
|
| 365 |
if req.session_id and req.session_id in _rag_sessions:
|
| 366 |
analysis = _rag_sessions[req.session_id]["analysis"]
|
| 367 |
elif req.text:
|
| 368 |
-
if len(req.text) > MAX_TEXT_LENGTH:
|
| 369 |
-
raise HTTPException(status_code=400, detail="Text too long")
|
| 370 |
result, error = analyze_contract(req.text)
|
| 371 |
if error:
|
| 372 |
raise HTTPException(status_code=400, detail=error)
|
|
@@ -379,15 +330,12 @@ async def redline(req: RedlineRequest, request: Request):
|
|
| 379 |
|
| 380 |
@app.post("/api/chat")
|
| 381 |
async def chat(req: ChatRequest, request: Request):
|
| 382 |
-
client_ip =
|
| 383 |
if not _check_rate_limit(client_ip):
|
| 384 |
raise HTTPException(status_code=429, detail="Rate limit exceeded.")
|
| 385 |
|
| 386 |
-
# FIX v4.1: Clean up expired sessions before checking
|
| 387 |
-
_cleanup_rag_sessions()
|
| 388 |
-
|
| 389 |
if req.session_id not in _rag_sessions:
|
| 390 |
-
raise HTTPException(status_code=404, detail="Session
|
| 391 |
|
| 392 |
session = _rag_sessions[req.session_id]
|
| 393 |
response_text = ""
|
|
@@ -399,14 +347,12 @@ async def chat(req: ChatRequest, request: Request):
|
|
| 399 |
|
| 400 |
@app.post("/api/chat/stream")
|
| 401 |
async def chat_stream(req: ChatRequest, request: Request):
|
| 402 |
-
client_ip =
|
| 403 |
if not _check_rate_limit(client_ip):
|
| 404 |
raise HTTPException(status_code=429, detail="Rate limit exceeded.")
|
| 405 |
|
| 406 |
-
_cleanup_rag_sessions()
|
| 407 |
-
|
| 408 |
if req.session_id not in _rag_sessions:
|
| 409 |
-
raise HTTPException(status_code=404, detail="Session
|
| 410 |
|
| 411 |
session = _rag_sessions[req.session_id]
|
| 412 |
|
|
@@ -429,12 +375,8 @@ async def ocr_endpoint(file: UploadFile = FastAPIFile(...)):
|
|
| 429 |
if not file.filename or not file.filename.lower().endswith(".pdf"):
|
| 430 |
raise HTTPException(status_code=400, detail="Only PDF files supported")
|
| 431 |
|
| 432 |
-
# FIX v4.1: Limit upload size (20MB)
|
| 433 |
-
content = await file.read()
|
| 434 |
-
if len(content) > 20 * 1024 * 1024:
|
| 435 |
-
raise HTTPException(status_code=400, detail="File too large (max 20MB)")
|
| 436 |
-
|
| 437 |
with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
|
|
|
|
| 438 |
tmp.write(content)
|
| 439 |
tmp_path = tmp.name
|
| 440 |
|
|
|
|
| 1 |
"""
|
| 2 |
+
ClauseGuard — FastAPI Backend v4.0
|
| 3 |
══════════════════════════════════
|
| 4 |
+
New in v4.0:
|
| 5 |
+
• /api/redline — clause redlining suggestions
|
| 6 |
+
• /api/chat — RAG chatbot (streaming)
|
| 7 |
+
• /api/ocr — OCR scanned PDF extraction
|
| 8 |
+
• Updated analysis to include redlining data
|
| 9 |
"""
|
| 10 |
|
| 11 |
import os
|
|
|
|
| 56 |
SUPABASE_SERVICE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY", "")
|
| 57 |
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
|
| 58 |
SAULLM_ENDPOINT = os.environ.get("SAULLM_ENDPOINT", "")
|
| 59 |
+
MAX_TEXT_LENGTH = int(os.environ.get("MAX_TEXT_LENGTH", "100000"))
|
| 60 |
|
| 61 |
+
# ─── Rate Limiting ───
|
| 62 |
+
_rate_limits = {}
|
|
|
|
| 63 |
RATE_LIMIT_REQUESTS = 30
|
| 64 |
+
RATE_LIMIT_WINDOW = 60
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
def _check_rate_limit(client_ip: str) -> bool:
|
|
|
|
|
|
|
| 67 |
now = time.time()
|
| 68 |
+
if client_ip in _rate_limits:
|
| 69 |
+
count, window_start = _rate_limits[client_ip]
|
| 70 |
+
if now - window_start > RATE_LIMIT_WINDOW:
|
| 71 |
+
_rate_limits[client_ip] = (1, now)
|
| 72 |
+
return True
|
| 73 |
+
if count >= RATE_LIMIT_REQUESTS:
|
| 74 |
+
return False
|
| 75 |
+
_rate_limits[client_ip] = (count + 1, window_start)
|
| 76 |
+
return True
|
| 77 |
+
_rate_limits[client_ip] = (1, now)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
return True
|
| 79 |
|
| 80 |
# ─── Supabase helper ───
|
|
|
|
| 116 |
except Exception:
|
| 117 |
return []
|
| 118 |
|
| 119 |
+
# ─── In-memory RAG session store ───
|
| 120 |
+
_rag_sessions: dict = {}
|
| 121 |
_RAG_SESSION_MAX = 100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
# ─── Request/Response Models ───
|
| 124 |
class AnalyzeRequest(BaseModel):
|
|
|
|
| 156 |
async def lifespan(app: FastAPI):
|
| 157 |
yield
|
| 158 |
|
| 159 |
+
app = FastAPI(title="ClauseGuard API", version="4.0.0", lifespan=lifespan)
|
| 160 |
|
|
|
|
|
|
|
| 161 |
ALLOWED_ORIGINS = [
|
| 162 |
"https://clauseguardweb.netlify.app",
|
| 163 |
+
"http://localhost:3000",
|
| 164 |
+
"http://localhost:3001",
|
| 165 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
app.add_middleware(
|
| 167 |
CORSMiddleware,
|
| 168 |
allow_origins=ALLOWED_ORIGINS,
|
|
|
|
| 179 |
return {
|
| 180 |
"status": "ok",
|
| 181 |
"model": model_status,
|
| 182 |
+
"version": "4.0.0",
|
| 183 |
"shared_modules": _SHARED_MODULES,
|
| 184 |
"ocr": ocr_status,
|
| 185 |
"features": ["analyze", "compare", "redline", "chat", "ocr"],
|
|
|
|
| 186 |
}
|
| 187 |
|
| 188 |
@app.post("/api/analyze")
|
| 189 |
async def analyze(req: AnalyzeRequest, request: Request, user: Optional[dict] = Depends(get_current_user)):
|
| 190 |
+
client_ip = request.client.host if request.client else "unknown"
|
| 191 |
if not _check_rate_limit(client_ip):
|
| 192 |
+
raise HTTPException(status_code=429, detail="Rate limit exceeded.")
|
| 193 |
|
| 194 |
text = req.text
|
| 195 |
if not text and req.clauses:
|
|
|
|
| 197 |
|
| 198 |
if not text or len(text.strip()) < 50:
|
| 199 |
raise HTTPException(status_code=400, detail="Text too short (minimum 50 characters)")
|
|
|
|
|
|
|
| 200 |
if len(text) > MAX_TEXT_LENGTH:
|
| 201 |
+
raise HTTPException(status_code=400, detail=f"Text too long (max {MAX_TEXT_LENGTH} chars)")
|
| 202 |
|
| 203 |
start = time.time()
|
| 204 |
|
|
|
|
| 248 |
}],
|
| 249 |
})
|
| 250 |
|
| 251 |
+
# v4.0: RAG indexing
|
| 252 |
session_id = None
|
| 253 |
try:
|
| 254 |
chunks, embeddings, _status = index_contract(text)
|
| 255 |
if chunks and embeddings is not None:
|
| 256 |
session_id = uuid.uuid4().hex[:12]
|
| 257 |
+
if len(_rag_sessions) >= _RAG_SESSION_MAX:
|
| 258 |
+
oldest = next(iter(_rag_sessions))
|
| 259 |
+
del _rag_sessions[oldest]
|
| 260 |
+
_rag_sessions[session_id] = {
|
| 261 |
"chunks": chunks,
|
| 262 |
"embeddings": embeddings,
|
| 263 |
"analysis": {
|
|
|
|
| 267 |
"entities": entities[:30],
|
| 268 |
"contradictions": contradictions,
|
| 269 |
},
|
| 270 |
+
}
|
| 271 |
except Exception as e:
|
| 272 |
print(f"[API] RAG indexing error: {e}")
|
| 273 |
|
|
|
|
| 304 |
|
| 305 |
@app.post("/api/compare")
|
| 306 |
async def compare(req: CompareRequest, request: Request):
|
| 307 |
+
client_ip = request.client.host if request.client else "unknown"
|
| 308 |
if not _check_rate_limit(client_ip):
|
| 309 |
raise HTTPException(status_code=429, detail="Rate limit exceeded.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
return compare_contracts(req.text_a, req.text_b)
|
| 311 |
|
| 312 |
@app.post("/api/redline")
|
| 313 |
async def redline(req: RedlineRequest, request: Request):
|
| 314 |
+
client_ip = request.client.host if request.client else "unknown"
|
| 315 |
if not _check_rate_limit(client_ip):
|
| 316 |
raise HTTPException(status_code=429, detail="Rate limit exceeded.")
|
| 317 |
|
| 318 |
if req.session_id and req.session_id in _rag_sessions:
|
| 319 |
analysis = _rag_sessions[req.session_id]["analysis"]
|
| 320 |
elif req.text:
|
|
|
|
|
|
|
| 321 |
result, error = analyze_contract(req.text)
|
| 322 |
if error:
|
| 323 |
raise HTTPException(status_code=400, detail=error)
|
|
|
|
| 330 |
|
| 331 |
@app.post("/api/chat")
|
| 332 |
async def chat(req: ChatRequest, request: Request):
|
| 333 |
+
client_ip = request.client.host if request.client else "unknown"
|
| 334 |
if not _check_rate_limit(client_ip):
|
| 335 |
raise HTTPException(status_code=429, detail="Rate limit exceeded.")
|
| 336 |
|
|
|
|
|
|
|
|
|
|
| 337 |
if req.session_id not in _rag_sessions:
|
| 338 |
+
raise HTTPException(status_code=404, detail="Session not found. Analyze a contract first.")
|
| 339 |
|
| 340 |
session = _rag_sessions[req.session_id]
|
| 341 |
response_text = ""
|
|
|
|
| 347 |
|
| 348 |
@app.post("/api/chat/stream")
|
| 349 |
async def chat_stream(req: ChatRequest, request: Request):
|
| 350 |
+
client_ip = request.client.host if request.client else "unknown"
|
| 351 |
if not _check_rate_limit(client_ip):
|
| 352 |
raise HTTPException(status_code=429, detail="Rate limit exceeded.")
|
| 353 |
|
|
|
|
|
|
|
| 354 |
if req.session_id not in _rag_sessions:
|
| 355 |
+
raise HTTPException(status_code=404, detail="Session not found.")
|
| 356 |
|
| 357 |
session = _rag_sessions[req.session_id]
|
| 358 |
|
|
|
|
| 375 |
if not file.filename or not file.filename.lower().endswith(".pdf"):
|
| 376 |
raise HTTPException(status_code=400, detail="Only PDF files supported")
|
| 377 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 378 |
with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
|
| 379 |
+
content = await file.read()
|
| 380 |
tmp.write(content)
|
| 381 |
tmp_path = tmp.name
|
| 382 |
|
app.py
CHANGED
|
@@ -1,46 +1,25 @@
|
|
| 1 |
"""
|
| 2 |
-
ClauseGuard — World's Best Legal Contract Analysis Tool (v4.
|
| 3 |
═══════════════════════════════════════════════════════════════
|
| 4 |
-
|
| 5 |
-
• PERF: Upgraded embedder to BAAI/bge-small-en-v1.5 (+21% retrieval accuracy)
|
| 6 |
-
• PERF: Batched clause classification (single forward pass, batch_size=8)
|
| 7 |
-
• PERF: ONNX INT8 quantized model support (2-4x faster on CPU)
|
| 8 |
-
• PERF: torch.set_num_threads(2) to prevent CPU thrashing
|
| 9 |
-
• NEW: ml/export_onnx_v2.py — full merge→ONNX→quantize pipeline
|
| 10 |
-
|
| 11 |
-
Fixes in v4.2:
|
| 12 |
-
• FIX: NLI now uses CrossEncoder.predict() — contradictions actually work
|
| 13 |
-
• FIX: BoundedCache uses threading.RLock — no more race conditions
|
| 14 |
-
• FIX: Pre-compiled ALL regex patterns at module level (perf)
|
| 15 |
-
• FIX: Added missing regex labels to RISK_MAP/DESC_MAP
|
| 16 |
-
• FIX: Extension risk formula matches backend
|
| 17 |
-
• FIX: Extension API_BASE URL corrected
|
| 18 |
-
• FIX: API CORS localhost requires explicit opt-in
|
| 19 |
-
|
| 20 |
-
Fixes in v4.1:
|
| 21 |
-
• FIX: Bounded LRU caches (chunk_cache, prediction_cache) — no more memory leaks
|
| 22 |
-
• FIX: NLI input format — pass (text_a, text_b) tuple, not [SEP]-concatenated string
|
| 23 |
-
• FIX: Classifier max_length raised to 512 (was 256 — truncating legal clauses)
|
| 24 |
-
• FIX: Risk score formula — absolute risk, not normalized by total_clauses
|
| 25 |
-
• FIX: Train/inference alignment — use softmax+argmax for single-label model
|
| 26 |
-
• FIX: Added missing regex fallback patterns for more CUAD categories
|
| 27 |
-
• FIX: Entity extraction batching — single pipeline call instead of sequential
|
| 28 |
-
• PERF: Shared model singleton via models.py module
|
| 29 |
-
• PERF: LRU-bounded caches everywhere
|
| 30 |
-
|
| 31 |
-
Carried from v4.0:
|
| 32 |
• OCR support for scanned PDFs (docTR engine with smart native/scanned routing)
|
| 33 |
• Contract Q&A Chatbot (RAG: embedding retrieval + HF Inference API streaming)
|
| 34 |
• Clause Redlining (3-tier: template lookup + RAG + LLM refinement)
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
• Real NLI contradiction detection via cross-encoder model
|
| 38 |
-
• ML-based Legal NER with regex fallback
|
| 39 |
• Semantic compliance checking with negation handling
|
| 40 |
• Improved obligation extraction with false-positive filtering
|
| 41 |
-
• LLM-powered clause explanations
|
|
|
|
| 42 |
• Per-session temp files (no collision)
|
| 43 |
-
• Model health reporting
|
|
|
|
| 44 |
|
| 45 |
Models:
|
| 46 |
• Clause classifier: Mokshith31/legalbert-contract-clause-classification
|
|
@@ -60,8 +39,7 @@ import io
|
|
| 60 |
import uuid
|
| 61 |
import tempfile
|
| 62 |
import hashlib
|
| 63 |
-
import
|
| 64 |
-
from collections import defaultdict, OrderedDict
|
| 65 |
from datetime import datetime
|
| 66 |
from functools import lru_cache
|
| 67 |
|
|
@@ -94,29 +72,9 @@ try:
|
|
| 94 |
)
|
| 95 |
from peft import PeftModel
|
| 96 |
_HAS_TORCH = True
|
| 97 |
-
# PERF v4.3: Limit PyTorch threads to avoid CPU thrashing under concurrent requests.
|
| 98 |
-
# HF Spaces CPU-basic has 2 vCPUs. Reserve 1 thread for Gradio server.
|
| 99 |
-
torch.set_num_threads(2)
|
| 100 |
-
torch.set_num_interop_threads(1)
|
| 101 |
except Exception:
|
| 102 |
pass
|
| 103 |
|
| 104 |
-
# ── ONNX Runtime (soft-fail, for quantized model) ─────────────────────
|
| 105 |
-
_HAS_ORT = False
|
| 106 |
-
try:
|
| 107 |
-
from optimum.onnxruntime import ORTModelForSequenceClassification as _ORTModel
|
| 108 |
-
_HAS_ORT = True
|
| 109 |
-
except ImportError:
|
| 110 |
-
pass
|
| 111 |
-
|
| 112 |
-
# ── CrossEncoder for NLI (soft-fail) ──────────────────────────────────
|
| 113 |
-
_HAS_CROSS_ENCODER = False
|
| 114 |
-
try:
|
| 115 |
-
from sentence_transformers import CrossEncoder as _CrossEncoder
|
| 116 |
-
_HAS_CROSS_ENCODER = True
|
| 117 |
-
except ImportError:
|
| 118 |
-
pass
|
| 119 |
-
|
| 120 |
# ── Import submodules ───────────────────────────────────────────────
|
| 121 |
from compare import compare_contracts, render_comparison_html
|
| 122 |
from obligations import extract_obligations, render_obligations_html
|
|
@@ -179,12 +137,7 @@ _UNFAIR_LABELS = [
|
|
| 179 |
"Jurisdiction", "Arbitration"
|
| 180 |
]
|
| 181 |
|
| 182 |
-
|
| 183 |
-
_EXTRA_REGEX_LABELS = [
|
| 184 |
-
"Indemnification", "Confidentiality", "Force Majeure", "Penalties"
|
| 185 |
-
]
|
| 186 |
-
|
| 187 |
-
_ALL_LABELS = CUAD_LABELS + _UNFAIR_LABELS + _EXTRA_REGEX_LABELS
|
| 188 |
|
| 189 |
RISK_MAP = {
|
| 190 |
# Critical
|
|
@@ -240,11 +193,6 @@ RISK_MAP = {
|
|
| 240 |
"Other": "LOW",
|
| 241 |
"ROFR/ROFO/ROFN": "LOW",
|
| 242 |
"Contract by using": "LOW",
|
| 243 |
-
# FIX v4.2: Added regex-only labels that were missing from RISK_MAP
|
| 244 |
-
"Indemnification": "HIGH",
|
| 245 |
-
"Confidentiality": "MEDIUM",
|
| 246 |
-
"Force Majeure": "LOW",
|
| 247 |
-
"Penalties": "HIGH",
|
| 248 |
}
|
| 249 |
|
| 250 |
DESC_MAP = {label: label.replace("_", " ") for label in _ALL_LABELS}
|
|
@@ -285,65 +233,10 @@ DESC_MAP.update({
|
|
| 285 |
"Irrevocable or Perpetual License": "License that cannot be revoked or lasts indefinitely.",
|
| 286 |
"Unlimited/All-You-Can-Eat License": "License with no usage limits.",
|
| 287 |
"Notice Period to Terminate Renewal": "Required notice period before automatic renewal.",
|
| 288 |
-
# FIX v4.2: Added descriptions for regex-only labels
|
| 289 |
-
"Indemnification": "Obligation to compensate the other party for losses or damages.",
|
| 290 |
-
"Confidentiality": "Restrictions on sharing proprietary or sensitive information.",
|
| 291 |
-
"Force Majeure": "Excuses performance due to extraordinary events beyond control.",
|
| 292 |
-
"Penalties": "Financial penalties for breach or late performance.",
|
| 293 |
})
|
| 294 |
|
| 295 |
RISK_WEIGHTS = {"CRITICAL": 40, "HIGH": 20, "MEDIUM": 10, "LOW": 3}
|
| 296 |
|
| 297 |
-
# FIX v4.3.1: Content-based severity refinement
|
| 298 |
-
# Default RISK_MAP assigns severity by label alone. This function downgrades severity
|
| 299 |
-
# when the clause text contains mitigating language (caps, carve-outs, time limits).
|
| 300 |
-
_SEVERITY_MITIGATORS = {
|
| 301 |
-
"IP Ownership Assignment": {
|
| 302 |
-
# Downgrade from CRITICAL to HIGH if pre-existing IP is carved out
|
| 303 |
-
"HIGH": re.compile(r'pre[\-\s]existing|background\s+ip|prior\s+(?:ip|intellectual)', re.IGNORECASE),
|
| 304 |
-
# Downgrade to MEDIUM if both carve-out AND license-back exist
|
| 305 |
-
"MEDIUM": re.compile(r'(?:pre[\-\s]existing|background\s+ip).*(?:license|retain)', re.IGNORECASE | re.DOTALL),
|
| 306 |
-
},
|
| 307 |
-
"Limitation of liability": {
|
| 308 |
-
# Downgrade from CRITICAL to HIGH if there's any cap
|
| 309 |
-
"HIGH": re.compile(r'shall\s+not\s+exceed|aggregate.{0,20}(?:not\s+exceed|limited\s+to)|cap(?:ped)?\s+at', re.IGNORECASE),
|
| 310 |
-
# Downgrade to MEDIUM if there's a reasonable cap AND exceptions for gross negligence
|
| 311 |
-
"MEDIUM": re.compile(r'(?:shall\s+not\s+exceed|limited\s+to).{0,80}(?:gross\s+negligence|willful|fraud)', re.IGNORECASE | re.DOTALL),
|
| 312 |
-
},
|
| 313 |
-
"Termination for Convenience": {
|
| 314 |
-
# Downgrade from CRITICAL to HIGH if there's a notice period
|
| 315 |
-
"HIGH": re.compile(r'(?:\d+)\s+(?:day|month|week)s?.{0,20}(?:prior|advance|written)\s+notice', re.IGNORECASE),
|
| 316 |
-
# Downgrade to MEDIUM if mutual termination right
|
| 317 |
-
"MEDIUM": re.compile(r'either\s+party\s+may\s+terminat', re.IGNORECASE),
|
| 318 |
-
},
|
| 319 |
-
"Non-Compete": {
|
| 320 |
-
# Downgrade from HIGH to MEDIUM if time-limited
|
| 321 |
-
"MEDIUM": re.compile(r'(?:period\s+of|for)\s+(?:\d+|one|two|three|six|twelve)\s+(?:\(\d+\)\s+)?(?:month|year)', re.IGNORECASE),
|
| 322 |
-
},
|
| 323 |
-
"Arbitration": {
|
| 324 |
-
# Downgrade from CRITICAL to HIGH if opt-out is available
|
| 325 |
-
"HIGH": re.compile(r'opt[\-\s]?out|may\s+elect|small\s+claims', re.IGNORECASE),
|
| 326 |
-
},
|
| 327 |
-
}
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
def _refine_severity(label, text, default_risk):
|
| 331 |
-
"""FIX v4.3.1: Refine severity based on clause content, not just label."""
|
| 332 |
-
mitigators = _SEVERITY_MITIGATORS.get(label)
|
| 333 |
-
if not mitigators:
|
| 334 |
-
return default_risk
|
| 335 |
-
|
| 336 |
-
# Check from lowest severity up — return the lowest matching level
|
| 337 |
-
for level in ["MEDIUM", "HIGH"]:
|
| 338 |
-
pattern = mitigators.get(level)
|
| 339 |
-
if pattern and pattern.search(text):
|
| 340 |
-
# Only downgrade, never upgrade
|
| 341 |
-
level_order = {"CRITICAL": 4, "HIGH": 3, "MEDIUM": 2, "LOW": 1}
|
| 342 |
-
if level_order.get(level, 0) < level_order.get(default_risk, 0):
|
| 343 |
-
return level
|
| 344 |
-
|
| 345 |
-
return default_risk
|
| 346 |
-
|
| 347 |
RISK_STYLES = {
|
| 348 |
"CRITICAL": ("#dc2626", "#fef2f2", "⚠️"),
|
| 349 |
"HIGH": ("#ea580c", "#fff7ed", "⚡"),
|
|
@@ -351,58 +244,15 @@ RISK_STYLES = {
|
|
| 351 |
"LOW": ("#16a34a", "#f0fdf4", "✓"),
|
| 352 |
}
|
| 353 |
|
| 354 |
-
#
|
| 355 |
-
#
|
| 356 |
-
# The model was trained with cross-entropy (single-label), so inference
|
| 357 |
-
# now uses softmax+argmax, not sigmoid. Thresholds apply to softmax probs.
|
| 358 |
-
# ═══════════════════════════════════════════════════════════════════════
|
| 359 |
_CUAD_THRESHOLDS = {}
|
| 360 |
_WEAK_CLASSES = {0, 1, 2, 7, 9, 21, 22, 27, 37, 38}
|
| 361 |
for _i in range(41):
|
| 362 |
if _i in _WEAK_CLASSES:
|
| 363 |
_CUAD_THRESHOLDS[_i] = 0.85 # Only flag if very confident (these classes are unreliable)
|
| 364 |
else:
|
| 365 |
-
_CUAD_THRESHOLDS[_i] = 0.40 # Reasonable threshold for
|
| 366 |
-
|
| 367 |
-
# ═══════════════════════════════════════════════════════════════════════
|
| 368 |
-
# FIX v4.1: Bounded LRU Cache utility (replaces unbounded dicts)
|
| 369 |
-
# ═══════════════════════════════════════════════════════════════════════
|
| 370 |
-
|
| 371 |
-
class BoundedCache:
    """Thread-safe bounded LRU cache built on OrderedDict + RLock.

    Every compound OrderedDict operation (membership test, insert,
    move_to_end, popitem) is performed under an RLock, because those
    sequences are not atomic under concurrent access even with the GIL.
    When the cache is full, the least-recently-used entry is evicted.
    """

    def __init__(self, maxsize=1000):
        self._cache = OrderedDict()
        self._maxsize = maxsize
        self._lock = threading.RLock()

    def get(self, key, default=None):
        """Return the value for `key` (marking it most-recently-used), else `default`."""
        with self._lock:
            try:
                value = self._cache[key]
            except KeyError:
                return default
            self._cache.move_to_end(key)
            return value

    def put(self, key, value):
        """Insert or refresh `key`; evict the LRU entry when at capacity."""
        with self._lock:
            if key in self._cache:
                # Refresh recency before overwriting the value.
                self._cache.move_to_end(key)
            elif len(self._cache) >= self._maxsize:
                # Oldest (least-recently-used) entry is at the front.
                self._cache.popitem(last=False)
            self._cache[key] = value

    def __contains__(self, key):
        with self._lock:
            return key in self._cache

    def __len__(self):
        with self._lock:
            return len(self._cache)
|
| 405 |
-
|
| 406 |
|
| 407 |
# ═══════════════════════════════════════════════════════════════════════
|
| 408 |
# 2. MODEL LOADING
|
|
@@ -411,30 +261,11 @@ class BoundedCache:
|
|
| 411 |
cuad_tokenizer = None
|
| 412 |
cuad_model = None
|
| 413 |
ner_pipeline = None
|
| 414 |
-
|
| 415 |
_model_status = {"cuad": "not_loaded", "ner": "not_loaded", "nli": "not_loaded"}
|
| 416 |
|
| 417 |
def _load_cuad_model():
|
| 418 |
global cuad_tokenizer, cuad_model, _model_status
|
| 419 |
-
# PERF v4.3: Try ONNX quantized model first (2-4x faster on CPU)
|
| 420 |
-
onnx_model_path = os.environ.get("ONNX_MODEL_PATH", "")
|
| 421 |
-
onnx_hub_id = os.environ.get("ONNX_HUB_MODEL_ID", "gaurv007/clauseguard-onnx-int8")
|
| 422 |
-
|
| 423 |
-
if _HAS_ORT:
|
| 424 |
-
for source in [onnx_model_path, onnx_hub_id]:
|
| 425 |
-
if not source:
|
| 426 |
-
continue
|
| 427 |
-
try:
|
| 428 |
-
print(f"[ClauseGuard] Trying ONNX model: {source}")
|
| 429 |
-
cuad_model = _ORTModel.from_pretrained(source, file_name="model_quantized.onnx")
|
| 430 |
-
cuad_tokenizer = AutoTokenizer.from_pretrained(source)
|
| 431 |
-
_model_status["cuad"] = "loaded (ONNX INT8)"
|
| 432 |
-
print(f"[ClauseGuard] ONNX INT8 model loaded from {source}")
|
| 433 |
-
return
|
| 434 |
-
except Exception as e:
|
| 435 |
-
print(f"[ClauseGuard] ONNX load failed from {source}: {e}")
|
| 436 |
-
|
| 437 |
-
# Fallback to PyTorch PEFT model
|
| 438 |
if not _HAS_TORCH:
|
| 439 |
print("[ClauseGuard] PyTorch not available — using regex fallback")
|
| 440 |
_model_status["cuad"] = "unavailable"
|
|
@@ -442,15 +273,15 @@ def _load_cuad_model():
|
|
| 442 |
try:
|
| 443 |
base = "nlpaueb/legal-bert-base-uncased"
|
| 444 |
adapter = "Mokshith31/legalbert-contract-clause-classification"
|
| 445 |
-
print(f"[ClauseGuard] Loading CUAD classifier
|
| 446 |
cuad_tokenizer = AutoTokenizer.from_pretrained(base)
|
| 447 |
base_model = AutoModelForSequenceClassification.from_pretrained(
|
| 448 |
base, num_labels=41, ignore_mismatched_sizes=True
|
| 449 |
)
|
| 450 |
cuad_model = PeftModel.from_pretrained(base_model, adapter)
|
| 451 |
cuad_model.eval()
|
| 452 |
-
_model_status["cuad"] = "loaded
|
| 453 |
-
print("[ClauseGuard] CUAD model loaded successfully
|
| 454 |
except Exception as e:
|
| 455 |
print(f"[ClauseGuard] CUAD model load failed: {e}")
|
| 456 |
cuad_tokenizer = None
|
|
@@ -478,16 +309,20 @@ def _load_ner_model():
|
|
| 478 |
_model_status["ner"] = f"failed: {e}"
|
| 479 |
|
| 480 |
def _load_nli_model():
|
| 481 |
-
global
|
| 482 |
-
if not
|
| 483 |
-
_model_status["nli"] = "unavailable
|
| 484 |
return
|
| 485 |
try:
|
| 486 |
-
print("[ClauseGuard] Loading NLI model: cross-encoder/nli-deberta-v3-base
|
| 487 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
_HAS_NLI_MODEL = True
|
| 489 |
_model_status["nli"] = "loaded"
|
| 490 |
-
print("[ClauseGuard] NLI
|
| 491 |
except Exception as e:
|
| 492 |
print(f"[ClauseGuard] NLI model load failed (using heuristic fallback): {e}")
|
| 493 |
_model_status["nli"] = f"failed: {e}"
|
|
@@ -549,45 +384,46 @@ def parse_document(file_path):
|
|
| 549 |
return None, f"Unsupported file type: {ext}"
|
| 550 |
|
| 551 |
# ═══════════════════════════════════════════════════════════════════════
|
| 552 |
-
# 4. DETERMINISTIC CLAUSE SPLITTING
|
| 553 |
-
# FIX v4.1: Bounded cache (max 500 documents) instead of unbounded dict
|
| 554 |
# ═══════════════════════════════════════════════════════════════════════
|
| 555 |
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
# FIX v4.2: Pre-compile section pattern at module level (was recompiling per call)
|
| 559 |
-
_SECTION_PATTERN = re.compile(
|
| 560 |
-
r'(?:^|\n\n)'
|
| 561 |
-
r'(?='
|
| 562 |
-
r'\d+(?:\.\d+)*[.)]\s' # 1. 2. 3.1. 3.1)
|
| 563 |
-
r'|[A-Z]{2,}[A-Z\s]*\n' # ALL CAPS HEADERS
|
| 564 |
-
r'|\([a-z]\)\s' # (a) (b) (c)
|
| 565 |
-
r'|(?:Section|Article|Clause)\s+\d+' # Section 1, Article 2
|
| 566 |
-
r')',
|
| 567 |
-
re.MULTILINE
|
| 568 |
-
)
|
| 569 |
|
| 570 |
def split_clauses(text):
|
| 571 |
"""Deterministic, structure-aware clause splitting.
|
| 572 |
-
Same input ALWAYS produces same output. Normalized text is hashed
|
| 573 |
and cached so repeated runs on identical documents are identical."""
|
|
|
|
| 574 |
normalized = re.sub(r'\s+', ' ', text.strip())
|
| 575 |
text_hash = hashlib.sha256(normalized.encode()).hexdigest()
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
return cached
|
| 579 |
|
| 580 |
text = re.sub(r'\n{3,}', '\n\n', text.strip())
|
| 581 |
|
| 582 |
# First try to detect numbered sections (1., 2., 3.1, (a), etc.)
|
| 583 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
|
| 585 |
if len(positions) >= 3:
|
|
|
|
| 586 |
clauses = []
|
| 587 |
for i, pos in enumerate(positions):
|
| 588 |
end = positions[i + 1] if i + 1 < len(positions) else len(text)
|
| 589 |
chunk = text[pos:end].strip()
|
| 590 |
if len(chunk) > 30:
|
|
|
|
| 591 |
if len(chunk) > 1500:
|
| 592 |
sub_parts = chunk.split('\n\n')
|
| 593 |
current = ""
|
|
@@ -602,20 +438,22 @@ def split_clauses(text):
|
|
| 602 |
clauses.append(current.strip())
|
| 603 |
else:
|
| 604 |
clauses.append(chunk)
|
|
|
|
| 605 |
if positions and positions[0] > 50:
|
| 606 |
preamble = text[:positions[0]].strip()
|
| 607 |
if len(preamble) > 30:
|
| 608 |
clauses.insert(0, preamble)
|
| 609 |
result = clauses if clauses else _fallback_split(text)
|
| 610 |
-
_chunk_cache
|
| 611 |
return result
|
| 612 |
else:
|
| 613 |
result = _fallback_split(text)
|
| 614 |
-
_chunk_cache
|
| 615 |
return result
|
| 616 |
|
| 617 |
def _fallback_split(text):
|
| 618 |
"""Fallback: split on paragraph breaks and sentence boundaries."""
|
|
|
|
| 619 |
paragraphs = text.split('\n\n')
|
| 620 |
if len(paragraphs) >= 3:
|
| 621 |
clauses = []
|
|
@@ -623,6 +461,7 @@ def _fallback_split(text):
|
|
| 623 |
p = p.strip()
|
| 624 |
if len(p) > 30:
|
| 625 |
if len(p) > 1500:
|
|
|
|
| 626 |
sents = re.split(r'(?<=[.!?])\s+(?=[A-Z])', p)
|
| 627 |
current = ""
|
| 628 |
for s in sents:
|
|
@@ -638,16 +477,17 @@ def _fallback_split(text):
|
|
| 638 |
clauses.append(p)
|
| 639 |
return clauses
|
| 640 |
|
|
|
|
| 641 |
parts = re.split(r'(?<=[.!?])\s+(?=[A-Z0-9(])', text)
|
| 642 |
return [p.strip() for p in parts if len(p.strip()) > 30]
|
| 643 |
|
| 644 |
# ═══════════════════════════════════════════════════════════════════════
|
| 645 |
-
# 5. CLAUSE DETECTION
|
| 646 |
-
#
|
| 647 |
-
#
|
| 648 |
-
# FIX v4.1: Bounded prediction cache
|
| 649 |
# ═══════════════════════════════════════════════════════════════════════
|
| 650 |
|
|
|
|
| 651 |
_HEADING_RE = re.compile(r'^\d+(?:\.\d+)*\s+[A-Z][A-Z\s&,/]+$', re.MULTILINE)
|
| 652 |
|
| 653 |
def _strip_heading(text):
|
|
@@ -658,6 +498,7 @@ def _strip_heading(text):
|
|
| 658 |
return stripped if len(stripped) > 20 else text
|
| 659 |
return text
|
| 660 |
|
|
|
|
| 661 |
_LABEL_GUARDRAILS = {
|
| 662 |
"Liquidated Damages": re.compile(
|
| 663 |
r'liquidated|pre-?determined.{0,10}damage|agreed.{0,10}sum|penalty clause|stipulated.{0,10}damage',
|
|
@@ -667,127 +508,58 @@ _LABEL_GUARDRAILS = {
|
|
| 667 |
r'uncapped|unlimited.{0,10}liabilit|no.{0,10}(limit|cap).{0,10}liabilit',
|
| 668 |
re.IGNORECASE
|
| 669 |
),
|
| 670 |
-
"ROFR/ROFO/ROFN": re.compile(
|
| 671 |
-
r'right\s+of\s+first\s+(?:refusal|offer|negotiation)|ROFR|ROFO|ROFN',
|
| 672 |
-
re.IGNORECASE
|
| 673 |
-
),
|
| 674 |
-
"Renewal Term": re.compile(
|
| 675 |
-
r'renew(?:al)?|successive\s+term|auto(?:matic(?:ally)?)?\s*[\-\s]?renew|non[\-\s]?renewal',
|
| 676 |
-
re.IGNORECASE
|
| 677 |
-
),
|
| 678 |
-
# FIX v4.3.1: Revenue/Profit Sharing fires on IP assignment "rights for value" language
|
| 679 |
-
"Revenue/Profit Sharing": re.compile(
|
| 680 |
-
r'revenue\s+shar|profit\s+shar|revenue\s+split|percentage\s+of\s+revenue|royalt(?:y|ies)|gross\s+profit',
|
| 681 |
-
re.IGNORECASE
|
| 682 |
-
),
|
| 683 |
-
# FIX v4.3.1: Minimum Commitment fires on fee schedules — require explicit minimum language
|
| 684 |
-
"Minimum Commitment": re.compile(
|
| 685 |
-
r'minimum\s+(?:purchase|order|spend|volume|commitment)|take[\-\s]or[\-\s]pay|minimum\s+annual',
|
| 686 |
-
re.IGNORECASE
|
| 687 |
-
),
|
| 688 |
-
# FIX v4.3.1: Non-Disparagement fires on arbitration/class-waiver language
|
| 689 |
-
"Non-Disparagement": re.compile(
|
| 690 |
-
r'disparag|defam|false\s+statement|negative\s+statement|social\s+media|reputat',
|
| 691 |
-
re.IGNORECASE
|
| 692 |
-
),
|
| 693 |
-
}
|
| 694 |
-
|
| 695 |
-
# FIX v4.3: Exclusion patterns — even if guardrail passes, exclude if contra-indicators present
|
| 696 |
-
_LABEL_EXCLUSIONS = {
|
| 697 |
-
"ROFR/ROFO/ROFN": re.compile(
|
| 698 |
-
r'assigns?\s+to|irrevocab(?:ly|le)\s+assign|all\s+right,?\s+title,?\s+and\s+interest|work[\-\s]for[\-\s]hire',
|
| 699 |
-
re.IGNORECASE
|
| 700 |
-
),
|
| 701 |
-
"Renewal Term": re.compile(
|
| 702 |
-
r'limitation\s+of\s+liabilit|shall\s+not\s+be\s+liable|indemnif|hold\s+harmless|defend\s+and',
|
| 703 |
-
re.IGNORECASE
|
| 704 |
-
),
|
| 705 |
-
# FIX v4.3.1: Revenue/Profit Sharing must NOT fire on IP assignment or license grant clauses
|
| 706 |
-
"Revenue/Profit Sharing": re.compile(
|
| 707 |
-
r'assigns?\s+to|irrevocab(?:ly|le)\s+assign|work[\-\s](?:made\s+)?for[\-\s]hire|license\s+to\s+access|license\s+grant|non[\-\s]exclusive\s+license',
|
| 708 |
-
re.IGNORECASE
|
| 709 |
-
),
|
| 710 |
-
# FIX v4.3.1: Non-Disparagement must NOT fire on arbitration/dispute sections
|
| 711 |
-
"Non-Disparagement": re.compile(
|
| 712 |
-
r'arbitrat|(?<!\w)aaa(?!\w)|(?<!\w)jams(?!\w)|class\s+action|collective\s+(?:proceeding|action)|waives?\s+any\s+right\s+to\s+participate|binding\s+arbitration',
|
| 713 |
-
re.IGNORECASE
|
| 714 |
-
),
|
| 715 |
-
}
|
| 716 |
-
|
| 717 |
-
# FIX v4.3: Minimum confidence thresholds per label
|
| 718 |
-
_LABEL_MIN_CONFIDENCE = {
|
| 719 |
-
"ROFR/ROFO/ROFN": 0.65,
|
| 720 |
-
"Renewal Term": 0.70,
|
| 721 |
-
"Revenue/Profit Sharing": 0.65, # FIX v4.3.1
|
| 722 |
-
"Minimum Commitment": 0.65, # FIX v4.3.1
|
| 723 |
}
|
| 724 |
|
| 725 |
def _apply_guardrails(label, text, confidence):
    """Demote unreliable ML label predictions to "Other" via keyword guardrails.

    Three checks run in order; the first failure demotes the prediction:
      1. Per-label minimum confidence (_LABEL_MIN_CONFIDENCE): below the
         floor, demote to "Other" at 20% of the original confidence.
      2. Required keywords (_LABEL_GUARDRAILS): if the label has a guard
         pattern that the clause text does NOT match, demote at 30%.
      3. Exclusion patterns (_LABEL_EXCLUSIONS): if a contra-indicator
         pattern DOES match, demote at 20%.

    Returns a (label, confidence) tuple — unchanged when all checks pass.
    """

    min_conf = _LABEL_MIN_CONFIDENCE.get(label)
    if min_conf and confidence < min_conf:
        return "Other", confidence * 0.2

    # Check required keywords (must be present)
    guard = _LABEL_GUARDRAILS.get(label)
    if guard and not guard.search(text):
        return "Other", confidence * 0.3

    # Check exclusion patterns (must NOT be present)
    exclusion = _LABEL_EXCLUSIONS.get(label)
    if exclusion and exclusion.search(text):
        return "Other", confidence * 0.2
    return label, confidence
|
| 741 |
|
| 742 |
def _text_hash(text):
|
| 743 |
return hashlib.md5(text.encode()).hexdigest()
|
| 744 |
|
| 745 |
-
|
| 746 |
-
|
| 747 |
|
| 748 |
def classify_cuad(clause_text):
|
| 749 |
if cuad_model is None or cuad_tokenizer is None:
|
| 750 |
return _classify_regex(clause_text)
|
| 751 |
|
|
|
|
| 752 |
clean_text = _strip_heading(clause_text)
|
| 753 |
|
|
|
|
| 754 |
h = _text_hash(clean_text[:512])
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
return cached
|
| 758 |
|
| 759 |
try:
|
| 760 |
-
# FIX v4.1: max_length=512 (was 256 — truncating long legal clauses)
|
| 761 |
inputs = cuad_tokenizer(
|
| 762 |
clean_text,
|
| 763 |
return_tensors="pt",
|
| 764 |
truncation=True,
|
| 765 |
-
max_length=
|
| 766 |
padding=True
|
| 767 |
)
|
| 768 |
with torch.no_grad():
|
| 769 |
logits = cuad_model(**inputs).logits
|
| 770 |
|
| 771 |
-
#
|
| 772 |
-
|
| 773 |
-
probs = torch.softmax(logits, dim=-1)[0]
|
| 774 |
-
|
| 775 |
-
# Get the top prediction
|
| 776 |
-
top_prob, top_idx = torch.max(probs, dim=0)
|
| 777 |
-
top_idx = int(top_idx)
|
| 778 |
-
top_conf = float(top_prob)
|
| 779 |
|
| 780 |
results = []
|
| 781 |
-
|
| 782 |
-
|
| 783 |
-
|
| 784 |
-
|
| 785 |
-
|
| 786 |
-
|
| 787 |
-
|
| 788 |
-
|
|
|
|
| 789 |
risk = RISK_MAP.get(label, "LOW")
|
| 790 |
-
risk = _refine_severity(label, clause_text, risk)
|
| 791 |
results.append({
|
| 792 |
"label": label,
|
| 793 |
"confidence": round(conf, 3),
|
|
@@ -795,170 +567,21 @@ def classify_cuad(clause_text):
|
|
| 795 |
"description": DESC_MAP.get(label, label),
|
| 796 |
"source": "ml",
|
| 797 |
})
|
| 798 |
-
|
| 799 |
-
# Also check 2nd-best prediction if confident enough
|
| 800 |
-
if len(probs) > 1:
|
| 801 |
-
sorted_probs, sorted_indices = torch.sort(probs, descending=True)
|
| 802 |
-
if len(sorted_probs) > 1:
|
| 803 |
-
second_idx = int(sorted_indices[1])
|
| 804 |
-
second_conf = float(sorted_probs[1])
|
| 805 |
-
second_threshold = _CUAD_THRESHOLDS.get(second_idx, 0.40)
|
| 806 |
-
if second_conf > second_threshold and second_idx < len(CUAD_LABELS):
|
| 807 |
-
label2 = CUAD_LABELS[second_idx]
|
| 808 |
-
conf2 = second_conf
|
| 809 |
-
label2, conf2 = _apply_guardrails(label2, clause_text, conf2)
|
| 810 |
-
if not (label2 == "Other" and conf2 < 0.3):
|
| 811 |
-
# Only add if different from primary
|
| 812 |
-
if not results or results[0]["label"] != label2:
|
| 813 |
-
risk2 = RISK_MAP.get(label2, "LOW")
|
| 814 |
-
risk2 = _refine_severity(label2, clause_text, risk2)
|
| 815 |
-
results.append({
|
| 816 |
-
"label": label2,
|
| 817 |
-
"confidence": round(conf2, 3),
|
| 818 |
-
"risk": risk2,
|
| 819 |
-
"description": DESC_MAP.get(label2, label2),
|
| 820 |
-
"source": "ml",
|
| 821 |
-
})
|
| 822 |
-
|
| 823 |
results.sort(key=lambda x: x["confidence"], reverse=True)
|
| 824 |
|
| 825 |
# If no ML results, also try regex to catch what model misses
|
| 826 |
if not results:
|
| 827 |
results = _classify_regex(clause_text)
|
| 828 |
|
| 829 |
-
|
|
|
|
|
|
|
|
|
|
| 830 |
return results
|
| 831 |
except Exception as e:
|
| 832 |
print(f"[ClauseGuard] CUAD inference error: {e}")
|
| 833 |
return _classify_regex(clause_text)
|
| 834 |
|
| 835 |
-
# ═══════════════════════════════════════════════════════════════════════
|
| 836 |
-
# 5b. BATCHED CLAUSE CLASSIFICATION
|
| 837 |
-
# PERF v4.3: Single forward pass for all clauses instead of one-by-one
|
| 838 |
-
# ══════════════════════════════════════════════════════════════════════���
|
| 839 |
-
|
| 840 |
-
def classify_cuad_batch(clauses, batch_size=8):
    """Classify a batch of clauses in a single forward pass.
    PERF v4.3: Replaces sequential classify_cuad() loop.
    On CPU, batch_size=8 balances memory vs throughput.

    Returns a list aligned with `clauses` (same order, same length), where
    each element is a list of prediction dicts:
    {label, confidence, risk, description, source}.
    Falls back to _classify_regex() when the model/tokenizer is unavailable
    or when a batch fails at inference time.
    """
    if cuad_model is None or cuad_tokenizer is None:
        # Fallback to regex for all clauses
        return [_classify_regex(c) for c in clauses]

    all_results = []
    # Check cache first, collect uncached clauses.
    # Cache key is the md5 of the heading-stripped clause's first 512 chars.
    uncached_indices = []
    uncached_texts = []
    for i, clause in enumerate(clauses):
        clean = _strip_heading(clause)
        h = _text_hash(clean[:512])
        cached = _prediction_cache.get(h)
        if cached is not None:
            all_results.append((i, cached))
        else:
            uncached_indices.append(i)
            uncached_texts.append(clean)
            all_results.append((i, None))  # placeholder, filled in below

    if not uncached_texts:
        # Everything was cached; sorting by index restores document order.
        return [r for _, r in sorted(all_results)]

    # Process uncached in batches
    for batch_start in range(0, len(uncached_texts), batch_size):
        batch_texts = uncached_texts[batch_start:batch_start + batch_size]
        # Original (un-stripped) clause text — guardrails and severity
        # refinement run against the full clause, not the cleaned version.
        batch_original = [clauses[uncached_indices[batch_start + j]] for j in range(len(batch_texts))]

        try:
            inputs = cuad_tokenizer(
                batch_texts,
                return_tensors="pt",
                truncation=True,
                max_length=512,
                padding=True,
            )
            with torch.no_grad():
                logits = cuad_model(**inputs).logits

            # Single-label model: softmax over the class dimension.
            probs = torch.softmax(logits, dim=-1)

            for j in range(len(batch_texts)):
                clause_probs = probs[j]
                original_text = batch_original[j]
                results = []

                # Primary prediction: argmax class, gated by a per-class
                # threshold (_CUAD_THRESHOLDS) and keyword guardrails.
                top_prob, top_idx = torch.max(clause_probs, dim=0)
                top_idx_int = int(top_idx)
                top_conf = float(top_prob)

                threshold = _CUAD_THRESHOLDS.get(top_idx_int, 0.40)
                if top_conf > threshold and top_idx_int < len(CUAD_LABELS):
                    label = CUAD_LABELS[top_idx_int]
                    conf = top_conf
                    label, conf = _apply_guardrails(label, original_text, conf)
                    # Guardrails demote weak hits to ("Other", tiny conf);
                    # drop those instead of reporting them.
                    if not (label == "Other" and conf < 0.3):
                        risk = RISK_MAP.get(label, "LOW")
                        risk = _refine_severity(label, original_text, risk)
                        results.append({
                            "label": label,
                            "confidence": round(conf, 3),
                            "risk": risk,
                            "description": DESC_MAP.get(label, label),
                            "source": "ml",
                        })

                # 2nd-best prediction: also reported if it clears its own
                # class threshold and differs from the primary label.
                sorted_probs, sorted_indices = torch.sort(clause_probs, descending=True)
                if len(sorted_probs) > 1:
                    second_idx = int(sorted_indices[1])
                    second_conf = float(sorted_probs[1])
                    second_threshold = _CUAD_THRESHOLDS.get(second_idx, 0.40)
                    if second_conf > second_threshold and second_idx < len(CUAD_LABELS):
                        label2 = CUAD_LABELS[second_idx]
                        conf2 = second_conf
                        label2, conf2 = _apply_guardrails(label2, original_text, conf2)
                        if not (label2 == "Other" and conf2 < 0.3):
                            if not results or results[0]["label"] != label2:
                                risk2 = RISK_MAP.get(label2, "LOW")
                                risk2 = _refine_severity(label2, original_text, risk2)
                                results.append({
                                    "label": label2,
                                    "confidence": round(conf2, 3),
                                    "risk": risk2,
                                    "description": DESC_MAP.get(label2, label2),
                                    "source": "ml",
                                })

                results.sort(key=lambda x: x["confidence"], reverse=True)

                # If the model produced nothing above threshold, fall back
                # to regex so the clause still gets some classification.
                if not results:
                    results = _classify_regex(original_text)

                # Cache the result (keyed on the cleaned text, matching the
                # cache lookup at the top of this function)
                h = _text_hash(batch_texts[j][:512])
                _prediction_cache.put(h, results)

                # Update placeholder in all_results
                # NOTE(review): linear scan per clause — O(n) per update;
                # acceptable for typical document sizes, but all_results is
                # index-aligned, so a direct assignment would also work.
                global_idx = uncached_indices[batch_start + j]
                for k, (idx, _) in enumerate(all_results):
                    if idx == global_idx:
                        all_results[k] = (idx, results)
                        break

        except Exception as e:
            print(f"[ClauseGuard] Batch CUAD inference error: {e}")
            # Fallback to regex for this batch (results are NOT cached here)
            for j in range(len(batch_texts)):
                global_idx = uncached_indices[batch_start + j]
                results = _classify_regex(batch_original[j])
                for k, (idx, _) in enumerate(all_results):
                    if idx == global_idx:
                        all_results[k] = (idx, results)
                        break

    # Restore original clause order before returning.
    return [r for _, r in sorted(all_results)]
|
| 960 |
-
|
| 961 |
-
# FIX v4.1: Extended regex patterns to cover more CUAD categories
|
| 962 |
_REGEX_PATTERNS = {
|
| 963 |
"Limitation of liability": [r"not liable", r"shall not be (liable|responsible)", r"in no event.*liable", r"limitation of liability", r"without warranty", r"disclaim"],
|
| 964 |
"Unilateral termination": [r"terminat.*at any time", r"suspend.*account.*without", r"we may (terminat|suspend|discontinu)", r"right to (terminat|suspend)"],
|
|
@@ -970,60 +593,30 @@ _REGEX_PATTERNS = {
|
|
| 970 |
"Arbitration": [r"arbitrat", r"binding arbitration", r"waive.*right.*court", r"class action waiver"],
|
| 971 |
"Governing Law": [r"governed by", r"laws of", r"jurisdiction of"],
|
| 972 |
"Termination for Convenience": [r"terminat.*for convenience", r"terminat.*without cause", r"terminat.*at any time"],
|
| 973 |
-
"Non-Compete": [r"non-compete", r"shall not compete", r"competition
|
| 974 |
"Exclusivity": [r"exclusive(?:ly)?(?:\s+(?:deal|relationship|partner|right))", r"exclusivity"],
|
| 975 |
-
"IP Ownership Assignment": [r"assign.*intellectual property", r"ownership of.*ip", r"all rights.*assign"
|
| 976 |
"Uncapped Liability": [r"unlimited liability", r"uncapped", r"no.*limit.*liability"],
|
| 977 |
"Cap on Liability": [r"cap on liability", r"maximum liability", r"liability.*shall not exceed", r"aggregate liability.*not exceed"],
|
| 978 |
"Indemnification": [r"indemnif", r"hold harmless", r"defend.*against.*claim"],
|
| 979 |
"Confidentiality": [r"confidential(?:ity)?", r"non-disclosure", r"\bnda\b"],
|
| 980 |
"Force Majeure": [r"force majeure", r"act of god", r"beyond.*(?:reasonable\s+)?control"],
|
| 981 |
"Penalties": [r"penalt(?:y|ies)", r"late fee", r"default charge", r"interest on overdue"],
|
| 982 |
-
# FIX v4.1: Added missing regex patterns for more CUAD categories
|
| 983 |
-
"Audit Rights": [r"audit rights?", r"right to audit", r"inspect.*records?", r"examination of.*records?", r"access to.*books"],
|
| 984 |
-
"Warranty Duration": [r"warrant(?:y|ies).*(?:period|duration|term|months?|years?)", r"warranty.*shall.*(?:remain|last|continue)", r"limited warranty"],
|
| 985 |
-
"Insurance": [r"(?:shall|must).*maintain.*insurance", r"insurance.*coverage", r"policy of insurance", r"certificate of insurance"],
|
| 986 |
-
"Source Code Escrow": [r"source code escrow", r"escrow.*source code", r"escrow agent"],
|
| 987 |
-
"Post-Termination Services": [r"post.?termination.*(?:service|obligation|support)", r"(?:after|following|upon).*termination.*(?:shall|must|will).*(?:provide|continue)"],
|
| 988 |
-
"Renewal Term": [r"renew(?:al)?.*term", r"auto(?:matic(?:ally)?)?.*renew", r"successive.*(?:term|period)"],
|
| 989 |
-
"Notice Period to Terminate Renewal": [r"notice.*(?:to\s+)?terminat.*renew", r"(?:days?|months?).*(?:prior|advance).*(?:notice|written).*(?:terminat|renew)", r"notice of non.?renewal"],
|
| 990 |
-
"Change of Control": [r"change of control", r"change in.*(?:ownership|control)", r"merger.*acquisition", r"sale of.*(?:all|substantially).*assets"],
|
| 991 |
-
"Anti-Assignment": [r"(?:shall|may)\s+not\s+assign", r"anti.?assignment", r"no.*assignment.*without.*consent"],
|
| 992 |
-
"Revenue/Profit Sharing": [r"revenue.*shar", r"profit.*shar", r"royalt(?:y|ies)"],
|
| 993 |
-
"Liquidated Damages": [r"liquidated.*damages?", r"pre.?determined.*damage", r"stipulated.*damage"],
|
| 994 |
-
"Covenant Not to Sue": [r"covenant not to sue", r"(?:shall|agree).*not.*(?:bring|file|commence).*(?:action|claim|suit)"],
|
| 995 |
-
"Joint IP Ownership": [r"joint(?:ly)?.*own(?:ed|ership)?.*(?:ip|intellectual property)", r"co.?own(?:ed|ership)?"],
|
| 996 |
-
"License Grant": [r"(?:grant|license).*(?:non.?exclusive|exclusive|perpetual|irrevocable).*(?:license|right)", r"hereby grants?.*license"],
|
| 997 |
-
"Non-Transferable License": [r"non.?transferable.*license", r"license.*(?:shall|may)\s+not.*(?:transfer|assign|sublicense)"],
|
| 998 |
-
"ROFR/ROFO/ROFN": [r"right of first.*(?:refusal|offer|negotiation)", r"ROFR", r"ROFO", r"ROFN"],
|
| 999 |
-
"No-Solicit of Customers": [r"(?:shall|must|agree).*not.*solicit.*customer", r"no.?solicit.*customer", r"non.?solicitation.*customer"],
|
| 1000 |
-
"No-Solicit of Employees": [r"(?:shall|must|agree).*not.*solicit.*employee", r"no.?solicit.*employee", r"non.?solicitation.*employee", r"no.?hire"],
|
| 1001 |
-
"Non-Disparagement": [r"non.?disparagement", r"(?:shall|must|agree).*not.*(?:disparag|defam|make.*negative)", r"not.*make.*derogatory"],
|
| 1002 |
-
"Most Favored Nation": [r"most favou?red.*nation", r"MFN", r"most favou?red.*(?:customer|pricing|terms)"],
|
| 1003 |
-
"Third Party Beneficiary": [r"third.?party.*beneficiar", r"no.*third.?party.*beneficiar"],
|
| 1004 |
-
"Minimum Commitment": [r"minimum.*(?:commitment|purchase|order|volume|spend)", r"(?:shall|must).*(?:purchase|order).*(?:at least|minimum|no less than)"],
|
| 1005 |
-
"Volume Restriction": [r"volume.*(?:restriction|limitation|cap|ceiling)", r"(?:shall|may).*not.*exceed.*(?:volume|quantity)"],
|
| 1006 |
-
"Price Restriction": [r"price.*(?:restriction|limitation|ceiling|cap|floor)", r"(?:shall|may).*not.*(?:increase|raise|exceed).*price"],
|
| 1007 |
}
|
| 1008 |
|
| 1009 |
-
# FIX v4.2: Pre-compile regex patterns at module level (was recompiling per call)
|
| 1010 |
-
_REGEX_PATTERNS_COMPILED = {}
|
| 1011 |
-
for _label, _pats in _REGEX_PATTERNS.items():
|
| 1012 |
-
_REGEX_PATTERNS_COMPILED[_label] = [re.compile(p, re.IGNORECASE) for p in _pats]
|
| 1013 |
-
|
| 1014 |
def _classify_regex(text):
|
| 1015 |
"""Regex fallback — returns pattern match, NOT fake confidence."""
|
| 1016 |
text_lower = text.lower()
|
| 1017 |
results = []
|
| 1018 |
seen = set()
|
| 1019 |
-
for label, patterns in
|
| 1020 |
for pat in patterns:
|
| 1021 |
-
if
|
| 1022 |
if label not in seen:
|
| 1023 |
risk = RISK_MAP.get(label, "MEDIUM")
|
| 1024 |
results.append({
|
| 1025 |
"label": label,
|
| 1026 |
-
"confidence": None,
|
| 1027 |
"risk": risk,
|
| 1028 |
"description": DESC_MAP.get(label, label),
|
| 1029 |
"source": "pattern",
|
|
@@ -1034,25 +627,20 @@ def _classify_regex(text):
|
|
| 1034 |
|
| 1035 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1036 |
# 6. LEGAL NER — ML model with regex fallback
|
| 1037 |
-
# FIX v4.1: Batch all chunks in single pipeline call
|
| 1038 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1039 |
|
| 1040 |
def extract_entities(text):
|
| 1041 |
"""Extract entities using ML model (matterstack/legal-bert-ner) with regex fallback."""
|
| 1042 |
entities = []
|
| 1043 |
|
|
|
|
| 1044 |
if _HAS_NER_MODEL and ner_pipeline is not None:
|
| 1045 |
try:
|
| 1046 |
-
#
|
| 1047 |
-
|
| 1048 |
-
|
| 1049 |
-
|
| 1050 |
-
|
| 1051 |
-
# Single batched pipeline call instead of sequential
|
| 1052 |
-
all_ner_results = ner_pipeline(chunks, batch_size=8)
|
| 1053 |
-
|
| 1054 |
-
for chunk_idx, ner_results in enumerate(all_ner_results):
|
| 1055 |
-
offset = offsets[chunk_idx]
|
| 1056 |
for ent in ner_results:
|
| 1057 |
if ent.get("score", 0) > 0.5:
|
| 1058 |
entities.append({
|
|
@@ -1063,55 +651,16 @@ def extract_entities(text):
|
|
| 1063 |
"score": round(ent["score"], 3),
|
| 1064 |
"source": "ml",
|
| 1065 |
})
|
|
|
|
| 1066 |
except Exception as e:
|
| 1067 |
print(f"[ClauseGuard] ML NER error, falling back to regex: {e}")
|
| 1068 |
entities = _extract_entities_regex(text)
|
| 1069 |
else:
|
| 1070 |
entities = _extract_entities_regex(text)
|
| 1071 |
|
| 1072 |
-
# FIX v4.3: Post-process ML entities to clean up WordPiece artefacts
|
| 1073 |
-
cleaned_entities = []
|
| 1074 |
-
for e in entities:
|
| 1075 |
-
text_val = e.get("text", "")
|
| 1076 |
-
# Strip WordPiece subword tokens (## prefix)
|
| 1077 |
-
if "##" in text_val:
|
| 1078 |
-
text_val = re.sub(r'##\w*', '', text_val).strip()
|
| 1079 |
-
text_val = re.sub(r'\s+', ' ', text_val).strip()
|
| 1080 |
-
# Discard entities that are too short, start/end with hyphens, or are garbled
|
| 1081 |
-
if len(text_val) < 2:
|
| 1082 |
-
continue
|
| 1083 |
-
if text_val.startswith("-") or text_val.endswith("-"):
|
| 1084 |
-
continue
|
| 1085 |
-
# Discard low-confidence MISC entities (almost always tokenisation artefacts)
|
| 1086 |
-
if e.get("type") == "MISC" and e.get("score", 1.0) < 0.6:
|
| 1087 |
-
continue
|
| 1088 |
-
# Discard entities that are mostly punctuation/symbols
|
| 1089 |
-
alpha_ratio = sum(1 for c in text_val if c.isalnum()) / max(len(text_val), 1)
|
| 1090 |
-
if alpha_ratio < 0.4:
|
| 1091 |
-
continue
|
| 1092 |
-
e["text"] = text_val
|
| 1093 |
-
cleaned_entities.append(e)
|
| 1094 |
-
entities = cleaned_entities
|
| 1095 |
-
|
| 1096 |
-
# FIX v4.3: Split concatenated MONEY/QUANTITY entities
|
| 1097 |
-
# e.g., "usd $ 485, 000,usd $ 72, 000" → separate entities
|
| 1098 |
-
_CURRENCY_SPLIT = re.compile(r'(?<=[\d,])\s*(?=(?:USD|usd|EUR|GBP|\$|£|€))', re.IGNORECASE)
|
| 1099 |
-
split_entities = []
|
| 1100 |
-
for e in entities:
|
| 1101 |
-
if e.get("type") in ("MONEY", "QUANTITY") and _CURRENCY_SPLIT.search(e["text"]):
|
| 1102 |
-
parts = _CURRENCY_SPLIT.split(e["text"])
|
| 1103 |
-
for part in parts:
|
| 1104 |
-
part = part.strip().strip(",").strip()
|
| 1105 |
-
if len(part) >= 2:
|
| 1106 |
-
new_ent = dict(e)
|
| 1107 |
-
new_ent["text"] = re.sub(r'\s+', '', part) if "$" in part or "USD" in part.upper() else part
|
| 1108 |
-
split_entities.append(new_ent)
|
| 1109 |
-
else:
|
| 1110 |
-
split_entities.append(e)
|
| 1111 |
-
entities = split_entities
|
| 1112 |
-
|
| 1113 |
# Always supplement with regex patterns for things NER often misses
|
| 1114 |
regex_ents = _extract_entities_regex(text)
|
|
|
|
| 1115 |
ml_spans = set()
|
| 1116 |
for e in entities:
|
| 1117 |
for pos in range(e["start"], e["end"]):
|
|
@@ -1131,13 +680,20 @@ def extract_entities(text):
|
|
| 1131 |
return filtered
|
| 1132 |
|
| 1133 |
def _map_ner_label(label):
|
|
|
|
| 1134 |
label = label.upper()
|
| 1135 |
mapping = {
|
| 1136 |
-
"PER": "PERSON",
|
| 1137 |
-
"
|
| 1138 |
-
"
|
| 1139 |
-
"
|
| 1140 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1141 |
}
|
| 1142 |
return mapping.get(label, label)
|
| 1143 |
|
|
@@ -1145,19 +701,26 @@ def _extract_entities_regex(text):
|
|
| 1145 |
"""Regex-based NER fallback."""
|
| 1146 |
entities = []
|
| 1147 |
patterns = [
|
|
|
|
| 1148 |
(r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', "DATE"),
|
| 1149 |
(r'\b\d{1,2}/\d{1,2}/\d{2,4}\b', "DATE"),
|
| 1150 |
(r'\b\d{1,2}-(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\d{2,4}\b', "DATE"),
|
| 1151 |
(r'\b(?:Effective|Commencement|Expiration|Termination)\s+Date\b', "DATE_REF"),
|
|
|
|
| 1152 |
(r'\$\s?\d{1,3}(?:,\d{3})*(?:\.\d{2})?(?:\s*(?:million|billion|thousand|M|B|K))?', "MONEY"),
|
| 1153 |
(r'\b\d{1,3}(?:,\d{3})*(?:\.\d{2})?\s*(?:USD|EUR|GBP|dollars|euros|pounds)', "MONEY"),
|
| 1154 |
(r'\b(?:USD|EUR|GBP)\s*\d{1,3}(?:,\d{3})*(?:\.\d{2})?', "MONEY"),
|
|
|
|
| 1155 |
(r'\b\d+(?:\.\d+)?%', "PERCENTAGE"),
|
|
|
|
| 1156 |
(r'\b\d+\s*(?:year|month|week|day|business day)s?\b', "DURATION"),
|
|
|
|
| 1157 |
(r'\b[A-Z][A-Za-z0-9\s&,]+?(?:Inc\.?|LLC|Ltd\.?|Limited|Corp\.?|Corporation|PLC|GmbH|AG|S\.A\.?|B\.V\.?|L\.P\.?|LLP)\b', "PARTY"),
|
| 1158 |
(r'\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Customer|Vendor|Client)\b', "PARTY_ROLE"),
|
|
|
|
| 1159 |
(r'\b(?:State|Commonwealth)\s+of\s+[A-Z][a-zA-Z\s]+', "JURISDICTION"),
|
| 1160 |
(r'\b(?:California|Delaware|New York|Texas|Florida|England|Ireland|Germany|France|Singapore|Hong Kong|Ontario|British Columbia)\b', "JURISDICTION"),
|
|
|
|
| 1161 |
(r'"([A-Z][A-Za-z\s]{1,40})"', "DEFINED_TERM"),
|
| 1162 |
(r'\((?:the\s+)?"([A-Z][A-Za-z\s]{1,40})"\)', "DEFINED_TERM"),
|
| 1163 |
]
|
|
@@ -1174,29 +737,9 @@ def _extract_entities_regex(text):
|
|
| 1174 |
return entities
|
| 1175 |
|
| 1176 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1177 |
-
# 7. NLI / CONTRADICTION DETECTION
|
| 1178 |
-
# FIX v4.1: Pass (text_a, text_b) as dict with proper keys for
|
| 1179 |
-
# cross-encoder pipeline, not [SEP]-concatenated string
|
| 1180 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1181 |
|
| 1182 |
-
def _run_nli(text_a, text_b):
|
| 1183 |
-
"""Run NLI using CrossEncoder with correct input format.
|
| 1184 |
-
FIX v4.2: Use sentence_transformers.CrossEncoder.predict() which accepts
|
| 1185 |
-
a list of (text_a, text_b) tuples. Returns scores for [contradiction, entailment, neutral].
|
| 1186 |
-
The old code used pipeline("text-classification") with dict input, which was broken."""
|
| 1187 |
-
try:
|
| 1188 |
-
# CrossEncoder.predict returns numpy array of shape (n_pairs, 3)
|
| 1189 |
-
# Columns: [contradiction, entailment, neutral]
|
| 1190 |
-
scores = nli_model.predict([(text_a[:256], text_b[:256])])
|
| 1191 |
-
label_mapping = ["contradiction", "entailment", "neutral"]
|
| 1192 |
-
top_idx = int(scores[0].argmax())
|
| 1193 |
-
top_score = float(scores[0][top_idx])
|
| 1194 |
-
return [{"label": label_mapping[top_idx], "score": top_score}]
|
| 1195 |
-
except Exception as e:
|
| 1196 |
-
print(f"[ClauseGuard] NLI inference error: {e}")
|
| 1197 |
-
return None
|
| 1198 |
-
|
| 1199 |
-
|
| 1200 |
def detect_contradictions(clause_results, raw_text=""):
|
| 1201 |
"""
|
| 1202 |
Detect contradictions using:
|
|
@@ -1213,7 +756,8 @@ def detect_contradictions(clause_results, raw_text=""):
|
|
| 1213 |
clause_texts_by_label[cr["label"]].append(cr.get("text", ""))
|
| 1214 |
|
| 1215 |
# ── 1. Semantic NLI (if model available) ──
|
| 1216 |
-
if _HAS_NLI_MODEL and
|
|
|
|
| 1217 |
conflict_pairs = [
|
| 1218 |
("Uncapped Liability", "Cap on Liability",
|
| 1219 |
"Liability cannot be both uncapped and capped simultaneously."),
|
|
@@ -1228,20 +772,24 @@ def detect_contradictions(clause_results, raw_text=""):
|
|
| 1228 |
texts_b = clause_texts_by_label[label_b]
|
| 1229 |
for ta in texts_a[:2]:
|
| 1230 |
for tb in texts_b[:2]:
|
| 1231 |
-
|
| 1232 |
-
|
| 1233 |
-
|
| 1234 |
-
|
| 1235 |
-
|
| 1236 |
-
|
| 1237 |
-
|
| 1238 |
-
|
| 1239 |
-
|
| 1240 |
-
|
| 1241 |
-
|
| 1242 |
-
|
| 1243 |
-
|
| 1244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1245 |
|
| 1246 |
# Also check for internal contradictions within governing law / termination
|
| 1247 |
for label in ["Governing Law", "Termination for Convenience"]:
|
|
@@ -1249,19 +797,23 @@ def detect_contradictions(clause_results, raw_text=""):
|
|
| 1249 |
if len(texts) >= 2:
|
| 1250 |
for i in range(len(texts)):
|
| 1251 |
for j in range(i + 1, min(len(texts), i + 3)):
|
| 1252 |
-
|
| 1253 |
-
|
| 1254 |
-
|
| 1255 |
-
|
| 1256 |
-
|
| 1257 |
-
|
| 1258 |
-
|
| 1259 |
-
|
| 1260 |
-
|
| 1261 |
-
|
| 1262 |
-
|
| 1263 |
-
|
| 1264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1265 |
else:
|
| 1266 |
# ── Heuristic fallback (improved) ──
|
| 1267 |
_heuristic_pairs = [
|
|
@@ -1282,7 +834,7 @@ def detect_contradictions(clause_results, raw_text=""):
|
|
| 1282 |
"source": "heuristic",
|
| 1283 |
})
|
| 1284 |
|
| 1285 |
-
# ── 2. Missing critical clauses ──
|
| 1286 |
_REQUIRED_CLAUSE_PATTERNS = {
|
| 1287 |
"Governing Law": re.compile(
|
| 1288 |
r'govern(?:ed|ing).{0,15}law|applicable.{0,10}law|laws?\s+of\s+the\s+state',
|
|
@@ -1302,6 +854,7 @@ def detect_contradictions(clause_results, raw_text=""):
|
|
| 1302 |
),
|
| 1303 |
}
|
| 1304 |
for clause_name, pattern in _REQUIRED_CLAUSE_PATTERNS.items():
|
|
|
|
| 1305 |
if not pattern.search(raw_text):
|
| 1306 |
contradictions.append({
|
| 1307 |
"type": "MISSING",
|
|
@@ -1324,7 +877,6 @@ def detect_contradictions(clause_results, raw_text=""):
|
|
| 1324 |
|
| 1325 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1326 |
# 8. RISK SCORING
|
| 1327 |
-
# FIX v4.1: Absolute risk based on findings, not normalized by doc length
|
| 1328 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1329 |
|
| 1330 |
def compute_risk_score(clause_results, total_clauses):
|
|
@@ -1334,28 +886,8 @@ def compute_risk_score(clause_results, total_clauses):
|
|
| 1334 |
sev_counts[sev] += 1
|
| 1335 |
if total_clauses == 0:
|
| 1336 |
return 0, "A", sev_counts
|
| 1337 |
-
|
| 1338 |
-
# FIX v4.3: Revised risk formula — scale denominator with clause count
|
| 1339 |
-
# to prevent small contracts from always scoring 80+.
|
| 1340 |
-
# The old formula used a fixed /30 denominator which meant even 2 CRITICAL
|
| 1341 |
-
# flags scored 73, making almost every contract grade F.
|
| 1342 |
-
#
|
| 1343 |
-
# New approach: dynamic denominator based on total clauses analysed.
|
| 1344 |
-
# This means risk is relative to document complexity.
|
| 1345 |
-
# - 1 CRITICAL in 5 clauses = high risk
|
| 1346 |
-
# - 1 CRITICAL in 50 clauses = moderate risk (proportionally less of the contract)
|
| 1347 |
weighted = sum(sev_counts[s] * RISK_WEIGHTS[s] for s in sev_counts)
|
| 1348 |
-
|
| 1349 |
-
# Dynamic max: what if every clause were CRITICAL?
|
| 1350 |
-
max_possible = total_clauses * RISK_WEIGHTS["CRITICAL"]
|
| 1351 |
-
if max_possible == 0:
|
| 1352 |
-
max_possible = 1
|
| 1353 |
-
|
| 1354 |
-
# Blend: 60% absolute (diminishing returns) + 40% relative (to total clauses)
|
| 1355 |
-
absolute_risk = 100 * (1 - (1 / (1 + weighted / 50))) # /50 instead of /30 = softer curve
|
| 1356 |
-
relative_risk = min(100, (weighted / max_possible) * 100)
|
| 1357 |
-
risk = min(100, round(0.6 * absolute_risk + 0.4 * relative_risk))
|
| 1358 |
-
|
| 1359 |
if risk >= 70: grade = "F"
|
| 1360 |
elif risk >= 50: grade = "D"
|
| 1361 |
elif risk >= 30: grade = "C"
|
|
@@ -1373,12 +905,9 @@ def analyze_contract(text):
|
|
| 1373 |
clauses = split_clauses(text)
|
| 1374 |
if not clauses:
|
| 1375 |
return None, "No clauses detected in document"
|
| 1376 |
-
|
| 1377 |
-
# PERF v4.3: Batch classification — single forward pass instead of per-clause
|
| 1378 |
-
batch_predictions = classify_cuad_batch(clauses, batch_size=8)
|
| 1379 |
-
|
| 1380 |
clause_results = []
|
| 1381 |
-
for clause
|
|
|
|
| 1382 |
if predictions:
|
| 1383 |
for pred in predictions:
|
| 1384 |
clause_results.append({
|
|
@@ -1393,8 +922,10 @@ def analyze_contract(text):
|
|
| 1393 |
contradictions = detect_contradictions(clause_results, text)
|
| 1394 |
risk, grade, sev_counts = compute_risk_score(clause_results, len(clauses))
|
| 1395 |
obligations = extract_obligations(text)
|
|
|
|
| 1396 |
compliance = check_compliance(text)
|
| 1397 |
|
|
|
|
| 1398 |
flagged_clause_count = len(clause_results)
|
| 1399 |
unique_flagged_texts = len(set(cr["text"] for cr in clause_results))
|
| 1400 |
|
|
@@ -1422,7 +953,7 @@ def analyze_contract(text):
|
|
| 1422 |
return result, None
|
| 1423 |
|
| 1424 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1425 |
-
# 10. EXPORT FUNCTIONS
|
| 1426 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1427 |
|
| 1428 |
def export_json(result):
|
|
@@ -1450,7 +981,7 @@ def export_csv(result):
|
|
| 1450 |
return output.getvalue()
|
| 1451 |
|
| 1452 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1453 |
-
# 11. UI RENDERING
|
| 1454 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1455 |
|
| 1456 |
def render_summary(result):
|
|
@@ -1618,29 +1149,34 @@ def render_document_viewer(result):
|
|
| 1618 |
entities = sorted(result.get("entities", []), key=lambda x: x["start"])
|
| 1619 |
html_parts = []
|
| 1620 |
last_end = 0
|
| 1621 |
-
entity_colors = {
|
| 1622 |
-
"DATE": "#3b82f6", "DATE_REF": "#60a5fa", "MONEY": "#22c55e",
|
| 1623 |
-
"PERCENTAGE": "#10b981", "DURATION": "#6366f1", "PARTY": "#8b5cf6",
|
| 1624 |
-
"PARTY_ROLE": "#a78bfa", "PERSON": "#ec4899", "JURISDICTION": "#f59e0b",
|
| 1625 |
-
"DEFINED_TERM": "#ec4899", "LEGAL_REF": "#6b7280", "MISC": "#9ca3af",
|
| 1626 |
-
}
|
| 1627 |
for e in entities:
|
| 1628 |
if e["start"] >= last_end:
|
| 1629 |
-
|
| 1630 |
-
|
| 1631 |
-
|
| 1632 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1633 |
html_parts.append(
|
| 1634 |
-
f'<
|
| 1635 |
-
f'title="{e["type"]}">{entity_text}</span>'
|
| 1636 |
)
|
| 1637 |
last_end = e["end"]
|
| 1638 |
-
|
| 1639 |
-
|
| 1640 |
-
return f
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1641 |
|
| 1642 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1643 |
-
# 12. COMPARISON
|
| 1644 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1645 |
|
| 1646 |
def run_comparison(text_a, text_b):
|
|
@@ -1780,10 +1316,6 @@ This Master Service Agreement ("MSA") is entered into as of March 1, 2024 (the "
|
|
| 1780 |
|
| 1781 |
14. THIRD PARTY BENEFICIARY. No third party shall have rights under this Agreement except as expressly provided."""
|
| 1782 |
|
| 1783 |
-
# ═══════════════════════════════════════════════════════════════════════
|
| 1784 |
-
# 14. GRADIO BLOCKS
|
| 1785 |
-
# ═══════════════════════════════════════════════════════════════════════
|
| 1786 |
-
|
| 1787 |
with gr.Blocks(
|
| 1788 |
title="ClauseGuard — AI Contract Analysis",
|
| 1789 |
css="""
|
|
@@ -1802,7 +1334,7 @@ with gr.Blocks(
|
|
| 1802 |
<h1 style="font-size:24px;font-weight:700;margin:0;color:#1f2937;">🛡️ ClauseGuard</h1>
|
| 1803 |
<p style="font-size:13px;color:#6b7280;margin:4px 0 0 0;">AI-Powered Legal Contract Analysis · 41 Clause Categories · Risk Scoring · ML NER · NLI Contradictions · Compliance · Obligations · <strong>Q&A Chatbot</strong> · <strong>Clause Redlining</strong> · <strong>OCR</strong></p>
|
| 1804 |
</div>
|
| 1805 |
-
<div style="font-size:12px;color:#9ca3af;">v4.
|
| 1806 |
</div>
|
| 1807 |
""")
|
| 1808 |
|
|
@@ -1925,7 +1457,7 @@ with gr.Blocks(
|
|
| 1925 |
<h3 style="margin:0;font-size:16px;color:#1f2937;">Contract Q&A Chatbot</h3>
|
| 1926 |
</div>
|
| 1927 |
<p style="font-size:12px;color:#6b7280;margin:0;line-height:1.5;">
|
| 1928 |
-
Ask questions about your analyzed contract. The chatbot uses <strong>RAG</strong> (Retrieval-Augmented Generation)
|
| 1929 |
to find relevant clauses and generate accurate answers grounded in your contract text.
|
| 1930 |
<br>
|
| 1931 |
<strong>Step 1:</strong> Analyze a contract in the "📄 Single Contract Analysis" tab.
|
|
@@ -1994,8 +1526,7 @@ with gr.Blocks(
|
|
| 1994 |
doc_html, obligations_html, compliance_html, redlining_html,
|
| 1995 |
json_file, csv_file, status_msg, analysis_state,
|
| 1996 |
chunks_state, embeddings_state, chatbot_index_status,
|
| 1997 |
-
]
|
| 1998 |
-
api_name="analyze",
|
| 1999 |
)
|
| 2000 |
|
| 2001 |
clear_btn.click(
|
|
@@ -2011,20 +1542,18 @@ with gr.Blocks(
|
|
| 2011 |
comp_btn.click(
|
| 2012 |
run_comparison,
|
| 2013 |
inputs=[comp_text_a, comp_text_b],
|
| 2014 |
-
outputs=[comp_result_html, comp_json]
|
| 2015 |
-
api_name="compare",
|
| 2016 |
)
|
| 2017 |
|
| 2018 |
gr.HTML("""
|
| 2019 |
<div style="margin-top:24px;padding:16px 0;border-top:1px solid #e5e7eb;text-align:center;">
|
| 2020 |
<p style="font-size:11px;color:#9ca3af;">
|
| 2021 |
⚠️ Not legal advice. For informational purposes only.
|
| 2022 |
-
·
|
| 2023 |
· NER: <a href="https://huggingface.co/matterstack/legal-bert-ner" style="color:#6b7280;">Legal-BERT NER</a>
|
| 2024 |
· NLI: <a href="https://huggingface.co/cross-encoder/nli-deberta-v3-base" style="color:#6b7280;">DeBERTa-v3 NLI</a>
|
| 2025 |
· LLM: <a href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color:#6b7280;">Qwen2.5-7B</a>
|
| 2026 |
· OCR: <a href="https://github.com/mindee/doctr" style="color:#6b7280;">docTR</a>
|
| 2027 |
-
· Embeddings: <a href="https://huggingface.co/BAAI/bge-small-en-v1.5" style="color:#6b7280;">BGE-small-en</a>
|
| 2028 |
· Dataset: <a href="https://huggingface.co/datasets/theatticusproject/cuad-qa" style="color:#6b7280;">CUAD</a>
|
| 2029 |
· <a href="https://huggingface.co/spaces/gaurv007/ClauseGuard" style="color:#6b7280;">ClauseGuard Space</a>
|
| 2030 |
</p>
|
|
|
|
| 1 |
"""
|
| 2 |
+
ClauseGuard — World's Best Legal Contract Analysis Tool (v4.0)
|
| 3 |
═══════════════════════════════════════════════════════════════
|
| 4 |
+
New in v4.0:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
• OCR support for scanned PDFs (docTR engine with smart native/scanned routing)
|
| 6 |
• Contract Q&A Chatbot (RAG: embedding retrieval + HF Inference API streaming)
|
| 7 |
• Clause Redlining (3-tier: template lookup + RAG + LLM refinement)
|
| 8 |
+
|
| 9 |
+
Carried from v3.0:
|
| 10 |
+
• Fixed CUAD label mapping (added missing index 6: "Notice Period to Terminate Renewal")
|
| 11 |
+
• Switched from softmax → sigmoid for proper multi-label classification
|
| 12 |
+
• Per-class optimized thresholds instead of flat 0.15
|
| 13 |
+
• Structure-aware clause splitting (respects section numbering)
|
| 14 |
• Real NLI contradiction detection via cross-encoder model
|
| 15 |
+
• ML-based Legal NER (matterstack/legal-bert-ner) with regex fallback
|
| 16 |
• Semantic compliance checking with negation handling
|
| 17 |
• Improved obligation extraction with false-positive filtering
|
| 18 |
+
• LLM-powered clause explanations (via HF Inference API)
|
| 19 |
+
• Prediction caching (LRU) for performance
|
| 20 |
• Per-session temp files (no collision)
|
| 21 |
+
• Model health reporting to user
|
| 22 |
+
• Document structure parsing
|
| 23 |
|
| 24 |
Models:
|
| 25 |
• Clause classifier: Mokshith31/legalbert-contract-clause-classification
|
|
|
|
| 39 |
import uuid
|
| 40 |
import tempfile
|
| 41 |
import hashlib
|
| 42 |
+
from collections import defaultdict
|
|
|
|
| 43 |
from datetime import datetime
|
| 44 |
from functools import lru_cache
|
| 45 |
|
|
|
|
| 72 |
)
|
| 73 |
from peft import PeftModel
|
| 74 |
_HAS_TORCH = True
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
except Exception:
|
| 76 |
pass
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
# ── Import submodules ───────────────────────────────────────────────
|
| 79 |
from compare import compare_contracts, render_comparison_html
|
| 80 |
from obligations import extract_obligations, render_obligations_html
|
|
|
|
| 137 |
"Jurisdiction", "Arbitration"
|
| 138 |
]
|
| 139 |
|
| 140 |
+
_ALL_LABELS = CUAD_LABELS + _UNFAIR_LABELS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
RISK_MAP = {
|
| 143 |
# Critical
|
|
|
|
| 193 |
"Other": "LOW",
|
| 194 |
"ROFR/ROFO/ROFN": "LOW",
|
| 195 |
"Contract by using": "LOW",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
}
|
| 197 |
|
| 198 |
DESC_MAP = {label: label.replace("_", " ") for label in _ALL_LABELS}
|
|
|
|
| 233 |
"Irrevocable or Perpetual License": "License that cannot be revoked or lasts indefinitely.",
|
| 234 |
"Unlimited/All-You-Can-Eat License": "License with no usage limits.",
|
| 235 |
"Notice Period to Terminate Renewal": "Required notice period before automatic renewal.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
})
|
| 237 |
|
| 238 |
RISK_WEIGHTS = {"CRITICAL": 40, "HIGH": 20, "MEDIUM": 10, "LOW": 3}
|
| 239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
RISK_STYLES = {
|
| 241 |
"CRITICAL": ("#dc2626", "#fef2f2", "⚠️"),
|
| 242 |
"HIGH": ("#ea580c", "#fff7ed", "⚡"),
|
|
|
|
| 244 |
"LOW": ("#16a34a", "#f0fdf4", "✓"),
|
| 245 |
}
|
| 246 |
|
| 247 |
+
# Per-class optimized thresholds (tuned on validation set; classes with F1=0 get high threshold)
|
| 248 |
+
# Classes 0,1,2,7,9,21,22,27,37,38 scored F1=0.00 in the model card → raise thresholds
|
|
|
|
|
|
|
|
|
|
| 249 |
_CUAD_THRESHOLDS = {}
|
| 250 |
_WEAK_CLASSES = {0, 1, 2, 7, 9, 21, 22, 27, 37, 38}
|
| 251 |
for _i in range(41):
|
| 252 |
if _i in _WEAK_CLASSES:
|
| 253 |
_CUAD_THRESHOLDS[_i] = 0.85 # Only flag if very confident (these classes are unreliable)
|
| 254 |
else:
|
| 255 |
+
_CUAD_THRESHOLDS[_i] = 0.40 # Reasonable threshold for sigmoid outputs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
# ═══════════════════════════════════════════════════════════════════════
|
| 258 |
# 2. MODEL LOADING
|
|
|
|
| 261 |
cuad_tokenizer = None
|
| 262 |
cuad_model = None
|
| 263 |
ner_pipeline = None
|
| 264 |
+
nli_pipeline = None
|
| 265 |
_model_status = {"cuad": "not_loaded", "ner": "not_loaded", "nli": "not_loaded"}
|
| 266 |
|
| 267 |
def _load_cuad_model():
|
| 268 |
global cuad_tokenizer, cuad_model, _model_status
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
if not _HAS_TORCH:
|
| 270 |
print("[ClauseGuard] PyTorch not available — using regex fallback")
|
| 271 |
_model_status["cuad"] = "unavailable"
|
|
|
|
| 273 |
try:
|
| 274 |
base = "nlpaueb/legal-bert-base-uncased"
|
| 275 |
adapter = "Mokshith31/legalbert-contract-clause-classification"
|
| 276 |
+
print(f"[ClauseGuard] Loading CUAD classifier: {adapter}")
|
| 277 |
cuad_tokenizer = AutoTokenizer.from_pretrained(base)
|
| 278 |
base_model = AutoModelForSequenceClassification.from_pretrained(
|
| 279 |
base, num_labels=41, ignore_mismatched_sizes=True
|
| 280 |
)
|
| 281 |
cuad_model = PeftModel.from_pretrained(base_model, adapter)
|
| 282 |
cuad_model.eval()
|
| 283 |
+
_model_status["cuad"] = "loaded"
|
| 284 |
+
print("[ClauseGuard] CUAD model loaded successfully")
|
| 285 |
except Exception as e:
|
| 286 |
print(f"[ClauseGuard] CUAD model load failed: {e}")
|
| 287 |
cuad_tokenizer = None
|
|
|
|
| 309 |
_model_status["ner"] = f"failed: {e}"
|
| 310 |
|
| 311 |
def _load_nli_model():
|
| 312 |
+
global nli_pipeline, _model_status, _HAS_NLI_MODEL
|
| 313 |
+
if not _HAS_TORCH:
|
| 314 |
+
_model_status["nli"] = "unavailable"
|
| 315 |
return
|
| 316 |
try:
|
| 317 |
+
print("[ClauseGuard] Loading NLI model: cross-encoder/nli-deberta-v3-base")
|
| 318 |
+
nli_pipeline = pipeline(
|
| 319 |
+
"text-classification",
|
| 320 |
+
model="cross-encoder/nli-deberta-v3-base",
|
| 321 |
+
device=-1,
|
| 322 |
+
)
|
| 323 |
_HAS_NLI_MODEL = True
|
| 324 |
_model_status["nli"] = "loaded"
|
| 325 |
+
print("[ClauseGuard] NLI model loaded successfully")
|
| 326 |
except Exception as e:
|
| 327 |
print(f"[ClauseGuard] NLI model load failed (using heuristic fallback): {e}")
|
| 328 |
_model_status["nli"] = f"failed: {e}"
|
|
|
|
| 384 |
return None, f"Unsupported file type: {ext}"
|
| 385 |
|
| 386 |
# ═══════════════════════════════════════════════════════════════════════
|
| 387 |
+
# 4. DETERMINISTIC CLAUSE SPLITTING (Fix 1 from bug report)
|
|
|
|
| 388 |
# ═══════════════════════════════════════════════════════════════════════
|
| 389 |
|
| 390 |
+
# Document-level chunk cache: same text always produces same chunks
|
| 391 |
+
_chunk_cache = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 392 |
|
| 393 |
def split_clauses(text):
|
| 394 |
"""Deterministic, structure-aware clause splitting.
|
| 395 |
+
Fix 1: Same input ALWAYS produces same output. Normalized text is hashed
|
| 396 |
and cached so repeated runs on identical documents are identical."""
|
| 397 |
+
# Normalize whitespace before hashing for determinism
|
| 398 |
normalized = re.sub(r'\s+', ' ', text.strip())
|
| 399 |
text_hash = hashlib.sha256(normalized.encode()).hexdigest()
|
| 400 |
+
if text_hash in _chunk_cache:
|
| 401 |
+
return _chunk_cache[text_hash]
|
|
|
|
| 402 |
|
| 403 |
text = re.sub(r'\n{3,}', '\n\n', text.strip())
|
| 404 |
|
| 405 |
# First try to detect numbered sections (1., 2., 3.1, (a), etc.)
|
| 406 |
+
section_pattern = re.compile(
|
| 407 |
+
r'(?:^|\n\n)'
|
| 408 |
+
r'(?='
|
| 409 |
+
r'\d+(?:\.\d+)*[.)]\s' # 1. 2. 3.1. 3.1)
|
| 410 |
+
r'|[A-Z]{2,}[A-Z\s]*\n' # ALL CAPS HEADERS
|
| 411 |
+
r'|\([a-z]\)\s' # (a) (b) (c)
|
| 412 |
+
r'|(?:Section|Article|Clause)\s+\d+' # Section 1, Article 2
|
| 413 |
+
r')',
|
| 414 |
+
re.MULTILINE
|
| 415 |
+
)
|
| 416 |
+
|
| 417 |
+
positions = [m.start() for m in section_pattern.finditer(text)]
|
| 418 |
|
| 419 |
if len(positions) >= 3:
|
| 420 |
+
# Document has clear section structure — split on sections
|
| 421 |
clauses = []
|
| 422 |
for i, pos in enumerate(positions):
|
| 423 |
end = positions[i + 1] if i + 1 < len(positions) else len(text)
|
| 424 |
chunk = text[pos:end].strip()
|
| 425 |
if len(chunk) > 30:
|
| 426 |
+
# If a section is very long, split on paragraph breaks within it
|
| 427 |
if len(chunk) > 1500:
|
| 428 |
sub_parts = chunk.split('\n\n')
|
| 429 |
current = ""
|
|
|
|
| 438 |
clauses.append(current.strip())
|
| 439 |
else:
|
| 440 |
clauses.append(chunk)
|
| 441 |
+
# Also capture anything before the first section
|
| 442 |
if positions and positions[0] > 50:
|
| 443 |
preamble = text[:positions[0]].strip()
|
| 444 |
if len(preamble) > 30:
|
| 445 |
clauses.insert(0, preamble)
|
| 446 |
result = clauses if clauses else _fallback_split(text)
|
| 447 |
+
_chunk_cache[text_hash] = result
|
| 448 |
return result
|
| 449 |
else:
|
| 450 |
result = _fallback_split(text)
|
| 451 |
+
_chunk_cache[text_hash] = result
|
| 452 |
return result
|
| 453 |
|
| 454 |
def _fallback_split(text):
|
| 455 |
"""Fallback: split on paragraph breaks and sentence boundaries."""
|
| 456 |
+
# Try paragraph-based splitting first
|
| 457 |
paragraphs = text.split('\n\n')
|
| 458 |
if len(paragraphs) >= 3:
|
| 459 |
clauses = []
|
|
|
|
| 461 |
p = p.strip()
|
| 462 |
if len(p) > 30:
|
| 463 |
if len(p) > 1500:
|
| 464 |
+
# Split long paragraphs on sentences
|
| 465 |
sents = re.split(r'(?<=[.!?])\s+(?=[A-Z])', p)
|
| 466 |
current = ""
|
| 467 |
for s in sents:
|
|
|
|
| 477 |
clauses.append(p)
|
| 478 |
return clauses
|
| 479 |
|
| 480 |
+
# Last resort: sentence splitting
|
| 481 |
parts = re.split(r'(?<=[.!?])\s+(?=[A-Z0-9(])', text)
|
| 482 |
return [p.strip() for p in parts if len(p.strip()) > 30]
|
| 483 |
|
| 484 |
# ═══════════════════════════════════════════════════════════════════════
|
| 485 |
+
# 5. CLAUSE DETECTION — FIXED: sigmoid + per-class thresholds + caching
|
| 486 |
+
# Fix 3: Strip section headings before classification
|
| 487 |
+
# Fix 6: Label guardrails for high-confidence false positives
|
|
|
|
| 488 |
# ═══════════════════════════════════════════════════════════════════════
|
| 489 |
|
| 490 |
+
# Fix 3: Section heading pattern — strip before classifying
|
| 491 |
_HEADING_RE = re.compile(r'^\d+(?:\.\d+)*\s+[A-Z][A-Z\s&,/]+$', re.MULTILINE)
|
| 492 |
|
| 493 |
def _strip_heading(text):
|
|
|
|
| 498 |
return stripped if len(stripped) > 20 else text
|
| 499 |
return text
|
| 500 |
|
| 501 |
+
# Fix 6: Label guardrails — keyword validation for high-confidence labels
|
| 502 |
_LABEL_GUARDRAILS = {
|
| 503 |
"Liquidated Damages": re.compile(
|
| 504 |
r'liquidated|pre-?determined.{0,10}damage|agreed.{0,10}sum|penalty clause|stipulated.{0,10}damage',
|
|
|
|
| 508 |
r'uncapped|unlimited.{0,10}liabilit|no.{0,10}(limit|cap).{0,10}liabilit',
|
| 509 |
re.IGNORECASE
|
| 510 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
}
|
| 512 |
|
| 513 |
def _apply_guardrails(label, text, confidence):
|
| 514 |
+
"""Fix 6: If label has a guardrail and text lacks required keywords, demote."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 515 |
guard = _LABEL_GUARDRAILS.get(label)
|
| 516 |
if guard and not guard.search(text):
|
| 517 |
+
return "Other", confidence * 0.3 # demote to Other with reduced confidence
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 518 |
return label, confidence
|
| 519 |
|
| 520 |
def _text_hash(text):
|
| 521 |
return hashlib.md5(text.encode()).hexdigest()
|
| 522 |
|
| 523 |
+
_prediction_cache = {}
|
| 524 |
+
_CACHE_MAX = 2000
|
| 525 |
|
| 526 |
def classify_cuad(clause_text):
|
| 527 |
if cuad_model is None or cuad_tokenizer is None:
|
| 528 |
return _classify_regex(clause_text)
|
| 529 |
|
| 530 |
+
# Fix 3: Strip section headings before classification
|
| 531 |
clean_text = _strip_heading(clause_text)
|
| 532 |
|
| 533 |
+
# Check cache
|
| 534 |
h = _text_hash(clean_text[:512])
|
| 535 |
+
if h in _prediction_cache:
|
| 536 |
+
return _prediction_cache[h]
|
|
|
|
| 537 |
|
| 538 |
try:
|
|
|
|
| 539 |
inputs = cuad_tokenizer(
|
| 540 |
clean_text,
|
| 541 |
return_tensors="pt",
|
| 542 |
truncation=True,
|
| 543 |
+
max_length=256,
|
| 544 |
padding=True
|
| 545 |
)
|
| 546 |
with torch.no_grad():
|
| 547 |
logits = cuad_model(**inputs).logits
|
| 548 |
|
| 549 |
+
# FIXED: Use sigmoid for multi-label (not softmax)
|
| 550 |
+
probs = torch.sigmoid(logits)[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 551 |
|
| 552 |
results = []
|
| 553 |
+
for i, prob in enumerate(probs):
|
| 554 |
+
threshold = _CUAD_THRESHOLDS.get(i, 0.40)
|
| 555 |
+
if float(prob) > threshold and i < len(CUAD_LABELS):
|
| 556 |
+
label = CUAD_LABELS[i]
|
| 557 |
+
conf = float(prob)
|
| 558 |
+
# Fix 6: Apply guardrails — reject high-confidence false positives
|
| 559 |
+
label, conf = _apply_guardrails(label, clause_text, conf)
|
| 560 |
+
if label == "Other" and conf < 0.3:
|
| 561 |
+
continue # Skip demoted labels
|
| 562 |
risk = RISK_MAP.get(label, "LOW")
|
|
|
|
| 563 |
results.append({
|
| 564 |
"label": label,
|
| 565 |
"confidence": round(conf, 3),
|
|
|
|
| 567 |
"description": DESC_MAP.get(label, label),
|
| 568 |
"source": "ml",
|
| 569 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 570 |
results.sort(key=lambda x: x["confidence"], reverse=True)
|
| 571 |
|
| 572 |
# If no ML results, also try regex to catch what model misses
|
| 573 |
if not results:
|
| 574 |
results = _classify_regex(clause_text)
|
| 575 |
|
| 576 |
+
# Cache result
|
| 577 |
+
if len(_prediction_cache) < _CACHE_MAX:
|
| 578 |
+
_prediction_cache[h] = results
|
| 579 |
+
|
| 580 |
return results
|
| 581 |
except Exception as e:
|
| 582 |
print(f"[ClauseGuard] CUAD inference error: {e}")
|
| 583 |
return _classify_regex(clause_text)
|
| 584 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 585 |
_REGEX_PATTERNS = {
|
| 586 |
"Limitation of liability": [r"not liable", r"shall not be (liable|responsible)", r"in no event.*liable", r"limitation of liability", r"without warranty", r"disclaim"],
|
| 587 |
"Unilateral termination": [r"terminat.*at any time", r"suspend.*account.*without", r"we may (terminat|suspend|discontinu)", r"right to (terminat|suspend)"],
|
|
|
|
| 593 |
"Arbitration": [r"arbitrat", r"binding arbitration", r"waive.*right.*court", r"class action waiver"],
|
| 594 |
"Governing Law": [r"governed by", r"laws of", r"jurisdiction of"],
|
| 595 |
"Termination for Convenience": [r"terminat.*for convenience", r"terminat.*without cause", r"terminat.*at any time"],
|
| 596 |
+
"Non-Compete": [r"non-compete", r"shall not compete", r"competition"],
|
| 597 |
"Exclusivity": [r"exclusive(?:ly)?(?:\s+(?:deal|relationship|partner|right))", r"exclusivity"],
|
| 598 |
+
"IP Ownership Assignment": [r"assign.*intellectual property", r"ownership of.*ip", r"all rights.*assign"],
|
| 599 |
"Uncapped Liability": [r"unlimited liability", r"uncapped", r"no.*limit.*liability"],
|
| 600 |
"Cap on Liability": [r"cap on liability", r"maximum liability", r"liability.*shall not exceed", r"aggregate liability.*not exceed"],
|
| 601 |
"Indemnification": [r"indemnif", r"hold harmless", r"defend.*against.*claim"],
|
| 602 |
"Confidentiality": [r"confidential(?:ity)?", r"non-disclosure", r"\bnda\b"],
|
| 603 |
"Force Majeure": [r"force majeure", r"act of god", r"beyond.*(?:reasonable\s+)?control"],
|
| 604 |
"Penalties": [r"penalt(?:y|ies)", r"late fee", r"default charge", r"interest on overdue"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
}
|
| 606 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 607 |
def _classify_regex(text):
|
| 608 |
"""Regex fallback — returns pattern match, NOT fake confidence."""
|
| 609 |
text_lower = text.lower()
|
| 610 |
results = []
|
| 611 |
seen = set()
|
| 612 |
+
for label, patterns in _REGEX_PATTERNS.items():
|
| 613 |
for pat in patterns:
|
| 614 |
+
if re.search(pat, text_lower):
|
| 615 |
if label not in seen:
|
| 616 |
risk = RISK_MAP.get(label, "MEDIUM")
|
| 617 |
results.append({
|
| 618 |
"label": label,
|
| 619 |
+
"confidence": None, # FIXED: no fake confidence for regex
|
| 620 |
"risk": risk,
|
| 621 |
"description": DESC_MAP.get(label, label),
|
| 622 |
"source": "pattern",
|
|
|
|
| 627 |
|
| 628 |
# ═══════════════════════════════════════════════════════════════════════
|
| 629 |
# 6. LEGAL NER — ML model with regex fallback
|
|
|
|
| 630 |
# ═══════════════════════════════════════════════════════════════════════
|
| 631 |
|
| 632 |
def extract_entities(text):
|
| 633 |
"""Extract entities using ML model (matterstack/legal-bert-ner) with regex fallback."""
|
| 634 |
entities = []
|
| 635 |
|
| 636 |
+
# Try ML NER first
|
| 637 |
if _HAS_NER_MODEL and ner_pipeline is not None:
|
| 638 |
try:
|
| 639 |
+
# Process in chunks (model has max length limits)
|
| 640 |
+
chunks = [text[i:i+512] for i in range(0, min(len(text), 10000), 450)]
|
| 641 |
+
offset = 0
|
| 642 |
+
for chunk in chunks:
|
| 643 |
+
ner_results = ner_pipeline(chunk)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 644 |
for ent in ner_results:
|
| 645 |
if ent.get("score", 0) > 0.5:
|
| 646 |
entities.append({
|
|
|
|
| 651 |
"score": round(ent["score"], 3),
|
| 652 |
"source": "ml",
|
| 653 |
})
|
| 654 |
+
offset += 450
|
| 655 |
except Exception as e:
|
| 656 |
print(f"[ClauseGuard] ML NER error, falling back to regex: {e}")
|
| 657 |
entities = _extract_entities_regex(text)
|
| 658 |
else:
|
| 659 |
entities = _extract_entities_regex(text)
|
| 660 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 661 |
# Always supplement with regex patterns for things NER often misses
|
| 662 |
regex_ents = _extract_entities_regex(text)
|
| 663 |
+
# Merge: add regex entities that don't overlap with ML entities
|
| 664 |
ml_spans = set()
|
| 665 |
for e in entities:
|
| 666 |
for pos in range(e["start"], e["end"]):
|
|
|
|
| 680 |
return filtered
|
| 681 |
|
| 682 |
def _map_ner_label(label):
|
| 683 |
+
"""Map NER model labels to our entity types."""
|
| 684 |
label = label.upper()
|
| 685 |
mapping = {
|
| 686 |
+
"PER": "PERSON",
|
| 687 |
+
"PERSON": "PERSON",
|
| 688 |
+
"ORG": "PARTY",
|
| 689 |
+
"ORGANIZATION": "PARTY",
|
| 690 |
+
"LOC": "JURISDICTION",
|
| 691 |
+
"LOCATION": "JURISDICTION",
|
| 692 |
+
"GPE": "JURISDICTION",
|
| 693 |
+
"DATE": "DATE",
|
| 694 |
+
"MONEY": "MONEY",
|
| 695 |
+
"MISC": "MISC",
|
| 696 |
+
"LAW": "LEGAL_REF",
|
| 697 |
}
|
| 698 |
return mapping.get(label, label)
|
| 699 |
|
|
|
|
| 701 |
"""Regex-based NER fallback."""
|
| 702 |
entities = []
|
| 703 |
patterns = [
|
| 704 |
+
# Dates
|
| 705 |
(r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', "DATE"),
|
| 706 |
(r'\b\d{1,2}/\d{1,2}/\d{2,4}\b', "DATE"),
|
| 707 |
(r'\b\d{1,2}-(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\d{2,4}\b', "DATE"),
|
| 708 |
(r'\b(?:Effective|Commencement|Expiration|Termination)\s+Date\b', "DATE_REF"),
|
| 709 |
+
# Money
|
| 710 |
(r'\$\s?\d{1,3}(?:,\d{3})*(?:\.\d{2})?(?:\s*(?:million|billion|thousand|M|B|K))?', "MONEY"),
|
| 711 |
(r'\b\d{1,3}(?:,\d{3})*(?:\.\d{2})?\s*(?:USD|EUR|GBP|dollars|euros|pounds)', "MONEY"),
|
| 712 |
(r'\b(?:USD|EUR|GBP)\s*\d{1,3}(?:,\d{3})*(?:\.\d{2})?', "MONEY"),
|
| 713 |
+
# Percentages
|
| 714 |
(r'\b\d+(?:\.\d+)?%', "PERCENTAGE"),
|
| 715 |
+
# Durations
|
| 716 |
(r'\b\d+\s*(?:year|month|week|day|business day)s?\b', "DURATION"),
|
| 717 |
+
# Parties (require suffix to reduce false positives)
|
| 718 |
(r'\b[A-Z][A-Za-z0-9\s&,]+?(?:Inc\.?|LLC|Ltd\.?|Limited|Corp\.?|Corporation|PLC|GmbH|AG|S\.A\.?|B\.V\.?|L\.P\.?|LLP)\b', "PARTY"),
|
| 719 |
(r'\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Customer|Vendor|Client)\b', "PARTY_ROLE"),
|
| 720 |
+
# Jurisdictions
|
| 721 |
(r'\b(?:State|Commonwealth)\s+of\s+[A-Z][a-zA-Z\s]+', "JURISDICTION"),
|
| 722 |
(r'\b(?:California|Delaware|New York|Texas|Florida|England|Ireland|Germany|France|Singapore|Hong Kong|Ontario|British Columbia)\b', "JURISDICTION"),
|
| 723 |
+
# Defined Terms (quoted or parenthesized)
|
| 724 |
(r'"([A-Z][A-Za-z\s]{1,40})"', "DEFINED_TERM"),
|
| 725 |
(r'\((?:the\s+)?"([A-Z][A-Za-z\s]{1,40})"\)', "DEFINED_TERM"),
|
| 726 |
]
|
|
|
|
| 737 |
return entities
|
| 738 |
|
| 739 |
# ═══════════════════════════════════════════════════════════════════════
|
| 740 |
+
# 7. NLI / CONTRADICTION DETECTION — Real semantic analysis
|
|
|
|
|
|
|
| 741 |
# ═══════════════════════════════════════════════════════════════════════
|
| 742 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 743 |
def detect_contradictions(clause_results, raw_text=""):
|
| 744 |
"""
|
| 745 |
Detect contradictions using:
|
|
|
|
| 756 |
clause_texts_by_label[cr["label"]].append(cr.get("text", ""))
|
| 757 |
|
| 758 |
# ── 1. Semantic NLI (if model available) ──
|
| 759 |
+
if _HAS_NLI_MODEL and nli_pipeline is not None:
|
| 760 |
+
# Check clauses that belong to potentially conflicting categories
|
| 761 |
conflict_pairs = [
|
| 762 |
("Uncapped Liability", "Cap on Liability",
|
| 763 |
"Liability cannot be both uncapped and capped simultaneously."),
|
|
|
|
| 772 |
texts_b = clause_texts_by_label[label_b]
|
| 773 |
for ta in texts_a[:2]:
|
| 774 |
for tb in texts_b[:2]:
|
| 775 |
+
try:
|
| 776 |
+
nli_result = nli_pipeline(
|
| 777 |
+
f"{ta[:256]} [SEP] {tb[:256]}",
|
| 778 |
+
truncation=True
|
| 779 |
+
)
|
| 780 |
+
# Check if model predicts contradiction
|
| 781 |
+
for r in (nli_result if isinstance(nli_result, list) else [nli_result]):
|
| 782 |
+
if r.get("label", "").lower() == "contradiction" and r.get("score", 0) > 0.6:
|
| 783 |
+
contradictions.append({
|
| 784 |
+
"type": "CONTRADICTION",
|
| 785 |
+
"explanation": explanation,
|
| 786 |
+
"severity": "HIGH",
|
| 787 |
+
"clauses": [label_a, label_b],
|
| 788 |
+
"confidence": round(r["score"], 3),
|
| 789 |
+
"source": "nli_model",
|
| 790 |
+
})
|
| 791 |
+
except Exception:
|
| 792 |
+
pass
|
| 793 |
|
| 794 |
# Also check for internal contradictions within governing law / termination
|
| 795 |
for label in ["Governing Law", "Termination for Convenience"]:
|
|
|
|
| 797 |
if len(texts) >= 2:
|
| 798 |
for i in range(len(texts)):
|
| 799 |
for j in range(i + 1, min(len(texts), i + 3)):
|
| 800 |
+
try:
|
| 801 |
+
nli_result = nli_pipeline(
|
| 802 |
+
f"{texts[i][:256]} [SEP] {texts[j][:256]}",
|
| 803 |
+
truncation=True
|
| 804 |
+
)
|
| 805 |
+
for r in (nli_result if isinstance(nli_result, list) else [nli_result]):
|
| 806 |
+
if r.get("label", "").lower() == "contradiction" and r.get("score", 0) > 0.6:
|
| 807 |
+
contradictions.append({
|
| 808 |
+
"type": "CONTRADICTION",
|
| 809 |
+
"explanation": f"Conflicting {label} provisions detected — clauses contradict each other.",
|
| 810 |
+
"severity": "HIGH",
|
| 811 |
+
"clauses": [label],
|
| 812 |
+
"confidence": round(r["score"], 3),
|
| 813 |
+
"source": "nli_model",
|
| 814 |
+
})
|
| 815 |
+
except Exception:
|
| 816 |
+
pass
|
| 817 |
else:
|
| 818 |
# ── Heuristic fallback (improved) ──
|
| 819 |
_heuristic_pairs = [
|
|
|
|
| 834 |
"source": "heuristic",
|
| 835 |
})
|
| 836 |
|
| 837 |
+
# ── 2. Missing critical clauses (Fix 4: check raw_text, not labels) ──
|
| 838 |
_REQUIRED_CLAUSE_PATTERNS = {
|
| 839 |
"Governing Law": re.compile(
|
| 840 |
r'govern(?:ed|ing).{0,15}law|applicable.{0,10}law|laws?\s+of\s+the\s+state',
|
|
|
|
| 854 |
),
|
| 855 |
}
|
| 856 |
for clause_name, pattern in _REQUIRED_CLAUSE_PATTERNS.items():
|
| 857 |
+
# Check raw_text directly — it's stable and deterministic
|
| 858 |
if not pattern.search(raw_text):
|
| 859 |
contradictions.append({
|
| 860 |
"type": "MISSING",
|
|
|
|
| 877 |
|
| 878 |
# ═══════════════════════════════════════════════════════════════════════
|
| 879 |
# 8. RISK SCORING
|
|
|
|
| 880 |
# ═══════════════════════════════════════════════════════════════════════
|
| 881 |
|
| 882 |
def compute_risk_score(clause_results, total_clauses):
|
|
|
|
| 886 |
sev_counts[sev] += 1
|
| 887 |
if total_clauses == 0:
|
| 888 |
return 0, "A", sev_counts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 889 |
weighted = sum(sev_counts[s] * RISK_WEIGHTS[s] for s in sev_counts)
|
| 890 |
+
risk = min(100, round(weighted / max(1, total_clauses) * 10))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 891 |
if risk >= 70: grade = "F"
|
| 892 |
elif risk >= 50: grade = "D"
|
| 893 |
elif risk >= 30: grade = "C"
|
|
|
|
| 905 |
clauses = split_clauses(text)
|
| 906 |
if not clauses:
|
| 907 |
return None, "No clauses detected in document"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 908 |
clause_results = []
|
| 909 |
+
for clause in clauses:
|
| 910 |
+
predictions = classify_cuad(clause)
|
| 911 |
if predictions:
|
| 912 |
for pred in predictions:
|
| 913 |
clause_results.append({
|
|
|
|
| 922 |
contradictions = detect_contradictions(clause_results, text)
|
| 923 |
risk, grade, sev_counts = compute_risk_score(clause_results, len(clauses))
|
| 924 |
obligations = extract_obligations(text)
|
| 925 |
+
# Fix 5: Compliance runs against full raw_text (already done in compliance.py)
|
| 926 |
compliance = check_compliance(text)
|
| 927 |
|
| 928 |
+
# Fix 2: Compute flagged_clauses AFTER all processing is complete
|
| 929 |
flagged_clause_count = len(clause_results)
|
| 930 |
unique_flagged_texts = len(set(cr["text"] for cr in clause_results))
|
| 931 |
|
|
|
|
| 953 |
return result, None
|
| 954 |
|
| 955 |
# ═══════════════════════════════════════════════════════════════════════
|
| 956 |
+
# 10. EXPORT FUNCTIONS — FIXED: per-session temp files
|
| 957 |
# ═══════════════════════════════════════════════════════════════════════
|
| 958 |
|
| 959 |
def export_json(result):
|
|
|
|
| 981 |
return output.getvalue()
|
| 982 |
|
| 983 |
# ═══════════════════════════════════════════════════════════════════════
|
| 984 |
+
# 11. UI RENDERING — FIXED: shows confidence source properly
|
| 985 |
# ═══════════════════════════════════════════════════════════════════════
|
| 986 |
|
| 987 |
def render_summary(result):
|
|
|
|
| 1149 |
entities = sorted(result.get("entities", []), key=lambda x: x["start"])
|
| 1150 |
html_parts = []
|
| 1151 |
last_end = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1152 |
for e in entities:
|
| 1153 |
if e["start"] >= last_end:
|
| 1154 |
+
html_parts.append(text[last_end:e["start"]].replace("<", "<").replace(">", ">"))
|
| 1155 |
+
color = {
|
| 1156 |
+
"DATE": "#bfdbfe", "DATE_REF": "#bfdbfe",
|
| 1157 |
+
"MONEY": "#bbf7d0", "PERCENTAGE": "#a7f3d0",
|
| 1158 |
+
"DURATION": "#c7d2fe",
|
| 1159 |
+
"PARTY": "#ddd6fe", "PARTY_ROLE": "#ddd6fe",
|
| 1160 |
+
"PERSON": "#fbcfe8",
|
| 1161 |
+
"JURISDICTION": "#fde68a",
|
| 1162 |
+
"DEFINED_TERM": "#fbcfe8",
|
| 1163 |
+
"LEGAL_REF": "#e5e7eb",
|
| 1164 |
+
}.get(e["type"], "#e5e7eb")
|
| 1165 |
+
label = e["type"].replace("_", " ")
|
| 1166 |
html_parts.append(
|
| 1167 |
+
f'<mark style="background:{color};padding:1px 2px;border-radius:2px;font-size:12px;" title="{label}">{e["text"].replace("<","<").replace(">",">")}</mark>'
|
|
|
|
| 1168 |
)
|
| 1169 |
last_end = e["end"]
|
| 1170 |
+
html_parts.append(text[last_end:].replace("<", "<").replace(">", ">"))
|
| 1171 |
+
highlighted = "".join(html_parts)
|
| 1172 |
+
return f"""
|
| 1173 |
+
<div style="font-family:monospace;font-size:13px;line-height:1.6;padding:16px;border:1px solid #e5e7eb;border-radius:8px;background:#fff;max-height:600px;overflow-y:auto;white-space:pre-wrap;">
|
| 1174 |
+
{highlighted}
|
| 1175 |
+
</div>
|
| 1176 |
+
"""
|
| 1177 |
|
| 1178 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1179 |
+
# 12. COMPARISON UI FUNCTIONS
|
| 1180 |
# ═══════════════════════════════════════════════════════════════════════
|
| 1181 |
|
| 1182 |
def run_comparison(text_a, text_b):
|
|
|
|
| 1316 |
|
| 1317 |
14. THIRD PARTY BENEFICIARY. No third party shall have rights under this Agreement except as expressly provided."""
|
| 1318 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1319 |
with gr.Blocks(
|
| 1320 |
title="ClauseGuard — AI Contract Analysis",
|
| 1321 |
css="""
|
|
|
|
| 1334 |
<h1 style="font-size:24px;font-weight:700;margin:0;color:#1f2937;">🛡️ ClauseGuard</h1>
|
| 1335 |
<p style="font-size:13px;color:#6b7280;margin:4px 0 0 0;">AI-Powered Legal Contract Analysis · 41 Clause Categories · Risk Scoring · ML NER · NLI Contradictions · Compliance · Obligations · <strong>Q&A Chatbot</strong> · <strong>Clause Redlining</strong> · <strong>OCR</strong></p>
|
| 1336 |
</div>
|
| 1337 |
+
<div style="font-size:12px;color:#9ca3af;">v4.0 · Precision Legal AI</div>
|
| 1338 |
</div>
|
| 1339 |
""")
|
| 1340 |
|
|
|
|
| 1457 |
<h3 style="margin:0;font-size:16px;color:#1f2937;">Contract Q&A Chatbot</h3>
|
| 1458 |
</div>
|
| 1459 |
<p style="font-size:12px;color:#6b7280;margin:0;line-height:1.5;">
|
| 1460 |
+
Ask questions about your analyzed contract. The chatbot uses <strong>RAG</strong> (Retrieval-Augmented Generation)
|
| 1461 |
to find relevant clauses and generate accurate answers grounded in your contract text.
|
| 1462 |
<br>
|
| 1463 |
<strong>Step 1:</strong> Analyze a contract in the "📄 Single Contract Analysis" tab.
|
|
|
|
| 1526 |
doc_html, obligations_html, compliance_html, redlining_html,
|
| 1527 |
json_file, csv_file, status_msg, analysis_state,
|
| 1528 |
chunks_state, embeddings_state, chatbot_index_status,
|
| 1529 |
+
]
|
|
|
|
| 1530 |
)
|
| 1531 |
|
| 1532 |
clear_btn.click(
|
|
|
|
| 1542 |
comp_btn.click(
|
| 1543 |
run_comparison,
|
| 1544 |
inputs=[comp_text_a, comp_text_b],
|
| 1545 |
+
outputs=[comp_result_html, comp_json]
|
|
|
|
| 1546 |
)
|
| 1547 |
|
| 1548 |
gr.HTML("""
|
| 1549 |
<div style="margin-top:24px;padding:16px 0;border-top:1px solid #e5e7eb;text-align:center;">
|
| 1550 |
<p style="font-size:11px;color:#9ca3af;">
|
| 1551 |
⚠️ Not legal advice. For informational purposes only.
|
| 1552 |
+
· Model: <a href="https://huggingface.co/Mokshith31/legalbert-contract-clause-classification" style="color:#6b7280;">Legal-BERT + CUAD (41 classes)</a>
|
| 1553 |
· NER: <a href="https://huggingface.co/matterstack/legal-bert-ner" style="color:#6b7280;">Legal-BERT NER</a>
|
| 1554 |
· NLI: <a href="https://huggingface.co/cross-encoder/nli-deberta-v3-base" style="color:#6b7280;">DeBERTa-v3 NLI</a>
|
| 1555 |
· LLM: <a href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color:#6b7280;">Qwen2.5-7B</a>
|
| 1556 |
· OCR: <a href="https://github.com/mindee/doctr" style="color:#6b7280;">docTR</a>
|
|
|
|
| 1557 |
· Dataset: <a href="https://huggingface.co/datasets/theatticusproject/cuad-qa" style="color:#6b7280;">CUAD</a>
|
| 1558 |
· <a href="https://huggingface.co/spaces/gaurv007/ClauseGuard" style="color:#6b7280;">ClauseGuard Space</a>
|
| 1559 |
</p>
|
chatbot.py
CHANGED
|
@@ -52,9 +52,7 @@ except ImportError:
|
|
| 52 |
_chatbot_status = {"embedder": "not_loaded", "llm": "not_loaded"}
|
| 53 |
|
| 54 |
def _load_embedder():
|
| 55 |
-
"""Load sentence-transformers embedding model (lazy).
|
| 56 |
-
PERF v4.3: Upgraded from all-MiniLM-L6-v2 to BAAI/bge-small-en-v1.5
|
| 57 |
-
(+21% MTEB retrieval accuracy, same 384-dim, same latency)."""
|
| 58 |
global _embedder, _chatbot_status
|
| 59 |
if _embedder is not None:
|
| 60 |
return _embedder
|
|
@@ -62,10 +60,10 @@ def _load_embedder():
|
|
| 62 |
_chatbot_status["embedder"] = "unavailable"
|
| 63 |
return None
|
| 64 |
try:
|
| 65 |
-
print("[ClauseGuard Chat] Loading embedding model:
|
| 66 |
-
_embedder = SentenceTransformer("
|
| 67 |
_chatbot_status["embedder"] = "loaded"
|
| 68 |
-
print("[ClauseGuard Chat] Embedding model loaded
|
| 69 |
return _embedder
|
| 70 |
except Exception as e:
|
| 71 |
_chatbot_status["embedder"] = f"failed: {e}"
|
|
@@ -196,9 +194,7 @@ def retrieve_chunks(query, chunks, embeddings, top_k=5):
|
|
| 196 |
return []
|
| 197 |
|
| 198 |
try:
|
| 199 |
-
|
| 200 |
-
_BGE_QUERY_PREFIX = "Represent this sentence for searching relevant passages: "
|
| 201 |
-
q_emb = embedder.encode([_BGE_QUERY_PREFIX + query], normalize_embeddings=True)
|
| 202 |
scores = (q_emb @ embeddings.T)[0]
|
| 203 |
top_indices = np.argsort(scores)[::-1][:top_k]
|
| 204 |
|
|
|
|
| 52 |
_chatbot_status = {"embedder": "not_loaded", "llm": "not_loaded"}
|
| 53 |
|
| 54 |
def _load_embedder():
|
| 55 |
+
"""Load sentence-transformers embedding model (lazy)."""
|
|
|
|
|
|
|
| 56 |
global _embedder, _chatbot_status
|
| 57 |
if _embedder is not None:
|
| 58 |
return _embedder
|
|
|
|
| 60 |
_chatbot_status["embedder"] = "unavailable"
|
| 61 |
return None
|
| 62 |
try:
|
| 63 |
+
print("[ClauseGuard Chat] Loading embedding model: all-MiniLM-L6-v2...")
|
| 64 |
+
_embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 65 |
_chatbot_status["embedder"] = "loaded"
|
| 66 |
+
print("[ClauseGuard Chat] Embedding model loaded")
|
| 67 |
return _embedder
|
| 68 |
except Exception as e:
|
| 69 |
_chatbot_status["embedder"] = f"failed: {e}"
|
|
|
|
| 194 |
return []
|
| 195 |
|
| 196 |
try:
|
| 197 |
+
q_emb = embedder.encode([query], normalize_embeddings=True)
|
|
|
|
|
|
|
| 198 |
scores = (q_emb @ embeddings.T)[0]
|
| 199 |
top_indices = np.argsort(scores)[::-1][:top_k]
|
| 200 |
|
compare.py
CHANGED
|
@@ -1,36 +1,34 @@
|
|
| 1 |
"""
|
| 2 |
-
ClauseGuard — Contract Comparison Engine v3.
|
| 3 |
═════════════════════════════════════════════
|
| 4 |
-
FIXED in v3.
|
| 5 |
-
•
|
| 6 |
-
•
|
| 7 |
-
•
|
|
|
|
| 8 |
"""
|
| 9 |
|
| 10 |
import re
|
| 11 |
from difflib import SequenceMatcher
|
| 12 |
from collections import defaultdict
|
| 13 |
-
import numpy as np
|
| 14 |
|
| 15 |
# Try to load sentence-transformers for semantic comparison
|
| 16 |
_HAS_EMBEDDINGS = False
|
| 17 |
_embedder = None
|
| 18 |
|
| 19 |
try:
|
| 20 |
-
from sentence_transformers import SentenceTransformer
|
| 21 |
_HAS_EMBEDDINGS = True
|
| 22 |
except ImportError:
|
| 23 |
pass
|
| 24 |
|
| 25 |
|
| 26 |
def _load_embedder():
|
| 27 |
-
"""Load shared SentenceTransformer singleton.
|
| 28 |
-
PERF v4.3: Upgraded to BAAI/bge-small-en-v1.5 (+21% retrieval accuracy)."""
|
| 29 |
global _embedder
|
| 30 |
if _HAS_EMBEDDINGS and _embedder is None:
|
| 31 |
try:
|
| 32 |
-
_embedder = SentenceTransformer("
|
| 33 |
-
print("[ClauseGuard] Sentence embeddings loaded for comparison
|
| 34 |
except Exception as e:
|
| 35 |
print(f"[ClauseGuard] Embeddings not available: {e}")
|
| 36 |
|
|
@@ -43,34 +41,18 @@ def _normalize_clause(text):
|
|
| 43 |
return text
|
| 44 |
|
| 45 |
|
| 46 |
-
def
|
| 47 |
-
"""
|
| 48 |
-
FIX v3.1: Compute similarity matrix using pre-computed embeddings + matrix multiply.
|
| 49 |
-
Was: O(n²) individual encode() calls per pair.
|
| 50 |
-
Now: O(n+m) encode calls + O(n*m) dot product (fast numpy).
|
| 51 |
-
"""
|
| 52 |
if _embedder is not None:
|
| 53 |
try:
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
emb_b = _embedder.encode(texts_b, normalize_embeddings=True, batch_size=32, show_progress_bar=False)
|
| 59 |
-
# Cosine similarity via dot product (embeddings are L2-normalized)
|
| 60 |
-
sim_matrix = np.dot(emb_a, emb_b.T)
|
| 61 |
-
return sim_matrix, "semantic"
|
| 62 |
except Exception:
|
| 63 |
pass
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
n, m = len(clauses_a), len(clauses_b)
|
| 67 |
-
sim_matrix = np.zeros((n, m))
|
| 68 |
-
for i in range(n):
|
| 69 |
-
norm_a = _normalize_clause(clauses_a[i])
|
| 70 |
-
for j in range(m):
|
| 71 |
-
norm_b = _normalize_clause(clauses_b[j])
|
| 72 |
-
sim_matrix[i, j] = SequenceMatcher(None, norm_a, norm_b).ratio()
|
| 73 |
-
return sim_matrix, "lexical"
|
| 74 |
|
| 75 |
|
| 76 |
def _extract_clause_type(clause_text):
|
|
@@ -107,14 +89,16 @@ def compare_contracts(text_a, text_b, clauses_a=None, clauses_b=None):
|
|
| 107 |
if not text_a or not text_b:
|
| 108 |
return {"error": "Both contracts required"}
|
| 109 |
|
|
|
|
| 110 |
_load_embedder()
|
| 111 |
|
|
|
|
| 112 |
if clauses_a is None:
|
| 113 |
clauses_a = _split_clauses(text_a)
|
| 114 |
if clauses_b is None:
|
| 115 |
clauses_b = _split_clauses(text_b)
|
| 116 |
|
| 117 |
-
# Detect contract types and flag cross-domain comparisons
|
| 118 |
_CONTRACT_TYPE_KEYWORDS = {
|
| 119 |
"employment": ["employee", "employer", "salary", "compensation", "benefits", "vacation", "severance", "at-will"],
|
| 120 |
"lease": ["landlord", "tenant", "rent", "premises", "lease", "occupancy", "security deposit", "eviction"],
|
|
@@ -144,35 +128,25 @@ def compare_contracts(text_a, text_b, clauses_a=None, clauses_b=None):
|
|
| 144 |
for c in clauses_b:
|
| 145 |
type_map_b[_extract_clause_type(c)].append(c)
|
| 146 |
|
| 147 |
-
#
|
| 148 |
-
if clauses_a and clauses_b:
|
| 149 |
-
sim_matrix, method_type = _compute_similarity_matrix(clauses_a, clauses_b)
|
| 150 |
-
else:
|
| 151 |
-
sim_matrix = np.zeros((0, 0))
|
| 152 |
-
method_type = "none"
|
| 153 |
-
|
| 154 |
-
# Find matches using the pre-computed matrix
|
| 155 |
matched_a = set()
|
| 156 |
matched_b = set()
|
| 157 |
modified = []
|
| 158 |
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
masked_row = np.where(available, row, -1.0)
|
| 174 |
-
best_j = int(np.argmax(masked_row))
|
| 175 |
-
best_sim = masked_row[best_j]
|
| 176 |
|
| 177 |
if best_sim >= SIMILARITY_THRESHOLD:
|
| 178 |
matched_a.add(i)
|
|
@@ -180,20 +154,21 @@ def compare_contracts(text_a, text_b, clauses_a=None, clauses_b=None):
|
|
| 180 |
if best_sim < 0.95:
|
| 181 |
modified.append({
|
| 182 |
"type": "modified",
|
| 183 |
-
"similarity": round(
|
| 184 |
-
"clause_a":
|
| 185 |
"clause_b": clauses_b[best_j][:200],
|
| 186 |
-
"clause_type": _extract_clause_type(
|
| 187 |
})
|
| 188 |
elif best_sim >= MODIFIED_THRESHOLD:
|
| 189 |
matched_a.add(i)
|
| 190 |
-
|
|
|
|
| 191 |
modified.append({
|
| 192 |
"type": "partial",
|
| 193 |
-
"similarity": round(
|
| 194 |
-
"clause_a":
|
| 195 |
-
"clause_b": clauses_b[best_j][:200],
|
| 196 |
-
"clause_type": _extract_clause_type(
|
| 197 |
})
|
| 198 |
|
| 199 |
removed = [clauses_a[i] for i in range(len(clauses_a)) if i not in matched_a]
|
|
@@ -201,9 +176,12 @@ def compare_contracts(text_a, text_b, clauses_a=None, clauses_b=None):
|
|
| 201 |
|
| 202 |
# Compute alignment score
|
| 203 |
total_pairs = max(len(clauses_a), len(clauses_b))
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
-
# Risk delta
|
| 207 |
risk_keywords = ["unlimited", "unilateral", "waive", "arbitration", "indemnif",
|
| 208 |
"not liable", "no warranty", "sole discretion", "terminate",
|
| 209 |
"non-compete", "liquidated damages", "uncapped"]
|
|
@@ -226,11 +204,12 @@ def compare_contracts(text_a, text_b, clauses_a=None, clauses_b=None):
|
|
| 226 |
risk_delta = "Similar risk profiles"
|
| 227 |
risk_winner = "tie"
|
| 228 |
|
|
|
|
| 229 |
if is_cross_domain:
|
| 230 |
risk_delta = f"Cross-domain comparison ({type_a} vs {type_b}) — risk delta not meaningful across different contract types"
|
| 231 |
risk_winner = "cross-domain"
|
| 232 |
|
| 233 |
-
comparison_method =
|
| 234 |
|
| 235 |
return {
|
| 236 |
"alignment_score": round(alignment, 3),
|
|
@@ -253,12 +232,14 @@ def compare_contracts(text_a, text_b, clauses_a=None, clauses_b=None):
|
|
| 253 |
def _split_clauses(text):
|
| 254 |
"""Split text into clauses."""
|
| 255 |
text = re.sub(r'\n{3,}', '\n\n', text.strip())
|
|
|
|
| 256 |
section_splits = re.split(
|
| 257 |
r'(?:\n\n)(?=\d+[.)]\s|\([a-z]\)\s|(?:Section|Article|Clause)\s+\d+)',
|
| 258 |
text
|
| 259 |
)
|
| 260 |
if len(section_splits) >= 3:
|
| 261 |
return [p.strip() for p in section_splits if len(p.strip()) > 30]
|
|
|
|
| 262 |
parts = re.split(
|
| 263 |
r'(?<=[.!?])\s+(?=[A-Z0-9(])|(?:\n\n)',
|
| 264 |
text
|
|
@@ -304,6 +285,7 @@ def render_comparison_html(result):
|
|
| 304 |
</div>
|
| 305 |
'''
|
| 306 |
|
|
|
|
| 307 |
if result["modified_clauses"]:
|
| 308 |
html += '<div style="margin-bottom:16px;"><h3 style="font-size:14px;color:#374151;margin-bottom:8px;">📝 Modified Clauses</h3>'
|
| 309 |
for m in result["modified_clauses"][:20]:
|
|
@@ -318,12 +300,14 @@ def render_comparison_html(result):
|
|
| 318 |
'''
|
| 319 |
html += '</div>'
|
| 320 |
|
|
|
|
| 321 |
if result["added_clauses"]:
|
| 322 |
html += '<div style="margin-bottom:16px;"><h3 style="font-size:14px;color:#374151;margin-bottom:8px;">➕ Added in Contract B</h3>'
|
| 323 |
for a in result["added_clauses"][:15]:
|
| 324 |
html += f'<div style="background:#f0fdf4;padding:8px;border-radius:4px;font-size:12px;color:#166534;margin-bottom:4px;border-left:3px solid #22c55e;"><b>{a["type"].upper()}</b> · {a["text"][:150]}...</div>'
|
| 325 |
html += '</div>'
|
| 326 |
|
|
|
|
| 327 |
if result["removed_clauses"]:
|
| 328 |
html += '<div style="margin-bottom:16px;"><h3 style="font-size:14px;color:#374151;margin-bottom:8px;">➖ Removed from Contract A</h3>'
|
| 329 |
for r in result["removed_clauses"][:15]:
|
|
|
|
| 1 |
"""
|
| 2 |
+
ClauseGuard — Contract Comparison Engine v3.0
|
| 3 |
═════════════════════════════════════════════
|
| 4 |
+
FIXED in v3.0:
|
| 5 |
+
• Semantic similarity using sentence embeddings (when available)
|
| 6 |
+
• Better clause type detection with legal taxonomy
|
| 7 |
+
• Improved diff visualization
|
| 8 |
+
• Fallback to SequenceMatcher when embeddings unavailable
|
| 9 |
"""
|
| 10 |
|
| 11 |
import re
|
| 12 |
from difflib import SequenceMatcher
|
| 13 |
from collections import defaultdict
|
|
|
|
| 14 |
|
| 15 |
# Try to load sentence-transformers for semantic comparison
|
| 16 |
_HAS_EMBEDDINGS = False
|
| 17 |
_embedder = None
|
| 18 |
|
| 19 |
try:
|
| 20 |
+
from sentence_transformers import SentenceTransformer, util
|
| 21 |
_HAS_EMBEDDINGS = True
|
| 22 |
except ImportError:
|
| 23 |
pass
|
| 24 |
|
| 25 |
|
| 26 |
def _load_embedder():
|
|
|
|
|
|
|
| 27 |
global _embedder
|
| 28 |
if _HAS_EMBEDDINGS and _embedder is None:
|
| 29 |
try:
|
| 30 |
+
_embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
| 31 |
+
print("[ClauseGuard] Sentence embeddings loaded for comparison")
|
| 32 |
except Exception as e:
|
| 33 |
print(f"[ClauseGuard] Embeddings not available: {e}")
|
| 34 |
|
|
|
|
| 41 |
return text
|
| 42 |
|
| 43 |
|
| 44 |
+
def _clause_similarity(a, b):
|
| 45 |
+
"""Compute similarity using semantic embeddings or string matching."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
if _embedder is not None:
|
| 47 |
try:
|
| 48 |
+
emb_a = _embedder.encode(a[:512], convert_to_tensor=True)
|
| 49 |
+
emb_b = _embedder.encode(b[:512], convert_to_tensor=True)
|
| 50 |
+
sim = util.cos_sim(emb_a, emb_b).item()
|
| 51 |
+
return max(0, min(1, sim))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
except Exception:
|
| 53 |
pass
|
| 54 |
+
# Fallback to string matching
|
| 55 |
+
return SequenceMatcher(None, _normalize_clause(a), _normalize_clause(b)).ratio()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
def _extract_clause_type(clause_text):
|
|
|
|
| 89 |
if not text_a or not text_b:
|
| 90 |
return {"error": "Both contracts required"}
|
| 91 |
|
| 92 |
+
# Try to load embedder
|
| 93 |
_load_embedder()
|
| 94 |
|
| 95 |
+
# Split into clauses if not provided
|
| 96 |
if clauses_a is None:
|
| 97 |
clauses_a = _split_clauses(text_a)
|
| 98 |
if clauses_b is None:
|
| 99 |
clauses_b = _split_clauses(text_b)
|
| 100 |
|
| 101 |
+
# Fix 9: Detect contract types and flag cross-domain comparisons
|
| 102 |
_CONTRACT_TYPE_KEYWORDS = {
|
| 103 |
"employment": ["employee", "employer", "salary", "compensation", "benefits", "vacation", "severance", "at-will"],
|
| 104 |
"lease": ["landlord", "tenant", "rent", "premises", "lease", "occupancy", "security deposit", "eviction"],
|
|
|
|
| 128 |
for c in clauses_b:
|
| 129 |
type_map_b[_extract_clause_type(c)].append(c)
|
| 130 |
|
| 131 |
+
# Find matches
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
matched_a = set()
|
| 133 |
matched_b = set()
|
| 134 |
modified = []
|
| 135 |
|
| 136 |
+
# Fix 10: Raise thresholds to reject false "modified" matches
|
| 137 |
+
SIMILARITY_THRESHOLD = 0.75 # was 0.70 — too many false matches
|
| 138 |
+
MODIFIED_THRESHOLD = 0.55 # was 0.40 — "Good Reason" ≠ "Force Majeure"
|
| 139 |
+
|
| 140 |
+
for i, ca in enumerate(clauses_a):
|
| 141 |
+
best_sim = 0
|
| 142 |
+
best_j = -1
|
| 143 |
+
for j, cb in enumerate(clauses_b):
|
| 144 |
+
if j in matched_b:
|
| 145 |
+
continue
|
| 146 |
+
sim = _clause_similarity(ca, cb)
|
| 147 |
+
if sim > best_sim:
|
| 148 |
+
best_sim = sim
|
| 149 |
+
best_j = j
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
if best_sim >= SIMILARITY_THRESHOLD:
|
| 152 |
matched_a.add(i)
|
|
|
|
| 154 |
if best_sim < 0.95:
|
| 155 |
modified.append({
|
| 156 |
"type": "modified",
|
| 157 |
+
"similarity": round(best_sim, 3),
|
| 158 |
+
"clause_a": ca[:200],
|
| 159 |
"clause_b": clauses_b[best_j][:200],
|
| 160 |
+
"clause_type": _extract_clause_type(ca),
|
| 161 |
})
|
| 162 |
elif best_sim >= MODIFIED_THRESHOLD:
|
| 163 |
matched_a.add(i)
|
| 164 |
+
if best_j >= 0:
|
| 165 |
+
matched_b.add(best_j)
|
| 166 |
modified.append({
|
| 167 |
"type": "partial",
|
| 168 |
+
"similarity": round(best_sim, 3),
|
| 169 |
+
"clause_a": ca[:200],
|
| 170 |
+
"clause_b": clauses_b[best_j][:200] if best_j >= 0 else "",
|
| 171 |
+
"clause_type": _extract_clause_type(ca),
|
| 172 |
})
|
| 173 |
|
| 174 |
removed = [clauses_a[i] for i in range(len(clauses_a)) if i not in matched_a]
|
|
|
|
| 176 |
|
| 177 |
# Compute alignment score
|
| 178 |
total_pairs = max(len(clauses_a), len(clauses_b))
|
| 179 |
+
if total_pairs > 0:
|
| 180 |
+
alignment = len(matched_a) / total_pairs
|
| 181 |
+
else:
|
| 182 |
+
alignment = 0.0
|
| 183 |
|
| 184 |
+
# Risk delta: compare risk keywords with context
|
| 185 |
risk_keywords = ["unlimited", "unilateral", "waive", "arbitration", "indemnif",
|
| 186 |
"not liable", "no warranty", "sole discretion", "terminate",
|
| 187 |
"non-compete", "liquidated damages", "uncapped"]
|
|
|
|
| 204 |
risk_delta = "Similar risk profiles"
|
| 205 |
risk_winner = "tie"
|
| 206 |
|
| 207 |
+
# Fix 9: Cross-domain warning
|
| 208 |
if is_cross_domain:
|
| 209 |
risk_delta = f"Cross-domain comparison ({type_a} vs {type_b}) — risk delta not meaningful across different contract types"
|
| 210 |
risk_winner = "cross-domain"
|
| 211 |
|
| 212 |
+
comparison_method = "semantic (sentence embeddings)" if _embedder is not None else "lexical (string matching)"
|
| 213 |
|
| 214 |
return {
|
| 215 |
"alignment_score": round(alignment, 3),
|
|
|
|
| 232 |
def _split_clauses(text):
|
| 233 |
"""Split text into clauses."""
|
| 234 |
text = re.sub(r'\n{3,}', '\n\n', text.strip())
|
| 235 |
+
# Try section-based splitting first
|
| 236 |
section_splits = re.split(
|
| 237 |
r'(?:\n\n)(?=\d+[.)]\s|\([a-z]\)\s|(?:Section|Article|Clause)\s+\d+)',
|
| 238 |
text
|
| 239 |
)
|
| 240 |
if len(section_splits) >= 3:
|
| 241 |
return [p.strip() for p in section_splits if len(p.strip()) > 30]
|
| 242 |
+
# Fallback to paragraph/sentence splitting
|
| 243 |
parts = re.split(
|
| 244 |
r'(?<=[.!?])\s+(?=[A-Z0-9(])|(?:\n\n)',
|
| 245 |
text
|
|
|
|
| 285 |
</div>
|
| 286 |
'''
|
| 287 |
|
| 288 |
+
# Modified clauses
|
| 289 |
if result["modified_clauses"]:
|
| 290 |
html += '<div style="margin-bottom:16px;"><h3 style="font-size:14px;color:#374151;margin-bottom:8px;">📝 Modified Clauses</h3>'
|
| 291 |
for m in result["modified_clauses"][:20]:
|
|
|
|
| 300 |
'''
|
| 301 |
html += '</div>'
|
| 302 |
|
| 303 |
+
# Added clauses
|
| 304 |
if result["added_clauses"]:
|
| 305 |
html += '<div style="margin-bottom:16px;"><h3 style="font-size:14px;color:#374151;margin-bottom:8px;">➕ Added in Contract B</h3>'
|
| 306 |
for a in result["added_clauses"][:15]:
|
| 307 |
html += f'<div style="background:#f0fdf4;padding:8px;border-radius:4px;font-size:12px;color:#166534;margin-bottom:4px;border-left:3px solid #22c55e;"><b>{a["type"].upper()}</b> · {a["text"][:150]}...</div>'
|
| 308 |
html += '</div>'
|
| 309 |
|
| 310 |
+
# Removed clauses
|
| 311 |
if result["removed_clauses"]:
|
| 312 |
html += '<div style="margin-bottom:16px;"><h3 style="font-size:14px;color:#374151;margin-bottom:8px;">➖ Removed from Contract A</h3>'
|
| 313 |
for r in result["removed_clauses"][:15]:
|
compliance.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
"""
|
| 2 |
-
ClauseGuard — Compliance Checker v3.
|
| 3 |
═════════════════════════════════════
|
| 4 |
-
FIXED in v3.
|
| 5 |
-
•
|
| 6 |
-
•
|
| 7 |
-
•
|
| 8 |
-
•
|
| 9 |
"""
|
| 10 |
|
| 11 |
import re
|
|
@@ -13,19 +13,13 @@ from collections import defaultdict
|
|
| 13 |
|
| 14 |
# Negation patterns that invert compliance meaning
|
| 15 |
_NEGATION_PATTERNS = [
|
| 16 |
-
r"(?:does?\s+)?not\s+(?:require|provide|include|offer|grant|guarantee|ensure|maintain
|
| 17 |
-
r"(?:no|without)\s+(?:obligation|requirement|guarantee|warranty
|
| 18 |
-
r"(?:exclud|waiv|disclaim|exempt|refus|deny|reject
|
| 19 |
-
r"shall\s+not\s+be\s+(?:required|obligated|responsible
|
| 20 |
-
r"is\s+not\s+(?:responsible|liable|required|obligated
|
| 21 |
-
r"expressly\s+(?:disclaim|exclud|waiv|reject)",
|
| 22 |
-
r"to\s+the\s+(?:maximum|fullest)\s+extent\s+(?:permitted|allowed).*(?:disclaim|exclud|waiv)",
|
| 23 |
-
r"notwithstanding.*(?:shall\s+not|does\s+not|is\s+not)",
|
| 24 |
]
|
| 25 |
|
| 26 |
-
# FIX v4.2: Pre-compile negation patterns at module level
|
| 27 |
-
_NEGATION_PATTERNS_COMPILED = [re.compile(p, re.IGNORECASE) for p in _NEGATION_PATTERNS]
|
| 28 |
-
|
| 29 |
# Regulatory requirement definitions
|
| 30 |
REGULATIONS = {
|
| 31 |
"GDPR": {
|
|
@@ -178,59 +172,24 @@ RISK_STYLES = {
|
|
| 178 |
}
|
| 179 |
|
| 180 |
|
| 181 |
-
def
|
| 182 |
-
"""
|
| 183 |
-
# Find sentence boundaries around the match
|
| 184 |
-
# Look backward for sentence start
|
| 185 |
-
sent_start = start_idx
|
| 186 |
-
for i in range(start_idx - 1, max(0, start_idx - 500), -1):
|
| 187 |
-
if text_lower[i] in '.!?' and i < start_idx - 2:
|
| 188 |
-
sent_start = i + 1
|
| 189 |
-
break
|
| 190 |
-
else:
|
| 191 |
-
sent_start = max(0, start_idx - 500)
|
| 192 |
-
|
| 193 |
-
# Look forward for sentence end
|
| 194 |
-
sent_end = start_idx + len(keyword_lower)
|
| 195 |
-
for i in range(sent_end, min(len(text_lower), sent_end + 500)):
|
| 196 |
-
if text_lower[i] in '.!?':
|
| 197 |
-
sent_end = i + 1
|
| 198 |
-
break
|
| 199 |
-
else:
|
| 200 |
-
sent_end = min(len(text_lower), sent_end + 500)
|
| 201 |
-
|
| 202 |
-
return text_lower[sent_start:sent_end].strip()
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
def _check_negation(text_lower, keyword, window=200):
|
| 206 |
-
"""FIX v3.1: Check if a keyword match is negated — uses sentence-aware window."""
|
| 207 |
idx = text_lower.find(keyword.lower())
|
| 208 |
if idx == -1:
|
| 209 |
return False
|
| 210 |
-
|
| 211 |
-
# Get sentence-aware context (more accurate than fixed window)
|
| 212 |
-
sentence = _get_sentence_containing(text_lower, keyword.lower(), idx)
|
| 213 |
-
|
| 214 |
-
# Also get a wider window for cross-sentence negation
|
| 215 |
start = max(0, idx - window)
|
| 216 |
end = min(len(text_lower), idx + len(keyword) + window)
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
# Check sentence first (higher confidence)
|
| 220 |
-
for neg_pat in _NEGATION_PATTERNS_COMPILED:
|
| 221 |
-
if neg_pat.search(sentence):
|
| 222 |
-
return True
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
if neg_pat.search(wider_context):
|
| 227 |
return True
|
| 228 |
-
|
| 229 |
return False
|
| 230 |
|
| 231 |
|
| 232 |
-
def _get_context(text, keyword, window=
|
| 233 |
-
"""Extract context around a keyword match
|
| 234 |
text_lower = text.lower()
|
| 235 |
idx = text_lower.find(keyword.lower())
|
| 236 |
if idx == -1:
|
|
@@ -245,55 +204,12 @@ def _get_context(text, keyword, window=100):
|
|
| 245 |
return context
|
| 246 |
|
| 247 |
|
| 248 |
-
# FIX v4.3: Regulation applicability gates — only apply regulations relevant to the contract type
|
| 249 |
-
_REGULATION_GATES = {
|
| 250 |
-
"SOX": re.compile(
|
| 251 |
-
r'financial\s+statement|internal\s+control|audit\s+committee|public\s+company|sec\s+filing|pcaob|sarbanes',
|
| 252 |
-
re.IGNORECASE
|
| 253 |
-
),
|
| 254 |
-
"HIPAA": re.compile(
|
| 255 |
-
r'protected\s+health|(?<!\w)phi(?!\w)|health\s+information|medical\s+record|business\s+associate\s+agreement|(?<!\w)baa(?!\w)|hipaa',
|
| 256 |
-
re.IGNORECASE
|
| 257 |
-
),
|
| 258 |
-
"FINRA": re.compile(
|
| 259 |
-
r'securities|broker[\-\s]?dealer|investment\s+advis|financial\s+industry|(?<!\w)finra(?!\w)|registered\s+representative',
|
| 260 |
-
re.IGNORECASE
|
| 261 |
-
),
|
| 262 |
-
}
|
| 263 |
-
|
| 264 |
-
|
| 265 |
def check_compliance(text):
|
| 266 |
-
"""Check contract text against
|
| 267 |
-
|
| 268 |
-
FIX v4.3:
|
| 269 |
-
- Regulation applicability gates: SOX/HIPAA/FINRA only checked if contract contains relevant terms
|
| 270 |
-
- Whole-word keyword matching: prevents substring false positives (e.g. "SAR" in "Year 3")
|
| 271 |
-
- GDPR and CCPA always checked (broadly applicable)
|
| 272 |
-
"""
|
| 273 |
text_lower = text.lower()
|
| 274 |
results = {}
|
| 275 |
|
| 276 |
-
# FIX v4.3: Determine which regulations apply to this contract
|
| 277 |
-
applicable_regs = {"GDPR", "CCPA"} # Always check these
|
| 278 |
-
for reg_name, gate_pattern in _REGULATION_GATES.items():
|
| 279 |
-
if gate_pattern.search(text):
|
| 280 |
-
applicable_regs.add(reg_name)
|
| 281 |
-
|
| 282 |
for reg_name, reg_data in REGULATIONS.items():
|
| 283 |
-
# FIX v4.3: Skip regulations that don't apply to this contract
|
| 284 |
-
if reg_name not in applicable_regs:
|
| 285 |
-
# Still include in results but mark as not applicable
|
| 286 |
-
results[reg_name] = {
|
| 287 |
-
"description": reg_data["description"],
|
| 288 |
-
"compliance_rate": -1, # -1 = not applicable
|
| 289 |
-
"checks": [],
|
| 290 |
-
"overall_status": "NOT_APPLICABLE",
|
| 291 |
-
"negated_count": 0,
|
| 292 |
-
"ambiguous_count": 0,
|
| 293 |
-
"note": f"{reg_name} does not appear applicable to this contract type.",
|
| 294 |
-
}
|
| 295 |
-
continue
|
| 296 |
-
|
| 297 |
checks = []
|
| 298 |
for req_name, req_data in reg_data["requirements"].items():
|
| 299 |
matched = False
|
|
@@ -302,27 +218,17 @@ def check_compliance(text):
|
|
| 302 |
context_snippets = []
|
| 303 |
|
| 304 |
for kw in req_data["keywords"]:
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
continue
|
| 317 |
-
|
| 318 |
-
matched_keywords.append(kw)
|
| 319 |
-
if _check_negation(text_lower, kw):
|
| 320 |
-
negated = True
|
| 321 |
-
else:
|
| 322 |
-
matched = True
|
| 323 |
-
ctx = _get_context(text, kw)
|
| 324 |
-
if ctx:
|
| 325 |
-
context_snippets.append(ctx)
|
| 326 |
|
| 327 |
if matched and not negated:
|
| 328 |
status = "PASS"
|
|
@@ -339,7 +245,7 @@ def check_compliance(text):
|
|
| 339 |
"severity": req_data["severity"],
|
| 340 |
"status": status,
|
| 341 |
"matched_keywords": matched_keywords,
|
| 342 |
-
"context": context_snippets[:2],
|
| 343 |
})
|
| 344 |
|
| 345 |
passed = sum(1 for c in checks if c["status"] == "PASS")
|
|
@@ -356,6 +262,7 @@ def check_compliance(text):
|
|
| 356 |
else:
|
| 357 |
overall = "NON-COMPLIANT"
|
| 358 |
|
|
|
|
| 359 |
if any(c["status"] == "NEGATED" and c["severity"] in ("CRITICAL", "HIGH") for c in checks):
|
| 360 |
overall = "WARNING"
|
| 361 |
|
|
@@ -379,28 +286,6 @@ def render_compliance_html(results):
|
|
| 379 |
rate = reg_result["compliance_rate"]
|
| 380 |
status = reg_result["overall_status"]
|
| 381 |
|
| 382 |
-
# FIX v4.3: Handle NOT_APPLICABLE regulations
|
| 383 |
-
if status == "NOT_APPLICABLE":
|
| 384 |
-
note = reg_result.get("note", f"{reg_name} not applicable to this contract.")
|
| 385 |
-
html += f'''
|
| 386 |
-
<div style="border:1px solid #e5e7eb;border-radius:10px;margin-bottom:16px;overflow:hidden;opacity:0.6;">
|
| 387 |
-
<div style="display:flex;justify-content:space-between;align-items:center;padding:12px 16px;background:#f9fafb;border-bottom:1px solid #e5e7eb;">
|
| 388 |
-
<div>
|
| 389 |
-
<span style="font-size:16px;font-weight:700;color:#9ca3af;">{reg_name}</span>
|
| 390 |
-
<p style="font-size:11px;color:#9ca3af;margin:2px 0 0 0;">{reg_result["description"]}</p>
|
| 391 |
-
</div>
|
| 392 |
-
<div style="text-align:right;">
|
| 393 |
-
<div style="font-size:12px;font-weight:600;color:#9ca3af;">N/A</div>
|
| 394 |
-
<div style="font-size:10px;color:#9ca3af;">Not Applicable</div>
|
| 395 |
-
</div>
|
| 396 |
-
</div>
|
| 397 |
-
<div style="padding:10px 16px;font-size:11px;color:#9ca3af;font-style:italic;">
|
| 398 |
-
{note}
|
| 399 |
-
</div>
|
| 400 |
-
</div>
|
| 401 |
-
'''
|
| 402 |
-
continue
|
| 403 |
-
|
| 404 |
status_colors = {
|
| 405 |
"COMPLIANT": ("#16a34a", "#f0fdf4"),
|
| 406 |
"PARTIAL": ("#ca8a04", "#fefce8"),
|
|
|
|
| 1 |
"""
|
| 2 |
+
ClauseGuard — Compliance Checker v3.0
|
| 3 |
═════════════════════════════════════
|
| 4 |
+
FIXED in v3.0:
|
| 5 |
+
• Negation handling (clause saying "we do NOT" won't score as PASS)
|
| 6 |
+
• Context windows around keyword matches (shows what the clause actually says)
|
| 7 |
+
• Semantic scoring (keyword proximity + negation awareness)
|
| 8 |
+
• Added more regulatory frameworks
|
| 9 |
"""
|
| 10 |
|
| 11 |
import re
|
|
|
|
| 13 |
|
| 14 |
# Negation patterns that invert compliance meaning
|
| 15 |
_NEGATION_PATTERNS = [
|
| 16 |
+
r"(?:does?\s+)?not\s+(?:require|provide|include|offer|grant|guarantee|ensure|maintain)",
|
| 17 |
+
r"(?:no|without)\s+(?:obligation|requirement|guarantee|warranty)",
|
| 18 |
+
r"(?:exclud|waiv|disclaim|exempt|refus|deny|reject)",
|
| 19 |
+
r"shall\s+not\s+be\s+(?:required|obligated|responsible)",
|
| 20 |
+
r"is\s+not\s+(?:responsible|liable|required|obligated)",
|
|
|
|
|
|
|
|
|
|
| 21 |
]
|
| 22 |
|
|
|
|
|
|
|
|
|
|
| 23 |
# Regulatory requirement definitions
|
| 24 |
REGULATIONS = {
|
| 25 |
"GDPR": {
|
|
|
|
| 172 |
}
|
| 173 |
|
| 174 |
|
| 175 |
+
def _check_negation(text_lower, keyword, window=100):
|
| 176 |
+
"""Check if a keyword match is negated by nearby negation words."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
idx = text_lower.find(keyword.lower())
|
| 178 |
if idx == -1:
|
| 179 |
return False
|
| 180 |
+
# Get context window around the match
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
start = max(0, idx - window)
|
| 182 |
end = min(len(text_lower), idx + len(keyword) + window)
|
| 183 |
+
context = text_lower[start:end]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
+
for neg_pat in _NEGATION_PATTERNS:
|
| 186 |
+
if re.search(neg_pat, context, re.IGNORECASE):
|
|
|
|
| 187 |
return True
|
|
|
|
| 188 |
return False
|
| 189 |
|
| 190 |
|
| 191 |
+
def _get_context(text, keyword, window=80):
|
| 192 |
+
"""Extract context around a keyword match."""
|
| 193 |
text_lower = text.lower()
|
| 194 |
idx = text_lower.find(keyword.lower())
|
| 195 |
if idx == -1:
|
|
|
|
| 204 |
return context
|
| 205 |
|
| 206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
def check_compliance(text):
|
| 208 |
+
"""Check contract text against all regulatory frameworks with negation handling."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
text_lower = text.lower()
|
| 210 |
results = {}
|
| 211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
for reg_name, reg_data in REGULATIONS.items():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
checks = []
|
| 214 |
for req_name, req_data in reg_data["requirements"].items():
|
| 215 |
matched = False
|
|
|
|
| 218 |
context_snippets = []
|
| 219 |
|
| 220 |
for kw in req_data["keywords"]:
|
| 221 |
+
if kw.lower() in text_lower:
|
| 222 |
+
matched_keywords.append(kw)
|
| 223 |
+
# Check if the match is negated
|
| 224 |
+
if _check_negation(text_lower, kw):
|
| 225 |
+
negated = True
|
| 226 |
+
else:
|
| 227 |
+
matched = True
|
| 228 |
+
# Get context
|
| 229 |
+
ctx = _get_context(text, kw)
|
| 230 |
+
if ctx:
|
| 231 |
+
context_snippets.append(ctx)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
if matched and not negated:
|
| 234 |
status = "PASS"
|
|
|
|
| 245 |
"severity": req_data["severity"],
|
| 246 |
"status": status,
|
| 247 |
"matched_keywords": matched_keywords,
|
| 248 |
+
"context": context_snippets[:2], # Keep top 2 context snippets
|
| 249 |
})
|
| 250 |
|
| 251 |
passed = sum(1 for c in checks if c["status"] == "PASS")
|
|
|
|
| 262 |
else:
|
| 263 |
overall = "NON-COMPLIANT"
|
| 264 |
|
| 265 |
+
# Override if there are negated critical requirements
|
| 266 |
if any(c["status"] == "NEGATED" and c["severity"] in ("CRITICAL", "HIGH") for c in checks):
|
| 267 |
overall = "WARNING"
|
| 268 |
|
|
|
|
| 286 |
rate = reg_result["compliance_rate"]
|
| 287 |
status = reg_result["overall_status"]
|
| 288 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
status_colors = {
|
| 290 |
"COMPLIANT": ("#16a34a", "#f0fdf4"),
|
| 291 |
"PARTIAL": ("#ca8a04", "#fefce8"),
|
extension/background.js
CHANGED
|
@@ -1,19 +1,12 @@
|
|
| 1 |
/**
|
| 2 |
-
* ClauseGuard — Background Service Worker
|
| 3 |
-
* FIXED
|
| 4 |
-
*
|
| 5 |
-
* directly, which doesn't expose a REST /api/analyze endpoint.
|
| 6 |
-
* FIXED v4.3: session_id from analyze response is now stored so chat can use it.
|
| 7 |
-
* FIXED v4.3: sidePanel.open() is properly awaited.
|
| 8 |
*/
|
| 9 |
|
| 10 |
-
|
| 11 |
-
// polling in its /api/analyze route. The extension just needs a REST endpoint.
|
| 12 |
-
// Previously pointed to "https://gaurv007-clauseguard.hf.space" which is a
|
| 13 |
-
// Gradio Space that only exposes /gradio_api/call/analyze (SSE, not REST).
|
| 14 |
-
const API_BASE = "https://clauseguardweb.netlify.app";
|
| 15 |
const FREE_SCANS_PER_MONTH = 10;
|
| 16 |
-
const API_TIMEOUT_MS =
|
| 17 |
|
| 18 |
const SITE_ORIGINS = [
|
| 19 |
"https://clauseguardweb.netlify.app",
|
|
@@ -40,15 +33,10 @@ chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
|
|
| 40 |
case "GET_AUTH": return await getAuth();
|
| 41 |
case "GET_USER": return await getUser();
|
| 42 |
case "CHECK_USAGE": return await checkUsage();
|
| 43 |
-
case "OPEN_SIDEPANEL":
|
| 44 |
-
if (sender.tab?.id) {
|
| 45 |
-
try { await chrome.sidePanel.open({ tabId: sender.tab.id }); } catch(e) { console.warn("sidePanel.open failed:", e); }
|
| 46 |
-
}
|
| 47 |
-
return { ok: true };
|
| 48 |
case "GET_RESULTS": return await getStoredResults(sender.tab?.id || message.tabId);
|
| 49 |
case "SYNC_AUTH": return await syncAuthFromWebsite();
|
| 50 |
case "GET_SCAN_HISTORY": return await getScanHistory();
|
| 51 |
-
case "GET_SESSION_ID": return await getStoredSessionId(sender.tab?.id || message.tabId);
|
| 52 |
default: return null;
|
| 53 |
}
|
| 54 |
};
|
|
@@ -106,8 +94,7 @@ async function handleAnalyze(payload, tabId) {
|
|
| 106 |
let results;
|
| 107 |
try {
|
| 108 |
const auth = await getAuth();
|
| 109 |
-
//
|
| 110 |
-
// handles Gradio SSE polling internally and returns plain JSON.
|
| 111 |
const resp = await fetchWithTimeout(`${API_BASE}/api/analyze`, {
|
| 112 |
method: "POST",
|
| 113 |
headers: {
|
|
@@ -121,17 +108,9 @@ async function handleAnalyze(payload, tabId) {
|
|
| 121 |
return { error: "rate_limited", message: "Too many requests. Please wait a moment." };
|
| 122 |
}
|
| 123 |
|
| 124 |
-
if (resp.
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
results = localAnalyze(text);
|
| 128 |
-
results.source = "local";
|
| 129 |
-
} else if (!resp.ok) {
|
| 130 |
-
throw new Error(`HTTP ${resp.status}`);
|
| 131 |
-
} else {
|
| 132 |
-
results = await resp.json();
|
| 133 |
-
results.source = "api";
|
| 134 |
-
}
|
| 135 |
} catch (err) {
|
| 136 |
console.warn("API unavailable, using local:", err.message);
|
| 137 |
results = localAnalyze(text);
|
|
@@ -141,12 +120,6 @@ async function handleAnalyze(payload, tabId) {
|
|
| 141 |
// Store results
|
| 142 |
if (tabId) {
|
| 143 |
await chrome.storage.local.set({ [`results_${tabId}`]: results });
|
| 144 |
-
|
| 145 |
-
// FIX v4.3: Also store session_id so the chat feature can use it
|
| 146 |
-
if (results.session_id) {
|
| 147 |
-
await chrome.storage.local.set({ [`session_${tabId}`]: results.session_id });
|
| 148 |
-
}
|
| 149 |
-
|
| 150 |
const flagged = results.results?.filter(r => r.categories?.length > 0).length || results.flagged_count || 0;
|
| 151 |
chrome.action.setBadgeText({ text: flagged > 0 ? String(flagged) : "", tabId });
|
| 152 |
if (flagged > 0) chrome.action.setBadgeBackgroundColor({ color: flagged > 3 ? "#ef4444" : "#f59e0b", tabId });
|
|
@@ -178,12 +151,6 @@ async function getScanHistory() {
|
|
| 178 |
return { history: scanHistory };
|
| 179 |
}
|
| 180 |
|
| 181 |
-
// ─── Get stored session ID (for chat) ───
|
| 182 |
-
async function getStoredSessionId(tabId) {
|
| 183 |
-
if (!tabId) return null;
|
| 184 |
-
return new Promise(r => chrome.storage.local.get([`session_${tabId}`], d => r(d[`session_${tabId}`] || null)));
|
| 185 |
-
}
|
| 186 |
-
|
| 187 |
// ─── Sync auth from website ───
|
| 188 |
async function syncAuthFromWebsite() {
|
| 189 |
return await getAuth();
|
|
@@ -215,16 +182,12 @@ function localAnalyze(text) {
|
|
| 215 |
|
| 216 |
const flagged = results.filter(r => r.categories.length > 0);
|
| 217 |
const sev = { HIGH: 0, MEDIUM: 0, LOW: 0 };
|
| 218 |
-
flagged.forEach(r => r.categories.forEach(c =>
|
| 219 |
-
|
| 220 |
-
else sev.MEDIUM++;
|
| 221 |
-
}));
|
| 222 |
-
const weighted = sev.HIGH * 20 + sev.MEDIUM * 10 + sev.LOW * 3;
|
| 223 |
-
const risk = Math.min(100, Math.round(100 * (1 - (1 / (1 + weighted / 30)))));
|
| 224 |
|
| 225 |
return {
|
| 226 |
risk_score: risk,
|
| 227 |
-
grade: risk >=
|
| 228 |
total_clauses: clauses.length, flagged_count: flagged.length, results,
|
| 229 |
};
|
| 230 |
}
|
|
@@ -275,4 +238,4 @@ async function getStoredResults(tabId) {
|
|
| 275 |
return new Promise(r => chrome.storage.local.get([`results_${tabId}`], d => r(d[`results_${tabId}`]||null)));
|
| 276 |
}
|
| 277 |
|
| 278 |
-
chrome.tabs.onRemoved.addListener(tabId => chrome.storage.local.remove([`results_${tabId}`
|
|
|
|
| 1 |
/**
|
| 2 |
+
* ClauseGuard — Background Service Worker v3.0
|
| 3 |
+
* FIXED: API payload now sends {text, source_url} (not {clauses})
|
| 4 |
+
* FIXED: Error handling and retry logic
|
|
|
|
|
|
|
|
|
|
| 5 |
*/
|
| 6 |
|
| 7 |
+
const API_BASE = "https://gaurv007-clauseguard-api.hf.space";
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
const FREE_SCANS_PER_MONTH = 10;
|
| 9 |
+
const API_TIMEOUT_MS = 45000;
|
| 10 |
|
| 11 |
const SITE_ORIGINS = [
|
| 12 |
"https://clauseguardweb.netlify.app",
|
|
|
|
| 33 |
case "GET_AUTH": return await getAuth();
|
| 34 |
case "GET_USER": return await getUser();
|
| 35 |
case "CHECK_USAGE": return await checkUsage();
|
| 36 |
+
case "OPEN_SIDEPANEL": if (sender.tab?.id) chrome.sidePanel.open({ tabId: sender.tab.id }); return { ok: true };
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
case "GET_RESULTS": return await getStoredResults(sender.tab?.id || message.tabId);
|
| 38 |
case "SYNC_AUTH": return await syncAuthFromWebsite();
|
| 39 |
case "GET_SCAN_HISTORY": return await getScanHistory();
|
|
|
|
| 40 |
default: return null;
|
| 41 |
}
|
| 42 |
};
|
|
|
|
| 94 |
let results;
|
| 95 |
try {
|
| 96 |
const auth = await getAuth();
|
| 97 |
+
// FIXED: Send {text, source_url} not {clauses}
|
|
|
|
| 98 |
const resp = await fetchWithTimeout(`${API_BASE}/api/analyze`, {
|
| 99 |
method: "POST",
|
| 100 |
headers: {
|
|
|
|
| 108 |
return { error: "rate_limited", message: "Too many requests. Please wait a moment." };
|
| 109 |
}
|
| 110 |
|
| 111 |
+
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
|
| 112 |
+
results = await resp.json();
|
| 113 |
+
results.source = "api";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
} catch (err) {
|
| 115 |
console.warn("API unavailable, using local:", err.message);
|
| 116 |
results = localAnalyze(text);
|
|
|
|
| 120 |
// Store results
|
| 121 |
if (tabId) {
|
| 122 |
await chrome.storage.local.set({ [`results_${tabId}`]: results });
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
const flagged = results.results?.filter(r => r.categories?.length > 0).length || results.flagged_count || 0;
|
| 124 |
chrome.action.setBadgeText({ text: flagged > 0 ? String(flagged) : "", tabId });
|
| 125 |
if (flagged > 0) chrome.action.setBadgeBackgroundColor({ color: flagged > 3 ? "#ef4444" : "#f59e0b", tabId });
|
|
|
|
| 151 |
return { history: scanHistory };
|
| 152 |
}
|
| 153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
// ─── Sync auth from website ───
|
| 155 |
async function syncAuthFromWebsite() {
|
| 156 |
return await getAuth();
|
|
|
|
| 182 |
|
| 183 |
const flagged = results.filter(r => r.categories.length > 0);
|
| 184 |
const sev = { HIGH: 0, MEDIUM: 0, LOW: 0 };
|
| 185 |
+
flagged.forEach(r => r.categories.forEach(c => sev[c.severity]++));
|
| 186 |
+
const risk = Math.min(100, Math.round((sev.HIGH*20 + sev.MEDIUM*10 + sev.LOW*5) / Math.max(1, clauses.length) * 100));
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
return {
|
| 189 |
risk_score: risk,
|
| 190 |
+
grade: risk >= 60 ? "F" : risk >= 40 ? "D" : risk >= 20 ? "C" : risk >= 10 ? "B" : "A",
|
| 191 |
total_clauses: clauses.length, flagged_count: flagged.length, results,
|
| 192 |
};
|
| 193 |
}
|
|
|
|
| 238 |
return new Promise(r => chrome.storage.local.get([`results_${tabId}`], d => r(d[`results_${tabId}`]||null)));
|
| 239 |
}
|
| 240 |
|
| 241 |
+
chrome.tabs.onRemoved.addListener(tabId => chrome.storage.local.remove([`results_${tabId}`]));
|
extension/content.js
CHANGED
|
@@ -1,8 +1,6 @@
|
|
| 1 |
/**
|
| 2 |
-
* ClauseGuard — Content Script
|
| 3 |
* Page scanning + highlighting + auth bridge.
|
| 4 |
-
*
|
| 5 |
-
* FIXED v4.3: CRITICAL severity is now handled in highlights and tooltips.
|
| 6 |
*
|
| 7 |
* Auth bridge: listens for postMessage from the website's ExtensionBridge component.
|
| 8 |
* Content scripts CAN receive window.postMessage from the page — they share the same
|
|
@@ -18,9 +16,6 @@
|
|
| 18 |
let isScanning = false;
|
| 19 |
let currentHighlights = [];
|
| 20 |
|
| 21 |
-
// Severity ordering (higher = more severe)
|
| 22 |
-
const SEV_ORDER = { CRITICAL: 4, HIGH: 3, MEDIUM: 2, LOW: 1 };
|
| 23 |
-
|
| 24 |
// ─── Auth Bridge ───
|
| 25 |
// Listen for auth sync from our website (ExtensionBridge component sends this)
|
| 26 |
window.addEventListener("message", (event) => {
|
|
@@ -108,18 +103,13 @@
|
|
| 108 |
try {
|
| 109 |
const range = document.createRange();
|
| 110 |
range.setStart(textNode, start); range.setEnd(textNode, end);
|
| 111 |
-
|
| 112 |
-
const severity = clauseData.categories.reduce((m, c) =>
|
| 113 |
-
(SEV_ORDER[c.severity] || 0) > (SEV_ORDER[m] || 0) ? c.severity : m
|
| 114 |
-
, "LOW");
|
| 115 |
const mark = document.createElement("mark");
|
| 116 |
mark.className = `clauseguard-highlight clauseguard-${severity.toLowerCase()}`;
|
| 117 |
mark.dataset.categories = JSON.stringify(clauseData.categories);
|
| 118 |
mark.addEventListener("mouseenter", showTooltip);
|
| 119 |
mark.addEventListener("mouseleave", hideTooltip);
|
| 120 |
-
mark.addEventListener("click", () => {
|
| 121 |
-
try { chrome.runtime.sendMessage({ type: "OPEN_SIDEPANEL" }); } catch {}
|
| 122 |
-
});
|
| 123 |
range.surroundContents(mark);
|
| 124 |
currentHighlights.push(mark);
|
| 125 |
} catch (e) {}
|
|
|
|
| 1 |
/**
|
| 2 |
+
* ClauseGuard — Content Script
|
| 3 |
* Page scanning + highlighting + auth bridge.
|
|
|
|
|
|
|
| 4 |
*
|
| 5 |
* Auth bridge: listens for postMessage from the website's ExtensionBridge component.
|
| 6 |
* Content scripts CAN receive window.postMessage from the page — they share the same
|
|
|
|
| 16 |
let isScanning = false;
|
| 17 |
let currentHighlights = [];
|
| 18 |
|
|
|
|
|
|
|
|
|
|
| 19 |
// ─── Auth Bridge ───
|
| 20 |
// Listen for auth sync from our website (ExtensionBridge component sends this)
|
| 21 |
window.addEventListener("message", (event) => {
|
|
|
|
| 103 |
try {
|
| 104 |
const range = document.createRange();
|
| 105 |
range.setStart(textNode, start); range.setEnd(textNode, end);
|
| 106 |
+
const severity = clauseData.categories.reduce((m, c) => ({ HIGH:3,MEDIUM:2,LOW:1 }[c.severity] > ({ HIGH:3,MEDIUM:2,LOW:1 }[m]) ? c.severity : m), "LOW");
|
|
|
|
|
|
|
|
|
|
| 107 |
const mark = document.createElement("mark");
|
| 108 |
mark.className = `clauseguard-highlight clauseguard-${severity.toLowerCase()}`;
|
| 109 |
mark.dataset.categories = JSON.stringify(clauseData.categories);
|
| 110 |
mark.addEventListener("mouseenter", showTooltip);
|
| 111 |
mark.addEventListener("mouseleave", hideTooltip);
|
| 112 |
+
mark.addEventListener("click", () => { try { chrome.runtime.sendMessage({ type: "OPEN_SIDEPANEL" }); } catch {} });
|
|
|
|
|
|
|
| 113 |
range.surroundContents(mark);
|
| 114 |
currentHighlights.push(mark);
|
| 115 |
} catch (e) {}
|
extension/manifest.json
CHANGED
|
@@ -1,17 +1,17 @@
|
|
| 1 |
{
|
| 2 |
"manifest_version": 3,
|
| 3 |
"name": "ClauseGuard — AI Fine Print Scanner",
|
| 4 |
-
"version": "1.
|
| 5 |
"description": "Highlights unfair clauses in Terms of Service, contracts, and lease agreements.",
|
| 6 |
"permissions": [
|
| 7 |
"activeTab",
|
| 8 |
"storage",
|
| 9 |
"sidePanel",
|
| 10 |
-
"scripting"
|
|
|
|
| 11 |
],
|
| 12 |
"host_permissions": [
|
| 13 |
"https://gaurv007-clauseguard-api.hf.space/*",
|
| 14 |
-
"https://gaurv007-clauseguard.hf.space/*",
|
| 15 |
"https://clauseguardweb.netlify.app/*",
|
| 16 |
"https://*.netlify.app/*"
|
| 17 |
],
|
|
|
|
| 1 |
{
|
| 2 |
"manifest_version": 3,
|
| 3 |
"name": "ClauseGuard — AI Fine Print Scanner",
|
| 4 |
+
"version": "1.0.1",
|
| 5 |
"description": "Highlights unfair clauses in Terms of Service, contracts, and lease agreements.",
|
| 6 |
"permissions": [
|
| 7 |
"activeTab",
|
| 8 |
"storage",
|
| 9 |
"sidePanel",
|
| 10 |
+
"scripting",
|
| 11 |
+
"cookies"
|
| 12 |
],
|
| 13 |
"host_permissions": [
|
| 14 |
"https://gaurv007-clauseguard-api.hf.space/*",
|
|
|
|
| 15 |
"https://clauseguardweb.netlify.app/*",
|
| 16 |
"https://*.netlify.app/*"
|
| 17 |
],
|
extension/popup.js
CHANGED
|
@@ -1,10 +1,6 @@
|
|
| 1 |
/**
|
| 2 |
-
* ClauseGuard — Popup Script
|
| 3 |
* Shows user status (logged in / guest), scan results, usage.
|
| 4 |
-
*
|
| 5 |
-
* FIXED v4.3: sidePanel.open() is properly awaited.
|
| 6 |
-
* FIXED v4.3: CRITICAL severity is now counted and displayed.
|
| 7 |
-
* FIXED v4.3: Shows scan source ("Legal-BERT" / "Local") accurately.
|
| 8 |
*/
|
| 9 |
|
| 10 |
document.addEventListener("DOMContentLoaded", async () => {
|
|
@@ -82,17 +78,16 @@ document.addEventListener("DOMContentLoaded", async () => {
|
|
| 82 |
try { await chrome.tabs.sendMessage(tab.id, { type: "TRIGGER_SCAN" }); } catch {} window.close();
|
| 83 |
});
|
| 84 |
|
| 85 |
-
//
|
| 86 |
const btnDetails = document.getElementById("btn-details");
|
| 87 |
-
if (btnDetails) btnDetails.addEventListener("click",
|
| 88 |
-
try {
|
| 89 |
-
window.close();
|
| 90 |
});
|
| 91 |
|
| 92 |
// Login button
|
| 93 |
const btnLogin = document.getElementById("btn-login");
|
| 94 |
if (btnLogin) btnLogin.addEventListener("click", () => {
|
| 95 |
-
chrome.tabs.create({ url: "https://clauseguardweb.netlify.app/auth/login" });
|
| 96 |
});
|
| 97 |
});
|
| 98 |
|
|
@@ -115,20 +110,15 @@ function showResults(results) {
|
|
| 115 |
bar.className = "bar-fill " + (results.risk_score >= 60 ? "bar-red" : results.risk_score >= 30 ? "bar-amber" : "bar-green");
|
| 116 |
}
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
if (counts[c.severity] !== undefined) counts[c.severity]++;
|
| 122 |
-
else counts.MEDIUM++; // Unknown severities default to MEDIUM
|
| 123 |
-
}));
|
| 124 |
-
// Merge CRITICAL into HIGH for display (popup only has 3 columns)
|
| 125 |
-
if (el("c-high")) el("c-high").textContent = counts.CRITICAL + counts.HIGH;
|
| 126 |
if (el("c-med")) el("c-med").textContent = counts.MEDIUM;
|
| 127 |
if (el("c-low")) el("c-low").textContent = counts.LOW;
|
| 128 |
|
| 129 |
// Show source indicator
|
| 130 |
const src = el("scan-source");
|
| 131 |
-
if (src) src.textContent = results.source === "api" ? "Legal-BERT" : results.source === "local" ? "Local
|
| 132 |
}
|
| 133 |
|
| 134 |
function updateUsage(usage) {
|
|
|
|
| 1 |
/**
|
| 2 |
+
* ClauseGuard — Popup Script
|
| 3 |
* Shows user status (logged in / guest), scan results, usage.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
*/
|
| 5 |
|
| 6 |
document.addEventListener("DOMContentLoaded", async () => {
|
|
|
|
| 78 |
try { await chrome.tabs.sendMessage(tab.id, { type: "TRIGGER_SCAN" }); } catch {} window.close();
|
| 79 |
});
|
| 80 |
|
| 81 |
+
// Details
|
| 82 |
const btnDetails = document.getElementById("btn-details");
|
| 83 |
+
if (btnDetails) btnDetails.addEventListener("click", () => {
|
| 84 |
+
try { chrome.sidePanel.open({ tabId: tab.id }); } catch {} window.close();
|
|
|
|
| 85 |
});
|
| 86 |
|
| 87 |
// Login button
|
| 88 |
const btnLogin = document.getElementById("btn-login");
|
| 89 |
if (btnLogin) btnLogin.addEventListener("click", () => {
|
| 90 |
+
chrome.tabs.create({ url: "https://clauseguardweb.netlify.app/auth/login" }); // Update with your actual URL
|
| 91 |
});
|
| 92 |
});
|
| 93 |
|
|
|
|
| 110 |
bar.className = "bar-fill " + (results.risk_score >= 60 ? "bar-red" : results.risk_score >= 30 ? "bar-amber" : "bar-green");
|
| 111 |
}
|
| 112 |
|
| 113 |
+
const counts = { HIGH: 0, MEDIUM: 0, LOW: 0 };
|
| 114 |
+
(results.results || []).forEach(r => (r.categories || []).forEach(c => { if (counts[c.severity] !== undefined) counts[c.severity]++; }));
|
| 115 |
+
if (el("c-high")) el("c-high").textContent = counts.HIGH;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
if (el("c-med")) el("c-med").textContent = counts.MEDIUM;
|
| 117 |
if (el("c-low")) el("c-low").textContent = counts.LOW;
|
| 118 |
|
| 119 |
// Show source indicator
|
| 120 |
const src = el("scan-source");
|
| 121 |
+
if (src) src.textContent = results.source === "api" ? "Legal-BERT" : results.source === "local" ? "Local" : "";
|
| 122 |
}
|
| 123 |
|
| 124 |
function updateUsage(usage) {
|
extension/sidepanel.html
CHANGED
|
@@ -29,7 +29,6 @@
|
|
| 29 |
.filter-btn.active { background: #18181b; color: #fff; border-color: #18181b; }
|
| 30 |
.filter-count { font-size: 10px; opacity: 0.6; }
|
| 31 |
.dot { width: 6px; height: 6px; border-radius: 50%; }
|
| 32 |
-
.dot-purple { background: #a855f7; }
|
| 33 |
.dot-red { background: #ef4444; }
|
| 34 |
.dot-amber { background: #f59e0b; }
|
| 35 |
.dot-blue { background: #3b82f6; }
|
|
@@ -37,7 +36,6 @@
|
|
| 37 |
.clause-list { padding: 8px; }
|
| 38 |
.clause-card { border: 1px solid #e4e4e7; border-radius: 10px; padding: 12px; margin-bottom: 6px; transition: all 0.15s; cursor: default; }
|
| 39 |
.clause-card:hover { border-color: #d4d4d8; box-shadow: 0 1px 3px rgba(0,0,0,0.04); }
|
| 40 |
-
.clause-card.sev-critical { border-left: 3px solid #a855f7; }
|
| 41 |
.clause-card.sev-high { border-left: 3px solid #ef4444; }
|
| 42 |
.clause-card.sev-medium { border-left: 3px solid #f59e0b; }
|
| 43 |
.clause-card.sev-low { border-left: 3px solid #3b82f6; }
|
|
@@ -45,7 +43,6 @@
|
|
| 45 |
.clause-tags { display: flex; flex-wrap: wrap; gap: 4px; }
|
| 46 |
.tag { font-size: 10px; font-weight: 600; padding: 2px 8px; border-radius: 4px; border: 1px solid; display: inline-flex; align-items: center; gap: 3px; }
|
| 47 |
.tag svg { width: 10px; height: 10px; }
|
| 48 |
-
.tag-critical { background: #faf5ff; color: #7c3aed; border-color: #d8b4fe; }
|
| 49 |
.tag-high { background: #fef2f2; color: #b91c1c; border-color: #fecaca; }
|
| 50 |
.tag-medium { background: #fffbeb; color: #a16207; border-color: #fde68a; }
|
| 51 |
.tag-low { background: #eff6ff; color: #1d4ed8; border-color: #bfdbfe; }
|
|
@@ -74,7 +71,6 @@
|
|
| 74 |
|
| 75 |
<div class="filters" id="filters" style="display:none;">
|
| 76 |
<button class="filter-btn active" data-filter="all">All</button>
|
| 77 |
-
<button class="filter-btn" data-filter="CRITICAL" id="filter-critical" style="display:none;"><span class="dot dot-purple"></span>Critical <span class="filter-count" id="fc-crit">0</span></button>
|
| 78 |
<button class="filter-btn" data-filter="HIGH"><span class="dot dot-red"></span>High <span class="filter-count" id="fc-high">0</span></button>
|
| 79 |
<button class="filter-btn" data-filter="MEDIUM"><span class="dot dot-amber"></span>Medium <span class="filter-count" id="fc-med">0</span></button>
|
| 80 |
<button class="filter-btn" data-filter="LOW"><span class="dot dot-blue"></span>Low <span class="filter-count" id="fc-low">0</span></button>
|
|
|
|
| 29 |
.filter-btn.active { background: #18181b; color: #fff; border-color: #18181b; }
|
| 30 |
.filter-count { font-size: 10px; opacity: 0.6; }
|
| 31 |
.dot { width: 6px; height: 6px; border-radius: 50%; }
|
|
|
|
| 32 |
.dot-red { background: #ef4444; }
|
| 33 |
.dot-amber { background: #f59e0b; }
|
| 34 |
.dot-blue { background: #3b82f6; }
|
|
|
|
| 36 |
.clause-list { padding: 8px; }
|
| 37 |
.clause-card { border: 1px solid #e4e4e7; border-radius: 10px; padding: 12px; margin-bottom: 6px; transition: all 0.15s; cursor: default; }
|
| 38 |
.clause-card:hover { border-color: #d4d4d8; box-shadow: 0 1px 3px rgba(0,0,0,0.04); }
|
|
|
|
| 39 |
.clause-card.sev-high { border-left: 3px solid #ef4444; }
|
| 40 |
.clause-card.sev-medium { border-left: 3px solid #f59e0b; }
|
| 41 |
.clause-card.sev-low { border-left: 3px solid #3b82f6; }
|
|
|
|
| 43 |
.clause-tags { display: flex; flex-wrap: wrap; gap: 4px; }
|
| 44 |
.tag { font-size: 10px; font-weight: 600; padding: 2px 8px; border-radius: 4px; border: 1px solid; display: inline-flex; align-items: center; gap: 3px; }
|
| 45 |
.tag svg { width: 10px; height: 10px; }
|
|
|
|
| 46 |
.tag-high { background: #fef2f2; color: #b91c1c; border-color: #fecaca; }
|
| 47 |
.tag-medium { background: #fffbeb; color: #a16207; border-color: #fde68a; }
|
| 48 |
.tag-low { background: #eff6ff; color: #1d4ed8; border-color: #bfdbfe; }
|
|
|
|
| 71 |
|
| 72 |
<div class="filters" id="filters" style="display:none;">
|
| 73 |
<button class="filter-btn active" data-filter="all">All</button>
|
|
|
|
| 74 |
<button class="filter-btn" data-filter="HIGH"><span class="dot dot-red"></span>High <span class="filter-count" id="fc-high">0</span></button>
|
| 75 |
<button class="filter-btn" data-filter="MEDIUM"><span class="dot dot-amber"></span>Medium <span class="filter-count" id="fc-med">0</span></button>
|
| 76 |
<button class="filter-btn" data-filter="LOW"><span class="dot dot-blue"></span>Low <span class="filter-count" id="fc-low">0</span></button>
|
extension/sidepanel.js
CHANGED
|
@@ -1,8 +1,5 @@
|
|
| 1 |
/**
|
| 2 |
-
* ClauseGuard — Side Panel
|
| 3 |
-
*
|
| 4 |
-
* FIXED v4.3: Added CRITICAL severity support (filter, cards, icons, descriptions).
|
| 5 |
-
* FIXED v4.3: Severity ordering now uses numeric mapping consistently.
|
| 6 |
*/
|
| 7 |
|
| 8 |
const DESCS = {
|
|
@@ -16,12 +13,8 @@ const DESCS = {
|
|
| 16 |
"Arbitration": "You waive your right to sue in court.",
|
| 17 |
};
|
| 18 |
|
| 19 |
-
// Severity numeric ordering (higher = more severe)
|
| 20 |
-
const SEV_ORDER = { CRITICAL: 4, HIGH: 3, MEDIUM: 2, LOW: 1 };
|
| 21 |
-
|
| 22 |
// SVG icons for severity
|
| 23 |
const SEV_ICONS = {
|
| 24 |
-
CRITICAL: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3"/><path d="M12 9v4"/><path d="M12 17h.01"/></svg>',
|
| 25 |
HIGH: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3"/><path d="M12 9v4"/><path d="M12 17h.01"/></svg>',
|
| 26 |
MEDIUM: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 8v4"/><path d="M12 16h.01"/></svg>',
|
| 27 |
LOW: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 16v-4"/><path d="M12 8h.01"/></svg>',
|
|
@@ -58,20 +51,10 @@ async function loadResults() {
|
|
| 58 |
pf.style.width = `${results.risk_score}%`;
|
| 59 |
pf.style.background = results.risk_score >= 60 ? "#ef4444" : results.risk_score >= 30 ? "#f59e0b" : "#22c55e";
|
| 60 |
|
| 61 |
-
//
|
| 62 |
-
const counts = {
|
| 63 |
const flagged = results.results.filter(r => r.categories?.length > 0);
|
| 64 |
-
flagged.forEach(r => r.categories.forEach(c => {
|
| 65 |
-
if (counts[c.severity] !== undefined) counts[c.severity]++;
|
| 66 |
-
else counts.MEDIUM++; // Default unknown to MEDIUM
|
| 67 |
-
}));
|
| 68 |
-
|
| 69 |
-
// Show CRITICAL count in the filter if any exist
|
| 70 |
-
const fcCrit = document.getElementById("fc-crit");
|
| 71 |
-
const critFilter = document.getElementById("filter-critical");
|
| 72 |
-
if (fcCrit) fcCrit.textContent = counts.CRITICAL;
|
| 73 |
-
if (critFilter) critFilter.style.display = counts.CRITICAL > 0 ? "flex" : "none";
|
| 74 |
-
|
| 75 |
document.getElementById("fc-high").textContent = counts.HIGH;
|
| 76 |
document.getElementById("fc-med").textContent = counts.MEDIUM;
|
| 77 |
document.getElementById("fc-low").textContent = counts.LOW;
|
|
@@ -91,10 +74,11 @@ function renderClauses() {
|
|
| 91 |
|
| 92 |
list.innerHTML = filtered.map((clause, i) => {
|
| 93 |
const maxSev = clause.categories.reduce((m, c) => {
|
| 94 |
-
|
|
|
|
| 95 |
}, "LOW");
|
| 96 |
|
| 97 |
-
const tagMap = {
|
| 98 |
|
| 99 |
const tags = clause.categories.map(c =>
|
| 100 |
`<span class="tag ${tagMap[c.severity] || "tag-medium"}">${SEV_ICONS[c.severity] || ""} ${c.name}</span>`
|
|
|
|
| 1 |
/**
|
| 2 |
+
* ClauseGuard — Side Panel (redesigned)
|
|
|
|
|
|
|
|
|
|
| 3 |
*/
|
| 4 |
|
| 5 |
const DESCS = {
|
|
|
|
| 13 |
"Arbitration": "You waive your right to sue in court.",
|
| 14 |
};
|
| 15 |
|
|
|
|
|
|
|
|
|
|
| 16 |
// SVG icons for severity
|
| 17 |
const SEV_ICONS = {
|
|
|
|
| 18 |
HIGH: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3"/><path d="M12 9v4"/><path d="M12 17h.01"/></svg>',
|
| 19 |
MEDIUM: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 8v4"/><path d="M12 16h.01"/></svg>',
|
| 20 |
LOW: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 16v-4"/><path d="M12 8h.01"/></svg>',
|
|
|
|
| 51 |
pf.style.width = `${results.risk_score}%`;
|
| 52 |
pf.style.background = results.risk_score >= 60 ? "#ef4444" : results.risk_score >= 30 ? "#f59e0b" : "#22c55e";
|
| 53 |
|
| 54 |
+
// Counts
|
| 55 |
+
const counts = { HIGH: 0, MEDIUM: 0, LOW: 0 };
|
| 56 |
const flagged = results.results.filter(r => r.categories?.length > 0);
|
| 57 |
+
flagged.forEach(r => r.categories.forEach(c => { if (counts[c.severity] !== undefined) counts[c.severity]++; }));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
document.getElementById("fc-high").textContent = counts.HIGH;
|
| 59 |
document.getElementById("fc-med").textContent = counts.MEDIUM;
|
| 60 |
document.getElementById("fc-low").textContent = counts.LOW;
|
|
|
|
| 74 |
|
| 75 |
list.innerHTML = filtered.map((clause, i) => {
|
| 76 |
const maxSev = clause.categories.reduce((m, c) => {
|
| 77 |
+
const o = { HIGH: 3, MEDIUM: 2, LOW: 1 };
|
| 78 |
+
return (o[c.severity] || 0) > (o[m] || 0) ? c.severity : m;
|
| 79 |
}, "LOW");
|
| 80 |
|
| 81 |
+
const tagMap = { HIGH: "tag-high", MEDIUM: "tag-medium", LOW: "tag-low" };
|
| 82 |
|
| 83 |
const tags = clause.categories.map(c =>
|
| 84 |
`<span class="tag ${tagMap[c.severity] || "tag-medium"}">${SEV_ICONS[c.severity] || ""} ${c.name}</span>`
|
extension/styles/content.css
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
/* ClauseGuard — Content Script Styles (injected into web pages) */
|
| 2 |
-
/* v4.3: Added CRITICAL severity styles */
|
| 3 |
|
| 4 |
/* Highlight severity levels */
|
| 5 |
.clauseguard-highlight {
|
|
@@ -10,16 +9,6 @@
|
|
| 10 |
position: relative;
|
| 11 |
}
|
| 12 |
|
| 13 |
-
/* CRITICAL — purple (most severe) */
|
| 14 |
-
.clauseguard-critical {
|
| 15 |
-
background: rgba(168, 85, 247, 0.22);
|
| 16 |
-
border-bottom: 2.5px solid #a855f7;
|
| 17 |
-
}
|
| 18 |
-
.clauseguard-critical:hover {
|
| 19 |
-
background: rgba(168, 85, 247, 0.38);
|
| 20 |
-
}
|
| 21 |
-
|
| 22 |
-
/* HIGH — red */
|
| 23 |
.clauseguard-high {
|
| 24 |
background: rgba(239, 68, 68, 0.22);
|
| 25 |
border-bottom: 2.5px solid #ef4444;
|
|
@@ -28,7 +17,6 @@
|
|
| 28 |
background: rgba(239, 68, 68, 0.35);
|
| 29 |
}
|
| 30 |
|
| 31 |
-
/* MEDIUM — amber */
|
| 32 |
.clauseguard-medium {
|
| 33 |
background: rgba(245, 158, 11, 0.18);
|
| 34 |
border-bottom: 2.5px solid #f59e0b;
|
|
@@ -37,7 +25,6 @@
|
|
| 37 |
background: rgba(245, 158, 11, 0.32);
|
| 38 |
}
|
| 39 |
|
| 40 |
-
/* LOW — blue */
|
| 41 |
.clauseguard-low {
|
| 42 |
background: rgba(59, 130, 246, 0.14);
|
| 43 |
border-bottom: 2.5px solid #3b82f6;
|
|
@@ -87,10 +74,6 @@
|
|
| 87 |
letter-spacing: 0.5px;
|
| 88 |
}
|
| 89 |
|
| 90 |
-
.clauseguard-badge-critical {
|
| 91 |
-
background: #e9d5ff;
|
| 92 |
-
color: #6b21a8;
|
| 93 |
-
}
|
| 94 |
.clauseguard-badge-high {
|
| 95 |
background: #fecaca;
|
| 96 |
color: #991b1b;
|
|
|
|
| 1 |
/* ClauseGuard — Content Script Styles (injected into web pages) */
|
|
|
|
| 2 |
|
| 3 |
/* Highlight severity levels */
|
| 4 |
.clauseguard-highlight {
|
|
|
|
| 9 |
position: relative;
|
| 10 |
}
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
.clauseguard-high {
|
| 13 |
background: rgba(239, 68, 68, 0.22);
|
| 14 |
border-bottom: 2.5px solid #ef4444;
|
|
|
|
| 17 |
background: rgba(239, 68, 68, 0.35);
|
| 18 |
}
|
| 19 |
|
|
|
|
| 20 |
.clauseguard-medium {
|
| 21 |
background: rgba(245, 158, 11, 0.18);
|
| 22 |
border-bottom: 2.5px solid #f59e0b;
|
|
|
|
| 25 |
background: rgba(245, 158, 11, 0.32);
|
| 26 |
}
|
| 27 |
|
|
|
|
| 28 |
.clauseguard-low {
|
| 29 |
background: rgba(59, 130, 246, 0.14);
|
| 30 |
border-bottom: 2.5px solid #3b82f6;
|
|
|
|
| 74 |
letter-spacing: 0.5px;
|
| 75 |
}
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
.clauseguard-badge-high {
|
| 78 |
background: #fecaca;
|
| 79 |
color: #991b1b;
|
ml/export_onnx_v2.py
DELETED
|
@@ -1,169 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
ClauseGuard — ONNX Export + INT8 Quantization Pipeline (v2)
|
| 3 |
-
═══════════════════════════════════════════════════════════
|
| 4 |
-
PERF v4.3: Full pipeline to export the CUAD LoRA classifier to ONNX+INT8.
|
| 5 |
-
|
| 6 |
-
Steps:
|
| 7 |
-
1. Load base Legal-BERT + LoRA adapter
|
| 8 |
-
2. merge_and_unload() → plain PreTrainedModel
|
| 9 |
-
3. Export to ONNX via optimum
|
| 10 |
-
4. Dynamic INT8 quantization (no calibration data needed)
|
| 11 |
-
5. Push quantized model to HuggingFace Hub
|
| 12 |
-
|
| 13 |
-
Usage:
|
| 14 |
-
pip install "optimum[onnxruntime]" peft transformers torch
|
| 15 |
-
python export_onnx_v2.py
|
| 16 |
-
|
| 17 |
-
# Or with custom paths:
|
| 18 |
-
HUB_MODEL_ID=gaurv007/clauseguard-onnx-int8 python export_onnx_v2.py
|
| 19 |
-
|
| 20 |
-
Hardware: Any CPU (no GPU needed for export)
|
| 21 |
-
Time: ~2-5 minutes
|
| 22 |
-
"""
|
| 23 |
-
|
| 24 |
-
import os
|
| 25 |
-
import sys
|
| 26 |
-
import shutil
|
| 27 |
-
|
| 28 |
-
# ── Configuration ──
|
| 29 |
-
BASE_MODEL = os.environ.get("BASE_MODEL", "nlpaueb/legal-bert-base-uncased")
|
| 30 |
-
ADAPTER_MODEL = os.environ.get("ADAPTER_MODEL", "Mokshith31/legalbert-contract-clause-classification")
|
| 31 |
-
HUB_MODEL_ID = os.environ.get("HUB_MODEL_ID", "gaurv007/clauseguard-onnx-int8")
|
| 32 |
-
PUSH_TO_HUB = os.environ.get("PUSH_TO_HUB", "true").lower() == "true"
|
| 33 |
-
|
| 34 |
-
MERGED_DIR = "./merged_legalbert"
|
| 35 |
-
ONNX_DIR = "./onnx_legalbert"
|
| 36 |
-
QUANT_DIR = "./onnx_legalbert_int8"
|
| 37 |
-
|
| 38 |
-
CUAD_LABELS = [
|
| 39 |
-
"Document Name", "Parties", "Agreement Date", "Effective Date",
|
| 40 |
-
"Expiration Date", "Renewal Term", "Notice Period to Terminate Renewal",
|
| 41 |
-
"Governing Law", "Most Favored Nation", "Non-Compete", "Exclusivity",
|
| 42 |
-
"No-Solicit of Customers", "No-Solicit of Employees", "Non-Disparagement",
|
| 43 |
-
"Termination for Convenience", "ROFR/ROFO/ROFN", "Change of Control",
|
| 44 |
-
"Anti-Assignment", "Revenue/Profit Sharing", "Price Restriction",
|
| 45 |
-
"Minimum Commitment", "Volume Restriction", "IP Ownership Assignment",
|
| 46 |
-
"Joint IP Ownership", "License Grant", "Non-Transferable License",
|
| 47 |
-
"Affiliate License-Licensor", "Affiliate License-Licensee",
|
| 48 |
-
"Unlimited/All-You-Can-Eat License", "Irrevocable or Perpetual License",
|
| 49 |
-
"Source Code Escrow", "Post-Termination Services", "Audit Rights",
|
| 50 |
-
"Uncapped Liability", "Cap on Liability", "Liquidated Damages",
|
| 51 |
-
"Warranty Duration", "Insurance", "Covenant Not to Sue",
|
| 52 |
-
"Third Party Beneficiary", "Other",
|
| 53 |
-
]
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
def main():
|
| 57 |
-
print("🛡️ ClauseGuard ONNX Export + INT8 Quantization")
|
| 58 |
-
print("=" * 60)
|
| 59 |
-
print(f" Base model: {BASE_MODEL}")
|
| 60 |
-
print(f" LoRA adapter: {ADAPTER_MODEL}")
|
| 61 |
-
print(f" Hub target: {HUB_MODEL_ID}")
|
| 62 |
-
print()
|
| 63 |
-
|
| 64 |
-
# ── Step 1: Load and merge LoRA ──
|
| 65 |
-
print("📦 Step 1: Loading base model + LoRA adapter...")
|
| 66 |
-
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 67 |
-
from peft import PeftModel
|
| 68 |
-
|
| 69 |
-
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
|
| 70 |
-
base_model = AutoModelForSequenceClassification.from_pretrained(
|
| 71 |
-
BASE_MODEL, num_labels=41, ignore_mismatched_sizes=True
|
| 72 |
-
)
|
| 73 |
-
peft_model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
|
| 74 |
-
|
| 75 |
-
print("🔀 Step 2: Merging LoRA weights into base model...")
|
| 76 |
-
merged_model = peft_model.merge_and_unload(safe_merge=True)
|
| 77 |
-
|
| 78 |
-
# Set label mapping
|
| 79 |
-
merged_model.config.id2label = {str(i): name for i, name in enumerate(CUAD_LABELS)}
|
| 80 |
-
merged_model.config.label2id = {name: i for i, name in enumerate(CUAD_LABELS)}
|
| 81 |
-
|
| 82 |
-
os.makedirs(MERGED_DIR, exist_ok=True)
|
| 83 |
-
merged_model.save_pretrained(MERGED_DIR)
|
| 84 |
-
tokenizer.save_pretrained(MERGED_DIR)
|
| 85 |
-
print(f" ✅ Merged model saved to {MERGED_DIR}")
|
| 86 |
-
|
| 87 |
-
# Free memory
|
| 88 |
-
del peft_model, base_model, merged_model
|
| 89 |
-
import gc
|
| 90 |
-
gc.collect()
|
| 91 |
-
|
| 92 |
-
# ── Step 3: Export to ONNX ──
|
| 93 |
-
print("\n📤 Step 3: Exporting to ONNX...")
|
| 94 |
-
from optimum.onnxruntime import ORTModelForSequenceClassification
|
| 95 |
-
|
| 96 |
-
ort_model = ORTModelForSequenceClassification.from_pretrained(
|
| 97 |
-
MERGED_DIR, export=True
|
| 98 |
-
)
|
| 99 |
-
os.makedirs(ONNX_DIR, exist_ok=True)
|
| 100 |
-
ort_model.save_pretrained(ONNX_DIR)
|
| 101 |
-
tokenizer.save_pretrained(ONNX_DIR)
|
| 102 |
-
print(f" ✅ ONNX model saved to {ONNX_DIR}")
|
| 103 |
-
|
| 104 |
-
# ── Step 4: Dynamic INT8 Quantization ──
|
| 105 |
-
print("\n⚡ Step 4: Applying dynamic INT8 quantization...")
|
| 106 |
-
from optimum.onnxruntime.configuration import AutoQuantizationConfig
|
| 107 |
-
from optimum.onnxruntime import ORTQuantizer
|
| 108 |
-
|
| 109 |
-
qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)
|
| 110 |
-
quantizer = ORTQuantizer.from_pretrained(ort_model)
|
| 111 |
-
os.makedirs(QUANT_DIR, exist_ok=True)
|
| 112 |
-
quantizer.quantize(save_dir=QUANT_DIR, quantization_config=qconfig)
|
| 113 |
-
|
| 114 |
-
# Copy tokenizer files to quantized dir
|
| 115 |
-
tokenizer.save_pretrained(QUANT_DIR)
|
| 116 |
-
# Copy config.json too
|
| 117 |
-
shutil.copy2(os.path.join(ONNX_DIR, "config.json"), QUANT_DIR)
|
| 118 |
-
print(f" ✅ Quantized model saved to {QUANT_DIR}")
|
| 119 |
-
|
| 120 |
-
# ── Step 5: Verify ──
|
| 121 |
-
print("\n🧪 Step 5: Verifying quantized model...")
|
| 122 |
-
quant_model = ORTModelForSequenceClassification.from_pretrained(
|
| 123 |
-
QUANT_DIR, file_name="model_quantized.onnx"
|
| 124 |
-
)
|
| 125 |
-
quant_tokenizer = AutoTokenizer.from_pretrained(QUANT_DIR)
|
| 126 |
-
|
| 127 |
-
test_texts = [
|
| 128 |
-
"The company may terminate your account at any time without notice.",
|
| 129 |
-
"Either party shall indemnify and hold harmless the other party.",
|
| 130 |
-
"This Agreement shall be governed by the laws of the State of Delaware.",
|
| 131 |
-
]
|
| 132 |
-
inputs = quant_tokenizer(test_texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
| 133 |
-
|
| 134 |
-
import torch
|
| 135 |
-
with torch.no_grad():
|
| 136 |
-
outputs = quant_model(**inputs)
|
| 137 |
-
probs = torch.softmax(outputs.logits, dim=-1)
|
| 138 |
-
|
| 139 |
-
for i, text in enumerate(test_texts):
|
| 140 |
-
top_prob, top_idx = torch.max(probs[i], dim=0)
|
| 141 |
-
label = CUAD_LABELS[int(top_idx)] if int(top_idx) < len(CUAD_LABELS) else f"Class-{int(top_idx)}"
|
| 142 |
-
print(f" Text: {text[:60]}...")
|
| 143 |
-
print(f" → {label} ({top_prob:.3f})")
|
| 144 |
-
|
| 145 |
-
# ── Step 6: Push to Hub ──
|
| 146 |
-
if PUSH_TO_HUB:
|
| 147 |
-
print(f"\n🚀 Step 6: Pushing to {HUB_MODEL_ID}...")
|
| 148 |
-
quant_model.push_to_hub(HUB_MODEL_ID, use_auth_token=True)
|
| 149 |
-
quant_tokenizer.push_to_hub(HUB_MODEL_ID, use_auth_token=True)
|
| 150 |
-
print(f" ✅ Pushed to https://huggingface.co/{HUB_MODEL_ID}")
|
| 151 |
-
else:
|
| 152 |
-
print("\n⏭️ Skipping Hub push (PUSH_TO_HUB=false)")
|
| 153 |
-
|
| 154 |
-
# ── Summary ──
|
| 155 |
-
onnx_size = os.path.getsize(os.path.join(ONNX_DIR, "model.onnx")) / 1e6
|
| 156 |
-
quant_size = os.path.getsize(os.path.join(QUANT_DIR, "model_quantized.onnx")) / 1e6
|
| 157 |
-
print(f"\n{'='*60}")
|
| 158 |
-
print(f" 📊 ONNX model size: {onnx_size:.1f} MB")
|
| 159 |
-
print(f" 📊 Quantized model size: {quant_size:.1f} MB")
|
| 160 |
-
print(f" 📊 Size reduction: {(1 - quant_size/onnx_size)*100:.0f}%")
|
| 161 |
-
print(f" 🔥 Expected speedup: 2-4x on CPU")
|
| 162 |
-
print(f"{'='*60}")
|
| 163 |
-
print("\n✅ Export complete!")
|
| 164 |
-
print(f"\nTo use in ClauseGuard, set ONNX_MODEL_PATH={QUANT_DIR}")
|
| 165 |
-
print("or point to the Hub model: gaurv007/clauseguard-onnx-int8")
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
if __name__ == "__main__":
|
| 169 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
obligations.py
CHANGED
|
@@ -85,26 +85,11 @@ _PRIORITY_MAP = {
|
|
| 85 |
"delivery": 1,
|
| 86 |
}
|
| 87 |
|
| 88 |
-
# FIX v4.2: Pre-compile obligation patterns at module level (was recompiling per sentence)
|
| 89 |
-
_OBLIGATION_PATTERNS_COMPILED = {
|
| 90 |
-
otype: [re.compile(p, re.IGNORECASE) for p in patterns]
|
| 91 |
-
for otype, patterns in OBLIGATION_PATTERNS.items()
|
| 92 |
-
}
|
| 93 |
-
|
| 94 |
-
# FIX v4.2: Pre-compile false positive patterns
|
| 95 |
-
_FALSE_POSITIVE_PATTERNS_COMPILED = [re.compile(p, re.IGNORECASE) for p in _FALSE_POSITIVE_PATTERNS]
|
| 96 |
-
|
| 97 |
-
# FIX v4.2: Pre-compile time patterns
|
| 98 |
-
_TIME_PATTERNS_COMPILED = [(re.compile(p, re.IGNORECASE), ptype) for p, ptype in TIME_PATTERNS]
|
| 99 |
-
|
| 100 |
-
# FIX v4.2: Pre-compile party patterns
|
| 101 |
-
_PARTY_PATTERNS_COMPILED = [re.compile(p) for p in PARTY_PATTERNS]
|
| 102 |
-
|
| 103 |
|
| 104 |
def _is_false_positive(sentence):
|
| 105 |
"""Check if a sentence is a common false positive (definition/interpretation, not obligation)."""
|
| 106 |
-
for fp in
|
| 107 |
-
if
|
| 108 |
return True
|
| 109 |
return False
|
| 110 |
|
|
@@ -126,9 +111,9 @@ def extract_obligations(text):
|
|
| 126 |
continue
|
| 127 |
|
| 128 |
found_types = set()
|
| 129 |
-
for otype, patterns in
|
| 130 |
for pat in patterns:
|
| 131 |
-
if
|
| 132 |
found_types.add(otype)
|
| 133 |
break
|
| 134 |
|
|
@@ -143,8 +128,8 @@ def extract_obligations(text):
|
|
| 143 |
party = obligation_direction
|
| 144 |
else:
|
| 145 |
# Fallback to pattern matching within the sentence
|
| 146 |
-
for pp in
|
| 147 |
-
m =
|
| 148 |
if m:
|
| 149 |
candidate = m.group(0).strip()
|
| 150 |
# Fix 8: Reject party strings >40 chars (header bleed-through)
|
|
@@ -155,8 +140,8 @@ def extract_obligations(text):
|
|
| 155 |
# Extract timeframe
|
| 156 |
deadline = "Not specified"
|
| 157 |
deadline_urgency = 0
|
| 158 |
-
for pat, ptype in
|
| 159 |
-
m =
|
| 160 |
if m:
|
| 161 |
if ptype == "relative":
|
| 162 |
num = m.group(1)
|
|
@@ -192,26 +177,6 @@ def extract_obligations(text):
|
|
| 192 |
# Sort by priority (highest first)
|
| 193 |
obligations.sort(key=lambda x: x.get("priority", 0), reverse=True)
|
| 194 |
|
| 195 |
-
# FIX v4.3: Deduplicate obligations — same text producing multiple types
|
| 196 |
-
# Keep the more specific type (termination > compliance > monetary > general)
|
| 197 |
-
_TYPE_PRIORITY = {"termination": 1, "compliance": 2, "reporting": 3, "delivery": 4, "monetary": 5}
|
| 198 |
-
seen_texts = {}
|
| 199 |
-
deduped = []
|
| 200 |
-
for ob in obligations:
|
| 201 |
-
# Hash on first 80 chars of description + party
|
| 202 |
-
key = hash(ob["description"][:80] + ob["party"])
|
| 203 |
-
type_pri = _TYPE_PRIORITY.get(ob["type"], 99)
|
| 204 |
-
if key not in seen_texts:
|
| 205 |
-
seen_texts[key] = (type_pri, len(deduped))
|
| 206 |
-
deduped.append(ob)
|
| 207 |
-
else:
|
| 208 |
-
existing_pri, existing_idx = seen_texts[key]
|
| 209 |
-
if type_pri < existing_pri:
|
| 210 |
-
# This type is more specific — replace
|
| 211 |
-
deduped[existing_idx] = ob
|
| 212 |
-
seen_texts[key] = (type_pri, existing_idx)
|
| 213 |
-
obligations = deduped
|
| 214 |
-
|
| 215 |
return obligations
|
| 216 |
|
| 217 |
|
|
|
|
| 85 |
"delivery": 1,
|
| 86 |
}
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
def _is_false_positive(sentence):
|
| 90 |
"""Check if a sentence is a common false positive (definition/interpretation, not obligation)."""
|
| 91 |
+
for fp in _FALSE_POSITIVE_PATTERNS:
|
| 92 |
+
if re.search(fp, sentence, re.IGNORECASE):
|
| 93 |
return True
|
| 94 |
return False
|
| 95 |
|
|
|
|
| 111 |
continue
|
| 112 |
|
| 113 |
found_types = set()
|
| 114 |
+
for otype, patterns in OBLIGATION_PATTERNS.items():
|
| 115 |
for pat in patterns:
|
| 116 |
+
if re.search(pat, sentence, re.IGNORECASE):
|
| 117 |
found_types.add(otype)
|
| 118 |
break
|
| 119 |
|
|
|
|
| 128 |
party = obligation_direction
|
| 129 |
else:
|
| 130 |
# Fallback to pattern matching within the sentence
|
| 131 |
+
for pp in PARTY_PATTERNS:
|
| 132 |
+
m = re.search(pp, sentence)
|
| 133 |
if m:
|
| 134 |
candidate = m.group(0).strip()
|
| 135 |
# Fix 8: Reject party strings >40 chars (header bleed-through)
|
|
|
|
| 140 |
# Extract timeframe
|
| 141 |
deadline = "Not specified"
|
| 142 |
deadline_urgency = 0
|
| 143 |
+
for pat, ptype in TIME_PATTERNS:
|
| 144 |
+
m = re.search(pat, sentence, re.IGNORECASE)
|
| 145 |
if m:
|
| 146 |
if ptype == "relative":
|
| 147 |
num = m.group(1)
|
|
|
|
| 177 |
# Sort by priority (highest first)
|
| 178 |
obligations.sort(key=lambda x: x.get("priority", 0), reverse=True)
|
| 179 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
return obligations
|
| 181 |
|
| 182 |
|
redlining.py
CHANGED
|
@@ -408,66 +408,24 @@ Write the refined safer clause (adapt the template to this specific contract's c
|
|
| 408 |
|
| 409 |
|
| 410 |
# ═══════════════════════════════════════════════════════════════════════
|
| 411 |
-
#
|
| 412 |
# ═══════════════════════════════════════════════════════════════════════
|
| 413 |
|
| 414 |
-
_LABEL_KEYWORDS = {
|
| 415 |
-
"Limitation of liability": ["liable", "liability", "damages", "limitation of liability", "in no event"],
|
| 416 |
-
"Uncapped Liability": ["uncapped", "unlimited", "no limit", "no cap"],
|
| 417 |
-
"Governing Law": ["governed by", "governing law", "jurisdiction", "laws of"],
|
| 418 |
-
"Termination for Convenience": ["terminat", "cancel", "convenience", "without cause"],
|
| 419 |
-
"Non-Compete": ["non-compete", "not compete", "competition restriction"],
|
| 420 |
-
"No-Solicit of Employees": ["solicit", "recruit", "induce", "encourage", "employee"],
|
| 421 |
-
"No-Solicit of Customers": ["solicit", "customer", "client", "divert"],
|
| 422 |
-
"Non-Disparagement": ["disparag", "defam", "negative", "derogatory"],
|
| 423 |
-
"Arbitration": ["arbitrat", "binding arbitration", "waive", "class action"],
|
| 424 |
-
"IP Ownership Assignment": ["intellectual property", "ip", "assign", "work for hire", "ownership"],
|
| 425 |
-
"Indemnification": ["indemnif", "hold harmless", "defend"],
|
| 426 |
-
"Confidentiality": ["confidential", "non-disclosure", "nda"],
|
| 427 |
-
"Exclusivity": ["exclusive", "exclusivity"],
|
| 428 |
-
"Anti-Assignment": ["assign", "transfer", "without consent"],
|
| 429 |
-
"Content removal": ["remove", "delete", "content"],
|
| 430 |
-
"Unilateral change": ["modify", "change", "amend", "sole discretion"],
|
| 431 |
-
"Unilateral termination": ["terminat", "suspend", "at any time"],
|
| 432 |
-
"Liquidated Damages": ["liquidated", "pre-determined", "stipulated"],
|
| 433 |
-
"Choice of law": ["governed by", "laws of", "choice of law"],
|
| 434 |
-
"Jurisdiction": ["jurisdiction", "courts of", "exclusive jurisdiction"],
|
| 435 |
-
"Contract by using": ["by using", "continued use", "acceptance"],
|
| 436 |
-
}
|
| 437 |
-
|
| 438 |
-
# FIX v4.3.1: Exclusion keywords — if ANY of these appear, the clause is rejected for this label.
|
| 439 |
-
# Catches chunks that span two sections (e.g., §12.5 Waiver + §12.6 Non-Solicitation merged into one chunk).
|
| 440 |
-
_LABEL_EXCLUDE_KEYWORDS = {
|
| 441 |
-
"No-Solicit of Employees": ["waiver of", "waive any", "waives the right", "failure to enforce"],
|
| 442 |
-
"No-Solicit of Customers": ["waiver of", "waive any", "waives the right", "failure to enforce"],
|
| 443 |
-
"Non-Disparagement": ["arbitrat", "aaa", "jams", "class action", "waives any right to participate"],
|
| 444 |
-
}
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
def _validate_clause_match(label, clause_text):
|
| 448 |
-
"""FIX v4.3.1: Validate clause matches label — checks BOTH required AND excluded keywords."""
|
| 449 |
-
text_lower = clause_text.lower()
|
| 450 |
-
|
| 451 |
-
# Check exclusions first — hard reject
|
| 452 |
-
exclusions = _LABEL_EXCLUDE_KEYWORDS.get(label, [])
|
| 453 |
-
if exclusions and any(kw in text_lower for kw in exclusions):
|
| 454 |
-
return False
|
| 455 |
-
|
| 456 |
-
# Check required keywords
|
| 457 |
-
keywords = _LABEL_KEYWORDS.get(label, [])
|
| 458 |
-
if not keywords:
|
| 459 |
-
return True
|
| 460 |
-
return any(kw in text_lower for kw in keywords)
|
| 461 |
-
|
| 462 |
-
|
| 463 |
def generate_redlines(analysis_result, use_llm=True):
|
| 464 |
"""
|
| 465 |
Generate redline suggestions for all flagged clauses in the analysis.
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
"""
|
| 472 |
if analysis_result is None:
|
| 473 |
return []
|
|
@@ -476,40 +434,23 @@ def generate_redlines(analysis_result, use_llm=True):
|
|
| 476 |
if not clauses:
|
| 477 |
return []
|
| 478 |
|
| 479 |
-
# FIX v4.3: Group clauses by label and pick the best match for each
|
| 480 |
-
label_clauses = {}
|
| 481 |
-
for clause in clauses:
|
| 482 |
-
label = clause.get("label", "")
|
| 483 |
-
risk = clause.get("risk", "LOW")
|
| 484 |
-
text = clause.get("text", "")
|
| 485 |
-
confidence = clause.get("confidence", 0) or 0
|
| 486 |
-
|
| 487 |
-
if risk == "LOW":
|
| 488 |
-
continue
|
| 489 |
-
|
| 490 |
-
# Validate that the clause text actually matches the label
|
| 491 |
-
if not _validate_clause_match(label, text):
|
| 492 |
-
continue
|
| 493 |
-
|
| 494 |
-
# Keep the highest-confidence match for each label
|
| 495 |
-
if label not in label_clauses or confidence > (label_clauses[label].get("confidence", 0) or 0):
|
| 496 |
-
label_clauses[label] = clause
|
| 497 |
-
|
| 498 |
redlines = []
|
| 499 |
-
|
| 500 |
|
| 501 |
# Sort by risk level: CRITICAL first
|
| 502 |
risk_order = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}
|
| 503 |
-
|
| 504 |
-
label_clauses.keys(),
|
| 505 |
-
key=lambda l: risk_order.get(label_clauses[l].get("risk", "LOW"), 3)
|
| 506 |
-
)
|
| 507 |
|
| 508 |
-
for
|
| 509 |
-
|
| 510 |
risk = clause.get("risk", "LOW")
|
| 511 |
text = clause.get("text", "")
|
| 512 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
# Find matching templates (Tier 1 + Tier 2)
|
| 514 |
matches = _find_similar_templates(label, text)
|
| 515 |
if not matches:
|
|
@@ -517,12 +458,6 @@ def generate_redlines(analysis_result, use_llm=True):
|
|
| 517 |
|
| 518 |
best_key, best_template, score = matches[0]
|
| 519 |
|
| 520 |
-
# FIX v4.3: Dedup — skip if this template's alternative was already used
|
| 521 |
-
alt_fingerprint = best_template["safe_alternative"][:120]
|
| 522 |
-
if alt_fingerprint in seen_alternatives:
|
| 523 |
-
continue
|
| 524 |
-
seen_alternatives.add(alt_fingerprint)
|
| 525 |
-
|
| 526 |
# Tier 3: Try LLM refinement if enabled
|
| 527 |
refined_text = None
|
| 528 |
tier = "template"
|
|
|
|
| 408 |
|
| 409 |
|
| 410 |
# ═══════════════════════════════════════════════════════════════════════
|
| 411 |
+
# PUBLIC API
|
| 412 |
# ═══════════════════════════════════════════════════════════════════════
|
| 413 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 414 |
def generate_redlines(analysis_result, use_llm=True):
|
| 415 |
"""
|
| 416 |
Generate redline suggestions for all flagged clauses in the analysis.
|
| 417 |
+
|
| 418 |
+
Returns list of redline suggestions:
|
| 419 |
+
[{
|
| 420 |
+
"original_text": str,
|
| 421 |
+
"clause_label": str,
|
| 422 |
+
"risk_level": str,
|
| 423 |
+
"safe_alternative": str,
|
| 424 |
+
"legal_basis": str,
|
| 425 |
+
"consumer_standard": str,
|
| 426 |
+
"tier": "template" | "llm_refined",
|
| 427 |
+
"confidence": str,
|
| 428 |
+
}]
|
| 429 |
"""
|
| 430 |
if analysis_result is None:
|
| 431 |
return []
|
|
|
|
| 434 |
if not clauses:
|
| 435 |
return []
|
| 436 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 437 |
redlines = []
|
| 438 |
+
seen_labels = set() # Deduplicate by label
|
| 439 |
|
| 440 |
# Sort by risk level: CRITICAL first
|
| 441 |
risk_order = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}
|
| 442 |
+
sorted_clauses = sorted(clauses, key=lambda c: risk_order.get(c.get("risk", "LOW"), 3))
|
|
|
|
|
|
|
|
|
|
| 443 |
|
| 444 |
+
for clause in sorted_clauses:
|
| 445 |
+
label = clause.get("label", "")
|
| 446 |
risk = clause.get("risk", "LOW")
|
| 447 |
text = clause.get("text", "")
|
| 448 |
|
| 449 |
+
# Skip LOW risk and already-seen labels
|
| 450 |
+
if risk == "LOW" or label in seen_labels:
|
| 451 |
+
continue
|
| 452 |
+
seen_labels.add(label)
|
| 453 |
+
|
| 454 |
# Find matching templates (Tier 1 + Tier 2)
|
| 455 |
matches = _find_similar_templates(label, text)
|
| 456 |
if not matches:
|
|
|
|
| 458 |
|
| 459 |
best_key, best_template, score = matches[0]
|
| 460 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 461 |
# Tier 3: Try LLM refinement if enabled
|
| 462 |
refined_text = None
|
| 463 |
tier = "template"
|
requirements.txt
CHANGED
|
@@ -9,4 +9,3 @@ accelerate>=1.2.0
|
|
| 9 |
sentence-transformers>=3.0.0
|
| 10 |
python-doctr[torch]>=0.9.0
|
| 11 |
huggingface_hub>=0.25.0
|
| 12 |
-
optimum[onnxruntime]>=1.23.0
|
|
|
|
| 9 |
sentence-transformers>=3.0.0
|
| 10 |
python-doctr[torch]>=0.9.0
|
| 11 |
huggingface_hub>=0.25.0
|
|
|
web/.env.example
CHANGED
|
@@ -17,13 +17,7 @@ RESEND_API_KEY=re_...
|
|
| 17 |
|
| 18 |
# App
|
| 19 |
NEXT_PUBLIC_SITE_URL=http://localhost:3000
|
| 20 |
-
|
| 21 |
-
# ClauseGuard Gradio Space URL (used by analyze, compare, redline routes)
|
| 22 |
-
CLAUSEGUARD_GRADIO_URL=https://gaurv007-clauseguard.hf.space
|
| 23 |
-
|
| 24 |
-
# Optional: FastAPI backend URL (only needed if deployed separately for chat/RAG sessions)
|
| 25 |
-
# If not set, chat will direct users to the Gradio Space
|
| 26 |
-
CLAUSEGUARD_API_URL=
|
| 27 |
|
| 28 |
# HF Inference API (for chatbot + redlining LLM)
|
| 29 |
HF_TOKEN=hf_...
|
|
|
|
| 17 |
|
| 18 |
# App
|
| 19 |
NEXT_PUBLIC_SITE_URL=http://localhost:3000
|
| 20 |
+
CLAUSEGUARD_API_URL=https://gaurv007-clauseguard-api.hf.space
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# HF Inference API (for chatbot + redlining LLM)
|
| 23 |
HF_TOKEN=hf_...
|
web/app/api/analyze/route.ts
CHANGED
|
@@ -1,22 +1,11 @@
|
|
| 1 |
import { NextRequest, NextResponse } from "next/server";
|
| 2 |
-
import { createClient } from "@/lib/supabase/server";
|
| 3 |
|
| 4 |
const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";
|
| 5 |
|
| 6 |
-
// FIX v4.1: Max text size validation (prevent oversized payloads)
|
| 7 |
-
const MAX_TEXT_LENGTH = 200_000; // 200KB
|
| 8 |
-
|
| 9 |
export async function POST(req: NextRequest) {
|
| 10 |
try {
|
| 11 |
-
const supabase = await createClient();
|
| 12 |
-
const { data: { user } } = await supabase.auth.getUser();
|
| 13 |
-
|
| 14 |
-
if (!user) {
|
| 15 |
-
return NextResponse.json({ error: "Unauthorized. Please log in to analyze texts." }, { status: 401 });
|
| 16 |
-
}
|
| 17 |
-
|
| 18 |
const body = await req.json();
|
| 19 |
-
|
| 20 |
|
| 21 |
if (!text || typeof text !== "string" || text.trim().length < 50) {
|
| 22 |
return NextResponse.json(
|
|
@@ -25,41 +14,8 @@ export async function POST(req: NextRequest) {
|
|
| 25 |
);
|
| 26 |
}
|
| 27 |
|
| 28 |
-
// FIX v4.1: Input size validation
|
| 29 |
-
if (text.length > MAX_TEXT_LENGTH) {
|
| 30 |
-
return NextResponse.json(
|
| 31 |
-
{ error: `Text too long (${(text.length / 1000).toFixed(0)}KB). Maximum is ${MAX_TEXT_LENGTH / 1000}KB.` },
|
| 32 |
-
{ status: 400 }
|
| 33 |
-
);
|
| 34 |
-
}
|
| 35 |
-
|
| 36 |
-
// FIX v4.1: REMOVED the XSS sanitization that corrupted contract text.
|
| 37 |
-
// The old code did: text = text.replace(/</g, "<").replace(/>/g, ">");
|
| 38 |
-
// This PERMANENTLY MUTATED the text before analysis, corrupting contracts
|
| 39 |
-
// that contain < or > characters (e.g., "shall not exceed >$10,000").
|
| 40 |
-
// Sanitization should happen at RENDER TIME in the frontend, not at analysis time.
|
| 41 |
-
// The frontend already uses React which auto-escapes HTML in JSX.
|
| 42 |
-
|
| 43 |
-
// Check scan limits — FIX v4.1: query the CORRECT table name
|
| 44 |
-
const { data: profile } = await supabase
|
| 45 |
-
.from("profiles")
|
| 46 |
-
.select("plan, role, analyses_this_month")
|
| 47 |
-
.eq("id", user.id)
|
| 48 |
-
.single();
|
| 49 |
-
|
| 50 |
-
const isAdmin = profile?.role === "admin";
|
| 51 |
-
const plan = profile?.plan || "free";
|
| 52 |
-
|
| 53 |
-
// FIX v4.1: Use analyses_this_month from profiles (already tracked), not a separate count query
|
| 54 |
-
const scanCount = profile?.analyses_this_month ?? 0;
|
| 55 |
-
const limit = isAdmin ? 999999 : plan === "free" ? 10 : 999999;
|
| 56 |
-
if (scanCount >= limit) {
|
| 57 |
-
return NextResponse.json({ error: "Monthly scan limit reached. Please upgrade to Pro." }, { status: 403 });
|
| 58 |
-
}
|
| 59 |
-
|
| 60 |
// Step 1: Submit to Gradio Space
|
| 61 |
-
|
| 62 |
-
const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/analyze`, {
|
| 63 |
method: "POST",
|
| 64 |
headers: { "Content-Type": "application/json" },
|
| 65 |
body: JSON.stringify({ data: [text] }),
|
|
@@ -72,16 +28,15 @@ export async function POST(req: NextRequest) {
|
|
| 72 |
const { event_id } = await submitRes.json();
|
| 73 |
if (!event_id) throw new Error("No event_id from Gradio");
|
| 74 |
|
| 75 |
-
//
|
| 76 |
-
//
|
| 77 |
let resultText = "";
|
| 78 |
let attempts = 0;
|
| 79 |
-
const maxAttempts =
|
| 80 |
-
let delay = 500; // Start at 500ms, increase
|
| 81 |
|
| 82 |
while (attempts < maxAttempts) {
|
| 83 |
const resultRes = await fetch(
|
| 84 |
-
`${GRADIO_URL}/gradio_api/call/
|
| 85 |
{ headers: { Accept: "text/event-stream" } }
|
| 86 |
);
|
| 87 |
|
|
@@ -93,26 +48,31 @@ export async function POST(req: NextRequest) {
|
|
| 93 |
throw new Error(errMatch ? errMatch[1] : "Analysis failed in backend");
|
| 94 |
}
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
| 98 |
attempts++;
|
| 99 |
}
|
| 100 |
|
| 101 |
if (!resultText.includes("event: complete")) {
|
| 102 |
-
throw new Error("Analysis timed out
|
| 103 |
}
|
| 104 |
|
| 105 |
// Step 3: Parse the SSE data
|
|
|
|
|
|
|
| 106 |
const completeIdx = resultText.indexOf("event: complete");
|
| 107 |
const dataIdx = resultText.indexOf("data: ", completeIdx);
|
| 108 |
if (dataIdx === -1) throw new Error("No data in response");
|
| 109 |
|
| 110 |
const dataStr = resultText.substring(dataIdx + 6).trim();
|
| 111 |
|
|
|
|
|
|
|
| 112 |
let gradioData: any[];
|
| 113 |
try {
|
| 114 |
gradioData = JSON.parse(dataStr);
|
| 115 |
} catch {
|
|
|
|
| 116 |
const cleaned = dataStr.replace(/[\x00-\x1f]/g, (ch: string) => {
|
| 117 |
if (ch === "\n") return "\\n";
|
| 118 |
if (ch === "\r") return "\\r";
|
|
@@ -123,11 +83,13 @@ export async function POST(req: NextRequest) {
|
|
| 123 |
}
|
| 124 |
|
| 125 |
// Step 4: Download the JSON report file (structured data)
|
|
|
|
| 126 |
const jsonFileObj = gradioData[8];
|
| 127 |
if (!jsonFileObj?.url) {
|
| 128 |
throw new Error("No JSON report generated");
|
| 129 |
}
|
| 130 |
|
|
|
|
| 131 |
const jsonRes = await fetch(jsonFileObj.url);
|
| 132 |
if (!jsonRes.ok) throw new Error("Failed to download analysis JSON");
|
| 133 |
const analysisData = await jsonRes.json();
|
|
@@ -153,80 +115,37 @@ export async function POST(req: NextRequest) {
|
|
| 153 |
}
|
| 154 |
const results = Array.from(clauseMap.values());
|
| 155 |
|
| 156 |
-
//
|
| 157 |
const redlines: any[] = [];
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
const block = blocks[i];
|
| 182 |
-
const labelMatch = block.match(/font-weight:600[^>]*>([^<]+)<\/span>\s*<span[^>]*font-weight:600[^>]*>([^<]+)/);
|
| 183 |
-
const origMatch = block.match(/<del>([^<]*)<\/del>/);
|
| 184 |
-
const safeBlock = block.match(/Suggested Alternative[\s\S]*?<div[^>]*color:#166534[^>]*>([\s\S]*?)<\/div>/);
|
| 185 |
-
const legalMatch = block.match(/Legal Basis<\/div>\s*<div[^>]*>([^<]+)/);
|
| 186 |
-
const consumerMatch = block.match(/Consumer Standard<\/div>\s*<div[^>]*>([^<]+)/);
|
| 187 |
-
const isLLM = block.includes("LLM Refined");
|
| 188 |
-
|
| 189 |
-
if (labelMatch) {
|
| 190 |
-
redlines.push({
|
| 191 |
-
clause_label: labelMatch[1].trim(),
|
| 192 |
-
risk_level: labelMatch[2].trim(),
|
| 193 |
-
original_text: origMatch ? origMatch[1].trim() : "",
|
| 194 |
-
safe_alternative: safeBlock ? safeBlock[1].replace(/<[^>]+>/g, "").trim() : "",
|
| 195 |
-
legal_basis: legalMatch ? legalMatch[1].trim() : "",
|
| 196 |
-
consumer_standard: consumerMatch ? consumerMatch[1].trim() : "",
|
| 197 |
-
tier: isLLM ? "llm_refined" : "template",
|
| 198 |
-
});
|
| 199 |
-
}
|
| 200 |
}
|
| 201 |
}
|
| 202 |
}
|
| 203 |
|
| 204 |
const modelStatus = analysisData.metadata?.model || "";
|
| 205 |
|
| 206 |
-
// FIX v4.1: Increment scan count in profiles table
|
| 207 |
-
await supabase
|
| 208 |
-
.from("profiles")
|
| 209 |
-
.update({ analyses_this_month: scanCount + 1 })
|
| 210 |
-
.eq("id", user.id);
|
| 211 |
-
|
| 212 |
-
// FIX v4.3: Save analysis to DB so it shows in history
|
| 213 |
-
// Wrapped in Promise.resolve() because Supabase returns PromiseLike (no .catch)
|
| 214 |
-
Promise.resolve(
|
| 215 |
-
supabase.from("analyses").insert({
|
| 216 |
-
user_id: user.id,
|
| 217 |
-
total_clauses: totalClauses,
|
| 218 |
-
flagged_count: flaggedCount,
|
| 219 |
-
risk_score: riskScore,
|
| 220 |
-
grade,
|
| 221 |
-
clauses: results,
|
| 222 |
-
entities: analysisData.entities || [],
|
| 223 |
-
contradictions: analysisData.contradictions || [],
|
| 224 |
-
obligations: analysisData.obligations || [],
|
| 225 |
-
compliance: analysisData.compliance || {},
|
| 226 |
-
model: modelStatus.includes("loaded") ? "ml" : "regex",
|
| 227 |
-
})
|
| 228 |
-
).catch(() => {}); // fire-and-forget, don't block response
|
| 229 |
-
|
| 230 |
return NextResponse.json({
|
| 231 |
risk_score: riskScore,
|
| 232 |
grade,
|
|
|
|
| 1 |
import { NextRequest, NextResponse } from "next/server";
|
|
|
|
| 2 |
|
| 3 |
const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";
|
| 4 |
|
|
|
|
|
|
|
|
|
|
| 5 |
export async function POST(req: NextRequest) {
|
| 6 |
try {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
const body = await req.json();
|
| 8 |
+
const { text } = body;
|
| 9 |
|
| 10 |
if (!text || typeof text !== "string" || text.trim().length < 50) {
|
| 11 |
return NextResponse.json(
|
|
|
|
| 14 |
);
|
| 15 |
}
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
// Step 1: Submit to Gradio Space
|
| 18 |
+
const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/_analysis_and_index`, {
|
|
|
|
| 19 |
method: "POST",
|
| 20 |
headers: { "Content-Type": "application/json" },
|
| 21 |
body: JSON.stringify({ data: [text] }),
|
|
|
|
| 28 |
const { event_id } = await submitRes.json();
|
| 29 |
if (!event_id) throw new Error("No event_id from Gradio");
|
| 30 |
|
| 31 |
+
// Step 2: Poll for result (SSE)
|
| 32 |
+
// The Gradio API streams but we need the full response
|
| 33 |
let resultText = "";
|
| 34 |
let attempts = 0;
|
| 35 |
+
const maxAttempts = 60; // 60 seconds max
|
|
|
|
| 36 |
|
| 37 |
while (attempts < maxAttempts) {
|
| 38 |
const resultRes = await fetch(
|
| 39 |
+
`${GRADIO_URL}/gradio_api/call/_analysis_and_index/${event_id}`,
|
| 40 |
{ headers: { Accept: "text/event-stream" } }
|
| 41 |
);
|
| 42 |
|
|
|
|
| 48 |
throw new Error(errMatch ? errMatch[1] : "Analysis failed in backend");
|
| 49 |
}
|
| 50 |
|
| 51 |
+
// Wait 1 second and retry
|
| 52 |
+
await new Promise(r => setTimeout(r, 1000));
|
| 53 |
attempts++;
|
| 54 |
}
|
| 55 |
|
| 56 |
if (!resultText.includes("event: complete")) {
|
| 57 |
+
throw new Error("Analysis timed out");
|
| 58 |
}
|
| 59 |
|
| 60 |
// Step 3: Parse the SSE data
|
| 61 |
+
// Format: "event: complete\ndata: [...]"
|
| 62 |
+
// The data contains HTML with literal newlines, so we need to find 'data: ' after 'event: complete'
|
| 63 |
const completeIdx = resultText.indexOf("event: complete");
|
| 64 |
const dataIdx = resultText.indexOf("data: ", completeIdx);
|
| 65 |
if (dataIdx === -1) throw new Error("No data in response");
|
| 66 |
|
| 67 |
const dataStr = resultText.substring(dataIdx + 6).trim();
|
| 68 |
|
| 69 |
+
// Parse JSON — the HTML strings contain control characters so we need to handle that
|
| 70 |
+
// In JS, JSON.parse is more lenient with control chars in strings than Python's strict mode
|
| 71 |
let gradioData: any[];
|
| 72 |
try {
|
| 73 |
gradioData = JSON.parse(dataStr);
|
| 74 |
} catch {
|
| 75 |
+
// If direct parse fails, try replacing problematic control characters
|
| 76 |
const cleaned = dataStr.replace(/[\x00-\x1f]/g, (ch: string) => {
|
| 77 |
if (ch === "\n") return "\\n";
|
| 78 |
if (ch === "\r") return "\\r";
|
|
|
|
| 83 |
}
|
| 84 |
|
| 85 |
// Step 4: Download the JSON report file (structured data)
|
| 86 |
+
// gradioData[8] is the JSON file object with { url, path, ... }
|
| 87 |
const jsonFileObj = gradioData[8];
|
| 88 |
if (!jsonFileObj?.url) {
|
| 89 |
throw new Error("No JSON report generated");
|
| 90 |
}
|
| 91 |
|
| 92 |
+
// Download immediately (temp files expire quickly)
|
| 93 |
const jsonRes = await fetch(jsonFileObj.url);
|
| 94 |
if (!jsonRes.ok) throw new Error("Failed to download analysis JSON");
|
| 95 |
const analysisData = await jsonRes.json();
|
|
|
|
| 115 |
}
|
| 116 |
const results = Array.from(clauseMap.values());
|
| 117 |
|
| 118 |
+
// Parse redlines from HTML (gradioData[7])
|
| 119 |
const redlines: any[] = [];
|
| 120 |
+
const redlineHtml = typeof gradioData[7] === "string" ? gradioData[7] : "";
|
| 121 |
+
if (redlineHtml.includes("Clause Redlining")) {
|
| 122 |
+
// Split by redline card borders
|
| 123 |
+
const blocks = redlineHtml.split(/border-left:4px solid #/);
|
| 124 |
+
for (let i = 1; i < blocks.length; i++) {
|
| 125 |
+
const block = blocks[i];
|
| 126 |
+
const labelMatch = block.match(/font-weight:600[^>]*>([^<]+)<\/span>\s*<span[^>]*font-weight:600[^>]*>([^<]+)/);
|
| 127 |
+
const origMatch = block.match(/<del>([^<]*)<\/del>/);
|
| 128 |
+
const safeBlock = block.match(/Suggested Alternative[\s\S]*?<div[^>]*color:#166534[^>]*>([\s\S]*?)<\/div>/);
|
| 129 |
+
const legalMatch = block.match(/Legal Basis<\/div>\s*<div[^>]*>([^<]+)/);
|
| 130 |
+
const consumerMatch = block.match(/Consumer Standard<\/div>\s*<div[^>]*>([^<]+)/);
|
| 131 |
+
const isLLM = block.includes("LLM Refined");
|
| 132 |
+
|
| 133 |
+
if (labelMatch) {
|
| 134 |
+
redlines.push({
|
| 135 |
+
clause_label: labelMatch[1].trim(),
|
| 136 |
+
risk_level: labelMatch[2].trim(),
|
| 137 |
+
original_text: origMatch ? origMatch[1].trim() : "",
|
| 138 |
+
safe_alternative: safeBlock ? safeBlock[1].replace(/<[^>]+>/g, "").trim() : "",
|
| 139 |
+
legal_basis: legalMatch ? legalMatch[1].trim() : "",
|
| 140 |
+
consumer_standard: consumerMatch ? consumerMatch[1].trim() : "",
|
| 141 |
+
tier: isLLM ? "llm_refined" : "template",
|
| 142 |
+
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
}
|
| 144 |
}
|
| 145 |
}
|
| 146 |
|
| 147 |
const modelStatus = analysisData.metadata?.model || "";
|
| 148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
return NextResponse.json({
|
| 150 |
risk_score: riskScore,
|
| 151 |
grade,
|
web/app/api/chat/route.ts
CHANGED
|
@@ -1,35 +1,11 @@
|
|
| 1 |
import { NextRequest, NextResponse } from "next/server";
|
| 2 |
-
import { createClient } from "@/lib/supabase/server";
|
| 3 |
|
| 4 |
-
/
|
| 5 |
-
|
| 6 |
-
*
|
| 7 |
-
* ARCHITECTURE:
|
| 8 |
-
* The Gradio ChatInterface uses gr.State for RAG embeddings — these are
|
| 9 |
-
* per-browser-session and NOT accessible via the Gradio REST API. Every API
|
| 10 |
-
* call creates a new session with empty state, so chat via Gradio API will
|
| 11 |
-
* NEVER have contract context.
|
| 12 |
-
*
|
| 13 |
-
* The correct approach:
|
| 14 |
-
* 1. PRIMARY: Use the FastAPI backend (/api/chat) which manages RAG sessions
|
| 15 |
-
* with proper TTL-based expiry. The session_id comes from /api/analyze.
|
| 16 |
-
* 2. FALLBACK: If FastAPI is unavailable, return a clear error directing
|
| 17 |
-
* the user to use the Gradio Space directly.
|
| 18 |
-
*
|
| 19 |
-
* The old code tried to call a non-existent Gradio "chat" endpoint which
|
| 20 |
-
* always failed. Removed the broken Gradio fallback entirely.
|
| 21 |
-
*/
|
| 22 |
export async function POST(req: NextRequest) {
|
| 23 |
try {
|
| 24 |
-
const supabase = await createClient();
|
| 25 |
-
const { data: { user } } = await supabase.auth.getUser();
|
| 26 |
-
|
| 27 |
-
if (!user) {
|
| 28 |
-
return NextResponse.json({ error: "Unauthorized. Please log in." }, { status: 401 });
|
| 29 |
-
}
|
| 30 |
-
|
| 31 |
const body = await req.json();
|
| 32 |
-
const { message, history
|
| 33 |
|
| 34 |
if (!message) {
|
| 35 |
return NextResponse.json(
|
|
@@ -38,64 +14,55 @@ export async function POST(req: NextRequest) {
|
|
| 38 |
);
|
| 39 |
}
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
try {
|
| 52 |
-
const apiRes = await fetch(`${apiUrl}/api/chat`, {
|
| 53 |
-
method: "POST",
|
| 54 |
-
headers: { "Content-Type": "application/json" },
|
| 55 |
-
body: JSON.stringify({ message, session_id, history: history || [] }),
|
| 56 |
-
});
|
| 57 |
-
if (apiRes.ok) {
|
| 58 |
-
const data = await apiRes.json();
|
| 59 |
-
return NextResponse.json({ response: data.response });
|
| 60 |
-
}
|
| 61 |
-
// If 404, session expired
|
| 62 |
-
if (apiRes.status === 404) {
|
| 63 |
-
return NextResponse.json({
|
| 64 |
-
response: "⚠️ Your chat session has expired (sessions last 1 hour). " +
|
| 65 |
-
"Please analyze the contract again to start a new chat session."
|
| 66 |
-
});
|
| 67 |
-
}
|
| 68 |
-
} catch {
|
| 69 |
-
// FastAPI backend unreachable — fall through to error message
|
| 70 |
-
}
|
| 71 |
}
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
| 81 |
}
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
}
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
});
|
| 94 |
|
|
|
|
| 95 |
} catch (error: any) {
|
| 96 |
console.error("Chat error:", error.message);
|
| 97 |
return NextResponse.json(
|
| 98 |
-
{ error: error.message || "Chat failed. Make sure you analyzed a contract first." },
|
| 99 |
{ status: 500 }
|
| 100 |
);
|
| 101 |
}
|
|
|
|
| 1 |
import { NextRequest, NextResponse } from "next/server";
|
|
|
|
| 2 |
|
| 3 |
+
const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";
|
| 4 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
export async function POST(req: NextRequest) {
|
| 6 |
try {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
const body = await req.json();
|
| 8 |
+
const { message, history } = body;
|
| 9 |
|
| 10 |
if (!message) {
|
| 11 |
return NextResponse.json(
|
|
|
|
| 14 |
);
|
| 15 |
}
|
| 16 |
|
| 17 |
+
// The Gradio ChatInterface endpoint is /chat
|
| 18 |
+
// It accepts: message (str), then the additional_inputs are handled by Gradio state
|
| 19 |
+
// We need to call the Gradio API with the message
|
| 20 |
+
const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/chat`, {
|
| 21 |
+
method: "POST",
|
| 22 |
+
headers: { "Content-Type": "application/json" },
|
| 23 |
+
body: JSON.stringify({ data: [message] }),
|
| 24 |
+
});
|
| 25 |
|
| 26 |
+
if (!submitRes.ok) {
|
| 27 |
+
const errText = await submitRes.text().catch(() => "");
|
| 28 |
+
throw new Error(`Chat submit failed (${submitRes.status}): ${errText}`);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
}
|
| 30 |
|
| 31 |
+
const { event_id } = await submitRes.json();
|
| 32 |
+
if (!event_id) throw new Error("No event_id from Gradio chat");
|
| 33 |
+
|
| 34 |
+
// Poll for streaming result
|
| 35 |
+
const resultRes = await fetch(
|
| 36 |
+
`${GRADIO_URL}/gradio_api/call/chat/${event_id}`,
|
| 37 |
+
{ headers: { Accept: "text/event-stream" } }
|
| 38 |
+
);
|
| 39 |
+
|
| 40 |
+
if (!resultRes.ok) {
|
| 41 |
+
throw new Error(`Chat result failed: ${resultRes.status}`);
|
| 42 |
}
|
| 43 |
|
| 44 |
+
const resultText = await resultRes.text();
|
| 45 |
+
|
| 46 |
+
// Find the complete event data
|
| 47 |
+
const dataMatch = resultText.match(/event:\s*complete\s*\ndata:\s*(.+)/);
|
| 48 |
+
if (!dataMatch) {
|
| 49 |
+
// Check for error
|
| 50 |
+
const errMatch = resultText.match(/event:\s*error\s*\ndata:\s*(.+)/);
|
| 51 |
+
if (errMatch) {
|
| 52 |
+
throw new Error(`Chat error: ${errMatch[1]}`);
|
| 53 |
+
}
|
| 54 |
+
throw new Error("No response from chatbot. Analyze a contract first in the Gradio Space, then try chatting.");
|
| 55 |
}
|
| 56 |
|
| 57 |
+
const responseData = JSON.parse(dataMatch[1]);
|
| 58 |
+
// The ChatInterface returns the response as a string
|
| 59 |
+
const responseText = typeof responseData === "string" ? responseData : responseData[0] || "";
|
|
|
|
| 60 |
|
| 61 |
+
return NextResponse.json({ response: responseText });
|
| 62 |
} catch (error: any) {
|
| 63 |
console.error("Chat error:", error.message);
|
| 64 |
return NextResponse.json(
|
| 65 |
+
{ error: error.message || "Chat failed. Make sure you analyzed a contract in the Gradio Space first." },
|
| 66 |
{ status: 500 }
|
| 67 |
);
|
| 68 |
}
|
web/app/api/compare/route.ts
CHANGED
|
@@ -1,17 +1,9 @@
|
|
| 1 |
import { NextRequest, NextResponse } from "next/server";
|
| 2 |
-
import { createClient } from "@/lib/supabase/server";
|
| 3 |
|
| 4 |
const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";
|
| 5 |
|
| 6 |
export async function POST(req: NextRequest) {
|
| 7 |
try {
|
| 8 |
-
const supabase = await createClient();
|
| 9 |
-
const { data: { user } } = await supabase.auth.getUser();
|
| 10 |
-
|
| 11 |
-
if (!user) {
|
| 12 |
-
return NextResponse.json({ error: "Unauthorized. Please log in." }, { status: 401 });
|
| 13 |
-
}
|
| 14 |
-
|
| 15 |
const body = await req.json();
|
| 16 |
const { text_a, text_b } = body;
|
| 17 |
|
|
@@ -22,13 +14,8 @@ export async function POST(req: NextRequest) {
|
|
| 22 |
);
|
| 23 |
}
|
| 24 |
|
| 25 |
-
// FIX v4.3: REMOVED HTML-escaping that CORRUPTED contract text before analysis.
|
| 26 |
-
// The old code did text_a.replace(/</g, "<") which permanently mutated
|
| 27 |
-
// the text (e.g., ">$10,000" → ">$10,000"). Sanitization is the
|
| 28 |
-
// frontend's job — React auto-escapes in JSX. Never mutate analysis input.
|
| 29 |
-
|
| 30 |
// Call Gradio Space API
|
| 31 |
-
const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/
|
| 32 |
method: "POST",
|
| 33 |
headers: { "Content-Type": "application/json" },
|
| 34 |
body: JSON.stringify({ data: [text_a, text_b] }),
|
|
@@ -41,44 +28,24 @@ export async function POST(req: NextRequest) {
|
|
| 41 |
const { event_id } = await submitRes.json();
|
| 42 |
if (!event_id) throw new Error("No event_id from Gradio");
|
| 43 |
|
| 44 |
-
// Poll for result
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
while (attempts < maxAttempts) {
|
| 51 |
-
const resultRes = await fetch(
|
| 52 |
-
`${GRADIO_URL}/gradio_api/call/compare/${event_id}`,
|
| 53 |
-
{ headers: { Accept: "text/event-stream" } }
|
| 54 |
-
);
|
| 55 |
-
|
| 56 |
-
resultText = await resultRes.text();
|
| 57 |
-
|
| 58 |
-
if (resultText.includes("event: complete")) break;
|
| 59 |
-
if (resultText.includes("event: error")) {
|
| 60 |
-
const errMatch = resultText.match(/data:\s*(.+)/);
|
| 61 |
-
throw new Error(errMatch ? errMatch[1] : "Comparison failed in backend");
|
| 62 |
-
}
|
| 63 |
-
|
| 64 |
-
await new Promise(r => setTimeout(r, delay));
|
| 65 |
-
delay = Math.min(delay * 1.2, 2000);
|
| 66 |
-
attempts++;
|
| 67 |
-
}
|
| 68 |
|
| 69 |
-
if (!
|
| 70 |
-
throw new Error(
|
| 71 |
}
|
| 72 |
|
| 73 |
-
const
|
| 74 |
-
const
|
| 75 |
-
if (
|
| 76 |
-
|
| 77 |
-
const dataStr = resultText.substring(dataIdx + 6).trim();
|
| 78 |
-
const gradioData = JSON.parse(dataStr);
|
| 79 |
|
|
|
|
| 80 |
// gradioData[0] = comparison HTML
|
| 81 |
// gradioData[1] = raw JSON comparison data
|
|
|
|
| 82 |
const comparisonResult = gradioData[1];
|
| 83 |
if (typeof comparisonResult === "object" && comparisonResult !== null) {
|
| 84 |
return NextResponse.json(comparisonResult);
|
|
|
|
| 1 |
import { NextRequest, NextResponse } from "next/server";
|
|
|
|
| 2 |
|
| 3 |
const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";
|
| 4 |
|
| 5 |
export async function POST(req: NextRequest) {
|
| 6 |
try {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
const body = await req.json();
|
| 8 |
const { text_a, text_b } = body;
|
| 9 |
|
|
|
|
| 14 |
);
|
| 15 |
}
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
// Call Gradio Space API
|
| 18 |
+
const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/run_comparison`, {
|
| 19 |
method: "POST",
|
| 20 |
headers: { "Content-Type": "application/json" },
|
| 21 |
body: JSON.stringify({ data: [text_a, text_b] }),
|
|
|
|
| 28 |
const { event_id } = await submitRes.json();
|
| 29 |
if (!event_id) throw new Error("No event_id from Gradio");
|
| 30 |
|
| 31 |
+
// Poll for result
|
| 32 |
+
const resultRes = await fetch(
|
| 33 |
+
`${GRADIO_URL}/gradio_api/call/run_comparison/${event_id}`,
|
| 34 |
+
{ headers: { Accept: "text/event-stream" } }
|
| 35 |
+
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
+
if (!resultRes.ok) {
|
| 38 |
+
throw new Error(`Gradio result failed: ${resultRes.status}`);
|
| 39 |
}
|
| 40 |
|
| 41 |
+
const resultText = await resultRes.text();
|
| 42 |
+
const dataMatch = resultText.match(/event:\s*complete\s*\ndata:\s*(.+)/);
|
| 43 |
+
if (!dataMatch) throw new Error("No complete event from Gradio");
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
+
const gradioData = JSON.parse(dataMatch[1]);
|
| 46 |
// gradioData[0] = comparison HTML
|
| 47 |
// gradioData[1] = raw JSON comparison data
|
| 48 |
+
|
| 49 |
const comparisonResult = gradioData[1];
|
| 50 |
if (typeof comparisonResult === "object" && comparisonResult !== null) {
|
| 51 |
return NextResponse.json(comparisonResult);
|
web/app/api/parse-upload/route.ts
CHANGED
|
@@ -1,20 +1,9 @@
|
|
| 1 |
import { NextRequest, NextResponse } from "next/server";
|
| 2 |
-
import { createClient } from "@/lib/supabase/server";
|
| 3 |
|
| 4 |
export const runtime = "nodejs";
|
| 5 |
|
| 6 |
-
// Add a 5MB size limit
|
| 7 |
-
const MAX_FILE_SIZE = 5 * 1024 * 1024;
|
| 8 |
-
|
| 9 |
export async function POST(req: NextRequest) {
|
| 10 |
try {
|
| 11 |
-
const supabase = await createClient();
|
| 12 |
-
const { data: { user } } = await supabase.auth.getUser();
|
| 13 |
-
|
| 14 |
-
if (!user) {
|
| 15 |
-
return NextResponse.json({ error: "Unauthorized. Please log in." }, { status: 401 });
|
| 16 |
-
}
|
| 17 |
-
|
| 18 |
const formData = await req.formData();
|
| 19 |
const file = formData.get("file") as File | null;
|
| 20 |
|
|
@@ -22,10 +11,6 @@ export async function POST(req: NextRequest) {
|
|
| 22 |
return NextResponse.json({ error: "No file uploaded" }, { status: 400 });
|
| 23 |
}
|
| 24 |
|
| 25 |
-
if (file.size > MAX_FILE_SIZE) {
|
| 26 |
-
return NextResponse.json({ error: "File exceeds 5MB size limit" }, { status: 400 });
|
| 27 |
-
}
|
| 28 |
-
|
| 29 |
const name = file.name.toLowerCase();
|
| 30 |
const buffer = Buffer.from(await file.arrayBuffer());
|
| 31 |
let text = "";
|
|
@@ -33,20 +18,13 @@ export async function POST(req: NextRequest) {
|
|
| 33 |
if (name.endsWith(".txt") || name.endsWith(".md")) {
|
| 34 |
text = new TextDecoder().decode(buffer);
|
| 35 |
} else if (name.endsWith(".pdf")) {
|
| 36 |
-
// pdf-parse v2
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
await parser.destroy();
|
| 44 |
-
} catch (pdfErr: any) {
|
| 45 |
-
console.error("PDF parse error:", pdfErr);
|
| 46 |
-
return NextResponse.json({
|
| 47 |
-
error: "PDF parsing failed. Please copy-paste the text directly, or use the Gradio Space which has OCR support."
|
| 48 |
-
}, { status: 400 });
|
| 49 |
-
}
|
| 50 |
} else if (name.endsWith(".docx")) {
|
| 51 |
const mammoth = (await import("mammoth")).default;
|
| 52 |
const result = await mammoth.extractRawText({ buffer });
|
|
|
|
| 1 |
import { NextRequest, NextResponse } from "next/server";
|
|
|
|
| 2 |
|
| 3 |
export const runtime = "nodejs";
|
| 4 |
|
|
|
|
|
|
|
|
|
|
| 5 |
export async function POST(req: NextRequest) {
|
| 6 |
try {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
const formData = await req.formData();
|
| 8 |
const file = formData.get("file") as File | null;
|
| 9 |
|
|
|
|
| 11 |
return NextResponse.json({ error: "No file uploaded" }, { status: 400 });
|
| 12 |
}
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
const name = file.name.toLowerCase();
|
| 15 |
const buffer = Buffer.from(await file.arrayBuffer());
|
| 16 |
let text = "";
|
|
|
|
| 18 |
if (name.endsWith(".txt") || name.endsWith(".md")) {
|
| 19 |
text = new TextDecoder().decode(buffer);
|
| 20 |
} else if (name.endsWith(".pdf")) {
|
| 21 |
+
// pdf-parse v2
|
| 22 |
+
await import("pdf-parse/worker");
|
| 23 |
+
const { PDFParse } = await import("pdf-parse");
|
| 24 |
+
const parser = new PDFParse({ data: buffer });
|
| 25 |
+
const result = await parser.getText();
|
| 26 |
+
text = result.text;
|
| 27 |
+
await parser.destroy();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
} else if (name.endsWith(".docx")) {
|
| 29 |
const mammoth = (await import("mammoth")).default;
|
| 30 |
const result = await mammoth.extractRawText({ buffer });
|
web/app/api/redline/route.ts
CHANGED
|
@@ -1,25 +1,9 @@
|
|
| 1 |
import { NextRequest, NextResponse } from "next/server";
|
| 2 |
-
import { createClient } from "@/lib/supabase/server";
|
| 3 |
|
| 4 |
-
/
|
| 5 |
-
* FIX v4.3: Redline route now works through the Gradio Space directly.
|
| 6 |
-
* The old code pointed to a non-existent FastAPI Space (gaurv007-clauseguard-api.hf.space).
|
| 7 |
-
* Since redlining is already part of the analyze pipeline (returned in analysis results),
|
| 8 |
-
* this endpoint is primarily for re-running redlines on existing text.
|
| 9 |
-
*/
|
| 10 |
-
|
| 11 |
-
const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";
|
| 12 |
-
const API_URL = process.env.CLAUSEGUARD_API_URL || "";
|
| 13 |
|
| 14 |
export async function POST(req: NextRequest) {
|
| 15 |
try {
|
| 16 |
-
const supabase = await createClient();
|
| 17 |
-
const { data: { user } } = await supabase.auth.getUser();
|
| 18 |
-
|
| 19 |
-
if (!user) {
|
| 20 |
-
return NextResponse.json({ error: "Unauthorized. Please log in." }, { status: 401 });
|
| 21 |
-
}
|
| 22 |
-
|
| 23 |
const body = await req.json();
|
| 24 |
const { session_id, text, use_llm } = body;
|
| 25 |
|
|
@@ -30,89 +14,19 @@ export async function POST(req: NextRequest) {
|
|
| 30 |
);
|
| 31 |
}
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
headers: { "Content-Type": "application/json" },
|
| 39 |
-
body: JSON.stringify({ session_id, text, use_llm: use_llm ?? true }),
|
| 40 |
-
});
|
| 41 |
-
|
| 42 |
-
if (response.ok) {
|
| 43 |
-
const result = await response.json();
|
| 44 |
-
return NextResponse.json(result);
|
| 45 |
-
}
|
| 46 |
-
} catch {
|
| 47 |
-
// Fall through to Gradio approach
|
| 48 |
-
}
|
| 49 |
-
}
|
| 50 |
-
|
| 51 |
-
// Fallback: If text is provided, run full analysis via Gradio (includes redlines)
|
| 52 |
-
if (text) {
|
| 53 |
-
if (text.trim().length < 50) {
|
| 54 |
-
return NextResponse.json({ error: "Text too short (min 50 chars)" }, { status: 400 });
|
| 55 |
-
}
|
| 56 |
-
|
| 57 |
-
const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/analyze`, {
|
| 58 |
-
method: "POST",
|
| 59 |
-
headers: { "Content-Type": "application/json" },
|
| 60 |
-
body: JSON.stringify({ data: [text] }),
|
| 61 |
-
});
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
}
|
| 66 |
-
|
| 67 |
-
const { event_id } = await submitRes.json();
|
| 68 |
-
if (!event_id) throw new Error("No event_id from Gradio");
|
| 69 |
-
|
| 70 |
-
let resultText = "";
|
| 71 |
-
let attempts = 0;
|
| 72 |
-
while (attempts < 90) {
|
| 73 |
-
const resultRes = await fetch(
|
| 74 |
-
`${GRADIO_URL}/gradio_api/call/analyze/${event_id}`,
|
| 75 |
-
{ headers: { Accept: "text/event-stream" } }
|
| 76 |
-
);
|
| 77 |
-
resultText = await resultRes.text();
|
| 78 |
-
if (resultText.includes("event: complete")) break;
|
| 79 |
-
if (resultText.includes("event: error")) throw new Error("Redline analysis failed");
|
| 80 |
-
await new Promise(r => setTimeout(r, 1000));
|
| 81 |
-
attempts++;
|
| 82 |
-
}
|
| 83 |
-
|
| 84 |
-
if (!resultText.includes("event: complete")) {
|
| 85 |
-
throw new Error("Analysis timed out");
|
| 86 |
-
}
|
| 87 |
-
|
| 88 |
-
// Parse the result to extract redlines from the JSON report
|
| 89 |
-
const completeIdx = resultText.indexOf("event: complete");
|
| 90 |
-
const dataIdx = resultText.indexOf("data: ", completeIdx);
|
| 91 |
-
if (dataIdx === -1) throw new Error("No data in response");
|
| 92 |
-
|
| 93 |
-
const dataStr = resultText.substring(dataIdx + 6).trim();
|
| 94 |
-
const gradioData = JSON.parse(dataStr);
|
| 95 |
-
|
| 96 |
-
// Download JSON report file
|
| 97 |
-
const jsonFileObj = gradioData[8];
|
| 98 |
-
if (jsonFileObj?.url) {
|
| 99 |
-
const jsonRes = await fetch(jsonFileObj.url);
|
| 100 |
-
if (jsonRes.ok) {
|
| 101 |
-
const analysisData = await jsonRes.json();
|
| 102 |
-
if (analysisData.redlines) {
|
| 103 |
-
return NextResponse.json({ redlines: analysisData.redlines, count: analysisData.redlines.length });
|
| 104 |
-
}
|
| 105 |
-
}
|
| 106 |
-
}
|
| 107 |
-
|
| 108 |
-
return NextResponse.json({ redlines: [], count: 0 });
|
| 109 |
}
|
| 110 |
|
| 111 |
-
|
| 112 |
-
return NextResponse.json(
|
| 113 |
-
error: "Redline by session_id requires the FastAPI backend. Provide contract text instead, or use the analysis results which already include redline suggestions.",
|
| 114 |
-
}, { status: 400 });
|
| 115 |
-
|
| 116 |
} catch (error: any) {
|
| 117 |
console.error("Redline error:", error.message);
|
| 118 |
return NextResponse.json(
|
|
|
|
| 1 |
import { NextRequest, NextResponse } from "next/server";
|
|
|
|
| 2 |
|
| 3 |
+
const API_URL = process.env.CLAUSEGUARD_API_URL || "https://gaurv007-clauseguard-api.hf.space";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
export async function POST(req: NextRequest) {
|
| 6 |
try {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
const body = await req.json();
|
| 8 |
const { session_id, text, use_llm } = body;
|
| 9 |
|
|
|
|
| 14 |
);
|
| 15 |
}
|
| 16 |
|
| 17 |
+
const response = await fetch(`${API_URL}/api/redline`, {
|
| 18 |
+
method: "POST",
|
| 19 |
+
headers: { "Content-Type": "application/json" },
|
| 20 |
+
body: JSON.stringify({ session_id, text, use_llm: use_llm ?? true }),
|
| 21 |
+
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
+
if (!response.ok) {
|
| 24 |
+
const err = await response.text().catch(() => "");
|
| 25 |
+
throw new Error(err || `Backend error: ${response.status}`);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
}
|
| 27 |
|
| 28 |
+
const result = await response.json();
|
| 29 |
+
return NextResponse.json(result);
|
|
|
|
|
|
|
|
|
|
| 30 |
} catch (error: any) {
|
| 31 |
console.error("Redline error:", error.message);
|
| 32 |
return NextResponse.json(
|
web/app/api/subscribe/create/route.ts
CHANGED
|
@@ -13,7 +13,7 @@ export async function POST(req: NextRequest) {
|
|
| 13 |
return NextResponse.json({ error: "Invalid plan" }, { status: 400 });
|
| 14 |
}
|
| 15 |
|
| 16 |
-
const planId = PLANS[plan
|
| 17 |
if (!planId) return NextResponse.json({ error: "Plan not configured" }, { status: 500 });
|
| 18 |
|
| 19 |
const razorpay = getRazorpay();
|
|
|
|
| 13 |
return NextResponse.json({ error: "Invalid plan" }, { status: 400 });
|
| 14 |
}
|
| 15 |
|
| 16 |
+
const planId = PLANS[plan].razorpay_plan_id;
|
| 17 |
if (!planId) return NextResponse.json({ error: "Plan not configured" }, { status: 500 });
|
| 18 |
|
| 19 |
const razorpay = getRazorpay();
|
web/app/auth/callback/route.ts
CHANGED
|
@@ -4,14 +4,9 @@ import { NextResponse } from "next/server";
|
|
| 4 |
export async function GET(request: Request) {
|
| 5 |
const requestUrl = new URL(request.url);
|
| 6 |
const code = requestUrl.searchParams.get("code");
|
| 7 |
-
|
| 8 |
const origin = requestUrl.origin;
|
| 9 |
|
| 10 |
-
// Prevent open redirect
|
| 11 |
-
if (next && !next.startsWith("/")) {
|
| 12 |
-
next = "/dashboard-pages/dashboard";
|
| 13 |
-
}
|
| 14 |
-
|
| 15 |
if (code) {
|
| 16 |
const supabase = await createClient();
|
| 17 |
const { error } = await supabase.auth.exchangeCodeForSession(code);
|
|
|
|
| 4 |
export async function GET(request: Request) {
|
| 5 |
const requestUrl = new URL(request.url);
|
| 6 |
const code = requestUrl.searchParams.get("code");
|
| 7 |
+
const next = requestUrl.searchParams.get("next") || "/dashboard-pages/dashboard";
|
| 8 |
const origin = requestUrl.origin;
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
if (code) {
|
| 11 |
const supabase = await createClient();
|
| 12 |
const { error } = await supabase.auth.exchangeCodeForSession(code);
|
web/app/auth/login/page.tsx
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
"use client";
|
| 2 |
|
| 3 |
-
import { useState, useEffect
|
| 4 |
import { createClient } from "@/lib/supabase/client";
|
| 5 |
import { getBaseUrl } from "@/lib/auth-url";
|
| 6 |
import Link from "next/link";
|
| 7 |
-
import { useSearchParams
|
| 8 |
import { ArrowLeft, Mail, Loader2 } from "lucide-react";
|
| 9 |
|
| 10 |
-
function
|
| 11 |
const [email, setEmail] = useState("");
|
| 12 |
const [password, setPassword] = useState("");
|
| 13 |
const [error, setError] = useState("");
|
|
@@ -16,22 +16,21 @@ function LoginForm() {
|
|
| 16 |
const [magicSent, setMagicSent] = useState(false);
|
| 17 |
const supabase = createClient();
|
| 18 |
const searchParams = useSearchParams();
|
| 19 |
-
const router = useRouter();
|
| 20 |
const next = searchParams.get("next") || "/dashboard-pages/dashboard";
|
| 21 |
|
| 22 |
// Check if already logged in — redirect immediately
|
| 23 |
useEffect(() => {
|
| 24 |
supabase.auth.getUser().then(({ data: { user } }) => {
|
| 25 |
-
if (user) {
|
| 26 |
else { setChecking(false); }
|
| 27 |
});
|
| 28 |
-
}, [
|
| 29 |
|
| 30 |
async function handleLogin(e: React.FormEvent) {
|
| 31 |
e.preventDefault(); setLoading(true); setError("");
|
| 32 |
const { error } = await supabase.auth.signInWithPassword({ email, password });
|
| 33 |
if (error) { setError(error.message); setLoading(false); }
|
| 34 |
-
else {
|
| 35 |
}
|
| 36 |
|
| 37 |
async function handleMagicLink() {
|
|
@@ -120,15 +119,3 @@ function LoginForm() {
|
|
| 120 |
</div>
|
| 121 |
);
|
| 122 |
}
|
| 123 |
-
|
| 124 |
-
export default function LoginPage() {
|
| 125 |
-
return (
|
| 126 |
-
<Suspense fallback={
|
| 127 |
-
<div className="min-h-screen flex items-center justify-center bg-white">
|
| 128 |
-
<Loader2 className="w-5 h-5 text-zinc-300 animate-spin" />
|
| 129 |
-
</div>
|
| 130 |
-
}>
|
| 131 |
-
<LoginForm />
|
| 132 |
-
</Suspense>
|
| 133 |
-
);
|
| 134 |
-
}
|
|
|
|
| 1 |
"use client";
|
| 2 |
|
| 3 |
+
import { useState, useEffect } from "react";
|
| 4 |
import { createClient } from "@/lib/supabase/client";
|
| 5 |
import { getBaseUrl } from "@/lib/auth-url";
|
| 6 |
import Link from "next/link";
|
| 7 |
+
import { useSearchParams } from "next/navigation";
|
| 8 |
import { ArrowLeft, Mail, Loader2 } from "lucide-react";
|
| 9 |
|
| 10 |
+
export default function LoginPage() {
|
| 11 |
const [email, setEmail] = useState("");
|
| 12 |
const [password, setPassword] = useState("");
|
| 13 |
const [error, setError] = useState("");
|
|
|
|
| 16 |
const [magicSent, setMagicSent] = useState(false);
|
| 17 |
const supabase = createClient();
|
| 18 |
const searchParams = useSearchParams();
|
|
|
|
| 19 |
const next = searchParams.get("next") || "/dashboard-pages/dashboard";
|
| 20 |
|
| 21 |
// Check if already logged in — redirect immediately
|
| 22 |
useEffect(() => {
|
| 23 |
supabase.auth.getUser().then(({ data: { user } }) => {
|
| 24 |
+
if (user) { window.location.href = next; }
|
| 25 |
else { setChecking(false); }
|
| 26 |
});
|
| 27 |
+
}, []);
|
| 28 |
|
| 29 |
async function handleLogin(e: React.FormEvent) {
|
| 30 |
e.preventDefault(); setLoading(true); setError("");
|
| 31 |
const { error } = await supabase.auth.signInWithPassword({ email, password });
|
| 32 |
if (error) { setError(error.message); setLoading(false); }
|
| 33 |
+
else { window.location.href = next; }
|
| 34 |
}
|
| 35 |
|
| 36 |
async function handleMagicLink() {
|
|
|
|
| 119 |
</div>
|
| 120 |
);
|
| 121 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
web/app/auth/signup/page.tsx
CHANGED
|
@@ -4,7 +4,6 @@ import { useState, useEffect } from "react";
|
|
| 4 |
import { createClient } from "@/lib/supabase/client";
|
| 5 |
import { getBaseUrl } from "@/lib/auth-url";
|
| 6 |
import Link from "next/link";
|
| 7 |
-
import { useRouter } from "next/navigation";
|
| 8 |
import { ArrowLeft, Loader2 } from "lucide-react";
|
| 9 |
|
| 10 |
export default function SignupPage() {
|
|
@@ -15,15 +14,14 @@ export default function SignupPage() {
|
|
| 15 |
const [checking, setChecking] = useState(true);
|
| 16 |
const [done, setDone] = useState(false);
|
| 17 |
const supabase = createClient();
|
| 18 |
-
const router = useRouter();
|
| 19 |
|
| 20 |
// Redirect if already logged in
|
| 21 |
useEffect(() => {
|
| 22 |
supabase.auth.getUser().then(({ data: { user } }) => {
|
| 23 |
-
if (user) {
|
| 24 |
else { setChecking(false); }
|
| 25 |
});
|
| 26 |
-
}, [
|
| 27 |
|
| 28 |
async function handleSignup(e: React.FormEvent) {
|
| 29 |
e.preventDefault(); setLoading(true); setError("");
|
|
|
|
| 4 |
import { createClient } from "@/lib/supabase/client";
|
| 5 |
import { getBaseUrl } from "@/lib/auth-url";
|
| 6 |
import Link from "next/link";
|
|
|
|
| 7 |
import { ArrowLeft, Loader2 } from "lucide-react";
|
| 8 |
|
| 9 |
export default function SignupPage() {
|
|
|
|
| 14 |
const [checking, setChecking] = useState(true);
|
| 15 |
const [done, setDone] = useState(false);
|
| 16 |
const supabase = createClient();
|
|
|
|
| 17 |
|
| 18 |
// Redirect if already logged in
|
| 19 |
useEffect(() => {
|
| 20 |
supabase.auth.getUser().then(({ data: { user } }) => {
|
| 21 |
+
if (user) { window.location.href = "/dashboard-pages/dashboard"; }
|
| 22 |
else { setChecking(false); }
|
| 23 |
});
|
| 24 |
+
}, []);
|
| 25 |
|
| 26 |
async function handleSignup(e: React.FormEvent) {
|
| 27 |
e.preventDefault(); setLoading(true); setError("");
|
web/app/dashboard-pages/analyze/loading.tsx
DELETED
|
@@ -1,50 +0,0 @@
|
|
| 1 |
-
/**
|
| 2 |
-
* ClauseGuard — Loading skeleton for Analyze page
|
| 3 |
-
* FIX v4.1: Added loading.tsx for instant navigation feedback
|
| 4 |
-
* Next.js App Router automatically shows this while the page component loads
|
| 5 |
-
*/
|
| 6 |
-
|
| 7 |
-
import { ScanText } from "lucide-react";
|
| 8 |
-
|
| 9 |
-
export default function AnalyzeLoading() {
|
| 10 |
-
return (
|
| 11 |
-
<div className="min-h-screen bg-zinc-50/30">
|
| 12 |
-
<div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-6 sm:py-10">
|
| 13 |
-
{/* Header skeleton */}
|
| 14 |
-
<div className="mb-6 sm:mb-8">
|
| 15 |
-
<h1 className="text-xl sm:text-2xl font-semibold tracking-tight flex items-center gap-2">
|
| 16 |
-
<ScanText className="w-5 h-5 sm:w-6 sm:h-6 text-zinc-400" />
|
| 17 |
-
Scan a document
|
| 18 |
-
</h1>
|
| 19 |
-
<div className="mt-2 h-4 w-96 bg-zinc-200 rounded animate-pulse" />
|
| 20 |
-
</div>
|
| 21 |
-
|
| 22 |
-
<div className="grid lg:grid-cols-5 gap-4 sm:gap-6">
|
| 23 |
-
{/* Input panel skeleton */}
|
| 24 |
-
<div className="lg:col-span-2">
|
| 25 |
-
<div className="bg-white border border-zinc-200 rounded-xl p-3 sm:p-4">
|
| 26 |
-
<div className="w-full h-[260px] sm:h-[360px] bg-zinc-100 rounded-lg animate-pulse" />
|
| 27 |
-
<div className="mt-3 flex gap-2">
|
| 28 |
-
<div className="flex-1 h-10 bg-zinc-900/10 rounded-lg animate-pulse" />
|
| 29 |
-
<div className="w-20 h-10 bg-zinc-100 rounded-lg animate-pulse" />
|
| 30 |
-
<div className="w-10 h-10 bg-zinc-100 rounded-lg animate-pulse" />
|
| 31 |
-
</div>
|
| 32 |
-
</div>
|
| 33 |
-
</div>
|
| 34 |
-
|
| 35 |
-
{/* Results panel skeleton */}
|
| 36 |
-
<div className="lg:col-span-3">
|
| 37 |
-
<div className="bg-white border border-zinc-200 rounded-xl p-4 sm:p-5">
|
| 38 |
-
<div className="flex items-center justify-center h-48 text-zinc-300">
|
| 39 |
-
<div className="text-center">
|
| 40 |
-
<ScanText className="w-10 h-10 mx-auto mb-3 text-zinc-200" />
|
| 41 |
-
<div className="h-4 w-48 bg-zinc-100 rounded mx-auto animate-pulse" />
|
| 42 |
-
</div>
|
| 43 |
-
</div>
|
| 44 |
-
</div>
|
| 45 |
-
</div>
|
| 46 |
-
</div>
|
| 47 |
-
</div>
|
| 48 |
-
</div>
|
| 49 |
-
);
|
| 50 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
web/app/dashboard-pages/analyze/page.tsx
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
import { useState, useRef, useEffect } from "react";
|
| 4 |
import {
|
| 5 |
ScanText, ScanLine, TriangleAlert, CircleAlert, CircleCheck, Info,
|
| 6 |
-
ChevronDown, ChevronUp, Copy, Check, Upload, FileText,
|
| 7 |
ShieldCheck, ShieldAlert, Scale, Gavel, Ban, Globe, Eye, Stamp, FileX,
|
| 8 |
Lock, Sparkles as SparklesIcon, X, Layers, Landmark, Briefcase,
|
| 9 |
AlertTriangle, Tag, BookOpen, ClipboardList, DollarSign,
|
|
@@ -12,7 +12,6 @@ import {
|
|
| 12 |
ShieldOff, CircleSlash, MessageSquareWarning, Construction,
|
| 13 |
MessageSquare, Send, Loader2
|
| 14 |
} from "lucide-react";
|
| 15 |
-
import { ExportDropdown } from "@/components/export-dropdown";
|
| 16 |
|
| 17 |
interface Cat { name: string; severity: string; description?: string; confidence?: number; }
|
| 18 |
interface Clause { text: string; categories: Cat[]; }
|
|
@@ -20,7 +19,7 @@ interface Entity { text: string; type: string; score?: number; source?: string;
|
|
| 20 |
interface Contradiction { type: string; explanation: string; severity: string; confidence?: number; source?: string; }
|
| 21 |
interface Obligation { type: string; party: string; description: string; deadline: string; priority?: number; }
|
| 22 |
interface ComplianceCheck { requirement: string; description: string; severity: string; status: string; matched_keywords: string[]; context?: string[]; }
|
| 23 |
-
interface ComplianceReg { description: string; compliance_rate: number; checks: ComplianceCheck[]; overall_status: string; negated_count?: number; ambiguous_count?: number;
|
| 24 |
interface Redline {
|
| 25 |
original_text: string;
|
| 26 |
clause_label: string;
|
|
@@ -101,7 +100,6 @@ const COMPLIANCE_STATUS: Record<string, { bg: string; text: string; border: stri
|
|
| 101 |
PARTIAL: { bg: "bg-amber-50", text: "text-amber-700", border: "border-amber-200" },
|
| 102 |
"NON-COMPLIANT": { bg: "bg-red-50", text: "text-red-700", border: "border-red-200" },
|
| 103 |
WARNING: { bg: "bg-orange-50", text: "text-orange-700", border: "border-orange-200" },
|
| 104 |
-
NOT_APPLICABLE: { bg: "bg-zinc-50", text: "text-zinc-400", border: "border-zinc-200" },
|
| 105 |
};
|
| 106 |
|
| 107 |
function SourceBadge({ isML, confidence }: { isML: boolean; confidence?: number | null }) {
|
|
@@ -235,6 +233,17 @@ export default function AnalyzePage() {
|
|
| 235 |
if (fileInputRef.current) fileInputRef.current.value = "";
|
| 236 |
}
|
| 237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
function handleCopy() {
|
| 239 |
if (!results) return;
|
| 240 |
const summary = `ClauseGuard Report\nRisk: ${results.risk_score}/100 (Grade ${results.grade})\n${results.flagged_count} of ${results.total_clauses} clauses flagged\nEntities: ${results.entities.length}\nContradictions: ${results.contradictions.length}\nObligations: ${results.obligations.length}\n\n` +
|
|
@@ -360,51 +369,41 @@ export default function AnalyzePage() {
|
|
| 360 |
<div className="lg:col-span-3">
|
| 361 |
{results ? (
|
| 362 |
<div className="space-y-3 sm:space-y-4">
|
| 363 |
-
{/* Score Card
|
| 364 |
-
<div className="bg-white border border-zinc-200 rounded-
|
| 365 |
-
<div className="flex
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
<
|
| 370 |
-
<circle cx="50" cy="50" r="42" fill="none"
|
| 371 |
-
stroke={results.risk_score >= 60 ? "#ef4444" : results.risk_score >= 30 ? "#f59e0b" : "#22c55e"}
|
| 372 |
-
strokeWidth="8" strokeLinecap="round"
|
| 373 |
-
strokeDasharray={`${results.risk_score * 2.64} 264`}
|
| 374 |
-
className="transition-all duration-1000 ease-out" />
|
| 375 |
-
</svg>
|
| 376 |
-
<div className="absolute inset-0 flex flex-col items-center justify-center">
|
| 377 |
-
<span className="text-xl sm:text-2xl font-bold tracking-tight">{results.risk_score}</span>
|
| 378 |
-
<span className="text-[9px] text-zinc-400 -mt-0.5">/ 100</span>
|
| 379 |
</div>
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
<span className={`text-sm font-bold px-3 py-1 rounded-lg border ${GRADE_STYLE[results.grade] || GRADE_STYLE.C}`}>
|
| 385 |
-
Grade {results.grade}
|
| 386 |
-
</span>
|
| 387 |
-
<span className="text-xs text-zinc-400">
|
| 388 |
-
{results.risk_score < 20 ? "Low Risk" : results.risk_score < 40 ? "Moderate Risk" : results.risk_score < 60 ? "Elevated Risk" : results.risk_score < 80 ? "High Risk" : "Critical Risk"}
|
| 389 |
-
</span>
|
| 390 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
<span className="w-px h-3 bg-zinc-200" />
|
| 409 |
<span className="flex items-center gap-1"><Tag className="w-3 h-3" />{results.entities.length} entities</span>
|
| 410 |
<span className="w-px h-3 bg-zinc-200" />
|
|
@@ -412,11 +411,9 @@ export default function AnalyzePage() {
|
|
| 412 |
<span className="w-px h-3 bg-zinc-200" />
|
| 413 |
<span className="flex items-center gap-1"><Clock className="w-3 h-3" />{results.latency_ms}ms</span>
|
| 414 |
<span className="w-px h-3 bg-zinc-200" />
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
</div>
|
| 419 |
-
</div>
|
| 420 |
</div>
|
| 421 |
</div>
|
| 422 |
|
|
@@ -436,11 +433,11 @@ export default function AnalyzePage() {
|
|
| 436 |
</button>
|
| 437 |
))}
|
| 438 |
</div>
|
| 439 |
-
<div className="flex gap-1.5 self-end sm:self-auto
|
| 440 |
<button onClick={handleCopy} className="p-2 rounded-md hover:bg-zinc-100 text-zinc-400 hover:text-zinc-600 transition-colors" title="Copy summary">
|
| 441 |
{copied ? <Check className="w-4 h-4 text-emerald-500" /> : <Copy className="w-4 h-4" />}
|
| 442 |
</button>
|
| 443 |
-
<
|
| 444 |
</div>
|
| 445 |
</div>
|
| 446 |
|
|
@@ -460,7 +457,7 @@ export default function AnalyzePage() {
|
|
| 460 |
</div>
|
| 461 |
|
| 462 |
{/* Tab Content */}
|
| 463 |
-
<div className="max-h-[
|
| 464 |
|
| 465 |
{/* Clauses */}
|
| 466 |
{activeTab === "clauses" && (
|
|
@@ -660,9 +657,8 @@ export default function AnalyzePage() {
|
|
| 660 |
</div>
|
| 661 |
) : Object.entries(results.compliance).map(([regName, reg]) => {
|
| 662 |
const status = COMPLIANCE_STATUS[reg.overall_status] || COMPLIANCE_STATUS.PARTIAL;
|
| 663 |
-
const isNA = reg.overall_status === "NOT_APPLICABLE";
|
| 664 |
return (
|
| 665 |
-
<div key={regName} className=
|
| 666 |
<div className={`flex flex-col sm:flex-row sm:items-center justify-between p-4 border-b ${status.bg} ${status.border}`}>
|
| 667 |
<div>
|
| 668 |
<div className="flex items-center gap-2 flex-wrap">
|
|
@@ -681,15 +677,10 @@ export default function AnalyzePage() {
|
|
| 681 |
<p className="text-[11px] text-zinc-500 mt-0.5">{reg.description}</p>
|
| 682 |
</div>
|
| 683 |
<div className="text-left sm:text-right mt-2 sm:mt-0">
|
| 684 |
-
<span className={`text-lg font-bold ${status.text}`}>{
|
| 685 |
<span className={`text-[11px] font-medium block ${status.text}`}>{reg.overall_status}</span>
|
| 686 |
</div>
|
| 687 |
</div>
|
| 688 |
-
{isNA ? (
|
| 689 |
-
<div className="p-3 text-xs text-zinc-400 italic">
|
| 690 |
-
{reg.note || `${regName} does not appear applicable to this contract type.`}
|
| 691 |
-
</div>
|
| 692 |
-
) : (
|
| 693 |
<div className="p-3 space-y-0.5">
|
| 694 |
{reg.checks.map((check, i) => {
|
| 695 |
const sev = SEV_CONFIG[check.severity] || SEV_CONFIG.MEDIUM;
|
|
@@ -716,7 +707,6 @@ export default function AnalyzePage() {
|
|
| 716 |
);
|
| 717 |
})}
|
| 718 |
</div>
|
| 719 |
-
)}
|
| 720 |
</div>
|
| 721 |
);
|
| 722 |
})}
|
|
@@ -857,18 +847,8 @@ export default function AnalyzePage() {
|
|
| 857 |
)}
|
| 858 |
</div>
|
| 859 |
</div>
|
| 860 |
-
) : loading ? (
|
| 861 |
-
<div className="bg-white border border-zinc-200 rounded-2xl h-[300px] sm:h-[420px] flex flex-col items-center justify-center shadow-sm">
|
| 862 |
-
<div className="relative w-16 h-16 mb-4">
|
| 863 |
-
<div className="absolute inset-0 rounded-full border-2 border-zinc-100" />
|
| 864 |
-
<div className="absolute inset-0 rounded-full border-2 border-t-zinc-900 animate-spin" />
|
| 865 |
-
<ScanLine className="absolute inset-0 m-auto w-6 h-6 text-zinc-400" />
|
| 866 |
-
</div>
|
| 867 |
-
<p className="text-sm font-medium text-zinc-700">Analyzing contract...</p>
|
| 868 |
-
<p className="text-xs text-zinc-400 mt-1">Running 6 ML models · This may take 30-60 seconds</p>
|
| 869 |
-
</div>
|
| 870 |
) : (
|
| 871 |
-
<div className="bg-white border border-dashed border-zinc-200 rounded-
|
| 872 |
<ScanText className="w-10 h-10 text-zinc-200 mb-3" />
|
| 873 |
<p className="text-sm text-zinc-300">Paste text and analyze to see results</p>
|
| 874 |
</div>
|
|
|
|
| 3 |
import { useState, useRef, useEffect } from "react";
|
| 4 |
import {
|
| 5 |
ScanText, ScanLine, TriangleAlert, CircleAlert, CircleCheck, Info,
|
| 6 |
+
FileDown, ChevronDown, ChevronUp, Copy, Check, Upload, FileText,
|
| 7 |
ShieldCheck, ShieldAlert, Scale, Gavel, Ban, Globe, Eye, Stamp, FileX,
|
| 8 |
Lock, Sparkles as SparklesIcon, X, Layers, Landmark, Briefcase,
|
| 9 |
AlertTriangle, Tag, BookOpen, ClipboardList, DollarSign,
|
|
|
|
| 12 |
ShieldOff, CircleSlash, MessageSquareWarning, Construction,
|
| 13 |
MessageSquare, Send, Loader2
|
| 14 |
} from "lucide-react";
|
|
|
|
| 15 |
|
| 16 |
interface Cat { name: string; severity: string; description?: string; confidence?: number; }
|
| 17 |
interface Clause { text: string; categories: Cat[]; }
|
|
|
|
| 19 |
interface Contradiction { type: string; explanation: string; severity: string; confidence?: number; source?: string; }
|
| 20 |
interface Obligation { type: string; party: string; description: string; deadline: string; priority?: number; }
|
| 21 |
interface ComplianceCheck { requirement: string; description: string; severity: string; status: string; matched_keywords: string[]; context?: string[]; }
|
| 22 |
+
interface ComplianceReg { description: string; compliance_rate: number; checks: ComplianceCheck[]; overall_status: string; negated_count?: number; ambiguous_count?: number; }
|
| 23 |
interface Redline {
|
| 24 |
original_text: string;
|
| 25 |
clause_label: string;
|
|
|
|
| 100 |
PARTIAL: { bg: "bg-amber-50", text: "text-amber-700", border: "border-amber-200" },
|
| 101 |
"NON-COMPLIANT": { bg: "bg-red-50", text: "text-red-700", border: "border-red-200" },
|
| 102 |
WARNING: { bg: "bg-orange-50", text: "text-orange-700", border: "border-orange-200" },
|
|
|
|
| 103 |
};
|
| 104 |
|
| 105 |
function SourceBadge({ isML, confidence }: { isML: boolean; confidence?: number | null }) {
|
|
|
|
| 233 |
if (fileInputRef.current) fileInputRef.current.value = "";
|
| 234 |
}
|
| 235 |
|
| 236 |
+
async function handleDownloadPDF() {
|
| 237 |
+
if (!results) return;
|
| 238 |
+
try {
|
| 239 |
+
const res = await fetch("/api/pdf/report", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(results) });
|
| 240 |
+
const blob = await res.blob();
|
| 241 |
+
const url = URL.createObjectURL(blob);
|
| 242 |
+
const a = document.createElement("a"); a.href = url; a.download = "clauseguard-report.pdf"; a.click();
|
| 243 |
+
URL.revokeObjectURL(url);
|
| 244 |
+
} catch {}
|
| 245 |
+
}
|
| 246 |
+
|
| 247 |
function handleCopy() {
|
| 248 |
if (!results) return;
|
| 249 |
const summary = `ClauseGuard Report\nRisk: ${results.risk_score}/100 (Grade ${results.grade})\n${results.flagged_count} of ${results.total_clauses} clauses flagged\nEntities: ${results.entities.length}\nContradictions: ${results.contradictions.length}\nObligations: ${results.obligations.length}\n\n` +
|
|
|
|
| 369 |
<div className="lg:col-span-3">
|
| 370 |
{results ? (
|
| 371 |
<div className="space-y-3 sm:space-y-4">
|
| 372 |
+
{/* Score Card */}
|
| 373 |
+
<div className="bg-white border border-zinc-200 rounded-xl p-4 sm:p-5">
|
| 374 |
+
<div className="flex flex-col sm:flex-row sm:items-start sm:justify-between gap-3">
|
| 375 |
+
<div>
|
| 376 |
+
<div className="flex items-baseline gap-2">
|
| 377 |
+
<span className="text-3xl sm:text-4xl font-semibold tracking-tight">{results.risk_score}</span>
|
| 378 |
+
<span className="text-sm text-zinc-400">/100 risk</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
</div>
|
| 380 |
+
<div className="mt-2 h-1.5 w-full sm:w-48 bg-zinc-100 rounded-full overflow-hidden">
|
| 381 |
+
<div className={`h-full rounded-full transition-all duration-700 ${
|
| 382 |
+
results.risk_score >= 60 ? "bg-red-500" : results.risk_score >= 30 ? "bg-amber-400" : "bg-emerald-500"
|
| 383 |
+
}`} style={{ width: `${results.risk_score}%` }} />
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
</div>
|
| 385 |
+
</div>
|
| 386 |
+
<span className={`self-start text-sm font-semibold px-3 py-1 rounded-lg border ${GRADE_STYLE[results.grade] || GRADE_STYLE.C}`}>
|
| 387 |
+
Grade {results.grade}
|
| 388 |
+
</span>
|
| 389 |
+
</div>
|
| 390 |
|
| 391 |
+
{/* Severity breakdown grid */}
|
| 392 |
+
<div className="mt-4 grid grid-cols-4 gap-2">
|
| 393 |
+
{(["CRITICAL", "HIGH", "MEDIUM", "LOW"] as const).map(sev => {
|
| 394 |
+
const c = SEV_CONFIG[sev];
|
| 395 |
+
return (
|
| 396 |
+
<div key={sev} className={`text-center p-2 rounded-lg ${c.bg} border ${c.border}`}>
|
| 397 |
+
<span className={`text-lg font-bold ${c.text}`}>{sevCounts[sev]}</span>
|
| 398 |
+
<p className={`text-[10px] ${c.text} opacity-70`}>{c.label}</p>
|
| 399 |
+
</div>
|
| 400 |
+
);
|
| 401 |
+
})}
|
| 402 |
+
</div>
|
| 403 |
|
| 404 |
+
{/* Meta stats */}
|
| 405 |
+
<div className="mt-3 flex items-center gap-2 sm:gap-3 text-[11px] text-zinc-400 flex-wrap">
|
| 406 |
+
<span className="flex items-center gap-1"><Layers className="w-3 h-3" />{results.total_clauses} clauses</span>
|
| 407 |
<span className="w-px h-3 bg-zinc-200" />
|
| 408 |
<span className="flex items-center gap-1"><Tag className="w-3 h-3" />{results.entities.length} entities</span>
|
| 409 |
<span className="w-px h-3 bg-zinc-200" />
|
|
|
|
| 411 |
<span className="w-px h-3 bg-zinc-200" />
|
| 412 |
<span className="flex items-center gap-1"><Clock className="w-3 h-3" />{results.latency_ms}ms</span>
|
| 413 |
<span className="w-px h-3 bg-zinc-200" />
|
| 414 |
+
<span className="flex items-center gap-1">
|
| 415 |
+
{results.model !== "regex" ? <><Cpu className="w-3 h-3" /> ML Models</> : <><FileSearch className="w-3 h-3" /> Pattern fallback</>}
|
| 416 |
+
</span>
|
|
|
|
|
|
|
| 417 |
</div>
|
| 418 |
</div>
|
| 419 |
|
|
|
|
| 433 |
</button>
|
| 434 |
))}
|
| 435 |
</div>
|
| 436 |
+
<div className="flex gap-1.5 self-end sm:self-auto">
|
| 437 |
<button onClick={handleCopy} className="p-2 rounded-md hover:bg-zinc-100 text-zinc-400 hover:text-zinc-600 transition-colors" title="Copy summary">
|
| 438 |
{copied ? <Check className="w-4 h-4 text-emerald-500" /> : <Copy className="w-4 h-4" />}
|
| 439 |
</button>
|
| 440 |
+
<button onClick={handleDownloadPDF} className="p-2 rounded-md hover:bg-zinc-100 text-zinc-400 hover:text-zinc-600 transition-colors" title="Download PDF"><FileDown className="w-4 h-4" /></button>
|
| 441 |
</div>
|
| 442 |
</div>
|
| 443 |
|
|
|
|
| 457 |
</div>
|
| 458 |
|
| 459 |
{/* Tab Content */}
|
| 460 |
+
<div className="max-h-[350px] sm:max-h-[420px] overflow-y-auto pr-1">
|
| 461 |
|
| 462 |
{/* Clauses */}
|
| 463 |
{activeTab === "clauses" && (
|
|
|
|
| 657 |
</div>
|
| 658 |
) : Object.entries(results.compliance).map(([regName, reg]) => {
|
| 659 |
const status = COMPLIANCE_STATUS[reg.overall_status] || COMPLIANCE_STATUS.PARTIAL;
|
|
|
|
| 660 |
return (
|
| 661 |
+
<div key={regName} className="bg-white border border-zinc-200 rounded-xl overflow-hidden">
|
| 662 |
<div className={`flex flex-col sm:flex-row sm:items-center justify-between p-4 border-b ${status.bg} ${status.border}`}>
|
| 663 |
<div>
|
| 664 |
<div className="flex items-center gap-2 flex-wrap">
|
|
|
|
| 677 |
<p className="text-[11px] text-zinc-500 mt-0.5">{reg.description}</p>
|
| 678 |
</div>
|
| 679 |
<div className="text-left sm:text-right mt-2 sm:mt-0">
|
| 680 |
+
<span className={`text-lg font-bold ${status.text}`}>{reg.compliance_rate}%</span>
|
| 681 |
<span className={`text-[11px] font-medium block ${status.text}`}>{reg.overall_status}</span>
|
| 682 |
</div>
|
| 683 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 684 |
<div className="p-3 space-y-0.5">
|
| 685 |
{reg.checks.map((check, i) => {
|
| 686 |
const sev = SEV_CONFIG[check.severity] || SEV_CONFIG.MEDIUM;
|
|
|
|
| 707 |
);
|
| 708 |
})}
|
| 709 |
</div>
|
|
|
|
| 710 |
</div>
|
| 711 |
);
|
| 712 |
})}
|
|
|
|
| 847 |
)}
|
| 848 |
</div>
|
| 849 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 850 |
) : (
|
| 851 |
+
<div className="bg-white border border-dashed border-zinc-200 rounded-xl h-[300px] sm:h-[420px] flex flex-col items-center justify-center">
|
| 852 |
<ScanText className="w-10 h-10 text-zinc-200 mb-3" />
|
| 853 |
<p className="text-sm text-zinc-300">Paste text and analyze to see results</p>
|
| 854 |
</div>
|
web/app/dashboard-pages/compare/loading.tsx
DELETED
|
@@ -1,22 +0,0 @@
|
|
| 1 |
-
import { GitCompare } from "lucide-react";
|
| 2 |
-
|
| 3 |
-
export default function CompareLoading() {
|
| 4 |
-
return (
|
| 5 |
-
<div className="min-h-screen bg-zinc-50/30">
|
| 6 |
-
<div className="max-w-6xl mx-auto px-4 sm:px-6 py-8 sm:py-12">
|
| 7 |
-
<div className="flex items-center gap-3 mb-8">
|
| 8 |
-
<GitCompare className="w-6 h-6 text-zinc-400 animate-pulse" />
|
| 9 |
-
<div className="h-7 w-44 bg-zinc-200 rounded-lg animate-pulse" />
|
| 10 |
-
</div>
|
| 11 |
-
<div className="grid md:grid-cols-2 gap-6">
|
| 12 |
-
{[...Array(2)].map((_, i) => (
|
| 13 |
-
<div key={i} className="bg-white rounded-xl p-4 border border-zinc-200">
|
| 14 |
-
<div className="h-3 w-24 bg-zinc-100 rounded animate-pulse mb-3" />
|
| 15 |
-
<div className="h-[280px] bg-zinc-50 rounded-lg animate-pulse" />
|
| 16 |
-
</div>
|
| 17 |
-
))}
|
| 18 |
-
</div>
|
| 19 |
-
</div>
|
| 20 |
-
</div>
|
| 21 |
-
);
|
| 22 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
web/app/dashboard-pages/dashboard/loading.tsx
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
import { ShieldCheck, Loader2 } from "lucide-react";
|
| 2 |
-
|
| 3 |
-
export default function DashboardLoading() {
|
| 4 |
-
return (
|
| 5 |
-
<div className="min-h-screen bg-zinc-50/30">
|
| 6 |
-
<div className="max-w-6xl mx-auto px-4 sm:px-6 py-8 sm:py-12">
|
| 7 |
-
<div className="flex items-center gap-3 mb-8">
|
| 8 |
-
<ShieldCheck className="w-6 h-6 text-indigo-400 animate-pulse" />
|
| 9 |
-
<div className="h-7 w-32 bg-zinc-200 rounded-lg animate-pulse" />
|
| 10 |
-
</div>
|
| 11 |
-
<div className="grid grid-cols-2 lg:grid-cols-4 gap-4 mb-10">
|
| 12 |
-
{[...Array(4)].map((_, i) => (
|
| 13 |
-
<div key={i} className="bg-white rounded-xl p-5 border border-zinc-200">
|
| 14 |
-
<div className="h-3 w-20 bg-zinc-100 rounded animate-pulse mb-3" />
|
| 15 |
-
<div className="h-7 w-16 bg-zinc-200 rounded animate-pulse" />
|
| 16 |
-
</div>
|
| 17 |
-
))}
|
| 18 |
-
</div>
|
| 19 |
-
<div className="grid sm:grid-cols-3 gap-4 mb-10">
|
| 20 |
-
{[...Array(3)].map((_, i) => (
|
| 21 |
-
<div key={i} className="bg-white rounded-xl p-5 border border-zinc-200 flex items-center gap-4">
|
| 22 |
-
<div className="w-10 h-10 rounded-lg bg-zinc-100 animate-pulse" />
|
| 23 |
-
<div>
|
| 24 |
-
<div className="h-3 w-24 bg-zinc-100 rounded animate-pulse mb-2" />
|
| 25 |
-
<div className="h-5 w-12 bg-zinc-200 rounded animate-pulse" />
|
| 26 |
-
</div>
|
| 27 |
-
</div>
|
| 28 |
-
))}
|
| 29 |
-
</div>
|
| 30 |
-
<div className="bg-white rounded-xl border border-zinc-200 p-6">
|
| 31 |
-
<div className="h-5 w-28 bg-zinc-200 rounded animate-pulse mb-6" />
|
| 32 |
-
{[...Array(4)].map((_, i) => (
|
| 33 |
-
<div key={i} className="flex items-center justify-between py-4 border-b border-zinc-50">
|
| 34 |
-
<div>
|
| 35 |
-
<div className="h-4 w-48 bg-zinc-100 rounded animate-pulse mb-2" />
|
| 36 |
-
<div className="h-3 w-32 bg-zinc-50 rounded animate-pulse" />
|
| 37 |
-
</div>
|
| 38 |
-
<div className="h-7 w-14 bg-zinc-100 rounded-full animate-pulse" />
|
| 39 |
-
</div>
|
| 40 |
-
))}
|
| 41 |
-
</div>
|
| 42 |
-
</div>
|
| 43 |
-
</div>
|
| 44 |
-
);
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
web/app/dashboard-pages/settings/loading.tsx
DELETED
|
@@ -1,23 +0,0 @@
|
|
| 1 |
-
import { Settings } from "lucide-react";
|
| 2 |
-
|
| 3 |
-
export default function SettingsLoading() {
|
| 4 |
-
return (
|
| 5 |
-
<div className="min-h-screen bg-zinc-50/30">
|
| 6 |
-
<div className="max-w-3xl mx-auto px-4 sm:px-6 py-8 sm:py-12">
|
| 7 |
-
<div className="flex items-center gap-3 mb-8">
|
| 8 |
-
<Settings className="w-6 h-6 text-zinc-400 animate-pulse" />
|
| 9 |
-
<div className="h-7 w-28 bg-zinc-200 rounded-lg animate-pulse" />
|
| 10 |
-
</div>
|
| 11 |
-
{[...Array(3)].map((_, i) => (
|
| 12 |
-
<div key={i} className="bg-white rounded-xl p-6 border border-zinc-200 mb-4">
|
| 13 |
-
<div className="h-5 w-32 bg-zinc-200 rounded animate-pulse mb-4" />
|
| 14 |
-
<div className="space-y-3">
|
| 15 |
-
<div className="h-10 bg-zinc-50 rounded-lg animate-pulse" />
|
| 16 |
-
<div className="h-10 bg-zinc-50 rounded-lg animate-pulse" />
|
| 17 |
-
</div>
|
| 18 |
-
</div>
|
| 19 |
-
))}
|
| 20 |
-
</div>
|
| 21 |
-
</div>
|
| 22 |
-
);
|
| 23 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
web/components/export-dropdown.tsx
DELETED
|
@@ -1,69 +0,0 @@
|
|
| 1 |
-
"use client";
|
| 2 |
-
|
| 3 |
-
import { useState, useRef, useEffect } from "react";
|
| 4 |
-
import { FileDown, ChevronDown, Loader2 } from "lucide-react";
|
| 5 |
-
import { EXPORT_FORMATS } from "@/lib/export-utils";
|
| 6 |
-
import type { AnalysisResult } from "@/lib/types";
|
| 7 |
-
|
| 8 |
-
export function ExportDropdown({ results }: { results: AnalysisResult }) {
|
| 9 |
-
const [open, setOpen] = useState(false);
|
| 10 |
-
const [exporting, setExporting] = useState<string | null>(null);
|
| 11 |
-
const ref = useRef<HTMLDivElement>(null);
|
| 12 |
-
|
| 13 |
-
useEffect(() => {
|
| 14 |
-
function handleClickOutside(e: MouseEvent) {
|
| 15 |
-
if (ref.current && !ref.current.contains(e.target as Node)) setOpen(false);
|
| 16 |
-
}
|
| 17 |
-
document.addEventListener("mousedown", handleClickOutside);
|
| 18 |
-
return () => document.removeEventListener("mousedown", handleClickOutside);
|
| 19 |
-
}, []);
|
| 20 |
-
|
| 21 |
-
async function handleExport(key: string, fn: (r: AnalysisResult) => void | Promise<any>) {
|
| 22 |
-
setExporting(key);
|
| 23 |
-
try {
|
| 24 |
-
await fn(results);
|
| 25 |
-
} catch (e) {
|
| 26 |
-
console.error("Export failed:", e);
|
| 27 |
-
}
|
| 28 |
-
setExporting(null);
|
| 29 |
-
setOpen(false);
|
| 30 |
-
}
|
| 31 |
-
|
| 32 |
-
return (
|
| 33 |
-
<div ref={ref} className="relative">
|
| 34 |
-
<button
|
| 35 |
-
onClick={() => setOpen(!open)}
|
| 36 |
-
className="inline-flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium text-zinc-600 bg-white border border-zinc-200 rounded-lg hover:bg-zinc-50 hover:border-zinc-300 transition-all shadow-sm"
|
| 37 |
-
>
|
| 38 |
-
<FileDown className="w-3.5 h-3.5" />
|
| 39 |
-
Export
|
| 40 |
-
<ChevronDown className={`w-3 h-3 transition-transform ${open ? "rotate-180" : ""}`} />
|
| 41 |
-
</button>
|
| 42 |
-
|
| 43 |
-
{open && (
|
| 44 |
-
<div className="absolute right-0 top-full mt-1.5 w-64 bg-white border border-zinc-200 rounded-xl shadow-xl z-50 overflow-hidden animate-in fade-in slide-in-from-top-1 duration-150">
|
| 45 |
-
<div className="px-3 py-2 border-b border-zinc-100">
|
| 46 |
-
<p className="text-[10px] font-semibold text-zinc-400 uppercase tracking-wider">Export Report</p>
|
| 47 |
-
</div>
|
| 48 |
-
<div className="py-1">
|
| 49 |
-
{EXPORT_FORMATS.map((fmt) => (
|
| 50 |
-
<button
|
| 51 |
-
key={fmt.key}
|
| 52 |
-
onClick={() => handleExport(fmt.key, fmt.fn)}
|
| 53 |
-
disabled={exporting !== null}
|
| 54 |
-
className="w-full flex items-center gap-3 px-3 py-2.5 text-left hover:bg-zinc-50 transition-colors disabled:opacity-40"
|
| 55 |
-
>
|
| 56 |
-
<span className="text-base w-5 text-center">{fmt.icon}</span>
|
| 57 |
-
<div className="flex-1 min-w-0">
|
| 58 |
-
<p className="text-sm font-medium text-zinc-700">{fmt.label}</p>
|
| 59 |
-
<p className="text-[10px] text-zinc-400">{fmt.description}</p>
|
| 60 |
-
</div>
|
| 61 |
-
{exporting === fmt.key && <Loader2 className="w-3.5 h-3.5 text-zinc-400 animate-spin" />}
|
| 62 |
-
</button>
|
| 63 |
-
))}
|
| 64 |
-
</div>
|
| 65 |
-
</div>
|
| 66 |
-
)}
|
| 67 |
-
</div>
|
| 68 |
-
);
|
| 69 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
web/components/extension-bridge.tsx
CHANGED
|
@@ -57,24 +57,20 @@ export function ExtensionBridge() {
|
|
| 57 |
const { data: { subscription } } = supabase.auth.onAuthStateChange(async (event, session) => {
|
| 58 |
// Handle ALL events that mean "user is logged in"
|
| 59 |
if (session && (event === "SIGNED_IN" || event === "INITIAL_SESSION" || event === "TOKEN_REFRESHED")) {
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
profileData = result.data;
|
| 68 |
-
} catch {
|
| 69 |
-
// ignore
|
| 70 |
-
}
|
| 71 |
|
| 72 |
sendAuthToExtension(
|
| 73 |
session.access_token,
|
| 74 |
session.user.email || "",
|
| 75 |
-
|
| 76 |
session.user.id,
|
| 77 |
-
|
| 78 |
);
|
| 79 |
}
|
| 80 |
|
|
|
|
| 57 |
const { data: { subscription } } = supabase.auth.onAuthStateChange(async (event, session) => {
|
| 58 |
// Handle ALL events that mean "user is logged in"
|
| 59 |
if (session && (event === "SIGNED_IN" || event === "INITIAL_SESSION" || event === "TOKEN_REFRESHED")) {
|
| 60 |
+
const { data: profile } = await supabase
|
| 61 |
+
.from("profiles")
|
| 62 |
+
.select("plan, full_name")
|
| 63 |
+
.eq("id", session.user.id)
|
| 64 |
+
.single()
|
| 65 |
+
.then(r => r)
|
| 66 |
+
.catch(() => ({ data: null }));
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
sendAuthToExtension(
|
| 69 |
session.access_token,
|
| 70 |
session.user.email || "",
|
| 71 |
+
profile?.full_name || session.user.user_metadata?.full_name || "",
|
| 72 |
session.user.id,
|
| 73 |
+
profile?.plan || "free",
|
| 74 |
);
|
| 75 |
}
|
| 76 |
|
web/components/nav.tsx
CHANGED
|
@@ -29,69 +29,33 @@ export function Nav() {
|
|
| 29 |
const hasTeam = !!userTeam;
|
| 30 |
|
| 31 |
useEffect(() => {
|
| 32 |
-
let cancelled = false;
|
| 33 |
const supabase = createClient();
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
const { data: { subscription } } = supabase.auth.onAuthStateChange(
|
| 40 |
-
(event, session) => {
|
| 41 |
-
if (cancelled) return;
|
| 42 |
-
|
| 43 |
-
if (!session?.user) {
|
| 44 |
-
setUserEmail(null);
|
| 45 |
-
setUserRole(null);
|
| 46 |
-
setUserTeam(null);
|
| 47 |
-
setLoaded(true);
|
| 48 |
-
return;
|
| 49 |
-
}
|
| 50 |
-
|
| 51 |
-
// User is authenticated — show logged-in nav immediately
|
| 52 |
-
setUserEmail(session.user.email || null);
|
| 53 |
-
setLoaded(true);
|
| 54 |
-
|
| 55 |
-
// Fetch profile (role, team) in background — don't block the UI
|
| 56 |
-
supabase
|
| 57 |
.from("profiles")
|
| 58 |
.select("role, team_id")
|
| 59 |
-
.eq("id",
|
| 60 |
-
.single()
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
if (error) {
|
| 64 |
-
console.error("[ClauseGuard Nav] Profile error:", error.message);
|
| 65 |
-
setUserRole("user");
|
| 66 |
-
setUserTeam(null);
|
| 67 |
-
} else {
|
| 68 |
-
setUserRole(profile?.role || "user");
|
| 69 |
-
setUserTeam(profile?.team_id || null);
|
| 70 |
-
}
|
| 71 |
-
});
|
| 72 |
}
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
cancelled = true;
|
| 77 |
-
subscription.unsubscribe();
|
| 78 |
-
};
|
| 79 |
-
}, []); // eslint-disable-line react-hooks/exhaustive-deps
|
| 80 |
|
| 81 |
async function handleSignOut() {
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
await supabase.auth.signOut();
|
| 85 |
-
} catch {
|
| 86 |
-
// ignore
|
| 87 |
-
}
|
| 88 |
setUserEmail(null);
|
| 89 |
setUserRole(null);
|
| 90 |
setUserTeam(null);
|
| 91 |
window.location.href = "/";
|
| 92 |
}
|
| 93 |
|
| 94 |
-
// Public links
|
| 95 |
const mainLinks: NavLink[] = [
|
| 96 |
{ href: "/#features", label: "Features", icon: Sparkles },
|
| 97 |
{ href: "/#pricing", label: "Pricing", icon: CreditCard },
|
|
@@ -106,12 +70,12 @@ export function Nav() {
|
|
| 106 |
<Link href="/" className="flex items-center gap-2">
|
| 107 |
<ShieldCheck className="w-5 h-5 text-zinc-900" strokeWidth={2.2} />
|
| 108 |
<span className="font-semibold text-[15px] tracking-tight text-zinc-900">ClauseGuard</span>
|
| 109 |
-
<span className="hidden sm:inline text-[10px] font-medium text-zinc-400 ml-1 border border-zinc-200 px-1.5 py-0.5 rounded">v4.
|
| 110 |
</Link>
|
| 111 |
|
| 112 |
-
{/* Desktop Nav */}
|
| 113 |
<div className="hidden md:flex items-center gap-0.5">
|
| 114 |
-
{/* Public links
|
| 115 |
{mainLinks.map((l) => {
|
| 116 |
const isActive = pathname === l.href;
|
| 117 |
return (
|
|
@@ -124,7 +88,7 @@ export function Nav() {
|
|
| 124 |
);
|
| 125 |
})}
|
| 126 |
|
| 127 |
-
{/* Loading skeleton
|
| 128 |
{!loaded && (
|
| 129 |
<>
|
| 130 |
<div className="w-px h-4 bg-zinc-200 mx-1.5" />
|
|
@@ -135,7 +99,7 @@ export function Nav() {
|
|
| 135 |
</>
|
| 136 |
)}
|
| 137 |
|
| 138 |
-
{/* Logged-in links */}
|
| 139 |
{loaded && isLoggedIn && (
|
| 140 |
<>
|
| 141 |
{/* Dashboard */}
|
|
@@ -149,7 +113,7 @@ export function Nav() {
|
|
| 149 |
Dashboard
|
| 150 |
</Link>
|
| 151 |
|
| 152 |
-
{/* Team
|
| 153 |
{hasTeam && (
|
| 154 |
<Link href="/dashboard-pages/team"
|
| 155 |
className={`flex items-center gap-1.5 px-2.5 py-1.5 text-[13px] rounded-md transition-colors ${
|
|
@@ -162,7 +126,7 @@ export function Nav() {
|
|
| 162 |
</Link>
|
| 163 |
)}
|
| 164 |
|
| 165 |
-
{/* Admin
|
| 166 |
{isAdmin && (
|
| 167 |
<Link href="/admin"
|
| 168 |
className={`flex items-center gap-1.5 px-2.5 py-1.5 text-[13px] rounded-md transition-colors ${
|
|
@@ -187,13 +151,14 @@ export function Nav() {
|
|
| 187 |
<Settings className="w-3.5 h-3.5" />
|
| 188 |
Settings
|
| 189 |
</Link>
|
| 190 |
-
|
|
|
|
| 191 |
<div className="relative group ml-1">
|
| 192 |
<button className="flex items-center gap-1.5 px-2.5 py-1.5 text-[13px] text-zinc-500 hover:text-zinc-900 rounded-md hover:bg-zinc-50 transition-colors">
|
| 193 |
<UserCircle className="w-3.5 h-3.5" />
|
| 194 |
<span className="max-w-[100px] truncate">{userEmail?.split("@")[0]}</span>
|
| 195 |
</button>
|
| 196 |
-
{/* Dropdown
|
| 197 |
<div className="absolute right-0 top-full mt-1 w-52 bg-white border border-zinc-200 rounded-xl shadow-lg opacity-0 invisible group-hover:opacity-100 group-hover:visible transition-all duration-150 z-50">
|
| 198 |
<div className="px-3 py-2.5 border-b border-zinc-100">
|
| 199 |
<p className="text-xs text-zinc-400">Signed in as</p>
|
|
@@ -233,7 +198,7 @@ export function Nav() {
|
|
| 233 |
</>
|
| 234 |
)}
|
| 235 |
|
| 236 |
-
{/* Logged-out links */}
|
| 237 |
{loaded && !isLoggedIn && (
|
| 238 |
<>
|
| 239 |
<div className="w-px h-4 bg-zinc-200 mx-1.5" />
|
|
@@ -261,7 +226,7 @@ export function Nav() {
|
|
| 261 |
</button>
|
| 262 |
</div>
|
| 263 |
|
| 264 |
-
{/* Mobile Menu */}
|
| 265 |
{open && (
|
| 266 |
<div className="md:hidden border-t border-zinc-100 bg-white px-5 py-3 space-y-0.5">
|
| 267 |
{/* Public links */}
|
|
@@ -279,7 +244,7 @@ export function Nav() {
|
|
| 279 |
);
|
| 280 |
})}
|
| 281 |
|
| 282 |
-
{/* Mobile loading skeleton */}
|
| 283 |
{!loaded && (
|
| 284 |
<>
|
| 285 |
<div className="h-px bg-zinc-100 my-1.5" />
|
|
@@ -290,12 +255,12 @@ export function Nav() {
|
|
| 290 |
</>
|
| 291 |
)}
|
| 292 |
|
| 293 |
-
{/* Mobile: Logged-in links */}
|
| 294 |
{loaded && isLoggedIn && (
|
| 295 |
<>
|
| 296 |
<div className="h-px bg-zinc-100 my-1.5" />
|
| 297 |
|
| 298 |
-
{/* User info
|
| 299 |
<div className="px-3 py-2">
|
| 300 |
<p className="text-xs text-zinc-400">Signed in as</p>
|
| 301 |
<p className="text-sm text-zinc-700 font-medium truncate">{userEmail}</p>
|
|
@@ -319,7 +284,7 @@ export function Nav() {
|
|
| 319 |
<Settings className="w-4 h-4 text-zinc-400" /> Settings
|
| 320 |
</Link>
|
| 321 |
|
| 322 |
-
{/* Team
|
| 323 |
{hasTeam && (
|
| 324 |
<Link href="/dashboard-pages/team" onClick={() => setOpen(false)}
|
| 325 |
className={`flex items-center gap-2.5 px-3 py-2.5 text-sm rounded-md ${
|
|
@@ -331,7 +296,7 @@ export function Nav() {
|
|
| 331 |
</Link>
|
| 332 |
)}
|
| 333 |
|
| 334 |
-
{/* Admin
|
| 335 |
{isAdmin && (
|
| 336 |
<Link href="/admin" onClick={() => setOpen(false)}
|
| 337 |
className={`flex items-center gap-2.5 px-3 py-2.5 text-sm rounded-md ${
|
|
@@ -359,7 +324,7 @@ export function Nav() {
|
|
| 359 |
</>
|
| 360 |
)}
|
| 361 |
|
| 362 |
-
{/* Mobile: Logged-out links */}
|
| 363 |
{loaded && !isLoggedIn && (
|
| 364 |
<>
|
| 365 |
<div className="h-px bg-zinc-100 my-1.5" />
|
|
|
|
| 29 |
const hasTeam = !!userTeam;
|
| 30 |
|
| 31 |
useEffect(() => {
|
|
|
|
| 32 |
const supabase = createClient();
|
| 33 |
+
supabase.auth.getUser().then(async ({ data }) => {
|
| 34 |
+
const user = data.user;
|
| 35 |
+
setUserEmail(user?.email || null);
|
| 36 |
+
if (user) {
|
| 37 |
+
const { data: profile } = await supabase
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
.from("profiles")
|
| 39 |
.select("role, team_id")
|
| 40 |
+
.eq("id", user.id)
|
| 41 |
+
.single();
|
| 42 |
+
setUserRole(profile?.role || "user");
|
| 43 |
+
setUserTeam(profile?.team_id || null);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
}
|
| 45 |
+
setLoaded(true);
|
| 46 |
+
});
|
| 47 |
+
}, []);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
async function handleSignOut() {
|
| 50 |
+
const supabase = createClient();
|
| 51 |
+
await supabase.auth.signOut();
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
setUserEmail(null);
|
| 53 |
setUserRole(null);
|
| 54 |
setUserTeam(null);
|
| 55 |
window.location.href = "/";
|
| 56 |
}
|
| 57 |
|
| 58 |
+
// Public links — always visible
|
| 59 |
const mainLinks: NavLink[] = [
|
| 60 |
{ href: "/#features", label: "Features", icon: Sparkles },
|
| 61 |
{ href: "/#pricing", label: "Pricing", icon: CreditCard },
|
|
|
|
| 70 |
<Link href="/" className="flex items-center gap-2">
|
| 71 |
<ShieldCheck className="w-5 h-5 text-zinc-900" strokeWidth={2.2} />
|
| 72 |
<span className="font-semibold text-[15px] tracking-tight text-zinc-900">ClauseGuard</span>
|
| 73 |
+
<span className="hidden sm:inline text-[10px] font-medium text-zinc-400 ml-1 border border-zinc-200 px-1.5 py-0.5 rounded">v4.0</span>
|
| 74 |
</Link>
|
| 75 |
|
| 76 |
+
{/* ─── Desktop Nav ─── */}
|
| 77 |
<div className="hidden md:flex items-center gap-0.5">
|
| 78 |
+
{/* Public links — always visible */}
|
| 79 |
{mainLinks.map((l) => {
|
| 80 |
const isActive = pathname === l.href;
|
| 81 |
return (
|
|
|
|
| 88 |
);
|
| 89 |
})}
|
| 90 |
|
| 91 |
+
{/* ── Loading skeleton ── */}
|
| 92 |
{!loaded && (
|
| 93 |
<>
|
| 94 |
<div className="w-px h-4 bg-zinc-200 mx-1.5" />
|
|
|
|
| 99 |
</>
|
| 100 |
)}
|
| 101 |
|
| 102 |
+
{/* ── Logged-in links ── */}
|
| 103 |
{loaded && isLoggedIn && (
|
| 104 |
<>
|
| 105 |
{/* Dashboard */}
|
|
|
|
| 113 |
Dashboard
|
| 114 |
</Link>
|
| 115 |
|
| 116 |
+
{/* Team — only when user has a team */}
|
| 117 |
{hasTeam && (
|
| 118 |
<Link href="/dashboard-pages/team"
|
| 119 |
className={`flex items-center gap-1.5 px-2.5 py-1.5 text-[13px] rounded-md transition-colors ${
|
|
|
|
| 126 |
</Link>
|
| 127 |
)}
|
| 128 |
|
| 129 |
+
{/* Admin — only for admin role */}
|
| 130 |
{isAdmin && (
|
| 131 |
<Link href="/admin"
|
| 132 |
className={`flex items-center gap-1.5 px-2.5 py-1.5 text-[13px] rounded-md transition-colors ${
|
|
|
|
| 151 |
<Settings className="w-3.5 h-3.5" />
|
| 152 |
Settings
|
| 153 |
</Link>
|
| 154 |
+
|
| 155 |
+
{/* User indicator + sign out dropdown */}
|
| 156 |
<div className="relative group ml-1">
|
| 157 |
<button className="flex items-center gap-1.5 px-2.5 py-1.5 text-[13px] text-zinc-500 hover:text-zinc-900 rounded-md hover:bg-zinc-50 transition-colors">
|
| 158 |
<UserCircle className="w-3.5 h-3.5" />
|
| 159 |
<span className="max-w-[100px] truncate">{userEmail?.split("@")[0]}</span>
|
| 160 |
</button>
|
| 161 |
+
{/* Dropdown */}
|
| 162 |
<div className="absolute right-0 top-full mt-1 w-52 bg-white border border-zinc-200 rounded-xl shadow-lg opacity-0 invisible group-hover:opacity-100 group-hover:visible transition-all duration-150 z-50">
|
| 163 |
<div className="px-3 py-2.5 border-b border-zinc-100">
|
| 164 |
<p className="text-xs text-zinc-400">Signed in as</p>
|
|
|
|
| 198 |
</>
|
| 199 |
)}
|
| 200 |
|
| 201 |
+
{/* ── Logged-out links ── */}
|
| 202 |
{loaded && !isLoggedIn && (
|
| 203 |
<>
|
| 204 |
<div className="w-px h-4 bg-zinc-200 mx-1.5" />
|
|
|
|
| 226 |
</button>
|
| 227 |
</div>
|
| 228 |
|
| 229 |
+
{/* ─── Mobile Menu ─── */}
|
| 230 |
{open && (
|
| 231 |
<div className="md:hidden border-t border-zinc-100 bg-white px-5 py-3 space-y-0.5">
|
| 232 |
{/* Public links */}
|
|
|
|
| 244 |
);
|
| 245 |
})}
|
| 246 |
|
| 247 |
+
{/* ── Mobile loading skeleton ── */}
|
| 248 |
{!loaded && (
|
| 249 |
<>
|
| 250 |
<div className="h-px bg-zinc-100 my-1.5" />
|
|
|
|
| 255 |
</>
|
| 256 |
)}
|
| 257 |
|
| 258 |
+
{/* ── Mobile: Logged-in links ── */}
|
| 259 |
{loaded && isLoggedIn && (
|
| 260 |
<>
|
| 261 |
<div className="h-px bg-zinc-100 my-1.5" />
|
| 262 |
|
| 263 |
+
{/* User info */}
|
| 264 |
<div className="px-3 py-2">
|
| 265 |
<p className="text-xs text-zinc-400">Signed in as</p>
|
| 266 |
<p className="text-sm text-zinc-700 font-medium truncate">{userEmail}</p>
|
|
|
|
| 284 |
<Settings className="w-4 h-4 text-zinc-400" /> Settings
|
| 285 |
</Link>
|
| 286 |
|
| 287 |
+
{/* Team */}
|
| 288 |
{hasTeam && (
|
| 289 |
<Link href="/dashboard-pages/team" onClick={() => setOpen(false)}
|
| 290 |
className={`flex items-center gap-2.5 px-3 py-2.5 text-sm rounded-md ${
|
|
|
|
| 296 |
</Link>
|
| 297 |
)}
|
| 298 |
|
| 299 |
+
{/* Admin */}
|
| 300 |
{isAdmin && (
|
| 301 |
<Link href="/admin" onClick={() => setOpen(false)}
|
| 302 |
className={`flex items-center gap-2.5 px-3 py-2.5 text-sm rounded-md ${
|
|
|
|
| 324 |
</>
|
| 325 |
)}
|
| 326 |
|
| 327 |
+
{/* ── Mobile: Logged-out links ── */}
|
| 328 |
{loaded && !isLoggedIn && (
|
| 329 |
<>
|
| 330 |
<div className="h-px bg-zinc-100 my-1.5" />
|
web/lib/export-utils.ts
DELETED
|
@@ -1,454 +0,0 @@
|
|
| 1 |
-
/**
|
| 2 |
-
* ClauseGuard — Multi-format Report Export Utility
|
| 3 |
-
* Generates reports in: JSON, CSV, Markdown, Plain Text, HTML
|
| 4 |
-
* PDF and DOCX use server-side generation via API routes.
|
| 5 |
-
*/
|
| 6 |
-
|
| 7 |
-
import type { AnalysisResult, Clause, Entity, Contradiction, Obligation, ComplianceReg, Redline } from "./types";
|
| 8 |
-
|
| 9 |
-
// ── Severity ordering ──
|
| 10 |
-
const SEV_ORDER: Record<string, number> = { CRITICAL: 4, HIGH: 3, MEDIUM: 2, LOW: 1 };
|
| 11 |
-
|
| 12 |
-
function sevSort(a: string, b: string) {
|
| 13 |
-
return (SEV_ORDER[b] || 0) - (SEV_ORDER[a] || 0);
|
| 14 |
-
}
|
| 15 |
-
|
| 16 |
-
function timestamp() {
|
| 17 |
-
return new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
|
| 18 |
-
}
|
| 19 |
-
|
| 20 |
-
function download(content: string | Blob, filename: string, mime: string) {
|
| 21 |
-
const blob = content instanceof Blob ? content : new Blob([content], { type: mime });
|
| 22 |
-
const url = URL.createObjectURL(blob);
|
| 23 |
-
const a = document.createElement("a");
|
| 24 |
-
a.href = url;
|
| 25 |
-
a.download = filename;
|
| 26 |
-
document.body.appendChild(a);
|
| 27 |
-
a.click();
|
| 28 |
-
document.body.removeChild(a);
|
| 29 |
-
URL.revokeObjectURL(url);
|
| 30 |
-
}
|
| 31 |
-
|
| 32 |
-
// ═══════════════════════════════════════════════════════════════
|
| 33 |
-
// JSON Export
|
| 34 |
-
// ═══════════════════════════════════════════════════════════════
|
| 35 |
-
|
| 36 |
-
export function exportJSON(results: AnalysisResult, formatted = true) {
|
| 37 |
-
const json = formatted
|
| 38 |
-
? JSON.stringify(results, null, 2)
|
| 39 |
-
: JSON.stringify(results);
|
| 40 |
-
download(json, `clauseguard-report-${timestamp()}.json`, "application/json");
|
| 41 |
-
}
|
| 42 |
-
|
| 43 |
-
// ═══════════════════════════════════════════════════════════════
|
| 44 |
-
// CSV Export
|
| 45 |
-
// ═══════════════════════════════════════════════════════════════
|
| 46 |
-
|
| 47 |
-
function escapeCSV(val: string): string {
|
| 48 |
-
if (val.includes(",") || val.includes('"') || val.includes("\n")) {
|
| 49 |
-
return `"${val.replace(/"/g, '""')}"`;
|
| 50 |
-
}
|
| 51 |
-
return val;
|
| 52 |
-
}
|
| 53 |
-
|
| 54 |
-
export function exportCSV(results: AnalysisResult) {
|
| 55 |
-
const rows: string[] = [];
|
| 56 |
-
|
| 57 |
-
// Header
|
| 58 |
-
rows.push("Section,Category,Severity,Confidence,Source,Text,Description");
|
| 59 |
-
|
| 60 |
-
// Clauses
|
| 61 |
-
for (const clause of results.results) {
|
| 62 |
-
for (const cat of clause.categories) {
|
| 63 |
-
rows.push([
|
| 64 |
-
"Clause",
|
| 65 |
-
escapeCSV(cat.name),
|
| 66 |
-
cat.severity,
|
| 67 |
-
cat.confidence != null ? String(Math.round(cat.confidence * 100)) + "%" : "pattern",
|
| 68 |
-
cat.confidence != null ? "ML" : "Pattern",
|
| 69 |
-
escapeCSV(clause.text.slice(0, 500)),
|
| 70 |
-
escapeCSV(cat.description || ""),
|
| 71 |
-
].join(","));
|
| 72 |
-
}
|
| 73 |
-
}
|
| 74 |
-
|
| 75 |
-
// Entities
|
| 76 |
-
for (const ent of results.entities) {
|
| 77 |
-
rows.push([
|
| 78 |
-
"Entity",
|
| 79 |
-
escapeCSV(ent.type),
|
| 80 |
-
"",
|
| 81 |
-
ent.score ? String(Math.round(ent.score * 100)) + "%" : "",
|
| 82 |
-
ent.source || "",
|
| 83 |
-
escapeCSV(ent.text),
|
| 84 |
-
"",
|
| 85 |
-
].join(","));
|
| 86 |
-
}
|
| 87 |
-
|
| 88 |
-
// Contradictions
|
| 89 |
-
for (const c of results.contradictions) {
|
| 90 |
-
rows.push([
|
| 91 |
-
"Contradiction",
|
| 92 |
-
escapeCSV(c.type),
|
| 93 |
-
c.severity,
|
| 94 |
-
c.confidence ? String(Math.round(c.confidence * 100)) + "%" : "",
|
| 95 |
-
c.source || "",
|
| 96 |
-
escapeCSV(c.explanation),
|
| 97 |
-
"",
|
| 98 |
-
].join(","));
|
| 99 |
-
}
|
| 100 |
-
|
| 101 |
-
// Obligations
|
| 102 |
-
for (const o of results.obligations) {
|
| 103 |
-
rows.push([
|
| 104 |
-
"Obligation",
|
| 105 |
-
escapeCSV(o.type),
|
| 106 |
-
o.priority != null && o.priority >= 3 ? "HIGH" : o.priority === 2 ? "MEDIUM" : "LOW",
|
| 107 |
-
"",
|
| 108 |
-
"",
|
| 109 |
-
escapeCSV(o.description),
|
| 110 |
-
escapeCSV(`${o.party} · ${o.deadline}`),
|
| 111 |
-
].join(","));
|
| 112 |
-
}
|
| 113 |
-
|
| 114 |
-
download(rows.join("\n"), `clauseguard-report-${timestamp()}.csv`, "text/csv");
|
| 115 |
-
}
|
| 116 |
-
|
| 117 |
-
// ═══════════════════════════════════════════════════════════════
|
| 118 |
-
// Markdown Export
|
| 119 |
-
// ═══════════════════════════════════════════════════════════════
|
| 120 |
-
|
| 121 |
-
export function exportMarkdown(results: AnalysisResult) {
|
| 122 |
-
const lines: string[] = [];
|
| 123 |
-
const flagged = results.results.filter(r => r.categories.length > 0);
|
| 124 |
-
const sevCounts = { CRITICAL: 0, HIGH: 0, MEDIUM: 0, LOW: 0 };
|
| 125 |
-
flagged.forEach(r => r.categories.forEach(c => {
|
| 126 |
-
if (sevCounts[c.severity as keyof typeof sevCounts] !== undefined) sevCounts[c.severity as keyof typeof sevCounts]++;
|
| 127 |
-
}));
|
| 128 |
-
|
| 129 |
-
lines.push("# 🛡️ ClauseGuard Analysis Report");
|
| 130 |
-
lines.push("");
|
| 131 |
-
lines.push(`**Generated:** ${new Date().toLocaleString()}`);
|
| 132 |
-
lines.push(`**Risk Score:** ${results.risk_score}/100 · **Grade:** ${results.grade}`);
|
| 133 |
-
lines.push(`**Clauses:** ${results.total_clauses} total · ${results.flagged_count} flagged`);
|
| 134 |
-
lines.push(`**Model:** ${results.model === "ml" || results.model !== "regex" ? "ML Models" : "Pattern Matching"}`);
|
| 135 |
-
lines.push("");
|
| 136 |
-
|
| 137 |
-
// Severity breakdown
|
| 138 |
-
lines.push("## 📊 Risk Breakdown");
|
| 139 |
-
lines.push("");
|
| 140 |
-
lines.push("| Severity | Count |");
|
| 141 |
-
lines.push("|----------|-------|");
|
| 142 |
-
lines.push(`| 🔴 Critical | ${sevCounts.CRITICAL} |`);
|
| 143 |
-
lines.push(`| 🟠 High | ${sevCounts.HIGH} |`);
|
| 144 |
-
lines.push(`| 🟡 Medium | ${sevCounts.MEDIUM} |`);
|
| 145 |
-
lines.push(`| 🟢 Low | ${sevCounts.LOW} |`);
|
| 146 |
-
lines.push("");
|
| 147 |
-
|
| 148 |
-
// Flagged clauses
|
| 149 |
-
if (flagged.length > 0) {
|
| 150 |
-
lines.push("## ⚠️ Flagged Clauses");
|
| 151 |
-
lines.push("");
|
| 152 |
-
for (const clause of flagged) {
|
| 153 |
-
const labels = clause.categories.map(c => `**${c.name}** (${c.severity})`).join(", ");
|
| 154 |
-
lines.push(`### ${labels}`);
|
| 155 |
-
lines.push("");
|
| 156 |
-
lines.push(`> ${clause.text.slice(0, 500)}${clause.text.length > 500 ? "..." : ""}`);
|
| 157 |
-
lines.push("");
|
| 158 |
-
for (const cat of clause.categories) {
|
| 159 |
-
if (cat.description) lines.push(`- ${cat.description}`);
|
| 160 |
-
const src = cat.confidence != null ? `ML ${Math.round(cat.confidence * 100)}%` : "Pattern match";
|
| 161 |
-
lines.push(`- *Source: ${src}*`);
|
| 162 |
-
}
|
| 163 |
-
lines.push("");
|
| 164 |
-
}
|
| 165 |
-
}
|
| 166 |
-
|
| 167 |
-
// Entities
|
| 168 |
-
if (results.entities.length > 0) {
|
| 169 |
-
lines.push("## 🏷️ Extracted Entities");
|
| 170 |
-
lines.push("");
|
| 171 |
-
const grouped: Record<string, string[]> = {};
|
| 172 |
-
results.entities.forEach(e => {
|
| 173 |
-
if (!grouped[e.type]) grouped[e.type] = [];
|
| 174 |
-
if (!grouped[e.type].includes(e.text)) grouped[e.type].push(e.text);
|
| 175 |
-
});
|
| 176 |
-
for (const [type, items] of Object.entries(grouped)) {
|
| 177 |
-
lines.push(`**${type.replace(/_/g, " ")}:** ${items.join(", ")}`);
|
| 178 |
-
}
|
| 179 |
-
lines.push("");
|
| 180 |
-
}
|
| 181 |
-
|
| 182 |
-
// Contradictions
|
| 183 |
-
if (results.contradictions.length > 0) {
|
| 184 |
-
lines.push("## 🔍 Contradictions & Issues");
|
| 185 |
-
lines.push("");
|
| 186 |
-
for (const c of results.contradictions) {
|
| 187 |
-
lines.push(`- **[${c.severity}] ${c.type}:** ${c.explanation}`);
|
| 188 |
-
}
|
| 189 |
-
lines.push("");
|
| 190 |
-
}
|
| 191 |
-
|
| 192 |
-
// Obligations
|
| 193 |
-
if (results.obligations.length > 0) {
|
| 194 |
-
lines.push("## 📋 Obligations");
|
| 195 |
-
lines.push("");
|
| 196 |
-
lines.push("| Type | Party | Description | Deadline |");
|
| 197 |
-
lines.push("|------|-------|-------------|----------|");
|
| 198 |
-
for (const o of results.obligations) {
|
| 199 |
-
lines.push(`| ${o.type} | ${o.party} | ${o.description.slice(0, 100)} | ${o.deadline} |`);
|
| 200 |
-
}
|
| 201 |
-
lines.push("");
|
| 202 |
-
}
|
| 203 |
-
|
| 204 |
-
// Compliance
|
| 205 |
-
if (Object.keys(results.compliance).length > 0) {
|
| 206 |
-
lines.push("## ⚖️ Compliance");
|
| 207 |
-
lines.push("");
|
| 208 |
-
for (const [name, reg] of Object.entries(results.compliance)) {
|
| 209 |
-
lines.push(`### ${name} — ${reg.compliance_rate}% (${reg.overall_status})`);
|
| 210 |
-
lines.push(`*${reg.description}*`);
|
| 211 |
-
lines.push("");
|
| 212 |
-
for (const check of reg.checks) {
|
| 213 |
-
const icon = check.status === "PASS" ? "✅" : check.status === "MISSING" ? "❌" : "⚠️";
|
| 214 |
-
lines.push(`${icon} ${check.description} (${check.severity})`);
|
| 215 |
-
}
|
| 216 |
-
lines.push("");
|
| 217 |
-
}
|
| 218 |
-
}
|
| 219 |
-
|
| 220 |
-
// Redlines
|
| 221 |
-
if (results.redlines && results.redlines.length > 0) {
|
| 222 |
-
lines.push("## ✏️ Redlining Suggestions");
|
| 223 |
-
lines.push("");
|
| 224 |
-
for (const rl of results.redlines) {
|
| 225 |
-
lines.push(`### ${rl.clause_label} (${rl.risk_level})`);
|
| 226 |
-
lines.push("");
|
| 227 |
-
lines.push(`~~${rl.original_text.slice(0, 200)}~~`);
|
| 228 |
-
lines.push("");
|
| 229 |
-
lines.push(`✅ **Suggested:** ${rl.safe_alternative}`);
|
| 230 |
-
lines.push(`📚 ${rl.legal_basis} · 🛡️ ${rl.consumer_standard}`);
|
| 231 |
-
lines.push("");
|
| 232 |
-
}
|
| 233 |
-
}
|
| 234 |
-
|
| 235 |
-
lines.push("---");
|
| 236 |
-
lines.push("*⚠️ Not legal advice. Generated by ClauseGuard AI.*");
|
| 237 |
-
|
| 238 |
-
download(lines.join("\n"), `clauseguard-report-${timestamp()}.md`, "text/markdown");
|
| 239 |
-
}
|
| 240 |
-
|
| 241 |
-
// ═══════════════════════════════════════════════════════════════
|
| 242 |
-
// Plain Text Export
|
| 243 |
-
// ═══════════════════════════════════════════════════════════════
|
| 244 |
-
|
| 245 |
-
export function exportText(results: AnalysisResult) {
|
| 246 |
-
const lines: string[] = [];
|
| 247 |
-
const flagged = results.results.filter(r => r.categories.length > 0);
|
| 248 |
-
|
| 249 |
-
lines.push("═══════════════════════════════════════════════════════");
|
| 250 |
-
lines.push(" CLAUSEGUARD ANALYSIS REPORT");
|
| 251 |
-
lines.push("═══════════════════════════════════════════════════════");
|
| 252 |
-
lines.push("");
|
| 253 |
-
lines.push(`Date: ${new Date().toLocaleString()}`);
|
| 254 |
-
lines.push(`Risk Score: ${results.risk_score}/100`);
|
| 255 |
-
lines.push(`Grade: ${results.grade}`);
|
| 256 |
-
lines.push(`Clauses: ${results.total_clauses} total, ${results.flagged_count} flagged`);
|
| 257 |
-
lines.push(`Entities: ${results.entities.length}`);
|
| 258 |
-
lines.push(`Issues: ${results.contradictions.length}`);
|
| 259 |
-
lines.push(`Obligations: ${results.obligations.length}`);
|
| 260 |
-
lines.push("");
|
| 261 |
-
lines.push("───────────────────────────────────────────────────────");
|
| 262 |
-
lines.push(" FLAGGED CLAUSES");
|
| 263 |
-
lines.push("───────────────────────────────────────────────────────");
|
| 264 |
-
lines.push("");
|
| 265 |
-
|
| 266 |
-
for (let i = 0; i < flagged.length; i++) {
|
| 267 |
-
const clause = flagged[i];
|
| 268 |
-
const labels = clause.categories.map(c => `[${c.severity}] ${c.name}`).join(", ");
|
| 269 |
-
lines.push(`${i + 1}. ${labels}`);
|
| 270 |
-
lines.push(` ${clause.text.slice(0, 300)}${clause.text.length > 300 ? "..." : ""}`);
|
| 271 |
-
lines.push("");
|
| 272 |
-
}
|
| 273 |
-
|
| 274 |
-
if (results.entities.length > 0) {
|
| 275 |
-
lines.push("───────────────────────────────────────────────────────");
|
| 276 |
-
lines.push(" ENTITIES");
|
| 277 |
-
lines.push("───────────────────────────────────────────────────────");
|
| 278 |
-
lines.push("");
|
| 279 |
-
const grouped: Record<string, string[]> = {};
|
| 280 |
-
results.entities.forEach(e => {
|
| 281 |
-
if (!grouped[e.type]) grouped[e.type] = [];
|
| 282 |
-
if (!grouped[e.type].includes(e.text)) grouped[e.type].push(e.text);
|
| 283 |
-
});
|
| 284 |
-
for (const [type, items] of Object.entries(grouped)) {
|
| 285 |
-
lines.push(` ${type}: ${items.join(", ")}`);
|
| 286 |
-
}
|
| 287 |
-
lines.push("");
|
| 288 |
-
}
|
| 289 |
-
|
| 290 |
-
if (results.contradictions.length > 0) {
|
| 291 |
-
lines.push("───────────────────────────────────────────────────────");
|
| 292 |
-
lines.push(" CONTRADICTIONS & ISSUES");
|
| 293 |
-
lines.push("───────────────────────────────────────────────────────");
|
| 294 |
-
lines.push("");
|
| 295 |
-
for (const c of results.contradictions) {
|
| 296 |
-
lines.push(` [${c.severity}] ${c.type}: ${c.explanation}`);
|
| 297 |
-
}
|
| 298 |
-
lines.push("");
|
| 299 |
-
}
|
| 300 |
-
|
| 301 |
-
if (results.obligations.length > 0) {
|
| 302 |
-
lines.push("───────────────────────────────────────────────────────");
|
| 303 |
-
lines.push(" OBLIGATIONS");
|
| 304 |
-
lines.push("───────────────────────────────────────────────────────");
|
| 305 |
-
lines.push("");
|
| 306 |
-
for (const o of results.obligations) {
|
| 307 |
-
lines.push(` [${o.type}] ${o.party}: ${o.description} (${o.deadline})`);
|
| 308 |
-
}
|
| 309 |
-
lines.push("");
|
| 310 |
-
}
|
| 311 |
-
|
| 312 |
-
if (results.redlines && results.redlines.length > 0) {
|
| 313 |
-
lines.push("───────────────────────────────────────────────────────");
|
| 314 |
-
lines.push(" REDLINING SUGGESTIONS");
|
| 315 |
-
lines.push("───────────────────────────────────────────────────────");
|
| 316 |
-
lines.push("");
|
| 317 |
-
for (const rl of results.redlines) {
|
| 318 |
-
lines.push(` [${rl.risk_level}] ${rl.clause_label}`);
|
| 319 |
-
lines.push(` ORIGINAL: ${rl.original_text.slice(0, 200)}`);
|
| 320 |
-
lines.push(` SUGGESTED: ${rl.safe_alternative}`);
|
| 321 |
-
lines.push("");
|
| 322 |
-
}
|
| 323 |
-
}
|
| 324 |
-
|
| 325 |
-
lines.push("═══════════════════════════════════════════════════════");
|
| 326 |
-
lines.push(" NOT LEGAL ADVICE — Generated by ClauseGuard AI");
|
| 327 |
-
lines.push("═══════════════════════════════════════════════════════");
|
| 328 |
-
|
| 329 |
-
download(lines.join("\n"), `clauseguard-report-${timestamp()}.txt`, "text/plain");
|
| 330 |
-
}
|
| 331 |
-
|
| 332 |
-
// ═══════════════════════════════════════════════════════════════
|
| 333 |
-
// HTML Export (self-contained styled report)
|
| 334 |
-
// ═══════════════════════════════════════════════════════════════
|
| 335 |
-
|
| 336 |
-
export function exportHTML(results: AnalysisResult) {
|
| 337 |
-
const flagged = results.results.filter(r => r.categories.length > 0);
|
| 338 |
-
const sevCounts = { CRITICAL: 0, HIGH: 0, MEDIUM: 0, LOW: 0 };
|
| 339 |
-
flagged.forEach(r => r.categories.forEach(c => {
|
| 340 |
-
if (sevCounts[c.severity as keyof typeof sevCounts] !== undefined) sevCounts[c.severity as keyof typeof sevCounts]++;
|
| 341 |
-
}));
|
| 342 |
-
|
| 343 |
-
const sevColor: Record<string, string> = { CRITICAL: "#dc2626", HIGH: "#ea580c", MEDIUM: "#ca8a04", LOW: "#16a34a" };
|
| 344 |
-
|
| 345 |
-
const clauseHTML = flagged.map(clause => {
|
| 346 |
-
const tags = clause.categories.map(c =>
|
| 347 |
-
`<span style="display:inline-block;background:${sevColor[c.severity] || '#888'}15;color:${sevColor[c.severity] || '#888'};border:1px solid ${sevColor[c.severity] || '#888'}40;padding:2px 10px;border-radius:4px;font-size:12px;font-weight:600;margin-right:4px;">${c.name} (${c.severity})</span>`
|
| 348 |
-
).join("");
|
| 349 |
-
return `<div style="border:1px solid #e5e7eb;border-radius:8px;padding:16px;margin-bottom:12px;">
|
| 350 |
-
<div style="margin-bottom:8px;">${tags}</div>
|
| 351 |
-
<p style="font-size:13px;color:#374151;line-height:1.7;margin:0;">${clause.text.replace(/</g, "<").slice(0, 500)}</p>
|
| 352 |
-
</div>`;
|
| 353 |
-
}).join("\n");
|
| 354 |
-
|
| 355 |
-
const entityHTML = (() => {
|
| 356 |
-
const grouped: Record<string, string[]> = {};
|
| 357 |
-
results.entities.forEach(e => {
|
| 358 |
-
if (!grouped[e.type]) grouped[e.type] = [];
|
| 359 |
-
if (!grouped[e.type].includes(e.text)) grouped[e.type].push(e.text);
|
| 360 |
-
});
|
| 361 |
-
return Object.entries(grouped).map(([type, items]) =>
|
| 362 |
-
`<div style="margin-bottom:12px;"><strong style="font-size:12px;text-transform:uppercase;color:#6b7280;">${type.replace(/_/g, " ")}</strong><div style="margin-top:4px;">${items.map(t => `<span style="display:inline-block;background:#f3f4f6;padding:3px 10px;border-radius:4px;font-size:12px;margin:2px;">${t}</span>`).join("")}</div></div>`
|
| 363 |
-
).join("\n");
|
| 364 |
-
})();
|
| 365 |
-
|
| 366 |
-
const html = `<!DOCTYPE html>
|
| 367 |
-
<html lang="en">
|
| 368 |
-
<head>
|
| 369 |
-
<meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1">
|
| 370 |
-
<title>ClauseGuard Report — ${new Date().toLocaleDateString()}</title>
|
| 371 |
-
<style>
|
| 372 |
-
*{margin:0;padding:0;box-sizing:border-box}
|
| 373 |
-
body{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;color:#1f2937;background:#fff;padding:40px;max-width:800px;margin:0 auto}
|
| 374 |
-
h1{font-size:24px;font-weight:700;margin-bottom:4px}
|
| 375 |
-
h2{font-size:16px;font-weight:600;margin:24px 0 12px;padding-bottom:8px;border-bottom:1px solid #e5e7eb}
|
| 376 |
-
.meta{font-size:12px;color:#9ca3af}
|
| 377 |
-
.score-card{display:flex;justify-content:space-between;align-items:center;background:#fafafa;border:1px solid #e5e7eb;border-radius:12px;padding:20px;margin:16px 0}
|
| 378 |
-
.score{font-size:36px;font-weight:700}
|
| 379 |
-
.grade{font-size:18px;font-weight:700;padding:6px 16px;border-radius:8px;border:1px solid #e5e7eb}
|
| 380 |
-
.sev-grid{display:grid;grid-template-columns:repeat(4,1fr);gap:8px;margin:12px 0}
|
| 381 |
-
.sev-item{text-align:center;padding:8px;border-radius:8px}
|
| 382 |
-
.disclaimer{margin-top:32px;padding:12px;background:#fefce8;border:1px solid #fde68a;border-radius:8px;font-size:11px;color:#92400e}
|
| 383 |
-
@media print{body{padding:20px}h2{break-before:auto}}
|
| 384 |
-
</style>
|
| 385 |
-
</head>
|
| 386 |
-
<body>
|
| 387 |
-
<h1>🛡️ ClauseGuard Analysis Report</h1>
|
| 388 |
-
<p class="meta">${new Date().toLocaleString()} · ${results.model !== "regex" ? "ML Models" : "Pattern Matching"}</p>
|
| 389 |
-
|
| 390 |
-
<div class="score-card">
|
| 391 |
-
<div>
|
| 392 |
-
<p class="meta">RISK SCORE</p>
|
| 393 |
-
<p class="score">${results.risk_score}<span style="font-size:16px;color:#9ca3af">/100</span></p>
|
| 394 |
-
</div>
|
| 395 |
-
<span class="grade">Grade ${results.grade}</span>
|
| 396 |
-
</div>
|
| 397 |
-
|
| 398 |
-
<div class="sev-grid">
|
| 399 |
-
<div class="sev-item" style="background:#fef2f2"><strong style="color:#dc2626">${sevCounts.CRITICAL}</strong><br><small style="color:#dc2626">Critical</small></div>
|
| 400 |
-
<div class="sev-item" style="background:#fff7ed"><strong style="color:#ea580c">${sevCounts.HIGH}</strong><br><small style="color:#ea580c">High</small></div>
|
| 401 |
-
<div class="sev-item" style="background:#fefce8"><strong style="color:#ca8a04">${sevCounts.MEDIUM}</strong><br><small style="color:#ca8a04">Medium</small></div>
|
| 402 |
-
<div class="sev-item" style="background:#f0fdf4"><strong style="color:#16a34a">${sevCounts.LOW}</strong><br><small style="color:#16a34a">Low</small></div>
|
| 403 |
-
</div>
|
| 404 |
-
|
| 405 |
-
<p class="meta">${results.total_clauses} clauses · ${results.flagged_count} flagged · ${results.entities.length} entities · ${results.obligations.length} obligations</p>
|
| 406 |
-
|
| 407 |
-
${flagged.length > 0 ? `<h2>⚠️ Flagged Clauses (${flagged.length})</h2>${clauseHTML}` : ""}
|
| 408 |
-
${results.entities.length > 0 ? `<h2>🏷️ Entities (${results.entities.length})</h2>${entityHTML}` : ""}
|
| 409 |
-
${results.contradictions.length > 0 ? `<h2>🔍 Issues (${results.contradictions.length})</h2>${results.contradictions.map(c => `<div style="border:1px solid #e5e7eb;border-left:3px solid ${sevColor[c.severity] || '#888'};border-radius:6px;padding:12px;margin-bottom:8px;"><strong style="color:${sevColor[c.severity]};font-size:11px;text-transform:uppercase">${c.type} (${c.severity})</strong><p style="font-size:13px;margin-top:4px">${c.explanation}</p></div>`).join("")}` : ""}
|
| 410 |
-
${results.obligations.length > 0 ? `<h2>📋 Obligations (${results.obligations.length})</h2><table style="width:100%;border-collapse:collapse;font-size:12px"><thead><tr style="background:#f9fafb;border-bottom:1px solid #e5e7eb"><th style="text-align:left;padding:8px">Type</th><th style="text-align:left;padding:8px">Party</th><th style="text-align:left;padding:8px">Description</th><th style="text-align:left;padding:8px">Deadline</th></tr></thead><tbody>${results.obligations.map(o => `<tr style="border-bottom:1px solid #f3f4f6"><td style="padding:8px;font-weight:500">${o.type}</td><td style="padding:8px">${o.party}</td><td style="padding:8px">${o.description.slice(0, 120)}</td><td style="padding:8px">${o.deadline}</td></tr>`).join("")}</tbody></table>` : ""}
|
| 411 |
-
${results.redlines && results.redlines.length > 0 ? `<h2>✏️ Redlining (${results.redlines.length})</h2>${results.redlines.map(rl => `<div style="border:1px solid #e5e7eb;border-radius:8px;padding:16px;margin-bottom:12px"><strong style="color:${sevColor[rl.risk_level]}">${rl.clause_label} (${rl.risk_level})</strong><div style="background:#fef2f2;padding:8px;border-radius:4px;margin:8px 0;font-size:12px;text-decoration:line-through;color:#991b1b">${rl.original_text.slice(0, 200)}</div><div style="background:#f0fdf4;padding:8px;border-radius:4px;font-size:12px;color:#166534">${rl.safe_alternative}</div><p style="font-size:10px;color:#9ca3af;margin-top:6px">📚 ${rl.legal_basis} · 🛡️ ${rl.consumer_standard}</p></div>`).join("")}` : ""}
|
| 412 |
-
|
| 413 |
-
<div class="disclaimer">⚠️ <strong>Not legal advice.</strong> This report was generated by ClauseGuard AI for informational purposes only. Consult a licensed attorney for legal decisions.</div>
|
| 414 |
-
</body>
|
| 415 |
-
</html>`;
|
| 416 |
-
|
| 417 |
-
download(html, `clauseguard-report-${timestamp()}.html`, "text/html");
|
| 418 |
-
}
|
| 419 |
-
|
| 420 |
-
// ═══════════════════════════════════════════════════════════════
|
| 421 |
-
// PDF Export (via server-side API route)
|
| 422 |
-
// ═══════════════════════════════════════════════════════════════
|
| 423 |
-
|
| 424 |
-
export async function exportPDF(results: AnalysisResult) {
|
| 425 |
-
try {
|
| 426 |
-
const res = await fetch("/api/pdf/report", {
|
| 427 |
-
method: "POST",
|
| 428 |
-
headers: { "Content-Type": "application/json" },
|
| 429 |
-
body: JSON.stringify(results),
|
| 430 |
-
});
|
| 431 |
-
if (!res.ok) throw new Error("PDF generation failed");
|
| 432 |
-
const blob = await res.blob();
|
| 433 |
-
download(blob, `clauseguard-report-${timestamp()}.pdf`, "application/pdf");
|
| 434 |
-
return true;
|
| 435 |
-
} catch {
|
| 436 |
-
// Fallback: print HTML version
|
| 437 |
-
exportHTML(results);
|
| 438 |
-
return false;
|
| 439 |
-
}
|
| 440 |
-
}
|
| 441 |
-
|
| 442 |
-
// ═══════════════════════════════════════════════════════════════
|
| 443 |
-
// Export formats manifest (for the UI dropdown)
|
| 444 |
-
// ═══════════════════════════════════════════════════════════════
|
| 445 |
-
|
| 446 |
-
export const EXPORT_FORMATS = [
|
| 447 |
-
{ key: "pdf", label: "PDF Report", icon: "📄", description: "Formatted PDF document", fn: exportPDF },
|
| 448 |
-
{ key: "html", label: "HTML Report", icon: "🌐", description: "Styled HTML (printable)", fn: exportHTML },
|
| 449 |
-
{ key: "md", label: "Markdown", icon: "📝", description: "GitHub-flavored markdown", fn: exportMarkdown },
|
| 450 |
-
{ key: "txt", label: "Plain Text", icon: "📋", description: "Simple text format", fn: exportText },
|
| 451 |
-
{ key: "csv", label: "CSV Spreadsheet", icon: "📊", description: "For Excel / Google Sheets", fn: exportCSV },
|
| 452 |
-
{ key: "json", label: "JSON (formatted)", icon: "🔧", description: "Full structured data", fn: (r: AnalysisResult) => exportJSON(r, true) },
|
| 453 |
-
{ key: "json-raw", label: "JSON (raw)", icon: "⚡", description: "Compact, no whitespace", fn: (r: AnalysisResult) => exportJSON(r, false) },
|
| 454 |
-
] as const;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
web/lib/supabase/client.ts
CHANGED
|
@@ -2,8 +2,8 @@ import { createBrowserClient } from "@supabase/ssr";
|
|
| 2 |
|
| 3 |
export function createClient() {
|
| 4 |
return createBrowserClient(
|
| 5 |
-
process.env.NEXT_PUBLIC_SUPABASE_URL
|
| 6 |
-
process.env.NEXT_PUBLIC_SUPABASE_PUBLISHABLE_KEY
|
| 7 |
{
|
| 8 |
auth: {
|
| 9 |
autoRefreshToken: true,
|
|
|
|
| 2 |
|
| 3 |
export function createClient() {
|
| 4 |
return createBrowserClient(
|
| 5 |
+
process.env.NEXT_PUBLIC_SUPABASE_URL!,
|
| 6 |
+
process.env.NEXT_PUBLIC_SUPABASE_PUBLISHABLE_KEY!,
|
| 7 |
{
|
| 8 |
auth: {
|
| 9 |
autoRefreshToken: true,
|
web/lib/supabase/schema.sql
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
-
-- ClauseGuard — Full Database Schema v3.
|
| 2 |
-
-- FIX v4.1: Removed hardcoded admin email (was committed to public repo)
|
| 3 |
-- Tables ordered by dependency (no forward references)
|
| 4 |
|
| 5 |
-- ─── 1. Teams (no dependencies) ───
|
|
@@ -128,35 +127,24 @@ ALTER TABLE public.api_keys ENABLE ROW LEVEL SECURITY;
|
|
| 128 |
ALTER TABLE public.custom_rules ENABLE ROW LEVEL SECURITY;
|
| 129 |
ALTER TABLE public.admin_logs ENABLE ROW LEVEL SECURITY;
|
| 130 |
|
| 131 |
-
-- ─── FIX v4.3: SECURITY DEFINER function to check admin role ───
|
| 132 |
-
-- Querying profiles FROM a profiles policy causes infinite recursion (42P17).
|
| 133 |
-
-- SECURITY DEFINER bypasses RLS, breaking the cycle.
|
| 134 |
-
CREATE OR REPLACE FUNCTION public.is_admin()
|
| 135 |
-
RETURNS boolean AS $$
|
| 136 |
-
SELECT EXISTS (
|
| 137 |
-
SELECT 1 FROM public.profiles
|
| 138 |
-
WHERE id = auth.uid() AND role = 'admin'
|
| 139 |
-
);
|
| 140 |
-
$$ LANGUAGE sql SECURITY DEFINER STABLE;
|
| 141 |
-
|
| 142 |
-- Profiles
|
| 143 |
CREATE POLICY "Users see own profile" ON public.profiles FOR SELECT USING (auth.uid() = id);
|
| 144 |
CREATE POLICY "Users update own profile" ON public.profiles FOR UPDATE USING (auth.uid() = id);
|
| 145 |
-
CREATE POLICY "Admins read all profiles" ON public.profiles FOR SELECT USING (
|
| 146 |
-
CREATE POLICY "Admins update all profiles" ON public.profiles FOR UPDATE USING (
|
| 147 |
|
| 148 |
-- Analyses
|
| 149 |
CREATE POLICY "Users see own analyses" ON public.analyses FOR SELECT
|
| 150 |
USING (auth.uid() = user_id OR team_id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()));
|
| 151 |
CREATE POLICY "Users insert analyses" ON public.analyses FOR INSERT WITH CHECK (auth.uid() = user_id);
|
| 152 |
CREATE POLICY "Users delete own analyses" ON public.analyses FOR DELETE USING (auth.uid() = user_id);
|
| 153 |
-
CREATE POLICY "Admins read all analyses" ON public.analyses FOR SELECT USING (
|
| 154 |
|
| 155 |
-- Teams
|
| 156 |
CREATE POLICY "Team members can view" ON public.teams FOR SELECT
|
| 157 |
USING (id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()) OR owner_id = auth.uid());
|
| 158 |
CREATE POLICY "Owner can update team" ON public.teams FOR UPDATE USING (owner_id = auth.uid());
|
| 159 |
-
CREATE POLICY "Admins read all teams" ON public.teams FOR SELECT USING (
|
| 160 |
|
| 161 |
-- Team invites
|
| 162 |
CREATE POLICY "Members see team invites" ON public.team_invites FOR SELECT
|
|
@@ -167,17 +155,17 @@ CREATE POLICY "Users can invite" ON public.team_invites FOR INSERT WITH CHECK (i
|
|
| 167 |
CREATE POLICY "Users see own API keys" ON public.api_keys FOR SELECT
|
| 168 |
USING (user_id = auth.uid() OR team_id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()));
|
| 169 |
CREATE POLICY "Users manage own API keys" ON public.api_keys FOR ALL USING (user_id = auth.uid());
|
| 170 |
-
CREATE POLICY "Admins read all api_keys" ON public.api_keys FOR SELECT USING (
|
| 171 |
|
| 172 |
-- Custom Rules
|
| 173 |
CREATE POLICY "Users see own rules" ON public.custom_rules FOR SELECT
|
| 174 |
USING (user_id = auth.uid() OR team_id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()));
|
| 175 |
CREATE POLICY "Users manage own rules" ON public.custom_rules FOR ALL USING (user_id = auth.uid());
|
| 176 |
-
CREATE POLICY "Admins read all rules" ON public.custom_rules FOR SELECT USING (
|
| 177 |
|
| 178 |
-- Admin Logs
|
| 179 |
CREATE POLICY "Admins manage logs" ON public.admin_logs FOR ALL
|
| 180 |
-
USING (
|
| 181 |
|
| 182 |
-- ─── Auto-create profile on signup ───
|
| 183 |
CREATE OR REPLACE FUNCTION public.handle_new_user()
|
|
@@ -198,19 +186,11 @@ CREATE TRIGGER on_auth_user_created
|
|
| 198 |
AFTER INSERT ON auth.users
|
| 199 |
FOR EACH ROW EXECUTE FUNCTION public.handle_new_user();
|
| 200 |
|
| 201 |
-
-- ───
|
| 202 |
-
--
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
-- SET role = 'admin', plan = 'pro'
|
| 207 |
-
-- WHERE email = '<YOUR_EMAIL>';
|
| 208 |
-
--
|
| 209 |
-
-- Or set ADMIN_EMAIL env var and run:
|
| 210 |
-
-- DO $$ BEGIN
|
| 211 |
-
-- UPDATE public.profiles SET role = 'admin', plan = 'pro'
|
| 212 |
-
-- WHERE email = current_setting('app.admin_email', true);
|
| 213 |
-
-- END $$;
|
| 214 |
|
| 215 |
-- ─── Monthly reset function ───
|
| 216 |
CREATE OR REPLACE FUNCTION public.reset_monthly_usage()
|
|
|
|
| 1 |
+
-- ClauseGuard — Full Database Schema v3.0
|
|
|
|
| 2 |
-- Tables ordered by dependency (no forward references)
|
| 3 |
|
| 4 |
-- ─── 1. Teams (no dependencies) ───
|
|
|
|
| 127 |
ALTER TABLE public.custom_rules ENABLE ROW LEVEL SECURITY;
|
| 128 |
ALTER TABLE public.admin_logs ENABLE ROW LEVEL SECURITY;
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
-- Profiles
|
| 131 |
CREATE POLICY "Users see own profile" ON public.profiles FOR SELECT USING (auth.uid() = id);
|
| 132 |
CREATE POLICY "Users update own profile" ON public.profiles FOR UPDATE USING (auth.uid() = id);
|
| 133 |
+
CREATE POLICY "Admins read all profiles" ON public.profiles FOR SELECT USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
|
| 134 |
+
CREATE POLICY "Admins update all profiles" ON public.profiles FOR UPDATE USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
|
| 135 |
|
| 136 |
-- Analyses
|
| 137 |
CREATE POLICY "Users see own analyses" ON public.analyses FOR SELECT
|
| 138 |
USING (auth.uid() = user_id OR team_id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()));
|
| 139 |
CREATE POLICY "Users insert analyses" ON public.analyses FOR INSERT WITH CHECK (auth.uid() = user_id);
|
| 140 |
CREATE POLICY "Users delete own analyses" ON public.analyses FOR DELETE USING (auth.uid() = user_id);
|
| 141 |
+
CREATE POLICY "Admins read all analyses" ON public.analyses FOR SELECT USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
|
| 142 |
|
| 143 |
-- Teams
|
| 144 |
CREATE POLICY "Team members can view" ON public.teams FOR SELECT
|
| 145 |
USING (id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()) OR owner_id = auth.uid());
|
| 146 |
CREATE POLICY "Owner can update team" ON public.teams FOR UPDATE USING (owner_id = auth.uid());
|
| 147 |
+
CREATE POLICY "Admins read all teams" ON public.teams FOR SELECT USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
|
| 148 |
|
| 149 |
-- Team invites
|
| 150 |
CREATE POLICY "Members see team invites" ON public.team_invites FOR SELECT
|
|
|
|
| 155 |
CREATE POLICY "Users see own API keys" ON public.api_keys FOR SELECT
|
| 156 |
USING (user_id = auth.uid() OR team_id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()));
|
| 157 |
CREATE POLICY "Users manage own API keys" ON public.api_keys FOR ALL USING (user_id = auth.uid());
|
| 158 |
+
CREATE POLICY "Admins read all api_keys" ON public.api_keys FOR SELECT USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
|
| 159 |
|
| 160 |
-- Custom Rules
|
| 161 |
CREATE POLICY "Users see own rules" ON public.custom_rules FOR SELECT
|
| 162 |
USING (user_id = auth.uid() OR team_id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()));
|
| 163 |
CREATE POLICY "Users manage own rules" ON public.custom_rules FOR ALL USING (user_id = auth.uid());
|
| 164 |
+
CREATE POLICY "Admins read all rules" ON public.custom_rules FOR SELECT USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
|
| 165 |
|
| 166 |
-- Admin Logs
|
| 167 |
CREATE POLICY "Admins manage logs" ON public.admin_logs FOR ALL
|
| 168 |
+
USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
|
| 169 |
|
| 170 |
-- ─── Auto-create profile on signup ───
|
| 171 |
CREATE OR REPLACE FUNCTION public.handle_new_user()
|
|
|
|
| 186 |
AFTER INSERT ON auth.users
|
| 187 |
FOR EACH ROW EXECUTE FUNCTION public.handle_new_user();
|
| 188 |
|
| 189 |
+
-- ─── Set owner as admin with full access ───
|
| 190 |
+
-- Run this AFTER your first signup with your email:
|
| 191 |
+
UPDATE public.profiles
|
| 192 |
+
SET role = 'admin', plan = 'pro'
|
| 193 |
+
WHERE email = 'ankygaur9972@gmail.com';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
-- ─── Monthly reset function ───
|
| 196 |
CREATE OR REPLACE FUNCTION public.reset_monthly_usage()
|
web/lib/types.ts
DELETED
|
@@ -1,87 +0,0 @@
|
|
| 1 |
-
// ClauseGuard — Shared TypeScript types for the web app
|
| 2 |
-
|
| 3 |
-
export interface Cat {
|
| 4 |
-
name: string;
|
| 5 |
-
severity: string;
|
| 6 |
-
description?: string;
|
| 7 |
-
confidence?: number;
|
| 8 |
-
}
|
| 9 |
-
|
| 10 |
-
export interface Clause {
|
| 11 |
-
text: string;
|
| 12 |
-
categories: Cat[];
|
| 13 |
-
}
|
| 14 |
-
|
| 15 |
-
export interface Entity {
|
| 16 |
-
text: string;
|
| 17 |
-
type: string;
|
| 18 |
-
score?: number;
|
| 19 |
-
source?: string;
|
| 20 |
-
}
|
| 21 |
-
|
| 22 |
-
export interface Contradiction {
|
| 23 |
-
type: string;
|
| 24 |
-
explanation: string;
|
| 25 |
-
severity: string;
|
| 26 |
-
confidence?: number;
|
| 27 |
-
source?: string;
|
| 28 |
-
}
|
| 29 |
-
|
| 30 |
-
export interface Obligation {
|
| 31 |
-
type: string;
|
| 32 |
-
party: string;
|
| 33 |
-
description: string;
|
| 34 |
-
deadline: string;
|
| 35 |
-
priority?: number;
|
| 36 |
-
}
|
| 37 |
-
|
| 38 |
-
export interface ComplianceCheck {
|
| 39 |
-
requirement: string;
|
| 40 |
-
description: string;
|
| 41 |
-
severity: string;
|
| 42 |
-
status: string;
|
| 43 |
-
matched_keywords: string[];
|
| 44 |
-
context?: string[];
|
| 45 |
-
}
|
| 46 |
-
|
| 47 |
-
export interface ComplianceReg {
|
| 48 |
-
description: string;
|
| 49 |
-
compliance_rate: number;
|
| 50 |
-
checks: ComplianceCheck[];
|
| 51 |
-
overall_status: string;
|
| 52 |
-
negated_count?: number;
|
| 53 |
-
ambiguous_count?: number;
|
| 54 |
-
note?: string;
|
| 55 |
-
}
|
| 56 |
-
|
| 57 |
-
export interface Redline {
|
| 58 |
-
original_text: string;
|
| 59 |
-
clause_label: string;
|
| 60 |
-
risk_level: string;
|
| 61 |
-
safe_alternative: string;
|
| 62 |
-
template_alternative?: string;
|
| 63 |
-
legal_basis: string;
|
| 64 |
-
consumer_standard: string;
|
| 65 |
-
tier: string;
|
| 66 |
-
}
|
| 67 |
-
|
| 68 |
-
export interface ChatMessage {
|
| 69 |
-
role: "user" | "assistant";
|
| 70 |
-
content: string;
|
| 71 |
-
}
|
| 72 |
-
|
| 73 |
-
export interface AnalysisResult {
|
| 74 |
-
risk_score: number;
|
| 75 |
-
grade: string;
|
| 76 |
-
total_clauses: number;
|
| 77 |
-
flagged_count: number;
|
| 78 |
-
results: Clause[];
|
| 79 |
-
entities: Entity[];
|
| 80 |
-
contradictions: Contradiction[];
|
| 81 |
-
obligations: Obligation[];
|
| 82 |
-
compliance: Record<string, ComplianceReg>;
|
| 83 |
-
redlines: Redline[];
|
| 84 |
-
model: string;
|
| 85 |
-
latency_ms: number;
|
| 86 |
-
session_id?: string;
|
| 87 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
web/package-lock.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
web/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"name": "clauseguard-web",
|
| 3 |
-
"version": "1.
|
| 4 |
"private": true,
|
| 5 |
"scripts": {
|
| 6 |
"dev": "next dev --turbopack",
|
|
|
|
| 1 |
{
|
| 2 |
"name": "clauseguard-web",
|
| 3 |
+
"version": "1.0.0",
|
| 4 |
"private": true,
|
| 5 |
"scripts": {
|
| 6 |
"dev": "next dev --turbopack",
|
web/proxy.ts
CHANGED
|
@@ -47,10 +47,5 @@ export async function proxy(request: NextRequest) {
|
|
| 47 |
}
|
| 48 |
|
| 49 |
export const config = {
|
| 50 |
-
//
|
| 51 |
-
// Without this, navigating to / or other non-dashboard pages doesn't refresh
|
| 52 |
-
// the Supabase session cookie, causing auth to break on page reload.
|
| 53 |
-
matcher: [
|
| 54 |
-
"/((?!_next/static|_next/image|favicon.ico|.*\\.(?:svg|png|jpg|jpeg|gif|webp|ico)$).*)",
|
| 55 |
-
],
|
| 56 |
};
|
|
|
|
| 47 |
}
|
| 48 |
|
| 49 |
export const config = {
|
| 50 |
+
matcher: ["/dashboard-pages/:path*", "/auth/:path*", "/admin/:path*"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
};
|
web/tsconfig.json
CHANGED
|
@@ -1,11 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"compilerOptions": {
|
| 3 |
"target": "ES2017",
|
| 4 |
-
"lib": [
|
| 5 |
-
"dom",
|
| 6 |
-
"dom.iterable",
|
| 7 |
-
"esnext"
|
| 8 |
-
],
|
| 9 |
"allowJs": true,
|
| 10 |
"skipLibCheck": true,
|
| 11 |
"strict": true,
|
|
@@ -15,27 +11,11 @@
|
|
| 15 |
"moduleResolution": "bundler",
|
| 16 |
"resolveJsonModule": true,
|
| 17 |
"isolatedModules": true,
|
| 18 |
-
"jsx": "
|
| 19 |
"incremental": true,
|
| 20 |
-
"plugins": [
|
| 21 |
-
|
| 22 |
-
"name": "next"
|
| 23 |
-
}
|
| 24 |
-
],
|
| 25 |
-
"paths": {
|
| 26 |
-
"@/*": [
|
| 27 |
-
"./*"
|
| 28 |
-
]
|
| 29 |
-
}
|
| 30 |
},
|
| 31 |
-
"include": [
|
| 32 |
-
|
| 33 |
-
"**/*.ts",
|
| 34 |
-
"**/*.tsx",
|
| 35 |
-
".next/types/**/*.ts",
|
| 36 |
-
".next/dev/types/**/*.ts"
|
| 37 |
-
],
|
| 38 |
-
"exclude": [
|
| 39 |
-
"node_modules"
|
| 40 |
-
]
|
| 41 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"compilerOptions": {
|
| 3 |
"target": "ES2017",
|
| 4 |
+
"lib": ["dom", "dom.iterable", "esnext"],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"allowJs": true,
|
| 6 |
"skipLibCheck": true,
|
| 7 |
"strict": true,
|
|
|
|
| 11 |
"moduleResolution": "bundler",
|
| 12 |
"resolveJsonModule": true,
|
| 13 |
"isolatedModules": true,
|
| 14 |
+
"jsx": "preserve",
|
| 15 |
"incremental": true,
|
| 16 |
+
"plugins": [{ "name": "next" }],
|
| 17 |
+
"paths": { "@/*": ["./*"] }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
},
|
| 19 |
+
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
|
| 20 |
+
"exclude": ["node_modules"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
}
|