fix: Header nav - Settings, Admin, Login/Signup visibility

#1
by gaurv007 - opened
README.md CHANGED
@@ -10,31 +10,11 @@ app_file: app.py
10
  pinned: false
11
  ---
12
 
13
- # 🛡️ ClauseGuard v4.3 — World's Best Open-Source Legal Contract Analysis
14
 
15
  **ClauseGuard** is the most comprehensive open-source AI-powered legal contract analysis tool. It analyzes contracts using state-of-the-art legal NLP models and provides actionable risk assessments, Q&A chatbot, clause redlining, and OCR for scanned PDFs.
16
 
17
- ## 🆕 What's New in v4.3
18
-
19
- | Feature | Description |
20
- |---------|-------------|
21
- | **⚡ ONNX + INT8 Quantization** | CUAD classifier now supports ONNX Runtime with dynamic INT8 quantization — **2-4x faster inference on CPU**. New `ml/export_onnx_v2.py` handles the full merge→export→quantize pipeline. |
22
- | **🎯 Better Embeddings** | Upgraded from `all-MiniLM-L6-v2` to `BAAI/bge-small-en-v1.5` — **+21% retrieval accuracy** on MTEB benchmarks, same 384-dim, same latency. Includes query instruction prefix for asymmetric retrieval. |
23
- | **🚀 Batched Classification** | All clauses classified in a single batched forward pass (batch_size=8) instead of one-by-one — **2-3x throughput improvement**. |
24
- | **🧵 CPU Thread Control** | `torch.set_num_threads(2)` prevents CPU thrashing under concurrent Gradio requests |
25
-
26
- ### Previous: v4.2
27
-
28
- | Feature | Description |
29
- |---------|-------------|
30
- | **🔧 NLI Fix** | Fixed contradiction detection — now uses `CrossEncoder.predict()` instead of broken `pipeline("text-classification")` dict input. Contradictions actually work now. |
31
- | **🔒 Thread Safety** | `BoundedCache` now uses `threading.RLock` to prevent race conditions under concurrent Gradio requests |
32
- | **⚡ Pre-compiled Regex** | All regex patterns (clause classification, obligations, compliance negation) pre-compiled at module level — eliminates thousands of redundant compilations |
33
- | **🔗 Extension Fix** | Chrome extension risk formula now matches backend (diminishing returns, not normalized by doc length). Fixed API_BASE URL. |
34
- | **🏷️ Label Coverage** | Added missing regex-only labels (Indemnification, Confidentiality, Force Majeure, Penalties) to RISK_MAP and DESC_MAP |
35
- | **🛡️ Security** | API CORS localhost origins now require explicit opt-in via `CORS_ALLOW_LOCALHOST=true` env var |
36
-
37
- ### Previous: v4.0
38
 
39
  | Feature | Description |
40
  |---------|-------------|
@@ -79,7 +59,7 @@ pinned: false
79
  | Clause Classification | `Mokshith31/legalbert-contract-clause-classification` — LoRA adapter on `nlpaueb/legal-bert-base-uncased`, fine-tuned on CUAD 41-class taxonomy |
80
  | Legal NER | `matterstack/legal-bert-ner` (ML) with regex fallback for 7 entity types |
81
  | NLI | `cross-encoder/nli-deberta-v3-base` (semantic contradiction detection) |
82
- | Embeddings | `BAAI/bge-small-en-v1.5` (384-dim, RAG retrieval — +21% over MiniLM) |
83
  | LLM | `Qwen/Qwen2.5-7B-Instruct` via HF Inference API (chatbot + redlining) |
84
  | OCR | `docTR` (fast_base + crnn_vgg16_bn) for scanned PDF text extraction |
85
  | Compliance | Regulatory keyword matching across GDPR, CCPA, SOX, HIPAA, FINRA |
 
10
  pinned: false
11
  ---
12
 
13
+ # 🛡️ ClauseGuard v4.0 — World's Best Open-Source Legal Contract Analysis
14
 
15
  **ClauseGuard** is the most comprehensive open-source AI-powered legal contract analysis tool. It analyzes contracts using state-of-the-art legal NLP models and provides actionable risk assessments, Q&A chatbot, clause redlining, and OCR for scanned PDFs.
16
 
17
+ ## 🆕 What's New in v4.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  | Feature | Description |
20
  |---------|-------------|
 
59
  | Clause Classification | `Mokshith31/legalbert-contract-clause-classification` — LoRA adapter on `nlpaueb/legal-bert-base-uncased`, fine-tuned on CUAD 41-class taxonomy |
60
  | Legal NER | `matterstack/legal-bert-ner` (ML) with regex fallback for 7 entity types |
61
  | NLI | `cross-encoder/nli-deberta-v3-base` (semantic contradiction detection) |
62
+ | Embeddings | `sentence-transformers/all-MiniLM-L6-v2` (384-dim, RAG retrieval) |
63
  | LLM | `Qwen/Qwen2.5-7B-Instruct` via HF Inference API (chatbot + redlining) |
64
  | OCR | `docTR` (fast_base + crnn_vgg16_bn) for scanned PDF text extraction |
65
  | Compliance | Regulatory keyword matching across GDPR, CCPA, SOX, HIPAA, FINRA |
api/main.py CHANGED
@@ -1,11 +1,11 @@
1
  """
2
- ClauseGuard — FastAPI Backend v4.1
3
  ══════════════════════════════════
4
- Fixes in v4.1:
5
- FIX: Rate limiter uses sliding window with proper IP extraction (X-Forwarded-For)
6
- FIX: RAG sessions have TTL-based expiry (1 hour) instead of just count-based
7
- FIX: Input text size validation (max 200KB)
8
- FIX: Proper error handling for all endpoints
9
  """
10
 
11
  import os
@@ -56,45 +56,25 @@ SUPABASE_URL = os.environ.get("SUPABASE_URL", "")
56
  SUPABASE_SERVICE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY", "")
57
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
58
  SAULLM_ENDPOINT = os.environ.get("SAULLM_ENDPOINT", "")
59
- MAX_TEXT_LENGTH = int(os.environ.get("MAX_TEXT_LENGTH", "200000"))
60
 
61
- # ─── FIX v4.2: Improved sliding window rate limiter with periodic cleanup ───
62
- _rate_limits: dict[str, list[float]] = {}
63
- _rate_limits_last_cleanup: float = 0.0
64
  RATE_LIMIT_REQUESTS = 30
65
- RATE_LIMIT_WINDOW = 60 # seconds
66
-
67
- def _get_client_ip(request: Request) -> str:
68
- """Extract real client IP, handling reverse proxies."""
69
- forwarded = request.headers.get("x-forwarded-for", "")
70
- if forwarded:
71
- return forwarded.split(",")[0].strip()
72
- return request.client.host if request.client else "unknown"
73
 
74
  def _check_rate_limit(client_ip: str) -> bool:
75
- """Sliding window rate limiter with periodic stale-IP cleanup."""
76
- global _rate_limits_last_cleanup
77
  now = time.time()
78
-
79
- # FIX v4.2: Periodic cleanup every 60s regardless of dict size
80
- if now - _rate_limits_last_cleanup > 60:
81
- stale = [ip for ip, ts in _rate_limits.items() if not ts or now - ts[-1] > RATE_LIMIT_WINDOW * 2]
82
- for ip in stale:
83
- del _rate_limits[ip]
84
- _rate_limits_last_cleanup = now
85
-
86
- if client_ip not in _rate_limits:
87
- _rate_limits[client_ip] = []
88
-
89
- # Remove expired timestamps
90
- _rate_limits[client_ip] = [
91
- t for t in _rate_limits[client_ip] if now - t < RATE_LIMIT_WINDOW
92
- ]
93
-
94
- if len(_rate_limits[client_ip]) >= RATE_LIMIT_REQUESTS:
95
- return False
96
-
97
- _rate_limits[client_ip].append(now)
98
  return True
99
 
100
  # ─── Supabase helper ───
@@ -136,27 +116,9 @@ async def supabase_query(table: str, params: dict, headers_extra: dict = {}):
136
  except Exception:
137
  return []
138
 
139
- # ─── FIX v4.1: RAG sessions with TTL-based expiry ───
140
- _rag_sessions: dict[str, dict] = {}
141
  _RAG_SESSION_MAX = 100
142
- _RAG_SESSION_TTL = 3600 # 1 hour
143
-
144
- def _cleanup_rag_sessions():
145
- """Remove expired RAG sessions."""
146
- now = time.time()
147
- expired = [sid for sid, s in _rag_sessions.items() if now - s.get("created_at", 0) > _RAG_SESSION_TTL]
148
- for sid in expired:
149
- del _rag_sessions[sid]
150
-
151
- def _store_rag_session(session_id: str, data: dict):
152
- """Store a RAG session with TTL tracking."""
153
- _cleanup_rag_sessions()
154
- if len(_rag_sessions) >= _RAG_SESSION_MAX:
155
- # Remove oldest session
156
- oldest = min(_rag_sessions, key=lambda k: _rag_sessions[k].get("created_at", 0))
157
- del _rag_sessions[oldest]
158
- data["created_at"] = time.time()
159
- _rag_sessions[session_id] = data
160
 
161
  # ─── Request/Response Models ───
162
  class AnalyzeRequest(BaseModel):
@@ -194,17 +156,13 @@ class RedlineRequest(BaseModel):
194
  async def lifespan(app: FastAPI):
195
  yield
196
 
197
- app = FastAPI(title="ClauseGuard API", version="4.1.0", lifespan=lifespan)
198
 
199
- # FIX v4.2: CORS origins configurable via env var; localhost only in dev
200
- _extra_origins = os.environ.get("CORS_EXTRA_ORIGINS", "").split(",")
201
  ALLOWED_ORIGINS = [
202
  "https://clauseguardweb.netlify.app",
 
 
203
  ]
204
- # Only add localhost origins if explicitly enabled via env
205
- if os.environ.get("CORS_ALLOW_LOCALHOST", "").lower() == "true":
206
- ALLOWED_ORIGINS.extend(["http://localhost:3000", "http://localhost:3001"])
207
- ALLOWED_ORIGINS.extend([o.strip() for o in _extra_origins if o.strip()])
208
  app.add_middleware(
209
  CORSMiddleware,
210
  allow_origins=ALLOWED_ORIGINS,
@@ -221,18 +179,17 @@ async def health():
221
  return {
222
  "status": "ok",
223
  "model": model_status,
224
- "version": "4.1.0",
225
  "shared_modules": _SHARED_MODULES,
226
  "ocr": ocr_status,
227
  "features": ["analyze", "compare", "redline", "chat", "ocr"],
228
- "rag_sessions_active": len(_rag_sessions),
229
  }
230
 
231
  @app.post("/api/analyze")
232
  async def analyze(req: AnalyzeRequest, request: Request, user: Optional[dict] = Depends(get_current_user)):
233
- client_ip = _get_client_ip(request)
234
  if not _check_rate_limit(client_ip):
235
- raise HTTPException(status_code=429, detail="Rate limit exceeded. Please wait 60 seconds.")
236
 
237
  text = req.text
238
  if not text and req.clauses:
@@ -240,10 +197,8 @@ async def analyze(req: AnalyzeRequest, request: Request, user: Optional[dict] =
240
 
241
  if not text or len(text.strip()) < 50:
242
  raise HTTPException(status_code=400, detail="Text too short (minimum 50 characters)")
243
-
244
- # FIX v4.1: Input size validation
245
  if len(text) > MAX_TEXT_LENGTH:
246
- raise HTTPException(status_code=400, detail=f"Text too long (max {MAX_TEXT_LENGTH // 1000}KB)")
247
 
248
  start = time.time()
249
 
@@ -293,13 +248,16 @@ async def analyze(req: AnalyzeRequest, request: Request, user: Optional[dict] =
293
  }],
294
  })
295
 
296
- # RAG indexing with TTL-managed sessions
297
  session_id = None
298
  try:
299
  chunks, embeddings, _status = index_contract(text)
300
  if chunks and embeddings is not None:
301
  session_id = uuid.uuid4().hex[:12]
302
- _store_rag_session(session_id, {
 
 
 
303
  "chunks": chunks,
304
  "embeddings": embeddings,
305
  "analysis": {
@@ -309,7 +267,7 @@ async def analyze(req: AnalyzeRequest, request: Request, user: Optional[dict] =
309
  "entities": entities[:30],
310
  "contradictions": contradictions,
311
  },
312
- })
313
  except Exception as e:
314
  print(f"[API] RAG indexing error: {e}")
315
 
@@ -346,27 +304,20 @@ async def analyze(req: AnalyzeRequest, request: Request, user: Optional[dict] =
346
 
347
  @app.post("/api/compare")
348
  async def compare(req: CompareRequest, request: Request):
349
- client_ip = _get_client_ip(request)
350
  if not _check_rate_limit(client_ip):
351
  raise HTTPException(status_code=429, detail="Rate limit exceeded.")
352
-
353
- # FIX v4.1: Input size validation for comparison
354
- if len(req.text_a) > MAX_TEXT_LENGTH or len(req.text_b) > MAX_TEXT_LENGTH:
355
- raise HTTPException(status_code=400, detail=f"Text too long (max {MAX_TEXT_LENGTH // 1000}KB per contract)")
356
-
357
  return compare_contracts(req.text_a, req.text_b)
358
 
359
  @app.post("/api/redline")
360
  async def redline(req: RedlineRequest, request: Request):
361
- client_ip = _get_client_ip(request)
362
  if not _check_rate_limit(client_ip):
363
  raise HTTPException(status_code=429, detail="Rate limit exceeded.")
364
 
365
  if req.session_id and req.session_id in _rag_sessions:
366
  analysis = _rag_sessions[req.session_id]["analysis"]
367
  elif req.text:
368
- if len(req.text) > MAX_TEXT_LENGTH:
369
- raise HTTPException(status_code=400, detail="Text too long")
370
  result, error = analyze_contract(req.text)
371
  if error:
372
  raise HTTPException(status_code=400, detail=error)
@@ -379,15 +330,12 @@ async def redline(req: RedlineRequest, request: Request):
379
 
380
  @app.post("/api/chat")
381
  async def chat(req: ChatRequest, request: Request):
382
- client_ip = _get_client_ip(request)
383
  if not _check_rate_limit(client_ip):
384
  raise HTTPException(status_code=429, detail="Rate limit exceeded.")
385
 
386
- # FIX v4.1: Clean up expired sessions before checking
387
- _cleanup_rag_sessions()
388
-
389
  if req.session_id not in _rag_sessions:
390
- raise HTTPException(status_code=404, detail="Session expired or not found. Please analyze a contract first.")
391
 
392
  session = _rag_sessions[req.session_id]
393
  response_text = ""
@@ -399,14 +347,12 @@ async def chat(req: ChatRequest, request: Request):
399
 
400
  @app.post("/api/chat/stream")
401
  async def chat_stream(req: ChatRequest, request: Request):
402
- client_ip = _get_client_ip(request)
403
  if not _check_rate_limit(client_ip):
404
  raise HTTPException(status_code=429, detail="Rate limit exceeded.")
405
 
406
- _cleanup_rag_sessions()
407
-
408
  if req.session_id not in _rag_sessions:
409
- raise HTTPException(status_code=404, detail="Session expired or not found.")
410
 
411
  session = _rag_sessions[req.session_id]
412
 
@@ -429,12 +375,8 @@ async def ocr_endpoint(file: UploadFile = FastAPIFile(...)):
429
  if not file.filename or not file.filename.lower().endswith(".pdf"):
430
  raise HTTPException(status_code=400, detail="Only PDF files supported")
431
 
432
- # FIX v4.1: Limit upload size (20MB)
433
- content = await file.read()
434
- if len(content) > 20 * 1024 * 1024:
435
- raise HTTPException(status_code=400, detail="File too large (max 20MB)")
436
-
437
  with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
 
438
  tmp.write(content)
439
  tmp_path = tmp.name
440
 
 
1
  """
2
+ ClauseGuard — FastAPI Backend v4.0
3
  ══════════════════════════════════
4
+ New in v4.0:
5
+ /api/redline clause redlining suggestions
6
+ /api/chat RAG chatbot (streaming)
7
+ /api/ocr OCR scanned PDF extraction
8
+ Updated analysis to include redlining data
9
  """
10
 
11
  import os
 
56
  SUPABASE_SERVICE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY", "")
57
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
58
  SAULLM_ENDPOINT = os.environ.get("SAULLM_ENDPOINT", "")
59
+ MAX_TEXT_LENGTH = int(os.environ.get("MAX_TEXT_LENGTH", "100000"))
60
 
61
+ # ─── Rate Limiting ───
62
+ _rate_limits = {}
 
63
  RATE_LIMIT_REQUESTS = 30
64
+ RATE_LIMIT_WINDOW = 60
 
 
 
 
 
 
 
65
 
66
  def _check_rate_limit(client_ip: str) -> bool:
 
 
67
  now = time.time()
68
+ if client_ip in _rate_limits:
69
+ count, window_start = _rate_limits[client_ip]
70
+ if now - window_start > RATE_LIMIT_WINDOW:
71
+ _rate_limits[client_ip] = (1, now)
72
+ return True
73
+ if count >= RATE_LIMIT_REQUESTS:
74
+ return False
75
+ _rate_limits[client_ip] = (count + 1, window_start)
76
+ return True
77
+ _rate_limits[client_ip] = (1, now)
 
 
 
 
 
 
 
 
 
 
78
  return True
79
 
80
  # ─── Supabase helper ───
 
116
  except Exception:
117
  return []
118
 
119
+ # ─── In-memory RAG session store ───
120
+ _rag_sessions: dict = {}
121
  _RAG_SESSION_MAX = 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  # ─── Request/Response Models ───
124
  class AnalyzeRequest(BaseModel):
 
156
  async def lifespan(app: FastAPI):
157
  yield
158
 
159
+ app = FastAPI(title="ClauseGuard API", version="4.0.0", lifespan=lifespan)
160
 
 
 
161
  ALLOWED_ORIGINS = [
162
  "https://clauseguardweb.netlify.app",
163
+ "http://localhost:3000",
164
+ "http://localhost:3001",
165
  ]
 
 
 
 
166
  app.add_middleware(
167
  CORSMiddleware,
168
  allow_origins=ALLOWED_ORIGINS,
 
179
  return {
180
  "status": "ok",
181
  "model": model_status,
182
+ "version": "4.0.0",
183
  "shared_modules": _SHARED_MODULES,
184
  "ocr": ocr_status,
185
  "features": ["analyze", "compare", "redline", "chat", "ocr"],
 
186
  }
187
 
188
  @app.post("/api/analyze")
189
  async def analyze(req: AnalyzeRequest, request: Request, user: Optional[dict] = Depends(get_current_user)):
190
+ client_ip = request.client.host if request.client else "unknown"
191
  if not _check_rate_limit(client_ip):
192
+ raise HTTPException(status_code=429, detail="Rate limit exceeded.")
193
 
194
  text = req.text
195
  if not text and req.clauses:
 
197
 
198
  if not text or len(text.strip()) < 50:
199
  raise HTTPException(status_code=400, detail="Text too short (minimum 50 characters)")
 
 
200
  if len(text) > MAX_TEXT_LENGTH:
201
+ raise HTTPException(status_code=400, detail=f"Text too long (max {MAX_TEXT_LENGTH} chars)")
202
 
203
  start = time.time()
204
 
 
248
  }],
249
  })
250
 
251
+ # v4.0: RAG indexing
252
  session_id = None
253
  try:
254
  chunks, embeddings, _status = index_contract(text)
255
  if chunks and embeddings is not None:
256
  session_id = uuid.uuid4().hex[:12]
257
+ if len(_rag_sessions) >= _RAG_SESSION_MAX:
258
+ oldest = next(iter(_rag_sessions))
259
+ del _rag_sessions[oldest]
260
+ _rag_sessions[session_id] = {
261
  "chunks": chunks,
262
  "embeddings": embeddings,
263
  "analysis": {
 
267
  "entities": entities[:30],
268
  "contradictions": contradictions,
269
  },
270
+ }
271
  except Exception as e:
272
  print(f"[API] RAG indexing error: {e}")
273
 
 
304
 
305
  @app.post("/api/compare")
306
  async def compare(req: CompareRequest, request: Request):
307
+ client_ip = request.client.host if request.client else "unknown"
308
  if not _check_rate_limit(client_ip):
309
  raise HTTPException(status_code=429, detail="Rate limit exceeded.")
 
 
 
 
 
310
  return compare_contracts(req.text_a, req.text_b)
311
 
312
  @app.post("/api/redline")
313
  async def redline(req: RedlineRequest, request: Request):
314
+ client_ip = request.client.host if request.client else "unknown"
315
  if not _check_rate_limit(client_ip):
316
  raise HTTPException(status_code=429, detail="Rate limit exceeded.")
317
 
318
  if req.session_id and req.session_id in _rag_sessions:
319
  analysis = _rag_sessions[req.session_id]["analysis"]
320
  elif req.text:
 
 
321
  result, error = analyze_contract(req.text)
322
  if error:
323
  raise HTTPException(status_code=400, detail=error)
 
330
 
331
  @app.post("/api/chat")
332
  async def chat(req: ChatRequest, request: Request):
333
+ client_ip = request.client.host if request.client else "unknown"
334
  if not _check_rate_limit(client_ip):
335
  raise HTTPException(status_code=429, detail="Rate limit exceeded.")
336
 
 
 
 
337
  if req.session_id not in _rag_sessions:
338
+ raise HTTPException(status_code=404, detail="Session not found. Analyze a contract first.")
339
 
340
  session = _rag_sessions[req.session_id]
341
  response_text = ""
 
347
 
348
  @app.post("/api/chat/stream")
349
  async def chat_stream(req: ChatRequest, request: Request):
350
+ client_ip = request.client.host if request.client else "unknown"
351
  if not _check_rate_limit(client_ip):
352
  raise HTTPException(status_code=429, detail="Rate limit exceeded.")
353
 
 
 
354
  if req.session_id not in _rag_sessions:
355
+ raise HTTPException(status_code=404, detail="Session not found.")
356
 
357
  session = _rag_sessions[req.session_id]
358
 
 
375
  if not file.filename or not file.filename.lower().endswith(".pdf"):
376
  raise HTTPException(status_code=400, detail="Only PDF files supported")
377
 
 
 
 
 
 
378
  with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
379
+ content = await file.read()
380
  tmp.write(content)
381
  tmp_path = tmp.name
382
 
app.py CHANGED
@@ -1,46 +1,25 @@
1
  """
2
- ClauseGuard — World's Best Legal Contract Analysis Tool (v4.3)
3
  ═══════════════════════════════════════════════════════════════
4
- PERF v4.3:
5
- • PERF: Upgraded embedder to BAAI/bge-small-en-v1.5 (+21% retrieval accuracy)
6
- • PERF: Batched clause classification (single forward pass, batch_size=8)
7
- • PERF: ONNX INT8 quantized model support (2-4x faster on CPU)
8
- • PERF: torch.set_num_threads(2) to prevent CPU thrashing
9
- • NEW: ml/export_onnx_v2.py — full merge→ONNX→quantize pipeline
10
-
11
- Fixes in v4.2:
12
- • FIX: NLI now uses CrossEncoder.predict() — contradictions actually work
13
- • FIX: BoundedCache uses threading.RLock — no more race conditions
14
- • FIX: Pre-compiled ALL regex patterns at module level (perf)
15
- • FIX: Added missing regex labels to RISK_MAP/DESC_MAP
16
- • FIX: Extension risk formula matches backend
17
- • FIX: Extension API_BASE URL corrected
18
- • FIX: API CORS localhost requires explicit opt-in
19
-
20
- Fixes in v4.1:
21
- • FIX: Bounded LRU caches (chunk_cache, prediction_cache) — no more memory leaks
22
- • FIX: NLI input format — pass (text_a, text_b) tuple, not [SEP]-concatenated string
23
- • FIX: Classifier max_length raised to 512 (was 256 — truncating legal clauses)
24
- • FIX: Risk score formula — absolute risk, not normalized by total_clauses
25
- • FIX: Train/inference alignment — use softmax+argmax for single-label model
26
- • FIX: Added missing regex fallback patterns for more CUAD categories
27
- • FIX: Entity extraction batching — single pipeline call instead of sequential
28
- • PERF: Shared model singleton via models.py module
29
- • PERF: LRU-bounded caches everywhere
30
-
31
- Carried from v4.0:
32
  • OCR support for scanned PDFs (docTR engine with smart native/scanned routing)
33
  • Contract Q&A Chatbot (RAG: embedding retrieval + HF Inference API streaming)
34
  • Clause Redlining (3-tier: template lookup + RAG + LLM refinement)
35
- • Fixed CUAD label mapping (added missing index 6)
36
- Structure-aware clause splitting
 
 
 
 
37
  • Real NLI contradiction detection via cross-encoder model
38
- • ML-based Legal NER with regex fallback
39
  • Semantic compliance checking with negation handling
40
  • Improved obligation extraction with false-positive filtering
41
- • LLM-powered clause explanations
 
42
  • Per-session temp files (no collision)
43
- • Model health reporting
 
44
 
45
  Models:
46
  • Clause classifier: Mokshith31/legalbert-contract-clause-classification
@@ -60,8 +39,7 @@ import io
60
  import uuid
61
  import tempfile
62
  import hashlib
63
- import threading
64
- from collections import defaultdict, OrderedDict
65
  from datetime import datetime
66
  from functools import lru_cache
67
 
@@ -94,29 +72,9 @@ try:
94
  )
95
  from peft import PeftModel
96
  _HAS_TORCH = True
97
- # PERF v4.3: Limit PyTorch threads to avoid CPU thrashing under concurrent requests.
98
- # HF Spaces CPU-basic has 2 vCPUs. Reserve 1 thread for Gradio server.
99
- torch.set_num_threads(2)
100
- torch.set_num_interop_threads(1)
101
  except Exception:
102
  pass
103
 
104
- # ── ONNX Runtime (soft-fail, for quantized model) ─────────────────────
105
- _HAS_ORT = False
106
- try:
107
- from optimum.onnxruntime import ORTModelForSequenceClassification as _ORTModel
108
- _HAS_ORT = True
109
- except ImportError:
110
- pass
111
-
112
- # ── CrossEncoder for NLI (soft-fail) ──────────────────────────────────
113
- _HAS_CROSS_ENCODER = False
114
- try:
115
- from sentence_transformers import CrossEncoder as _CrossEncoder
116
- _HAS_CROSS_ENCODER = True
117
- except ImportError:
118
- pass
119
-
120
  # ── Import submodules ───────────────────────────────────────────────
121
  from compare import compare_contracts, render_comparison_html
122
  from obligations import extract_obligations, render_obligations_html
@@ -179,12 +137,7 @@ _UNFAIR_LABELS = [
179
  "Jurisdiction", "Arbitration"
180
  ]
181
 
182
- # FIX v4.2: Include regex-only labels that aren't in CUAD or Unfair lists
183
- _EXTRA_REGEX_LABELS = [
184
- "Indemnification", "Confidentiality", "Force Majeure", "Penalties"
185
- ]
186
-
187
- _ALL_LABELS = CUAD_LABELS + _UNFAIR_LABELS + _EXTRA_REGEX_LABELS
188
 
189
  RISK_MAP = {
190
  # Critical
@@ -240,11 +193,6 @@ RISK_MAP = {
240
  "Other": "LOW",
241
  "ROFR/ROFO/ROFN": "LOW",
242
  "Contract by using": "LOW",
243
- # FIX v4.2: Added regex-only labels that were missing from RISK_MAP
244
- "Indemnification": "HIGH",
245
- "Confidentiality": "MEDIUM",
246
- "Force Majeure": "LOW",
247
- "Penalties": "HIGH",
248
  }
249
 
250
  DESC_MAP = {label: label.replace("_", " ") for label in _ALL_LABELS}
@@ -285,65 +233,10 @@ DESC_MAP.update({
285
  "Irrevocable or Perpetual License": "License that cannot be revoked or lasts indefinitely.",
286
  "Unlimited/All-You-Can-Eat License": "License with no usage limits.",
287
  "Notice Period to Terminate Renewal": "Required notice period before automatic renewal.",
288
- # FIX v4.2: Added descriptions for regex-only labels
289
- "Indemnification": "Obligation to compensate the other party for losses or damages.",
290
- "Confidentiality": "Restrictions on sharing proprietary or sensitive information.",
291
- "Force Majeure": "Excuses performance due to extraordinary events beyond control.",
292
- "Penalties": "Financial penalties for breach or late performance.",
293
  })
294
 
295
  RISK_WEIGHTS = {"CRITICAL": 40, "HIGH": 20, "MEDIUM": 10, "LOW": 3}
296
 
297
- # FIX v4.3.1: Content-based severity refinement
298
- # Default RISK_MAP assigns severity by label alone. This function downgrades severity
299
- # when the clause text contains mitigating language (caps, carve-outs, time limits).
300
- _SEVERITY_MITIGATORS = {
301
- "IP Ownership Assignment": {
302
- # Downgrade from CRITICAL to HIGH if pre-existing IP is carved out
303
- "HIGH": re.compile(r'pre[\-\s]existing|background\s+ip|prior\s+(?:ip|intellectual)', re.IGNORECASE),
304
- # Downgrade to MEDIUM if both carve-out AND license-back exist
305
- "MEDIUM": re.compile(r'(?:pre[\-\s]existing|background\s+ip).*(?:license|retain)', re.IGNORECASE | re.DOTALL),
306
- },
307
- "Limitation of liability": {
308
- # Downgrade from CRITICAL to HIGH if there's any cap
309
- "HIGH": re.compile(r'shall\s+not\s+exceed|aggregate.{0,20}(?:not\s+exceed|limited\s+to)|cap(?:ped)?\s+at', re.IGNORECASE),
310
- # Downgrade to MEDIUM if there's a reasonable cap AND exceptions for gross negligence
311
- "MEDIUM": re.compile(r'(?:shall\s+not\s+exceed|limited\s+to).{0,80}(?:gross\s+negligence|willful|fraud)', re.IGNORECASE | re.DOTALL),
312
- },
313
- "Termination for Convenience": {
314
- # Downgrade from CRITICAL to HIGH if there's a notice period
315
- "HIGH": re.compile(r'(?:\d+)\s+(?:day|month|week)s?.{0,20}(?:prior|advance|written)\s+notice', re.IGNORECASE),
316
- # Downgrade to MEDIUM if mutual termination right
317
- "MEDIUM": re.compile(r'either\s+party\s+may\s+terminat', re.IGNORECASE),
318
- },
319
- "Non-Compete": {
320
- # Downgrade from HIGH to MEDIUM if time-limited
321
- "MEDIUM": re.compile(r'(?:period\s+of|for)\s+(?:\d+|one|two|three|six|twelve)\s+(?:\(\d+\)\s+)?(?:month|year)', re.IGNORECASE),
322
- },
323
- "Arbitration": {
324
- # Downgrade from CRITICAL to HIGH if opt-out is available
325
- "HIGH": re.compile(r'opt[\-\s]?out|may\s+elect|small\s+claims', re.IGNORECASE),
326
- },
327
- }
328
-
329
-
330
- def _refine_severity(label, text, default_risk):
331
- """FIX v4.3.1: Refine severity based on clause content, not just label."""
332
- mitigators = _SEVERITY_MITIGATORS.get(label)
333
- if not mitigators:
334
- return default_risk
335
-
336
- # Check from lowest severity up — return the lowest matching level
337
- for level in ["MEDIUM", "HIGH"]:
338
- pattern = mitigators.get(level)
339
- if pattern and pattern.search(text):
340
- # Only downgrade, never upgrade
341
- level_order = {"CRITICAL": 4, "HIGH": 3, "MEDIUM": 2, "LOW": 1}
342
- if level_order.get(level, 0) < level_order.get(default_risk, 0):
343
- return level
344
-
345
- return default_risk
346
-
347
  RISK_STYLES = {
348
  "CRITICAL": ("#dc2626", "#fef2f2", "⚠️"),
349
  "HIGH": ("#ea580c", "#fff7ed", "⚡"),
@@ -351,58 +244,15 @@ RISK_STYLES = {
351
  "LOW": ("#16a34a", "#f0fdf4", "✓"),
352
  }
353
 
354
- # ═══════════════════════════════════════════════════════════════════════
355
- # FIX v4.1: Per-class thresholds aligned with single-label softmax
356
- # The model was trained with cross-entropy (single-label), so inference
357
- # now uses softmax+argmax, not sigmoid. Thresholds apply to softmax probs.
358
- # ═══════════════════════════════════════════════════════════════════════
359
  _CUAD_THRESHOLDS = {}
360
  _WEAK_CLASSES = {0, 1, 2, 7, 9, 21, 22, 27, 37, 38}
361
  for _i in range(41):
362
  if _i in _WEAK_CLASSES:
363
  _CUAD_THRESHOLDS[_i] = 0.85 # Only flag if very confident (these classes are unreliable)
364
  else:
365
- _CUAD_THRESHOLDS[_i] = 0.40 # Reasonable threshold for softmax outputs
366
-
367
- # ═══════════════════════════════════════════════════════════════════════
368
- # FIX v4.1: Bounded LRU Cache utility (replaces unbounded dicts)
369
- # ═══════════════════════════════════════════════════════════════════════
370
-
371
- class BoundedCache:
372
- """Thread-safe bounded LRU cache using OrderedDict + RLock.
373
- FIX v4.2: Added threading.RLock to prevent race conditions under
374
- Gradio's concurrent request handling. OrderedDict compound operations
375
- (contains + setitem + move_to_end + popitem) are NOT atomic even with GIL."""
376
- def __init__(self, maxsize=1000):
377
- self._cache = OrderedDict()
378
- self._maxsize = maxsize
379
- self._lock = threading.RLock()
380
-
381
- def get(self, key, default=None):
382
- with self._lock:
383
- if key in self._cache:
384
- self._cache.move_to_end(key)
385
- return self._cache[key]
386
- return default
387
-
388
- def put(self, key, value):
389
- with self._lock:
390
- if key in self._cache:
391
- self._cache.move_to_end(key)
392
- self._cache[key] = value
393
- else:
394
- if len(self._cache) >= self._maxsize:
395
- self._cache.popitem(last=False)
396
- self._cache[key] = value
397
-
398
- def __contains__(self, key):
399
- with self._lock:
400
- return key in self._cache
401
-
402
- def __len__(self):
403
- with self._lock:
404
- return len(self._cache)
405
-
406
 
407
  # ═══════════════════════════════════════════════════════════════════════
408
  # 2. MODEL LOADING
@@ -411,30 +261,11 @@ class BoundedCache:
411
  cuad_tokenizer = None
412
  cuad_model = None
413
  ner_pipeline = None
414
- nli_model = None # FIX v4.2: CrossEncoder instead of pipeline
415
  _model_status = {"cuad": "not_loaded", "ner": "not_loaded", "nli": "not_loaded"}
416
 
417
  def _load_cuad_model():
418
  global cuad_tokenizer, cuad_model, _model_status
419
- # PERF v4.3: Try ONNX quantized model first (2-4x faster on CPU)
420
- onnx_model_path = os.environ.get("ONNX_MODEL_PATH", "")
421
- onnx_hub_id = os.environ.get("ONNX_HUB_MODEL_ID", "gaurv007/clauseguard-onnx-int8")
422
-
423
- if _HAS_ORT:
424
- for source in [onnx_model_path, onnx_hub_id]:
425
- if not source:
426
- continue
427
- try:
428
- print(f"[ClauseGuard] Trying ONNX model: {source}")
429
- cuad_model = _ORTModel.from_pretrained(source, file_name="model_quantized.onnx")
430
- cuad_tokenizer = AutoTokenizer.from_pretrained(source)
431
- _model_status["cuad"] = "loaded (ONNX INT8)"
432
- print(f"[ClauseGuard] ONNX INT8 model loaded from {source}")
433
- return
434
- except Exception as e:
435
- print(f"[ClauseGuard] ONNX load failed from {source}: {e}")
436
-
437
- # Fallback to PyTorch PEFT model
438
  if not _HAS_TORCH:
439
  print("[ClauseGuard] PyTorch not available — using regex fallback")
440
  _model_status["cuad"] = "unavailable"
@@ -442,15 +273,15 @@ def _load_cuad_model():
442
  try:
443
  base = "nlpaueb/legal-bert-base-uncased"
444
  adapter = "Mokshith31/legalbert-contract-clause-classification"
445
- print(f"[ClauseGuard] Loading CUAD classifier (PyTorch): {adapter}")
446
  cuad_tokenizer = AutoTokenizer.from_pretrained(base)
447
  base_model = AutoModelForSequenceClassification.from_pretrained(
448
  base, num_labels=41, ignore_mismatched_sizes=True
449
  )
450
  cuad_model = PeftModel.from_pretrained(base_model, adapter)
451
  cuad_model.eval()
452
- _model_status["cuad"] = "loaded (PyTorch)"
453
- print("[ClauseGuard] CUAD model loaded successfully (PyTorch)")
454
  except Exception as e:
455
  print(f"[ClauseGuard] CUAD model load failed: {e}")
456
  cuad_tokenizer = None
@@ -478,16 +309,20 @@ def _load_ner_model():
478
  _model_status["ner"] = f"failed: {e}"
479
 
480
  def _load_nli_model():
481
- global nli_model, _model_status, _HAS_NLI_MODEL
482
- if not _HAS_CROSS_ENCODER:
483
- _model_status["nli"] = "unavailable (sentence-transformers not installed)"
484
  return
485
  try:
486
- print("[ClauseGuard] Loading NLI model: cross-encoder/nli-deberta-v3-base (CrossEncoder)")
487
- nli_model = _CrossEncoder("cross-encoder/nli-deberta-v3-base")
 
 
 
 
488
  _HAS_NLI_MODEL = True
489
  _model_status["nli"] = "loaded"
490
- print("[ClauseGuard] NLI CrossEncoder loaded successfully")
491
  except Exception as e:
492
  print(f"[ClauseGuard] NLI model load failed (using heuristic fallback): {e}")
493
  _model_status["nli"] = f"failed: {e}"
@@ -549,45 +384,46 @@ def parse_document(file_path):
549
  return None, f"Unsupported file type: {ext}"
550
 
551
  # ═══════════════════════════════════════════════════════════════════════
552
- # 4. DETERMINISTIC CLAUSE SPLITTING
553
- # FIX v4.1: Bounded cache (max 500 documents) instead of unbounded dict
554
  # ═══════════════════════════════════════════════════════════════════════
555
 
556
- _chunk_cache = BoundedCache(maxsize=500)
557
-
558
- # FIX v4.2: Pre-compile section pattern at module level (was recompiling per call)
559
- _SECTION_PATTERN = re.compile(
560
- r'(?:^|\n\n)'
561
- r'(?='
562
- r'\d+(?:\.\d+)*[.)]\s' # 1. 2. 3.1. 3.1)
563
- r'|[A-Z]{2,}[A-Z\s]*\n' # ALL CAPS HEADERS
564
- r'|\([a-z]\)\s' # (a) (b) (c)
565
- r'|(?:Section|Article|Clause)\s+\d+' # Section 1, Article 2
566
- r')',
567
- re.MULTILINE
568
- )
569
 
570
  def split_clauses(text):
571
  """Deterministic, structure-aware clause splitting.
572
- Same input ALWAYS produces same output. Normalized text is hashed
573
  and cached so repeated runs on identical documents are identical."""
 
574
  normalized = re.sub(r'\s+', ' ', text.strip())
575
  text_hash = hashlib.sha256(normalized.encode()).hexdigest()
576
- cached = _chunk_cache.get(text_hash)
577
- if cached is not None:
578
- return cached
579
 
580
  text = re.sub(r'\n{3,}', '\n\n', text.strip())
581
 
582
  # First try to detect numbered sections (1., 2., 3.1, (a), etc.)
583
- positions = [m.start() for m in _SECTION_PATTERN.finditer(text)]
 
 
 
 
 
 
 
 
 
 
 
584
 
585
  if len(positions) >= 3:
 
586
  clauses = []
587
  for i, pos in enumerate(positions):
588
  end = positions[i + 1] if i + 1 < len(positions) else len(text)
589
  chunk = text[pos:end].strip()
590
  if len(chunk) > 30:
 
591
  if len(chunk) > 1500:
592
  sub_parts = chunk.split('\n\n')
593
  current = ""
@@ -602,20 +438,22 @@ def split_clauses(text):
602
  clauses.append(current.strip())
603
  else:
604
  clauses.append(chunk)
 
605
  if positions and positions[0] > 50:
606
  preamble = text[:positions[0]].strip()
607
  if len(preamble) > 30:
608
  clauses.insert(0, preamble)
609
  result = clauses if clauses else _fallback_split(text)
610
- _chunk_cache.put(text_hash, result)
611
  return result
612
  else:
613
  result = _fallback_split(text)
614
- _chunk_cache.put(text_hash, result)
615
  return result
616
 
617
  def _fallback_split(text):
618
  """Fallback: split on paragraph breaks and sentence boundaries."""
 
619
  paragraphs = text.split('\n\n')
620
  if len(paragraphs) >= 3:
621
  clauses = []
@@ -623,6 +461,7 @@ def _fallback_split(text):
623
  p = p.strip()
624
  if len(p) > 30:
625
  if len(p) > 1500:
 
626
  sents = re.split(r'(?<=[.!?])\s+(?=[A-Z])', p)
627
  current = ""
628
  for s in sents:
@@ -638,16 +477,17 @@ def _fallback_split(text):
638
  clauses.append(p)
639
  return clauses
640
 
 
641
  parts = re.split(r'(?<=[.!?])\s+(?=[A-Z0-9(])', text)
642
  return [p.strip() for p in parts if len(p.strip()) > 30]
643
 
644
  # ═══════════════════════════════════════════════════════════════════════
645
- # 5. CLAUSE DETECTION
646
- # FIX v4.1: Use softmax (matching training) instead of sigmoid
647
- # FIX v4.1: max_length raised to 512 (was 256)
648
- # FIX v4.1: Bounded prediction cache
649
  # ═══════════════════════════════════════════════════════════════════════
650
 
 
651
  _HEADING_RE = re.compile(r'^\d+(?:\.\d+)*\s+[A-Z][A-Z\s&,/]+$', re.MULTILINE)
652
 
653
  def _strip_heading(text):
@@ -658,6 +498,7 @@ def _strip_heading(text):
658
  return stripped if len(stripped) > 20 else text
659
  return text
660
 
 
661
  _LABEL_GUARDRAILS = {
662
  "Liquidated Damages": re.compile(
663
  r'liquidated|pre-?determined.{0,10}damage|agreed.{0,10}sum|penalty clause|stipulated.{0,10}damage',
@@ -667,127 +508,58 @@ _LABEL_GUARDRAILS = {
667
  r'uncapped|unlimited.{0,10}liabilit|no.{0,10}(limit|cap).{0,10}liabilit',
668
  re.IGNORECASE
669
  ),
670
- "ROFR/ROFO/ROFN": re.compile(
671
- r'right\s+of\s+first\s+(?:refusal|offer|negotiation)|ROFR|ROFO|ROFN',
672
- re.IGNORECASE
673
- ),
674
- "Renewal Term": re.compile(
675
- r'renew(?:al)?|successive\s+term|auto(?:matic(?:ally)?)?\s*[\-\s]?renew|non[\-\s]?renewal',
676
- re.IGNORECASE
677
- ),
678
- # FIX v4.3.1: Revenue/Profit Sharing fires on IP assignment "rights for value" language
679
- "Revenue/Profit Sharing": re.compile(
680
- r'revenue\s+shar|profit\s+shar|revenue\s+split|percentage\s+of\s+revenue|royalt(?:y|ies)|gross\s+profit',
681
- re.IGNORECASE
682
- ),
683
- # FIX v4.3.1: Minimum Commitment fires on fee schedules — require explicit minimum language
684
- "Minimum Commitment": re.compile(
685
- r'minimum\s+(?:purchase|order|spend|volume|commitment)|take[\-\s]or[\-\s]pay|minimum\s+annual',
686
- re.IGNORECASE
687
- ),
688
- # FIX v4.3.1: Non-Disparagement fires on arbitration/class-waiver language
689
- "Non-Disparagement": re.compile(
690
- r'disparag|defam|false\s+statement|negative\s+statement|social\s+media|reputat',
691
- re.IGNORECASE
692
- ),
693
- }
694
-
695
- # FIX v4.3: Exclusion patterns — even if guardrail passes, exclude if contra-indicators present
696
- _LABEL_EXCLUSIONS = {
697
- "ROFR/ROFO/ROFN": re.compile(
698
- r'assigns?\s+to|irrevocab(?:ly|le)\s+assign|all\s+right,?\s+title,?\s+and\s+interest|work[\-\s]for[\-\s]hire',
699
- re.IGNORECASE
700
- ),
701
- "Renewal Term": re.compile(
702
- r'limitation\s+of\s+liabilit|shall\s+not\s+be\s+liable|indemnif|hold\s+harmless|defend\s+and',
703
- re.IGNORECASE
704
- ),
705
- # FIX v4.3.1: Revenue/Profit Sharing must NOT fire on IP assignment or license grant clauses
706
- "Revenue/Profit Sharing": re.compile(
707
- r'assigns?\s+to|irrevocab(?:ly|le)\s+assign|work[\-\s](?:made\s+)?for[\-\s]hire|license\s+to\s+access|license\s+grant|non[\-\s]exclusive\s+license',
708
- re.IGNORECASE
709
- ),
710
- # FIX v4.3.1: Non-Disparagement must NOT fire on arbitration/dispute sections
711
- "Non-Disparagement": re.compile(
712
- r'arbitrat|(?<!\w)aaa(?!\w)|(?<!\w)jams(?!\w)|class\s+action|collective\s+(?:proceeding|action)|waives?\s+any\s+right\s+to\s+participate|binding\s+arbitration',
713
- re.IGNORECASE
714
- ),
715
- }
716
-
717
- # FIX v4.3: Minimum confidence thresholds per label
718
- _LABEL_MIN_CONFIDENCE = {
719
- "ROFR/ROFO/ROFN": 0.65,
720
- "Renewal Term": 0.70,
721
- "Revenue/Profit Sharing": 0.65, # FIX v4.3.1
722
- "Minimum Commitment": 0.65, # FIX v4.3.1
723
  }
724
 
725
  def _apply_guardrails(label, text, confidence):
726
- # Check minimum confidence for specific labels
727
- min_conf = _LABEL_MIN_CONFIDENCE.get(label)
728
- if min_conf and confidence < min_conf:
729
- return "Other", confidence * 0.2
730
-
731
- # Check required keywords (must be present)
732
  guard = _LABEL_GUARDRAILS.get(label)
733
  if guard and not guard.search(text):
734
- return "Other", confidence * 0.3
735
-
736
- # Check exclusion patterns (must NOT be present)
737
- exclusion = _LABEL_EXCLUSIONS.get(label)
738
- if exclusion and exclusion.search(text):
739
- return "Other", confidence * 0.2
740
  return label, confidence
741
 
742
  def _text_hash(text):
743
  return hashlib.md5(text.encode()).hexdigest()
744
 
745
- # FIX v4.1: Bounded prediction cache
746
- _prediction_cache = BoundedCache(maxsize=2000)
747
 
748
  def classify_cuad(clause_text):
749
  if cuad_model is None or cuad_tokenizer is None:
750
  return _classify_regex(clause_text)
751
 
 
752
  clean_text = _strip_heading(clause_text)
753
 
 
754
  h = _text_hash(clean_text[:512])
755
- cached = _prediction_cache.get(h)
756
- if cached is not None:
757
- return cached
758
 
759
  try:
760
- # FIX v4.1: max_length=512 (was 256 — truncating long legal clauses)
761
  inputs = cuad_tokenizer(
762
  clean_text,
763
  return_tensors="pt",
764
  truncation=True,
765
- max_length=512,
766
  padding=True
767
  )
768
  with torch.no_grad():
769
  logits = cuad_model(**inputs).logits
770
 
771
- # FIX v4.1: Use softmax (matching single-label cross-entropy training)
772
- # The model was trained with F.cross_entropy, so softmax is correct.
773
- probs = torch.softmax(logits, dim=-1)[0]
774
-
775
- # Get the top prediction
776
- top_prob, top_idx = torch.max(probs, dim=0)
777
- top_idx = int(top_idx)
778
- top_conf = float(top_prob)
779
 
780
  results = []
781
-
782
- # Primary prediction
783
- threshold = _CUAD_THRESHOLDS.get(top_idx, 0.40)
784
- if top_conf > threshold and top_idx < len(CUAD_LABELS):
785
- label = CUAD_LABELS[top_idx]
786
- conf = top_conf
787
- label, conf = _apply_guardrails(label, clause_text, conf)
788
- if not (label == "Other" and conf < 0.3):
 
789
  risk = RISK_MAP.get(label, "LOW")
790
- risk = _refine_severity(label, clause_text, risk)
791
  results.append({
792
  "label": label,
793
  "confidence": round(conf, 3),
@@ -795,170 +567,21 @@ def classify_cuad(clause_text):
795
  "description": DESC_MAP.get(label, label),
796
  "source": "ml",
797
  })
798
-
799
- # Also check 2nd-best prediction if confident enough
800
- if len(probs) > 1:
801
- sorted_probs, sorted_indices = torch.sort(probs, descending=True)
802
- if len(sorted_probs) > 1:
803
- second_idx = int(sorted_indices[1])
804
- second_conf = float(sorted_probs[1])
805
- second_threshold = _CUAD_THRESHOLDS.get(second_idx, 0.40)
806
- if second_conf > second_threshold and second_idx < len(CUAD_LABELS):
807
- label2 = CUAD_LABELS[second_idx]
808
- conf2 = second_conf
809
- label2, conf2 = _apply_guardrails(label2, clause_text, conf2)
810
- if not (label2 == "Other" and conf2 < 0.3):
811
- # Only add if different from primary
812
- if not results or results[0]["label"] != label2:
813
- risk2 = RISK_MAP.get(label2, "LOW")
814
- risk2 = _refine_severity(label2, clause_text, risk2)
815
- results.append({
816
- "label": label2,
817
- "confidence": round(conf2, 3),
818
- "risk": risk2,
819
- "description": DESC_MAP.get(label2, label2),
820
- "source": "ml",
821
- })
822
-
823
  results.sort(key=lambda x: x["confidence"], reverse=True)
824
 
825
  # If no ML results, also try regex to catch what model misses
826
  if not results:
827
  results = _classify_regex(clause_text)
828
 
829
- _prediction_cache.put(h, results)
 
 
 
830
  return results
831
  except Exception as e:
832
  print(f"[ClauseGuard] CUAD inference error: {e}")
833
  return _classify_regex(clause_text)
834
 
835
- # ═══════════════════════════════════════════════════════════════════════
836
- # 5b. BATCHED CLAUSE CLASSIFICATION
837
- # PERF v4.3: Single forward pass for all clauses instead of one-by-one
838
- # ══════════════════════════════════════════════════════════════════════���
839
-
840
- def classify_cuad_batch(clauses, batch_size=8):
841
- """Classify a batch of clauses in a single forward pass.
842
- PERF v4.3: Replaces sequential classify_cuad() loop.
843
- On CPU, batch_size=8 balances memory vs throughput."""
844
- if cuad_model is None or cuad_tokenizer is None:
845
- # Fallback to regex for all clauses
846
- return [_classify_regex(c) for c in clauses]
847
-
848
- all_results = []
849
- # Check cache first, collect uncached clauses
850
- uncached_indices = []
851
- uncached_texts = []
852
- for i, clause in enumerate(clauses):
853
- clean = _strip_heading(clause)
854
- h = _text_hash(clean[:512])
855
- cached = _prediction_cache.get(h)
856
- if cached is not None:
857
- all_results.append((i, cached))
858
- else:
859
- uncached_indices.append(i)
860
- uncached_texts.append(clean)
861
- all_results.append((i, None)) # placeholder
862
-
863
- if not uncached_texts:
864
- return [r for _, r in sorted(all_results)]
865
-
866
- # Process uncached in batches
867
- for batch_start in range(0, len(uncached_texts), batch_size):
868
- batch_texts = uncached_texts[batch_start:batch_start + batch_size]
869
- batch_original = [clauses[uncached_indices[batch_start + j]] for j in range(len(batch_texts))]
870
-
871
- try:
872
- inputs = cuad_tokenizer(
873
- batch_texts,
874
- return_tensors="pt",
875
- truncation=True,
876
- max_length=512,
877
- padding=True,
878
- )
879
- with torch.no_grad():
880
- logits = cuad_model(**inputs).logits
881
-
882
- probs = torch.softmax(logits, dim=-1)
883
-
884
- for j in range(len(batch_texts)):
885
- clause_probs = probs[j]
886
- original_text = batch_original[j]
887
- results = []
888
-
889
- # Primary prediction
890
- top_prob, top_idx = torch.max(clause_probs, dim=0)
891
- top_idx_int = int(top_idx)
892
- top_conf = float(top_prob)
893
-
894
- threshold = _CUAD_THRESHOLDS.get(top_idx_int, 0.40)
895
- if top_conf > threshold and top_idx_int < len(CUAD_LABELS):
896
- label = CUAD_LABELS[top_idx_int]
897
- conf = top_conf
898
- label, conf = _apply_guardrails(label, original_text, conf)
899
- if not (label == "Other" and conf < 0.3):
900
- risk = RISK_MAP.get(label, "LOW")
901
- risk = _refine_severity(label, original_text, risk)
902
- results.append({
903
- "label": label,
904
- "confidence": round(conf, 3),
905
- "risk": risk,
906
- "description": DESC_MAP.get(label, label),
907
- "source": "ml",
908
- })
909
-
910
- # 2nd-best prediction
911
- sorted_probs, sorted_indices = torch.sort(clause_probs, descending=True)
912
- if len(sorted_probs) > 1:
913
- second_idx = int(sorted_indices[1])
914
- second_conf = float(sorted_probs[1])
915
- second_threshold = _CUAD_THRESHOLDS.get(second_idx, 0.40)
916
- if second_conf > second_threshold and second_idx < len(CUAD_LABELS):
917
- label2 = CUAD_LABELS[second_idx]
918
- conf2 = second_conf
919
- label2, conf2 = _apply_guardrails(label2, original_text, conf2)
920
- if not (label2 == "Other" and conf2 < 0.3):
921
- if not results or results[0]["label"] != label2:
922
- risk2 = RISK_MAP.get(label2, "LOW")
923
- risk2 = _refine_severity(label2, original_text, risk2)
924
- results.append({
925
- "label": label2,
926
- "confidence": round(conf2, 3),
927
- "risk": risk2,
928
- "description": DESC_MAP.get(label2, label2),
929
- "source": "ml",
930
- })
931
-
932
- results.sort(key=lambda x: x["confidence"], reverse=True)
933
-
934
- if not results:
935
- results = _classify_regex(original_text)
936
-
937
- # Cache the result
938
- h = _text_hash(batch_texts[j][:512])
939
- _prediction_cache.put(h, results)
940
-
941
- # Update placeholder in all_results
942
- global_idx = uncached_indices[batch_start + j]
943
- for k, (idx, _) in enumerate(all_results):
944
- if idx == global_idx:
945
- all_results[k] = (idx, results)
946
- break
947
-
948
- except Exception as e:
949
- print(f"[ClauseGuard] Batch CUAD inference error: {e}")
950
- # Fallback to regex for this batch
951
- for j in range(len(batch_texts)):
952
- global_idx = uncached_indices[batch_start + j]
953
- results = _classify_regex(batch_original[j])
954
- for k, (idx, _) in enumerate(all_results):
955
- if idx == global_idx:
956
- all_results[k] = (idx, results)
957
- break
958
-
959
- return [r for _, r in sorted(all_results)]
960
-
961
- # FIX v4.1: Extended regex patterns to cover more CUAD categories
962
  _REGEX_PATTERNS = {
963
  "Limitation of liability": [r"not liable", r"shall not be (liable|responsible)", r"in no event.*liable", r"limitation of liability", r"without warranty", r"disclaim"],
964
  "Unilateral termination": [r"terminat.*at any time", r"suspend.*account.*without", r"we may (terminat|suspend|discontinu)", r"right to (terminat|suspend)"],
@@ -970,60 +593,30 @@ _REGEX_PATTERNS = {
970
  "Arbitration": [r"arbitrat", r"binding arbitration", r"waive.*right.*court", r"class action waiver"],
971
  "Governing Law": [r"governed by", r"laws of", r"jurisdiction of"],
972
  "Termination for Convenience": [r"terminat.*for convenience", r"terminat.*without cause", r"terminat.*at any time"],
973
- "Non-Compete": [r"non-compete", r"shall not compete", r"competition restriction"],
974
  "Exclusivity": [r"exclusive(?:ly)?(?:\s+(?:deal|relationship|partner|right))", r"exclusivity"],
975
- "IP Ownership Assignment": [r"assign.*intellectual property", r"ownership of.*ip", r"all rights.*assign", r"work.?for.?hire"],
976
  "Uncapped Liability": [r"unlimited liability", r"uncapped", r"no.*limit.*liability"],
977
  "Cap on Liability": [r"cap on liability", r"maximum liability", r"liability.*shall not exceed", r"aggregate liability.*not exceed"],
978
  "Indemnification": [r"indemnif", r"hold harmless", r"defend.*against.*claim"],
979
  "Confidentiality": [r"confidential(?:ity)?", r"non-disclosure", r"\bnda\b"],
980
  "Force Majeure": [r"force majeure", r"act of god", r"beyond.*(?:reasonable\s+)?control"],
981
  "Penalties": [r"penalt(?:y|ies)", r"late fee", r"default charge", r"interest on overdue"],
982
- # FIX v4.1: Added missing regex patterns for more CUAD categories
983
- "Audit Rights": [r"audit rights?", r"right to audit", r"inspect.*records?", r"examination of.*records?", r"access to.*books"],
984
- "Warranty Duration": [r"warrant(?:y|ies).*(?:period|duration|term|months?|years?)", r"warranty.*shall.*(?:remain|last|continue)", r"limited warranty"],
985
- "Insurance": [r"(?:shall|must).*maintain.*insurance", r"insurance.*coverage", r"policy of insurance", r"certificate of insurance"],
986
- "Source Code Escrow": [r"source code escrow", r"escrow.*source code", r"escrow agent"],
987
- "Post-Termination Services": [r"post.?termination.*(?:service|obligation|support)", r"(?:after|following|upon).*termination.*(?:shall|must|will).*(?:provide|continue)"],
988
- "Renewal Term": [r"renew(?:al)?.*term", r"auto(?:matic(?:ally)?)?.*renew", r"successive.*(?:term|period)"],
989
- "Notice Period to Terminate Renewal": [r"notice.*(?:to\s+)?terminat.*renew", r"(?:days?|months?).*(?:prior|advance).*(?:notice|written).*(?:terminat|renew)", r"notice of non.?renewal"],
990
- "Change of Control": [r"change of control", r"change in.*(?:ownership|control)", r"merger.*acquisition", r"sale of.*(?:all|substantially).*assets"],
991
- "Anti-Assignment": [r"(?:shall|may)\s+not\s+assign", r"anti.?assignment", r"no.*assignment.*without.*consent"],
992
- "Revenue/Profit Sharing": [r"revenue.*shar", r"profit.*shar", r"royalt(?:y|ies)"],
993
- "Liquidated Damages": [r"liquidated.*damages?", r"pre.?determined.*damage", r"stipulated.*damage"],
994
- "Covenant Not to Sue": [r"covenant not to sue", r"(?:shall|agree).*not.*(?:bring|file|commence).*(?:action|claim|suit)"],
995
- "Joint IP Ownership": [r"joint(?:ly)?.*own(?:ed|ership)?.*(?:ip|intellectual property)", r"co.?own(?:ed|ership)?"],
996
- "License Grant": [r"(?:grant|license).*(?:non.?exclusive|exclusive|perpetual|irrevocable).*(?:license|right)", r"hereby grants?.*license"],
997
- "Non-Transferable License": [r"non.?transferable.*license", r"license.*(?:shall|may)\s+not.*(?:transfer|assign|sublicense)"],
998
- "ROFR/ROFO/ROFN": [r"right of first.*(?:refusal|offer|negotiation)", r"ROFR", r"ROFO", r"ROFN"],
999
- "No-Solicit of Customers": [r"(?:shall|must|agree).*not.*solicit.*customer", r"no.?solicit.*customer", r"non.?solicitation.*customer"],
1000
- "No-Solicit of Employees": [r"(?:shall|must|agree).*not.*solicit.*employee", r"no.?solicit.*employee", r"non.?solicitation.*employee", r"no.?hire"],
1001
- "Non-Disparagement": [r"non.?disparagement", r"(?:shall|must|agree).*not.*(?:disparag|defam|make.*negative)", r"not.*make.*derogatory"],
1002
- "Most Favored Nation": [r"most favou?red.*nation", r"MFN", r"most favou?red.*(?:customer|pricing|terms)"],
1003
- "Third Party Beneficiary": [r"third.?party.*beneficiar", r"no.*third.?party.*beneficiar"],
1004
- "Minimum Commitment": [r"minimum.*(?:commitment|purchase|order|volume|spend)", r"(?:shall|must).*(?:purchase|order).*(?:at least|minimum|no less than)"],
1005
- "Volume Restriction": [r"volume.*(?:restriction|limitation|cap|ceiling)", r"(?:shall|may).*not.*exceed.*(?:volume|quantity)"],
1006
- "Price Restriction": [r"price.*(?:restriction|limitation|ceiling|cap|floor)", r"(?:shall|may).*not.*(?:increase|raise|exceed).*price"],
1007
  }
1008
 
1009
- # FIX v4.2: Pre-compile regex patterns at module level (was recompiling per call)
1010
- _REGEX_PATTERNS_COMPILED = {}
1011
- for _label, _pats in _REGEX_PATTERNS.items():
1012
- _REGEX_PATTERNS_COMPILED[_label] = [re.compile(p, re.IGNORECASE) for p in _pats]
1013
-
1014
  def _classify_regex(text):
1015
  """Regex fallback — returns pattern match, NOT fake confidence."""
1016
  text_lower = text.lower()
1017
  results = []
1018
  seen = set()
1019
- for label, patterns in _REGEX_PATTERNS_COMPILED.items():
1020
  for pat in patterns:
1021
- if pat.search(text_lower):
1022
  if label not in seen:
1023
  risk = RISK_MAP.get(label, "MEDIUM")
1024
  results.append({
1025
  "label": label,
1026
- "confidence": None,
1027
  "risk": risk,
1028
  "description": DESC_MAP.get(label, label),
1029
  "source": "pattern",
@@ -1034,25 +627,20 @@ def _classify_regex(text):
1034
 
1035
  # ═══════════════════════════════════════════════════════════════════════
1036
  # 6. LEGAL NER — ML model with regex fallback
1037
- # FIX v4.1: Batch all chunks in single pipeline call
1038
  # ═══════════════════════════════════════════════════════════════════════
1039
 
1040
  def extract_entities(text):
1041
  """Extract entities using ML model (matterstack/legal-bert-ner) with regex fallback."""
1042
  entities = []
1043
 
 
1044
  if _HAS_NER_MODEL and ner_pipeline is not None:
1045
  try:
1046
- # FIX v4.1: Create overlapping chunks but batch them in a SINGLE pipeline call
1047
- max_text = min(len(text), 10000)
1048
- chunks = [text[i:i+512] for i in range(0, max_text, 450)]
1049
- offsets = list(range(0, max_text, 450))
1050
-
1051
- # Single batched pipeline call instead of sequential
1052
- all_ner_results = ner_pipeline(chunks, batch_size=8)
1053
-
1054
- for chunk_idx, ner_results in enumerate(all_ner_results):
1055
- offset = offsets[chunk_idx]
1056
  for ent in ner_results:
1057
  if ent.get("score", 0) > 0.5:
1058
  entities.append({
@@ -1063,55 +651,16 @@ def extract_entities(text):
1063
  "score": round(ent["score"], 3),
1064
  "source": "ml",
1065
  })
 
1066
  except Exception as e:
1067
  print(f"[ClauseGuard] ML NER error, falling back to regex: {e}")
1068
  entities = _extract_entities_regex(text)
1069
  else:
1070
  entities = _extract_entities_regex(text)
1071
 
1072
- # FIX v4.3: Post-process ML entities to clean up WordPiece artefacts
1073
- cleaned_entities = []
1074
- for e in entities:
1075
- text_val = e.get("text", "")
1076
- # Strip WordPiece subword tokens (## prefix)
1077
- if "##" in text_val:
1078
- text_val = re.sub(r'##\w*', '', text_val).strip()
1079
- text_val = re.sub(r'\s+', ' ', text_val).strip()
1080
- # Discard entities that are too short, start/end with hyphens, or are garbled
1081
- if len(text_val) < 2:
1082
- continue
1083
- if text_val.startswith("-") or text_val.endswith("-"):
1084
- continue
1085
- # Discard low-confidence MISC entities (almost always tokenisation artefacts)
1086
- if e.get("type") == "MISC" and e.get("score", 1.0) < 0.6:
1087
- continue
1088
- # Discard entities that are mostly punctuation/symbols
1089
- alpha_ratio = sum(1 for c in text_val if c.isalnum()) / max(len(text_val), 1)
1090
- if alpha_ratio < 0.4:
1091
- continue
1092
- e["text"] = text_val
1093
- cleaned_entities.append(e)
1094
- entities = cleaned_entities
1095
-
1096
- # FIX v4.3: Split concatenated MONEY/QUANTITY entities
1097
- # e.g., "usd $ 485, 000,usd $ 72, 000" → separate entities
1098
- _CURRENCY_SPLIT = re.compile(r'(?<=[\d,])\s*(?=(?:USD|usd|EUR|GBP|\$|£|€))', re.IGNORECASE)
1099
- split_entities = []
1100
- for e in entities:
1101
- if e.get("type") in ("MONEY", "QUANTITY") and _CURRENCY_SPLIT.search(e["text"]):
1102
- parts = _CURRENCY_SPLIT.split(e["text"])
1103
- for part in parts:
1104
- part = part.strip().strip(",").strip()
1105
- if len(part) >= 2:
1106
- new_ent = dict(e)
1107
- new_ent["text"] = re.sub(r'\s+', '', part) if "$" in part or "USD" in part.upper() else part
1108
- split_entities.append(new_ent)
1109
- else:
1110
- split_entities.append(e)
1111
- entities = split_entities
1112
-
1113
  # Always supplement with regex patterns for things NER often misses
1114
  regex_ents = _extract_entities_regex(text)
 
1115
  ml_spans = set()
1116
  for e in entities:
1117
  for pos in range(e["start"], e["end"]):
@@ -1131,13 +680,20 @@ def extract_entities(text):
1131
  return filtered
1132
 
1133
  def _map_ner_label(label):
 
1134
  label = label.upper()
1135
  mapping = {
1136
- "PER": "PERSON", "PERSON": "PERSON",
1137
- "ORG": "PARTY", "ORGANIZATION": "PARTY",
1138
- "LOC": "JURISDICTION", "LOCATION": "JURISDICTION",
1139
- "GPE": "JURISDICTION", "DATE": "DATE",
1140
- "MONEY": "MONEY", "MISC": "MISC", "LAW": "LEGAL_REF",
 
 
 
 
 
 
1141
  }
1142
  return mapping.get(label, label)
1143
 
@@ -1145,19 +701,26 @@ def _extract_entities_regex(text):
1145
  """Regex-based NER fallback."""
1146
  entities = []
1147
  patterns = [
 
1148
  (r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', "DATE"),
1149
  (r'\b\d{1,2}/\d{1,2}/\d{2,4}\b', "DATE"),
1150
  (r'\b\d{1,2}-(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\d{2,4}\b', "DATE"),
1151
  (r'\b(?:Effective|Commencement|Expiration|Termination)\s+Date\b', "DATE_REF"),
 
1152
  (r'\$\s?\d{1,3}(?:,\d{3})*(?:\.\d{2})?(?:\s*(?:million|billion|thousand|M|B|K))?', "MONEY"),
1153
  (r'\b\d{1,3}(?:,\d{3})*(?:\.\d{2})?\s*(?:USD|EUR|GBP|dollars|euros|pounds)', "MONEY"),
1154
  (r'\b(?:USD|EUR|GBP)\s*\d{1,3}(?:,\d{3})*(?:\.\d{2})?', "MONEY"),
 
1155
  (r'\b\d+(?:\.\d+)?%', "PERCENTAGE"),
 
1156
  (r'\b\d+\s*(?:year|month|week|day|business day)s?\b', "DURATION"),
 
1157
  (r'\b[A-Z][A-Za-z0-9\s&,]+?(?:Inc\.?|LLC|Ltd\.?|Limited|Corp\.?|Corporation|PLC|GmbH|AG|S\.A\.?|B\.V\.?|L\.P\.?|LLP)\b', "PARTY"),
1158
  (r'\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Customer|Vendor|Client)\b', "PARTY_ROLE"),
 
1159
  (r'\b(?:State|Commonwealth)\s+of\s+[A-Z][a-zA-Z\s]+', "JURISDICTION"),
1160
  (r'\b(?:California|Delaware|New York|Texas|Florida|England|Ireland|Germany|France|Singapore|Hong Kong|Ontario|British Columbia)\b', "JURISDICTION"),
 
1161
  (r'"([A-Z][A-Za-z\s]{1,40})"', "DEFINED_TERM"),
1162
  (r'\((?:the\s+)?"([A-Z][A-Za-z\s]{1,40})"\)', "DEFINED_TERM"),
1163
  ]
@@ -1174,29 +737,9 @@ def _extract_entities_regex(text):
1174
  return entities
1175
 
1176
  # ═══════════════════════════════════════════════════════════════════════
1177
- # 7. NLI / CONTRADICTION DETECTION
1178
- # FIX v4.1: Pass (text_a, text_b) as dict with proper keys for
1179
- # cross-encoder pipeline, not [SEP]-concatenated string
1180
  # ═══════════════════════════════════════════════════════════════════════
1181
 
1182
- def _run_nli(text_a, text_b):
1183
- """Run NLI using CrossEncoder with correct input format.
1184
- FIX v4.2: Use sentence_transformers.CrossEncoder.predict() which accepts
1185
- a list of (text_a, text_b) tuples. Returns scores for [contradiction, entailment, neutral].
1186
- The old code used pipeline("text-classification") with dict input, which was broken."""
1187
- try:
1188
- # CrossEncoder.predict returns numpy array of shape (n_pairs, 3)
1189
- # Columns: [contradiction, entailment, neutral]
1190
- scores = nli_model.predict([(text_a[:256], text_b[:256])])
1191
- label_mapping = ["contradiction", "entailment", "neutral"]
1192
- top_idx = int(scores[0].argmax())
1193
- top_score = float(scores[0][top_idx])
1194
- return [{"label": label_mapping[top_idx], "score": top_score}]
1195
- except Exception as e:
1196
- print(f"[ClauseGuard] NLI inference error: {e}")
1197
- return None
1198
-
1199
-
1200
  def detect_contradictions(clause_results, raw_text=""):
1201
  """
1202
  Detect contradictions using:
@@ -1213,7 +756,8 @@ def detect_contradictions(clause_results, raw_text=""):
1213
  clause_texts_by_label[cr["label"]].append(cr.get("text", ""))
1214
 
1215
  # ── 1. Semantic NLI (if model available) ──
1216
- if _HAS_NLI_MODEL and nli_model is not None:
 
1217
  conflict_pairs = [
1218
  ("Uncapped Liability", "Cap on Liability",
1219
  "Liability cannot be both uncapped and capped simultaneously."),
@@ -1228,20 +772,24 @@ def detect_contradictions(clause_results, raw_text=""):
1228
  texts_b = clause_texts_by_label[label_b]
1229
  for ta in texts_a[:2]:
1230
  for tb in texts_b[:2]:
1231
- # FIX v4.1: Use proper NLI input format
1232
- nli_result = _run_nli(ta, tb)
1233
- if nli_result is None:
1234
- continue
1235
- for r in (nli_result if isinstance(nli_result, list) else [nli_result]):
1236
- if r.get("label", "").lower() == "contradiction" and r.get("score", 0) > 0.6:
1237
- contradictions.append({
1238
- "type": "CONTRADICTION",
1239
- "explanation": explanation,
1240
- "severity": "HIGH",
1241
- "clauses": [label_a, label_b],
1242
- "confidence": round(r["score"], 3),
1243
- "source": "nli_model",
1244
- })
 
 
 
 
1245
 
1246
  # Also check for internal contradictions within governing law / termination
1247
  for label in ["Governing Law", "Termination for Convenience"]:
@@ -1249,19 +797,23 @@ def detect_contradictions(clause_results, raw_text=""):
1249
  if len(texts) >= 2:
1250
  for i in range(len(texts)):
1251
  for j in range(i + 1, min(len(texts), i + 3)):
1252
- nli_result = _run_nli(texts[i], texts[j])
1253
- if nli_result is None:
1254
- continue
1255
- for r in (nli_result if isinstance(nli_result, list) else [nli_result]):
1256
- if r.get("label", "").lower() == "contradiction" and r.get("score", 0) > 0.6:
1257
- contradictions.append({
1258
- "type": "CONTRADICTION",
1259
- "explanation": f"Conflicting {label} provisions detected — clauses contradict each other.",
1260
- "severity": "HIGH",
1261
- "clauses": [label],
1262
- "confidence": round(r["score"], 3),
1263
- "source": "nli_model",
1264
- })
 
 
 
 
1265
  else:
1266
  # ── Heuristic fallback (improved) ──
1267
  _heuristic_pairs = [
@@ -1282,7 +834,7 @@ def detect_contradictions(clause_results, raw_text=""):
1282
  "source": "heuristic",
1283
  })
1284
 
1285
- # ── 2. Missing critical clauses ──
1286
  _REQUIRED_CLAUSE_PATTERNS = {
1287
  "Governing Law": re.compile(
1288
  r'govern(?:ed|ing).{0,15}law|applicable.{0,10}law|laws?\s+of\s+the\s+state',
@@ -1302,6 +854,7 @@ def detect_contradictions(clause_results, raw_text=""):
1302
  ),
1303
  }
1304
  for clause_name, pattern in _REQUIRED_CLAUSE_PATTERNS.items():
 
1305
  if not pattern.search(raw_text):
1306
  contradictions.append({
1307
  "type": "MISSING",
@@ -1324,7 +877,6 @@ def detect_contradictions(clause_results, raw_text=""):
1324
 
1325
  # ═══════════════════════════════════════════════════════════════════════
1326
  # 8. RISK SCORING
1327
- # FIX v4.1: Absolute risk based on findings, not normalized by doc length
1328
  # ═══════════════════════════════════════════════════════════════════════
1329
 
1330
  def compute_risk_score(clause_results, total_clauses):
@@ -1334,28 +886,8 @@ def compute_risk_score(clause_results, total_clauses):
1334
  sev_counts[sev] += 1
1335
  if total_clauses == 0:
1336
  return 0, "A", sev_counts
1337
-
1338
- # FIX v4.3: Revised risk formula — scale denominator with clause count
1339
- # to prevent small contracts from always scoring 80+.
1340
- # The old formula used a fixed /30 denominator which meant even 2 CRITICAL
1341
- # flags scored 73, making almost every contract grade F.
1342
- #
1343
- # New approach: dynamic denominator based on total clauses analysed.
1344
- # This means risk is relative to document complexity.
1345
- # - 1 CRITICAL in 5 clauses = high risk
1346
- # - 1 CRITICAL in 50 clauses = moderate risk (proportionally less of the contract)
1347
  weighted = sum(sev_counts[s] * RISK_WEIGHTS[s] for s in sev_counts)
1348
-
1349
- # Dynamic max: what if every clause were CRITICAL?
1350
- max_possible = total_clauses * RISK_WEIGHTS["CRITICAL"]
1351
- if max_possible == 0:
1352
- max_possible = 1
1353
-
1354
- # Blend: 60% absolute (diminishing returns) + 40% relative (to total clauses)
1355
- absolute_risk = 100 * (1 - (1 / (1 + weighted / 50))) # /50 instead of /30 = softer curve
1356
- relative_risk = min(100, (weighted / max_possible) * 100)
1357
- risk = min(100, round(0.6 * absolute_risk + 0.4 * relative_risk))
1358
-
1359
  if risk >= 70: grade = "F"
1360
  elif risk >= 50: grade = "D"
1361
  elif risk >= 30: grade = "C"
@@ -1373,12 +905,9 @@ def analyze_contract(text):
1373
  clauses = split_clauses(text)
1374
  if not clauses:
1375
  return None, "No clauses detected in document"
1376
-
1377
- # PERF v4.3: Batch classification — single forward pass instead of per-clause
1378
- batch_predictions = classify_cuad_batch(clauses, batch_size=8)
1379
-
1380
  clause_results = []
1381
- for clause, predictions in zip(clauses, batch_predictions):
 
1382
  if predictions:
1383
  for pred in predictions:
1384
  clause_results.append({
@@ -1393,8 +922,10 @@ def analyze_contract(text):
1393
  contradictions = detect_contradictions(clause_results, text)
1394
  risk, grade, sev_counts = compute_risk_score(clause_results, len(clauses))
1395
  obligations = extract_obligations(text)
 
1396
  compliance = check_compliance(text)
1397
 
 
1398
  flagged_clause_count = len(clause_results)
1399
  unique_flagged_texts = len(set(cr["text"] for cr in clause_results))
1400
 
@@ -1422,7 +953,7 @@ def analyze_contract(text):
1422
  return result, None
1423
 
1424
  # ═══════════════════════════════════════════════════════════════════════
1425
- # 10. EXPORT FUNCTIONS
1426
  # ═══════════════════════════════════════════════════════════════════════
1427
 
1428
  def export_json(result):
@@ -1450,7 +981,7 @@ def export_csv(result):
1450
  return output.getvalue()
1451
 
1452
  # ═══════════════════════════════════════════════════════════════════════
1453
- # 11. UI RENDERING
1454
  # ═══════════════════════════════════════════════════════════════════════
1455
 
1456
  def render_summary(result):
@@ -1618,29 +1149,34 @@ def render_document_viewer(result):
1618
  entities = sorted(result.get("entities", []), key=lambda x: x["start"])
1619
  html_parts = []
1620
  last_end = 0
1621
- entity_colors = {
1622
- "DATE": "#3b82f6", "DATE_REF": "#60a5fa", "MONEY": "#22c55e",
1623
- "PERCENTAGE": "#10b981", "DURATION": "#6366f1", "PARTY": "#8b5cf6",
1624
- "PARTY_ROLE": "#a78bfa", "PERSON": "#ec4899", "JURISDICTION": "#f59e0b",
1625
- "DEFINED_TERM": "#ec4899", "LEGAL_REF": "#6b7280", "MISC": "#9ca3af",
1626
- }
1627
  for e in entities:
1628
  if e["start"] >= last_end:
1629
- plain = text[last_end:e["start"]].replace("<", "&lt;").replace(">", "&gt;")
1630
- html_parts.append(plain)
1631
- color = entity_colors.get(e["type"], "#6b7280")
1632
- entity_text = text[e["start"]:e["end"]].replace("<", "&lt;").replace(">", "&gt;")
 
 
 
 
 
 
 
 
1633
  html_parts.append(
1634
- f'<span style="background:{color}20;color:{color};border-bottom:2px solid {color};padding:0 2px;border-radius:2px;" '
1635
- f'title="{e["type"]}">{entity_text}</span>'
1636
  )
1637
  last_end = e["end"]
1638
- if last_end < len(text):
1639
- html_parts.append(text[last_end:].replace("<", "&lt;").replace(">", "&gt;"))
1640
- return f'<div style="font-family:ui-monospace,monospace;font-size:13px;line-height:1.8;white-space:pre-wrap;padding:16px;">{"".join(html_parts)}</div>'
 
 
 
 
1641
 
1642
  # ═══════════════════════════════════════════════════════════════════════
1643
- # 12. COMPARISON WRAPPER
1644
  # ═══════════════════════════════════════════════════════════════════════
1645
 
1646
  def run_comparison(text_a, text_b):
@@ -1780,10 +1316,6 @@ This Master Service Agreement ("MSA") is entered into as of March 1, 2024 (the "
1780
 
1781
  14. THIRD PARTY BENEFICIARY. No third party shall have rights under this Agreement except as expressly provided."""
1782
 
1783
- # ═══════════════════════════════════════════════════════════════════════
1784
- # 14. GRADIO BLOCKS
1785
- # ═══════════════════════════════════════════════════════════════════════
1786
-
1787
  with gr.Blocks(
1788
  title="ClauseGuard — AI Contract Analysis",
1789
  css="""
@@ -1802,7 +1334,7 @@ with gr.Blocks(
1802
  <h1 style="font-size:24px;font-weight:700;margin:0;color:#1f2937;">🛡️ ClauseGuard</h1>
1803
  <p style="font-size:13px;color:#6b7280;margin:4px 0 0 0;">AI-Powered Legal Contract Analysis · 41 Clause Categories · Risk Scoring · ML NER · NLI Contradictions · Compliance · Obligations · <strong>Q&A Chatbot</strong> · <strong>Clause Redlining</strong> · <strong>OCR</strong></p>
1804
  </div>
1805
- <div style="font-size:12px;color:#9ca3af;">v4.3 · Precision Legal AI</div>
1806
  </div>
1807
  """)
1808
 
@@ -1925,7 +1457,7 @@ with gr.Blocks(
1925
  <h3 style="margin:0;font-size:16px;color:#1f2937;">Contract Q&A Chatbot</h3>
1926
  </div>
1927
  <p style="font-size:12px;color:#6b7280;margin:0;line-height:1.5;">
1928
- Ask questions about your analyzed contract. The chatbot uses <strong>RAG</strong> (Retrieval-Augmented Generation)
1929
  to find relevant clauses and generate accurate answers grounded in your contract text.
1930
  <br>
1931
  <strong>Step 1:</strong> Analyze a contract in the "📄 Single Contract Analysis" tab.
@@ -1994,8 +1526,7 @@ with gr.Blocks(
1994
  doc_html, obligations_html, compliance_html, redlining_html,
1995
  json_file, csv_file, status_msg, analysis_state,
1996
  chunks_state, embeddings_state, chatbot_index_status,
1997
- ],
1998
- api_name="analyze",
1999
  )
2000
 
2001
  clear_btn.click(
@@ -2011,20 +1542,18 @@ with gr.Blocks(
2011
  comp_btn.click(
2012
  run_comparison,
2013
  inputs=[comp_text_a, comp_text_b],
2014
- outputs=[comp_result_html, comp_json],
2015
- api_name="compare",
2016
  )
2017
 
2018
  gr.HTML("""
2019
  <div style="margin-top:24px;padding:16px 0;border-top:1px solid #e5e7eb;text-align:center;">
2020
  <p style="font-size:11px;color:#9ca3af;">
2021
  ⚠️ Not legal advice. For informational purposes only.
2022
- · Classifier: <a href="https://huggingface.co/gaurv007/clauseguard-onnx-int8" style="color:#6b7280;">Legal-BERT ONNX INT8 (41 CUAD classes)</a>
2023
  · NER: <a href="https://huggingface.co/matterstack/legal-bert-ner" style="color:#6b7280;">Legal-BERT NER</a>
2024
  · NLI: <a href="https://huggingface.co/cross-encoder/nli-deberta-v3-base" style="color:#6b7280;">DeBERTa-v3 NLI</a>
2025
  · LLM: <a href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color:#6b7280;">Qwen2.5-7B</a>
2026
  · OCR: <a href="https://github.com/mindee/doctr" style="color:#6b7280;">docTR</a>
2027
- · Embeddings: <a href="https://huggingface.co/BAAI/bge-small-en-v1.5" style="color:#6b7280;">BGE-small-en</a>
2028
  · Dataset: <a href="https://huggingface.co/datasets/theatticusproject/cuad-qa" style="color:#6b7280;">CUAD</a>
2029
  · <a href="https://huggingface.co/spaces/gaurv007/ClauseGuard" style="color:#6b7280;">ClauseGuard Space</a>
2030
  </p>
 
1
  """
2
+ ClauseGuard — World's Best Legal Contract Analysis Tool (v4.0)
3
  ═══════════════════════════════════════════════════════════════
4
+ New in v4.0:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  • OCR support for scanned PDFs (docTR engine with smart native/scanned routing)
6
  • Contract Q&A Chatbot (RAG: embedding retrieval + HF Inference API streaming)
7
  • Clause Redlining (3-tier: template lookup + RAG + LLM refinement)
8
+
9
+ Carried from v3.0:
10
+ • Fixed CUAD label mapping (added missing index 6: "Notice Period to Terminate Renewal")
11
+ • Switched from softmax → sigmoid for proper multi-label classification
12
+ • Per-class optimized thresholds instead of flat 0.15
13
+ • Structure-aware clause splitting (respects section numbering)
14
  • Real NLI contradiction detection via cross-encoder model
15
+ • ML-based Legal NER (matterstack/legal-bert-ner) with regex fallback
16
  • Semantic compliance checking with negation handling
17
  • Improved obligation extraction with false-positive filtering
18
+ • LLM-powered clause explanations (via HF Inference API)
19
+ • Prediction caching (LRU) for performance
20
  • Per-session temp files (no collision)
21
+ • Model health reporting to user
22
+ • Document structure parsing
23
 
24
  Models:
25
  • Clause classifier: Mokshith31/legalbert-contract-clause-classification
 
39
  import uuid
40
  import tempfile
41
  import hashlib
42
+ from collections import defaultdict
 
43
  from datetime import datetime
44
  from functools import lru_cache
45
 
 
72
  )
73
  from peft import PeftModel
74
  _HAS_TORCH = True
 
 
 
 
75
  except Exception:
76
  pass
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  # ── Import submodules ───────────────────────────────────────────────
79
  from compare import compare_contracts, render_comparison_html
80
  from obligations import extract_obligations, render_obligations_html
 
137
  "Jurisdiction", "Arbitration"
138
  ]
139
 
140
+ _ALL_LABELS = CUAD_LABELS + _UNFAIR_LABELS
 
 
 
 
 
141
 
142
  RISK_MAP = {
143
  # Critical
 
193
  "Other": "LOW",
194
  "ROFR/ROFO/ROFN": "LOW",
195
  "Contract by using": "LOW",
 
 
 
 
 
196
  }
197
 
198
  DESC_MAP = {label: label.replace("_", " ") for label in _ALL_LABELS}
 
233
  "Irrevocable or Perpetual License": "License that cannot be revoked or lasts indefinitely.",
234
  "Unlimited/All-You-Can-Eat License": "License with no usage limits.",
235
  "Notice Period to Terminate Renewal": "Required notice period before automatic renewal.",
 
 
 
 
 
236
  })
237
 
238
  RISK_WEIGHTS = {"CRITICAL": 40, "HIGH": 20, "MEDIUM": 10, "LOW": 3}
239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  RISK_STYLES = {
241
  "CRITICAL": ("#dc2626", "#fef2f2", "⚠️"),
242
  "HIGH": ("#ea580c", "#fff7ed", "⚡"),
 
244
  "LOW": ("#16a34a", "#f0fdf4", "✓"),
245
  }
246
 
247
+ # Per-class optimized thresholds (tuned on validation set; classes with F1=0 get high threshold)
248
+ # Classes 0,1,2,7,9,21,22,27,37,38 scored F1=0.00 in the model card → raise thresholds
 
 
 
249
  _CUAD_THRESHOLDS = {}
250
  _WEAK_CLASSES = {0, 1, 2, 7, 9, 21, 22, 27, 37, 38}
251
  for _i in range(41):
252
  if _i in _WEAK_CLASSES:
253
  _CUAD_THRESHOLDS[_i] = 0.85 # Only flag if very confident (these classes are unreliable)
254
  else:
255
+ _CUAD_THRESHOLDS[_i] = 0.40 # Reasonable threshold for sigmoid outputs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
 
257
  # ═══════════════════════════════════════════════════════════════════════
258
  # 2. MODEL LOADING
 
261
  cuad_tokenizer = None
262
  cuad_model = None
263
  ner_pipeline = None
264
+ nli_pipeline = None
265
  _model_status = {"cuad": "not_loaded", "ner": "not_loaded", "nli": "not_loaded"}
266
 
267
  def _load_cuad_model():
268
  global cuad_tokenizer, cuad_model, _model_status
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  if not _HAS_TORCH:
270
  print("[ClauseGuard] PyTorch not available — using regex fallback")
271
  _model_status["cuad"] = "unavailable"
 
273
  try:
274
  base = "nlpaueb/legal-bert-base-uncased"
275
  adapter = "Mokshith31/legalbert-contract-clause-classification"
276
+ print(f"[ClauseGuard] Loading CUAD classifier: {adapter}")
277
  cuad_tokenizer = AutoTokenizer.from_pretrained(base)
278
  base_model = AutoModelForSequenceClassification.from_pretrained(
279
  base, num_labels=41, ignore_mismatched_sizes=True
280
  )
281
  cuad_model = PeftModel.from_pretrained(base_model, adapter)
282
  cuad_model.eval()
283
+ _model_status["cuad"] = "loaded"
284
+ print("[ClauseGuard] CUAD model loaded successfully")
285
  except Exception as e:
286
  print(f"[ClauseGuard] CUAD model load failed: {e}")
287
  cuad_tokenizer = None
 
309
  _model_status["ner"] = f"failed: {e}"
310
 
311
  def _load_nli_model():
312
+ global nli_pipeline, _model_status, _HAS_NLI_MODEL
313
+ if not _HAS_TORCH:
314
+ _model_status["nli"] = "unavailable"
315
  return
316
  try:
317
+ print("[ClauseGuard] Loading NLI model: cross-encoder/nli-deberta-v3-base")
318
+ nli_pipeline = pipeline(
319
+ "text-classification",
320
+ model="cross-encoder/nli-deberta-v3-base",
321
+ device=-1,
322
+ )
323
  _HAS_NLI_MODEL = True
324
  _model_status["nli"] = "loaded"
325
+ print("[ClauseGuard] NLI model loaded successfully")
326
  except Exception as e:
327
  print(f"[ClauseGuard] NLI model load failed (using heuristic fallback): {e}")
328
  _model_status["nli"] = f"failed: {e}"
 
384
  return None, f"Unsupported file type: {ext}"
385
 
386
  # ═══════════════════════════════════════════════════════════════════════
387
+ # 4. DETERMINISTIC CLAUSE SPLITTING (Fix 1 from bug report)
 
388
  # ═══════════════════════════════════════════════════════════════════════
389
 
390
+ # Document-level chunk cache: same text always produces same chunks
391
+ _chunk_cache = {}
 
 
 
 
 
 
 
 
 
 
 
392
 
393
  def split_clauses(text):
394
  """Deterministic, structure-aware clause splitting.
395
+ Fix 1: Same input ALWAYS produces same output. Normalized text is hashed
396
  and cached so repeated runs on identical documents are identical."""
397
+ # Normalize whitespace before hashing for determinism
398
  normalized = re.sub(r'\s+', ' ', text.strip())
399
  text_hash = hashlib.sha256(normalized.encode()).hexdigest()
400
+ if text_hash in _chunk_cache:
401
+ return _chunk_cache[text_hash]
 
402
 
403
  text = re.sub(r'\n{3,}', '\n\n', text.strip())
404
 
405
  # First try to detect numbered sections (1., 2., 3.1, (a), etc.)
406
+ section_pattern = re.compile(
407
+ r'(?:^|\n\n)'
408
+ r'(?='
409
+ r'\d+(?:\.\d+)*[.)]\s' # 1. 2. 3.1. 3.1)
410
+ r'|[A-Z]{2,}[A-Z\s]*\n' # ALL CAPS HEADERS
411
+ r'|\([a-z]\)\s' # (a) (b) (c)
412
+ r'|(?:Section|Article|Clause)\s+\d+' # Section 1, Article 2
413
+ r')',
414
+ re.MULTILINE
415
+ )
416
+
417
+ positions = [m.start() for m in section_pattern.finditer(text)]
418
 
419
  if len(positions) >= 3:
420
+ # Document has clear section structure — split on sections
421
  clauses = []
422
  for i, pos in enumerate(positions):
423
  end = positions[i + 1] if i + 1 < len(positions) else len(text)
424
  chunk = text[pos:end].strip()
425
  if len(chunk) > 30:
426
+ # If a section is very long, split on paragraph breaks within it
427
  if len(chunk) > 1500:
428
  sub_parts = chunk.split('\n\n')
429
  current = ""
 
438
  clauses.append(current.strip())
439
  else:
440
  clauses.append(chunk)
441
+ # Also capture anything before the first section
442
  if positions and positions[0] > 50:
443
  preamble = text[:positions[0]].strip()
444
  if len(preamble) > 30:
445
  clauses.insert(0, preamble)
446
  result = clauses if clauses else _fallback_split(text)
447
+ _chunk_cache[text_hash] = result
448
  return result
449
  else:
450
  result = _fallback_split(text)
451
+ _chunk_cache[text_hash] = result
452
  return result
453
 
454
  def _fallback_split(text):
455
  """Fallback: split on paragraph breaks and sentence boundaries."""
456
+ # Try paragraph-based splitting first
457
  paragraphs = text.split('\n\n')
458
  if len(paragraphs) >= 3:
459
  clauses = []
 
461
  p = p.strip()
462
  if len(p) > 30:
463
  if len(p) > 1500:
464
+ # Split long paragraphs on sentences
465
  sents = re.split(r'(?<=[.!?])\s+(?=[A-Z])', p)
466
  current = ""
467
  for s in sents:
 
477
  clauses.append(p)
478
  return clauses
479
 
480
+ # Last resort: sentence splitting
481
  parts = re.split(r'(?<=[.!?])\s+(?=[A-Z0-9(])', text)
482
  return [p.strip() for p in parts if len(p.strip()) > 30]
483
 
484
  # ═══════════════════════════════════════════════════════════════════════
485
+ # 5. CLAUSE DETECTION — FIXED: sigmoid + per-class thresholds + caching
486
+ # Fix 3: Strip section headings before classification
487
+ # Fix 6: Label guardrails for high-confidence false positives
 
488
  # ═══════════════════════════════════════════════════════════════════════
489
 
490
+ # Fix 3: Section heading pattern — strip before classifying
491
  _HEADING_RE = re.compile(r'^\d+(?:\.\d+)*\s+[A-Z][A-Z\s&,/]+$', re.MULTILINE)
492
 
493
  def _strip_heading(text):
 
498
  return stripped if len(stripped) > 20 else text
499
  return text
500
 
501
+ # Fix 6: Label guardrails — keyword validation for high-confidence labels
502
  _LABEL_GUARDRAILS = {
503
  "Liquidated Damages": re.compile(
504
  r'liquidated|pre-?determined.{0,10}damage|agreed.{0,10}sum|penalty clause|stipulated.{0,10}damage',
 
508
  r'uncapped|unlimited.{0,10}liabilit|no.{0,10}(limit|cap).{0,10}liabilit',
509
  re.IGNORECASE
510
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  }
512
 
513
  def _apply_guardrails(label, text, confidence):
514
+ """Fix 6: If label has a guardrail and text lacks required keywords, demote."""
 
 
 
 
 
515
  guard = _LABEL_GUARDRAILS.get(label)
516
  if guard and not guard.search(text):
517
+ return "Other", confidence * 0.3 # demote to Other with reduced confidence
 
 
 
 
 
518
  return label, confidence
519
 
520
  def _text_hash(text):
521
  return hashlib.md5(text.encode()).hexdigest()
522
 
523
+ _prediction_cache = {}
524
+ _CACHE_MAX = 2000
525
 
526
  def classify_cuad(clause_text):
527
  if cuad_model is None or cuad_tokenizer is None:
528
  return _classify_regex(clause_text)
529
 
530
+ # Fix 3: Strip section headings before classification
531
  clean_text = _strip_heading(clause_text)
532
 
533
+ # Check cache
534
  h = _text_hash(clean_text[:512])
535
+ if h in _prediction_cache:
536
+ return _prediction_cache[h]
 
537
 
538
  try:
 
539
  inputs = cuad_tokenizer(
540
  clean_text,
541
  return_tensors="pt",
542
  truncation=True,
543
+ max_length=256,
544
  padding=True
545
  )
546
  with torch.no_grad():
547
  logits = cuad_model(**inputs).logits
548
 
549
+ # FIXED: Use sigmoid for multi-label (not softmax)
550
+ probs = torch.sigmoid(logits)[0]
 
 
 
 
 
 
551
 
552
  results = []
553
+ for i, prob in enumerate(probs):
554
+ threshold = _CUAD_THRESHOLDS.get(i, 0.40)
555
+ if float(prob) > threshold and i < len(CUAD_LABELS):
556
+ label = CUAD_LABELS[i]
557
+ conf = float(prob)
558
+ # Fix 6: Apply guardrails — reject high-confidence false positives
559
+ label, conf = _apply_guardrails(label, clause_text, conf)
560
+ if label == "Other" and conf < 0.3:
561
+ continue # Skip demoted labels
562
  risk = RISK_MAP.get(label, "LOW")
 
563
  results.append({
564
  "label": label,
565
  "confidence": round(conf, 3),
 
567
  "description": DESC_MAP.get(label, label),
568
  "source": "ml",
569
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  results.sort(key=lambda x: x["confidence"], reverse=True)
571
 
572
  # If no ML results, also try regex to catch what model misses
573
  if not results:
574
  results = _classify_regex(clause_text)
575
 
576
+ # Cache result
577
+ if len(_prediction_cache) < _CACHE_MAX:
578
+ _prediction_cache[h] = results
579
+
580
  return results
581
  except Exception as e:
582
  print(f"[ClauseGuard] CUAD inference error: {e}")
583
  return _classify_regex(clause_text)
584
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
585
  _REGEX_PATTERNS = {
586
  "Limitation of liability": [r"not liable", r"shall not be (liable|responsible)", r"in no event.*liable", r"limitation of liability", r"without warranty", r"disclaim"],
587
  "Unilateral termination": [r"terminat.*at any time", r"suspend.*account.*without", r"we may (terminat|suspend|discontinu)", r"right to (terminat|suspend)"],
 
593
  "Arbitration": [r"arbitrat", r"binding arbitration", r"waive.*right.*court", r"class action waiver"],
594
  "Governing Law": [r"governed by", r"laws of", r"jurisdiction of"],
595
  "Termination for Convenience": [r"terminat.*for convenience", r"terminat.*without cause", r"terminat.*at any time"],
596
+ "Non-Compete": [r"non-compete", r"shall not compete", r"competition"],
597
  "Exclusivity": [r"exclusive(?:ly)?(?:\s+(?:deal|relationship|partner|right))", r"exclusivity"],
598
+ "IP Ownership Assignment": [r"assign.*intellectual property", r"ownership of.*ip", r"all rights.*assign"],
599
  "Uncapped Liability": [r"unlimited liability", r"uncapped", r"no.*limit.*liability"],
600
  "Cap on Liability": [r"cap on liability", r"maximum liability", r"liability.*shall not exceed", r"aggregate liability.*not exceed"],
601
  "Indemnification": [r"indemnif", r"hold harmless", r"defend.*against.*claim"],
602
  "Confidentiality": [r"confidential(?:ity)?", r"non-disclosure", r"\bnda\b"],
603
  "Force Majeure": [r"force majeure", r"act of god", r"beyond.*(?:reasonable\s+)?control"],
604
  "Penalties": [r"penalt(?:y|ies)", r"late fee", r"default charge", r"interest on overdue"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
605
  }
606
 
 
 
 
 
 
607
  def _classify_regex(text):
608
  """Regex fallback — returns pattern match, NOT fake confidence."""
609
  text_lower = text.lower()
610
  results = []
611
  seen = set()
612
+ for label, patterns in _REGEX_PATTERNS.items():
613
  for pat in patterns:
614
+ if re.search(pat, text_lower):
615
  if label not in seen:
616
  risk = RISK_MAP.get(label, "MEDIUM")
617
  results.append({
618
  "label": label,
619
+ "confidence": None, # FIXED: no fake confidence for regex
620
  "risk": risk,
621
  "description": DESC_MAP.get(label, label),
622
  "source": "pattern",
 
627
 
628
  # ═══════════════════════════════════════════════════════════════════════
629
  # 6. LEGAL NER — ML model with regex fallback
 
630
  # ═══════════════════════════════════════════════════════════════════════
631
 
632
  def extract_entities(text):
633
  """Extract entities using ML model (matterstack/legal-bert-ner) with regex fallback."""
634
  entities = []
635
 
636
+ # Try ML NER first
637
  if _HAS_NER_MODEL and ner_pipeline is not None:
638
  try:
639
+ # Process in chunks (model has max length limits)
640
+ chunks = [text[i:i+512] for i in range(0, min(len(text), 10000), 450)]
641
+ offset = 0
642
+ for chunk in chunks:
643
+ ner_results = ner_pipeline(chunk)
 
 
 
 
 
644
  for ent in ner_results:
645
  if ent.get("score", 0) > 0.5:
646
  entities.append({
 
651
  "score": round(ent["score"], 3),
652
  "source": "ml",
653
  })
654
+ offset += 450
655
  except Exception as e:
656
  print(f"[ClauseGuard] ML NER error, falling back to regex: {e}")
657
  entities = _extract_entities_regex(text)
658
  else:
659
  entities = _extract_entities_regex(text)
660
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661
  # Always supplement with regex patterns for things NER often misses
662
  regex_ents = _extract_entities_regex(text)
663
+ # Merge: add regex entities that don't overlap with ML entities
664
  ml_spans = set()
665
  for e in entities:
666
  for pos in range(e["start"], e["end"]):
 
680
  return filtered
681
 
682
  def _map_ner_label(label):
683
+ """Map NER model labels to our entity types."""
684
  label = label.upper()
685
  mapping = {
686
+ "PER": "PERSON",
687
+ "PERSON": "PERSON",
688
+ "ORG": "PARTY",
689
+ "ORGANIZATION": "PARTY",
690
+ "LOC": "JURISDICTION",
691
+ "LOCATION": "JURISDICTION",
692
+ "GPE": "JURISDICTION",
693
+ "DATE": "DATE",
694
+ "MONEY": "MONEY",
695
+ "MISC": "MISC",
696
+ "LAW": "LEGAL_REF",
697
  }
698
  return mapping.get(label, label)
699
 
 
701
  """Regex-based NER fallback."""
702
  entities = []
703
  patterns = [
704
+ # Dates
705
  (r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', "DATE"),
706
  (r'\b\d{1,2}/\d{1,2}/\d{2,4}\b', "DATE"),
707
  (r'\b\d{1,2}-(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\d{2,4}\b', "DATE"),
708
  (r'\b(?:Effective|Commencement|Expiration|Termination)\s+Date\b', "DATE_REF"),
709
+ # Money
710
  (r'\$\s?\d{1,3}(?:,\d{3})*(?:\.\d{2})?(?:\s*(?:million|billion|thousand|M|B|K))?', "MONEY"),
711
  (r'\b\d{1,3}(?:,\d{3})*(?:\.\d{2})?\s*(?:USD|EUR|GBP|dollars|euros|pounds)', "MONEY"),
712
  (r'\b(?:USD|EUR|GBP)\s*\d{1,3}(?:,\d{3})*(?:\.\d{2})?', "MONEY"),
713
+ # Percentages
714
  (r'\b\d+(?:\.\d+)?%', "PERCENTAGE"),
715
+ # Durations
716
  (r'\b\d+\s*(?:year|month|week|day|business day)s?\b', "DURATION"),
717
+ # Parties (require suffix to reduce false positives)
718
  (r'\b[A-Z][A-Za-z0-9\s&,]+?(?:Inc\.?|LLC|Ltd\.?|Limited|Corp\.?|Corporation|PLC|GmbH|AG|S\.A\.?|B\.V\.?|L\.P\.?|LLP)\b', "PARTY"),
719
  (r'\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Customer|Vendor|Client)\b', "PARTY_ROLE"),
720
+ # Jurisdictions
721
  (r'\b(?:State|Commonwealth)\s+of\s+[A-Z][a-zA-Z\s]+', "JURISDICTION"),
722
  (r'\b(?:California|Delaware|New York|Texas|Florida|England|Ireland|Germany|France|Singapore|Hong Kong|Ontario|British Columbia)\b', "JURISDICTION"),
723
+ # Defined Terms (quoted or parenthesized)
724
  (r'"([A-Z][A-Za-z\s]{1,40})"', "DEFINED_TERM"),
725
  (r'\((?:the\s+)?"([A-Z][A-Za-z\s]{1,40})"\)', "DEFINED_TERM"),
726
  ]
 
737
  return entities
738
 
739
  # ═══════════════════════════════════════════════════════════════════════
740
+ # 7. NLI / CONTRADICTION DETECTION — Real semantic analysis
 
 
741
  # ═══════════════════════════════════════════════════════════════════════
742
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
743
  def detect_contradictions(clause_results, raw_text=""):
744
  """
745
  Detect contradictions using:
 
756
  clause_texts_by_label[cr["label"]].append(cr.get("text", ""))
757
 
758
  # ── 1. Semantic NLI (if model available) ──
759
+ if _HAS_NLI_MODEL and nli_pipeline is not None:
760
+ # Check clauses that belong to potentially conflicting categories
761
  conflict_pairs = [
762
  ("Uncapped Liability", "Cap on Liability",
763
  "Liability cannot be both uncapped and capped simultaneously."),
 
772
  texts_b = clause_texts_by_label[label_b]
773
  for ta in texts_a[:2]:
774
  for tb in texts_b[:2]:
775
+ try:
776
+ nli_result = nli_pipeline(
777
+ f"{ta[:256]} [SEP] {tb[:256]}",
778
+ truncation=True
779
+ )
780
+ # Check if model predicts contradiction
781
+ for r in (nli_result if isinstance(nli_result, list) else [nli_result]):
782
+ if r.get("label", "").lower() == "contradiction" and r.get("score", 0) > 0.6:
783
+ contradictions.append({
784
+ "type": "CONTRADICTION",
785
+ "explanation": explanation,
786
+ "severity": "HIGH",
787
+ "clauses": [label_a, label_b],
788
+ "confidence": round(r["score"], 3),
789
+ "source": "nli_model",
790
+ })
791
+ except Exception:
792
+ pass
793
 
794
  # Also check for internal contradictions within governing law / termination
795
  for label in ["Governing Law", "Termination for Convenience"]:
 
797
  if len(texts) >= 2:
798
  for i in range(len(texts)):
799
  for j in range(i + 1, min(len(texts), i + 3)):
800
+ try:
801
+ nli_result = nli_pipeline(
802
+ f"{texts[i][:256]} [SEP] {texts[j][:256]}",
803
+ truncation=True
804
+ )
805
+ for r in (nli_result if isinstance(nli_result, list) else [nli_result]):
806
+ if r.get("label", "").lower() == "contradiction" and r.get("score", 0) > 0.6:
807
+ contradictions.append({
808
+ "type": "CONTRADICTION",
809
+ "explanation": f"Conflicting {label} provisions detected — clauses contradict each other.",
810
+ "severity": "HIGH",
811
+ "clauses": [label],
812
+ "confidence": round(r["score"], 3),
813
+ "source": "nli_model",
814
+ })
815
+ except Exception:
816
+ pass
817
  else:
818
  # ── Heuristic fallback (improved) ──
819
  _heuristic_pairs = [
 
834
  "source": "heuristic",
835
  })
836
 
837
+ # ── 2. Missing critical clauses (Fix 4: check raw_text, not labels) ──
838
  _REQUIRED_CLAUSE_PATTERNS = {
839
  "Governing Law": re.compile(
840
  r'govern(?:ed|ing).{0,15}law|applicable.{0,10}law|laws?\s+of\s+the\s+state',
 
854
  ),
855
  }
856
  for clause_name, pattern in _REQUIRED_CLAUSE_PATTERNS.items():
857
+ # Check raw_text directly — it's stable and deterministic
858
  if not pattern.search(raw_text):
859
  contradictions.append({
860
  "type": "MISSING",
 
877
 
878
  # ═══════════════════════════════════════════════════════════════════════
879
  # 8. RISK SCORING
 
880
  # ═══════════════════════════════════════════════════════════════════════
881
 
882
  def compute_risk_score(clause_results, total_clauses):
 
886
  sev_counts[sev] += 1
887
  if total_clauses == 0:
888
  return 0, "A", sev_counts
 
 
 
 
 
 
 
 
 
 
889
  weighted = sum(sev_counts[s] * RISK_WEIGHTS[s] for s in sev_counts)
890
+ risk = min(100, round(weighted / max(1, total_clauses) * 10))
 
 
 
 
 
 
 
 
 
 
891
  if risk >= 70: grade = "F"
892
  elif risk >= 50: grade = "D"
893
  elif risk >= 30: grade = "C"
 
905
  clauses = split_clauses(text)
906
  if not clauses:
907
  return None, "No clauses detected in document"
 
 
 
 
908
  clause_results = []
909
+ for clause in clauses:
910
+ predictions = classify_cuad(clause)
911
  if predictions:
912
  for pred in predictions:
913
  clause_results.append({
 
922
  contradictions = detect_contradictions(clause_results, text)
923
  risk, grade, sev_counts = compute_risk_score(clause_results, len(clauses))
924
  obligations = extract_obligations(text)
925
+ # Fix 5: Compliance runs against full raw_text (already done in compliance.py)
926
  compliance = check_compliance(text)
927
 
928
+ # Fix 2: Compute flagged_clauses AFTER all processing is complete
929
  flagged_clause_count = len(clause_results)
930
  unique_flagged_texts = len(set(cr["text"] for cr in clause_results))
931
 
 
953
  return result, None
954
 
955
  # ═══════════════════════════════════════════════════════════════════════
956
+ # 10. EXPORT FUNCTIONS — FIXED: per-session temp files
957
  # ═══════════════════════════════════════════════════════════════════════
958
 
959
  def export_json(result):
 
981
  return output.getvalue()
982
 
983
  # ═══════════════════════════════════════════════════════════════════════
984
+ # 11. UI RENDERING — FIXED: shows confidence source properly
985
  # ═══════════════════════════════════════════════════════════════════════
986
 
987
  def render_summary(result):
 
1149
  entities = sorted(result.get("entities", []), key=lambda x: x["start"])
1150
  html_parts = []
1151
  last_end = 0
 
 
 
 
 
 
1152
  for e in entities:
1153
  if e["start"] >= last_end:
1154
+ html_parts.append(text[last_end:e["start"]].replace("<", "&lt;").replace(">", "&gt;"))
1155
+ color = {
1156
+ "DATE": "#bfdbfe", "DATE_REF": "#bfdbfe",
1157
+ "MONEY": "#bbf7d0", "PERCENTAGE": "#a7f3d0",
1158
+ "DURATION": "#c7d2fe",
1159
+ "PARTY": "#ddd6fe", "PARTY_ROLE": "#ddd6fe",
1160
+ "PERSON": "#fbcfe8",
1161
+ "JURISDICTION": "#fde68a",
1162
+ "DEFINED_TERM": "#fbcfe8",
1163
+ "LEGAL_REF": "#e5e7eb",
1164
+ }.get(e["type"], "#e5e7eb")
1165
+ label = e["type"].replace("_", " ")
1166
  html_parts.append(
1167
+ f'<mark style="background:{color};padding:1px 2px;border-radius:2px;font-size:12px;" title="{label}">{e["text"].replace("<","&lt;").replace(">","&gt;")}</mark>'
 
1168
  )
1169
  last_end = e["end"]
1170
+ html_parts.append(text[last_end:].replace("<", "&lt;").replace(">", "&gt;"))
1171
+ highlighted = "".join(html_parts)
1172
+ return f"""
1173
+ <div style="font-family:monospace;font-size:13px;line-height:1.6;padding:16px;border:1px solid #e5e7eb;border-radius:8px;background:#fff;max-height:600px;overflow-y:auto;white-space:pre-wrap;">
1174
+ {highlighted}
1175
+ </div>
1176
+ """
1177
 
1178
  # ═══════════════════════════════════════════════════════════════════════
1179
+ # 12. COMPARISON UI FUNCTIONS
1180
  # ═══════════════════════════════════════════════════════════════════════
1181
 
1182
  def run_comparison(text_a, text_b):
 
1316
 
1317
  14. THIRD PARTY BENEFICIARY. No third party shall have rights under this Agreement except as expressly provided."""
1318
 
 
 
 
 
1319
  with gr.Blocks(
1320
  title="ClauseGuard — AI Contract Analysis",
1321
  css="""
 
1334
  <h1 style="font-size:24px;font-weight:700;margin:0;color:#1f2937;">🛡️ ClauseGuard</h1>
1335
  <p style="font-size:13px;color:#6b7280;margin:4px 0 0 0;">AI-Powered Legal Contract Analysis · 41 Clause Categories · Risk Scoring · ML NER · NLI Contradictions · Compliance · Obligations · <strong>Q&A Chatbot</strong> · <strong>Clause Redlining</strong> · <strong>OCR</strong></p>
1336
  </div>
1337
+ <div style="font-size:12px;color:#9ca3af;">v4.0 · Precision Legal AI</div>
1338
  </div>
1339
  """)
1340
 
 
1457
  <h3 style="margin:0;font-size:16px;color:#1f2937;">Contract Q&A Chatbot</h3>
1458
  </div>
1459
  <p style="font-size:12px;color:#6b7280;margin:0;line-height:1.5;">
1460
+ Ask questions about your analyzed contract. The chatbot uses <strong>RAG</strong> (Retrieval-Augmented Generation)
1461
  to find relevant clauses and generate accurate answers grounded in your contract text.
1462
  <br>
1463
  <strong>Step 1:</strong> Analyze a contract in the "📄 Single Contract Analysis" tab.
 
1526
  doc_html, obligations_html, compliance_html, redlining_html,
1527
  json_file, csv_file, status_msg, analysis_state,
1528
  chunks_state, embeddings_state, chatbot_index_status,
1529
+ ]
 
1530
  )
1531
 
1532
  clear_btn.click(
 
1542
  comp_btn.click(
1543
  run_comparison,
1544
  inputs=[comp_text_a, comp_text_b],
1545
+ outputs=[comp_result_html, comp_json]
 
1546
  )
1547
 
1548
  gr.HTML("""
1549
  <div style="margin-top:24px;padding:16px 0;border-top:1px solid #e5e7eb;text-align:center;">
1550
  <p style="font-size:11px;color:#9ca3af;">
1551
  ⚠️ Not legal advice. For informational purposes only.
1552
+ · Model: <a href="https://huggingface.co/Mokshith31/legalbert-contract-clause-classification" style="color:#6b7280;">Legal-BERT + CUAD (41 classes)</a>
1553
  · NER: <a href="https://huggingface.co/matterstack/legal-bert-ner" style="color:#6b7280;">Legal-BERT NER</a>
1554
  · NLI: <a href="https://huggingface.co/cross-encoder/nli-deberta-v3-base" style="color:#6b7280;">DeBERTa-v3 NLI</a>
1555
  · LLM: <a href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct" style="color:#6b7280;">Qwen2.5-7B</a>
1556
  · OCR: <a href="https://github.com/mindee/doctr" style="color:#6b7280;">docTR</a>
 
1557
  · Dataset: <a href="https://huggingface.co/datasets/theatticusproject/cuad-qa" style="color:#6b7280;">CUAD</a>
1558
  · <a href="https://huggingface.co/spaces/gaurv007/ClauseGuard" style="color:#6b7280;">ClauseGuard Space</a>
1559
  </p>
chatbot.py CHANGED
@@ -52,9 +52,7 @@ except ImportError:
52
  _chatbot_status = {"embedder": "not_loaded", "llm": "not_loaded"}
53
 
54
  def _load_embedder():
55
- """Load sentence-transformers embedding model (lazy).
56
- PERF v4.3: Upgraded from all-MiniLM-L6-v2 to BAAI/bge-small-en-v1.5
57
- (+21% MTEB retrieval accuracy, same 384-dim, same latency)."""
58
  global _embedder, _chatbot_status
59
  if _embedder is not None:
60
  return _embedder
@@ -62,10 +60,10 @@ def _load_embedder():
62
  _chatbot_status["embedder"] = "unavailable"
63
  return None
64
  try:
65
- print("[ClauseGuard Chat] Loading embedding model: BAAI/bge-small-en-v1.5...")
66
- _embedder = SentenceTransformer("BAAI/bge-small-en-v1.5")
67
  _chatbot_status["embedder"] = "loaded"
68
- print("[ClauseGuard Chat] Embedding model loaded (BGE-small, 384-dim)")
69
  return _embedder
70
  except Exception as e:
71
  _chatbot_status["embedder"] = f"failed: {e}"
@@ -196,9 +194,7 @@ def retrieve_chunks(query, chunks, embeddings, top_k=5):
196
  return []
197
 
198
  try:
199
- # PERF v4.3: BGE models require query instruction prefix for retrieval
200
- _BGE_QUERY_PREFIX = "Represent this sentence for searching relevant passages: "
201
- q_emb = embedder.encode([_BGE_QUERY_PREFIX + query], normalize_embeddings=True)
202
  scores = (q_emb @ embeddings.T)[0]
203
  top_indices = np.argsort(scores)[::-1][:top_k]
204
 
 
52
  _chatbot_status = {"embedder": "not_loaded", "llm": "not_loaded"}
53
 
54
  def _load_embedder():
55
+ """Load sentence-transformers embedding model (lazy)."""
 
 
56
  global _embedder, _chatbot_status
57
  if _embedder is not None:
58
  return _embedder
 
60
  _chatbot_status["embedder"] = "unavailable"
61
  return None
62
  try:
63
+ print("[ClauseGuard Chat] Loading embedding model: all-MiniLM-L6-v2...")
64
+ _embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
65
  _chatbot_status["embedder"] = "loaded"
66
+ print("[ClauseGuard Chat] Embedding model loaded")
67
  return _embedder
68
  except Exception as e:
69
  _chatbot_status["embedder"] = f"failed: {e}"
 
194
  return []
195
 
196
  try:
197
+ q_emb = embedder.encode([query], normalize_embeddings=True)
 
 
198
  scores = (q_emb @ embeddings.T)[0]
199
  top_indices = np.argsort(scores)[::-1][:top_k]
200
 
compare.py CHANGED
@@ -1,36 +1,34 @@
1
  """
2
- ClauseGuard — Contract Comparison Engine v3.1
3
  ═════════════════════════════════════════════
4
- FIXED in v3.1:
5
- PERF: Pre-compute all embeddings once, use matrix multiplication (was O(n²) per-pair encoding)
6
- FIX: Shared SentenceTransformer singleton (no duplicate model loading)
7
- FIX: Raised similarity thresholds to reduce false matches
 
8
  """
9
 
10
  import re
11
  from difflib import SequenceMatcher
12
  from collections import defaultdict
13
- import numpy as np
14
 
15
  # Try to load sentence-transformers for semantic comparison
16
  _HAS_EMBEDDINGS = False
17
  _embedder = None
18
 
19
  try:
20
- from sentence_transformers import SentenceTransformer
21
  _HAS_EMBEDDINGS = True
22
  except ImportError:
23
  pass
24
 
25
 
26
  def _load_embedder():
27
- """Load shared SentenceTransformer singleton.
28
- PERF v4.3: Upgraded to BAAI/bge-small-en-v1.5 (+21% retrieval accuracy)."""
29
  global _embedder
30
  if _HAS_EMBEDDINGS and _embedder is None:
31
  try:
32
- _embedder = SentenceTransformer("BAAI/bge-small-en-v1.5")
33
- print("[ClauseGuard] Sentence embeddings loaded for comparison (BGE-small)")
34
  except Exception as e:
35
  print(f"[ClauseGuard] Embeddings not available: {e}")
36
 
@@ -43,34 +41,18 @@ def _normalize_clause(text):
43
  return text
44
 
45
 
46
- def _compute_similarity_matrix(clauses_a, clauses_b):
47
- """
48
- FIX v3.1: Compute similarity matrix using pre-computed embeddings + matrix multiply.
49
- Was: O(n²) individual encode() calls per pair.
50
- Now: O(n+m) encode calls + O(n*m) dot product (fast numpy).
51
- """
52
  if _embedder is not None:
53
  try:
54
- # Encode all clauses at once (batched)
55
- texts_a = [c[:512] for c in clauses_a]
56
- texts_b = [c[:512] for c in clauses_b]
57
- emb_a = _embedder.encode(texts_a, normalize_embeddings=True, batch_size=32, show_progress_bar=False)
58
- emb_b = _embedder.encode(texts_b, normalize_embeddings=True, batch_size=32, show_progress_bar=False)
59
- # Cosine similarity via dot product (embeddings are L2-normalized)
60
- sim_matrix = np.dot(emb_a, emb_b.T)
61
- return sim_matrix, "semantic"
62
  except Exception:
63
  pass
64
-
65
- # Fallback: string matching (still compute matrix)
66
- n, m = len(clauses_a), len(clauses_b)
67
- sim_matrix = np.zeros((n, m))
68
- for i in range(n):
69
- norm_a = _normalize_clause(clauses_a[i])
70
- for j in range(m):
71
- norm_b = _normalize_clause(clauses_b[j])
72
- sim_matrix[i, j] = SequenceMatcher(None, norm_a, norm_b).ratio()
73
- return sim_matrix, "lexical"
74
 
75
 
76
  def _extract_clause_type(clause_text):
@@ -107,14 +89,16 @@ def compare_contracts(text_a, text_b, clauses_a=None, clauses_b=None):
107
  if not text_a or not text_b:
108
  return {"error": "Both contracts required"}
109
 
 
110
  _load_embedder()
111
 
 
112
  if clauses_a is None:
113
  clauses_a = _split_clauses(text_a)
114
  if clauses_b is None:
115
  clauses_b = _split_clauses(text_b)
116
 
117
- # Detect contract types and flag cross-domain comparisons
118
  _CONTRACT_TYPE_KEYWORDS = {
119
  "employment": ["employee", "employer", "salary", "compensation", "benefits", "vacation", "severance", "at-will"],
120
  "lease": ["landlord", "tenant", "rent", "premises", "lease", "occupancy", "security deposit", "eviction"],
@@ -144,35 +128,25 @@ def compare_contracts(text_a, text_b, clauses_a=None, clauses_b=None):
144
  for c in clauses_b:
145
  type_map_b[_extract_clause_type(c)].append(c)
146
 
147
- # FIX v3.1: Compute similarity matrix once (O(n+m) encoding + O(n*m) dot product)
148
- if clauses_a and clauses_b:
149
- sim_matrix, method_type = _compute_similarity_matrix(clauses_a, clauses_b)
150
- else:
151
- sim_matrix = np.zeros((0, 0))
152
- method_type = "none"
153
-
154
- # Find matches using the pre-computed matrix
155
  matched_a = set()
156
  matched_b = set()
157
  modified = []
158
 
159
- SIMILARITY_THRESHOLD = 0.75
160
- MODIFIED_THRESHOLD = 0.55
161
-
162
- for i in range(len(clauses_a)):
163
- if len(clauses_b) == 0:
164
- break
165
- # Find best match for clause i in A
166
- row = sim_matrix[i]
167
- # Mask already-matched B clauses
168
- available = np.ones(len(clauses_b), dtype=bool)
169
- for j in matched_b:
170
- available[j] = False
171
- if not available.any():
172
- break
173
- masked_row = np.where(available, row, -1.0)
174
- best_j = int(np.argmax(masked_row))
175
- best_sim = masked_row[best_j]
176
 
177
  if best_sim >= SIMILARITY_THRESHOLD:
178
  matched_a.add(i)
@@ -180,20 +154,21 @@ def compare_contracts(text_a, text_b, clauses_a=None, clauses_b=None):
180
  if best_sim < 0.95:
181
  modified.append({
182
  "type": "modified",
183
- "similarity": round(float(best_sim), 3),
184
- "clause_a": clauses_a[i][:200],
185
  "clause_b": clauses_b[best_j][:200],
186
- "clause_type": _extract_clause_type(clauses_a[i]),
187
  })
188
  elif best_sim >= MODIFIED_THRESHOLD:
189
  matched_a.add(i)
190
- matched_b.add(best_j)
 
191
  modified.append({
192
  "type": "partial",
193
- "similarity": round(float(best_sim), 3),
194
- "clause_a": clauses_a[i][:200],
195
- "clause_b": clauses_b[best_j][:200],
196
- "clause_type": _extract_clause_type(clauses_a[i]),
197
  })
198
 
199
  removed = [clauses_a[i] for i in range(len(clauses_a)) if i not in matched_a]
@@ -201,9 +176,12 @@ def compare_contracts(text_a, text_b, clauses_a=None, clauses_b=None):
201
 
202
  # Compute alignment score
203
  total_pairs = max(len(clauses_a), len(clauses_b))
204
- alignment = len(matched_a) / total_pairs if total_pairs > 0 else 0.0
 
 
 
205
 
206
- # Risk delta
207
  risk_keywords = ["unlimited", "unilateral", "waive", "arbitration", "indemnif",
208
  "not liable", "no warranty", "sole discretion", "terminate",
209
  "non-compete", "liquidated damages", "uncapped"]
@@ -226,11 +204,12 @@ def compare_contracts(text_a, text_b, clauses_a=None, clauses_b=None):
226
  risk_delta = "Similar risk profiles"
227
  risk_winner = "tie"
228
 
 
229
  if is_cross_domain:
230
  risk_delta = f"Cross-domain comparison ({type_a} vs {type_b}) — risk delta not meaningful across different contract types"
231
  risk_winner = "cross-domain"
232
 
233
- comparison_method = f"semantic (sentence embeddings)" if method_type == "semantic" else "lexical (string matching)"
234
 
235
  return {
236
  "alignment_score": round(alignment, 3),
@@ -253,12 +232,14 @@ def compare_contracts(text_a, text_b, clauses_a=None, clauses_b=None):
253
  def _split_clauses(text):
254
  """Split text into clauses."""
255
  text = re.sub(r'\n{3,}', '\n\n', text.strip())
 
256
  section_splits = re.split(
257
  r'(?:\n\n)(?=\d+[.)]\s|\([a-z]\)\s|(?:Section|Article|Clause)\s+\d+)',
258
  text
259
  )
260
  if len(section_splits) >= 3:
261
  return [p.strip() for p in section_splits if len(p.strip()) > 30]
 
262
  parts = re.split(
263
  r'(?<=[.!?])\s+(?=[A-Z0-9(])|(?:\n\n)',
264
  text
@@ -304,6 +285,7 @@ def render_comparison_html(result):
304
  </div>
305
  '''
306
 
 
307
  if result["modified_clauses"]:
308
  html += '<div style="margin-bottom:16px;"><h3 style="font-size:14px;color:#374151;margin-bottom:8px;">📝 Modified Clauses</h3>'
309
  for m in result["modified_clauses"][:20]:
@@ -318,12 +300,14 @@ def render_comparison_html(result):
318
  '''
319
  html += '</div>'
320
 
 
321
  if result["added_clauses"]:
322
  html += '<div style="margin-bottom:16px;"><h3 style="font-size:14px;color:#374151;margin-bottom:8px;">➕ Added in Contract B</h3>'
323
  for a in result["added_clauses"][:15]:
324
  html += f'<div style="background:#f0fdf4;padding:8px;border-radius:4px;font-size:12px;color:#166534;margin-bottom:4px;border-left:3px solid #22c55e;"><b>{a["type"].upper()}</b> · {a["text"][:150]}...</div>'
325
  html += '</div>'
326
 
 
327
  if result["removed_clauses"]:
328
  html += '<div style="margin-bottom:16px;"><h3 style="font-size:14px;color:#374151;margin-bottom:8px;">➖ Removed from Contract A</h3>'
329
  for r in result["removed_clauses"][:15]:
 
1
  """
2
+ ClauseGuard — Contract Comparison Engine v3.0
3
  ═════════════════════════════════════════════
4
+ FIXED in v3.0:
5
+ Semantic similarity using sentence embeddings (when available)
6
+ Better clause type detection with legal taxonomy
7
+ Improved diff visualization
8
+ • Fallback to SequenceMatcher when embeddings unavailable
9
  """
10
 
11
  import re
12
  from difflib import SequenceMatcher
13
  from collections import defaultdict
 
14
 
15
  # Try to load sentence-transformers for semantic comparison
16
  _HAS_EMBEDDINGS = False
17
  _embedder = None
18
 
19
  try:
20
+ from sentence_transformers import SentenceTransformer, util
21
  _HAS_EMBEDDINGS = True
22
  except ImportError:
23
  pass
24
 
25
 
26
  def _load_embedder():
 
 
27
  global _embedder
28
  if _HAS_EMBEDDINGS and _embedder is None:
29
  try:
30
+ _embedder = SentenceTransformer("all-MiniLM-L6-v2")
31
+ print("[ClauseGuard] Sentence embeddings loaded for comparison")
32
  except Exception as e:
33
  print(f"[ClauseGuard] Embeddings not available: {e}")
34
 
 
41
  return text
42
 
43
 
44
+ def _clause_similarity(a, b):
45
+ """Compute similarity using semantic embeddings or string matching."""
 
 
 
 
46
  if _embedder is not None:
47
  try:
48
+ emb_a = _embedder.encode(a[:512], convert_to_tensor=True)
49
+ emb_b = _embedder.encode(b[:512], convert_to_tensor=True)
50
+ sim = util.cos_sim(emb_a, emb_b).item()
51
+ return max(0, min(1, sim))
 
 
 
 
52
  except Exception:
53
  pass
54
+ # Fallback to string matching
55
+ return SequenceMatcher(None, _normalize_clause(a), _normalize_clause(b)).ratio()
 
 
 
 
 
 
 
 
56
 
57
 
58
  def _extract_clause_type(clause_text):
 
89
  if not text_a or not text_b:
90
  return {"error": "Both contracts required"}
91
 
92
+ # Try to load embedder
93
  _load_embedder()
94
 
95
+ # Split into clauses if not provided
96
  if clauses_a is None:
97
  clauses_a = _split_clauses(text_a)
98
  if clauses_b is None:
99
  clauses_b = _split_clauses(text_b)
100
 
101
+ # Fix 9: Detect contract types and flag cross-domain comparisons
102
  _CONTRACT_TYPE_KEYWORDS = {
103
  "employment": ["employee", "employer", "salary", "compensation", "benefits", "vacation", "severance", "at-will"],
104
  "lease": ["landlord", "tenant", "rent", "premises", "lease", "occupancy", "security deposit", "eviction"],
 
128
  for c in clauses_b:
129
  type_map_b[_extract_clause_type(c)].append(c)
130
 
131
+ # Find matches
 
 
 
 
 
 
 
132
  matched_a = set()
133
  matched_b = set()
134
  modified = []
135
 
136
+ # Fix 10: Raise thresholds to reject false "modified" matches
137
+ SIMILARITY_THRESHOLD = 0.75 # was 0.70 — too many false matches
138
+ MODIFIED_THRESHOLD = 0.55 # was 0.40 — "Good Reason" ≠ "Force Majeure"
139
+
140
+ for i, ca in enumerate(clauses_a):
141
+ best_sim = 0
142
+ best_j = -1
143
+ for j, cb in enumerate(clauses_b):
144
+ if j in matched_b:
145
+ continue
146
+ sim = _clause_similarity(ca, cb)
147
+ if sim > best_sim:
148
+ best_sim = sim
149
+ best_j = j
 
 
 
150
 
151
  if best_sim >= SIMILARITY_THRESHOLD:
152
  matched_a.add(i)
 
154
  if best_sim < 0.95:
155
  modified.append({
156
  "type": "modified",
157
+ "similarity": round(best_sim, 3),
158
+ "clause_a": ca[:200],
159
  "clause_b": clauses_b[best_j][:200],
160
+ "clause_type": _extract_clause_type(ca),
161
  })
162
  elif best_sim >= MODIFIED_THRESHOLD:
163
  matched_a.add(i)
164
+ if best_j >= 0:
165
+ matched_b.add(best_j)
166
  modified.append({
167
  "type": "partial",
168
+ "similarity": round(best_sim, 3),
169
+ "clause_a": ca[:200],
170
+ "clause_b": clauses_b[best_j][:200] if best_j >= 0 else "",
171
+ "clause_type": _extract_clause_type(ca),
172
  })
173
 
174
  removed = [clauses_a[i] for i in range(len(clauses_a)) if i not in matched_a]
 
176
 
177
  # Compute alignment score
178
  total_pairs = max(len(clauses_a), len(clauses_b))
179
+ if total_pairs > 0:
180
+ alignment = len(matched_a) / total_pairs
181
+ else:
182
+ alignment = 0.0
183
 
184
+ # Risk delta: compare risk keywords with context
185
  risk_keywords = ["unlimited", "unilateral", "waive", "arbitration", "indemnif",
186
  "not liable", "no warranty", "sole discretion", "terminate",
187
  "non-compete", "liquidated damages", "uncapped"]
 
204
  risk_delta = "Similar risk profiles"
205
  risk_winner = "tie"
206
 
207
+ # Fix 9: Cross-domain warning
208
  if is_cross_domain:
209
  risk_delta = f"Cross-domain comparison ({type_a} vs {type_b}) — risk delta not meaningful across different contract types"
210
  risk_winner = "cross-domain"
211
 
212
+ comparison_method = "semantic (sentence embeddings)" if _embedder is not None else "lexical (string matching)"
213
 
214
  return {
215
  "alignment_score": round(alignment, 3),
 
232
  def _split_clauses(text):
233
  """Split text into clauses."""
234
  text = re.sub(r'\n{3,}', '\n\n', text.strip())
235
+ # Try section-based splitting first
236
  section_splits = re.split(
237
  r'(?:\n\n)(?=\d+[.)]\s|\([a-z]\)\s|(?:Section|Article|Clause)\s+\d+)',
238
  text
239
  )
240
  if len(section_splits) >= 3:
241
  return [p.strip() for p in section_splits if len(p.strip()) > 30]
242
+ # Fallback to paragraph/sentence splitting
243
  parts = re.split(
244
  r'(?<=[.!?])\s+(?=[A-Z0-9(])|(?:\n\n)',
245
  text
 
285
  </div>
286
  '''
287
 
288
+ # Modified clauses
289
  if result["modified_clauses"]:
290
  html += '<div style="margin-bottom:16px;"><h3 style="font-size:14px;color:#374151;margin-bottom:8px;">📝 Modified Clauses</h3>'
291
  for m in result["modified_clauses"][:20]:
 
300
  '''
301
  html += '</div>'
302
 
303
+ # Added clauses
304
  if result["added_clauses"]:
305
  html += '<div style="margin-bottom:16px;"><h3 style="font-size:14px;color:#374151;margin-bottom:8px;">➕ Added in Contract B</h3>'
306
  for a in result["added_clauses"][:15]:
307
  html += f'<div style="background:#f0fdf4;padding:8px;border-radius:4px;font-size:12px;color:#166534;margin-bottom:4px;border-left:3px solid #22c55e;"><b>{a["type"].upper()}</b> · {a["text"][:150]}...</div>'
308
  html += '</div>'
309
 
310
+ # Removed clauses
311
  if result["removed_clauses"]:
312
  html += '<div style="margin-bottom:16px;"><h3 style="font-size:14px;color:#374151;margin-bottom:8px;">➖ Removed from Contract A</h3>'
313
  for r in result["removed_clauses"][:15]:
compliance.py CHANGED
@@ -1,11 +1,11 @@
1
  """
2
- ClauseGuard — Compliance Checker v3.1
3
  ═════════════════════════════════════
4
- FIXED in v3.1:
5
- FIX: Expanded negation window from 100 to 200 chars to catch cross-sentence negation
6
- FIX: Added sentence-boundary-aware negation detection
7
- FIX: Improved context extraction with sentence boundaries
8
- FIX: Added AMBIGUOUS handling for mixed positive/negative signals
9
  """
10
 
11
  import re
@@ -13,19 +13,13 @@ from collections import defaultdict
13
 
14
  # Negation patterns that invert compliance meaning
15
  _NEGATION_PATTERNS = [
16
- r"(?:does?\s+)?not\s+(?:require|provide|include|offer|grant|guarantee|ensure|maintain|comply|adhere|support|acknowledge)",
17
- r"(?:no|without)\s+(?:obligation|requirement|guarantee|warranty|commitment|responsibility|duty)",
18
- r"(?:exclud|waiv|disclaim|exempt|refus|deny|reject|eliminat|remov|revok)",
19
- r"shall\s+not\s+be\s+(?:required|obligated|responsible|liable|bound|subject)",
20
- r"is\s+not\s+(?:responsible|liable|required|obligated|bound|subject)",
21
- r"expressly\s+(?:disclaim|exclud|waiv|reject)",
22
- r"to\s+the\s+(?:maximum|fullest)\s+extent\s+(?:permitted|allowed).*(?:disclaim|exclud|waiv)",
23
- r"notwithstanding.*(?:shall\s+not|does\s+not|is\s+not)",
24
  ]
25
 
26
- # FIX v4.2: Pre-compile negation patterns at module level
27
- _NEGATION_PATTERNS_COMPILED = [re.compile(p, re.IGNORECASE) for p in _NEGATION_PATTERNS]
28
-
29
  # Regulatory requirement definitions
30
  REGULATIONS = {
31
  "GDPR": {
@@ -178,59 +172,24 @@ RISK_STYLES = {
178
  }
179
 
180
 
181
- def _get_sentence_containing(text_lower, keyword_lower, start_idx):
182
- """FIX v3.1: Extract the full sentence containing the keyword match."""
183
- # Find sentence boundaries around the match
184
- # Look backward for sentence start
185
- sent_start = start_idx
186
- for i in range(start_idx - 1, max(0, start_idx - 500), -1):
187
- if text_lower[i] in '.!?' and i < start_idx - 2:
188
- sent_start = i + 1
189
- break
190
- else:
191
- sent_start = max(0, start_idx - 500)
192
-
193
- # Look forward for sentence end
194
- sent_end = start_idx + len(keyword_lower)
195
- for i in range(sent_end, min(len(text_lower), sent_end + 500)):
196
- if text_lower[i] in '.!?':
197
- sent_end = i + 1
198
- break
199
- else:
200
- sent_end = min(len(text_lower), sent_end + 500)
201
-
202
- return text_lower[sent_start:sent_end].strip()
203
-
204
-
205
- def _check_negation(text_lower, keyword, window=200):
206
- """FIX v3.1: Check if a keyword match is negated — uses sentence-aware window."""
207
  idx = text_lower.find(keyword.lower())
208
  if idx == -1:
209
  return False
210
-
211
- # Get sentence-aware context (more accurate than fixed window)
212
- sentence = _get_sentence_containing(text_lower, keyword.lower(), idx)
213
-
214
- # Also get a wider window for cross-sentence negation
215
  start = max(0, idx - window)
216
  end = min(len(text_lower), idx + len(keyword) + window)
217
- wider_context = text_lower[start:end]
218
-
219
- # Check sentence first (higher confidence)
220
- for neg_pat in _NEGATION_PATTERNS_COMPILED:
221
- if neg_pat.search(sentence):
222
- return True
223
 
224
- # Then check wider window (lower confidence, still relevant)
225
- for neg_pat in _NEGATION_PATTERNS_COMPILED[:4]: # Only strong negation patterns for wider window
226
- if neg_pat.search(wider_context):
227
  return True
228
-
229
  return False
230
 
231
 
232
- def _get_context(text, keyword, window=100):
233
- """Extract context around a keyword match with sentence boundaries."""
234
  text_lower = text.lower()
235
  idx = text_lower.find(keyword.lower())
236
  if idx == -1:
@@ -245,55 +204,12 @@ def _get_context(text, keyword, window=100):
245
  return context
246
 
247
 
248
- # FIX v4.3: Regulation applicability gates — only apply regulations relevant to the contract type
249
- _REGULATION_GATES = {
250
- "SOX": re.compile(
251
- r'financial\s+statement|internal\s+control|audit\s+committee|public\s+company|sec\s+filing|pcaob|sarbanes',
252
- re.IGNORECASE
253
- ),
254
- "HIPAA": re.compile(
255
- r'protected\s+health|(?<!\w)phi(?!\w)|health\s+information|medical\s+record|business\s+associate\s+agreement|(?<!\w)baa(?!\w)|hipaa',
256
- re.IGNORECASE
257
- ),
258
- "FINRA": re.compile(
259
- r'securities|broker[\-\s]?dealer|investment\s+advis|financial\s+industry|(?<!\w)finra(?!\w)|registered\s+representative',
260
- re.IGNORECASE
261
- ),
262
- }
263
-
264
-
265
  def check_compliance(text):
266
- """Check contract text against applicable regulatory frameworks with negation handling.
267
-
268
- FIX v4.3:
269
- - Regulation applicability gates: SOX/HIPAA/FINRA only checked if contract contains relevant terms
270
- - Whole-word keyword matching: prevents substring false positives (e.g. "SAR" in "Year 3")
271
- - GDPR and CCPA always checked (broadly applicable)
272
- """
273
  text_lower = text.lower()
274
  results = {}
275
 
276
- # FIX v4.3: Determine which regulations apply to this contract
277
- applicable_regs = {"GDPR", "CCPA"} # Always check these
278
- for reg_name, gate_pattern in _REGULATION_GATES.items():
279
- if gate_pattern.search(text):
280
- applicable_regs.add(reg_name)
281
-
282
  for reg_name, reg_data in REGULATIONS.items():
283
- # FIX v4.3: Skip regulations that don't apply to this contract
284
- if reg_name not in applicable_regs:
285
- # Still include in results but mark as not applicable
286
- results[reg_name] = {
287
- "description": reg_data["description"],
288
- "compliance_rate": -1, # -1 = not applicable
289
- "checks": [],
290
- "overall_status": "NOT_APPLICABLE",
291
- "negated_count": 0,
292
- "ambiguous_count": 0,
293
- "note": f"{reg_name} does not appear applicable to this contract type.",
294
- }
295
- continue
296
-
297
  checks = []
298
  for req_name, req_data in reg_data["requirements"].items():
299
  matched = False
@@ -302,27 +218,17 @@ def check_compliance(text):
302
  context_snippets = []
303
 
304
  for kw in req_data["keywords"]:
305
- # FIX v4.3: Use whole-word matching to prevent substring false positives
306
- # e.g., "SAR" should not match "Year 3" tokenised fragments
307
- kw_lower = kw.lower()
308
- if len(kw_lower) <= 4:
309
- # Short keywords (SAR, DPO, PHI, BAA) — require word boundaries
310
- pattern = re.compile(r'\b' + re.escape(kw_lower) + r'\b', re.IGNORECASE)
311
- if not pattern.search(text_lower):
312
- continue
313
- else:
314
- # Longer keywords — substring is OK
315
- if kw_lower not in text_lower:
316
- continue
317
-
318
- matched_keywords.append(kw)
319
- if _check_negation(text_lower, kw):
320
- negated = True
321
- else:
322
- matched = True
323
- ctx = _get_context(text, kw)
324
- if ctx:
325
- context_snippets.append(ctx)
326
 
327
  if matched and not negated:
328
  status = "PASS"
@@ -339,7 +245,7 @@ def check_compliance(text):
339
  "severity": req_data["severity"],
340
  "status": status,
341
  "matched_keywords": matched_keywords,
342
- "context": context_snippets[:2],
343
  })
344
 
345
  passed = sum(1 for c in checks if c["status"] == "PASS")
@@ -356,6 +262,7 @@ def check_compliance(text):
356
  else:
357
  overall = "NON-COMPLIANT"
358
 
 
359
  if any(c["status"] == "NEGATED" and c["severity"] in ("CRITICAL", "HIGH") for c in checks):
360
  overall = "WARNING"
361
 
@@ -379,28 +286,6 @@ def render_compliance_html(results):
379
  rate = reg_result["compliance_rate"]
380
  status = reg_result["overall_status"]
381
 
382
- # FIX v4.3: Handle NOT_APPLICABLE regulations
383
- if status == "NOT_APPLICABLE":
384
- note = reg_result.get("note", f"{reg_name} not applicable to this contract.")
385
- html += f'''
386
- <div style="border:1px solid #e5e7eb;border-radius:10px;margin-bottom:16px;overflow:hidden;opacity:0.6;">
387
- <div style="display:flex;justify-content:space-between;align-items:center;padding:12px 16px;background:#f9fafb;border-bottom:1px solid #e5e7eb;">
388
- <div>
389
- <span style="font-size:16px;font-weight:700;color:#9ca3af;">{reg_name}</span>
390
- <p style="font-size:11px;color:#9ca3af;margin:2px 0 0 0;">{reg_result["description"]}</p>
391
- </div>
392
- <div style="text-align:right;">
393
- <div style="font-size:12px;font-weight:600;color:#9ca3af;">N/A</div>
394
- <div style="font-size:10px;color:#9ca3af;">Not Applicable</div>
395
- </div>
396
- </div>
397
- <div style="padding:10px 16px;font-size:11px;color:#9ca3af;font-style:italic;">
398
- {note}
399
- </div>
400
- </div>
401
- '''
402
- continue
403
-
404
  status_colors = {
405
  "COMPLIANT": ("#16a34a", "#f0fdf4"),
406
  "PARTIAL": ("#ca8a04", "#fefce8"),
 
1
  """
2
+ ClauseGuard — Compliance Checker v3.0
3
  ═════════════════════════════════════
4
+ FIXED in v3.0:
5
+ Negation handling (clause saying "we do NOT" won't score as PASS)
6
+ Context windows around keyword matches (shows what the clause actually says)
7
+ Semantic scoring (keyword proximity + negation awareness)
8
+ • Added more regulatory frameworks
9
  """
10
 
11
  import re
 
13
 
14
  # Negation patterns that invert compliance meaning
15
  _NEGATION_PATTERNS = [
16
+ r"(?:does?\s+)?not\s+(?:require|provide|include|offer|grant|guarantee|ensure|maintain)",
17
+ r"(?:no|without)\s+(?:obligation|requirement|guarantee|warranty)",
18
+ r"(?:exclud|waiv|disclaim|exempt|refus|deny|reject)",
19
+ r"shall\s+not\s+be\s+(?:required|obligated|responsible)",
20
+ r"is\s+not\s+(?:responsible|liable|required|obligated)",
 
 
 
21
  ]
22
 
 
 
 
23
  # Regulatory requirement definitions
24
  REGULATIONS = {
25
  "GDPR": {
 
172
  }
173
 
174
 
175
+ def _check_negation(text_lower, keyword, window=100):
176
+ """Check if a keyword match is negated by nearby negation words."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  idx = text_lower.find(keyword.lower())
178
  if idx == -1:
179
  return False
180
+ # Get context window around the match
 
 
 
 
181
  start = max(0, idx - window)
182
  end = min(len(text_lower), idx + len(keyword) + window)
183
+ context = text_lower[start:end]
 
 
 
 
 
184
 
185
+ for neg_pat in _NEGATION_PATTERNS:
186
+ if re.search(neg_pat, context, re.IGNORECASE):
 
187
  return True
 
188
  return False
189
 
190
 
191
+ def _get_context(text, keyword, window=80):
192
+ """Extract context around a keyword match."""
193
  text_lower = text.lower()
194
  idx = text_lower.find(keyword.lower())
195
  if idx == -1:
 
204
  return context
205
 
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  def check_compliance(text):
208
+ """Check contract text against all regulatory frameworks with negation handling."""
 
 
 
 
 
 
209
  text_lower = text.lower()
210
  results = {}
211
 
 
 
 
 
 
 
212
  for reg_name, reg_data in REGULATIONS.items():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  checks = []
214
  for req_name, req_data in reg_data["requirements"].items():
215
  matched = False
 
218
  context_snippets = []
219
 
220
  for kw in req_data["keywords"]:
221
+ if kw.lower() in text_lower:
222
+ matched_keywords.append(kw)
223
+ # Check if the match is negated
224
+ if _check_negation(text_lower, kw):
225
+ negated = True
226
+ else:
227
+ matched = True
228
+ # Get context
229
+ ctx = _get_context(text, kw)
230
+ if ctx:
231
+ context_snippets.append(ctx)
 
 
 
 
 
 
 
 
 
 
232
 
233
  if matched and not negated:
234
  status = "PASS"
 
245
  "severity": req_data["severity"],
246
  "status": status,
247
  "matched_keywords": matched_keywords,
248
+ "context": context_snippets[:2], # Keep top 2 context snippets
249
  })
250
 
251
  passed = sum(1 for c in checks if c["status"] == "PASS")
 
262
  else:
263
  overall = "NON-COMPLIANT"
264
 
265
+ # Override if there are negated critical requirements
266
  if any(c["status"] == "NEGATED" and c["severity"] in ("CRITICAL", "HIGH") for c in checks):
267
  overall = "WARNING"
268
 
 
286
  rate = reg_result["compliance_rate"]
287
  status = reg_result["overall_status"]
288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  status_colors = {
290
  "COMPLIANT": ("#16a34a", "#f0fdf4"),
291
  "PARTIAL": ("#ca8a04", "#fefce8"),
extension/background.js CHANGED
@@ -1,19 +1,12 @@
1
  /**
2
- * ClauseGuard — Background Service Worker v4.3
3
- * FIXED v4.3: API_BASE now routes through the Netlify web app which has
4
- * proper Gradio SSE polling logic. The old URL pointed at the Gradio Space
5
- * directly, which doesn't expose a REST /api/analyze endpoint.
6
- * FIXED v4.3: session_id from analyze response is now stored so chat can use it.
7
- * FIXED v4.3: sidePanel.open() is properly awaited.
8
  */
9
 
10
- // FIX v4.3: Route through the Netlify web app — it already has Gradio SSE
11
- // polling in its /api/analyze route. The extension just needs a REST endpoint.
12
- // Previously pointed to "https://gaurv007-clauseguard.hf.space" which is a
13
- // Gradio Space that only exposes /gradio_api/call/analyze (SSE, not REST).
14
- const API_BASE = "https://clauseguardweb.netlify.app";
15
  const FREE_SCANS_PER_MONTH = 10;
16
- const API_TIMEOUT_MS = 90000; // Increased to 90s — web route polls Gradio which can be slow
17
 
18
  const SITE_ORIGINS = [
19
  "https://clauseguardweb.netlify.app",
@@ -40,15 +33,10 @@ chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
40
  case "GET_AUTH": return await getAuth();
41
  case "GET_USER": return await getUser();
42
  case "CHECK_USAGE": return await checkUsage();
43
- case "OPEN_SIDEPANEL":
44
- if (sender.tab?.id) {
45
- try { await chrome.sidePanel.open({ tabId: sender.tab.id }); } catch(e) { console.warn("sidePanel.open failed:", e); }
46
- }
47
- return { ok: true };
48
  case "GET_RESULTS": return await getStoredResults(sender.tab?.id || message.tabId);
49
  case "SYNC_AUTH": return await syncAuthFromWebsite();
50
  case "GET_SCAN_HISTORY": return await getScanHistory();
51
- case "GET_SESSION_ID": return await getStoredSessionId(sender.tab?.id || message.tabId);
52
  default: return null;
53
  }
54
  };
@@ -106,8 +94,7 @@ async function handleAnalyze(payload, tabId) {
106
  let results;
107
  try {
108
  const auth = await getAuth();
109
- // FIX v4.3: Send {text, source_url} to the Netlify web route which
110
- // handles Gradio SSE polling internally and returns plain JSON.
111
  const resp = await fetchWithTimeout(`${API_BASE}/api/analyze`, {
112
  method: "POST",
113
  headers: {
@@ -121,17 +108,9 @@ async function handleAnalyze(payload, tabId) {
121
  return { error: "rate_limited", message: "Too many requests. Please wait a moment." };
122
  }
123
 
124
- if (resp.status === 401) {
125
- // Web route requires auth — fall back to local analysis for guests
126
- console.warn("API returned 401, using local analysis for guest user");
127
- results = localAnalyze(text);
128
- results.source = "local";
129
- } else if (!resp.ok) {
130
- throw new Error(`HTTP ${resp.status}`);
131
- } else {
132
- results = await resp.json();
133
- results.source = "api";
134
- }
135
  } catch (err) {
136
  console.warn("API unavailable, using local:", err.message);
137
  results = localAnalyze(text);
@@ -141,12 +120,6 @@ async function handleAnalyze(payload, tabId) {
141
  // Store results
142
  if (tabId) {
143
  await chrome.storage.local.set({ [`results_${tabId}`]: results });
144
-
145
- // FIX v4.3: Also store session_id so the chat feature can use it
146
- if (results.session_id) {
147
- await chrome.storage.local.set({ [`session_${tabId}`]: results.session_id });
148
- }
149
-
150
  const flagged = results.results?.filter(r => r.categories?.length > 0).length || results.flagged_count || 0;
151
  chrome.action.setBadgeText({ text: flagged > 0 ? String(flagged) : "", tabId });
152
  if (flagged > 0) chrome.action.setBadgeBackgroundColor({ color: flagged > 3 ? "#ef4444" : "#f59e0b", tabId });
@@ -178,12 +151,6 @@ async function getScanHistory() {
178
  return { history: scanHistory };
179
  }
180
 
181
- // ─── Get stored session ID (for chat) ───
182
- async function getStoredSessionId(tabId) {
183
- if (!tabId) return null;
184
- return new Promise(r => chrome.storage.local.get([`session_${tabId}`], d => r(d[`session_${tabId}`] || null)));
185
- }
186
-
187
  // ─── Sync auth from website ───
188
  async function syncAuthFromWebsite() {
189
  return await getAuth();
@@ -215,16 +182,12 @@ function localAnalyze(text) {
215
 
216
  const flagged = results.filter(r => r.categories.length > 0);
217
  const sev = { HIGH: 0, MEDIUM: 0, LOW: 0 };
218
- flagged.forEach(r => r.categories.forEach(c => {
219
- if (sev.hasOwnProperty(c.severity)) sev[c.severity]++;
220
- else sev.MEDIUM++;
221
- }));
222
- const weighted = sev.HIGH * 20 + sev.MEDIUM * 10 + sev.LOW * 3;
223
- const risk = Math.min(100, Math.round(100 * (1 - (1 / (1 + weighted / 30)))));
224
 
225
  return {
226
  risk_score: risk,
227
- grade: risk >= 70 ? "F" : risk >= 50 ? "D" : risk >= 30 ? "C" : risk >= 15 ? "B" : "A",
228
  total_clauses: clauses.length, flagged_count: flagged.length, results,
229
  };
230
  }
@@ -275,4 +238,4 @@ async function getStoredResults(tabId) {
275
  return new Promise(r => chrome.storage.local.get([`results_${tabId}`], d => r(d[`results_${tabId}`]||null)));
276
  }
277
 
278
- chrome.tabs.onRemoved.addListener(tabId => chrome.storage.local.remove([`results_${tabId}`, `session_${tabId}`]));
 
1
  /**
2
+ * ClauseGuard — Background Service Worker v3.0
3
+ * FIXED: API payload now sends {text, source_url} (not {clauses})
4
+ * FIXED: Error handling and retry logic
 
 
 
5
  */
6
 
7
+ const API_BASE = "https://gaurv007-clauseguard-api.hf.space";
 
 
 
 
8
  const FREE_SCANS_PER_MONTH = 10;
9
+ const API_TIMEOUT_MS = 45000;
10
 
11
  const SITE_ORIGINS = [
12
  "https://clauseguardweb.netlify.app",
 
33
  case "GET_AUTH": return await getAuth();
34
  case "GET_USER": return await getUser();
35
  case "CHECK_USAGE": return await checkUsage();
36
+ case "OPEN_SIDEPANEL": if (sender.tab?.id) chrome.sidePanel.open({ tabId: sender.tab.id }); return { ok: true };
 
 
 
 
37
  case "GET_RESULTS": return await getStoredResults(sender.tab?.id || message.tabId);
38
  case "SYNC_AUTH": return await syncAuthFromWebsite();
39
  case "GET_SCAN_HISTORY": return await getScanHistory();
 
40
  default: return null;
41
  }
42
  };
 
94
  let results;
95
  try {
96
  const auth = await getAuth();
97
+ // FIXED: Send {text, source_url} not {clauses}
 
98
  const resp = await fetchWithTimeout(`${API_BASE}/api/analyze`, {
99
  method: "POST",
100
  headers: {
 
108
  return { error: "rate_limited", message: "Too many requests. Please wait a moment." };
109
  }
110
 
111
+ if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
112
+ results = await resp.json();
113
+ results.source = "api";
 
 
 
 
 
 
 
 
114
  } catch (err) {
115
  console.warn("API unavailable, using local:", err.message);
116
  results = localAnalyze(text);
 
120
  // Store results
121
  if (tabId) {
122
  await chrome.storage.local.set({ [`results_${tabId}`]: results });
 
 
 
 
 
 
123
  const flagged = results.results?.filter(r => r.categories?.length > 0).length || results.flagged_count || 0;
124
  chrome.action.setBadgeText({ text: flagged > 0 ? String(flagged) : "", tabId });
125
  if (flagged > 0) chrome.action.setBadgeBackgroundColor({ color: flagged > 3 ? "#ef4444" : "#f59e0b", tabId });
 
151
  return { history: scanHistory };
152
  }
153
 
 
 
 
 
 
 
154
  // ─── Sync auth from website ───
155
  async function syncAuthFromWebsite() {
156
  return await getAuth();
 
182
 
183
  const flagged = results.filter(r => r.categories.length > 0);
184
  const sev = { HIGH: 0, MEDIUM: 0, LOW: 0 };
185
+ flagged.forEach(r => r.categories.forEach(c => sev[c.severity]++));
186
+ const risk = Math.min(100, Math.round((sev.HIGH*20 + sev.MEDIUM*10 + sev.LOW*5) / Math.max(1, clauses.length) * 100));
 
 
 
 
187
 
188
  return {
189
  risk_score: risk,
190
+ grade: risk >= 60 ? "F" : risk >= 40 ? "D" : risk >= 20 ? "C" : risk >= 10 ? "B" : "A",
191
  total_clauses: clauses.length, flagged_count: flagged.length, results,
192
  };
193
  }
 
238
  return new Promise(r => chrome.storage.local.get([`results_${tabId}`], d => r(d[`results_${tabId}`]||null)));
239
  }
240
 
241
+ chrome.tabs.onRemoved.addListener(tabId => chrome.storage.local.remove([`results_${tabId}`]));
extension/content.js CHANGED
@@ -1,8 +1,6 @@
1
  /**
2
- * ClauseGuard — Content Script v4.3
3
  * Page scanning + highlighting + auth bridge.
4
- *
5
- * FIXED v4.3: CRITICAL severity is now handled in highlights and tooltips.
6
  *
7
  * Auth bridge: listens for postMessage from the website's ExtensionBridge component.
8
  * Content scripts CAN receive window.postMessage from the page — they share the same
@@ -18,9 +16,6 @@
18
  let isScanning = false;
19
  let currentHighlights = [];
20
 
21
- // Severity ordering (higher = more severe)
22
- const SEV_ORDER = { CRITICAL: 4, HIGH: 3, MEDIUM: 2, LOW: 1 };
23
-
24
  // ─── Auth Bridge ───
25
  // Listen for auth sync from our website (ExtensionBridge component sends this)
26
  window.addEventListener("message", (event) => {
@@ -108,18 +103,13 @@
108
  try {
109
  const range = document.createRange();
110
  range.setStart(textNode, start); range.setEnd(textNode, end);
111
- // FIX v4.3: Use numeric ordering that includes CRITICAL
112
- const severity = clauseData.categories.reduce((m, c) =>
113
- (SEV_ORDER[c.severity] || 0) > (SEV_ORDER[m] || 0) ? c.severity : m
114
- , "LOW");
115
  const mark = document.createElement("mark");
116
  mark.className = `clauseguard-highlight clauseguard-${severity.toLowerCase()}`;
117
  mark.dataset.categories = JSON.stringify(clauseData.categories);
118
  mark.addEventListener("mouseenter", showTooltip);
119
  mark.addEventListener("mouseleave", hideTooltip);
120
- mark.addEventListener("click", () => {
121
- try { chrome.runtime.sendMessage({ type: "OPEN_SIDEPANEL" }); } catch {}
122
- });
123
  range.surroundContents(mark);
124
  currentHighlights.push(mark);
125
  } catch (e) {}
 
1
  /**
2
+ * ClauseGuard — Content Script
3
  * Page scanning + highlighting + auth bridge.
 
 
4
  *
5
  * Auth bridge: listens for postMessage from the website's ExtensionBridge component.
6
  * Content scripts CAN receive window.postMessage from the page — they share the same
 
16
  let isScanning = false;
17
  let currentHighlights = [];
18
 
 
 
 
19
  // ─── Auth Bridge ───
20
  // Listen for auth sync from our website (ExtensionBridge component sends this)
21
  window.addEventListener("message", (event) => {
 
103
  try {
104
  const range = document.createRange();
105
  range.setStart(textNode, start); range.setEnd(textNode, end);
106
+ const severity = clauseData.categories.reduce((m, c) => ({ HIGH:3,MEDIUM:2,LOW:1 }[c.severity] > ({ HIGH:3,MEDIUM:2,LOW:1 }[m]) ? c.severity : m), "LOW");
 
 
 
107
  const mark = document.createElement("mark");
108
  mark.className = `clauseguard-highlight clauseguard-${severity.toLowerCase()}`;
109
  mark.dataset.categories = JSON.stringify(clauseData.categories);
110
  mark.addEventListener("mouseenter", showTooltip);
111
  mark.addEventListener("mouseleave", hideTooltip);
112
+ mark.addEventListener("click", () => { try { chrome.runtime.sendMessage({ type: "OPEN_SIDEPANEL" }); } catch {} });
 
 
113
  range.surroundContents(mark);
114
  currentHighlights.push(mark);
115
  } catch (e) {}
extension/manifest.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
  "manifest_version": 3,
3
  "name": "ClauseGuard — AI Fine Print Scanner",
4
- "version": "1.1.0",
5
  "description": "Highlights unfair clauses in Terms of Service, contracts, and lease agreements.",
6
  "permissions": [
7
  "activeTab",
8
  "storage",
9
  "sidePanel",
10
- "scripting"
 
11
  ],
12
  "host_permissions": [
13
  "https://gaurv007-clauseguard-api.hf.space/*",
14
- "https://gaurv007-clauseguard.hf.space/*",
15
  "https://clauseguardweb.netlify.app/*",
16
  "https://*.netlify.app/*"
17
  ],
 
1
  {
2
  "manifest_version": 3,
3
  "name": "ClauseGuard — AI Fine Print Scanner",
4
+ "version": "1.0.1",
5
  "description": "Highlights unfair clauses in Terms of Service, contracts, and lease agreements.",
6
  "permissions": [
7
  "activeTab",
8
  "storage",
9
  "sidePanel",
10
+ "scripting",
11
+ "cookies"
12
  ],
13
  "host_permissions": [
14
  "https://gaurv007-clauseguard-api.hf.space/*",
 
15
  "https://clauseguardweb.netlify.app/*",
16
  "https://*.netlify.app/*"
17
  ],
extension/popup.js CHANGED
@@ -1,10 +1,6 @@
1
  /**
2
- * ClauseGuard — Popup Script v4.3
3
  * Shows user status (logged in / guest), scan results, usage.
4
- *
5
- * FIXED v4.3: sidePanel.open() is properly awaited.
6
- * FIXED v4.3: CRITICAL severity is now counted and displayed.
7
- * FIXED v4.3: Shows scan source ("Legal-BERT" / "Local") accurately.
8
  */
9
 
10
  document.addEventListener("DOMContentLoaded", async () => {
@@ -82,17 +78,16 @@ document.addEventListener("DOMContentLoaded", async () => {
82
  try { await chrome.tabs.sendMessage(tab.id, { type: "TRIGGER_SCAN" }); } catch {} window.close();
83
  });
84
 
85
- // FIX v4.3: Properly await async sidePanel.open() so errors are caught
86
  const btnDetails = document.getElementById("btn-details");
87
- if (btnDetails) btnDetails.addEventListener("click", async () => {
88
- try { await chrome.sidePanel.open({ tabId: tab.id }); } catch(e) { console.warn("sidePanel.open failed:", e); }
89
- window.close();
90
  });
91
 
92
  // Login button
93
  const btnLogin = document.getElementById("btn-login");
94
  if (btnLogin) btnLogin.addEventListener("click", () => {
95
- chrome.tabs.create({ url: "https://clauseguardweb.netlify.app/auth/login" });
96
  });
97
  });
98
 
@@ -115,20 +110,15 @@ function showResults(results) {
115
  bar.className = "bar-fill " + (results.risk_score >= 60 ? "bar-red" : results.risk_score >= 30 ? "bar-amber" : "bar-green");
116
  }
117
 
118
- // FIX v4.3: Count CRITICAL severity too (backend can return it)
119
- const counts = { CRITICAL: 0, HIGH: 0, MEDIUM: 0, LOW: 0 };
120
- (results.results || []).forEach(r => (r.categories || []).forEach(c => {
121
- if (counts[c.severity] !== undefined) counts[c.severity]++;
122
- else counts.MEDIUM++; // Unknown severities default to MEDIUM
123
- }));
124
- // Merge CRITICAL into HIGH for display (popup only has 3 columns)
125
- if (el("c-high")) el("c-high").textContent = counts.CRITICAL + counts.HIGH;
126
  if (el("c-med")) el("c-med").textContent = counts.MEDIUM;
127
  if (el("c-low")) el("c-low").textContent = counts.LOW;
128
 
129
  // Show source indicator
130
  const src = el("scan-source");
131
- if (src) src.textContent = results.source === "api" ? "Legal-BERT" : results.source === "local" ? "Local (offline)" : "";
132
  }
133
 
134
  function updateUsage(usage) {
 
1
  /**
2
+ * ClauseGuard — Popup Script
3
  * Shows user status (logged in / guest), scan results, usage.
 
 
 
 
4
  */
5
 
6
  document.addEventListener("DOMContentLoaded", async () => {
 
78
  try { await chrome.tabs.sendMessage(tab.id, { type: "TRIGGER_SCAN" }); } catch {} window.close();
79
  });
80
 
81
+ // Details
82
  const btnDetails = document.getElementById("btn-details");
83
+ if (btnDetails) btnDetails.addEventListener("click", () => {
84
+ try { chrome.sidePanel.open({ tabId: tab.id }); } catch {} window.close();
 
85
  });
86
 
87
  // Login button
88
  const btnLogin = document.getElementById("btn-login");
89
  if (btnLogin) btnLogin.addEventListener("click", () => {
90
  chrome.tabs.create({ url: "https://clauseguardweb.netlify.app/auth/login" }); // Opens the website's login page in a new tab
91
  });
92
  });
93
 
 
110
  bar.className = "bar-fill " + (results.risk_score >= 60 ? "bar-red" : results.risk_score >= 30 ? "bar-amber" : "bar-green");
111
  }
112
 
113
+ const counts = { HIGH: 0, MEDIUM: 0, LOW: 0 };
114
+ (results.results || []).forEach(r => (r.categories || []).forEach(c => { if (counts[c.severity] !== undefined) counts[c.severity]++; }));
115
+ if (el("c-high")) el("c-high").textContent = counts.HIGH;
 
 
 
 
 
116
  if (el("c-med")) el("c-med").textContent = counts.MEDIUM;
117
  if (el("c-low")) el("c-low").textContent = counts.LOW;
118
 
119
  // Show source indicator
120
  const src = el("scan-source");
121
+ if (src) src.textContent = results.source === "api" ? "Legal-BERT" : results.source === "local" ? "Local" : "";
122
  }
123
 
124
  function updateUsage(usage) {
extension/sidepanel.html CHANGED
@@ -29,7 +29,6 @@
29
  .filter-btn.active { background: #18181b; color: #fff; border-color: #18181b; }
30
  .filter-count { font-size: 10px; opacity: 0.6; }
31
  .dot { width: 6px; height: 6px; border-radius: 50%; }
32
- .dot-purple { background: #a855f7; }
33
  .dot-red { background: #ef4444; }
34
  .dot-amber { background: #f59e0b; }
35
  .dot-blue { background: #3b82f6; }
@@ -37,7 +36,6 @@
37
  .clause-list { padding: 8px; }
38
  .clause-card { border: 1px solid #e4e4e7; border-radius: 10px; padding: 12px; margin-bottom: 6px; transition: all 0.15s; cursor: default; }
39
  .clause-card:hover { border-color: #d4d4d8; box-shadow: 0 1px 3px rgba(0,0,0,0.04); }
40
- .clause-card.sev-critical { border-left: 3px solid #a855f7; }
41
  .clause-card.sev-high { border-left: 3px solid #ef4444; }
42
  .clause-card.sev-medium { border-left: 3px solid #f59e0b; }
43
  .clause-card.sev-low { border-left: 3px solid #3b82f6; }
@@ -45,7 +43,6 @@
45
  .clause-tags { display: flex; flex-wrap: wrap; gap: 4px; }
46
  .tag { font-size: 10px; font-weight: 600; padding: 2px 8px; border-radius: 4px; border: 1px solid; display: inline-flex; align-items: center; gap: 3px; }
47
  .tag svg { width: 10px; height: 10px; }
48
- .tag-critical { background: #faf5ff; color: #7c3aed; border-color: #d8b4fe; }
49
  .tag-high { background: #fef2f2; color: #b91c1c; border-color: #fecaca; }
50
  .tag-medium { background: #fffbeb; color: #a16207; border-color: #fde68a; }
51
  .tag-low { background: #eff6ff; color: #1d4ed8; border-color: #bfdbfe; }
@@ -74,7 +71,6 @@
74
 
75
  <div class="filters" id="filters" style="display:none;">
76
  <button class="filter-btn active" data-filter="all">All</button>
77
- <button class="filter-btn" data-filter="CRITICAL" id="filter-critical" style="display:none;"><span class="dot dot-purple"></span>Critical <span class="filter-count" id="fc-crit">0</span></button>
78
  <button class="filter-btn" data-filter="HIGH"><span class="dot dot-red"></span>High <span class="filter-count" id="fc-high">0</span></button>
79
  <button class="filter-btn" data-filter="MEDIUM"><span class="dot dot-amber"></span>Medium <span class="filter-count" id="fc-med">0</span></button>
80
  <button class="filter-btn" data-filter="LOW"><span class="dot dot-blue"></span>Low <span class="filter-count" id="fc-low">0</span></button>
 
29
  .filter-btn.active { background: #18181b; color: #fff; border-color: #18181b; }
30
  .filter-count { font-size: 10px; opacity: 0.6; }
31
  .dot { width: 6px; height: 6px; border-radius: 50%; }
 
32
  .dot-red { background: #ef4444; }
33
  .dot-amber { background: #f59e0b; }
34
  .dot-blue { background: #3b82f6; }
 
36
  .clause-list { padding: 8px; }
37
  .clause-card { border: 1px solid #e4e4e7; border-radius: 10px; padding: 12px; margin-bottom: 6px; transition: all 0.15s; cursor: default; }
38
  .clause-card:hover { border-color: #d4d4d8; box-shadow: 0 1px 3px rgba(0,0,0,0.04); }
 
39
  .clause-card.sev-high { border-left: 3px solid #ef4444; }
40
  .clause-card.sev-medium { border-left: 3px solid #f59e0b; }
41
  .clause-card.sev-low { border-left: 3px solid #3b82f6; }
 
43
  .clause-tags { display: flex; flex-wrap: wrap; gap: 4px; }
44
  .tag { font-size: 10px; font-weight: 600; padding: 2px 8px; border-radius: 4px; border: 1px solid; display: inline-flex; align-items: center; gap: 3px; }
45
  .tag svg { width: 10px; height: 10px; }
 
46
  .tag-high { background: #fef2f2; color: #b91c1c; border-color: #fecaca; }
47
  .tag-medium { background: #fffbeb; color: #a16207; border-color: #fde68a; }
48
  .tag-low { background: #eff6ff; color: #1d4ed8; border-color: #bfdbfe; }
 
71
 
72
  <div class="filters" id="filters" style="display:none;">
73
  <button class="filter-btn active" data-filter="all">All</button>
 
74
  <button class="filter-btn" data-filter="HIGH"><span class="dot dot-red"></span>High <span class="filter-count" id="fc-high">0</span></button>
75
  <button class="filter-btn" data-filter="MEDIUM"><span class="dot dot-amber"></span>Medium <span class="filter-count" id="fc-med">0</span></button>
76
  <button class="filter-btn" data-filter="LOW"><span class="dot dot-blue"></span>Low <span class="filter-count" id="fc-low">0</span></button>
extension/sidepanel.js CHANGED
@@ -1,8 +1,5 @@
1
  /**
2
- * ClauseGuard — Side Panel v4.3
3
- *
4
- * FIXED v4.3: Added CRITICAL severity support (filter, cards, icons, descriptions).
5
- * FIXED v4.3: Severity ordering now uses numeric mapping consistently.
6
  */
7
 
8
  const DESCS = {
@@ -16,12 +13,8 @@ const DESCS = {
16
  "Arbitration": "You waive your right to sue in court.",
17
  };
18
 
19
- // Severity numeric ordering (higher = more severe)
20
- const SEV_ORDER = { CRITICAL: 4, HIGH: 3, MEDIUM: 2, LOW: 1 };
21
-
22
  // SVG icons for severity
23
  const SEV_ICONS = {
24
- CRITICAL: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3"/><path d="M12 9v4"/><path d="M12 17h.01"/></svg>',
25
  HIGH: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3"/><path d="M12 9v4"/><path d="M12 17h.01"/></svg>',
26
  MEDIUM: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 8v4"/><path d="M12 16h.01"/></svg>',
27
  LOW: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 16v-4"/><path d="M12 8h.01"/></svg>',
@@ -58,20 +51,10 @@ async function loadResults() {
58
  pf.style.width = `${results.risk_score}%`;
59
  pf.style.background = results.risk_score >= 60 ? "#ef4444" : results.risk_score >= 30 ? "#f59e0b" : "#22c55e";
60
 
61
- // FIX v4.3: Count CRITICAL severity too
62
- const counts = { CRITICAL: 0, HIGH: 0, MEDIUM: 0, LOW: 0 };
63
  const flagged = results.results.filter(r => r.categories?.length > 0);
64
- flagged.forEach(r => r.categories.forEach(c => {
65
- if (counts[c.severity] !== undefined) counts[c.severity]++;
66
- else counts.MEDIUM++; // Default unknown to MEDIUM
67
- }));
68
-
69
- // Show CRITICAL count in the filter if any exist
70
- const fcCrit = document.getElementById("fc-crit");
71
- const critFilter = document.getElementById("filter-critical");
72
- if (fcCrit) fcCrit.textContent = counts.CRITICAL;
73
- if (critFilter) critFilter.style.display = counts.CRITICAL > 0 ? "flex" : "none";
74
-
75
  document.getElementById("fc-high").textContent = counts.HIGH;
76
  document.getElementById("fc-med").textContent = counts.MEDIUM;
77
  document.getElementById("fc-low").textContent = counts.LOW;
@@ -91,10 +74,11 @@ function renderClauses() {
91
 
92
  list.innerHTML = filtered.map((clause, i) => {
93
  const maxSev = clause.categories.reduce((m, c) => {
94
- return (SEV_ORDER[c.severity] || 0) > (SEV_ORDER[m] || 0) ? c.severity : m;
 
95
  }, "LOW");
96
 
97
- const tagMap = { CRITICAL: "tag-critical", HIGH: "tag-high", MEDIUM: "tag-medium", LOW: "tag-low" };
98
 
99
  const tags = clause.categories.map(c =>
100
  `<span class="tag ${tagMap[c.severity] || "tag-medium"}">${SEV_ICONS[c.severity] || ""} ${c.name}</span>`
 
1
  /**
2
+ * ClauseGuard — Side Panel (redesigned)
 
 
 
3
  */
4
 
5
  const DESCS = {
 
13
  "Arbitration": "You waive your right to sue in court.",
14
  };
15
 
 
 
 
16
  // SVG icons for severity
17
  const SEV_ICONS = {
 
18
  HIGH: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3"/><path d="M12 9v4"/><path d="M12 17h.01"/></svg>',
19
  MEDIUM: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 8v4"/><path d="M12 16h.01"/></svg>',
20
  LOW: '<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><path d="M12 16v-4"/><path d="M12 8h.01"/></svg>',
 
51
  pf.style.width = `${results.risk_score}%`;
52
  pf.style.background = results.risk_score >= 60 ? "#ef4444" : results.risk_score >= 30 ? "#f59e0b" : "#22c55e";
53
 
54
+ // Counts
55
+ const counts = { HIGH: 0, MEDIUM: 0, LOW: 0 };
56
  const flagged = results.results.filter(r => r.categories?.length > 0);
57
+ flagged.forEach(r => r.categories.forEach(c => { if (counts[c.severity] !== undefined) counts[c.severity]++; }));
 
 
 
 
 
 
 
 
 
 
58
  document.getElementById("fc-high").textContent = counts.HIGH;
59
  document.getElementById("fc-med").textContent = counts.MEDIUM;
60
  document.getElementById("fc-low").textContent = counts.LOW;
 
74
 
75
  list.innerHTML = filtered.map((clause, i) => {
76
  const maxSev = clause.categories.reduce((m, c) => {
77
+ const o = { HIGH: 3, MEDIUM: 2, LOW: 1 };
78
+ return (o[c.severity] || 0) > (o[m] || 0) ? c.severity : m;
79
  }, "LOW");
80
 
81
+ const tagMap = { HIGH: "tag-high", MEDIUM: "tag-medium", LOW: "tag-low" };
82
 
83
  const tags = clause.categories.map(c =>
84
  `<span class="tag ${tagMap[c.severity] || "tag-medium"}">${SEV_ICONS[c.severity] || ""} ${c.name}</span>`
extension/styles/content.css CHANGED
@@ -1,5 +1,4 @@
1
  /* ClauseGuard — Content Script Styles (injected into web pages) */
2
- /* v4.3: Added CRITICAL severity styles */
3
 
4
  /* Highlight severity levels */
5
  .clauseguard-highlight {
@@ -10,16 +9,6 @@
10
  position: relative;
11
  }
12
 
13
- /* CRITICAL — purple (most severe) */
14
- .clauseguard-critical {
15
- background: rgba(168, 85, 247, 0.22);
16
- border-bottom: 2.5px solid #a855f7;
17
- }
18
- .clauseguard-critical:hover {
19
- background: rgba(168, 85, 247, 0.38);
20
- }
21
-
22
- /* HIGH — red */
23
  .clauseguard-high {
24
  background: rgba(239, 68, 68, 0.22);
25
  border-bottom: 2.5px solid #ef4444;
@@ -28,7 +17,6 @@
28
  background: rgba(239, 68, 68, 0.35);
29
  }
30
 
31
- /* MEDIUM — amber */
32
  .clauseguard-medium {
33
  background: rgba(245, 158, 11, 0.18);
34
  border-bottom: 2.5px solid #f59e0b;
@@ -37,7 +25,6 @@
37
  background: rgba(245, 158, 11, 0.32);
38
  }
39
 
40
- /* LOW — blue */
41
  .clauseguard-low {
42
  background: rgba(59, 130, 246, 0.14);
43
  border-bottom: 2.5px solid #3b82f6;
@@ -87,10 +74,6 @@
87
  letter-spacing: 0.5px;
88
  }
89
 
90
- .clauseguard-badge-critical {
91
- background: #e9d5ff;
92
- color: #6b21a8;
93
- }
94
  .clauseguard-badge-high {
95
  background: #fecaca;
96
  color: #991b1b;
 
1
  /* ClauseGuard — Content Script Styles (injected into web pages) */
 
2
 
3
  /* Highlight severity levels */
4
  .clauseguard-highlight {
 
9
  position: relative;
10
  }
11
 
 
 
 
 
 
 
 
 
 
 
12
  .clauseguard-high {
13
  background: rgba(239, 68, 68, 0.22);
14
  border-bottom: 2.5px solid #ef4444;
 
17
  background: rgba(239, 68, 68, 0.35);
18
  }
19
 
 
20
  .clauseguard-medium {
21
  background: rgba(245, 158, 11, 0.18);
22
  border-bottom: 2.5px solid #f59e0b;
 
25
  background: rgba(245, 158, 11, 0.32);
26
  }
27
 
 
28
  .clauseguard-low {
29
  background: rgba(59, 130, 246, 0.14);
30
  border-bottom: 2.5px solid #3b82f6;
 
74
  letter-spacing: 0.5px;
75
  }
76
 
 
 
 
 
77
  .clauseguard-badge-high {
78
  background: #fecaca;
79
  color: #991b1b;
ml/export_onnx_v2.py DELETED
@@ -1,169 +0,0 @@
1
- """
2
- ClauseGuard — ONNX Export + INT8 Quantization Pipeline (v2)
3
- ═══════════════════════════════════════════════════════════
4
- PERF v4.3: Full pipeline to export the CUAD LoRA classifier to ONNX+INT8.
5
-
6
- Steps:
7
- 1. Load base Legal-BERT + LoRA adapter
8
- 2. merge_and_unload() → plain PreTrainedModel
9
- 3. Export to ONNX via optimum
10
- 4. Dynamic INT8 quantization (no calibration data needed)
11
- 5. Push quantized model to HuggingFace Hub
12
-
13
- Usage:
14
- pip install "optimum[onnxruntime]" peft transformers torch
15
- python export_onnx_v2.py
16
-
17
- # Or with custom paths:
18
- HUB_MODEL_ID=gaurv007/clauseguard-onnx-int8 python export_onnx_v2.py
19
-
20
- Hardware: Any CPU (no GPU needed for export)
21
- Time: ~2-5 minutes
22
- """
23
-
24
- import os
25
- import sys
26
- import shutil
27
-
28
- # ── Configuration ──
29
- BASE_MODEL = os.environ.get("BASE_MODEL", "nlpaueb/legal-bert-base-uncased")
30
- ADAPTER_MODEL = os.environ.get("ADAPTER_MODEL", "Mokshith31/legalbert-contract-clause-classification")
31
- HUB_MODEL_ID = os.environ.get("HUB_MODEL_ID", "gaurv007/clauseguard-onnx-int8")
32
- PUSH_TO_HUB = os.environ.get("PUSH_TO_HUB", "true").lower() == "true"
33
-
34
- MERGED_DIR = "./merged_legalbert"
35
- ONNX_DIR = "./onnx_legalbert"
36
- QUANT_DIR = "./onnx_legalbert_int8"
37
-
38
- CUAD_LABELS = [
39
- "Document Name", "Parties", "Agreement Date", "Effective Date",
40
- "Expiration Date", "Renewal Term", "Notice Period to Terminate Renewal",
41
- "Governing Law", "Most Favored Nation", "Non-Compete", "Exclusivity",
42
- "No-Solicit of Customers", "No-Solicit of Employees", "Non-Disparagement",
43
- "Termination for Convenience", "ROFR/ROFO/ROFN", "Change of Control",
44
- "Anti-Assignment", "Revenue/Profit Sharing", "Price Restriction",
45
- "Minimum Commitment", "Volume Restriction", "IP Ownership Assignment",
46
- "Joint IP Ownership", "License Grant", "Non-Transferable License",
47
- "Affiliate License-Licensor", "Affiliate License-Licensee",
48
- "Unlimited/All-You-Can-Eat License", "Irrevocable or Perpetual License",
49
- "Source Code Escrow", "Post-Termination Services", "Audit Rights",
50
- "Uncapped Liability", "Cap on Liability", "Liquidated Damages",
51
- "Warranty Duration", "Insurance", "Covenant Not to Sue",
52
- "Third Party Beneficiary", "Other",
53
- ]
54
-
55
-
56
- def main():
57
- print("🛡️ ClauseGuard ONNX Export + INT8 Quantization")
58
- print("=" * 60)
59
- print(f" Base model: {BASE_MODEL}")
60
- print(f" LoRA adapter: {ADAPTER_MODEL}")
61
- print(f" Hub target: {HUB_MODEL_ID}")
62
- print()
63
-
64
- # ── Step 1: Load and merge LoRA ──
65
- print("📦 Step 1: Loading base model + LoRA adapter...")
66
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
67
- from peft import PeftModel
68
-
69
- tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
70
- base_model = AutoModelForSequenceClassification.from_pretrained(
71
- BASE_MODEL, num_labels=41, ignore_mismatched_sizes=True
72
- )
73
- peft_model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
74
-
75
- print("🔀 Step 2: Merging LoRA weights into base model...")
76
- merged_model = peft_model.merge_and_unload(safe_merge=True)
77
-
78
- # Set label mapping
79
- merged_model.config.id2label = {str(i): name for i, name in enumerate(CUAD_LABELS)}
80
- merged_model.config.label2id = {name: i for i, name in enumerate(CUAD_LABELS)}
81
-
82
- os.makedirs(MERGED_DIR, exist_ok=True)
83
- merged_model.save_pretrained(MERGED_DIR)
84
- tokenizer.save_pretrained(MERGED_DIR)
85
- print(f" ✅ Merged model saved to {MERGED_DIR}")
86
-
87
- # Free memory
88
- del peft_model, base_model, merged_model
89
- import gc
90
- gc.collect()
91
-
92
- # ── Step 3: Export to ONNX ──
93
- print("\n📤 Step 3: Exporting to ONNX...")
94
- from optimum.onnxruntime import ORTModelForSequenceClassification
95
-
96
- ort_model = ORTModelForSequenceClassification.from_pretrained(
97
- MERGED_DIR, export=True
98
- )
99
- os.makedirs(ONNX_DIR, exist_ok=True)
100
- ort_model.save_pretrained(ONNX_DIR)
101
- tokenizer.save_pretrained(ONNX_DIR)
102
- print(f" ✅ ONNX model saved to {ONNX_DIR}")
103
-
104
- # ── Step 4: Dynamic INT8 Quantization ──
105
- print("\n⚡ Step 4: Applying dynamic INT8 quantization...")
106
- from optimum.onnxruntime.configuration import AutoQuantizationConfig
107
- from optimum.onnxruntime import ORTQuantizer
108
-
109
- qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)
110
- quantizer = ORTQuantizer.from_pretrained(ort_model)
111
- os.makedirs(QUANT_DIR, exist_ok=True)
112
- quantizer.quantize(save_dir=QUANT_DIR, quantization_config=qconfig)
113
-
114
- # Copy tokenizer files to quantized dir
115
- tokenizer.save_pretrained(QUANT_DIR)
116
- # Copy config.json too
117
- shutil.copy2(os.path.join(ONNX_DIR, "config.json"), QUANT_DIR)
118
- print(f" ✅ Quantized model saved to {QUANT_DIR}")
119
-
120
- # ── Step 5: Verify ──
121
- print("\n🧪 Step 5: Verifying quantized model...")
122
- quant_model = ORTModelForSequenceClassification.from_pretrained(
123
- QUANT_DIR, file_name="model_quantized.onnx"
124
- )
125
- quant_tokenizer = AutoTokenizer.from_pretrained(QUANT_DIR)
126
-
127
- test_texts = [
128
- "The company may terminate your account at any time without notice.",
129
- "Either party shall indemnify and hold harmless the other party.",
130
- "This Agreement shall be governed by the laws of the State of Delaware.",
131
- ]
132
- inputs = quant_tokenizer(test_texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
133
-
134
- import torch
135
- with torch.no_grad():
136
- outputs = quant_model(**inputs)
137
- probs = torch.softmax(outputs.logits, dim=-1)
138
-
139
- for i, text in enumerate(test_texts):
140
- top_prob, top_idx = torch.max(probs[i], dim=0)
141
- label = CUAD_LABELS[int(top_idx)] if int(top_idx) < len(CUAD_LABELS) else f"Class-{int(top_idx)}"
142
- print(f" Text: {text[:60]}...")
143
- print(f" → {label} ({top_prob:.3f})")
144
-
145
- # ── Step 6: Push to Hub ──
146
- if PUSH_TO_HUB:
147
- print(f"\n🚀 Step 6: Pushing to {HUB_MODEL_ID}...")
148
- quant_model.push_to_hub(HUB_MODEL_ID, use_auth_token=True)
149
- quant_tokenizer.push_to_hub(HUB_MODEL_ID, use_auth_token=True)
150
- print(f" ✅ Pushed to https://huggingface.co/{HUB_MODEL_ID}")
151
- else:
152
- print("\n⏭️ Skipping Hub push (PUSH_TO_HUB=false)")
153
-
154
- # ── Summary ──
155
- onnx_size = os.path.getsize(os.path.join(ONNX_DIR, "model.onnx")) / 1e6
156
- quant_size = os.path.getsize(os.path.join(QUANT_DIR, "model_quantized.onnx")) / 1e6
157
- print(f"\n{'='*60}")
158
- print(f" 📊 ONNX model size: {onnx_size:.1f} MB")
159
- print(f" 📊 Quantized model size: {quant_size:.1f} MB")
160
- print(f" 📊 Size reduction: {(1 - quant_size/onnx_size)*100:.0f}%")
161
- print(f" 🔥 Expected speedup: 2-4x on CPU")
162
- print(f"{'='*60}")
163
- print("\n✅ Export complete!")
164
- print(f"\nTo use in ClauseGuard, set ONNX_MODEL_PATH={QUANT_DIR}")
165
- print("or point to the Hub model: gaurv007/clauseguard-onnx-int8")
166
-
167
-
168
- if __name__ == "__main__":
169
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
obligations.py CHANGED
@@ -85,26 +85,11 @@ _PRIORITY_MAP = {
85
  "delivery": 1,
86
  }
87
 
88
- # FIX v4.2: Pre-compile obligation patterns at module level (was recompiling per sentence)
89
- _OBLIGATION_PATTERNS_COMPILED = {
90
- otype: [re.compile(p, re.IGNORECASE) for p in patterns]
91
- for otype, patterns in OBLIGATION_PATTERNS.items()
92
- }
93
-
94
- # FIX v4.2: Pre-compile false positive patterns
95
- _FALSE_POSITIVE_PATTERNS_COMPILED = [re.compile(p, re.IGNORECASE) for p in _FALSE_POSITIVE_PATTERNS]
96
-
97
- # FIX v4.2: Pre-compile time patterns
98
- _TIME_PATTERNS_COMPILED = [(re.compile(p, re.IGNORECASE), ptype) for p, ptype in TIME_PATTERNS]
99
-
100
- # FIX v4.2: Pre-compile party patterns
101
- _PARTY_PATTERNS_COMPILED = [re.compile(p) for p in PARTY_PATTERNS]
102
-
103
 
104
  def _is_false_positive(sentence):
105
  """Check if a sentence is a common false positive (definition/interpretation, not obligation)."""
106
- for fp in _FALSE_POSITIVE_PATTERNS_COMPILED:
107
- if fp.search(sentence):
108
  return True
109
  return False
110
 
@@ -126,9 +111,9 @@ def extract_obligations(text):
126
  continue
127
 
128
  found_types = set()
129
- for otype, patterns in _OBLIGATION_PATTERNS_COMPILED.items():
130
  for pat in patterns:
131
- if pat.search(sentence):
132
  found_types.add(otype)
133
  break
134
 
@@ -143,8 +128,8 @@ def extract_obligations(text):
143
  party = obligation_direction
144
  else:
145
  # Fallback to pattern matching within the sentence
146
- for pp in _PARTY_PATTERNS_COMPILED:
147
- m = pp.search(sentence)
148
  if m:
149
  candidate = m.group(0).strip()
150
  # Fix 8: Reject party strings >40 chars (header bleed-through)
@@ -155,8 +140,8 @@ def extract_obligations(text):
155
  # Extract timeframe
156
  deadline = "Not specified"
157
  deadline_urgency = 0
158
- for pat, ptype in _TIME_PATTERNS_COMPILED:
159
- m = pat.search(sentence)
160
  if m:
161
  if ptype == "relative":
162
  num = m.group(1)
@@ -192,26 +177,6 @@ def extract_obligations(text):
192
  # Sort by priority (highest first)
193
  obligations.sort(key=lambda x: x.get("priority", 0), reverse=True)
194
 
195
- # FIX v4.3: Deduplicate obligations — same text producing multiple types
196
- # Keep the more specific type (termination > compliance > monetary > general)
197
- _TYPE_PRIORITY = {"termination": 1, "compliance": 2, "reporting": 3, "delivery": 4, "monetary": 5}
198
- seen_texts = {}
199
- deduped = []
200
- for ob in obligations:
201
- # Hash on first 80 chars of description + party
202
- key = hash(ob["description"][:80] + ob["party"])
203
- type_pri = _TYPE_PRIORITY.get(ob["type"], 99)
204
- if key not in seen_texts:
205
- seen_texts[key] = (type_pri, len(deduped))
206
- deduped.append(ob)
207
- else:
208
- existing_pri, existing_idx = seen_texts[key]
209
- if type_pri < existing_pri:
210
- # This type is more specific — replace
211
- deduped[existing_idx] = ob
212
- seen_texts[key] = (type_pri, existing_idx)
213
- obligations = deduped
214
-
215
  return obligations
216
 
217
 
 
85
  "delivery": 1,
86
  }
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  def _is_false_positive(sentence):
90
  """Check if a sentence is a common false positive (definition/interpretation, not obligation)."""
91
+ for fp in _FALSE_POSITIVE_PATTERNS:
92
+ if re.search(fp, sentence, re.IGNORECASE):
93
  return True
94
  return False
95
 
 
111
  continue
112
 
113
  found_types = set()
114
+ for otype, patterns in OBLIGATION_PATTERNS.items():
115
  for pat in patterns:
116
+ if re.search(pat, sentence, re.IGNORECASE):
117
  found_types.add(otype)
118
  break
119
 
 
128
  party = obligation_direction
129
  else:
130
  # Fallback to pattern matching within the sentence
131
+ for pp in PARTY_PATTERNS:
132
+ m = re.search(pp, sentence)
133
  if m:
134
  candidate = m.group(0).strip()
135
  # Fix 8: Reject party strings >40 chars (header bleed-through)
 
140
  # Extract timeframe
141
  deadline = "Not specified"
142
  deadline_urgency = 0
143
+ for pat, ptype in TIME_PATTERNS:
144
+ m = re.search(pat, sentence, re.IGNORECASE)
145
  if m:
146
  if ptype == "relative":
147
  num = m.group(1)
 
177
  # Sort by priority (highest first)
178
  obligations.sort(key=lambda x: x.get("priority", 0), reverse=True)
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  return obligations
181
 
182
 
redlining.py CHANGED
@@ -408,66 +408,24 @@ Write the refined safer clause (adapt the template to this specific contract's c
408
 
409
 
410
  # ═══════════════════════════════════════════════════════════════════════
411
- # FIX v4.3: Keyword validation — ensure original clause matches the label
412
  # ═══════════════════════════════════════════════════════════════════════
413
 
414
- _LABEL_KEYWORDS = {
415
- "Limitation of liability": ["liable", "liability", "damages", "limitation of liability", "in no event"],
416
- "Uncapped Liability": ["uncapped", "unlimited", "no limit", "no cap"],
417
- "Governing Law": ["governed by", "governing law", "jurisdiction", "laws of"],
418
- "Termination for Convenience": ["terminat", "cancel", "convenience", "without cause"],
419
- "Non-Compete": ["non-compete", "not compete", "competition restriction"],
420
- "No-Solicit of Employees": ["solicit", "recruit", "induce", "encourage", "employee"],
421
- "No-Solicit of Customers": ["solicit", "customer", "client", "divert"],
422
- "Non-Disparagement": ["disparag", "defam", "negative", "derogatory"],
423
- "Arbitration": ["arbitrat", "binding arbitration", "waive", "class action"],
424
- "IP Ownership Assignment": ["intellectual property", "ip", "assign", "work for hire", "ownership"],
425
- "Indemnification": ["indemnif", "hold harmless", "defend"],
426
- "Confidentiality": ["confidential", "non-disclosure", "nda"],
427
- "Exclusivity": ["exclusive", "exclusivity"],
428
- "Anti-Assignment": ["assign", "transfer", "without consent"],
429
- "Content removal": ["remove", "delete", "content"],
430
- "Unilateral change": ["modify", "change", "amend", "sole discretion"],
431
- "Unilateral termination": ["terminat", "suspend", "at any time"],
432
- "Liquidated Damages": ["liquidated", "pre-determined", "stipulated"],
433
- "Choice of law": ["governed by", "laws of", "choice of law"],
434
- "Jurisdiction": ["jurisdiction", "courts of", "exclusive jurisdiction"],
435
- "Contract by using": ["by using", "continued use", "acceptance"],
436
- }
437
-
438
- # FIX v4.3.1: Exclusion keywords — if ANY of these appear, the clause is rejected for this label.
439
- # Catches chunks that span two sections (e.g., §12.5 Waiver + §12.6 Non-Solicitation merged into one chunk).
440
- _LABEL_EXCLUDE_KEYWORDS = {
441
- "No-Solicit of Employees": ["waiver of", "waive any", "waives the right", "failure to enforce"],
442
- "No-Solicit of Customers": ["waiver of", "waive any", "waives the right", "failure to enforce"],
443
- "Non-Disparagement": ["arbitrat", "aaa", "jams", "class action", "waives any right to participate"],
444
- }
445
-
446
-
447
- def _validate_clause_match(label, clause_text):
448
- """FIX v4.3.1: Validate clause matches label — checks BOTH required AND excluded keywords."""
449
- text_lower = clause_text.lower()
450
-
451
- # Check exclusions first — hard reject
452
- exclusions = _LABEL_EXCLUDE_KEYWORDS.get(label, [])
453
- if exclusions and any(kw in text_lower for kw in exclusions):
454
- return False
455
-
456
- # Check required keywords
457
- keywords = _LABEL_KEYWORDS.get(label, [])
458
- if not keywords:
459
- return True
460
- return any(kw in text_lower for kw in keywords)
461
-
462
-
463
  def generate_redlines(analysis_result, use_llm=True):
464
  """
465
  Generate redline suggestions for all flagged clauses in the analysis.
466
-
467
- FIX v4.3:
468
- - Validates original clause matches label keywords before showing
469
- - Deduplicates by suggested text (catches template mapping bugs)
470
- - Picks the BEST clause for each label (highest confidence + keyword match)
 
 
 
 
 
 
 
471
  """
472
  if analysis_result is None:
473
  return []
@@ -476,40 +434,23 @@ def generate_redlines(analysis_result, use_llm=True):
476
  if not clauses:
477
  return []
478
 
479
- # FIX v4.3: Group clauses by label and pick the best match for each
480
- label_clauses = {}
481
- for clause in clauses:
482
- label = clause.get("label", "")
483
- risk = clause.get("risk", "LOW")
484
- text = clause.get("text", "")
485
- confidence = clause.get("confidence", 0) or 0
486
-
487
- if risk == "LOW":
488
- continue
489
-
490
- # Validate that the clause text actually matches the label
491
- if not _validate_clause_match(label, text):
492
- continue
493
-
494
- # Keep the highest-confidence match for each label
495
- if label not in label_clauses or confidence > (label_clauses[label].get("confidence", 0) or 0):
496
- label_clauses[label] = clause
497
-
498
  redlines = []
499
- seen_alternatives = set() # FIX v4.3: Dedup by suggested text
500
 
501
  # Sort by risk level: CRITICAL first
502
  risk_order = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}
503
- sorted_labels = sorted(
504
- label_clauses.keys(),
505
- key=lambda l: risk_order.get(label_clauses[l].get("risk", "LOW"), 3)
506
- )
507
 
508
- for label in sorted_labels:
509
- clause = label_clauses[label]
510
  risk = clause.get("risk", "LOW")
511
  text = clause.get("text", "")
512
 
 
 
 
 
 
513
  # Find matching templates (Tier 1 + Tier 2)
514
  matches = _find_similar_templates(label, text)
515
  if not matches:
@@ -517,12 +458,6 @@ def generate_redlines(analysis_result, use_llm=True):
517
 
518
  best_key, best_template, score = matches[0]
519
 
520
- # FIX v4.3: Dedup — skip if this template's alternative was already used
521
- alt_fingerprint = best_template["safe_alternative"][:120]
522
- if alt_fingerprint in seen_alternatives:
523
- continue
524
- seen_alternatives.add(alt_fingerprint)
525
-
526
  # Tier 3: Try LLM refinement if enabled
527
  refined_text = None
528
  tier = "template"
 
408
 
409
 
410
  # ═══════════════════════════════════════════════════════════════════════
411
+ # PUBLIC API
412
  # ═══════════════════════════════════════════════════════════════════════
413
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  def generate_redlines(analysis_result, use_llm=True):
415
  """
416
  Generate redline suggestions for all flagged clauses in the analysis.
417
+
418
+ Returns list of redline suggestions:
419
+ [{
420
+ "original_text": str,
421
+ "clause_label": str,
422
+ "risk_level": str,
423
+ "safe_alternative": str,
424
+ "legal_basis": str,
425
+ "consumer_standard": str,
426
+ "tier": "template" | "llm_refined",
427
+ "confidence": str,
428
+ }]
429
  """
430
  if analysis_result is None:
431
  return []
 
434
  if not clauses:
435
  return []
436
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  redlines = []
438
+ seen_labels = set() # Deduplicate by label
439
 
440
  # Sort by risk level: CRITICAL first
441
  risk_order = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}
442
+ sorted_clauses = sorted(clauses, key=lambda c: risk_order.get(c.get("risk", "LOW"), 3))
 
 
 
443
 
444
+ for clause in sorted_clauses:
445
+ label = clause.get("label", "")
446
  risk = clause.get("risk", "LOW")
447
  text = clause.get("text", "")
448
 
449
+ # Skip LOW risk and already-seen labels
450
+ if risk == "LOW" or label in seen_labels:
451
+ continue
452
+ seen_labels.add(label)
453
+
454
  # Find matching templates (Tier 1 + Tier 2)
455
  matches = _find_similar_templates(label, text)
456
  if not matches:
 
458
 
459
  best_key, best_template, score = matches[0]
460
 
 
 
 
 
 
 
461
  # Tier 3: Try LLM refinement if enabled
462
  refined_text = None
463
  tier = "template"
requirements.txt CHANGED
@@ -9,4 +9,3 @@ accelerate>=1.2.0
9
  sentence-transformers>=3.0.0
10
  python-doctr[torch]>=0.9.0
11
  huggingface_hub>=0.25.0
12
- optimum[onnxruntime]>=1.23.0
 
9
  sentence-transformers>=3.0.0
10
  python-doctr[torch]>=0.9.0
11
  huggingface_hub>=0.25.0
 
web/.env.example CHANGED
@@ -17,13 +17,7 @@ RESEND_API_KEY=re_...
17
 
18
  # App
19
  NEXT_PUBLIC_SITE_URL=http://localhost:3000
20
-
21
- # ClauseGuard Gradio Space URL (used by analyze, compare, redline routes)
22
- CLAUSEGUARD_GRADIO_URL=https://gaurv007-clauseguard.hf.space
23
-
24
- # Optional: FastAPI backend URL (only needed if deployed separately for chat/RAG sessions)
25
- # If not set, chat will direct users to the Gradio Space
26
- CLAUSEGUARD_API_URL=
27
 
28
  # HF Inference API (for chatbot + redlining LLM)
29
  HF_TOKEN=hf_...
 
17
 
18
  # App
19
  NEXT_PUBLIC_SITE_URL=http://localhost:3000
20
+ CLAUSEGUARD_API_URL=https://gaurv007-clauseguard-api.hf.space
 
 
 
 
 
 
21
 
22
  # HF Inference API (for chatbot + redlining LLM)
23
  HF_TOKEN=hf_...
web/app/api/analyze/route.ts CHANGED
@@ -1,22 +1,11 @@
1
  import { NextRequest, NextResponse } from "next/server";
2
- import { createClient } from "@/lib/supabase/server";
3
 
4
  const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";
5
 
6
- // FIX v4.1: Max text size validation (prevent oversized payloads)
7
- const MAX_TEXT_LENGTH = 200_000; // 200KB
8
-
9
  export async function POST(req: NextRequest) {
10
  try {
11
- const supabase = await createClient();
12
- const { data: { user } } = await supabase.auth.getUser();
13
-
14
- if (!user) {
15
- return NextResponse.json({ error: "Unauthorized. Please log in to analyze texts." }, { status: 401 });
16
- }
17
-
18
  const body = await req.json();
19
- let { text } = body;
20
 
21
  if (!text || typeof text !== "string" || text.trim().length < 50) {
22
  return NextResponse.json(
@@ -25,41 +14,8 @@ export async function POST(req: NextRequest) {
25
  );
26
  }
27
 
28
- // FIX v4.1: Input size validation
29
- if (text.length > MAX_TEXT_LENGTH) {
30
- return NextResponse.json(
31
- { error: `Text too long (${(text.length / 1000).toFixed(0)}KB). Maximum is ${MAX_TEXT_LENGTH / 1000}KB.` },
32
- { status: 400 }
33
- );
34
- }
35
-
36
- // FIX v4.1: REMOVED the XSS sanitization that corrupted contract text.
37
- // The old code did: text = text.replace(/</g, "&lt;").replace(/>/g, "&gt;");
38
- // This PERMANENTLY MUTATED the text before analysis, corrupting contracts
39
- // that contain < or > characters (e.g., "shall not exceed >$10,000").
40
- // Sanitization should happen at RENDER TIME in the frontend, not at analysis time.
41
- // The frontend already uses React which auto-escapes HTML in JSX.
42
-
43
- // Check scan limits — FIX v4.1: query the CORRECT table name
44
- const { data: profile } = await supabase
45
- .from("profiles")
46
- .select("plan, role, analyses_this_month")
47
- .eq("id", user.id)
48
- .single();
49
-
50
- const isAdmin = profile?.role === "admin";
51
- const plan = profile?.plan || "free";
52
-
53
- // FIX v4.1: Use analyses_this_month from profiles (already tracked), not a separate count query
54
- const scanCount = profile?.analyses_this_month ?? 0;
55
- const limit = isAdmin ? 999999 : plan === "free" ? 10 : 999999;
56
- if (scanCount >= limit) {
57
- return NextResponse.json({ error: "Monthly scan limit reached. Please upgrade to Pro." }, { status: 403 });
58
- }
59
-
60
  // Step 1: Submit to Gradio Space
61
- // FIX v4.3: Use the explicit api_name="analyze" set in app.py scan_btn.click()
62
- const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/analyze`, {
63
  method: "POST",
64
  headers: { "Content-Type": "application/json" },
65
  body: JSON.stringify({ data: [text] }),
@@ -72,16 +28,15 @@ export async function POST(req: NextRequest) {
72
  const { event_id } = await submitRes.json();
73
  if (!event_id) throw new Error("No event_id from Gradio");
74
 
75
- // FIX v4.1: Improved SSE polling with proper streaming support
76
- // Uses exponential backoff instead of fixed 1s intervals
77
  let resultText = "";
78
  let attempts = 0;
79
- const maxAttempts = 90; // 90 seconds max (increased from 60)
80
- let delay = 500; // Start at 500ms, increase
81
 
82
  while (attempts < maxAttempts) {
83
  const resultRes = await fetch(
84
- `${GRADIO_URL}/gradio_api/call/analyze/${event_id}`,
85
  { headers: { Accept: "text/event-stream" } }
86
  );
87
 
@@ -93,26 +48,31 @@ export async function POST(req: NextRequest) {
93
  throw new Error(errMatch ? errMatch[1] : "Analysis failed in backend");
94
  }
95
 
96
- await new Promise(r => setTimeout(r, delay));
97
- delay = Math.min(delay * 1.2, 2000); // Cap at 2s
98
  attempts++;
99
  }
100
 
101
  if (!resultText.includes("event: complete")) {
102
- throw new Error("Analysis timed out. The backend may be loading models. Please try again in 30 seconds.");
103
  }
104
 
105
  // Step 3: Parse the SSE data
 
 
106
  const completeIdx = resultText.indexOf("event: complete");
107
  const dataIdx = resultText.indexOf("data: ", completeIdx);
108
  if (dataIdx === -1) throw new Error("No data in response");
109
 
110
  const dataStr = resultText.substring(dataIdx + 6).trim();
111
 
 
 
112
  let gradioData: any[];
113
  try {
114
  gradioData = JSON.parse(dataStr);
115
  } catch {
 
116
  const cleaned = dataStr.replace(/[\x00-\x1f]/g, (ch: string) => {
117
  if (ch === "\n") return "\\n";
118
  if (ch === "\r") return "\\r";
@@ -123,11 +83,13 @@ export async function POST(req: NextRequest) {
123
  }
124
 
125
  // Step 4: Download the JSON report file (structured data)
 
126
  const jsonFileObj = gradioData[8];
127
  if (!jsonFileObj?.url) {
128
  throw new Error("No JSON report generated");
129
  }
130
 
 
131
  const jsonRes = await fetch(jsonFileObj.url);
132
  if (!jsonRes.ok) throw new Error("Failed to download analysis JSON");
133
  const analysisData = await jsonRes.json();
@@ -153,80 +115,37 @@ export async function POST(req: NextRequest) {
153
  }
154
  const results = Array.from(clauseMap.values());
155
 
156
- // FIX v4.1: Parse redlines from structured JSON data instead of fragile HTML regex
157
  const redlines: any[] = [];
158
-
159
- // Try to extract redlines from the analysis JSON first (if available)
160
- if (analysisData.redlines && Array.isArray(analysisData.redlines)) {
161
- for (const rl of analysisData.redlines) {
162
- redlines.push({
163
- clause_label: rl.clause_label || "",
164
- risk_level: rl.risk_level || "MEDIUM",
165
- original_text: rl.original_text || "",
166
- safe_alternative: rl.safe_alternative || "",
167
- template_alternative: rl.template_alternative || "",
168
- legal_basis: rl.legal_basis || "",
169
- consumer_standard: rl.consumer_standard || "",
170
- tier: rl.tier || "template",
171
- });
172
- }
173
- }
174
-
175
- // Fallback: try parsing from HTML only if no structured data
176
- if (redlines.length === 0) {
177
- const redlineHtml = typeof gradioData[7] === "string" ? gradioData[7] : "";
178
- if (redlineHtml.includes("Clause Redlining")) {
179
- const blocks = redlineHtml.split(/border-left:4px solid #/);
180
- for (let i = 1; i < blocks.length; i++) {
181
- const block = blocks[i];
182
- const labelMatch = block.match(/font-weight:600[^>]*>([^<]+)<\/span>\s*<span[^>]*font-weight:600[^>]*>([^<]+)/);
183
- const origMatch = block.match(/<del>([^<]*)<\/del>/);
184
- const safeBlock = block.match(/Suggested Alternative[\s\S]*?<div[^>]*color:#166534[^>]*>([\s\S]*?)<\/div>/);
185
- const legalMatch = block.match(/Legal Basis<\/div>\s*<div[^>]*>([^<]+)/);
186
- const consumerMatch = block.match(/Consumer Standard<\/div>\s*<div[^>]*>([^<]+)/);
187
- const isLLM = block.includes("LLM Refined");
188
-
189
- if (labelMatch) {
190
- redlines.push({
191
- clause_label: labelMatch[1].trim(),
192
- risk_level: labelMatch[2].trim(),
193
- original_text: origMatch ? origMatch[1].trim() : "",
194
- safe_alternative: safeBlock ? safeBlock[1].replace(/<[^>]+>/g, "").trim() : "",
195
- legal_basis: legalMatch ? legalMatch[1].trim() : "",
196
- consumer_standard: consumerMatch ? consumerMatch[1].trim() : "",
197
- tier: isLLM ? "llm_refined" : "template",
198
- });
199
- }
200
  }
201
  }
202
  }
203
 
204
  const modelStatus = analysisData.metadata?.model || "";
205
 
206
- // FIX v4.1: Increment scan count in profiles table
207
- await supabase
208
- .from("profiles")
209
- .update({ analyses_this_month: scanCount + 1 })
210
- .eq("id", user.id);
211
-
212
- // FIX v4.3: Save analysis to DB so it shows in history
213
- // Wrapped in Promise.resolve() because Supabase returns PromiseLike (no .catch)
214
- Promise.resolve(
215
- supabase.from("analyses").insert({
216
- user_id: user.id,
217
- total_clauses: totalClauses,
218
- flagged_count: flaggedCount,
219
- risk_score: riskScore,
220
- grade,
221
- clauses: results,
222
- entities: analysisData.entities || [],
223
- contradictions: analysisData.contradictions || [],
224
- obligations: analysisData.obligations || [],
225
- compliance: analysisData.compliance || {},
226
- model: modelStatus.includes("loaded") ? "ml" : "regex",
227
- })
228
- ).catch(() => {}); // fire-and-forget, don't block response
229
-
230
  return NextResponse.json({
231
  risk_score: riskScore,
232
  grade,
 
1
  import { NextRequest, NextResponse } from "next/server";
 
2
 
3
  const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";
4
 
 
 
 
5
  export async function POST(req: NextRequest) {
6
  try {
 
 
 
 
 
 
 
7
  const body = await req.json();
8
+ const { text } = body;
9
 
10
  if (!text || typeof text !== "string" || text.trim().length < 50) {
11
  return NextResponse.json(
 
14
  );
15
  }
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  // Step 1: Submit to Gradio Space
18
+ const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/_analysis_and_index`, {
 
19
  method: "POST",
20
  headers: { "Content-Type": "application/json" },
21
  body: JSON.stringify({ data: [text] }),
 
28
  const { event_id } = await submitRes.json();
29
  if (!event_id) throw new Error("No event_id from Gradio");
30
 
31
+ // Step 2: Poll for result (SSE)
32
+ // The Gradio API streams but we need the full response
33
  let resultText = "";
34
  let attempts = 0;
35
+ const maxAttempts = 60; // 60 seconds max
 
36
 
37
  while (attempts < maxAttempts) {
38
  const resultRes = await fetch(
39
+ `${GRADIO_URL}/gradio_api/call/_analysis_and_index/${event_id}`,
40
  { headers: { Accept: "text/event-stream" } }
41
  );
42
 
 
48
  throw new Error(errMatch ? errMatch[1] : "Analysis failed in backend");
49
  }
50
 
51
+ // Wait 1 second and retry
52
+ await new Promise(r => setTimeout(r, 1000));
53
  attempts++;
54
  }
55
 
56
  if (!resultText.includes("event: complete")) {
57
+ throw new Error("Analysis timed out");
58
  }
59
 
60
  // Step 3: Parse the SSE data
61
+ // Format: "event: complete\ndata: [...]"
62
+ // The data contains HTML with literal newlines, so we need to find 'data: ' after 'event: complete'
63
  const completeIdx = resultText.indexOf("event: complete");
64
  const dataIdx = resultText.indexOf("data: ", completeIdx);
65
  if (dataIdx === -1) throw new Error("No data in response");
66
 
67
  const dataStr = resultText.substring(dataIdx + 6).trim();
68
 
69
+ // Parse JSON — the HTML strings contain control characters so we need to handle that
70
+ // In JS, JSON.parse is more lenient with control chars in strings than Python's strict mode
71
  let gradioData: any[];
72
  try {
73
  gradioData = JSON.parse(dataStr);
74
  } catch {
75
+ // If direct parse fails, try replacing problematic control characters
76
  const cleaned = dataStr.replace(/[\x00-\x1f]/g, (ch: string) => {
77
  if (ch === "\n") return "\\n";
78
  if (ch === "\r") return "\\r";
 
83
  }
84
 
85
  // Step 4: Download the JSON report file (structured data)
86
+ // gradioData[8] is the JSON file object with { url, path, ... }
87
  const jsonFileObj = gradioData[8];
88
  if (!jsonFileObj?.url) {
89
  throw new Error("No JSON report generated");
90
  }
91
 
92
+ // Download immediately (temp files expire quickly)
93
  const jsonRes = await fetch(jsonFileObj.url);
94
  if (!jsonRes.ok) throw new Error("Failed to download analysis JSON");
95
  const analysisData = await jsonRes.json();
 
115
  }
116
  const results = Array.from(clauseMap.values());
117
 
118
+ // Parse redlines from HTML (gradioData[7])
119
  const redlines: any[] = [];
120
+ const redlineHtml = typeof gradioData[7] === "string" ? gradioData[7] : "";
121
+ if (redlineHtml.includes("Clause Redlining")) {
122
+ // Split by redline card borders
123
+ const blocks = redlineHtml.split(/border-left:4px solid #/);
124
+ for (let i = 1; i < blocks.length; i++) {
125
+ const block = blocks[i];
126
+ const labelMatch = block.match(/font-weight:600[^>]*>([^<]+)<\/span>\s*<span[^>]*font-weight:600[^>]*>([^<]+)/);
127
+ const origMatch = block.match(/<del>([^<]*)<\/del>/);
128
+ const safeBlock = block.match(/Suggested Alternative[\s\S]*?<div[^>]*color:#166534[^>]*>([\s\S]*?)<\/div>/);
129
+ const legalMatch = block.match(/Legal Basis<\/div>\s*<div[^>]*>([^<]+)/);
130
+ const consumerMatch = block.match(/Consumer Standard<\/div>\s*<div[^>]*>([^<]+)/);
131
+ const isLLM = block.includes("LLM Refined");
132
+
133
+ if (labelMatch) {
134
+ redlines.push({
135
+ clause_label: labelMatch[1].trim(),
136
+ risk_level: labelMatch[2].trim(),
137
+ original_text: origMatch ? origMatch[1].trim() : "",
138
+ safe_alternative: safeBlock ? safeBlock[1].replace(/<[^>]+>/g, "").trim() : "",
139
+ legal_basis: legalMatch ? legalMatch[1].trim() : "",
140
+ consumer_standard: consumerMatch ? consumerMatch[1].trim() : "",
141
+ tier: isLLM ? "llm_refined" : "template",
142
+ });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  }
144
  }
145
  }
146
 
147
  const modelStatus = analysisData.metadata?.model || "";
148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  return NextResponse.json({
150
  risk_score: riskScore,
151
  grade,
web/app/api/chat/route.ts CHANGED
@@ -1,35 +1,11 @@
1
  import { NextRequest, NextResponse } from "next/server";
2
- import { createClient } from "@/lib/supabase/server";
3
 
4
- /**
5
- * FIX v4.3: Chat route completely rewritten.
6
- *
7
- * ARCHITECTURE:
8
- * The Gradio ChatInterface uses gr.State for RAG embeddings — these are
9
- * per-browser-session and NOT accessible via the Gradio REST API. Every API
10
- * call creates a new session with empty state, so chat via Gradio API will
11
- * NEVER have contract context.
12
- *
13
- * The correct approach:
14
- * 1. PRIMARY: Use the FastAPI backend (/api/chat) which manages RAG sessions
15
- * with proper TTL-based expiry. The session_id comes from /api/analyze.
16
- * 2. FALLBACK: If FastAPI is unavailable, return a clear error directing
17
- * the user to use the Gradio Space directly.
18
- *
19
- * The old code tried to call a non-existent Gradio "chat" endpoint which
20
- * always failed. Removed the broken Gradio fallback entirely.
21
- */
22
  export async function POST(req: NextRequest) {
23
  try {
24
- const supabase = await createClient();
25
- const { data: { user } } = await supabase.auth.getUser();
26
-
27
- if (!user) {
28
- return NextResponse.json({ error: "Unauthorized. Please log in." }, { status: 401 });
29
- }
30
-
31
  const body = await req.json();
32
- const { message, history, session_id } = body;
33
 
34
  if (!message) {
35
  return NextResponse.json(
@@ -38,64 +14,55 @@ export async function POST(req: NextRequest) {
38
  );
39
  }
40
 
41
- if (message.length > 2000) {
42
- return NextResponse.json(
43
- { error: "Message too long (max 2000 characters)" },
44
- { status: 400 }
45
- );
46
- }
 
 
47
 
48
- // Try the FastAPI backend (it has proper RAG session management)
49
- const apiUrl = process.env.CLAUSEGUARD_API_URL || "";
50
- if (apiUrl && session_id) {
51
- try {
52
- const apiRes = await fetch(`${apiUrl}/api/chat`, {
53
- method: "POST",
54
- headers: { "Content-Type": "application/json" },
55
- body: JSON.stringify({ message, session_id, history: history || [] }),
56
- });
57
- if (apiRes.ok) {
58
- const data = await apiRes.json();
59
- return NextResponse.json({ response: data.response });
60
- }
61
- // If 404, session expired
62
- if (apiRes.status === 404) {
63
- return NextResponse.json({
64
- response: "⚠️ Your chat session has expired (sessions last 1 hour). " +
65
- "Please analyze the contract again to start a new chat session."
66
- });
67
- }
68
- } catch {
69
- // FastAPI backend unreachable — fall through to error message
70
- }
71
  }
72
 
73
- // No FastAPI backend available or no session_id
74
- // FIX v4.3: Return a clear, helpful message instead of trying a broken Gradio endpoint
75
- if (!apiUrl) {
76
- return NextResponse.json({
77
- response: "⚠️ Contract Q&A chat requires the FastAPI backend which is not currently deployed. " +
78
- "You can use the chat feature directly in the [Gradio Space](https://gaurv007-clauseguard.hf.space) " +
79
- "— analyze a contract there, then switch to the Q&A tab."
80
- });
 
 
 
81
  }
82
 
83
- if (!session_id) {
84
- return NextResponse.json({
85
- response: "⚠️ No active chat session. Please analyze a contract first — " +
86
- "the chat session is created when you run analysis."
87
- });
 
 
 
 
 
 
88
  }
89
 
90
- return NextResponse.json({
91
- response: "⚠️ Chat service is temporarily unavailable. Please try again, or use the " +
92
- "[Gradio Space](https://gaurv007-clauseguard.hf.space) directly."
93
- });
94
 
 
95
  } catch (error: any) {
96
  console.error("Chat error:", error.message);
97
  return NextResponse.json(
98
- { error: error.message || "Chat failed. Make sure you analyzed a contract first." },
99
  { status: 500 }
100
  );
101
  }
 
1
  import { NextRequest, NextResponse } from "next/server";
 
2
 
3
+ const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";
4
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  export async function POST(req: NextRequest) {
6
  try {
 
 
 
 
 
 
 
7
  const body = await req.json();
8
+ const { message, history } = body;
9
 
10
  if (!message) {
11
  return NextResponse.json(
 
14
  );
15
  }
16
 
17
+ // The Gradio ChatInterface endpoint is /chat
18
+ // It accepts: message (str), then the additional_inputs are handled by Gradio state
19
+ // We need to call the Gradio API with the message
20
+ const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/chat`, {
21
+ method: "POST",
22
+ headers: { "Content-Type": "application/json" },
23
+ body: JSON.stringify({ data: [message] }),
24
+ });
25
 
26
+ if (!submitRes.ok) {
27
+ const errText = await submitRes.text().catch(() => "");
28
+ throw new Error(`Chat submit failed (${submitRes.status}): ${errText}`);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  }
30
 
31
+ const { event_id } = await submitRes.json();
32
+ if (!event_id) throw new Error("No event_id from Gradio chat");
33
+
34
+ // Poll for streaming result
35
+ const resultRes = await fetch(
36
+ `${GRADIO_URL}/gradio_api/call/chat/${event_id}`,
37
+ { headers: { Accept: "text/event-stream" } }
38
+ );
39
+
40
+ if (!resultRes.ok) {
41
+ throw new Error(`Chat result failed: ${resultRes.status}`);
42
  }
43
 
44
+ const resultText = await resultRes.text();
45
+
46
+ // Find the complete event data
47
+ const dataMatch = resultText.match(/event:\s*complete\s*\ndata:\s*(.+)/);
48
+ if (!dataMatch) {
49
+ // Check for error
50
+ const errMatch = resultText.match(/event:\s*error\s*\ndata:\s*(.+)/);
51
+ if (errMatch) {
52
+ throw new Error(`Chat error: ${errMatch[1]}`);
53
+ }
54
+ throw new Error("No response from chatbot. Analyze a contract first in the Gradio Space, then try chatting.");
55
  }
56
 
57
+ const responseData = JSON.parse(dataMatch[1]);
58
+ // The ChatInterface returns the response as a string
59
+ const responseText = typeof responseData === "string" ? responseData : responseData[0] || "";
 
60
 
61
+ return NextResponse.json({ response: responseText });
62
  } catch (error: any) {
63
  console.error("Chat error:", error.message);
64
  return NextResponse.json(
65
+ { error: error.message || "Chat failed. Make sure you analyzed a contract in the Gradio Space first." },
66
  { status: 500 }
67
  );
68
  }
web/app/api/compare/route.ts CHANGED
@@ -1,17 +1,9 @@
1
  import { NextRequest, NextResponse } from "next/server";
2
- import { createClient } from "@/lib/supabase/server";
3
 
4
  const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";
5
 
6
  export async function POST(req: NextRequest) {
7
  try {
8
- const supabase = await createClient();
9
- const { data: { user } } = await supabase.auth.getUser();
10
-
11
- if (!user) {
12
- return NextResponse.json({ error: "Unauthorized. Please log in." }, { status: 401 });
13
- }
14
-
15
  const body = await req.json();
16
  const { text_a, text_b } = body;
17
 
@@ -22,13 +14,8 @@ export async function POST(req: NextRequest) {
22
  );
23
  }
24
 
25
- // FIX v4.3: REMOVED HTML-escaping that CORRUPTED contract text before analysis.
26
- // The old code did text_a.replace(/</g, "&lt;") which permanently mutated
27
- // the text (e.g., ">$10,000" → "&gt;$10,000"). Sanitization is the
28
- // frontend's job — React auto-escapes in JSX. Never mutate analysis input.
29
-
30
  // Call Gradio Space API
31
- const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/compare`, {
32
  method: "POST",
33
  headers: { "Content-Type": "application/json" },
34
  body: JSON.stringify({ data: [text_a, text_b] }),
@@ -41,44 +28,24 @@ export async function POST(req: NextRequest) {
41
  const { event_id } = await submitRes.json();
42
  if (!event_id) throw new Error("No event_id from Gradio");
43
 
44
- // Poll for result with retry
45
- let resultText = "";
46
- let attempts = 0;
47
- const maxAttempts = 60;
48
- let delay = 500;
49
-
50
- while (attempts < maxAttempts) {
51
- const resultRes = await fetch(
52
- `${GRADIO_URL}/gradio_api/call/compare/${event_id}`,
53
- { headers: { Accept: "text/event-stream" } }
54
- );
55
-
56
- resultText = await resultRes.text();
57
-
58
- if (resultText.includes("event: complete")) break;
59
- if (resultText.includes("event: error")) {
60
- const errMatch = resultText.match(/data:\s*(.+)/);
61
- throw new Error(errMatch ? errMatch[1] : "Comparison failed in backend");
62
- }
63
-
64
- await new Promise(r => setTimeout(r, delay));
65
- delay = Math.min(delay * 1.2, 2000);
66
- attempts++;
67
- }
68
 
69
- if (!resultText.includes("event: complete")) {
70
- throw new Error("Comparison timed out. Please try again.");
71
  }
72
 
73
- const completeIdx = resultText.indexOf("event: complete");
74
- const dataIdx = resultText.indexOf("data: ", completeIdx);
75
- if (dataIdx === -1) throw new Error("No data in response");
76
-
77
- const dataStr = resultText.substring(dataIdx + 6).trim();
78
- const gradioData = JSON.parse(dataStr);
79
 
 
80
  // gradioData[0] = comparison HTML
81
  // gradioData[1] = raw JSON comparison data
 
82
  const comparisonResult = gradioData[1];
83
  if (typeof comparisonResult === "object" && comparisonResult !== null) {
84
  return NextResponse.json(comparisonResult);
 
1
  import { NextRequest, NextResponse } from "next/server";
 
2
 
3
  const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";
4
 
5
  export async function POST(req: NextRequest) {
6
  try {
 
 
 
 
 
 
 
7
  const body = await req.json();
8
  const { text_a, text_b } = body;
9
 
 
14
  );
15
  }
16
 
 
 
 
 
 
17
  // Call Gradio Space API
18
+ const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/run_comparison`, {
19
  method: "POST",
20
  headers: { "Content-Type": "application/json" },
21
  body: JSON.stringify({ data: [text_a, text_b] }),
 
28
  const { event_id } = await submitRes.json();
29
  if (!event_id) throw new Error("No event_id from Gradio");
30
 
31
+ // Poll for result
32
+ const resultRes = await fetch(
33
+ `${GRADIO_URL}/gradio_api/call/run_comparison/${event_id}`,
34
+ { headers: { Accept: "text/event-stream" } }
35
+ );
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ if (!resultRes.ok) {
38
+ throw new Error(`Gradio result failed: ${resultRes.status}`);
39
  }
40
 
41
+ const resultText = await resultRes.text();
42
+ const dataMatch = resultText.match(/event:\s*complete\s*\ndata:\s*(.+)/);
43
+ if (!dataMatch) throw new Error("No complete event from Gradio");
 
 
 
44
 
45
+ const gradioData = JSON.parse(dataMatch[1]);
46
  // gradioData[0] = comparison HTML
47
  // gradioData[1] = raw JSON comparison data
48
+
49
  const comparisonResult = gradioData[1];
50
  if (typeof comparisonResult === "object" && comparisonResult !== null) {
51
  return NextResponse.json(comparisonResult);
web/app/api/parse-upload/route.ts CHANGED
@@ -1,20 +1,9 @@
1
  import { NextRequest, NextResponse } from "next/server";
2
- import { createClient } from "@/lib/supabase/server";
3
 
4
  export const runtime = "nodejs";
5
 
6
- // Add a 5MB size limit
7
- const MAX_FILE_SIZE = 5 * 1024 * 1024;
8
-
9
  export async function POST(req: NextRequest) {
10
  try {
11
- const supabase = await createClient();
12
- const { data: { user } } = await supabase.auth.getUser();
13
-
14
- if (!user) {
15
- return NextResponse.json({ error: "Unauthorized. Please log in." }, { status: 401 });
16
- }
17
-
18
  const formData = await req.formData();
19
  const file = formData.get("file") as File | null;
20
 
@@ -22,10 +11,6 @@ export async function POST(req: NextRequest) {
22
  return NextResponse.json({ error: "No file uploaded" }, { status: 400 });
23
  }
24
 
25
- if (file.size > MAX_FILE_SIZE) {
26
- return NextResponse.json({ error: "File exceeds 5MB size limit" }, { status: 400 });
27
- }
28
-
29
  const name = file.name.toLowerCase();
30
  const buffer = Buffer.from(await file.arrayBuffer());
31
  let text = "";
@@ -33,20 +18,13 @@ export async function POST(req: NextRequest) {
33
  if (name.endsWith(".txt") || name.endsWith(".md")) {
34
  text = new TextDecoder().decode(buffer);
35
  } else if (name.endsWith(".pdf")) {
36
- // pdf-parse v2 API: named export PDFParse class + worker import
37
- try {
38
- await import("pdf-parse/worker");
39
- const { PDFParse } = await import("pdf-parse");
40
- const parser = new PDFParse({ data: buffer });
41
- const result = await parser.getText();
42
- text = result.text;
43
- await parser.destroy();
44
- } catch (pdfErr: any) {
45
- console.error("PDF parse error:", pdfErr);
46
- return NextResponse.json({
47
- error: "PDF parsing failed. Please copy-paste the text directly, or use the Gradio Space which has OCR support."
48
- }, { status: 400 });
49
- }
50
  } else if (name.endsWith(".docx")) {
51
  const mammoth = (await import("mammoth")).default;
52
  const result = await mammoth.extractRawText({ buffer });
 
1
  import { NextRequest, NextResponse } from "next/server";
 
2
 
3
  export const runtime = "nodejs";
4
 
 
 
 
5
  export async function POST(req: NextRequest) {
6
  try {
 
 
 
 
 
 
 
7
  const formData = await req.formData();
8
  const file = formData.get("file") as File | null;
9
 
 
11
  return NextResponse.json({ error: "No file uploaded" }, { status: 400 });
12
  }
13
 
 
 
 
 
14
  const name = file.name.toLowerCase();
15
  const buffer = Buffer.from(await file.arrayBuffer());
16
  let text = "";
 
18
  if (name.endsWith(".txt") || name.endsWith(".md")) {
19
  text = new TextDecoder().decode(buffer);
20
  } else if (name.endsWith(".pdf")) {
21
+ // pdf-parse v2
22
+ await import("pdf-parse/worker");
23
+ const { PDFParse } = await import("pdf-parse");
24
+ const parser = new PDFParse({ data: buffer });
25
+ const result = await parser.getText();
26
+ text = result.text;
27
+ await parser.destroy();
 
 
 
 
 
 
 
28
  } else if (name.endsWith(".docx")) {
29
  const mammoth = (await import("mammoth")).default;
30
  const result = await mammoth.extractRawText({ buffer });
web/app/api/redline/route.ts CHANGED
@@ -1,25 +1,9 @@
1
  import { NextRequest, NextResponse } from "next/server";
2
- import { createClient } from "@/lib/supabase/server";
3
 
4
- /**
5
- * FIX v4.3: Redline route now works through the Gradio Space directly.
6
- * The old code pointed to a non-existent FastAPI Space (gaurv007-clauseguard-api.hf.space).
7
- * Since redlining is already part of the analyze pipeline (returned in analysis results),
8
- * this endpoint is primarily for re-running redlines on existing text.
9
- */
10
-
11
- const GRADIO_URL = process.env.CLAUSEGUARD_GRADIO_URL || "https://gaurv007-clauseguard.hf.space";
12
- const API_URL = process.env.CLAUSEGUARD_API_URL || "";
13
 
14
  export async function POST(req: NextRequest) {
15
  try {
16
- const supabase = await createClient();
17
- const { data: { user } } = await supabase.auth.getUser();
18
-
19
- if (!user) {
20
- return NextResponse.json({ error: "Unauthorized. Please log in." }, { status: 401 });
21
- }
22
-
23
  const body = await req.json();
24
  const { session_id, text, use_llm } = body;
25
 
@@ -30,89 +14,19 @@ export async function POST(req: NextRequest) {
30
  );
31
  }
32
 
33
- // Try FastAPI backend first (if configured and available)
34
- if (API_URL) {
35
- try {
36
- const response = await fetch(`${API_URL}/api/redline`, {
37
- method: "POST",
38
- headers: { "Content-Type": "application/json" },
39
- body: JSON.stringify({ session_id, text, use_llm: use_llm ?? true }),
40
- });
41
-
42
- if (response.ok) {
43
- const result = await response.json();
44
- return NextResponse.json(result);
45
- }
46
- } catch {
47
- // Fall through to Gradio approach
48
- }
49
- }
50
-
51
- // Fallback: If text is provided, run full analysis via Gradio (includes redlines)
52
- if (text) {
53
- if (text.trim().length < 50) {
54
- return NextResponse.json({ error: "Text too short (min 50 chars)" }, { status: 400 });
55
- }
56
-
57
- const submitRes = await fetch(`${GRADIO_URL}/gradio_api/call/analyze`, {
58
- method: "POST",
59
- headers: { "Content-Type": "application/json" },
60
- body: JSON.stringify({ data: [text] }),
61
- });
62
 
63
- if (!submitRes.ok) {
64
- throw new Error(`Gradio submit failed: ${submitRes.status}`);
65
- }
66
-
67
- const { event_id } = await submitRes.json();
68
- if (!event_id) throw new Error("No event_id from Gradio");
69
-
70
- let resultText = "";
71
- let attempts = 0;
72
- while (attempts < 90) {
73
- const resultRes = await fetch(
74
- `${GRADIO_URL}/gradio_api/call/analyze/${event_id}`,
75
- { headers: { Accept: "text/event-stream" } }
76
- );
77
- resultText = await resultRes.text();
78
- if (resultText.includes("event: complete")) break;
79
- if (resultText.includes("event: error")) throw new Error("Redline analysis failed");
80
- await new Promise(r => setTimeout(r, 1000));
81
- attempts++;
82
- }
83
-
84
- if (!resultText.includes("event: complete")) {
85
- throw new Error("Analysis timed out");
86
- }
87
-
88
- // Parse the result to extract redlines from the JSON report
89
- const completeIdx = resultText.indexOf("event: complete");
90
- const dataIdx = resultText.indexOf("data: ", completeIdx);
91
- if (dataIdx === -1) throw new Error("No data in response");
92
-
93
- const dataStr = resultText.substring(dataIdx + 6).trim();
94
- const gradioData = JSON.parse(dataStr);
95
-
96
- // Download JSON report file
97
- const jsonFileObj = gradioData[8];
98
- if (jsonFileObj?.url) {
99
- const jsonRes = await fetch(jsonFileObj.url);
100
- if (jsonRes.ok) {
101
- const analysisData = await jsonRes.json();
102
- if (analysisData.redlines) {
103
- return NextResponse.json({ redlines: analysisData.redlines, count: analysisData.redlines.length });
104
- }
105
- }
106
- }
107
-
108
- return NextResponse.json({ redlines: [], count: 0 });
109
  }
110
 
111
- // No FastAPI backend and only session_id provided (can't access Gradio sessions)
112
- return NextResponse.json({
113
- error: "Redline by session_id requires the FastAPI backend. Provide contract text instead, or use the analysis results which already include redline suggestions.",
114
- }, { status: 400 });
115
-
116
  } catch (error: any) {
117
  console.error("Redline error:", error.message);
118
  return NextResponse.json(
 
1
  import { NextRequest, NextResponse } from "next/server";
 
2
 
3
+ const API_URL = process.env.CLAUSEGUARD_API_URL || "https://gaurv007-clauseguard-api.hf.space";
 
 
 
 
 
 
 
 
4
 
5
  export async function POST(req: NextRequest) {
6
  try {
 
 
 
 
 
 
 
7
  const body = await req.json();
8
  const { session_id, text, use_llm } = body;
9
 
 
14
  );
15
  }
16
 
17
+ const response = await fetch(`${API_URL}/api/redline`, {
18
+ method: "POST",
19
+ headers: { "Content-Type": "application/json" },
20
+ body: JSON.stringify({ session_id, text, use_llm: use_llm ?? true }),
21
+ });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
+ if (!response.ok) {
24
+ const err = await response.text().catch(() => "");
25
+ throw new Error(err || `Backend error: ${response.status}`);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
 
28
+ const result = await response.json();
29
+ return NextResponse.json(result);
 
 
 
30
  } catch (error: any) {
31
  console.error("Redline error:", error.message);
32
  return NextResponse.json(
web/app/api/subscribe/create/route.ts CHANGED
@@ -13,7 +13,7 @@ export async function POST(req: NextRequest) {
13
  return NextResponse.json({ error: "Invalid plan" }, { status: 400 });
14
  }
15
 
16
- const planId = PLANS[plan as keyof typeof PLANS].razorpay_plan_id;
17
  if (!planId) return NextResponse.json({ error: "Plan not configured" }, { status: 500 });
18
 
19
  const razorpay = getRazorpay();
 
13
  return NextResponse.json({ error: "Invalid plan" }, { status: 400 });
14
  }
15
 
16
+ const planId = PLANS[plan].razorpay_plan_id;
17
  if (!planId) return NextResponse.json({ error: "Plan not configured" }, { status: 500 });
18
 
19
  const razorpay = getRazorpay();
web/app/auth/callback/route.ts CHANGED
@@ -4,14 +4,9 @@ import { NextResponse } from "next/server";
4
  export async function GET(request: Request) {
5
  const requestUrl = new URL(request.url);
6
  const code = requestUrl.searchParams.get("code");
7
- let next = requestUrl.searchParams.get("next") || "/dashboard-pages/dashboard";
8
  const origin = requestUrl.origin;
9
 
10
- // Prevent open redirect
11
- if (next && !next.startsWith("/")) {
12
- next = "/dashboard-pages/dashboard";
13
- }
14
-
15
  if (code) {
16
  const supabase = await createClient();
17
  const { error } = await supabase.auth.exchangeCodeForSession(code);
 
4
  export async function GET(request: Request) {
5
  const requestUrl = new URL(request.url);
6
  const code = requestUrl.searchParams.get("code");
7
+ const next = requestUrl.searchParams.get("next") || "/dashboard-pages/dashboard";
8
  const origin = requestUrl.origin;
9
 
 
 
 
 
 
10
  if (code) {
11
  const supabase = await createClient();
12
  const { error } = await supabase.auth.exchangeCodeForSession(code);
web/app/auth/login/page.tsx CHANGED
@@ -1,13 +1,13 @@
1
  "use client";
2
 
3
- import { useState, useEffect, Suspense } from "react";
4
  import { createClient } from "@/lib/supabase/client";
5
  import { getBaseUrl } from "@/lib/auth-url";
6
  import Link from "next/link";
7
- import { useSearchParams, useRouter } from "next/navigation";
8
  import { ArrowLeft, Mail, Loader2 } from "lucide-react";
9
 
10
- function LoginForm() {
11
  const [email, setEmail] = useState("");
12
  const [password, setPassword] = useState("");
13
  const [error, setError] = useState("");
@@ -16,22 +16,21 @@ function LoginForm() {
16
  const [magicSent, setMagicSent] = useState(false);
17
  const supabase = createClient();
18
  const searchParams = useSearchParams();
19
- const router = useRouter();
20
  const next = searchParams.get("next") || "/dashboard-pages/dashboard";
21
 
22
  // Check if already logged in — redirect immediately
23
  useEffect(() => {
24
  supabase.auth.getUser().then(({ data: { user } }) => {
25
- if (user) { router.push(next); }
26
  else { setChecking(false); }
27
  });
28
- }, [next, supabase.auth, router]);
29
 
30
  async function handleLogin(e: React.FormEvent) {
31
  e.preventDefault(); setLoading(true); setError("");
32
  const { error } = await supabase.auth.signInWithPassword({ email, password });
33
  if (error) { setError(error.message); setLoading(false); }
34
- else { router.push(next); }
35
  }
36
 
37
  async function handleMagicLink() {
@@ -120,15 +119,3 @@ function LoginForm() {
120
  </div>
121
  );
122
  }
123
-
124
- export default function LoginPage() {
125
- return (
126
- <Suspense fallback={
127
- <div className="min-h-screen flex items-center justify-center bg-white">
128
- <Loader2 className="w-5 h-5 text-zinc-300 animate-spin" />
129
- </div>
130
- }>
131
- <LoginForm />
132
- </Suspense>
133
- );
134
- }
 
1
  "use client";
2
 
3
+ import { useState, useEffect } from "react";
4
  import { createClient } from "@/lib/supabase/client";
5
  import { getBaseUrl } from "@/lib/auth-url";
6
  import Link from "next/link";
7
+ import { useSearchParams } from "next/navigation";
8
  import { ArrowLeft, Mail, Loader2 } from "lucide-react";
9
 
10
+ export default function LoginPage() {
11
  const [email, setEmail] = useState("");
12
  const [password, setPassword] = useState("");
13
  const [error, setError] = useState("");
 
16
  const [magicSent, setMagicSent] = useState(false);
17
  const supabase = createClient();
18
  const searchParams = useSearchParams();
 
19
  const next = searchParams.get("next") || "/dashboard-pages/dashboard";
20
 
21
  // Check if already logged in — redirect immediately
22
  useEffect(() => {
23
  supabase.auth.getUser().then(({ data: { user } }) => {
24
+ if (user) { window.location.href = next; }
25
  else { setChecking(false); }
26
  });
27
+ }, []);
28
 
29
  async function handleLogin(e: React.FormEvent) {
30
  e.preventDefault(); setLoading(true); setError("");
31
  const { error } = await supabase.auth.signInWithPassword({ email, password });
32
  if (error) { setError(error.message); setLoading(false); }
33
+ else { window.location.href = next; }
34
  }
35
 
36
  async function handleMagicLink() {
 
119
  </div>
120
  );
121
  }
 
 
 
 
 
 
 
 
 
 
 
 
web/app/auth/signup/page.tsx CHANGED
@@ -4,7 +4,6 @@ import { useState, useEffect } from "react";
4
  import { createClient } from "@/lib/supabase/client";
5
  import { getBaseUrl } from "@/lib/auth-url";
6
  import Link from "next/link";
7
- import { useRouter } from "next/navigation";
8
  import { ArrowLeft, Loader2 } from "lucide-react";
9
 
10
  export default function SignupPage() {
@@ -15,15 +14,14 @@ export default function SignupPage() {
15
  const [checking, setChecking] = useState(true);
16
  const [done, setDone] = useState(false);
17
  const supabase = createClient();
18
- const router = useRouter();
19
 
20
  // Redirect if already logged in
21
  useEffect(() => {
22
  supabase.auth.getUser().then(({ data: { user } }) => {
23
- if (user) { router.push("/dashboard-pages/dashboard"); }
24
  else { setChecking(false); }
25
  });
26
- }, [router, supabase.auth]);
27
 
28
  async function handleSignup(e: React.FormEvent) {
29
  e.preventDefault(); setLoading(true); setError("");
 
4
  import { createClient } from "@/lib/supabase/client";
5
  import { getBaseUrl } from "@/lib/auth-url";
6
  import Link from "next/link";
 
7
  import { ArrowLeft, Loader2 } from "lucide-react";
8
 
9
  export default function SignupPage() {
 
14
  const [checking, setChecking] = useState(true);
15
  const [done, setDone] = useState(false);
16
  const supabase = createClient();
 
17
 
18
  // Redirect if already logged in
19
  useEffect(() => {
20
  supabase.auth.getUser().then(({ data: { user } }) => {
21
+ if (user) { window.location.href = "/dashboard-pages/dashboard"; }
22
  else { setChecking(false); }
23
  });
24
+ }, []);
25
 
26
  async function handleSignup(e: React.FormEvent) {
27
  e.preventDefault(); setLoading(true); setError("");
web/app/dashboard-pages/analyze/loading.tsx DELETED
@@ -1,50 +0,0 @@
1
- /**
2
- * ClauseGuard — Loading skeleton for Analyze page
3
- * FIX v4.1: Added loading.tsx for instant navigation feedback
4
- * Next.js App Router automatically shows this while the page component loads
5
- */
6
-
7
- import { ScanText } from "lucide-react";
8
-
9
- export default function AnalyzeLoading() {
10
- return (
11
- <div className="min-h-screen bg-zinc-50/30">
12
- <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-6 sm:py-10">
13
- {/* Header skeleton */}
14
- <div className="mb-6 sm:mb-8">
15
- <h1 className="text-xl sm:text-2xl font-semibold tracking-tight flex items-center gap-2">
16
- <ScanText className="w-5 h-5 sm:w-6 sm:h-6 text-zinc-400" />
17
- Scan a document
18
- </h1>
19
- <div className="mt-2 h-4 w-96 bg-zinc-200 rounded animate-pulse" />
20
- </div>
21
-
22
- <div className="grid lg:grid-cols-5 gap-4 sm:gap-6">
23
- {/* Input panel skeleton */}
24
- <div className="lg:col-span-2">
25
- <div className="bg-white border border-zinc-200 rounded-xl p-3 sm:p-4">
26
- <div className="w-full h-[260px] sm:h-[360px] bg-zinc-100 rounded-lg animate-pulse" />
27
- <div className="mt-3 flex gap-2">
28
- <div className="flex-1 h-10 bg-zinc-900/10 rounded-lg animate-pulse" />
29
- <div className="w-20 h-10 bg-zinc-100 rounded-lg animate-pulse" />
30
- <div className="w-10 h-10 bg-zinc-100 rounded-lg animate-pulse" />
31
- </div>
32
- </div>
33
- </div>
34
-
35
- {/* Results panel skeleton */}
36
- <div className="lg:col-span-3">
37
- <div className="bg-white border border-zinc-200 rounded-xl p-4 sm:p-5">
38
- <div className="flex items-center justify-center h-48 text-zinc-300">
39
- <div className="text-center">
40
- <ScanText className="w-10 h-10 mx-auto mb-3 text-zinc-200" />
41
- <div className="h-4 w-48 bg-zinc-100 rounded mx-auto animate-pulse" />
42
- </div>
43
- </div>
44
- </div>
45
- </div>
46
- </div>
47
- </div>
48
- </div>
49
- );
50
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
web/app/dashboard-pages/analyze/page.tsx CHANGED
@@ -3,7 +3,7 @@
3
  import { useState, useRef, useEffect } from "react";
4
  import {
5
  ScanText, ScanLine, TriangleAlert, CircleAlert, CircleCheck, Info,
6
- ChevronDown, ChevronUp, Copy, Check, Upload, FileText,
7
  ShieldCheck, ShieldAlert, Scale, Gavel, Ban, Globe, Eye, Stamp, FileX,
8
  Lock, Sparkles as SparklesIcon, X, Layers, Landmark, Briefcase,
9
  AlertTriangle, Tag, BookOpen, ClipboardList, DollarSign,
@@ -12,7 +12,6 @@ import {
12
  ShieldOff, CircleSlash, MessageSquareWarning, Construction,
13
  MessageSquare, Send, Loader2
14
  } from "lucide-react";
15
- import { ExportDropdown } from "@/components/export-dropdown";
16
 
17
  interface Cat { name: string; severity: string; description?: string; confidence?: number; }
18
  interface Clause { text: string; categories: Cat[]; }
@@ -20,7 +19,7 @@ interface Entity { text: string; type: string; score?: number; source?: string;
20
  interface Contradiction { type: string; explanation: string; severity: string; confidence?: number; source?: string; }
21
  interface Obligation { type: string; party: string; description: string; deadline: string; priority?: number; }
22
  interface ComplianceCheck { requirement: string; description: string; severity: string; status: string; matched_keywords: string[]; context?: string[]; }
23
- interface ComplianceReg { description: string; compliance_rate: number; checks: ComplianceCheck[]; overall_status: string; negated_count?: number; ambiguous_count?: number; note?: string; }
24
  interface Redline {
25
  original_text: string;
26
  clause_label: string;
@@ -101,7 +100,6 @@ const COMPLIANCE_STATUS: Record<string, { bg: string; text: string; border: stri
101
  PARTIAL: { bg: "bg-amber-50", text: "text-amber-700", border: "border-amber-200" },
102
  "NON-COMPLIANT": { bg: "bg-red-50", text: "text-red-700", border: "border-red-200" },
103
  WARNING: { bg: "bg-orange-50", text: "text-orange-700", border: "border-orange-200" },
104
- NOT_APPLICABLE: { bg: "bg-zinc-50", text: "text-zinc-400", border: "border-zinc-200" },
105
  };
106
 
107
  function SourceBadge({ isML, confidence }: { isML: boolean; confidence?: number | null }) {
@@ -235,6 +233,17 @@ export default function AnalyzePage() {
235
  if (fileInputRef.current) fileInputRef.current.value = "";
236
  }
237
 
 
 
 
 
 
 
 
 
 
 
 
238
  function handleCopy() {
239
  if (!results) return;
240
  const summary = `ClauseGuard Report\nRisk: ${results.risk_score}/100 (Grade ${results.grade})\n${results.flagged_count} of ${results.total_clauses} clauses flagged\nEntities: ${results.entities.length}\nContradictions: ${results.contradictions.length}\nObligations: ${results.obligations.length}\n\n` +
@@ -360,51 +369,41 @@ export default function AnalyzePage() {
360
  <div className="lg:col-span-3">
361
  {results ? (
362
  <div className="space-y-3 sm:space-y-4">
363
- {/* Score Card — redesigned with circular gauge */}
364
- <div className="bg-white border border-zinc-200 rounded-2xl p-5 sm:p-6 shadow-sm">
365
- <div className="flex items-center gap-5 sm:gap-6">
366
- {/* Circular score gauge */}
367
- <div className="relative w-20 h-20 sm:w-24 sm:h-24 shrink-0">
368
- <svg className="w-full h-full -rotate-90" viewBox="0 0 100 100">
369
- <circle cx="50" cy="50" r="42" fill="none" stroke="#f4f4f5" strokeWidth="8" />
370
- <circle cx="50" cy="50" r="42" fill="none"
371
- stroke={results.risk_score >= 60 ? "#ef4444" : results.risk_score >= 30 ? "#f59e0b" : "#22c55e"}
372
- strokeWidth="8" strokeLinecap="round"
373
- strokeDasharray={`${results.risk_score * 2.64} 264`}
374
- className="transition-all duration-1000 ease-out" />
375
- </svg>
376
- <div className="absolute inset-0 flex flex-col items-center justify-center">
377
- <span className="text-xl sm:text-2xl font-bold tracking-tight">{results.risk_score}</span>
378
- <span className="text-[9px] text-zinc-400 -mt-0.5">/ 100</span>
379
  </div>
380
- </div>
381
-
382
- <div className="flex-1 min-w-0">
383
- <div className="flex items-center gap-2 mb-2">
384
- <span className={`text-sm font-bold px-3 py-1 rounded-lg border ${GRADE_STYLE[results.grade] || GRADE_STYLE.C}`}>
385
- Grade {results.grade}
386
- </span>
387
- <span className="text-xs text-zinc-400">
388
- {results.risk_score < 20 ? "Low Risk" : results.risk_score < 40 ? "Moderate Risk" : results.risk_score < 60 ? "Elevated Risk" : results.risk_score < 80 ? "High Risk" : "Critical Risk"}
389
- </span>
390
  </div>
 
 
 
 
 
391
 
392
- {/* Severity breakdown compact horizontal */}
393
- <div className="grid grid-cols-4 gap-1.5">
394
- {(["CRITICAL", "HIGH", "MEDIUM", "LOW"] as const).map(sev => {
395
- const c = SEV_CONFIG[sev];
396
- return (
397
- <div key={sev} className={`text-center py-1.5 px-1 rounded-lg ${c.bg} border ${c.border}`}>
398
- <span className={`text-sm font-bold ${c.text}`}>{sevCounts[sev]}</span>
399
- <p className={`text-[9px] ${c.text} opacity-70`}>{c.label}</p>
400
- </div>
401
- );
402
- })}
403
- </div>
404
 
405
- {/* Meta stats */}
406
- <div className="mt-2.5 flex items-center gap-2 text-[10px] text-zinc-400 flex-wrap">
407
- <span className="flex items-center gap-1"><Layers className="w-3 h-3" />{results.total_clauses} clauses</span>
408
  <span className="w-px h-3 bg-zinc-200" />
409
  <span className="flex items-center gap-1"><Tag className="w-3 h-3" />{results.entities.length} entities</span>
410
  <span className="w-px h-3 bg-zinc-200" />
@@ -412,11 +411,9 @@ export default function AnalyzePage() {
412
  <span className="w-px h-3 bg-zinc-200" />
413
  <span className="flex items-center gap-1"><Clock className="w-3 h-3" />{results.latency_ms}ms</span>
414
  <span className="w-px h-3 bg-zinc-200" />
415
- <span className="flex items-center gap-1">
416
- {results.model !== "regex" ? <><Cpu className="w-3 h-3" /> ML Models</> : <><FileSearch className="w-3 h-3" /> Pattern fallback</>}
417
- </span>
418
- </div>
419
- </div>
420
  </div>
421
  </div>
422
 
@@ -436,11 +433,11 @@ export default function AnalyzePage() {
436
  </button>
437
  ))}
438
  </div>
439
- <div className="flex gap-1.5 self-end sm:self-auto items-center">
440
  <button onClick={handleCopy} className="p-2 rounded-md hover:bg-zinc-100 text-zinc-400 hover:text-zinc-600 transition-colors" title="Copy summary">
441
  {copied ? <Check className="w-4 h-4 text-emerald-500" /> : <Copy className="w-4 h-4" />}
442
  </button>
443
- <ExportDropdown results={results} />
444
  </div>
445
  </div>
446
 
@@ -460,7 +457,7 @@ export default function AnalyzePage() {
460
  </div>
461
 
462
  {/* Tab Content */}
463
- <div className="max-h-[450px] sm:max-h-[560px] overflow-y-auto pr-1 scroll-smooth">
464
 
465
  {/* Clauses */}
466
  {activeTab === "clauses" && (
@@ -660,9 +657,8 @@ export default function AnalyzePage() {
660
  </div>
661
  ) : Object.entries(results.compliance).map(([regName, reg]) => {
662
  const status = COMPLIANCE_STATUS[reg.overall_status] || COMPLIANCE_STATUS.PARTIAL;
663
- const isNA = reg.overall_status === "NOT_APPLICABLE";
664
  return (
665
- <div key={regName} className={`bg-white border border-zinc-200 rounded-xl overflow-hidden ${isNA ? "opacity-60" : ""}`}>
666
  <div className={`flex flex-col sm:flex-row sm:items-center justify-between p-4 border-b ${status.bg} ${status.border}`}>
667
  <div>
668
  <div className="flex items-center gap-2 flex-wrap">
@@ -681,15 +677,10 @@ export default function AnalyzePage() {
681
  <p className="text-[11px] text-zinc-500 mt-0.5">{reg.description}</p>
682
  </div>
683
  <div className="text-left sm:text-right mt-2 sm:mt-0">
684
- <span className={`text-lg font-bold ${status.text}`}>{isNA ? "N/A" : `${reg.compliance_rate}%`}</span>
685
  <span className={`text-[11px] font-medium block ${status.text}`}>{reg.overall_status}</span>
686
  </div>
687
  </div>
688
- {isNA ? (
689
- <div className="p-3 text-xs text-zinc-400 italic">
690
- {reg.note || `${regName} does not appear applicable to this contract type.`}
691
- </div>
692
- ) : (
693
  <div className="p-3 space-y-0.5">
694
  {reg.checks.map((check, i) => {
695
  const sev = SEV_CONFIG[check.severity] || SEV_CONFIG.MEDIUM;
@@ -716,7 +707,6 @@ export default function AnalyzePage() {
716
  );
717
  })}
718
  </div>
719
- )}
720
  </div>
721
  );
722
  })}
@@ -857,18 +847,8 @@ export default function AnalyzePage() {
857
  )}
858
  </div>
859
  </div>
860
- ) : loading ? (
861
- <div className="bg-white border border-zinc-200 rounded-2xl h-[300px] sm:h-[420px] flex flex-col items-center justify-center shadow-sm">
862
- <div className="relative w-16 h-16 mb-4">
863
- <div className="absolute inset-0 rounded-full border-2 border-zinc-100" />
864
- <div className="absolute inset-0 rounded-full border-2 border-t-zinc-900 animate-spin" />
865
- <ScanLine className="absolute inset-0 m-auto w-6 h-6 text-zinc-400" />
866
- </div>
867
- <p className="text-sm font-medium text-zinc-700">Analyzing contract...</p>
868
- <p className="text-xs text-zinc-400 mt-1">Running 6 ML models · This may take 30-60 seconds</p>
869
- </div>
870
  ) : (
871
- <div className="bg-white border border-dashed border-zinc-200 rounded-2xl h-[300px] sm:h-[420px] flex flex-col items-center justify-center">
872
  <ScanText className="w-10 h-10 text-zinc-200 mb-3" />
873
  <p className="text-sm text-zinc-300">Paste text and analyze to see results</p>
874
  </div>
 
3
  import { useState, useRef, useEffect } from "react";
4
  import {
5
  ScanText, ScanLine, TriangleAlert, CircleAlert, CircleCheck, Info,
6
+ FileDown, ChevronDown, ChevronUp, Copy, Check, Upload, FileText,
7
  ShieldCheck, ShieldAlert, Scale, Gavel, Ban, Globe, Eye, Stamp, FileX,
8
  Lock, Sparkles as SparklesIcon, X, Layers, Landmark, Briefcase,
9
  AlertTriangle, Tag, BookOpen, ClipboardList, DollarSign,
 
12
  ShieldOff, CircleSlash, MessageSquareWarning, Construction,
13
  MessageSquare, Send, Loader2
14
  } from "lucide-react";
 
15
 
16
  interface Cat { name: string; severity: string; description?: string; confidence?: number; }
17
  interface Clause { text: string; categories: Cat[]; }
 
19
  interface Contradiction { type: string; explanation: string; severity: string; confidence?: number; source?: string; }
20
  interface Obligation { type: string; party: string; description: string; deadline: string; priority?: number; }
21
  interface ComplianceCheck { requirement: string; description: string; severity: string; status: string; matched_keywords: string[]; context?: string[]; }
22
+ interface ComplianceReg { description: string; compliance_rate: number; checks: ComplianceCheck[]; overall_status: string; negated_count?: number; ambiguous_count?: number; }
23
  interface Redline {
24
  original_text: string;
25
  clause_label: string;
 
100
  PARTIAL: { bg: "bg-amber-50", text: "text-amber-700", border: "border-amber-200" },
101
  "NON-COMPLIANT": { bg: "bg-red-50", text: "text-red-700", border: "border-red-200" },
102
  WARNING: { bg: "bg-orange-50", text: "text-orange-700", border: "border-orange-200" },
 
103
  };
104
 
105
  function SourceBadge({ isML, confidence }: { isML: boolean; confidence?: number | null }) {
 
233
  if (fileInputRef.current) fileInputRef.current.value = "";
234
  }
235
 
236
+ async function handleDownloadPDF() {
237
+ if (!results) return;
238
+ try {
239
+ const res = await fetch("/api/pdf/report", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(results) });
240
+ const blob = await res.blob();
241
+ const url = URL.createObjectURL(blob);
242
+ const a = document.createElement("a"); a.href = url; a.download = "clauseguard-report.pdf"; a.click();
243
+ URL.revokeObjectURL(url);
244
+ } catch {}
245
+ }
246
+
247
  function handleCopy() {
248
  if (!results) return;
249
  const summary = `ClauseGuard Report\nRisk: ${results.risk_score}/100 (Grade ${results.grade})\n${results.flagged_count} of ${results.total_clauses} clauses flagged\nEntities: ${results.entities.length}\nContradictions: ${results.contradictions.length}\nObligations: ${results.obligations.length}\n\n` +
 
369
  <div className="lg:col-span-3">
370
  {results ? (
371
  <div className="space-y-3 sm:space-y-4">
372
+ {/* Score Card */}
373
+ <div className="bg-white border border-zinc-200 rounded-xl p-4 sm:p-5">
374
+ <div className="flex flex-col sm:flex-row sm:items-start sm:justify-between gap-3">
375
+ <div>
376
+ <div className="flex items-baseline gap-2">
377
+ <span className="text-3xl sm:text-4xl font-semibold tracking-tight">{results.risk_score}</span>
378
+ <span className="text-sm text-zinc-400">/100 risk</span>
 
 
 
 
 
 
 
 
 
379
  </div>
380
+ <div className="mt-2 h-1.5 w-full sm:w-48 bg-zinc-100 rounded-full overflow-hidden">
381
+ <div className={`h-full rounded-full transition-all duration-700 ${
382
+ results.risk_score >= 60 ? "bg-red-500" : results.risk_score >= 30 ? "bg-amber-400" : "bg-emerald-500"
383
+ }`} style={{ width: `${results.risk_score}%` }} />
 
 
 
 
 
 
384
  </div>
385
+ </div>
386
+ <span className={`self-start text-sm font-semibold px-3 py-1 rounded-lg border ${GRADE_STYLE[results.grade] || GRADE_STYLE.C}`}>
387
+ Grade {results.grade}
388
+ </span>
389
+ </div>
390
 
391
+ {/* Severity breakdown grid */}
392
+ <div className="mt-4 grid grid-cols-4 gap-2">
393
+ {(["CRITICAL", "HIGH", "MEDIUM", "LOW"] as const).map(sev => {
394
+ const c = SEV_CONFIG[sev];
395
+ return (
396
+ <div key={sev} className={`text-center p-2 rounded-lg ${c.bg} border ${c.border}`}>
397
+ <span className={`text-lg font-bold ${c.text}`}>{sevCounts[sev]}</span>
398
+ <p className={`text-[10px] ${c.text} opacity-70`}>{c.label}</p>
399
+ </div>
400
+ );
401
+ })}
402
+ </div>
403
 
404
+ {/* Meta stats */}
405
+ <div className="mt-3 flex items-center gap-2 sm:gap-3 text-[11px] text-zinc-400 flex-wrap">
406
+ <span className="flex items-center gap-1"><Layers className="w-3 h-3" />{results.total_clauses} clauses</span>
407
  <span className="w-px h-3 bg-zinc-200" />
408
  <span className="flex items-center gap-1"><Tag className="w-3 h-3" />{results.entities.length} entities</span>
409
  <span className="w-px h-3 bg-zinc-200" />
 
411
  <span className="w-px h-3 bg-zinc-200" />
412
  <span className="flex items-center gap-1"><Clock className="w-3 h-3" />{results.latency_ms}ms</span>
413
  <span className="w-px h-3 bg-zinc-200" />
414
+ <span className="flex items-center gap-1">
415
+ {results.model !== "regex" ? <><Cpu className="w-3 h-3" /> ML Models</> : <><FileSearch className="w-3 h-3" /> Pattern fallback</>}
416
+ </span>
 
 
417
  </div>
418
  </div>
419
 
 
433
  </button>
434
  ))}
435
  </div>
436
+ <div className="flex gap-1.5 self-end sm:self-auto">
437
  <button onClick={handleCopy} className="p-2 rounded-md hover:bg-zinc-100 text-zinc-400 hover:text-zinc-600 transition-colors" title="Copy summary">
438
  {copied ? <Check className="w-4 h-4 text-emerald-500" /> : <Copy className="w-4 h-4" />}
439
  </button>
440
+ <button onClick={handleDownloadPDF} className="p-2 rounded-md hover:bg-zinc-100 text-zinc-400 hover:text-zinc-600 transition-colors" title="Download PDF"><FileDown className="w-4 h-4" /></button>
441
  </div>
442
  </div>
443
 
 
457
  </div>
458
 
459
  {/* Tab Content */}
460
+ <div className="max-h-[350px] sm:max-h-[420px] overflow-y-auto pr-1">
461
 
462
  {/* Clauses */}
463
  {activeTab === "clauses" && (
 
657
  </div>
658
  ) : Object.entries(results.compliance).map(([regName, reg]) => {
659
  const status = COMPLIANCE_STATUS[reg.overall_status] || COMPLIANCE_STATUS.PARTIAL;
 
660
  return (
661
+ <div key={regName} className="bg-white border border-zinc-200 rounded-xl overflow-hidden">
662
  <div className={`flex flex-col sm:flex-row sm:items-center justify-between p-4 border-b ${status.bg} ${status.border}`}>
663
  <div>
664
  <div className="flex items-center gap-2 flex-wrap">
 
677
  <p className="text-[11px] text-zinc-500 mt-0.5">{reg.description}</p>
678
  </div>
679
  <div className="text-left sm:text-right mt-2 sm:mt-0">
680
+ <span className={`text-lg font-bold ${status.text}`}>{reg.compliance_rate}%</span>
681
  <span className={`text-[11px] font-medium block ${status.text}`}>{reg.overall_status}</span>
682
  </div>
683
  </div>
 
 
 
 
 
684
  <div className="p-3 space-y-0.5">
685
  {reg.checks.map((check, i) => {
686
  const sev = SEV_CONFIG[check.severity] || SEV_CONFIG.MEDIUM;
 
707
  );
708
  })}
709
  </div>
 
710
  </div>
711
  );
712
  })}
 
847
  )}
848
  </div>
849
  </div>
 
 
 
 
 
 
 
 
 
 
850
  ) : (
851
+ <div className="bg-white border border-dashed border-zinc-200 rounded-xl h-[300px] sm:h-[420px] flex flex-col items-center justify-center">
852
  <ScanText className="w-10 h-10 text-zinc-200 mb-3" />
853
  <p className="text-sm text-zinc-300">Paste text and analyze to see results</p>
854
  </div>
web/app/dashboard-pages/compare/loading.tsx DELETED
@@ -1,22 +0,0 @@
1
- import { GitCompare } from "lucide-react";
2
-
3
- export default function CompareLoading() {
4
- return (
5
- <div className="min-h-screen bg-zinc-50/30">
6
- <div className="max-w-6xl mx-auto px-4 sm:px-6 py-8 sm:py-12">
7
- <div className="flex items-center gap-3 mb-8">
8
- <GitCompare className="w-6 h-6 text-zinc-400 animate-pulse" />
9
- <div className="h-7 w-44 bg-zinc-200 rounded-lg animate-pulse" />
10
- </div>
11
- <div className="grid md:grid-cols-2 gap-6">
12
- {[...Array(2)].map((_, i) => (
13
- <div key={i} className="bg-white rounded-xl p-4 border border-zinc-200">
14
- <div className="h-3 w-24 bg-zinc-100 rounded animate-pulse mb-3" />
15
- <div className="h-[280px] bg-zinc-50 rounded-lg animate-pulse" />
16
- </div>
17
- ))}
18
- </div>
19
- </div>
20
- </div>
21
- );
22
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
web/app/dashboard-pages/dashboard/loading.tsx DELETED
@@ -1,45 +0,0 @@
1
- import { ShieldCheck, Loader2 } from "lucide-react";
2
-
3
- export default function DashboardLoading() {
4
- return (
5
- <div className="min-h-screen bg-zinc-50/30">
6
- <div className="max-w-6xl mx-auto px-4 sm:px-6 py-8 sm:py-12">
7
- <div className="flex items-center gap-3 mb-8">
8
- <ShieldCheck className="w-6 h-6 text-indigo-400 animate-pulse" />
9
- <div className="h-7 w-32 bg-zinc-200 rounded-lg animate-pulse" />
10
- </div>
11
- <div className="grid grid-cols-2 lg:grid-cols-4 gap-4 mb-10">
12
- {[...Array(4)].map((_, i) => (
13
- <div key={i} className="bg-white rounded-xl p-5 border border-zinc-200">
14
- <div className="h-3 w-20 bg-zinc-100 rounded animate-pulse mb-3" />
15
- <div className="h-7 w-16 bg-zinc-200 rounded animate-pulse" />
16
- </div>
17
- ))}
18
- </div>
19
- <div className="grid sm:grid-cols-3 gap-4 mb-10">
20
- {[...Array(3)].map((_, i) => (
21
- <div key={i} className="bg-white rounded-xl p-5 border border-zinc-200 flex items-center gap-4">
22
- <div className="w-10 h-10 rounded-lg bg-zinc-100 animate-pulse" />
23
- <div>
24
- <div className="h-3 w-24 bg-zinc-100 rounded animate-pulse mb-2" />
25
- <div className="h-5 w-12 bg-zinc-200 rounded animate-pulse" />
26
- </div>
27
- </div>
28
- ))}
29
- </div>
30
- <div className="bg-white rounded-xl border border-zinc-200 p-6">
31
- <div className="h-5 w-28 bg-zinc-200 rounded animate-pulse mb-6" />
32
- {[...Array(4)].map((_, i) => (
33
- <div key={i} className="flex items-center justify-between py-4 border-b border-zinc-50">
34
- <div>
35
- <div className="h-4 w-48 bg-zinc-100 rounded animate-pulse mb-2" />
36
- <div className="h-3 w-32 bg-zinc-50 rounded animate-pulse" />
37
- </div>
38
- <div className="h-7 w-14 bg-zinc-100 rounded-full animate-pulse" />
39
- </div>
40
- ))}
41
- </div>
42
- </div>
43
- </div>
44
- );
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
web/app/dashboard-pages/settings/loading.tsx DELETED
@@ -1,23 +0,0 @@
1
- import { Settings } from "lucide-react";
2
-
3
- export default function SettingsLoading() {
4
- return (
5
- <div className="min-h-screen bg-zinc-50/30">
6
- <div className="max-w-3xl mx-auto px-4 sm:px-6 py-8 sm:py-12">
7
- <div className="flex items-center gap-3 mb-8">
8
- <Settings className="w-6 h-6 text-zinc-400 animate-pulse" />
9
- <div className="h-7 w-28 bg-zinc-200 rounded-lg animate-pulse" />
10
- </div>
11
- {[...Array(3)].map((_, i) => (
12
- <div key={i} className="bg-white rounded-xl p-6 border border-zinc-200 mb-4">
13
- <div className="h-5 w-32 bg-zinc-200 rounded animate-pulse mb-4" />
14
- <div className="space-y-3">
15
- <div className="h-10 bg-zinc-50 rounded-lg animate-pulse" />
16
- <div className="h-10 bg-zinc-50 rounded-lg animate-pulse" />
17
- </div>
18
- </div>
19
- ))}
20
- </div>
21
- </div>
22
- );
23
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
web/components/export-dropdown.tsx DELETED
@@ -1,69 +0,0 @@
1
- "use client";
2
-
3
- import { useState, useRef, useEffect } from "react";
4
- import { FileDown, ChevronDown, Loader2 } from "lucide-react";
5
- import { EXPORT_FORMATS } from "@/lib/export-utils";
6
- import type { AnalysisResult } from "@/lib/types";
7
-
8
- export function ExportDropdown({ results }: { results: AnalysisResult }) {
9
- const [open, setOpen] = useState(false);
10
- const [exporting, setExporting] = useState<string | null>(null);
11
- const ref = useRef<HTMLDivElement>(null);
12
-
13
- useEffect(() => {
14
- function handleClickOutside(e: MouseEvent) {
15
- if (ref.current && !ref.current.contains(e.target as Node)) setOpen(false);
16
- }
17
- document.addEventListener("mousedown", handleClickOutside);
18
- return () => document.removeEventListener("mousedown", handleClickOutside);
19
- }, []);
20
-
21
- async function handleExport(key: string, fn: (r: AnalysisResult) => void | Promise<any>) {
22
- setExporting(key);
23
- try {
24
- await fn(results);
25
- } catch (e) {
26
- console.error("Export failed:", e);
27
- }
28
- setExporting(null);
29
- setOpen(false);
30
- }
31
-
32
- return (
33
- <div ref={ref} className="relative">
34
- <button
35
- onClick={() => setOpen(!open)}
36
- className="inline-flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium text-zinc-600 bg-white border border-zinc-200 rounded-lg hover:bg-zinc-50 hover:border-zinc-300 transition-all shadow-sm"
37
- >
38
- <FileDown className="w-3.5 h-3.5" />
39
- Export
40
- <ChevronDown className={`w-3 h-3 transition-transform ${open ? "rotate-180" : ""}`} />
41
- </button>
42
-
43
- {open && (
44
- <div className="absolute right-0 top-full mt-1.5 w-64 bg-white border border-zinc-200 rounded-xl shadow-xl z-50 overflow-hidden animate-in fade-in slide-in-from-top-1 duration-150">
45
- <div className="px-3 py-2 border-b border-zinc-100">
46
- <p className="text-[10px] font-semibold text-zinc-400 uppercase tracking-wider">Export Report</p>
47
- </div>
48
- <div className="py-1">
49
- {EXPORT_FORMATS.map((fmt) => (
50
- <button
51
- key={fmt.key}
52
- onClick={() => handleExport(fmt.key, fmt.fn)}
53
- disabled={exporting !== null}
54
- className="w-full flex items-center gap-3 px-3 py-2.5 text-left hover:bg-zinc-50 transition-colors disabled:opacity-40"
55
- >
56
- <span className="text-base w-5 text-center">{fmt.icon}</span>
57
- <div className="flex-1 min-w-0">
58
- <p className="text-sm font-medium text-zinc-700">{fmt.label}</p>
59
- <p className="text-[10px] text-zinc-400">{fmt.description}</p>
60
- </div>
61
- {exporting === fmt.key && <Loader2 className="w-3.5 h-3.5 text-zinc-400 animate-spin" />}
62
- </button>
63
- ))}
64
- </div>
65
- </div>
66
- )}
67
- </div>
68
- );
69
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
web/components/extension-bridge.tsx CHANGED
@@ -57,24 +57,20 @@ export function ExtensionBridge() {
57
  const { data: { subscription } } = supabase.auth.onAuthStateChange(async (event, session) => {
58
  // Handle ALL events that mean "user is logged in"
59
  if (session && (event === "SIGNED_IN" || event === "INITIAL_SESSION" || event === "TOKEN_REFRESHED")) {
60
- let profileData = null;
61
- try {
62
- const result = await supabase
63
- .from("profiles")
64
- .select("plan, full_name")
65
- .eq("id", session.user.id)
66
- .single();
67
- profileData = result.data;
68
- } catch {
69
- // ignore
70
- }
71
 
72
  sendAuthToExtension(
73
  session.access_token,
74
  session.user.email || "",
75
- profileData?.full_name || session.user.user_metadata?.full_name || "",
76
  session.user.id,
77
- profileData?.plan || "free",
78
  );
79
  }
80
 
 
57
  const { data: { subscription } } = supabase.auth.onAuthStateChange(async (event, session) => {
58
  // Handle ALL events that mean "user is logged in"
59
  if (session && (event === "SIGNED_IN" || event === "INITIAL_SESSION" || event === "TOKEN_REFRESHED")) {
60
+ const { data: profile } = await supabase
61
+ .from("profiles")
62
+ .select("plan, full_name")
63
+ .eq("id", session.user.id)
64
+ .single()
65
+ .then(r => r)
66
+ .catch(() => ({ data: null }));
 
 
 
 
67
 
68
  sendAuthToExtension(
69
  session.access_token,
70
  session.user.email || "",
71
+ profile?.full_name || session.user.user_metadata?.full_name || "",
72
  session.user.id,
73
+ profile?.plan || "free",
74
  );
75
  }
76
 
web/components/nav.tsx CHANGED
@@ -29,69 +29,33 @@ export function Nav() {
29
  const hasTeam = !!userTeam;
30
 
31
  useEffect(() => {
32
- let cancelled = false;
33
  const supabase = createClient();
34
-
35
- // Single source of truth: onAuthStateChange.
36
- // Fires INITIAL_SESSION immediately on setup (synchronous replay of stored session).
37
- // Then fires SIGNED_IN, SIGNED_OUT, TOKEN_REFRESHED on auth changes.
38
- // No separate getSession/getUser call that was causing race conditions.
39
- const { data: { subscription } } = supabase.auth.onAuthStateChange(
40
- (event, session) => {
41
- if (cancelled) return;
42
-
43
- if (!session?.user) {
44
- setUserEmail(null);
45
- setUserRole(null);
46
- setUserTeam(null);
47
- setLoaded(true);
48
- return;
49
- }
50
-
51
- // User is authenticated — show logged-in nav immediately
52
- setUserEmail(session.user.email || null);
53
- setLoaded(true);
54
-
55
- // Fetch profile (role, team) in background — don't block the UI
56
- supabase
57
  .from("profiles")
58
  .select("role, team_id")
59
- .eq("id", session.user.id)
60
- .single()
61
- .then(({ data: profile, error }) => {
62
- if (cancelled) return;
63
- if (error) {
64
- console.error("[ClauseGuard Nav] Profile error:", error.message);
65
- setUserRole("user");
66
- setUserTeam(null);
67
- } else {
68
- setUserRole(profile?.role || "user");
69
- setUserTeam(profile?.team_id || null);
70
- }
71
- });
72
  }
73
- );
74
-
75
- return () => {
76
- cancelled = true;
77
- subscription.unsubscribe();
78
- };
79
- }, []); // eslint-disable-line react-hooks/exhaustive-deps
80
 
81
  async function handleSignOut() {
82
- try {
83
- const supabase = createClient();
84
- await supabase.auth.signOut();
85
- } catch {
86
- // ignore
87
- }
88
  setUserEmail(null);
89
  setUserRole(null);
90
  setUserTeam(null);
91
  window.location.href = "/";
92
  }
93
 
94
- // Public links - always visible
95
  const mainLinks: NavLink[] = [
96
  { href: "/#features", label: "Features", icon: Sparkles },
97
  { href: "/#pricing", label: "Pricing", icon: CreditCard },
@@ -106,12 +70,12 @@ export function Nav() {
106
  <Link href="/" className="flex items-center gap-2">
107
  <ShieldCheck className="w-5 h-5 text-zinc-900" strokeWidth={2.2} />
108
  <span className="font-semibold text-[15px] tracking-tight text-zinc-900">ClauseGuard</span>
109
- <span className="hidden sm:inline text-[10px] font-medium text-zinc-400 ml-1 border border-zinc-200 px-1.5 py-0.5 rounded">v4.3</span>
110
  </Link>
111
 
112
- {/* Desktop Nav */}
113
  <div className="hidden md:flex items-center gap-0.5">
114
- {/* Public links - always visible */}
115
  {mainLinks.map((l) => {
116
  const isActive = pathname === l.href;
117
  return (
@@ -124,7 +88,7 @@ export function Nav() {
124
  );
125
  })}
126
 
127
- {/* Loading skeleton while auth state resolves */}
128
  {!loaded && (
129
  <>
130
  <div className="w-px h-4 bg-zinc-200 mx-1.5" />
@@ -135,7 +99,7 @@ export function Nav() {
135
  </>
136
  )}
137
 
138
- {/* Logged-in links */}
139
  {loaded && isLoggedIn && (
140
  <>
141
  {/* Dashboard */}
@@ -149,7 +113,7 @@ export function Nav() {
149
  Dashboard
150
  </Link>
151
 
152
- {/* Team - only when user belongs to a team */}
153
  {hasTeam && (
154
  <Link href="/dashboard-pages/team"
155
  className={`flex items-center gap-1.5 px-2.5 py-1.5 text-[13px] rounded-md transition-colors ${
@@ -162,7 +126,7 @@ export function Nav() {
162
  </Link>
163
  )}
164
 
165
- {/* Admin - only for admin role */}
166
  {isAdmin && (
167
  <Link href="/admin"
168
  className={`flex items-center gap-1.5 px-2.5 py-1.5 text-[13px] rounded-md transition-colors ${
@@ -187,13 +151,14 @@ export function Nav() {
187
  <Settings className="w-3.5 h-3.5" />
188
  Settings
189
  </Link>
190
- {/* User indicator with hover dropdown */}
 
191
  <div className="relative group ml-1">
192
  <button className="flex items-center gap-1.5 px-2.5 py-1.5 text-[13px] text-zinc-500 hover:text-zinc-900 rounded-md hover:bg-zinc-50 transition-colors">
193
  <UserCircle className="w-3.5 h-3.5" />
194
  <span className="max-w-[100px] truncate">{userEmail?.split("@")[0]}</span>
195
  </button>
196
- {/* Dropdown on hover */}
197
  <div className="absolute right-0 top-full mt-1 w-52 bg-white border border-zinc-200 rounded-xl shadow-lg opacity-0 invisible group-hover:opacity-100 group-hover:visible transition-all duration-150 z-50">
198
  <div className="px-3 py-2.5 border-b border-zinc-100">
199
  <p className="text-xs text-zinc-400">Signed in as</p>
@@ -233,7 +198,7 @@ export function Nav() {
233
  </>
234
  )}
235
 
236
- {/* Logged-out links */}
237
  {loaded && !isLoggedIn && (
238
  <>
239
  <div className="w-px h-4 bg-zinc-200 mx-1.5" />
@@ -261,7 +226,7 @@ export function Nav() {
261
  </button>
262
  </div>
263
 
264
- {/* Mobile Menu */}
265
  {open && (
266
  <div className="md:hidden border-t border-zinc-100 bg-white px-5 py-3 space-y-0.5">
267
  {/* Public links */}
@@ -279,7 +244,7 @@ export function Nav() {
279
  );
280
  })}
281
 
282
- {/* Mobile loading skeleton */}
283
  {!loaded && (
284
  <>
285
  <div className="h-px bg-zinc-100 my-1.5" />
@@ -290,12 +255,12 @@ export function Nav() {
290
  </>
291
  )}
292
 
293
- {/* Mobile: Logged-in links */}
294
  {loaded && isLoggedIn && (
295
  <>
296
  <div className="h-px bg-zinc-100 my-1.5" />
297
 
298
- {/* User info banner */}
299
  <div className="px-3 py-2">
300
  <p className="text-xs text-zinc-400">Signed in as</p>
301
  <p className="text-sm text-zinc-700 font-medium truncate">{userEmail}</p>
@@ -319,7 +284,7 @@ export function Nav() {
319
  <Settings className="w-4 h-4 text-zinc-400" /> Settings
320
  </Link>
321
 
322
- {/* Team link */}
323
  {hasTeam && (
324
  <Link href="/dashboard-pages/team" onClick={() => setOpen(false)}
325
  className={`flex items-center gap-2.5 px-3 py-2.5 text-sm rounded-md ${
@@ -331,7 +296,7 @@ export function Nav() {
331
  </Link>
332
  )}
333
 
334
- {/* Admin link */}
335
  {isAdmin && (
336
  <Link href="/admin" onClick={() => setOpen(false)}
337
  className={`flex items-center gap-2.5 px-3 py-2.5 text-sm rounded-md ${
@@ -359,7 +324,7 @@ export function Nav() {
359
  </>
360
  )}
361
 
362
- {/* Mobile: Logged-out links */}
363
  {loaded && !isLoggedIn && (
364
  <>
365
  <div className="h-px bg-zinc-100 my-1.5" />
 
29
  const hasTeam = !!userTeam;
30
 
31
  useEffect(() => {
 
32
  const supabase = createClient();
33
+ supabase.auth.getUser().then(async ({ data }) => {
34
+ const user = data.user;
35
+ setUserEmail(user?.email || null);
36
+ if (user) {
37
+ const { data: profile } = await supabase
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  .from("profiles")
39
  .select("role, team_id")
40
+ .eq("id", user.id)
41
+ .single();
42
+ setUserRole(profile?.role || "user");
43
+ setUserTeam(profile?.team_id || null);
 
 
 
 
 
 
 
 
 
44
  }
45
+ setLoaded(true);
46
+ });
47
+ }, []);
 
 
 
 
48
 
49
  async function handleSignOut() {
50
+ const supabase = createClient();
51
+ await supabase.auth.signOut();
 
 
 
 
52
  setUserEmail(null);
53
  setUserRole(null);
54
  setUserTeam(null);
55
  window.location.href = "/";
56
  }
57
 
58
+ // Public links always visible
59
  const mainLinks: NavLink[] = [
60
  { href: "/#features", label: "Features", icon: Sparkles },
61
  { href: "/#pricing", label: "Pricing", icon: CreditCard },
 
70
  <Link href="/" className="flex items-center gap-2">
71
  <ShieldCheck className="w-5 h-5 text-zinc-900" strokeWidth={2.2} />
72
  <span className="font-semibold text-[15px] tracking-tight text-zinc-900">ClauseGuard</span>
73
+ <span className="hidden sm:inline text-[10px] font-medium text-zinc-400 ml-1 border border-zinc-200 px-1.5 py-0.5 rounded">v4.0</span>
74
  </Link>
75
 
76
+ {/* ─── Desktop Nav ─── */}
77
  <div className="hidden md:flex items-center gap-0.5">
78
+ {/* Public links always visible */}
79
  {mainLinks.map((l) => {
80
  const isActive = pathname === l.href;
81
  return (
 
88
  );
89
  })}
90
 
91
+ {/* ── Loading skeleton ── */}
92
  {!loaded && (
93
  <>
94
  <div className="w-px h-4 bg-zinc-200 mx-1.5" />
 
99
  </>
100
  )}
101
 
102
+ {/* ── Logged-in links ── */}
103
  {loaded && isLoggedIn && (
104
  <>
105
  {/* Dashboard */}
 
113
  Dashboard
114
  </Link>
115
 
116
+ {/* Team only when user has a team */}
117
  {hasTeam && (
118
  <Link href="/dashboard-pages/team"
119
  className={`flex items-center gap-1.5 px-2.5 py-1.5 text-[13px] rounded-md transition-colors ${
 
126
  </Link>
127
  )}
128
 
129
+ {/* Admin only for admin role */}
130
  {isAdmin && (
131
  <Link href="/admin"
132
  className={`flex items-center gap-1.5 px-2.5 py-1.5 text-[13px] rounded-md transition-colors ${
 
151
  <Settings className="w-3.5 h-3.5" />
152
  Settings
153
  </Link>
154
+
155
+ {/* User indicator + sign out dropdown */}
156
  <div className="relative group ml-1">
157
  <button className="flex items-center gap-1.5 px-2.5 py-1.5 text-[13px] text-zinc-500 hover:text-zinc-900 rounded-md hover:bg-zinc-50 transition-colors">
158
  <UserCircle className="w-3.5 h-3.5" />
159
  <span className="max-w-[100px] truncate">{userEmail?.split("@")[0]}</span>
160
  </button>
161
+ {/* Dropdown */}
162
  <div className="absolute right-0 top-full mt-1 w-52 bg-white border border-zinc-200 rounded-xl shadow-lg opacity-0 invisible group-hover:opacity-100 group-hover:visible transition-all duration-150 z-50">
163
  <div className="px-3 py-2.5 border-b border-zinc-100">
164
  <p className="text-xs text-zinc-400">Signed in as</p>
 
198
  </>
199
  )}
200
 
201
+ {/* ── Logged-out links ── */}
202
  {loaded && !isLoggedIn && (
203
  <>
204
  <div className="w-px h-4 bg-zinc-200 mx-1.5" />
 
226
  </button>
227
  </div>
228
 
229
+ {/* ─── Mobile Menu ─── */}
230
  {open && (
231
  <div className="md:hidden border-t border-zinc-100 bg-white px-5 py-3 space-y-0.5">
232
  {/* Public links */}
 
244
  );
245
  })}
246
 
247
+ {/* ── Mobile loading skeleton ── */}
248
  {!loaded && (
249
  <>
250
  <div className="h-px bg-zinc-100 my-1.5" />
 
255
  </>
256
  )}
257
 
258
+ {/* ── Mobile: Logged-in links ── */}
259
  {loaded && isLoggedIn && (
260
  <>
261
  <div className="h-px bg-zinc-100 my-1.5" />
262
 
263
+ {/* User info */}
264
  <div className="px-3 py-2">
265
  <p className="text-xs text-zinc-400">Signed in as</p>
266
  <p className="text-sm text-zinc-700 font-medium truncate">{userEmail}</p>
 
284
  <Settings className="w-4 h-4 text-zinc-400" /> Settings
285
  </Link>
286
 
287
+ {/* Team */}
288
  {hasTeam && (
289
  <Link href="/dashboard-pages/team" onClick={() => setOpen(false)}
290
  className={`flex items-center gap-2.5 px-3 py-2.5 text-sm rounded-md ${
 
296
  </Link>
297
  )}
298
 
299
+ {/* Admin */}
300
  {isAdmin && (
301
  <Link href="/admin" onClick={() => setOpen(false)}
302
  className={`flex items-center gap-2.5 px-3 py-2.5 text-sm rounded-md ${
 
324
  </>
325
  )}
326
 
327
+ {/* ── Mobile: Logged-out links ── */}
328
  {loaded && !isLoggedIn && (
329
  <>
330
  <div className="h-px bg-zinc-100 my-1.5" />
web/lib/export-utils.ts DELETED
@@ -1,454 +0,0 @@
1
- /**
2
- * ClauseGuard — Multi-format Report Export Utility
3
- * Generates reports in: JSON, CSV, Markdown, Plain Text, HTML
4
- * PDF and DOCX use server-side generation via API routes.
5
- */
6
-
7
- import type { AnalysisResult, Clause, Entity, Contradiction, Obligation, ComplianceReg, Redline } from "./types";
8
-
9
- // ── Severity ordering ──
10
- const SEV_ORDER: Record<string, number> = { CRITICAL: 4, HIGH: 3, MEDIUM: 2, LOW: 1 };
11
-
12
- function sevSort(a: string, b: string) {
13
- return (SEV_ORDER[b] || 0) - (SEV_ORDER[a] || 0);
14
- }
15
-
16
- function timestamp() {
17
- return new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
18
- }
19
-
20
- function download(content: string | Blob, filename: string, mime: string) {
21
- const blob = content instanceof Blob ? content : new Blob([content], { type: mime });
22
- const url = URL.createObjectURL(blob);
23
- const a = document.createElement("a");
24
- a.href = url;
25
- a.download = filename;
26
- document.body.appendChild(a);
27
- a.click();
28
- document.body.removeChild(a);
29
- URL.revokeObjectURL(url);
30
- }
31
-
32
- // ═══════════════════════════════════════════════════════════════
33
- // JSON Export
34
- // ═══════════════════════════════════════════════════════════════
35
-
36
- export function exportJSON(results: AnalysisResult, formatted = true) {
37
- const json = formatted
38
- ? JSON.stringify(results, null, 2)
39
- : JSON.stringify(results);
40
- download(json, `clauseguard-report-${timestamp()}.json`, "application/json");
41
- }
42
-
43
- // ═══════════════════════════════════════════════════════════════
44
- // CSV Export
45
- // ═══════════════════════════════════════════════════════════════
46
-
47
- function escapeCSV(val: string): string {
48
- if (val.includes(",") || val.includes('"') || val.includes("\n")) {
49
- return `"${val.replace(/"/g, '""')}"`;
50
- }
51
- return val;
52
- }
53
-
54
- export function exportCSV(results: AnalysisResult) {
55
- const rows: string[] = [];
56
-
57
- // Header
58
- rows.push("Section,Category,Severity,Confidence,Source,Text,Description");
59
-
60
- // Clauses
61
- for (const clause of results.results) {
62
- for (const cat of clause.categories) {
63
- rows.push([
64
- "Clause",
65
- escapeCSV(cat.name),
66
- cat.severity,
67
- cat.confidence != null ? String(Math.round(cat.confidence * 100)) + "%" : "pattern",
68
- cat.confidence != null ? "ML" : "Pattern",
69
- escapeCSV(clause.text.slice(0, 500)),
70
- escapeCSV(cat.description || ""),
71
- ].join(","));
72
- }
73
- }
74
-
75
- // Entities
76
- for (const ent of results.entities) {
77
- rows.push([
78
- "Entity",
79
- escapeCSV(ent.type),
80
- "",
81
- ent.score ? String(Math.round(ent.score * 100)) + "%" : "",
82
- ent.source || "",
83
- escapeCSV(ent.text),
84
- "",
85
- ].join(","));
86
- }
87
-
88
- // Contradictions
89
- for (const c of results.contradictions) {
90
- rows.push([
91
- "Contradiction",
92
- escapeCSV(c.type),
93
- c.severity,
94
- c.confidence ? String(Math.round(c.confidence * 100)) + "%" : "",
95
- c.source || "",
96
- escapeCSV(c.explanation),
97
- "",
98
- ].join(","));
99
- }
100
-
101
- // Obligations
102
- for (const o of results.obligations) {
103
- rows.push([
104
- "Obligation",
105
- escapeCSV(o.type),
106
- o.priority != null && o.priority >= 3 ? "HIGH" : o.priority === 2 ? "MEDIUM" : "LOW",
107
- "",
108
- "",
109
- escapeCSV(o.description),
110
- escapeCSV(`${o.party} · ${o.deadline}`),
111
- ].join(","));
112
- }
113
-
114
- download(rows.join("\n"), `clauseguard-report-${timestamp()}.csv`, "text/csv");
115
- }
116
-
117
- // ═══════════════════════════════════════════════════════════════
118
- // Markdown Export
119
- // ═══════════════════════════════════════════════════════════════
120
-
121
- export function exportMarkdown(results: AnalysisResult) {
122
- const lines: string[] = [];
123
- const flagged = results.results.filter(r => r.categories.length > 0);
124
- const sevCounts = { CRITICAL: 0, HIGH: 0, MEDIUM: 0, LOW: 0 };
125
- flagged.forEach(r => r.categories.forEach(c => {
126
- if (sevCounts[c.severity as keyof typeof sevCounts] !== undefined) sevCounts[c.severity as keyof typeof sevCounts]++;
127
- }));
128
-
129
- lines.push("# 🛡️ ClauseGuard Analysis Report");
130
- lines.push("");
131
- lines.push(`**Generated:** ${new Date().toLocaleString()}`);
132
- lines.push(`**Risk Score:** ${results.risk_score}/100 · **Grade:** ${results.grade}`);
133
- lines.push(`**Clauses:** ${results.total_clauses} total · ${results.flagged_count} flagged`);
134
- lines.push(`**Model:** ${results.model === "ml" || results.model !== "regex" ? "ML Models" : "Pattern Matching"}`);
135
- lines.push("");
136
-
137
- // Severity breakdown
138
- lines.push("## 📊 Risk Breakdown");
139
- lines.push("");
140
- lines.push("| Severity | Count |");
141
- lines.push("|----------|-------|");
142
- lines.push(`| 🔴 Critical | ${sevCounts.CRITICAL} |`);
143
- lines.push(`| 🟠 High | ${sevCounts.HIGH} |`);
144
- lines.push(`| 🟡 Medium | ${sevCounts.MEDIUM} |`);
145
- lines.push(`| 🟢 Low | ${sevCounts.LOW} |`);
146
- lines.push("");
147
-
148
- // Flagged clauses
149
- if (flagged.length > 0) {
150
- lines.push("## ⚠️ Flagged Clauses");
151
- lines.push("");
152
- for (const clause of flagged) {
153
- const labels = clause.categories.map(c => `**${c.name}** (${c.severity})`).join(", ");
154
- lines.push(`### ${labels}`);
155
- lines.push("");
156
- lines.push(`> ${clause.text.slice(0, 500)}${clause.text.length > 500 ? "..." : ""}`);
157
- lines.push("");
158
- for (const cat of clause.categories) {
159
- if (cat.description) lines.push(`- ${cat.description}`);
160
- const src = cat.confidence != null ? `ML ${Math.round(cat.confidence * 100)}%` : "Pattern match";
161
- lines.push(`- *Source: ${src}*`);
162
- }
163
- lines.push("");
164
- }
165
- }
166
-
167
- // Entities
168
- if (results.entities.length > 0) {
169
- lines.push("## 🏷️ Extracted Entities");
170
- lines.push("");
171
- const grouped: Record<string, string[]> = {};
172
- results.entities.forEach(e => {
173
- if (!grouped[e.type]) grouped[e.type] = [];
174
- if (!grouped[e.type].includes(e.text)) grouped[e.type].push(e.text);
175
- });
176
- for (const [type, items] of Object.entries(grouped)) {
177
- lines.push(`**${type.replace(/_/g, " ")}:** ${items.join(", ")}`);
178
- }
179
- lines.push("");
180
- }
181
-
182
- // Contradictions
183
- if (results.contradictions.length > 0) {
184
- lines.push("## 🔍 Contradictions & Issues");
185
- lines.push("");
186
- for (const c of results.contradictions) {
187
- lines.push(`- **[${c.severity}] ${c.type}:** ${c.explanation}`);
188
- }
189
- lines.push("");
190
- }
191
-
192
- // Obligations
193
- if (results.obligations.length > 0) {
194
- lines.push("## 📋 Obligations");
195
- lines.push("");
196
- lines.push("| Type | Party | Description | Deadline |");
197
- lines.push("|------|-------|-------------|----------|");
198
- for (const o of results.obligations) {
199
- lines.push(`| ${o.type} | ${o.party} | ${o.description.slice(0, 100)} | ${o.deadline} |`);
200
- }
201
- lines.push("");
202
- }
203
-
204
- // Compliance
205
- if (Object.keys(results.compliance).length > 0) {
206
- lines.push("## ⚖️ Compliance");
207
- lines.push("");
208
- for (const [name, reg] of Object.entries(results.compliance)) {
209
- lines.push(`### ${name} — ${reg.compliance_rate}% (${reg.overall_status})`);
210
- lines.push(`*${reg.description}*`);
211
- lines.push("");
212
- for (const check of reg.checks) {
213
- const icon = check.status === "PASS" ? "✅" : check.status === "MISSING" ? "❌" : "⚠️";
214
- lines.push(`${icon} ${check.description} (${check.severity})`);
215
- }
216
- lines.push("");
217
- }
218
- }
219
-
220
- // Redlines
221
- if (results.redlines && results.redlines.length > 0) {
222
- lines.push("## ✏️ Redlining Suggestions");
223
- lines.push("");
224
- for (const rl of results.redlines) {
225
- lines.push(`### ${rl.clause_label} (${rl.risk_level})`);
226
- lines.push("");
227
- lines.push(`~~${rl.original_text.slice(0, 200)}~~`);
228
- lines.push("");
229
- lines.push(`✅ **Suggested:** ${rl.safe_alternative}`);
230
- lines.push(`📚 ${rl.legal_basis} · 🛡️ ${rl.consumer_standard}`);
231
- lines.push("");
232
- }
233
- }
234
-
235
- lines.push("---");
236
- lines.push("*⚠️ Not legal advice. Generated by ClauseGuard AI.*");
237
-
238
- download(lines.join("\n"), `clauseguard-report-${timestamp()}.md`, "text/markdown");
239
- }
240
-
241
- // ═══════════════════════════════════════════════════════════════
242
- // Plain Text Export
243
- // ═══════════════════════════════════════════════════════════════
244
-
245
- export function exportText(results: AnalysisResult) {
246
- const lines: string[] = [];
247
- const flagged = results.results.filter(r => r.categories.length > 0);
248
-
249
- lines.push("═══════════════════════════════════════════════════════");
250
- lines.push(" CLAUSEGUARD ANALYSIS REPORT");
251
- lines.push("═══════════════════════════════════════════════════════");
252
- lines.push("");
253
- lines.push(`Date: ${new Date().toLocaleString()}`);
254
- lines.push(`Risk Score: ${results.risk_score}/100`);
255
- lines.push(`Grade: ${results.grade}`);
256
- lines.push(`Clauses: ${results.total_clauses} total, ${results.flagged_count} flagged`);
257
- lines.push(`Entities: ${results.entities.length}`);
258
- lines.push(`Issues: ${results.contradictions.length}`);
259
- lines.push(`Obligations: ${results.obligations.length}`);
260
- lines.push("");
261
- lines.push("───────────────────────────────────────────────────────");
262
- lines.push(" FLAGGED CLAUSES");
263
- lines.push("───────────────────────────────────────────────────────");
264
- lines.push("");
265
-
266
- for (let i = 0; i < flagged.length; i++) {
267
- const clause = flagged[i];
268
- const labels = clause.categories.map(c => `[${c.severity}] ${c.name}`).join(", ");
269
- lines.push(`${i + 1}. ${labels}`);
270
- lines.push(` ${clause.text.slice(0, 300)}${clause.text.length > 300 ? "..." : ""}`);
271
- lines.push("");
272
- }
273
-
274
- if (results.entities.length > 0) {
275
- lines.push("───────────────────────────────────────────────────────");
276
- lines.push(" ENTITIES");
277
- lines.push("───────────────────────────────────────────────────────");
278
- lines.push("");
279
- const grouped: Record<string, string[]> = {};
280
- results.entities.forEach(e => {
281
- if (!grouped[e.type]) grouped[e.type] = [];
282
- if (!grouped[e.type].includes(e.text)) grouped[e.type].push(e.text);
283
- });
284
- for (const [type, items] of Object.entries(grouped)) {
285
- lines.push(` ${type}: ${items.join(", ")}`);
286
- }
287
- lines.push("");
288
- }
289
-
290
- if (results.contradictions.length > 0) {
291
- lines.push("───────────────────────────────────────────────────────");
292
- lines.push(" CONTRADICTIONS & ISSUES");
293
- lines.push("───────────────────────────────────────────────────────");
294
- lines.push("");
295
- for (const c of results.contradictions) {
296
- lines.push(` [${c.severity}] ${c.type}: ${c.explanation}`);
297
- }
298
- lines.push("");
299
- }
300
-
301
- if (results.obligations.length > 0) {
302
- lines.push("───────────────────────────────────────────────────────");
303
- lines.push(" OBLIGATIONS");
304
- lines.push("───────────────────────────────────────────────────────");
305
- lines.push("");
306
- for (const o of results.obligations) {
307
- lines.push(` [${o.type}] ${o.party}: ${o.description} (${o.deadline})`);
308
- }
309
- lines.push("");
310
- }
311
-
312
- if (results.redlines && results.redlines.length > 0) {
313
- lines.push("───────────────────────────────────────────────────────");
314
- lines.push(" REDLINING SUGGESTIONS");
315
- lines.push("───────────────────────────────────────────────────────");
316
- lines.push("");
317
- for (const rl of results.redlines) {
318
- lines.push(` [${rl.risk_level}] ${rl.clause_label}`);
319
- lines.push(` ORIGINAL: ${rl.original_text.slice(0, 200)}`);
320
- lines.push(` SUGGESTED: ${rl.safe_alternative}`);
321
- lines.push("");
322
- }
323
- }
324
-
325
- lines.push("═══════════════════════════════════════════════════════");
326
- lines.push(" NOT LEGAL ADVICE — Generated by ClauseGuard AI");
327
- lines.push("═══════════════════════════════════════════════════════");
328
-
329
- download(lines.join("\n"), `clauseguard-report-${timestamp()}.txt`, "text/plain");
330
- }
331
-
332
- // ═══════════════════════════════════════════════════════════════
333
- // HTML Export (self-contained styled report)
334
- // ═══════════════════════════════════════════════════════════════
335
-
336
- export function exportHTML(results: AnalysisResult) {
337
- const flagged = results.results.filter(r => r.categories.length > 0);
338
- const sevCounts = { CRITICAL: 0, HIGH: 0, MEDIUM: 0, LOW: 0 };
339
- flagged.forEach(r => r.categories.forEach(c => {
340
- if (sevCounts[c.severity as keyof typeof sevCounts] !== undefined) sevCounts[c.severity as keyof typeof sevCounts]++;
341
- }));
342
-
343
- const sevColor: Record<string, string> = { CRITICAL: "#dc2626", HIGH: "#ea580c", MEDIUM: "#ca8a04", LOW: "#16a34a" };
344
-
345
- const clauseHTML = flagged.map(clause => {
346
- const tags = clause.categories.map(c =>
347
- `<span style="display:inline-block;background:${sevColor[c.severity] || '#888'}15;color:${sevColor[c.severity] || '#888'};border:1px solid ${sevColor[c.severity] || '#888'}40;padding:2px 10px;border-radius:4px;font-size:12px;font-weight:600;margin-right:4px;">${c.name} (${c.severity})</span>`
348
- ).join("");
349
- return `<div style="border:1px solid #e5e7eb;border-radius:8px;padding:16px;margin-bottom:12px;">
350
- <div style="margin-bottom:8px;">${tags}</div>
351
- <p style="font-size:13px;color:#374151;line-height:1.7;margin:0;">${clause.text.replace(/</g, "&lt;").slice(0, 500)}</p>
352
- </div>`;
353
- }).join("\n");
354
-
355
- const entityHTML = (() => {
356
- const grouped: Record<string, string[]> = {};
357
- results.entities.forEach(e => {
358
- if (!grouped[e.type]) grouped[e.type] = [];
359
- if (!grouped[e.type].includes(e.text)) grouped[e.type].push(e.text);
360
- });
361
- return Object.entries(grouped).map(([type, items]) =>
362
- `<div style="margin-bottom:12px;"><strong style="font-size:12px;text-transform:uppercase;color:#6b7280;">${type.replace(/_/g, " ")}</strong><div style="margin-top:4px;">${items.map(t => `<span style="display:inline-block;background:#f3f4f6;padding:3px 10px;border-radius:4px;font-size:12px;margin:2px;">${t}</span>`).join("")}</div></div>`
363
- ).join("\n");
364
- })();
365
-
366
- const html = `<!DOCTYPE html>
367
- <html lang="en">
368
- <head>
369
- <meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1">
370
- <title>ClauseGuard Report — ${new Date().toLocaleDateString()}</title>
371
- <style>
372
- *{margin:0;padding:0;box-sizing:border-box}
373
- body{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;color:#1f2937;background:#fff;padding:40px;max-width:800px;margin:0 auto}
374
- h1{font-size:24px;font-weight:700;margin-bottom:4px}
375
- h2{font-size:16px;font-weight:600;margin:24px 0 12px;padding-bottom:8px;border-bottom:1px solid #e5e7eb}
376
- .meta{font-size:12px;color:#9ca3af}
377
- .score-card{display:flex;justify-content:space-between;align-items:center;background:#fafafa;border:1px solid #e5e7eb;border-radius:12px;padding:20px;margin:16px 0}
378
- .score{font-size:36px;font-weight:700}
379
- .grade{font-size:18px;font-weight:700;padding:6px 16px;border-radius:8px;border:1px solid #e5e7eb}
380
- .sev-grid{display:grid;grid-template-columns:repeat(4,1fr);gap:8px;margin:12px 0}
381
- .sev-item{text-align:center;padding:8px;border-radius:8px}
382
- .disclaimer{margin-top:32px;padding:12px;background:#fefce8;border:1px solid #fde68a;border-radius:8px;font-size:11px;color:#92400e}
383
- @media print{body{padding:20px}h2{break-before:auto}}
384
- </style>
385
- </head>
386
- <body>
387
- <h1>🛡️ ClauseGuard Analysis Report</h1>
388
- <p class="meta">${new Date().toLocaleString()} · ${results.model !== "regex" ? "ML Models" : "Pattern Matching"}</p>
389
-
390
- <div class="score-card">
391
- <div>
392
- <p class="meta">RISK SCORE</p>
393
- <p class="score">${results.risk_score}<span style="font-size:16px;color:#9ca3af">/100</span></p>
394
- </div>
395
- <span class="grade">Grade ${results.grade}</span>
396
- </div>
397
-
398
- <div class="sev-grid">
399
- <div class="sev-item" style="background:#fef2f2"><strong style="color:#dc2626">${sevCounts.CRITICAL}</strong><br><small style="color:#dc2626">Critical</small></div>
400
- <div class="sev-item" style="background:#fff7ed"><strong style="color:#ea580c">${sevCounts.HIGH}</strong><br><small style="color:#ea580c">High</small></div>
401
- <div class="sev-item" style="background:#fefce8"><strong style="color:#ca8a04">${sevCounts.MEDIUM}</strong><br><small style="color:#ca8a04">Medium</small></div>
402
- <div class="sev-item" style="background:#f0fdf4"><strong style="color:#16a34a">${sevCounts.LOW}</strong><br><small style="color:#16a34a">Low</small></div>
403
- </div>
404
-
405
- <p class="meta">${results.total_clauses} clauses · ${results.flagged_count} flagged · ${results.entities.length} entities · ${results.obligations.length} obligations</p>
406
-
407
- ${flagged.length > 0 ? `<h2>⚠️ Flagged Clauses (${flagged.length})</h2>${clauseHTML}` : ""}
408
- ${results.entities.length > 0 ? `<h2>🏷️ Entities (${results.entities.length})</h2>${entityHTML}` : ""}
409
- ${results.contradictions.length > 0 ? `<h2>🔍 Issues (${results.contradictions.length})</h2>${results.contradictions.map(c => `<div style="border:1px solid #e5e7eb;border-left:3px solid ${sevColor[c.severity] || '#888'};border-radius:6px;padding:12px;margin-bottom:8px;"><strong style="color:${sevColor[c.severity]};font-size:11px;text-transform:uppercase">${c.type} (${c.severity})</strong><p style="font-size:13px;margin-top:4px">${c.explanation}</p></div>`).join("")}` : ""}
410
- ${results.obligations.length > 0 ? `<h2>📋 Obligations (${results.obligations.length})</h2><table style="width:100%;border-collapse:collapse;font-size:12px"><thead><tr style="background:#f9fafb;border-bottom:1px solid #e5e7eb"><th style="text-align:left;padding:8px">Type</th><th style="text-align:left;padding:8px">Party</th><th style="text-align:left;padding:8px">Description</th><th style="text-align:left;padding:8px">Deadline</th></tr></thead><tbody>${results.obligations.map(o => `<tr style="border-bottom:1px solid #f3f4f6"><td style="padding:8px;font-weight:500">${o.type}</td><td style="padding:8px">${o.party}</td><td style="padding:8px">${o.description.slice(0, 120)}</td><td style="padding:8px">${o.deadline}</td></tr>`).join("")}</tbody></table>` : ""}
411
- ${results.redlines && results.redlines.length > 0 ? `<h2>✏️ Redlining (${results.redlines.length})</h2>${results.redlines.map(rl => `<div style="border:1px solid #e5e7eb;border-radius:8px;padding:16px;margin-bottom:12px"><strong style="color:${sevColor[rl.risk_level]}">${rl.clause_label} (${rl.risk_level})</strong><div style="background:#fef2f2;padding:8px;border-radius:4px;margin:8px 0;font-size:12px;text-decoration:line-through;color:#991b1b">${rl.original_text.slice(0, 200)}</div><div style="background:#f0fdf4;padding:8px;border-radius:4px;font-size:12px;color:#166534">${rl.safe_alternative}</div><p style="font-size:10px;color:#9ca3af;margin-top:6px">📚 ${rl.legal_basis} · 🛡️ ${rl.consumer_standard}</p></div>`).join("")}` : ""}
412
-
413
- <div class="disclaimer">⚠️ <strong>Not legal advice.</strong> This report was generated by ClauseGuard AI for informational purposes only. Consult a licensed attorney for legal decisions.</div>
414
- </body>
415
- </html>`;
416
-
417
- download(html, `clauseguard-report-${timestamp()}.html`, "text/html");
418
- }
419
-
420
- // ═══════════════════════════════════════════════════════════════
421
- // PDF Export (via server-side API route)
422
- // ═══════════════════════════════════════════════════════════════
423
-
424
- export async function exportPDF(results: AnalysisResult) {
425
- try {
426
- const res = await fetch("/api/pdf/report", {
427
- method: "POST",
428
- headers: { "Content-Type": "application/json" },
429
- body: JSON.stringify(results),
430
- });
431
- if (!res.ok) throw new Error("PDF generation failed");
432
- const blob = await res.blob();
433
- download(blob, `clauseguard-report-${timestamp()}.pdf`, "application/pdf");
434
- return true;
435
- } catch {
436
- // Fallback: print HTML version
437
- exportHTML(results);
438
- return false;
439
- }
440
- }
441
-
442
- // ═══════════════════════════════════════════════════════════════
443
- // Export formats manifest (for the UI dropdown)
444
- // ═══════════════════════════════════════════════════════════════
445
-
446
- export const EXPORT_FORMATS = [
447
- { key: "pdf", label: "PDF Report", icon: "📄", description: "Formatted PDF document", fn: exportPDF },
448
- { key: "html", label: "HTML Report", icon: "🌐", description: "Styled HTML (printable)", fn: exportHTML },
449
- { key: "md", label: "Markdown", icon: "📝", description: "GitHub-flavored markdown", fn: exportMarkdown },
450
- { key: "txt", label: "Plain Text", icon: "📋", description: "Simple text format", fn: exportText },
451
- { key: "csv", label: "CSV Spreadsheet", icon: "📊", description: "For Excel / Google Sheets", fn: exportCSV },
452
- { key: "json", label: "JSON (formatted)", icon: "🔧", description: "Full structured data", fn: (r: AnalysisResult) => exportJSON(r, true) },
453
- { key: "json-raw", label: "JSON (raw)", icon: "⚡", description: "Compact, no whitespace", fn: (r: AnalysisResult) => exportJSON(r, false) },
454
- ] as const;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
web/lib/supabase/client.ts CHANGED
@@ -2,8 +2,8 @@ import { createBrowserClient } from "@supabase/ssr";
2
 
3
  export function createClient() {
4
  return createBrowserClient(
5
- process.env.NEXT_PUBLIC_SUPABASE_URL || "https://dummy-project.supabase.co",
6
- process.env.NEXT_PUBLIC_SUPABASE_PUBLISHABLE_KEY || "dummy-anon-key",
7
  {
8
  auth: {
9
  autoRefreshToken: true,
 
2
 
3
  export function createClient() {
4
  return createBrowserClient(
5
+ process.env.NEXT_PUBLIC_SUPABASE_URL!,
6
+ process.env.NEXT_PUBLIC_SUPABASE_PUBLISHABLE_KEY!,
7
  {
8
  auth: {
9
  autoRefreshToken: true,
web/lib/supabase/schema.sql CHANGED
@@ -1,5 +1,4 @@
1
- -- ClauseGuard — Full Database Schema v3.1
2
- -- FIX v4.1: Removed hardcoded admin email (was committed to public repo)
3
  -- Tables ordered by dependency (no forward references)
4
 
5
  -- ─── 1. Teams (no dependencies) ───
@@ -128,35 +127,24 @@ ALTER TABLE public.api_keys ENABLE ROW LEVEL SECURITY;
128
  ALTER TABLE public.custom_rules ENABLE ROW LEVEL SECURITY;
129
  ALTER TABLE public.admin_logs ENABLE ROW LEVEL SECURITY;
130
 
131
- -- ─── FIX v4.3: SECURITY DEFINER function to check admin role ───
132
- -- Querying profiles FROM a profiles policy causes infinite recursion (42P17).
133
- -- SECURITY DEFINER bypasses RLS, breaking the cycle.
134
- CREATE OR REPLACE FUNCTION public.is_admin()
135
- RETURNS boolean AS $$
136
- SELECT EXISTS (
137
- SELECT 1 FROM public.profiles
138
- WHERE id = auth.uid() AND role = 'admin'
139
- );
140
- $$ LANGUAGE sql SECURITY DEFINER STABLE;
141
-
142
  -- Profiles
143
  CREATE POLICY "Users see own profile" ON public.profiles FOR SELECT USING (auth.uid() = id);
144
  CREATE POLICY "Users update own profile" ON public.profiles FOR UPDATE USING (auth.uid() = id);
145
- CREATE POLICY "Admins read all profiles" ON public.profiles FOR SELECT USING (public.is_admin());
146
- CREATE POLICY "Admins update all profiles" ON public.profiles FOR UPDATE USING (public.is_admin());
147
 
148
  -- Analyses
149
  CREATE POLICY "Users see own analyses" ON public.analyses FOR SELECT
150
  USING (auth.uid() = user_id OR team_id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()));
151
  CREATE POLICY "Users insert analyses" ON public.analyses FOR INSERT WITH CHECK (auth.uid() = user_id);
152
  CREATE POLICY "Users delete own analyses" ON public.analyses FOR DELETE USING (auth.uid() = user_id);
153
- CREATE POLICY "Admins read all analyses" ON public.analyses FOR SELECT USING (public.is_admin());
154
 
155
  -- Teams
156
  CREATE POLICY "Team members can view" ON public.teams FOR SELECT
157
  USING (id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()) OR owner_id = auth.uid());
158
  CREATE POLICY "Owner can update team" ON public.teams FOR UPDATE USING (owner_id = auth.uid());
159
- CREATE POLICY "Admins read all teams" ON public.teams FOR SELECT USING (public.is_admin());
160
 
161
  -- Team invites
162
  CREATE POLICY "Members see team invites" ON public.team_invites FOR SELECT
@@ -167,17 +155,17 @@ CREATE POLICY "Users can invite" ON public.team_invites FOR INSERT WITH CHECK (i
167
  CREATE POLICY "Users see own API keys" ON public.api_keys FOR SELECT
168
  USING (user_id = auth.uid() OR team_id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()));
169
  CREATE POLICY "Users manage own API keys" ON public.api_keys FOR ALL USING (user_id = auth.uid());
170
- CREATE POLICY "Admins read all api_keys" ON public.api_keys FOR SELECT USING (public.is_admin());
171
 
172
  -- Custom Rules
173
  CREATE POLICY "Users see own rules" ON public.custom_rules FOR SELECT
174
  USING (user_id = auth.uid() OR team_id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()));
175
  CREATE POLICY "Users manage own rules" ON public.custom_rules FOR ALL USING (user_id = auth.uid());
176
- CREATE POLICY "Admins read all rules" ON public.custom_rules FOR SELECT USING (public.is_admin());
177
 
178
  -- Admin Logs
179
  CREATE POLICY "Admins manage logs" ON public.admin_logs FOR ALL
180
- USING (public.is_admin());
181
 
182
  -- ─── Auto-create profile on signup ───
183
  CREATE OR REPLACE FUNCTION public.handle_new_user()
@@ -198,19 +186,11 @@ CREATE TRIGGER on_auth_user_created
198
  AFTER INSERT ON auth.users
199
  FOR EACH ROW EXECUTE FUNCTION public.handle_new_user();
200
 
201
- -- ─── FIX v4.1: Admin setup via environment variable ───
202
- -- DO NOT hardcode admin emails in source code committed to public repos.
203
- -- Instead, run this manually after your first signup:
204
- --
205
- -- UPDATE public.profiles
206
- -- SET role = 'admin', plan = 'pro'
207
- -- WHERE email = '<YOUR_EMAIL>';
208
- --
209
- -- Or set ADMIN_EMAIL env var and run:
210
- -- DO $$ BEGIN
211
- -- UPDATE public.profiles SET role = 'admin', plan = 'pro'
212
- -- WHERE email = current_setting('app.admin_email', true);
213
- -- END $$;
214
 
215
  -- ─── Monthly reset function ───
216
  CREATE OR REPLACE FUNCTION public.reset_monthly_usage()
 
1
+ -- ClauseGuard — Full Database Schema v3.0
 
2
  -- Tables ordered by dependency (no forward references)
3
 
4
  -- ─── 1. Teams (no dependencies) ───
 
127
  ALTER TABLE public.custom_rules ENABLE ROW LEVEL SECURITY;
128
  ALTER TABLE public.admin_logs ENABLE ROW LEVEL SECURITY;
129
 
 
 
 
 
 
 
 
 
 
 
 
130
  -- Profiles
131
  CREATE POLICY "Users see own profile" ON public.profiles FOR SELECT USING (auth.uid() = id);
132
  CREATE POLICY "Users update own profile" ON public.profiles FOR UPDATE USING (auth.uid() = id);
133
+ CREATE POLICY "Admins read all profiles" ON public.profiles FOR SELECT USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
134
+ CREATE POLICY "Admins update all profiles" ON public.profiles FOR UPDATE USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
135
 
136
  -- Analyses
137
  CREATE POLICY "Users see own analyses" ON public.analyses FOR SELECT
138
  USING (auth.uid() = user_id OR team_id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()));
139
  CREATE POLICY "Users insert analyses" ON public.analyses FOR INSERT WITH CHECK (auth.uid() = user_id);
140
  CREATE POLICY "Users delete own analyses" ON public.analyses FOR DELETE USING (auth.uid() = user_id);
141
+ CREATE POLICY "Admins read all analyses" ON public.analyses FOR SELECT USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
142
 
143
  -- Teams
144
  CREATE POLICY "Team members can view" ON public.teams FOR SELECT
145
  USING (id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()) OR owner_id = auth.uid());
146
  CREATE POLICY "Owner can update team" ON public.teams FOR UPDATE USING (owner_id = auth.uid());
147
+ CREATE POLICY "Admins read all teams" ON public.teams FOR SELECT USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
148
 
149
  -- Team invites
150
  CREATE POLICY "Members see team invites" ON public.team_invites FOR SELECT
 
155
  CREATE POLICY "Users see own API keys" ON public.api_keys FOR SELECT
156
  USING (user_id = auth.uid() OR team_id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()));
157
  CREATE POLICY "Users manage own API keys" ON public.api_keys FOR ALL USING (user_id = auth.uid());
158
+ CREATE POLICY "Admins read all api_keys" ON public.api_keys FOR SELECT USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
159
 
160
  -- Custom Rules
161
  CREATE POLICY "Users see own rules" ON public.custom_rules FOR SELECT
162
  USING (user_id = auth.uid() OR team_id IN (SELECT team_id FROM public.profiles WHERE id = auth.uid()));
163
  CREATE POLICY "Users manage own rules" ON public.custom_rules FOR ALL USING (user_id = auth.uid());
164
+ CREATE POLICY "Admins read all rules" ON public.custom_rules FOR SELECT USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
165
 
166
  -- Admin Logs
167
  CREATE POLICY "Admins manage logs" ON public.admin_logs FOR ALL
168
+ USING (auth.uid() IN (SELECT id FROM public.profiles WHERE role = 'admin'));
169
 
170
  -- ─── Auto-create profile on signup ───
171
  CREATE OR REPLACE FUNCTION public.handle_new_user()
 
186
  AFTER INSERT ON auth.users
187
  FOR EACH ROW EXECUTE FUNCTION public.handle_new_user();
188
 
189
+ -- ─── Set owner as admin with full access ───
190
+ -- Run this AFTER your first signup with your email:
191
+ UPDATE public.profiles
192
+ SET role = 'admin', plan = 'pro'
193
+ WHERE email = 'ankygaur9972@gmail.com';
 
 
 
 
 
 
 
 
194
 
195
  -- ─── Monthly reset function ───
196
  CREATE OR REPLACE FUNCTION public.reset_monthly_usage()
web/lib/types.ts DELETED
@@ -1,87 +0,0 @@
1
// ClauseGuard — Shared TypeScript types for the web app

/** A risk category assigned to a clause by the classifier. */
export interface Cat {
  name: string;
  severity: string;
  description?: string;
  confidence?: number;
}

/** A single contract clause plus the categories flagged on it. */
export interface Clause {
  text: string;
  categories: Cat[];
}

/** A named entity extracted from the contract text. */
export interface Entity {
  text: string;
  type: string;
  score?: number;
  // NOTE(review): presumably identifies which extractor produced the entity — confirm against backend.
  source?: string;
}

/** A detected conflict between two parts of the contract. */
export interface Contradiction {
  type: string;
  explanation: string;
  severity: string;
  confidence?: number;
  source?: string;
}

/** An obligation imposed on a party, with deadline and optional priority. */
export interface Obligation {
  type: string;
  party: string;
  description: string;
  deadline: string;
  priority?: number;
}

/** One requirement checked under a compliance regulation. */
export interface ComplianceCheck {
  requirement: string;
  description: string;
  severity: string;
  status: string;
  matched_keywords: string[];
  context?: string[];
}

/** Aggregate compliance result for one regulation (keyed in AnalysisResult.compliance). */
export interface ComplianceReg {
  description: string;
  compliance_rate: number;
  checks: ComplianceCheck[];
  overall_status: string;
  negated_count?: number;
  ambiguous_count?: number;
  note?: string;
}

/** A suggested rewrite (redline) for a risky clause. */
export interface Redline {
  original_text: string;
  clause_label: string;
  risk_level: string;
  safe_alternative: string;
  template_alternative?: string;
  legal_basis: string;
  consumer_standard: string;
  tier: string;
}

/** One turn of the Q&A chatbot conversation. */
export interface ChatMessage {
  role: "user" | "assistant";
  content: string;
}

/** Full analysis payload returned by the backend for one contract. */
export interface AnalysisResult {
  risk_score: number;
  grade: string;
  total_clauses: number;
  flagged_count: number;
  results: Clause[];
  entities: Entity[];
  contradictions: Contradiction[];
  obligations: Obligation[];
  compliance: Record<string, ComplianceReg>;
  redlines: Redline[];
  model: string;
  latency_ms: number;
  session_id?: string;
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
web/package-lock.json DELETED
The diff for this file is too large to render. See raw diff
 
web/package.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "clauseguard-web",
3
- "version": "1.1.0",
4
  "private": true,
5
  "scripts": {
6
  "dev": "next dev --turbopack",
 
1
  {
2
  "name": "clauseguard-web",
3
+ "version": "1.0.0",
4
  "private": true,
5
  "scripts": {
6
  "dev": "next dev --turbopack",
web/proxy.ts CHANGED
@@ -47,10 +47,5 @@ export async function proxy(request: NextRequest) {
47
  }
48
 
49
  export const config = {
50
- // FIX v4.3: Match ALL routes so auth cookies are refreshed on every page load.
51
- // Without this, navigating to / or other non-dashboard pages doesn't refresh
52
- // the Supabase session cookie, causing auth to break on page reload.
53
- matcher: [
54
- "/((?!_next/static|_next/image|favicon.ico|.*\\.(?:svg|png|jpg|jpeg|gif|webp|ico)$).*)",
55
- ],
56
  };
 
47
  }
48
 
49
  export const config = {
50
+ matcher: ["/dashboard-pages/:path*", "/auth/:path*", "/admin/:path*"],
 
 
 
 
 
51
  };
web/tsconfig.json CHANGED
@@ -1,11 +1,7 @@
1
  {
2
  "compilerOptions": {
3
  "target": "ES2017",
4
- "lib": [
5
- "dom",
6
- "dom.iterable",
7
- "esnext"
8
- ],
9
  "allowJs": true,
10
  "skipLibCheck": true,
11
  "strict": true,
@@ -15,27 +11,11 @@
15
  "moduleResolution": "bundler",
16
  "resolveJsonModule": true,
17
  "isolatedModules": true,
18
- "jsx": "react-jsx",
19
  "incremental": true,
20
- "plugins": [
21
- {
22
- "name": "next"
23
- }
24
- ],
25
- "paths": {
26
- "@/*": [
27
- "./*"
28
- ]
29
- }
30
  },
31
- "include": [
32
- "next-env.d.ts",
33
- "**/*.ts",
34
- "**/*.tsx",
35
- ".next/types/**/*.ts",
36
- ".next/dev/types/**/*.ts"
37
- ],
38
- "exclude": [
39
- "node_modules"
40
- ]
41
  }
 
1
  {
2
  "compilerOptions": {
3
  "target": "ES2017",
4
+ "lib": ["dom", "dom.iterable", "esnext"],
 
 
 
 
5
  "allowJs": true,
6
  "skipLibCheck": true,
7
  "strict": true,
 
11
  "moduleResolution": "bundler",
12
  "resolveJsonModule": true,
13
  "isolatedModules": true,
14
+ "jsx": "preserve",
15
  "incremental": true,
16
+ "plugins": [{ "name": "next" }],
17
+ "paths": { "@/*": ["./*"] }
 
 
 
 
 
 
 
 
18
  },
19
+ "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
20
+ "exclude": ["node_modules"]
 
 
 
 
 
 
 
 
21
  }