Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,9 @@ Features:
|
|
| 6 |
• 4-tier risk scoring (Critical / High / Medium / Low)
|
| 7 |
• Legal NER: parties, dates, monetary values, jurisdictions, defined terms
|
| 8 |
• NLI contradiction & missing-clause detection
|
|
|
|
|
|
|
|
|
|
| 9 |
• PDF / DOCX / TXT parsing
|
| 10 |
• Professional 3-panel Gradio UI
|
| 11 |
• JSON & CSV export
|
|
@@ -20,7 +23,6 @@ import re
|
|
| 20 |
import json
|
| 21 |
import csv
|
| 22 |
import io
|
| 23 |
-
import textwrap
|
| 24 |
from collections import defaultdict
|
| 25 |
from datetime import datetime
|
| 26 |
|
|
@@ -49,6 +51,11 @@ try:
|
|
| 49 |
except Exception:
|
| 50 |
_HAS_TORCH = False
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 53 |
# 1. CONFIGURATION
|
| 54 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -70,7 +77,6 @@ CUAD_LABELS = [
|
|
| 70 |
"Third Party Beneficiary", "Other"
|
| 71 |
]
|
| 72 |
|
| 73 |
-
# Original 8 unfair-clause labels (backward-compat + consumer contracts)
|
| 74 |
_UNFAIR_LABELS = [
|
| 75 |
"Limitation of liability", "Unilateral termination", "Unilateral change",
|
| 76 |
"Content removal", "Contract by using", "Choice of law",
|
|
@@ -87,6 +93,7 @@ RISK_MAP = {
|
|
| 87 |
"Termination for Convenience": "CRITICAL",
|
| 88 |
"Limitation of liability": "CRITICAL",
|
| 89 |
"Unilateral termination": "CRITICAL",
|
|
|
|
| 90 |
# High
|
| 91 |
"Non-Compete": "HIGH",
|
| 92 |
"Exclusivity": "HIGH",
|
|
@@ -95,7 +102,6 @@ RISK_MAP = {
|
|
| 95 |
"No-Solicit of Employees": "HIGH",
|
| 96 |
"Unilateral change": "HIGH",
|
| 97 |
"Content removal": "HIGH",
|
| 98 |
-
"Liquidated Damages": "HIGH",
|
| 99 |
"Anti-Assignment": "HIGH",
|
| 100 |
# Medium
|
| 101 |
"Governing Law": "MEDIUM",
|
|
@@ -107,6 +113,7 @@ RISK_MAP = {
|
|
| 107 |
"Non-Disparagement": "MEDIUM",
|
| 108 |
"Most Favored Nation": "MEDIUM",
|
| 109 |
"Revenue/Profit Sharing": "MEDIUM",
|
|
|
|
| 110 |
# Low
|
| 111 |
"Document Name": "LOW",
|
| 112 |
"Parties": "LOW",
|
|
@@ -125,7 +132,6 @@ RISK_MAP = {
|
|
| 125 |
"Post-Termination Services": "LOW",
|
| 126 |
"Audit Rights": "LOW",
|
| 127 |
"Cap on Liability": "LOW",
|
| 128 |
-
"Warranty Duration": "LOW",
|
| 129 |
"Insurance": "LOW",
|
| 130 |
"Covenant Not to Sue": "LOW",
|
| 131 |
"Third Party Beneficiary": "LOW",
|
|
@@ -162,12 +168,19 @@ DESC_MAP.update({
|
|
| 162 |
"Warranty Duration": "Length of time warranties remain in effect.",
|
| 163 |
"Covenant Not to Sue": "Agreement not to bring legal action against a party.",
|
| 164 |
"Third Party Beneficiary": "Non-party who benefits from the contract terms.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
})
|
| 166 |
|
| 167 |
-
# Risk weights for scoring
|
| 168 |
RISK_WEIGHTS = {"CRITICAL": 40, "HIGH": 20, "MEDIUM": 10, "LOW": 3}
|
| 169 |
|
| 170 |
-
# Color / badge styles
|
| 171 |
RISK_STYLES = {
|
| 172 |
"CRITICAL": ("#dc2626", "#fef2f2", "β οΈ"),
|
| 173 |
"HIGH": ("#ea580c", "#fff7ed", "β‘"),
|
|
@@ -190,7 +203,6 @@ def _load_cuad_model():
|
|
| 190 |
try:
|
| 191 |
base = "nlpaueb/legal-bert-base-uncased"
|
| 192 |
adapter = "Mokshith31/legalbert-contract-clause-classification"
|
| 193 |
-
|
| 194 |
print(f"[ClauseGuard] Loading CUAD classifier: {adapter}")
|
| 195 |
cuad_tokenizer = AutoTokenizer.from_pretrained(base)
|
| 196 |
base_model = AutoModelForSequenceClassification.from_pretrained(
|
|
@@ -256,7 +268,6 @@ def parse_document(file_path):
|
|
| 256 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 257 |
|
| 258 |
def split_clauses(text):
|
| 259 |
-
"""Split contract text into individual clauses."""
|
| 260 |
text = re.sub(r'\n{3,}', '\n\n', text.strip())
|
| 261 |
parts = re.split(
|
| 262 |
r'(?<=[.!?])\s+(?=[A-Z0-9(])|(?:\n\n)(?=\d+[.)]\s|\([a-z]\)\s|[A-Z][A-Z\s]{2,})',
|
|
@@ -270,10 +281,8 @@ def split_clauses(text):
|
|
| 270 |
return clauses
|
| 271 |
|
| 272 |
def classify_cuad(clause_text):
|
| 273 |
-
"""Classify a single clause using the CUAD model."""
|
| 274 |
if cuad_model is None or cuad_tokenizer is None:
|
| 275 |
return _classify_regex(clause_text)
|
| 276 |
-
|
| 277 |
try:
|
| 278 |
inputs = cuad_tokenizer(
|
| 279 |
clause_text,
|
|
@@ -285,8 +294,6 @@ def classify_cuad(clause_text):
|
|
| 285 |
with torch.no_grad():
|
| 286 |
logits = cuad_model(**inputs).logits
|
| 287 |
probs = torch.softmax(logits, dim=-1)[0]
|
| 288 |
-
|
| 289 |
-
# Multi-label: return all labels above threshold
|
| 290 |
threshold = 0.15
|
| 291 |
results = []
|
| 292 |
for i, prob in enumerate(probs):
|
|
@@ -299,9 +306,7 @@ def classify_cuad(clause_text):
|
|
| 299 |
"risk": risk,
|
| 300 |
"description": DESC_MAP.get(label, label),
|
| 301 |
})
|
| 302 |
-
# Sort by confidence descending
|
| 303 |
results.sort(key=lambda x: x["confidence"], reverse=True)
|
| 304 |
-
# If no labels above threshold, take top-1
|
| 305 |
if not results:
|
| 306 |
top_idx = int(probs.argmax())
|
| 307 |
label = CUAD_LABELS[top_idx] if top_idx < len(CUAD_LABELS) else "Other"
|
|
@@ -332,6 +337,10 @@ _REGEX_PATTERNS = {
|
|
| 332 |
"IP Ownership Assignment": [r"assign.*intellectual property", r"ownership of.*ip", r"all rights.*assign"],
|
| 333 |
"Uncapped Liability": [r"unlimited liability", r"uncapped", r"no.*limit.*liability"],
|
| 334 |
"Cap on Liability": [r"cap on liability", r"maximum liability", r"liability.*shall not exceed"],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
}
|
| 336 |
|
| 337 |
def _classify_regex(text):
|
|
@@ -358,9 +367,7 @@ def _classify_regex(text):
|
|
| 358 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 359 |
|
| 360 |
def extract_entities(text):
|
| 361 |
-
"""Extract legal entities using regex patterns."""
|
| 362 |
entities = []
|
| 363 |
-
|
| 364 |
# Dates
|
| 365 |
date_patterns = [
|
| 366 |
(r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', "DATE"),
|
|
@@ -371,7 +378,6 @@ def extract_entities(text):
|
|
| 371 |
for pat, etype in date_patterns:
|
| 372 |
for m in re.finditer(pat, text, re.IGNORECASE):
|
| 373 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
| 374 |
-
|
| 375 |
# Monetary values
|
| 376 |
money_patterns = [
|
| 377 |
(r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?(?:\s*(?:million|billion|thousand|M|B|K))?', "MONEY"),
|
|
@@ -380,16 +386,14 @@ def extract_entities(text):
|
|
| 380 |
for pat, etype in money_patterns:
|
| 381 |
for m in re.finditer(pat, text, re.IGNORECASE):
|
| 382 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
| 383 |
-
|
| 384 |
# Party names
|
| 385 |
party_patterns = [
|
| 386 |
(r'\b[A-Z][A-Za-z0-9\s&]+(?:Inc\.|LLC|Ltd\.|Limited|Corp\.|Corporation|PLC|GmbH|AG|S\.A\.|B\.V\.)\b', "PARTY"),
|
| 387 |
-
(r'\b(?:Party A|Party B|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Company|Customer)\b', "PARTY_ROLE"),
|
| 388 |
]
|
| 389 |
for pat, etype in party_patterns:
|
| 390 |
for m in re.finditer(pat, text):
|
| 391 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
| 392 |
-
|
| 393 |
# Jurisdictions
|
| 394 |
jurisdiction_patterns = [
|
| 395 |
(r'\b(?:State|Laws?) of [A-Z][a-zA-Z\s]+', "JURISDICTION"),
|
|
@@ -398,7 +402,6 @@ def extract_entities(text):
|
|
| 398 |
for pat, etype in jurisdiction_patterns:
|
| 399 |
for m in re.finditer(pat, text, re.IGNORECASE):
|
| 400 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
| 401 |
-
|
| 402 |
# Defined Terms
|
| 403 |
defined_patterns = [
|
| 404 |
(r'"([A-Z][A-Z\s]+)"', "DEFINED_TERM"),
|
|
@@ -407,8 +410,7 @@ def extract_entities(text):
|
|
| 407 |
for pat, etype in defined_patterns:
|
| 408 |
for m in re.finditer(pat, text):
|
| 409 |
entities.append({"text": m.group(1), "type": etype, "start": m.start(), "end": m.end()})
|
| 410 |
-
|
| 411 |
-
# Deduplicate overlapping
|
| 412 |
entities.sort(key=lambda x: (x["start"], -(x["end"] - x["start"])))
|
| 413 |
filtered = []
|
| 414 |
last_end = -1
|
|
@@ -434,16 +436,10 @@ _CONTRADICTION_PAIRS = [
|
|
| 434 |
]
|
| 435 |
|
| 436 |
def detect_contradictions(clause_results):
|
| 437 |
-
"""Detect contradictions and missing critical clauses."""
|
| 438 |
contradictions = []
|
| 439 |
labels_found = set()
|
| 440 |
-
texts_found = {}
|
| 441 |
-
|
| 442 |
for cr in clause_results:
|
| 443 |
labels_found.add(cr["label"])
|
| 444 |
-
texts_found[cr["label"]] = cr.get("text", "")
|
| 445 |
-
|
| 446 |
-
# Contradiction pairs
|
| 447 |
for group_a, group_b, explanation in _CONTRADICTION_PAIRS:
|
| 448 |
found_a = any(l in labels_found for l in group_a)
|
| 449 |
found_b = any(l in labels_found for l in group_b)
|
|
@@ -454,8 +450,6 @@ def detect_contradictions(clause_results):
|
|
| 454 |
"severity": "HIGH",
|
| 455 |
"clauses": list(set(group_a + group_b)),
|
| 456 |
})
|
| 457 |
-
|
| 458 |
-
# Missing critical clauses
|
| 459 |
critical_clauses = ["Governing Law", "Termination for Convenience", "Limitation of liability", "Arbitration"]
|
| 460 |
for cc in critical_clauses:
|
| 461 |
if cc not in labels_found:
|
|
@@ -465,7 +459,6 @@ def detect_contradictions(clause_results):
|
|
| 465 |
"severity": "MEDIUM",
|
| 466 |
"clauses": [cc],
|
| 467 |
})
|
| 468 |
-
|
| 469 |
return contradictions
|
| 470 |
|
| 471 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -477,19 +470,15 @@ def compute_risk_score(clause_results, total_clauses):
|
|
| 477 |
for cr in clause_results:
|
| 478 |
sev = cr.get("risk", "LOW")
|
| 479 |
sev_counts[sev] += 1
|
| 480 |
-
|
| 481 |
if total_clauses == 0:
|
| 482 |
return 0, "A", sev_counts
|
| 483 |
-
|
| 484 |
weighted = sum(sev_counts[s] * RISK_WEIGHTS[s] for s in sev_counts)
|
| 485 |
risk = min(100, round(weighted / max(1, total_clauses) * 10))
|
| 486 |
-
|
| 487 |
if risk >= 70: grade = "F"
|
| 488 |
elif risk >= 50: grade = "D"
|
| 489 |
elif risk >= 30: grade = "C"
|
| 490 |
elif risk >= 15: grade = "B"
|
| 491 |
else: grade = "A"
|
| 492 |
-
|
| 493 |
return risk, grade, sev_counts
|
| 494 |
|
| 495 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -499,12 +488,9 @@ def compute_risk_score(clause_results, total_clauses):
|
|
| 499 |
def analyze_contract(text):
|
| 500 |
if not text or len(text.strip()) < 50:
|
| 501 |
return None, "Document too short (minimum 50 characters)"
|
| 502 |
-
|
| 503 |
clauses = split_clauses(text)
|
| 504 |
if not clauses:
|
| 505 |
return None, "No clauses detected in document"
|
| 506 |
-
|
| 507 |
-
# Analyze each clause
|
| 508 |
clause_results = []
|
| 509 |
for clause in clauses:
|
| 510 |
predictions = classify_cuad(clause)
|
|
@@ -517,17 +503,11 @@ def analyze_contract(text):
|
|
| 517 |
"risk": pred["risk"],
|
| 518 |
"description": pred["description"],
|
| 519 |
})
|
| 520 |
-
|
| 521 |
-
# NER
|
| 522 |
entities = extract_entities(text)
|
| 523 |
-
|
| 524 |
-
# NLI / contradictions
|
| 525 |
contradictions = detect_contradictions(clause_results)
|
| 526 |
-
|
| 527 |
-
# Risk scoring
|
| 528 |
risk, grade, sev_counts = compute_risk_score(clause_results, len(clauses))
|
| 529 |
-
|
| 530 |
-
|
| 531 |
result = {
|
| 532 |
"metadata": {
|
| 533 |
"analysis_date": datetime.now().isoformat(),
|
|
@@ -543,9 +523,10 @@ def analyze_contract(text):
|
|
| 543 |
"clauses": clause_results,
|
| 544 |
"entities": entities,
|
| 545 |
"contradictions": contradictions,
|
|
|
|
|
|
|
| 546 |
"raw_text": text,
|
| 547 |
}
|
| 548 |
-
|
| 549 |
return result, None
|
| 550 |
|
| 551 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -580,19 +561,15 @@ def export_csv(result):
|
|
| 580 |
def render_summary(result):
|
| 581 |
if result is None:
|
| 582 |
return ""
|
| 583 |
-
|
| 584 |
risk = result["risk"]
|
| 585 |
score = risk["score"]
|
| 586 |
grade = risk["grade"]
|
| 587 |
breakdown = risk["breakdown"]
|
| 588 |
-
|
| 589 |
grade_color = {
|
| 590 |
"A": "#16a34a", "B": "#65a30d", "C": "#ca8a04",
|
| 591 |
"D": "#ea580c", "F": "#dc2626",
|
| 592 |
}.get(grade, "#6b7280")
|
| 593 |
-
|
| 594 |
crit, high, med, low = breakdown["CRITICAL"], breakdown["HIGH"], breakdown["MEDIUM"], breakdown["LOW"]
|
| 595 |
-
|
| 596 |
html = f"""
|
| 597 |
<div style="font-family:system-ui,sans-serif;padding:16px;border:1px solid #e5e7eb;border-radius:12px;background:#fff;">
|
| 598 |
<div style="text-align:center;margin-bottom:16px;">
|
|
@@ -631,35 +608,27 @@ def render_summary(result):
|
|
| 631 |
def render_clause_cards(result):
|
| 632 |
if result is None:
|
| 633 |
return ""
|
| 634 |
-
|
| 635 |
clauses = result.get("clauses", [])
|
| 636 |
if not clauses:
|
| 637 |
return '<div style="padding:24px;text-align:center;color:#6b7280;">No clauses detected.</div>'
|
| 638 |
-
|
| 639 |
-
# Group by clause text
|
| 640 |
grouped = defaultdict(list)
|
| 641 |
for cr in clauses:
|
| 642 |
grouped[cr["text"]].append(cr)
|
| 643 |
-
|
| 644 |
html = '<div style="font-family:system-ui,sans-serif;">'
|
| 645 |
for text, items in grouped.items():
|
| 646 |
max_risk = max(items, key=lambda x: {"CRITICAL":4,"HIGH":3,"MEDIUM":2,"LOW":1}[x["risk"]])["risk"]
|
| 647 |
border, bg, icon = RISK_STYLES[max_risk]
|
| 648 |
-
|
| 649 |
tags = ""
|
| 650 |
for item in items:
|
| 651 |
tag_bg = RISK_STYLES[item["risk"]][1]
|
| 652 |
tag_color = RISK_STYLES[item["risk"]][0]
|
| 653 |
tags += f'<span style="background:{tag_bg};color:{tag_color};border:1px solid {tag_color}33;padding:2px 8px;border-radius:12px;font-size:11px;font-weight:500;margin-right:4px;">{item["label"]} ({item["confidence"]})</span>'
|
| 654 |
-
|
| 655 |
descs = "".join(
|
| 656 |
f'<p style="font-size:12px;color:#6b7280;margin:4px 0 0 0;">{item["description"]}</p>'
|
| 657 |
for item in items
|
| 658 |
)
|
| 659 |
-
|
| 660 |
preview = text[:300] + ("..." if len(text) > 300 else "")
|
| 661 |
preview = preview.replace("<", "<").replace(">", ">")
|
| 662 |
-
|
| 663 |
html += f"""
|
| 664 |
<div style="border:1px solid #e5e7eb;border-left:4px solid {border};border-radius:8px;padding:14px;margin-bottom:10px;background:#fafafa;">
|
| 665 |
<div style="display:flex;align-items:center;gap:6px;margin-bottom:6px;">
|
|
@@ -677,16 +646,12 @@ def render_clause_cards(result):
|
|
| 677 |
def render_entities(result):
|
| 678 |
if result is None:
|
| 679 |
return ""
|
| 680 |
-
|
| 681 |
entities = result.get("entities", [])
|
| 682 |
if not entities:
|
| 683 |
return '<div style="padding:16px;color:#6b7280;">No entities detected.</div>'
|
| 684 |
-
|
| 685 |
-
# Group by type
|
| 686 |
grouped = defaultdict(list)
|
| 687 |
for e in entities:
|
| 688 |
grouped[e["type"]].append(e["text"])
|
| 689 |
-
|
| 690 |
html = '<div style="font-family:system-ui,sans-serif;">'
|
| 691 |
for etype, texts in grouped.items():
|
| 692 |
unique = list(dict.fromkeys(texts))[:20]
|
|
@@ -697,12 +662,10 @@ def render_entities(result):
|
|
| 697 |
"JURISDICTION": "#f59e0b",
|
| 698 |
"DEFINED_TERM": "#ec4899",
|
| 699 |
}.get(etype, "#6b7280")
|
| 700 |
-
|
| 701 |
items_html = "".join(
|
| 702 |
f'<span style="display:inline-block;background:{color}15;color:{color};border:1px solid {color}40;padding:3px 10px;border-radius:6px;font-size:12px;margin:3px;">{t}</span>'
|
| 703 |
for t in unique
|
| 704 |
)
|
| 705 |
-
|
| 706 |
html += f"""
|
| 707 |
<div style="margin-bottom:12px;">
|
| 708 |
<div style="font-size:12px;font-weight:600;color:#374151;margin-bottom:6px;text-transform:uppercase;">{etype}</div>
|
|
@@ -715,11 +678,9 @@ def render_entities(result):
|
|
| 715 |
def render_contradictions(result):
|
| 716 |
if result is None:
|
| 717 |
return ""
|
| 718 |
-
|
| 719 |
contradictions = result.get("contradictions", [])
|
| 720 |
if not contradictions:
|
| 721 |
return '<div style="padding:16px;color:#16a34a;">β No contradictions or missing clauses detected.</div>'
|
| 722 |
-
|
| 723 |
html = '<div style="font-family:system-ui,sans-serif;">'
|
| 724 |
for c in contradictions:
|
| 725 |
sev_color = RISK_STYLES[c["severity"]][0]
|
|
@@ -739,10 +700,8 @@ def render_contradictions(result):
|
|
| 739 |
def render_document_viewer(result):
|
| 740 |
if result is None:
|
| 741 |
return ""
|
| 742 |
-
|
| 743 |
text = result.get("raw_text", "")
|
| 744 |
entities = sorted(result.get("entities", []), key=lambda x: x["start"])
|
| 745 |
-
|
| 746 |
html_parts = []
|
| 747 |
last_end = 0
|
| 748 |
for e in entities:
|
|
@@ -760,10 +719,8 @@ def render_document_viewer(result):
|
|
| 760 |
f'<mark style="background:{color};padding:1px 2px;border-radius:2px;font-size:12px;" title="{label}">{e["text"].replace("<","<").replace(">",">")}</mark>'
|
| 761 |
)
|
| 762 |
last_end = e["end"]
|
| 763 |
-
|
| 764 |
html_parts.append(text[last_end:].replace("<", "<").replace(">", ">"))
|
| 765 |
highlighted = "".join(html_parts)
|
| 766 |
-
|
| 767 |
return f"""
|
| 768 |
<div style="font-family:monospace;font-size:13px;line-height:1.6;padding:16px;border:1px solid #e5e7eb;border-radius:8px;background:#fff;max-height:600px;overflow-y:auto;white-space:pre-wrap;">
|
| 769 |
{highlighted}
|
|
@@ -771,7 +728,19 @@ def render_document_viewer(result):
|
|
| 771 |
"""
|
| 772 |
|
| 773 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 774 |
-
# 11.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 775 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 776 |
|
| 777 |
def process_upload(file):
|
|
@@ -784,36 +753,35 @@ def process_upload(file):
|
|
| 784 |
|
| 785 |
def run_analysis(text):
|
| 786 |
if not text or len(text.strip()) < 50:
|
| 787 |
-
|
| 788 |
-
|
| 789 |
result, error = analyze_contract(text)
|
| 790 |
if error:
|
| 791 |
err_html = f'<p style="color:#dc2626;padding:16px;">{error}</p>'
|
| 792 |
-
return [err_html] *
|
| 793 |
-
|
| 794 |
# Save export files
|
| 795 |
json_path = "/tmp/clauseguard_report.json"
|
| 796 |
with open(json_path, "w") as f:
|
| 797 |
json.dump(result, f, indent=2, default=str)
|
| 798 |
-
|
| 799 |
csv_content = export_csv(result)
|
| 800 |
csv_path = "/tmp/clauseguard_report.csv"
|
| 801 |
with open(csv_path, "w") as f:
|
| 802 |
f.write(csv_content)
|
| 803 |
-
|
| 804 |
return [
|
| 805 |
render_summary(result),
|
| 806 |
render_clause_cards(result),
|
| 807 |
render_entities(result),
|
| 808 |
render_contradictions(result),
|
| 809 |
render_document_viewer(result),
|
|
|
|
|
|
|
| 810 |
json_path,
|
| 811 |
csv_path,
|
| 812 |
"Analysis complete",
|
| 813 |
]
|
| 814 |
|
| 815 |
def do_clear():
|
| 816 |
-
return [""] *
|
| 817 |
|
| 818 |
# ββ Example contracts ββ
|
| 819 |
SPOTIFY_TOS = """By using the Spotify Service, you agree to be bound by these Terms of Use.
|
|
@@ -858,10 +826,42 @@ This Non-Disclosure Agreement (the "Agreement") is entered into as of January 15
|
|
| 858 |
|
| 859 |
7. Non-Compete. During the term of this Agreement and for a period of two (2) years thereafter, the Receiving Party shall not engage in any business that competes with the Disclosing Party."""
|
| 860 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 861 |
with gr.Blocks(
|
| 862 |
title="ClauseGuard β AI Contract Analysis",
|
| 863 |
css="""
|
| 864 |
-
.gradio-container { max-width:
|
| 865 |
"""
|
| 866 |
) as demo:
|
| 867 |
|
|
@@ -869,66 +869,119 @@ with gr.Blocks(
|
|
| 869 |
<div style="display:flex;align-items:center;justify-content:space-between;padding:12px 0;border-bottom:2px solid #e5e7eb;margin-bottom:16px;">
|
| 870 |
<div>
|
| 871 |
<h1 style="font-size:24px;font-weight:700;margin:0;color:#1f2937;">π‘οΈ ClauseGuard</h1>
|
| 872 |
-
<p style="font-size:13px;color:#6b7280;margin:4px 0 0 0;">AI-Powered Legal Contract Analysis Β· 41 Clause Categories Β· Risk Scoring Β· NER Β· NLI</p>
|
| 873 |
</div>
|
| 874 |
-
<div style="font-size:12px;color:#9ca3af;">v2.0 Β· World's Best Legal AI</div>
|
| 875 |
</div>
|
| 876 |
""")
|
| 877 |
|
| 878 |
-
# ββ
|
| 879 |
-
with gr.
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
|
| 888 |
-
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
|
| 904 |
-
|
| 905 |
-
|
| 906 |
-
|
| 907 |
-
|
| 908 |
-
|
| 909 |
-
|
| 910 |
-
|
| 911 |
-
|
| 912 |
-
|
| 913 |
-
|
| 914 |
-
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
|
| 918 |
-
|
| 919 |
-
|
| 920 |
-
|
| 921 |
-
|
| 922 |
-
|
| 923 |
-
|
| 924 |
-
with gr.
|
| 925 |
-
|
| 926 |
-
|
| 927 |
-
|
| 928 |
-
|
| 929 |
-
|
| 930 |
-
|
| 931 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 932 |
|
| 933 |
# ββ Events ββ
|
| 934 |
def _load_file(file):
|
|
@@ -938,25 +991,35 @@ with gr.Blocks(
|
|
| 938 |
return text, "Loaded successfully" if not err else err
|
| 939 |
|
| 940 |
load_btn.click(_load_file, inputs=[file_input], outputs=[text_input, load_status])
|
|
|
|
|
|
|
| 941 |
|
| 942 |
scan_btn.click(
|
| 943 |
run_analysis,
|
| 944 |
inputs=[text_input],
|
| 945 |
outputs=[summary_html, clauses_html, entities_html, nli_html,
|
| 946 |
-
doc_html,
|
|
|
|
| 947 |
)
|
| 948 |
|
| 949 |
clear_btn.click(
|
| 950 |
do_clear,
|
| 951 |
outputs=[summary_html, clauses_html, entities_html, nli_html,
|
| 952 |
-
doc_html,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 953 |
)
|
| 954 |
|
| 955 |
gr.HTML("""
|
| 956 |
<div style="margin-top:24px;padding:16px 0;border-top:1px solid #e5e7eb;text-align:center;">
|
| 957 |
<p style="font-size:11px;color:#9ca3af;">
|
| 958 |
β οΈ Not legal advice. For informational purposes only.
|
| 959 |
-
Β· Model: <a href="https://huggingface.co/Mokshith31/legalbert-contract-clause-classification" style="color:#6b7280;">Legal-BERT + CUAD</a>
|
| 960 |
Β· Dataset: <a href="https://huggingface.co/datasets/theatticusproject/cuad-qa" style="color:#6b7280;">CUAD</a>
|
| 961 |
Β· <a href="https://huggingface.co/spaces/gaurv007/ClauseGuard" style="color:#6b7280;">ClauseGuard Space</a>
|
| 962 |
</p>
|
|
|
|
| 6 |
• 4-tier risk scoring (Critical / High / Medium / Low)
|
| 7 |
• Legal NER: parties, dates, monetary values, jurisdictions, defined terms
|
| 8 |
• NLI contradiction & missing-clause detection
|
| 9 |
+
• Contract comparison engine (diff between 2 contracts)
|
| 10 |
+
• Obligation tracker (monetary, compliance, reporting, delivery)
|
| 11 |
+
• Compliance checker (GDPR, CCPA, SOX, HIPAA, FINRA)
|
| 12 |
• PDF / DOCX / TXT parsing
|
| 13 |
• Professional 3-panel Gradio UI
|
| 14 |
• JSON & CSV export
|
|
|
|
| 23 |
import json
|
| 24 |
import csv
|
| 25 |
import io
|
|
|
|
| 26 |
from collections import defaultdict
|
| 27 |
from datetime import datetime
|
| 28 |
|
|
|
|
| 51 |
except Exception:
|
| 52 |
_HAS_TORCH = False
|
| 53 |
|
| 54 |
+
# ββ Import submodules βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 55 |
+
from compare import compare_contracts, render_comparison_html
|
| 56 |
+
from obligations import extract_obligations, render_obligations_html
|
| 57 |
+
from compliance import check_compliance, render_compliance_html
|
| 58 |
+
|
| 59 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 60 |
# 1. CONFIGURATION
|
| 61 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 77 |
"Third Party Beneficiary", "Other"
|
| 78 |
]
|
| 79 |
|
|
|
|
| 80 |
_UNFAIR_LABELS = [
|
| 81 |
"Limitation of liability", "Unilateral termination", "Unilateral change",
|
| 82 |
"Content removal", "Contract by using", "Choice of law",
|
|
|
|
| 93 |
"Termination for Convenience": "CRITICAL",
|
| 94 |
"Limitation of liability": "CRITICAL",
|
| 95 |
"Unilateral termination": "CRITICAL",
|
| 96 |
+
"Liquidated Damages": "CRITICAL",
|
| 97 |
# High
|
| 98 |
"Non-Compete": "HIGH",
|
| 99 |
"Exclusivity": "HIGH",
|
|
|
|
| 102 |
"No-Solicit of Employees": "HIGH",
|
| 103 |
"Unilateral change": "HIGH",
|
| 104 |
"Content removal": "HIGH",
|
|
|
|
| 105 |
"Anti-Assignment": "HIGH",
|
| 106 |
# Medium
|
| 107 |
"Governing Law": "MEDIUM",
|
|
|
|
| 113 |
"Non-Disparagement": "MEDIUM",
|
| 114 |
"Most Favored Nation": "MEDIUM",
|
| 115 |
"Revenue/Profit Sharing": "MEDIUM",
|
| 116 |
+
"Warranty Duration": "MEDIUM",
|
| 117 |
# Low
|
| 118 |
"Document Name": "LOW",
|
| 119 |
"Parties": "LOW",
|
|
|
|
| 132 |
"Post-Termination Services": "LOW",
|
| 133 |
"Audit Rights": "LOW",
|
| 134 |
"Cap on Liability": "LOW",
|
|
|
|
| 135 |
"Insurance": "LOW",
|
| 136 |
"Covenant Not to Sue": "LOW",
|
| 137 |
"Third Party Beneficiary": "LOW",
|
|
|
|
| 168 |
"Warranty Duration": "Length of time warranties remain in effect.",
|
| 169 |
"Covenant Not to Sue": "Agreement not to bring legal action against a party.",
|
| 170 |
"Third Party Beneficiary": "Non-party who benefits from the contract terms.",
|
| 171 |
+
"Insurance": "Insurance coverage requirements.",
|
| 172 |
+
"Revenue/Profit Sharing": "Revenue or profit sharing arrangements between parties.",
|
| 173 |
+
"Price Restriction": "Restrictions on pricing or discounting.",
|
| 174 |
+
"Minimum Commitment": "Minimum purchase or usage commitment.",
|
| 175 |
+
"Volume Restriction": "Limits on volume of goods or services.",
|
| 176 |
+
"License Grant": "Permission to use intellectual property.",
|
| 177 |
+
"Non-Transferable License": "License that cannot be transferred to third parties.",
|
| 178 |
+
"Irrevocable or Perpetual License": "License that cannot be revoked or lasts indefinitely.",
|
| 179 |
+
"Unlimited/All-You-Can-Eat License": "License with no usage limits.",
|
| 180 |
})
|
| 181 |
|
|
|
|
| 182 |
RISK_WEIGHTS = {"CRITICAL": 40, "HIGH": 20, "MEDIUM": 10, "LOW": 3}
|
| 183 |
|
|
|
|
| 184 |
RISK_STYLES = {
|
| 185 |
"CRITICAL": ("#dc2626", "#fef2f2", "β οΈ"),
|
| 186 |
"HIGH": ("#ea580c", "#fff7ed", "β‘"),
|
|
|
|
| 203 |
try:
|
| 204 |
base = "nlpaueb/legal-bert-base-uncased"
|
| 205 |
adapter = "Mokshith31/legalbert-contract-clause-classification"
|
|
|
|
| 206 |
print(f"[ClauseGuard] Loading CUAD classifier: {adapter}")
|
| 207 |
cuad_tokenizer = AutoTokenizer.from_pretrained(base)
|
| 208 |
base_model = AutoModelForSequenceClassification.from_pretrained(
|
|
|
|
| 268 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 269 |
|
| 270 |
def split_clauses(text):
|
|
|
|
| 271 |
text = re.sub(r'\n{3,}', '\n\n', text.strip())
|
| 272 |
parts = re.split(
|
| 273 |
r'(?<=[.!?])\s+(?=[A-Z0-9(])|(?:\n\n)(?=\d+[.)]\s|\([a-z]\)\s|[A-Z][A-Z\s]{2,})',
|
|
|
|
| 281 |
return clauses
|
| 282 |
|
| 283 |
def classify_cuad(clause_text):
|
|
|
|
| 284 |
if cuad_model is None or cuad_tokenizer is None:
|
| 285 |
return _classify_regex(clause_text)
|
|
|
|
| 286 |
try:
|
| 287 |
inputs = cuad_tokenizer(
|
| 288 |
clause_text,
|
|
|
|
| 294 |
with torch.no_grad():
|
| 295 |
logits = cuad_model(**inputs).logits
|
| 296 |
probs = torch.softmax(logits, dim=-1)[0]
|
|
|
|
|
|
|
| 297 |
threshold = 0.15
|
| 298 |
results = []
|
| 299 |
for i, prob in enumerate(probs):
|
|
|
|
| 306 |
"risk": risk,
|
| 307 |
"description": DESC_MAP.get(label, label),
|
| 308 |
})
|
|
|
|
| 309 |
results.sort(key=lambda x: x["confidence"], reverse=True)
|
|
|
|
| 310 |
if not results:
|
| 311 |
top_idx = int(probs.argmax())
|
| 312 |
label = CUAD_LABELS[top_idx] if top_idx < len(CUAD_LABELS) else "Other"
|
|
|
|
| 337 |
"IP Ownership Assignment": [r"assign.*intellectual property", r"ownership of.*ip", r"all rights.*assign"],
|
| 338 |
"Uncapped Liability": [r"unlimited liability", r"uncapped", r"no.*limit.*liability"],
|
| 339 |
"Cap on Liability": [r"cap on liability", r"maximum liability", r"liability.*shall not exceed"],
|
| 340 |
+
"Indemnification": [r"indemnif", r"hold harmless", r"defend"],
|
| 341 |
+
"Confidentiality": [r"confidential", r"non-disclosure", r"nda"],
|
| 342 |
+
"Force Majeure": [r"force majeure", r"act of god", r"beyond.*control"],
|
| 343 |
+
"Penalties": [r"penalt", r"late fee", r"default charge", r"interest on overdue"],
|
| 344 |
}
|
| 345 |
|
| 346 |
def _classify_regex(text):
|
|
|
|
| 367 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 368 |
|
| 369 |
def extract_entities(text):
|
|
|
|
| 370 |
entities = []
|
|
|
|
| 371 |
# Dates
|
| 372 |
date_patterns = [
|
| 373 |
(r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', "DATE"),
|
|
|
|
| 378 |
for pat, etype in date_patterns:
|
| 379 |
for m in re.finditer(pat, text, re.IGNORECASE):
|
| 380 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
|
|
|
| 381 |
# Monetary values
|
| 382 |
money_patterns = [
|
| 383 |
(r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?(?:\s*(?:million|billion|thousand|M|B|K))?', "MONEY"),
|
|
|
|
| 386 |
for pat, etype in money_patterns:
|
| 387 |
for m in re.finditer(pat, text, re.IGNORECASE):
|
| 388 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
|
|
|
| 389 |
# Party names
|
| 390 |
party_patterns = [
|
| 391 |
(r'\b[A-Z][A-Za-z0-9\s&]+(?:Inc\.|LLC|Ltd\.|Limited|Corp\.|Corporation|PLC|GmbH|AG|S\.A\.|B\.V\.)\b', "PARTY"),
|
| 392 |
+
(r'\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Company|Customer|Vendor|Client)\b', "PARTY_ROLE"),
|
| 393 |
]
|
| 394 |
for pat, etype in party_patterns:
|
| 395 |
for m in re.finditer(pat, text):
|
| 396 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
|
|
|
| 397 |
# Jurisdictions
|
| 398 |
jurisdiction_patterns = [
|
| 399 |
(r'\b(?:State|Laws?) of [A-Z][a-zA-Z\s]+', "JURISDICTION"),
|
|
|
|
| 402 |
for pat, etype in jurisdiction_patterns:
|
| 403 |
for m in re.finditer(pat, text, re.IGNORECASE):
|
| 404 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
|
|
|
| 405 |
# Defined Terms
|
| 406 |
defined_patterns = [
|
| 407 |
(r'"([A-Z][A-Z\s]+)"', "DEFINED_TERM"),
|
|
|
|
| 410 |
for pat, etype in defined_patterns:
|
| 411 |
for m in re.finditer(pat, text):
|
| 412 |
entities.append({"text": m.group(1), "type": etype, "start": m.start(), "end": m.end()})
|
| 413 |
+
# Deduplicate
|
|
|
|
| 414 |
entities.sort(key=lambda x: (x["start"], -(x["end"] - x["start"])))
|
| 415 |
filtered = []
|
| 416 |
last_end = -1
|
|
|
|
| 436 |
]
|
| 437 |
|
| 438 |
def detect_contradictions(clause_results):
|
|
|
|
| 439 |
contradictions = []
|
| 440 |
labels_found = set()
|
|
|
|
|
|
|
| 441 |
for cr in clause_results:
|
| 442 |
labels_found.add(cr["label"])
|
|
|
|
|
|
|
|
|
|
| 443 |
for group_a, group_b, explanation in _CONTRADICTION_PAIRS:
|
| 444 |
found_a = any(l in labels_found for l in group_a)
|
| 445 |
found_b = any(l in labels_found for l in group_b)
|
|
|
|
| 450 |
"severity": "HIGH",
|
| 451 |
"clauses": list(set(group_a + group_b)),
|
| 452 |
})
|
|
|
|
|
|
|
| 453 |
critical_clauses = ["Governing Law", "Termination for Convenience", "Limitation of liability", "Arbitration"]
|
| 454 |
for cc in critical_clauses:
|
| 455 |
if cc not in labels_found:
|
|
|
|
| 459 |
"severity": "MEDIUM",
|
| 460 |
"clauses": [cc],
|
| 461 |
})
|
|
|
|
| 462 |
return contradictions
|
| 463 |
|
| 464 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 470 |
for cr in clause_results:
|
| 471 |
sev = cr.get("risk", "LOW")
|
| 472 |
sev_counts[sev] += 1
|
|
|
|
| 473 |
if total_clauses == 0:
|
| 474 |
return 0, "A", sev_counts
|
|
|
|
| 475 |
weighted = sum(sev_counts[s] * RISK_WEIGHTS[s] for s in sev_counts)
|
| 476 |
risk = min(100, round(weighted / max(1, total_clauses) * 10))
|
|
|
|
| 477 |
if risk >= 70: grade = "F"
|
| 478 |
elif risk >= 50: grade = "D"
|
| 479 |
elif risk >= 30: grade = "C"
|
| 480 |
elif risk >= 15: grade = "B"
|
| 481 |
else: grade = "A"
|
|
|
|
| 482 |
return risk, grade, sev_counts
|
| 483 |
|
| 484 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 488 |
def analyze_contract(text):
|
| 489 |
if not text or len(text.strip()) < 50:
|
| 490 |
return None, "Document too short (minimum 50 characters)"
|
|
|
|
| 491 |
clauses = split_clauses(text)
|
| 492 |
if not clauses:
|
| 493 |
return None, "No clauses detected in document"
|
|
|
|
|
|
|
| 494 |
clause_results = []
|
| 495 |
for clause in clauses:
|
| 496 |
predictions = classify_cuad(clause)
|
|
|
|
| 503 |
"risk": pred["risk"],
|
| 504 |
"description": pred["description"],
|
| 505 |
})
|
|
|
|
|
|
|
| 506 |
entities = extract_entities(text)
|
|
|
|
|
|
|
| 507 |
contradictions = detect_contradictions(clause_results)
|
|
|
|
|
|
|
| 508 |
risk, grade, sev_counts = compute_risk_score(clause_results, len(clauses))
|
| 509 |
+
obligations = extract_obligations(text)
|
| 510 |
+
compliance = check_compliance(text)
|
| 511 |
result = {
|
| 512 |
"metadata": {
|
| 513 |
"analysis_date": datetime.now().isoformat(),
|
|
|
|
| 523 |
"clauses": clause_results,
|
| 524 |
"entities": entities,
|
| 525 |
"contradictions": contradictions,
|
| 526 |
+
"obligations": obligations,
|
| 527 |
+
"compliance": compliance,
|
| 528 |
"raw_text": text,
|
| 529 |
}
|
|
|
|
| 530 |
return result, None
|
| 531 |
|
| 532 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 561 |
def render_summary(result):
|
| 562 |
if result is None:
|
| 563 |
return ""
|
|
|
|
| 564 |
risk = result["risk"]
|
| 565 |
score = risk["score"]
|
| 566 |
grade = risk["grade"]
|
| 567 |
breakdown = risk["breakdown"]
|
|
|
|
| 568 |
grade_color = {
|
| 569 |
"A": "#16a34a", "B": "#65a30d", "C": "#ca8a04",
|
| 570 |
"D": "#ea580c", "F": "#dc2626",
|
| 571 |
}.get(grade, "#6b7280")
|
|
|
|
| 572 |
crit, high, med, low = breakdown["CRITICAL"], breakdown["HIGH"], breakdown["MEDIUM"], breakdown["LOW"]
|
|
|
|
| 573 |
html = f"""
|
| 574 |
<div style="font-family:system-ui,sans-serif;padding:16px;border:1px solid #e5e7eb;border-radius:12px;background:#fff;">
|
| 575 |
<div style="text-align:center;margin-bottom:16px;">
|
|
|
|
| 608 |
def render_clause_cards(result):
|
| 609 |
if result is None:
|
| 610 |
return ""
|
|
|
|
| 611 |
clauses = result.get("clauses", [])
|
| 612 |
if not clauses:
|
| 613 |
return '<div style="padding:24px;text-align:center;color:#6b7280;">No clauses detected.</div>'
|
|
|
|
|
|
|
| 614 |
grouped = defaultdict(list)
|
| 615 |
for cr in clauses:
|
| 616 |
grouped[cr["text"]].append(cr)
|
|
|
|
| 617 |
html = '<div style="font-family:system-ui,sans-serif;">'
|
| 618 |
for text, items in grouped.items():
|
| 619 |
max_risk = max(items, key=lambda x: {"CRITICAL":4,"HIGH":3,"MEDIUM":2,"LOW":1}[x["risk"]])["risk"]
|
| 620 |
border, bg, icon = RISK_STYLES[max_risk]
|
|
|
|
| 621 |
tags = ""
|
| 622 |
for item in items:
|
| 623 |
tag_bg = RISK_STYLES[item["risk"]][1]
|
| 624 |
tag_color = RISK_STYLES[item["risk"]][0]
|
| 625 |
tags += f'<span style="background:{tag_bg};color:{tag_color};border:1px solid {tag_color}33;padding:2px 8px;border-radius:12px;font-size:11px;font-weight:500;margin-right:4px;">{item["label"]} ({item["confidence"]})</span>'
|
|
|
|
| 626 |
descs = "".join(
|
| 627 |
f'<p style="font-size:12px;color:#6b7280;margin:4px 0 0 0;">{item["description"]}</p>'
|
| 628 |
for item in items
|
| 629 |
)
|
|
|
|
| 630 |
preview = text[:300] + ("..." if len(text) > 300 else "")
|
| 631 |
preview = preview.replace("<", "<").replace(">", ">")
|
|
|
|
| 632 |
html += f"""
|
| 633 |
<div style="border:1px solid #e5e7eb;border-left:4px solid {border};border-radius:8px;padding:14px;margin-bottom:10px;background:#fafafa;">
|
| 634 |
<div style="display:flex;align-items:center;gap:6px;margin-bottom:6px;">
|
|
|
|
| 646 |
def render_entities(result):
|
| 647 |
if result is None:
|
| 648 |
return ""
|
|
|
|
| 649 |
entities = result.get("entities", [])
|
| 650 |
if not entities:
|
| 651 |
return '<div style="padding:16px;color:#6b7280;">No entities detected.</div>'
|
|
|
|
|
|
|
| 652 |
grouped = defaultdict(list)
|
| 653 |
for e in entities:
|
| 654 |
grouped[e["type"]].append(e["text"])
|
|
|
|
| 655 |
html = '<div style="font-family:system-ui,sans-serif;">'
|
| 656 |
for etype, texts in grouped.items():
|
| 657 |
unique = list(dict.fromkeys(texts))[:20]
|
|
|
|
| 662 |
"JURISDICTION": "#f59e0b",
|
| 663 |
"DEFINED_TERM": "#ec4899",
|
| 664 |
}.get(etype, "#6b7280")
|
|
|
|
| 665 |
items_html = "".join(
|
| 666 |
f'<span style="display:inline-block;background:{color}15;color:{color};border:1px solid {color}40;padding:3px 10px;border-radius:6px;font-size:12px;margin:3px;">{t}</span>'
|
| 667 |
for t in unique
|
| 668 |
)
|
|
|
|
| 669 |
html += f"""
|
| 670 |
<div style="margin-bottom:12px;">
|
| 671 |
<div style="font-size:12px;font-weight:600;color:#374151;margin-bottom:6px;text-transform:uppercase;">{etype}</div>
|
|
|
|
| 678 |
def render_contradictions(result):
|
| 679 |
if result is None:
|
| 680 |
return ""
|
|
|
|
| 681 |
contradictions = result.get("contradictions", [])
|
| 682 |
if not contradictions:
|
| 683 |
return '<div style="padding:16px;color:#16a34a;">β No contradictions or missing clauses detected.</div>'
|
|
|
|
| 684 |
html = '<div style="font-family:system-ui,sans-serif;">'
|
| 685 |
for c in contradictions:
|
| 686 |
sev_color = RISK_STYLES[c["severity"]][0]
|
|
|
|
| 700 |
def render_document_viewer(result):
|
| 701 |
if result is None:
|
| 702 |
return ""
|
|
|
|
| 703 |
text = result.get("raw_text", "")
|
| 704 |
entities = sorted(result.get("entities", []), key=lambda x: x["start"])
|
|
|
|
| 705 |
html_parts = []
|
| 706 |
last_end = 0
|
| 707 |
for e in entities:
|
|
|
|
| 719 |
f'<mark style="background:{color};padding:1px 2px;border-radius:2px;font-size:12px;" title="{label}">{e["text"].replace("<","<").replace(">",">")}</mark>'
|
| 720 |
)
|
| 721 |
last_end = e["end"]
|
|
|
|
| 722 |
html_parts.append(text[last_end:].replace("<", "<").replace(">", ">"))
|
| 723 |
highlighted = "".join(html_parts)
|
|
|
|
| 724 |
return f"""
|
| 725 |
<div style="font-family:monospace;font-size:13px;line-height:1.6;padding:16px;border:1px solid #e5e7eb;border-radius:8px;background:#fff;max-height:600px;overflow-y:auto;white-space:pre-wrap;">
|
| 726 |
{highlighted}
|
|
|
|
| 728 |
"""
|
| 729 |
|
| 730 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 731 |
+
# 11. COMPARISON UI FUNCTIONS
|
| 732 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 733 |
+
|
| 734 |
+
def run_comparison(text_a, text_b):
    """Compare two contracts and build the outputs for the comparison tab.

    Args:
        text_a: Full text of contract A.
        text_b: Full text of contract B.

    Returns:
        A 2-tuple ``(html, json_payload)`` matching the two outputs of the
        compare button (an HTML panel and a JSON viewer). On success
        ``html`` is the rendered comparison and ``json_payload`` is the
        comparison result serialized as a JSON string. When either input
        is missing or shorter than 50 non-padding characters, ``html`` is
        a short error message and ``json_payload`` is ``None``.
    """
    # The JSON viewer parses string values as JSON, so an empty string is
    # not a valid payload for it; ``None`` clears the viewer instead.
    if not text_a or len(text_a.strip()) < 50:
        return "Contract A is too short", None
    if not text_b or len(text_b.strip()) < 50:
        return "Contract B is too short", None

    result = compare_contracts(text_a, text_b)
    return render_comparison_html(result), json.dumps(result, indent=2)
|
| 741 |
+
|
| 742 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 743 |
+
# 12. GRADIO UI
|
| 744 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 745 |
|
| 746 |
def process_upload(file):
|
|
|
|
| 753 |
|
| 754 |
def run_analysis(text):
    """Analyze one contract and build every output of the analysis tab.

    Args:
        text: Full contract text taken from the input textbox.

    Returns:
        A list of ten values in the order the analyze button wires its
        outputs: seven HTML strings (summary, clause cards, entities,
        contradictions, document viewer, obligations, compliance), the
        path of the JSON report, the path of the CSV report, and a status
        message. On failure the seven HTML slots all carry the same error
        paragraph and both report paths are ``None``.
    """
    # Local imports keep this block self-contained without touching the
    # file-level import section.
    import os
    import tempfile

    if not text or len(text.strip()) < 50:
        err_html = '<p style="color:#dc2626;padding:16px;">Document too short (minimum 50 characters)</p>'
        return [err_html] * 7 + [None, None, ""]

    result, error = analyze_contract(text)
    if error:
        err_html = f'<p style="color:#dc2626;padding:16px;">{error}</p>'
        return [err_html] * 7 + [None, None, error]

    # Save export files.
    # Each call writes into its own fresh directory: a fixed /tmp path is
    # shared by every request, so two analyses running at the same time
    # would overwrite each other's downloads. The file names stay the
    # same, so the downloaded reports keep their names.
    # NOTE(review): these directories are not removed here; add cleanup if
    # long-running deployments accumulate too many of them.
    export_dir = tempfile.mkdtemp(prefix="clauseguard_")

    json_path = os.path.join(export_dir, "clauseguard_report.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2, default=str)

    csv_content = export_csv(result)
    csv_path = os.path.join(export_dir, "clauseguard_report.csv")
    # newline="" writes the CSV text exactly as produced, with no
    # platform-specific line-ending translation.
    with open(csv_path, "w", encoding="utf-8", newline="") as f:
        f.write(csv_content)

    return [
        render_summary(result),
        render_clause_cards(result),
        render_entities(result),
        render_contradictions(result),
        render_document_viewer(result),
        render_obligations_html(result.get("obligations", [])),
        render_compliance_html(result.get("compliance", {})),
        json_path,
        csv_path,
        "Analysis complete",
    ]
|
| 782 |
|
| 783 |
def do_clear():
    """Return blank values for every output of the analysis tab.

    Returns:
        A list of ten values: seven empty strings, two ``None`` entries,
        and a final empty string, in the same order as the outputs wired
        to the clear button.
    """
    blank_panels = ["" for _ in range(7)]
    blank_files = [None, None]
    blank_status = [""]
    return blank_panels + blank_files + blank_status
|
| 785 |
|
| 786 |
# ── Example contracts ──
|
| 787 |
SPOTIFY_TOS = """By using the Spotify Service, you agree to be bound by these Terms of Use.
|
|
|
|
| 826 |
|
| 827 |
7. Non-Compete. During the term of this Agreement and for a period of two (2) years thereafter, the Receiving Party shall not engage in any business that competes with the Disclosing Party."""
|
| 828 |
|
| 829 |
+
COMPLEX_CONTRACT = """MASTER SERVICE AGREEMENT
|
| 830 |
+
|
| 831 |
+
This Master Service Agreement ("MSA") is entered into as of March 1, 2024 (the "Effective Date") by and between CloudTech Solutions, Inc., a Delaware corporation ("Provider") and Global Retail Partners LLC, a New York limited liability company ("Customer").
|
| 832 |
+
|
| 833 |
+
1. SERVICES. Provider shall provide cloud hosting and data processing services as described in Exhibit A. Provider shall comply with all applicable laws including GDPR and CCPA.
|
| 834 |
+
|
| 835 |
+
2. TERM AND RENEWAL. The initial term is twelve (12) months, automatically renewing for successive one (1) year periods unless terminated in accordance with Section 7.
|
| 836 |
+
|
| 837 |
+
3. FEES AND PAYMENT. Customer shall pay a monthly fee of $25,000 within 30 days of invoice. Late payments incur a penalty of 1.5% per month. The total contract value is $300,000.
|
| 838 |
+
|
| 839 |
+
4. LIABILITY. Provider's aggregate liability shall not exceed $1,000,000. IN NO EVENT SHALL PROVIDER BE LIABLE FOR LOST PROFITS OR CONSEQUENTIAL DAMAGES. Customer assumes all risk of data loss.
|
| 840 |
+
|
| 841 |
+
5. INDEMNIFICATION. Each party shall indemnify the other for third-party claims arising from breach of this Agreement. Customer shall indemnify Provider for claims arising from Customer Data.
|
| 842 |
+
|
| 843 |
+
6. INTELLECTUAL PROPERTY. Provider retains all IP rights. Customer receives a non-transferable, non-exclusive license for the term. Upon termination, Customer shall return or destroy all Provider materials within 10 business days.
|
| 844 |
+
|
| 845 |
+
7. TERMINATION. Either party may terminate for convenience with 90 days notice. Provider may terminate immediately for non-payment. Upon termination, Customer shall pay all outstanding fees.
|
| 846 |
+
|
| 847 |
+
8. GOVERNING LAW. This Agreement is governed by the laws of the State of Delaware. Disputes shall be resolved by binding arbitration in Wilmington, Delaware.
|
| 848 |
+
|
| 849 |
+
9. FORCE MAJEURE. Neither party shall be liable for delays due to acts of God, war, terrorism, or government action.
|
| 850 |
+
|
| 851 |
+
10. AUDIT RIGHTS. Customer may audit Provider's compliance annually. Provider shall provide SOC 2 Type II reports within 30 days of request.
|
| 852 |
+
|
| 853 |
+
11. INSURANCE. Provider shall maintain general liability insurance of at least $5,000,000 and cyber liability insurance of at least $2,000,000.
|
| 854 |
+
|
| 855 |
+
12. CONFIDENTIALITY. Both parties agree to keep Confidential Information secure for five (5) years. This obligation survives termination.
|
| 856 |
+
|
| 857 |
+
13. ASSIGNMENT. Neither party may assign this Agreement without prior written consent. Any attempted assignment is void.
|
| 858 |
+
|
| 859 |
+
14. THIRD PARTY BENEFICIARY. No third party shall have rights under this Agreement except as expressly provided."""
|
| 860 |
+
|
| 861 |
with gr.Blocks(
|
| 862 |
title="ClauseGuard β AI Contract Analysis",
|
| 863 |
css="""
|
| 864 |
+
.gradio-container { max-width: 1600px !important; }
|
| 865 |
"""
|
| 866 |
) as demo:
|
| 867 |
|
|
|
|
| 869 |
<div style="display:flex;align-items:center;justify-content:space-between;padding:12px 0;border-bottom:2px solid #e5e7eb;margin-bottom:16px;">
|
| 870 |
<div>
|
| 871 |
<h1 style="font-size:24px;font-weight:700;margin:0;color:#1f2937;">π‘οΈ ClauseGuard</h1>
|
| 872 |
+
<p style="font-size:13px;color:#6b7280;margin:4px 0 0 0;">AI-Powered Legal Contract Analysis Β· 41 Clause Categories Β· Risk Scoring Β· NER Β· NLI Β· Compliance Β· Obligations</p>
|
| 873 |
</div>
|
| 874 |
+
<div style="font-size:12px;color:#9ca3af;">v2.0 Β· World's Best Open-Source Legal AI</div>
|
| 875 |
</div>
|
| 876 |
""")
|
| 877 |
|
| 878 |
+
# ββ Main Tabs: Analysis vs Comparison ββ
|
| 879 |
+
with gr.Tabs() as main_tabs:
|
| 880 |
+
|
| 881 |
+
# βββββββ TAB 1: Single Contract Analysis βββββββ
|
| 882 |
+
with gr.Tab("π Single Contract Analysis"):
|
| 883 |
+
with gr.Row():
|
| 884 |
+
with gr.Column(scale=1):
|
| 885 |
+
file_input = gr.File(
|
| 886 |
+
label="π Upload Contract (PDF/DOCX/TXT)",
|
| 887 |
+
file_types=[".pdf", ".docx", ".doc", ".txt", ".md"],
|
| 888 |
+
)
|
| 889 |
+
load_btn = gr.Button("Load Document", variant="secondary", size="sm")
|
| 890 |
+
load_status = gr.Textbox(label="Status", interactive=False, lines=1)
|
| 891 |
+
|
| 892 |
+
with gr.Column(scale=3):
|
| 893 |
+
text_input = gr.Textbox(
|
| 894 |
+
label="π Contract Text",
|
| 895 |
+
placeholder="Paste contract text here, or upload a file above...",
|
| 896 |
+
lines=14,
|
| 897 |
+
max_lines=40,
|
| 898 |
+
show_copy_button=True,
|
| 899 |
+
)
|
| 900 |
+
|
| 901 |
+
with gr.Column(scale=1):
|
| 902 |
+
scan_btn = gr.Button("π Analyze Contract", variant="primary", size="lg")
|
| 903 |
+
clear_btn = gr.Button("Clear", variant="secondary", size="sm")
|
| 904 |
+
status_msg = gr.Textbox(label="Analysis Status", interactive=False, lines=1)
|
| 905 |
+
|
| 906 |
+
# ββ Examples ββ
|
| 907 |
+
with gr.Row():
|
| 908 |
+
gr.Examples(
|
| 909 |
+
examples=[[SPOTIFY_TOS], [RENTAL_AGREEMENT], [NDA_SAMPLE], [COMPLEX_CONTRACT]],
|
| 910 |
+
inputs=[text_input],
|
| 911 |
+
label="Example Contracts",
|
| 912 |
+
)
|
| 913 |
+
|
| 914 |
+
# ββ Results ββ
|
| 915 |
+
with gr.Row():
|
| 916 |
+
with gr.Column(scale=1):
|
| 917 |
+
gr.Markdown("### π Risk Summary")
|
| 918 |
+
summary_html = gr.HTML()
|
| 919 |
+
|
| 920 |
+
gr.Markdown("### π₯ Export Reports")
|
| 921 |
+
json_file = gr.File(label="JSON Report")
|
| 922 |
+
csv_file = gr.File(label="CSV Report")
|
| 923 |
+
|
| 924 |
+
with gr.Column(scale=3):
|
| 925 |
+
with gr.Tabs():
|
| 926 |
+
with gr.Tab("π Document"):
|
| 927 |
+
doc_html = gr.HTML(label="Document Viewer")
|
| 928 |
+
with gr.Tab("β οΈ Clauses (41 Categories)"):
|
| 929 |
+
clauses_html = gr.HTML(label="Detected Clauses")
|
| 930 |
+
with gr.Tab("π·οΈ Entities"):
|
| 931 |
+
entities_html = gr.HTML(label="Named Entities")
|
| 932 |
+
with gr.Tab("π Contradictions"):
|
| 933 |
+
nli_html = gr.HTML(label="Contradictions & Missing Clauses")
|
| 934 |
+
with gr.Tab("π Obligations"):
|
| 935 |
+
obligations_html = gr.HTML(label="Obligation Tracker")
|
| 936 |
+
with gr.Tab("βοΈ Compliance"):
|
| 937 |
+
compliance_html = gr.HTML(label="Compliance Checker")
|
| 938 |
+
|
| 939 |
+
# βββββββ TAB 2: Contract Comparison βββββββ
|
| 940 |
+
with gr.Tab("π Compare Contracts"):
|
| 941 |
+
with gr.Row():
|
| 942 |
+
with gr.Column(scale=1):
|
| 943 |
+
comp_file_a = gr.File(
|
| 944 |
+
label="π Contract A (PDF/DOCX/TXT)",
|
| 945 |
+
file_types=[".pdf", ".docx", ".doc", ".txt"],
|
| 946 |
+
)
|
| 947 |
+
comp_load_a = gr.Button("Load A", variant="secondary", size="sm")
|
| 948 |
+
comp_status_a = gr.Textbox(label="Status A", interactive=False, lines=1)
|
| 949 |
+
|
| 950 |
+
with gr.Column(scale=3):
|
| 951 |
+
comp_text_a = gr.Textbox(
|
| 952 |
+
label="Contract A",
|
| 953 |
+
placeholder="Paste contract A here...",
|
| 954 |
+
lines=12,
|
| 955 |
+
show_copy_button=True,
|
| 956 |
+
)
|
| 957 |
+
|
| 958 |
+
with gr.Column(scale=1):
|
| 959 |
+
comp_file_b = gr.File(
|
| 960 |
+
label="π Contract B (PDF/DOCX/TXT)",
|
| 961 |
+
file_types=[".pdf", ".docx", ".doc", ".txt"],
|
| 962 |
+
)
|
| 963 |
+
comp_load_b = gr.Button("Load B", variant="secondary", size="sm")
|
| 964 |
+
comp_status_b = gr.Textbox(label="Status B", interactive=False, lines=1)
|
| 965 |
+
|
| 966 |
+
with gr.Column(scale=3):
|
| 967 |
+
comp_text_b = gr.Textbox(
|
| 968 |
+
label="Contract B",
|
| 969 |
+
placeholder="Paste contract B here...",
|
| 970 |
+
lines=12,
|
| 971 |
+
show_copy_button=True,
|
| 972 |
+
)
|
| 973 |
+
|
| 974 |
+
with gr.Row():
|
| 975 |
+
with gr.Column(scale=1):
|
| 976 |
+
comp_btn = gr.Button("π Compare Contracts", variant="primary", size="lg")
|
| 977 |
+
with gr.Column(scale=5):
|
| 978 |
+
comp_status = gr.Textbox(label="Comparison Status", interactive=False, lines=1)
|
| 979 |
+
|
| 980 |
+
with gr.Row():
|
| 981 |
+
with gr.Column(scale=4):
|
| 982 |
+
comp_result_html = gr.HTML(label="Comparison Results")
|
| 983 |
+
with gr.Column(scale=2):
|
| 984 |
+
comp_json = gr.JSON(label="Raw Comparison Data")
|
| 985 |
|
| 986 |
# ββ Events ββ
|
| 987 |
def _load_file(file):
|
|
|
|
| 991 |
return text, "Loaded successfully" if not err else err
|
| 992 |
|
| 993 |
load_btn.click(_load_file, inputs=[file_input], outputs=[text_input, load_status])
|
| 994 |
+
comp_load_a.click(_load_file, inputs=[comp_file_a], outputs=[comp_text_a, comp_status_a])
|
| 995 |
+
comp_load_b.click(_load_file, inputs=[comp_file_b], outputs=[comp_text_b, comp_status_b])
|
| 996 |
|
| 997 |
scan_btn.click(
|
| 998 |
run_analysis,
|
| 999 |
inputs=[text_input],
|
| 1000 |
outputs=[summary_html, clauses_html, entities_html, nli_html,
|
| 1001 |
+
doc_html, obligations_html, compliance_html,
|
| 1002 |
+
json_file, csv_file, status_msg]
|
| 1003 |
)
|
| 1004 |
|
| 1005 |
clear_btn.click(
|
| 1006 |
do_clear,
|
| 1007 |
outputs=[summary_html, clauses_html, entities_html, nli_html,
|
| 1008 |
+
doc_html, obligations_html, compliance_html,
|
| 1009 |
+
json_file, csv_file, status_msg]
|
| 1010 |
+
)
|
| 1011 |
+
|
| 1012 |
+
comp_btn.click(
|
| 1013 |
+
run_comparison,
|
| 1014 |
+
inputs=[comp_text_a, comp_text_b],
|
| 1015 |
+
outputs=[comp_result_html, comp_json]
|
| 1016 |
)
|
| 1017 |
|
| 1018 |
gr.HTML("""
|
| 1019 |
<div style="margin-top:24px;padding:16px 0;border-top:1px solid #e5e7eb;text-align:center;">
|
| 1020 |
<p style="font-size:11px;color:#9ca3af;">
|
| 1021 |
β οΈ Not legal advice. For informational purposes only.
|
| 1022 |
+
Β· Model: <a href="https://huggingface.co/Mokshith31/legalbert-contract-clause-classification" style="color:#6b7280;">Legal-BERT + CUAD (41 classes)</a>
|
| 1023 |
Β· Dataset: <a href="https://huggingface.co/datasets/theatticusproject/cuad-qa" style="color:#6b7280;">CUAD</a>
|
| 1024 |
Β· <a href="https://huggingface.co/spaces/gaurv007/ClauseGuard" style="color:#6b7280;">ClauseGuard Space</a>
|
| 1025 |
</p>
|