gaurv007 committed on
Commit
e24206c
·
verified ·
1 Parent(s): 7d46f4f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +203 -140
app.py CHANGED
@@ -6,6 +6,9 @@ Features:
6
  • 4-tier risk scoring (Critical / High / Medium / Low)
7
  • Legal NER: parties, dates, monetary values, jurisdictions, defined terms
8
  • NLI contradiction & missing-clause detection
 
 
 
9
  • PDF / DOCX / TXT parsing
10
  • Professional 3-panel Gradio UI
11
  • JSON & CSV export
@@ -20,7 +23,6 @@ import re
20
  import json
21
  import csv
22
  import io
23
- import textwrap
24
  from collections import defaultdict
25
  from datetime import datetime
26
 
@@ -49,6 +51,11 @@ try:
49
  except Exception:
50
  _HAS_TORCH = False
51
 
 
 
 
 
 
52
  # ═══════════════════════════════════════════════════════════════════════
53
  # 1. CONFIGURATION
54
  # ═══════════════════════════════════════════════════════════════════════
@@ -70,7 +77,6 @@ CUAD_LABELS = [
70
  "Third Party Beneficiary", "Other"
71
  ]
72
 
73
- # Original 8 unfair-clause labels (backward-compat + consumer contracts)
74
  _UNFAIR_LABELS = [
75
  "Limitation of liability", "Unilateral termination", "Unilateral change",
76
  "Content removal", "Contract by using", "Choice of law",
@@ -87,6 +93,7 @@ RISK_MAP = {
87
  "Termination for Convenience": "CRITICAL",
88
  "Limitation of liability": "CRITICAL",
89
  "Unilateral termination": "CRITICAL",
 
90
  # High
91
  "Non-Compete": "HIGH",
92
  "Exclusivity": "HIGH",
@@ -95,7 +102,6 @@ RISK_MAP = {
95
  "No-Solicit of Employees": "HIGH",
96
  "Unilateral change": "HIGH",
97
  "Content removal": "HIGH",
98
- "Liquidated Damages": "HIGH",
99
  "Anti-Assignment": "HIGH",
100
  # Medium
101
  "Governing Law": "MEDIUM",
@@ -107,6 +113,7 @@ RISK_MAP = {
107
  "Non-Disparagement": "MEDIUM",
108
  "Most Favored Nation": "MEDIUM",
109
  "Revenue/Profit Sharing": "MEDIUM",
 
110
  # Low
111
  "Document Name": "LOW",
112
  "Parties": "LOW",
@@ -125,7 +132,6 @@ RISK_MAP = {
125
  "Post-Termination Services": "LOW",
126
  "Audit Rights": "LOW",
127
  "Cap on Liability": "LOW",
128
- "Warranty Duration": "LOW",
129
  "Insurance": "LOW",
130
  "Covenant Not to Sue": "LOW",
131
  "Third Party Beneficiary": "LOW",
@@ -162,12 +168,19 @@ DESC_MAP.update({
162
  "Warranty Duration": "Length of time warranties remain in effect.",
163
  "Covenant Not to Sue": "Agreement not to bring legal action against a party.",
164
  "Third Party Beneficiary": "Non-party who benefits from the contract terms.",
 
 
 
 
 
 
 
 
 
165
  })
166
 
167
- # Risk weights for scoring
168
  RISK_WEIGHTS = {"CRITICAL": 40, "HIGH": 20, "MEDIUM": 10, "LOW": 3}
169
 
170
- # Color / badge styles
171
  RISK_STYLES = {
172
  "CRITICAL": ("#dc2626", "#fef2f2", "⚠️"),
173
  "HIGH": ("#ea580c", "#fff7ed", "⚑"),
@@ -190,7 +203,6 @@ def _load_cuad_model():
190
  try:
191
  base = "nlpaueb/legal-bert-base-uncased"
192
  adapter = "Mokshith31/legalbert-contract-clause-classification"
193
-
194
  print(f"[ClauseGuard] Loading CUAD classifier: {adapter}")
195
  cuad_tokenizer = AutoTokenizer.from_pretrained(base)
196
  base_model = AutoModelForSequenceClassification.from_pretrained(
@@ -256,7 +268,6 @@ def parse_document(file_path):
256
  # ═══════════════════════════════════════════════════════════════════════
257
 
258
  def split_clauses(text):
259
- """Split contract text into individual clauses."""
260
  text = re.sub(r'\n{3,}', '\n\n', text.strip())
261
  parts = re.split(
262
  r'(?<=[.!?])\s+(?=[A-Z0-9(])|(?:\n\n)(?=\d+[.)]\s|\([a-z]\)\s|[A-Z][A-Z\s]{2,})',
@@ -270,10 +281,8 @@ def split_clauses(text):
270
  return clauses
271
 
272
  def classify_cuad(clause_text):
273
- """Classify a single clause using the CUAD model."""
274
  if cuad_model is None or cuad_tokenizer is None:
275
  return _classify_regex(clause_text)
276
-
277
  try:
278
  inputs = cuad_tokenizer(
279
  clause_text,
@@ -285,8 +294,6 @@ def classify_cuad(clause_text):
285
  with torch.no_grad():
286
  logits = cuad_model(**inputs).logits
287
  probs = torch.softmax(logits, dim=-1)[0]
288
-
289
- # Multi-label: return all labels above threshold
290
  threshold = 0.15
291
  results = []
292
  for i, prob in enumerate(probs):
@@ -299,9 +306,7 @@ def classify_cuad(clause_text):
299
  "risk": risk,
300
  "description": DESC_MAP.get(label, label),
301
  })
302
- # Sort by confidence descending
303
  results.sort(key=lambda x: x["confidence"], reverse=True)
304
- # If no labels above threshold, take top-1
305
  if not results:
306
  top_idx = int(probs.argmax())
307
  label = CUAD_LABELS[top_idx] if top_idx < len(CUAD_LABELS) else "Other"
@@ -332,6 +337,10 @@ _REGEX_PATTERNS = {
332
  "IP Ownership Assignment": [r"assign.*intellectual property", r"ownership of.*ip", r"all rights.*assign"],
333
  "Uncapped Liability": [r"unlimited liability", r"uncapped", r"no.*limit.*liability"],
334
  "Cap on Liability": [r"cap on liability", r"maximum liability", r"liability.*shall not exceed"],
 
 
 
 
335
  }
336
 
337
  def _classify_regex(text):
@@ -358,9 +367,7 @@ def _classify_regex(text):
358
  # ═══════════════════════════════════════════════════════════════════════
359
 
360
  def extract_entities(text):
361
- """Extract legal entities using regex patterns."""
362
  entities = []
363
-
364
  # Dates
365
  date_patterns = [
366
  (r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', "DATE"),
@@ -371,7 +378,6 @@ def extract_entities(text):
371
  for pat, etype in date_patterns:
372
  for m in re.finditer(pat, text, re.IGNORECASE):
373
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
374
-
375
  # Monetary values
376
  money_patterns = [
377
  (r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?(?:\s*(?:million|billion|thousand|M|B|K))?', "MONEY"),
@@ -380,16 +386,14 @@ def extract_entities(text):
380
  for pat, etype in money_patterns:
381
  for m in re.finditer(pat, text, re.IGNORECASE):
382
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
383
-
384
  # Party names
385
  party_patterns = [
386
  (r'\b[A-Z][A-Za-z0-9\s&]+(?:Inc\.|LLC|Ltd\.|Limited|Corp\.|Corporation|PLC|GmbH|AG|S\.A\.|B\.V\.)\b', "PARTY"),
387
- (r'\b(?:Party A|Party B|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Company|Customer)\b', "PARTY_ROLE"),
388
  ]
389
  for pat, etype in party_patterns:
390
  for m in re.finditer(pat, text):
391
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
392
-
393
  # Jurisdictions
394
  jurisdiction_patterns = [
395
  (r'\b(?:State|Laws?) of [A-Z][a-zA-Z\s]+', "JURISDICTION"),
@@ -398,7 +402,6 @@ def extract_entities(text):
398
  for pat, etype in jurisdiction_patterns:
399
  for m in re.finditer(pat, text, re.IGNORECASE):
400
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
401
-
402
  # Defined Terms
403
  defined_patterns = [
404
  (r'"([A-Z][A-Z\s]+)"', "DEFINED_TERM"),
@@ -407,8 +410,7 @@ def extract_entities(text):
407
  for pat, etype in defined_patterns:
408
  for m in re.finditer(pat, text):
409
  entities.append({"text": m.group(1), "type": etype, "start": m.start(), "end": m.end()})
410
-
411
- # Deduplicate overlapping
412
  entities.sort(key=lambda x: (x["start"], -(x["end"] - x["start"])))
413
  filtered = []
414
  last_end = -1
@@ -434,16 +436,10 @@ _CONTRADICTION_PAIRS = [
434
  ]
435
 
436
  def detect_contradictions(clause_results):
437
- """Detect contradictions and missing critical clauses."""
438
  contradictions = []
439
  labels_found = set()
440
- texts_found = {}
441
-
442
  for cr in clause_results:
443
  labels_found.add(cr["label"])
444
- texts_found[cr["label"]] = cr.get("text", "")
445
-
446
- # Contradiction pairs
447
  for group_a, group_b, explanation in _CONTRADICTION_PAIRS:
448
  found_a = any(l in labels_found for l in group_a)
449
  found_b = any(l in labels_found for l in group_b)
@@ -454,8 +450,6 @@ def detect_contradictions(clause_results):
454
  "severity": "HIGH",
455
  "clauses": list(set(group_a + group_b)),
456
  })
457
-
458
- # Missing critical clauses
459
  critical_clauses = ["Governing Law", "Termination for Convenience", "Limitation of liability", "Arbitration"]
460
  for cc in critical_clauses:
461
  if cc not in labels_found:
@@ -465,7 +459,6 @@ def detect_contradictions(clause_results):
465
  "severity": "MEDIUM",
466
  "clauses": [cc],
467
  })
468
-
469
  return contradictions
470
 
471
  # ═══════════════════════════════════════════════════════════════════════
@@ -477,19 +470,15 @@ def compute_risk_score(clause_results, total_clauses):
477
  for cr in clause_results:
478
  sev = cr.get("risk", "LOW")
479
  sev_counts[sev] += 1
480
-
481
  if total_clauses == 0:
482
  return 0, "A", sev_counts
483
-
484
  weighted = sum(sev_counts[s] * RISK_WEIGHTS[s] for s in sev_counts)
485
  risk = min(100, round(weighted / max(1, total_clauses) * 10))
486
-
487
  if risk >= 70: grade = "F"
488
  elif risk >= 50: grade = "D"
489
  elif risk >= 30: grade = "C"
490
  elif risk >= 15: grade = "B"
491
  else: grade = "A"
492
-
493
  return risk, grade, sev_counts
494
 
495
  # ═══════════════════════════════════════════════════════════════════════
@@ -499,12 +488,9 @@ def compute_risk_score(clause_results, total_clauses):
499
  def analyze_contract(text):
500
  if not text or len(text.strip()) < 50:
501
  return None, "Document too short (minimum 50 characters)"
502
-
503
  clauses = split_clauses(text)
504
  if not clauses:
505
  return None, "No clauses detected in document"
506
-
507
- # Analyze each clause
508
  clause_results = []
509
  for clause in clauses:
510
  predictions = classify_cuad(clause)
@@ -517,17 +503,11 @@ def analyze_contract(text):
517
  "risk": pred["risk"],
518
  "description": pred["description"],
519
  })
520
-
521
- # NER
522
  entities = extract_entities(text)
523
-
524
- # NLI / contradictions
525
  contradictions = detect_contradictions(clause_results)
526
-
527
- # Risk scoring
528
  risk, grade, sev_counts = compute_risk_score(clause_results, len(clauses))
529
-
530
- # Build result object
531
  result = {
532
  "metadata": {
533
  "analysis_date": datetime.now().isoformat(),
@@ -543,9 +523,10 @@ def analyze_contract(text):
543
  "clauses": clause_results,
544
  "entities": entities,
545
  "contradictions": contradictions,
 
 
546
  "raw_text": text,
547
  }
548
-
549
  return result, None
550
 
551
  # ═══════════════════════════════════════════════════════════════════════
@@ -580,19 +561,15 @@ def export_csv(result):
580
  def render_summary(result):
581
  if result is None:
582
  return ""
583
-
584
  risk = result["risk"]
585
  score = risk["score"]
586
  grade = risk["grade"]
587
  breakdown = risk["breakdown"]
588
-
589
  grade_color = {
590
  "A": "#16a34a", "B": "#65a30d", "C": "#ca8a04",
591
  "D": "#ea580c", "F": "#dc2626",
592
  }.get(grade, "#6b7280")
593
-
594
  crit, high, med, low = breakdown["CRITICAL"], breakdown["HIGH"], breakdown["MEDIUM"], breakdown["LOW"]
595
-
596
  html = f"""
597
  <div style="font-family:system-ui,sans-serif;padding:16px;border:1px solid #e5e7eb;border-radius:12px;background:#fff;">
598
  <div style="text-align:center;margin-bottom:16px;">
@@ -631,35 +608,27 @@ def render_summary(result):
631
  def render_clause_cards(result):
632
  if result is None:
633
  return ""
634
-
635
  clauses = result.get("clauses", [])
636
  if not clauses:
637
  return '<div style="padding:24px;text-align:center;color:#6b7280;">No clauses detected.</div>'
638
-
639
- # Group by clause text
640
  grouped = defaultdict(list)
641
  for cr in clauses:
642
  grouped[cr["text"]].append(cr)
643
-
644
  html = '<div style="font-family:system-ui,sans-serif;">'
645
  for text, items in grouped.items():
646
  max_risk = max(items, key=lambda x: {"CRITICAL":4,"HIGH":3,"MEDIUM":2,"LOW":1}[x["risk"]])["risk"]
647
  border, bg, icon = RISK_STYLES[max_risk]
648
-
649
  tags = ""
650
  for item in items:
651
  tag_bg = RISK_STYLES[item["risk"]][1]
652
  tag_color = RISK_STYLES[item["risk"]][0]
653
  tags += f'<span style="background:{tag_bg};color:{tag_color};border:1px solid {tag_color}33;padding:2px 8px;border-radius:12px;font-size:11px;font-weight:500;margin-right:4px;">{item["label"]} ({item["confidence"]})</span>'
654
-
655
  descs = "".join(
656
  f'<p style="font-size:12px;color:#6b7280;margin:4px 0 0 0;">{item["description"]}</p>'
657
  for item in items
658
  )
659
-
660
  preview = text[:300] + ("..." if len(text) > 300 else "")
661
  preview = preview.replace("<", "&lt;").replace(">", "&gt;")
662
-
663
  html += f"""
664
  <div style="border:1px solid #e5e7eb;border-left:4px solid {border};border-radius:8px;padding:14px;margin-bottom:10px;background:#fafafa;">
665
  <div style="display:flex;align-items:center;gap:6px;margin-bottom:6px;">
@@ -677,16 +646,12 @@ def render_clause_cards(result):
677
  def render_entities(result):
678
  if result is None:
679
  return ""
680
-
681
  entities = result.get("entities", [])
682
  if not entities:
683
  return '<div style="padding:16px;color:#6b7280;">No entities detected.</div>'
684
-
685
- # Group by type
686
  grouped = defaultdict(list)
687
  for e in entities:
688
  grouped[e["type"]].append(e["text"])
689
-
690
  html = '<div style="font-family:system-ui,sans-serif;">'
691
  for etype, texts in grouped.items():
692
  unique = list(dict.fromkeys(texts))[:20]
@@ -697,12 +662,10 @@ def render_entities(result):
697
  "JURISDICTION": "#f59e0b",
698
  "DEFINED_TERM": "#ec4899",
699
  }.get(etype, "#6b7280")
700
-
701
  items_html = "".join(
702
  f'<span style="display:inline-block;background:{color}15;color:{color};border:1px solid {color}40;padding:3px 10px;border-radius:6px;font-size:12px;margin:3px;">{t}</span>'
703
  for t in unique
704
  )
705
-
706
  html += f"""
707
  <div style="margin-bottom:12px;">
708
  <div style="font-size:12px;font-weight:600;color:#374151;margin-bottom:6px;text-transform:uppercase;">{etype}</div>
@@ -715,11 +678,9 @@ def render_entities(result):
715
  def render_contradictions(result):
716
  if result is None:
717
  return ""
718
-
719
  contradictions = result.get("contradictions", [])
720
  if not contradictions:
721
  return '<div style="padding:16px;color:#16a34a;">βœ“ No contradictions or missing clauses detected.</div>'
722
-
723
  html = '<div style="font-family:system-ui,sans-serif;">'
724
  for c in contradictions:
725
  sev_color = RISK_STYLES[c["severity"]][0]
@@ -739,10 +700,8 @@ def render_contradictions(result):
739
  def render_document_viewer(result):
740
  if result is None:
741
  return ""
742
-
743
  text = result.get("raw_text", "")
744
  entities = sorted(result.get("entities", []), key=lambda x: x["start"])
745
-
746
  html_parts = []
747
  last_end = 0
748
  for e in entities:
@@ -760,10 +719,8 @@ def render_document_viewer(result):
760
  f'<mark style="background:{color};padding:1px 2px;border-radius:2px;font-size:12px;" title="{label}">{e["text"].replace("<","&lt;").replace(">","&gt;")}</mark>'
761
  )
762
  last_end = e["end"]
763
-
764
  html_parts.append(text[last_end:].replace("<", "&lt;").replace(">", "&gt;"))
765
  highlighted = "".join(html_parts)
766
-
767
  return f"""
768
  <div style="font-family:monospace;font-size:13px;line-height:1.6;padding:16px;border:1px solid #e5e7eb;border-radius:8px;background:#fff;max-height:600px;overflow-y:auto;white-space:pre-wrap;">
769
  {highlighted}
@@ -771,7 +728,19 @@ def render_document_viewer(result):
771
  """
772
 
773
  # ═══════════════════════════════════════════════════════════════════════
774
- # 11. GRADIO UI
 
 
 
 
 
 
 
 
 
 
 
 
775
  # ═══════════════════════════════════════════════════════════════════════
776
 
777
  def process_upload(file):
@@ -784,36 +753,35 @@ def process_upload(file):
784
 
785
  def run_analysis(text):
786
  if not text or len(text.strip()) < 50:
787
- return [""] * 5 + [None, None, "Document too short (minimum 50 characters)"]
788
-
789
  result, error = analyze_contract(text)
790
  if error:
791
  err_html = f'<p style="color:#dc2626;padding:16px;">{error}</p>'
792
- return [err_html] * 5 + [None, None, error]
793
-
794
  # Save export files
795
  json_path = "/tmp/clauseguard_report.json"
796
  with open(json_path, "w") as f:
797
  json.dump(result, f, indent=2, default=str)
798
-
799
  csv_content = export_csv(result)
800
  csv_path = "/tmp/clauseguard_report.csv"
801
  with open(csv_path, "w") as f:
802
  f.write(csv_content)
803
-
804
  return [
805
  render_summary(result),
806
  render_clause_cards(result),
807
  render_entities(result),
808
  render_contradictions(result),
809
  render_document_viewer(result),
 
 
810
  json_path,
811
  csv_path,
812
  "Analysis complete",
813
  ]
814
 
815
  def do_clear():
816
- return [""] * 5 + [None, None, ""]
817
 
818
  # ── Example contracts ──
819
  SPOTIFY_TOS = """By using the Spotify Service, you agree to be bound by these Terms of Use.
@@ -858,10 +826,42 @@ This Non-Disclosure Agreement (the "Agreement") is entered into as of January 15
858
 
859
  7. Non-Compete. During the term of this Agreement and for a period of two (2) years thereafter, the Receiving Party shall not engage in any business that competes with the Disclosing Party."""
860
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
861
  with gr.Blocks(
862
  title="ClauseGuard β€” AI Contract Analysis",
863
  css="""
864
- .gradio-container { max-width: 1400px !important; }
865
  """
866
  ) as demo:
867
 
@@ -869,66 +869,119 @@ with gr.Blocks(
869
  <div style="display:flex;align-items:center;justify-content:space-between;padding:12px 0;border-bottom:2px solid #e5e7eb;margin-bottom:16px;">
870
  <div>
871
  <h1 style="font-size:24px;font-weight:700;margin:0;color:#1f2937;">πŸ›‘οΈ ClauseGuard</h1>
872
- <p style="font-size:13px;color:#6b7280;margin:4px 0 0 0;">AI-Powered Legal Contract Analysis Β· 41 Clause Categories Β· Risk Scoring Β· NER Β· NLI</p>
873
  </div>
874
- <div style="font-size:12px;color:#9ca3af;">v2.0 Β· World's Best Legal AI</div>
875
  </div>
876
  """)
877
 
878
- # ── Upload / Input ──
879
- with gr.Row():
880
- with gr.Column(scale=1):
881
- file_input = gr.File(
882
- label="πŸ“ Upload Contract (PDF/DOCX/TXT)",
883
- file_types=[".pdf", ".docx", ".doc", ".txt", ".md"],
884
- )
885
- load_btn = gr.Button("Load Document", variant="secondary", size="sm")
886
- load_status = gr.Textbox(label="Status", interactive=False, lines=1)
887
-
888
- with gr.Column(scale=3):
889
- text_input = gr.Textbox(
890
- label="πŸ“„ Contract Text",
891
- placeholder="Paste contract text here, or upload a file above...",
892
- lines=14,
893
- max_lines=40,
894
- show_copy_button=True,
895
- )
896
-
897
- with gr.Column(scale=1):
898
- scan_btn = gr.Button("πŸ” Analyze Contract", variant="primary", size="lg")
899
- clear_btn = gr.Button("Clear", variant="secondary", size="sm")
900
- status_msg = gr.Textbox(label="Analysis Status", interactive=False, lines=1)
901
-
902
- # ── Examples ──
903
- with gr.Row():
904
- gr.Examples(
905
- examples=[[SPOTIFY_TOS], [RENTAL_AGREEMENT], [NDA_SAMPLE]],
906
- inputs=[text_input],
907
- label="Example Contracts",
908
- )
909
-
910
- # ── Results ──
911
- with gr.Row():
912
- # Left: Summary + Export
913
- with gr.Column(scale=1):
914
- gr.Markdown("### πŸ“Š Risk Summary")
915
- summary_html = gr.HTML()
916
-
917
- gr.Markdown("### πŸ“₯ Export Reports")
918
- json_file = gr.File(label="JSON Report")
919
- csv_file = gr.File(label="CSV Report")
920
-
921
- # Center: Main Content Tabs
922
- with gr.Column(scale=3):
923
- with gr.Tabs():
924
- with gr.Tab("πŸ“„ Document"):
925
- doc_html = gr.HTML(label="Document Viewer")
926
- with gr.Tab("⚠️ Clauses (41 Categories)"):
927
- clauses_html = gr.HTML(label="Detected Clauses")
928
- with gr.Tab("🏷️ Entities"):
929
- entities_html = gr.HTML(label="Named Entities")
930
- with gr.Tab("πŸ” Contradictions"):
931
- nli_html = gr.HTML(label="Contradictions & Missing Clauses")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
932
 
933
  # ── Events ──
934
  def _load_file(file):
@@ -938,25 +991,35 @@ with gr.Blocks(
938
  return text, "Loaded successfully" if not err else err
939
 
940
  load_btn.click(_load_file, inputs=[file_input], outputs=[text_input, load_status])
 
 
941
 
942
  scan_btn.click(
943
  run_analysis,
944
  inputs=[text_input],
945
  outputs=[summary_html, clauses_html, entities_html, nli_html,
946
- doc_html, json_file, csv_file, status_msg]
 
947
  )
948
 
949
  clear_btn.click(
950
  do_clear,
951
  outputs=[summary_html, clauses_html, entities_html, nli_html,
952
- doc_html, json_file, csv_file, status_msg]
 
 
 
 
 
 
 
953
  )
954
 
955
  gr.HTML("""
956
  <div style="margin-top:24px;padding:16px 0;border-top:1px solid #e5e7eb;text-align:center;">
957
  <p style="font-size:11px;color:#9ca3af;">
958
  ⚠️ Not legal advice. For informational purposes only.
959
- Β· Model: <a href="https://huggingface.co/Mokshith31/legalbert-contract-clause-classification" style="color:#6b7280;">Legal-BERT + CUAD</a>
960
  Β· Dataset: <a href="https://huggingface.co/datasets/theatticusproject/cuad-qa" style="color:#6b7280;">CUAD</a>
961
  Β· <a href="https://huggingface.co/spaces/gaurv007/ClauseGuard" style="color:#6b7280;">ClauseGuard Space</a>
962
  </p>
 
6
  • 4-tier risk scoring (Critical / High / Medium / Low)
7
  • Legal NER: parties, dates, monetary values, jurisdictions, defined terms
8
  • NLI contradiction & missing-clause detection
9
+ • Contract comparison engine (diff between 2 contracts)
10
+ • Obligation tracker (monetary, compliance, reporting, delivery)
11
+ • Compliance checker (GDPR, CCPA, SOX, HIPAA, FINRA)
12
  • PDF / DOCX / TXT parsing
13
  • Professional 3-panel Gradio UI
14
  • JSON & CSV export
 
23
  import json
24
  import csv
25
  import io
 
26
  from collections import defaultdict
27
  from datetime import datetime
28
 
 
51
  except Exception:
52
  _HAS_TORCH = False
53
 
54
+ # ── Import submodules ───────────────────────────────────────────────
55
+ from compare import compare_contracts, render_comparison_html
56
+ from obligations import extract_obligations, render_obligations_html
57
+ from compliance import check_compliance, render_compliance_html
58
+
59
  # ═══════════════════════════════════════════════════════════════════════
60
  # 1. CONFIGURATION
61
  # ═══════════════════════════════════════════════════════════════════════
 
77
  "Third Party Beneficiary", "Other"
78
  ]
79
 
 
80
  _UNFAIR_LABELS = [
81
  "Limitation of liability", "Unilateral termination", "Unilateral change",
82
  "Content removal", "Contract by using", "Choice of law",
 
93
  "Termination for Convenience": "CRITICAL",
94
  "Limitation of liability": "CRITICAL",
95
  "Unilateral termination": "CRITICAL",
96
+ "Liquidated Damages": "CRITICAL",
97
  # High
98
  "Non-Compete": "HIGH",
99
  "Exclusivity": "HIGH",
 
102
  "No-Solicit of Employees": "HIGH",
103
  "Unilateral change": "HIGH",
104
  "Content removal": "HIGH",
 
105
  "Anti-Assignment": "HIGH",
106
  # Medium
107
  "Governing Law": "MEDIUM",
 
113
  "Non-Disparagement": "MEDIUM",
114
  "Most Favored Nation": "MEDIUM",
115
  "Revenue/Profit Sharing": "MEDIUM",
116
+ "Warranty Duration": "MEDIUM",
117
  # Low
118
  "Document Name": "LOW",
119
  "Parties": "LOW",
 
132
  "Post-Termination Services": "LOW",
133
  "Audit Rights": "LOW",
134
  "Cap on Liability": "LOW",
 
135
  "Insurance": "LOW",
136
  "Covenant Not to Sue": "LOW",
137
  "Third Party Beneficiary": "LOW",
 
168
  "Warranty Duration": "Length of time warranties remain in effect.",
169
  "Covenant Not to Sue": "Agreement not to bring legal action against a party.",
170
  "Third Party Beneficiary": "Non-party who benefits from the contract terms.",
171
+ "Insurance": "Insurance coverage requirements.",
172
+ "Revenue/Profit Sharing": "Revenue or profit sharing arrangements between parties.",
173
+ "Price Restriction": "Restrictions on pricing or discounting.",
174
+ "Minimum Commitment": "Minimum purchase or usage commitment.",
175
+ "Volume Restriction": "Limits on volume of goods or services.",
176
+ "License Grant": "Permission to use intellectual property.",
177
+ "Non-Transferable License": "License that cannot be transferred to third parties.",
178
+ "Irrevocable or Perpetual License": "License that cannot be revoked or lasts indefinitely.",
179
+ "Unlimited/All-You-Can-Eat License": "License with no usage limits.",
180
  })
181
 
 
182
  RISK_WEIGHTS = {"CRITICAL": 40, "HIGH": 20, "MEDIUM": 10, "LOW": 3}
183
 
 
184
  RISK_STYLES = {
185
  "CRITICAL": ("#dc2626", "#fef2f2", "⚠️"),
186
  "HIGH": ("#ea580c", "#fff7ed", "⚑"),
 
203
  try:
204
  base = "nlpaueb/legal-bert-base-uncased"
205
  adapter = "Mokshith31/legalbert-contract-clause-classification"
 
206
  print(f"[ClauseGuard] Loading CUAD classifier: {adapter}")
207
  cuad_tokenizer = AutoTokenizer.from_pretrained(base)
208
  base_model = AutoModelForSequenceClassification.from_pretrained(
 
268
  # ═══════════════════════════════════════════════════════════════════════
269
 
270
  def split_clauses(text):
 
271
  text = re.sub(r'\n{3,}', '\n\n', text.strip())
272
  parts = re.split(
273
  r'(?<=[.!?])\s+(?=[A-Z0-9(])|(?:\n\n)(?=\d+[.)]\s|\([a-z]\)\s|[A-Z][A-Z\s]{2,})',
 
281
  return clauses
282
 
283
  def classify_cuad(clause_text):
 
284
  if cuad_model is None or cuad_tokenizer is None:
285
  return _classify_regex(clause_text)
 
286
  try:
287
  inputs = cuad_tokenizer(
288
  clause_text,
 
294
  with torch.no_grad():
295
  logits = cuad_model(**inputs).logits
296
  probs = torch.softmax(logits, dim=-1)[0]
 
 
297
  threshold = 0.15
298
  results = []
299
  for i, prob in enumerate(probs):
 
306
  "risk": risk,
307
  "description": DESC_MAP.get(label, label),
308
  })
 
309
  results.sort(key=lambda x: x["confidence"], reverse=True)
 
310
  if not results:
311
  top_idx = int(probs.argmax())
312
  label = CUAD_LABELS[top_idx] if top_idx < len(CUAD_LABELS) else "Other"
 
337
  "IP Ownership Assignment": [r"assign.*intellectual property", r"ownership of.*ip", r"all rights.*assign"],
338
  "Uncapped Liability": [r"unlimited liability", r"uncapped", r"no.*limit.*liability"],
339
  "Cap on Liability": [r"cap on liability", r"maximum liability", r"liability.*shall not exceed"],
340
+ "Indemnification": [r"indemnif", r"hold harmless", r"defend"],
341
+ "Confidentiality": [r"confidential", r"non-disclosure", r"nda"],
342
+ "Force Majeure": [r"force majeure", r"act of god", r"beyond.*control"],
343
+ "Penalties": [r"penalt", r"late fee", r"default charge", r"interest on overdue"],
344
  }
345
 
346
  def _classify_regex(text):
 
367
  # ═══════════════════════════════════════════════════════════════════════
368
 
369
  def extract_entities(text):
 
370
  entities = []
 
371
  # Dates
372
  date_patterns = [
373
  (r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', "DATE"),
 
378
  for pat, etype in date_patterns:
379
  for m in re.finditer(pat, text, re.IGNORECASE):
380
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
 
381
  # Monetary values
382
  money_patterns = [
383
  (r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?(?:\s*(?:million|billion|thousand|M|B|K))?', "MONEY"),
 
386
  for pat, etype in money_patterns:
387
  for m in re.finditer(pat, text, re.IGNORECASE):
388
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
 
389
  # Party names
390
  party_patterns = [
391
  (r'\b[A-Z][A-Za-z0-9\s&]+(?:Inc\.|LLC|Ltd\.|Limited|Corp\.|Corporation|PLC|GmbH|AG|S\.A\.|B\.V\.)\b', "PARTY"),
392
+ (r'\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Company|Customer|Vendor|Client)\b', "PARTY_ROLE"),
393
  ]
394
  for pat, etype in party_patterns:
395
  for m in re.finditer(pat, text):
396
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
 
397
  # Jurisdictions
398
  jurisdiction_patterns = [
399
  (r'\b(?:State|Laws?) of [A-Z][a-zA-Z\s]+', "JURISDICTION"),
 
402
  for pat, etype in jurisdiction_patterns:
403
  for m in re.finditer(pat, text, re.IGNORECASE):
404
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
 
405
  # Defined Terms
406
  defined_patterns = [
407
  (r'"([A-Z][A-Z\s]+)"', "DEFINED_TERM"),
 
410
  for pat, etype in defined_patterns:
411
  for m in re.finditer(pat, text):
412
  entities.append({"text": m.group(1), "type": etype, "start": m.start(), "end": m.end()})
413
+ # Deduplicate
 
414
  entities.sort(key=lambda x: (x["start"], -(x["end"] - x["start"])))
415
  filtered = []
416
  last_end = -1
 
436
  ]
437
 
438
  def detect_contradictions(clause_results):
 
439
  contradictions = []
440
  labels_found = set()
 
 
441
  for cr in clause_results:
442
  labels_found.add(cr["label"])
 
 
 
443
  for group_a, group_b, explanation in _CONTRADICTION_PAIRS:
444
  found_a = any(l in labels_found for l in group_a)
445
  found_b = any(l in labels_found for l in group_b)
 
450
  "severity": "HIGH",
451
  "clauses": list(set(group_a + group_b)),
452
  })
 
 
453
  critical_clauses = ["Governing Law", "Termination for Convenience", "Limitation of liability", "Arbitration"]
454
  for cc in critical_clauses:
455
  if cc not in labels_found:
 
459
  "severity": "MEDIUM",
460
  "clauses": [cc],
461
  })
 
462
  return contradictions
463
 
464
  # ═══════════════════════════════════════════════════════════════════════
 
470
  for cr in clause_results:
471
  sev = cr.get("risk", "LOW")
472
  sev_counts[sev] += 1
 
473
  if total_clauses == 0:
474
  return 0, "A", sev_counts
 
475
  weighted = sum(sev_counts[s] * RISK_WEIGHTS[s] for s in sev_counts)
476
  risk = min(100, round(weighted / max(1, total_clauses) * 10))
 
477
  if risk >= 70: grade = "F"
478
  elif risk >= 50: grade = "D"
479
  elif risk >= 30: grade = "C"
480
  elif risk >= 15: grade = "B"
481
  else: grade = "A"
 
482
  return risk, grade, sev_counts
483
 
484
  # ═══════════════════════════════════════════════════════════════════════
 
488
  def analyze_contract(text):
489
  if not text or len(text.strip()) < 50:
490
  return None, "Document too short (minimum 50 characters)"
 
491
  clauses = split_clauses(text)
492
  if not clauses:
493
  return None, "No clauses detected in document"
 
 
494
  clause_results = []
495
  for clause in clauses:
496
  predictions = classify_cuad(clause)
 
503
  "risk": pred["risk"],
504
  "description": pred["description"],
505
  })
 
 
506
  entities = extract_entities(text)
 
 
507
  contradictions = detect_contradictions(clause_results)
 
 
508
  risk, grade, sev_counts = compute_risk_score(clause_results, len(clauses))
509
+ obligations = extract_obligations(text)
510
+ compliance = check_compliance(text)
511
  result = {
512
  "metadata": {
513
  "analysis_date": datetime.now().isoformat(),
 
523
  "clauses": clause_results,
524
  "entities": entities,
525
  "contradictions": contradictions,
526
+ "obligations": obligations,
527
+ "compliance": compliance,
528
  "raw_text": text,
529
  }
 
530
  return result, None
531
 
532
  # ═══════════════════════════════════════════════════════════════════════
 
561
  def render_summary(result):
562
  if result is None:
563
  return ""
 
564
  risk = result["risk"]
565
  score = risk["score"]
566
  grade = risk["grade"]
567
  breakdown = risk["breakdown"]
 
568
  grade_color = {
569
  "A": "#16a34a", "B": "#65a30d", "C": "#ca8a04",
570
  "D": "#ea580c", "F": "#dc2626",
571
  }.get(grade, "#6b7280")
 
572
  crit, high, med, low = breakdown["CRITICAL"], breakdown["HIGH"], breakdown["MEDIUM"], breakdown["LOW"]
 
573
  html = f"""
574
  <div style="font-family:system-ui,sans-serif;padding:16px;border:1px solid #e5e7eb;border-radius:12px;background:#fff;">
575
  <div style="text-align:center;margin-bottom:16px;">
 
608
  def render_clause_cards(result):
609
  if result is None:
610
  return ""
 
611
  clauses = result.get("clauses", [])
612
  if not clauses:
613
  return '<div style="padding:24px;text-align:center;color:#6b7280;">No clauses detected.</div>'
 
 
614
  grouped = defaultdict(list)
615
  for cr in clauses:
616
  grouped[cr["text"]].append(cr)
 
617
  html = '<div style="font-family:system-ui,sans-serif;">'
618
  for text, items in grouped.items():
619
  max_risk = max(items, key=lambda x: {"CRITICAL":4,"HIGH":3,"MEDIUM":2,"LOW":1}[x["risk"]])["risk"]
620
  border, bg, icon = RISK_STYLES[max_risk]
 
621
  tags = ""
622
  for item in items:
623
  tag_bg = RISK_STYLES[item["risk"]][1]
624
  tag_color = RISK_STYLES[item["risk"]][0]
625
  tags += f'<span style="background:{tag_bg};color:{tag_color};border:1px solid {tag_color}33;padding:2px 8px;border-radius:12px;font-size:11px;font-weight:500;margin-right:4px;">{item["label"]} ({item["confidence"]})</span>'
 
626
  descs = "".join(
627
  f'<p style="font-size:12px;color:#6b7280;margin:4px 0 0 0;">{item["description"]}</p>'
628
  for item in items
629
  )
 
630
  preview = text[:300] + ("..." if len(text) > 300 else "")
631
  preview = preview.replace("<", "&lt;").replace(">", "&gt;")
 
632
  html += f"""
633
  <div style="border:1px solid #e5e7eb;border-left:4px solid {border};border-radius:8px;padding:14px;margin-bottom:10px;background:#fafafa;">
634
  <div style="display:flex;align-items:center;gap:6px;margin-bottom:6px;">
 
646
  def render_entities(result):
647
  if result is None:
648
  return ""
 
649
  entities = result.get("entities", [])
650
  if not entities:
651
  return '<div style="padding:16px;color:#6b7280;">No entities detected.</div>'
 
 
652
  grouped = defaultdict(list)
653
  for e in entities:
654
  grouped[e["type"]].append(e["text"])
 
655
  html = '<div style="font-family:system-ui,sans-serif;">'
656
  for etype, texts in grouped.items():
657
  unique = list(dict.fromkeys(texts))[:20]
 
662
  "JURISDICTION": "#f59e0b",
663
  "DEFINED_TERM": "#ec4899",
664
  }.get(etype, "#6b7280")
 
665
  items_html = "".join(
666
  f'<span style="display:inline-block;background:{color}15;color:{color};border:1px solid {color}40;padding:3px 10px;border-radius:6px;font-size:12px;margin:3px;">{t}</span>'
667
  for t in unique
668
  )
 
669
  html += f"""
670
  <div style="margin-bottom:12px;">
671
  <div style="font-size:12px;font-weight:600;color:#374151;margin-bottom:6px;text-transform:uppercase;">{etype}</div>
 
678
  def render_contradictions(result):
679
  if result is None:
680
  return ""
 
681
  contradictions = result.get("contradictions", [])
682
  if not contradictions:
683
  return '<div style="padding:16px;color:#16a34a;">βœ“ No contradictions or missing clauses detected.</div>'
 
684
  html = '<div style="font-family:system-ui,sans-serif;">'
685
  for c in contradictions:
686
  sev_color = RISK_STYLES[c["severity"]][0]
 
700
  def render_document_viewer(result):
701
  if result is None:
702
  return ""
 
703
  text = result.get("raw_text", "")
704
  entities = sorted(result.get("entities", []), key=lambda x: x["start"])
 
705
  html_parts = []
706
  last_end = 0
707
  for e in entities:
 
719
  f'<mark style="background:{color};padding:1px 2px;border-radius:2px;font-size:12px;" title="{label}">{e["text"].replace("<","&lt;").replace(">","&gt;")}</mark>'
720
  )
721
  last_end = e["end"]
 
722
  html_parts.append(text[last_end:].replace("<", "&lt;").replace(">", "&gt;"))
723
  highlighted = "".join(html_parts)
 
724
  return f"""
725
  <div style="font-family:monospace;font-size:13px;line-height:1.6;padding:16px;border:1px solid #e5e7eb;border-radius:8px;background:#fff;max-height:600px;overflow-y:auto;white-space:pre-wrap;">
726
  {highlighted}
 
728
  """
729
 
730
  # ═══════════════════════════════════════════════════════════════════════
731
+ # 11. COMPARISON UI FUNCTIONS
732
+ # ═══════════════════════════════════════════════════════════════════════
733
+
734
def run_comparison(text_a, text_b):
    """Compare two contracts and build the outputs for the comparison tab.

    Args:
        text_a: Raw text of contract A.
        text_b: Raw text of contract B.

    Returns:
        A 2-tuple ``(html, json_payload)``. On success ``html`` is the rendered
        comparison and ``json_payload`` is the comparison result serialized as
        an indented JSON string. If either text is empty or has fewer than 50
        characters after stripping, ``html`` is a short error message and
        ``json_payload`` is ``None``.
    """
    # Contract A is validated first so its message wins when both are short.
    for label, text in (("A", text_a), ("B", text_b)):
        if not text or len(text.strip()) < 50:
            # The second output feeds a JSON component; an empty string is not
            # a valid JSON document, so signal "no data" with None instead.
            return f"Contract {label} is too short", None

    result = compare_contracts(text_a, text_b)
    # default=str mirrors the JSON export in run_analysis, so values that are
    # not natively serializable degrade to strings instead of raising.
    return render_comparison_html(result), json.dumps(result, indent=2, default=str)
741
+
742
+ # ═══════════════════════════════════════════════════════════════════════
743
+ # 12. GRADIO UI
744
  # ═══════════════════════════════════════════════════════════════════════
745
 
746
  def process_upload(file):
 
753
 
754
  def run_analysis(text):
755
  if not text or len(text.strip()) < 50:
756
+ err_html = '<p style="color:#dc2626;padding:16px;">Document too short (minimum 50 characters)</p>'
757
+ return [err_html] * 7 + [None, None, ""]
758
  result, error = analyze_contract(text)
759
  if error:
760
  err_html = f'<p style="color:#dc2626;padding:16px;">{error}</p>'
761
+ return [err_html] * 7 + [None, None, error]
 
762
  # Save export files
763
  json_path = "/tmp/clauseguard_report.json"
764
  with open(json_path, "w") as f:
765
  json.dump(result, f, indent=2, default=str)
 
766
  csv_content = export_csv(result)
767
  csv_path = "/tmp/clauseguard_report.csv"
768
  with open(csv_path, "w") as f:
769
  f.write(csv_content)
 
770
  return [
771
  render_summary(result),
772
  render_clause_cards(result),
773
  render_entities(result),
774
  render_contradictions(result),
775
  render_document_viewer(result),
776
+ render_obligations_html(result.get("obligations", [])),
777
+ render_compliance_html(result.get("compliance", {})),
778
  json_path,
779
  csv_path,
780
  "Analysis complete",
781
  ]
782
 
783
  def do_clear():
784
+ return [""] * 7 + [None, None, ""]
785
 
786
  # ── Example contracts ──
787
  SPOTIFY_TOS = """By using the Spotify Service, you agree to be bound by these Terms of Use.
 
826
 
827
  7. Non-Compete. During the term of this Agreement and for a period of two (2) years thereafter, the Receiving Party shall not engage in any business that competes with the Disclosing Party."""
828
 
829
+ COMPLEX_CONTRACT = """MASTER SERVICE AGREEMENT
830
+
831
+ This Master Service Agreement ("MSA") is entered into as of March 1, 2024 (the "Effective Date") by and between CloudTech Solutions, Inc., a Delaware corporation ("Provider") and Global Retail Partners LLC, a New York limited liability company ("Customer").
832
+
833
+ 1. SERVICES. Provider shall provide cloud hosting and data processing services as described in Exhibit A. Provider shall comply with all applicable laws including GDPR and CCPA.
834
+
835
+ 2. TERM AND RENEWAL. The initial term is twelve (12) months, automatically renewing for successive one (1) year periods unless terminated in accordance with Section 7.
836
+
837
+ 3. FEES AND PAYMENT. Customer shall pay a monthly fee of $25,000 within 30 days of invoice. Late payments incur a penalty of 1.5% per month. The total contract value is $300,000.
838
+
839
+ 4. LIABILITY. Provider's aggregate liability shall not exceed $1,000,000. IN NO EVENT SHALL PROVIDER BE LIABLE FOR LOST PROFITS OR CONSEQUENTIAL DAMAGES. Customer assumes all risk of data loss.
840
+
841
+ 5. INDEMNIFICATION. Each party shall indemnify the other for third-party claims arising from breach of this Agreement. Customer shall indemnify Provider for claims arising from Customer Data.
842
+
843
+ 6. INTELLECTUAL PROPERTY. Provider retains all IP rights. Customer receives a non-transferable, non-exclusive license for the term. Upon termination, Customer shall return or destroy all Provider materials within 10 business days.
844
+
845
+ 7. TERMINATION. Either party may terminate for convenience with 90 days notice. Provider may terminate immediately for non-payment. Upon termination, Customer shall pay all outstanding fees.
846
+
847
+ 8. GOVERNING LAW. This Agreement is governed by the laws of the State of Delaware. Disputes shall be resolved by binding arbitration in Wilmington, Delaware.
848
+
849
+ 9. FORCE MAJEURE. Neither party shall be liable for delays due to acts of God, war, terrorism, or government action.
850
+
851
+ 10. AUDIT RIGHTS. Customer may audit Provider's compliance annually. Provider shall provide SOC 2 Type II reports within 30 days of request.
852
+
853
+ 11. INSURANCE. Provider shall maintain general liability insurance of at least $5,000,000 and cyber liability insurance of at least $2,000,000.
854
+
855
+ 12. CONFIDENTIALITY. Both parties agree to keep Confidential Information secure for five (5) years. This obligation survives termination.
856
+
857
+ 13. ASSIGNMENT. Neither party may assign this Agreement without prior written consent. Any attempted assignment is void.
858
+
859
+ 14. THIRD PARTY BENEFICIARY. No third party shall have rights under this Agreement except as expressly provided."""
860
+
861
  with gr.Blocks(
862
  title="ClauseGuard β€” AI Contract Analysis",
863
  css="""
864
+ .gradio-container { max-width: 1600px !important; }
865
  """
866
  ) as demo:
867
 
 
869
  <div style="display:flex;align-items:center;justify-content:space-between;padding:12px 0;border-bottom:2px solid #e5e7eb;margin-bottom:16px;">
870
  <div>
871
  <h1 style="font-size:24px;font-weight:700;margin:0;color:#1f2937;">πŸ›‘οΈ ClauseGuard</h1>
872
+ <p style="font-size:13px;color:#6b7280;margin:4px 0 0 0;">AI-Powered Legal Contract Analysis Β· 41 Clause Categories Β· Risk Scoring Β· NER Β· NLI Β· Compliance Β· Obligations</p>
873
  </div>
874
+ <div style="font-size:12px;color:#9ca3af;">v2.0 Β· World's Best Open-Source Legal AI</div>
875
  </div>
876
  """)
877
 
878
+ # ── Main Tabs: Analysis vs Comparison ──
879
+ with gr.Tabs() as main_tabs:
880
+
881
+ # ═══════ TAB 1: Single Contract Analysis ═══════
882
+ with gr.Tab("πŸ“„ Single Contract Analysis"):
883
+ with gr.Row():
884
+ with gr.Column(scale=1):
885
+ file_input = gr.File(
886
+ label="πŸ“ Upload Contract (PDF/DOCX/TXT)",
887
+ file_types=[".pdf", ".docx", ".doc", ".txt", ".md"],
888
+ )
889
+ load_btn = gr.Button("Load Document", variant="secondary", size="sm")
890
+ load_status = gr.Textbox(label="Status", interactive=False, lines=1)
891
+
892
+ with gr.Column(scale=3):
893
+ text_input = gr.Textbox(
894
+ label="πŸ“„ Contract Text",
895
+ placeholder="Paste contract text here, or upload a file above...",
896
+ lines=14,
897
+ max_lines=40,
898
+ show_copy_button=True,
899
+ )
900
+
901
+ with gr.Column(scale=1):
902
+ scan_btn = gr.Button("πŸ” Analyze Contract", variant="primary", size="lg")
903
+ clear_btn = gr.Button("Clear", variant="secondary", size="sm")
904
+ status_msg = gr.Textbox(label="Analysis Status", interactive=False, lines=1)
905
+
906
+ # ── Examples ──
907
+ with gr.Row():
908
+ gr.Examples(
909
+ examples=[[SPOTIFY_TOS], [RENTAL_AGREEMENT], [NDA_SAMPLE], [COMPLEX_CONTRACT]],
910
+ inputs=[text_input],
911
+ label="Example Contracts",
912
+ )
913
+
914
+ # ── Results ──
915
+ with gr.Row():
916
+ with gr.Column(scale=1):
917
+ gr.Markdown("### πŸ“Š Risk Summary")
918
+ summary_html = gr.HTML()
919
+
920
+ gr.Markdown("### πŸ“₯ Export Reports")
921
+ json_file = gr.File(label="JSON Report")
922
+ csv_file = gr.File(label="CSV Report")
923
+
924
+ with gr.Column(scale=3):
925
+ with gr.Tabs():
926
+ with gr.Tab("πŸ“„ Document"):
927
+ doc_html = gr.HTML(label="Document Viewer")
928
+ with gr.Tab("⚠️ Clauses (41 Categories)"):
929
+ clauses_html = gr.HTML(label="Detected Clauses")
930
+ with gr.Tab("🏷️ Entities"):
931
+ entities_html = gr.HTML(label="Named Entities")
932
+ with gr.Tab("πŸ” Contradictions"):
933
+ nli_html = gr.HTML(label="Contradictions & Missing Clauses")
934
+ with gr.Tab("πŸ“‹ Obligations"):
935
+ obligations_html = gr.HTML(label="Obligation Tracker")
936
+ with gr.Tab("βš–οΈ Compliance"):
937
+ compliance_html = gr.HTML(label="Compliance Checker")
938
+
939
+ # ═══════ TAB 2: Contract Comparison ═══════
940
+ with gr.Tab("πŸ”€ Compare Contracts"):
941
+ with gr.Row():
942
+ with gr.Column(scale=1):
943
+ comp_file_a = gr.File(
944
+ label="πŸ“ Contract A (PDF/DOCX/TXT)",
945
+ file_types=[".pdf", ".docx", ".doc", ".txt"],
946
+ )
947
+ comp_load_a = gr.Button("Load A", variant="secondary", size="sm")
948
+ comp_status_a = gr.Textbox(label="Status A", interactive=False, lines=1)
949
+
950
+ with gr.Column(scale=3):
951
+ comp_text_a = gr.Textbox(
952
+ label="Contract A",
953
+ placeholder="Paste contract A here...",
954
+ lines=12,
955
+ show_copy_button=True,
956
+ )
957
+
958
+ with gr.Column(scale=1):
959
+ comp_file_b = gr.File(
960
+ label="πŸ“ Contract B (PDF/DOCX/TXT)",
961
+ file_types=[".pdf", ".docx", ".doc", ".txt"],
962
+ )
963
+ comp_load_b = gr.Button("Load B", variant="secondary", size="sm")
964
+ comp_status_b = gr.Textbox(label="Status B", interactive=False, lines=1)
965
+
966
+ with gr.Column(scale=3):
967
+ comp_text_b = gr.Textbox(
968
+ label="Contract B",
969
+ placeholder="Paste contract B here...",
970
+ lines=12,
971
+ show_copy_button=True,
972
+ )
973
+
974
+ with gr.Row():
975
+ with gr.Column(scale=1):
976
+ comp_btn = gr.Button("πŸ”€ Compare Contracts", variant="primary", size="lg")
977
+ with gr.Column(scale=5):
978
+ comp_status = gr.Textbox(label="Comparison Status", interactive=False, lines=1)
979
+
980
+ with gr.Row():
981
+ with gr.Column(scale=4):
982
+ comp_result_html = gr.HTML(label="Comparison Results")
983
+ with gr.Column(scale=2):
984
+ comp_json = gr.JSON(label="Raw Comparison Data")
985
 
986
  # ── Events ──
987
  def _load_file(file):
 
991
  return text, "Loaded successfully" if not err else err
992
 
993
  load_btn.click(_load_file, inputs=[file_input], outputs=[text_input, load_status])
994
+ comp_load_a.click(_load_file, inputs=[comp_file_a], outputs=[comp_text_a, comp_status_a])
995
+ comp_load_b.click(_load_file, inputs=[comp_file_b], outputs=[comp_text_b, comp_status_b])
996
 
997
  scan_btn.click(
998
  run_analysis,
999
  inputs=[text_input],
1000
  outputs=[summary_html, clauses_html, entities_html, nli_html,
1001
+ doc_html, obligations_html, compliance_html,
1002
+ json_file, csv_file, status_msg]
1003
  )
1004
 
1005
  clear_btn.click(
1006
  do_clear,
1007
  outputs=[summary_html, clauses_html, entities_html, nli_html,
1008
+ doc_html, obligations_html, compliance_html,
1009
+ json_file, csv_file, status_msg]
1010
+ )
1011
+
1012
+ comp_btn.click(
1013
+ run_comparison,
1014
+ inputs=[comp_text_a, comp_text_b],
1015
+ outputs=[comp_result_html, comp_json]
1016
  )
1017
 
1018
  gr.HTML("""
1019
  <div style="margin-top:24px;padding:16px 0;border-top:1px solid #e5e7eb;text-align:center;">
1020
  <p style="font-size:11px;color:#9ca3af;">
1021
  ⚠️ Not legal advice. For informational purposes only.
1022
+ Β· Model: <a href="https://huggingface.co/Mokshith31/legalbert-contract-clause-classification" style="color:#6b7280;">Legal-BERT + CUAD (41 classes)</a>
1023
  Β· Dataset: <a href="https://huggingface.co/datasets/theatticusproject/cuad-qa" style="color:#6b7280;">CUAD</a>
1024
  Β· <a href="https://huggingface.co/spaces/gaurv007/ClauseGuard" style="color:#6b7280;">ClauseGuard Space</a>
1025
  </p>