gaurv007 committed on
Commit
be855a6
·
verified ·
1 Parent(s): d3099a5

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -8
app.py CHANGED
@@ -368,7 +368,6 @@ def _classify_regex(text):
368
 
369
  def extract_entities(text):
370
  entities = []
371
- # Dates
372
  date_patterns = [
373
  (r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', "DATE"),
374
  (r'\b\d{1,2}/\d{1,2}/\d{2,4}\b', "DATE"),
@@ -378,7 +377,6 @@ def extract_entities(text):
378
  for pat, etype in date_patterns:
379
  for m in re.finditer(pat, text, re.IGNORECASE):
380
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
381
- # Monetary values
382
  money_patterns = [
383
  (r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?(?:\s*(?:million|billion|thousand|M|B|K))?', "MONEY"),
384
  (r'\b\d{1,3}(?:,\d{3})*(?:\.\d{2})?\s*(?:USD|EUR|GBP|dollars|euros)', "MONEY"),
@@ -386,7 +384,6 @@ def extract_entities(text):
386
  for pat, etype in money_patterns:
387
  for m in re.finditer(pat, text, re.IGNORECASE):
388
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
389
- # Party names
390
  party_patterns = [
391
  (r'\b[A-Z][A-Za-z0-9\s&]+(?:Inc\.|LLC|Ltd\.|Limited|Corp\.|Corporation|PLC|GmbH|AG|S\.A\.|B\.V\.)\b', "PARTY"),
392
  (r'\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Company|Customer|Vendor|Client)\b', "PARTY_ROLE"),
@@ -394,7 +391,6 @@ def extract_entities(text):
394
  for pat, etype in party_patterns:
395
  for m in re.finditer(pat, text):
396
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
397
- # Jurisdictions
398
  jurisdiction_patterns = [
399
  (r'\b(?:State|Laws?) of [A-Z][a-zA-Z\s]+', "JURISDICTION"),
400
  (r'\b(?:California|Delaware|New York|Texas|Florida|England|Ireland|Germany|France|Singapore|Hong Kong)\b', "JURISDICTION"),
@@ -402,7 +398,6 @@ def extract_entities(text):
402
  for pat, etype in jurisdiction_patterns:
403
  for m in re.finditer(pat, text, re.IGNORECASE):
404
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
405
- # Defined Terms
406
  defined_patterns = [
407
  (r'"([A-Z][A-Z\s]+)"', "DEFINED_TERM"),
408
  (r'\(([A-Z][A-Z\s]+)\)', "DEFINED_TERM"),
@@ -410,7 +405,6 @@ def extract_entities(text):
410
  for pat, etype in defined_patterns:
411
  for m in re.finditer(pat, text):
412
  entities.append({"text": m.group(1), "type": etype, "start": m.start(), "end": m.end()})
413
- # Deduplicate
414
  entities.sort(key=lambda x: (x["start"], -(x["end"] - x["start"])))
415
  filtered = []
416
  last_end = -1
@@ -759,7 +753,6 @@ def run_analysis(text):
759
  if error:
760
  err_html = f'<p style="color:#dc2626;padding:16px;">{error}</p>'
761
  return [err_html] * 7 + [None, None, error]
762
- # Save export files
763
  json_path = "/tmp/clauseguard_report.json"
764
  with open(json_path, "w") as f:
765
  json.dump(result, f, indent=2, default=str)
@@ -876,7 +869,7 @@ with gr.Blocks(
876
  """)
877
 
878
  # ── Main Tabs: Analysis vs Comparison ──
879
- with gr.Tabs() as main_tabs:
880
 
881
  # ═══════ TAB 1: Single Contract Analysis ═══════
882
  with gr.Tab("📄 Single Contract Analysis"):
 
368
 
369
  def extract_entities(text):
370
  entities = []
 
371
  date_patterns = [
372
  (r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', "DATE"),
373
  (r'\b\d{1,2}/\d{1,2}/\d{2,4}\b', "DATE"),
 
377
  for pat, etype in date_patterns:
378
  for m in re.finditer(pat, text, re.IGNORECASE):
379
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
 
380
  money_patterns = [
381
  (r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?(?:\s*(?:million|billion|thousand|M|B|K))?', "MONEY"),
382
  (r'\b\d{1,3}(?:,\d{3})*(?:\.\d{2})?\s*(?:USD|EUR|GBP|dollars|euros)', "MONEY"),
 
384
  for pat, etype in money_patterns:
385
  for m in re.finditer(pat, text, re.IGNORECASE):
386
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
 
387
  party_patterns = [
388
  (r'\b[A-Z][A-Za-z0-9\s&]+(?:Inc\.|LLC|Ltd\.|Limited|Corp\.|Corporation|PLC|GmbH|AG|S\.A\.|B\.V\.)\b', "PARTY"),
389
  (r'\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Company|Customer|Vendor|Client)\b', "PARTY_ROLE"),
 
391
  for pat, etype in party_patterns:
392
  for m in re.finditer(pat, text):
393
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
 
394
  jurisdiction_patterns = [
395
  (r'\b(?:State|Laws?) of [A-Z][a-zA-Z\s]+', "JURISDICTION"),
396
  (r'\b(?:California|Delaware|New York|Texas|Florida|England|Ireland|Germany|France|Singapore|Hong Kong)\b', "JURISDICTION"),
 
398
  for pat, etype in jurisdiction_patterns:
399
  for m in re.finditer(pat, text, re.IGNORECASE):
400
  entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
 
401
  defined_patterns = [
402
  (r'"([A-Z][A-Z\s]+)"', "DEFINED_TERM"),
403
  (r'\(([A-Z][A-Z\s]+)\)', "DEFINED_TERM"),
 
405
  for pat, etype in defined_patterns:
406
  for m in re.finditer(pat, text):
407
  entities.append({"text": m.group(1), "type": etype, "start": m.start(), "end": m.end()})
 
408
  entities.sort(key=lambda x: (x["start"], -(x["end"] - x["start"])))
409
  filtered = []
410
  last_end = -1
 
753
  if error:
754
  err_html = f'<p style="color:#dc2626;padding:16px;">{error}</p>'
755
  return [err_html] * 7 + [None, None, error]
 
756
  json_path = "/tmp/clauseguard_report.json"
757
  with open(json_path, "w") as f:
758
  json.dump(result, f, indent=2, default=str)
 
869
  """)
870
 
871
  # ── Main Tabs: Analysis vs Comparison ──
872
+ with gr.Tabs():
873
 
874
  # ═══════ TAB 1: Single Contract Analysis ═══════
875
  with gr.Tab("📄 Single Contract Analysis"):