Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -368,7 +368,6 @@ def _classify_regex(text):
|
|
| 368 |
|
| 369 |
def extract_entities(text):
|
| 370 |
entities = []
|
| 371 |
-
# Dates
|
| 372 |
date_patterns = [
|
| 373 |
(r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', "DATE"),
|
| 374 |
(r'\b\d{1,2}/\d{1,2}/\d{2,4}\b', "DATE"),
|
|
@@ -378,7 +377,6 @@ def extract_entities(text):
|
|
| 378 |
for pat, etype in date_patterns:
|
| 379 |
for m in re.finditer(pat, text, re.IGNORECASE):
|
| 380 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
| 381 |
-
# Monetary values
|
| 382 |
money_patterns = [
|
| 383 |
(r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?(?:\s*(?:million|billion|thousand|M|B|K))?', "MONEY"),
|
| 384 |
(r'\b\d{1,3}(?:,\d{3})*(?:\.\d{2})?\s*(?:USD|EUR|GBP|dollars|euros)', "MONEY"),
|
|
@@ -386,7 +384,6 @@ def extract_entities(text):
|
|
| 386 |
for pat, etype in money_patterns:
|
| 387 |
for m in re.finditer(pat, text, re.IGNORECASE):
|
| 388 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
| 389 |
-
# Party names
|
| 390 |
party_patterns = [
|
| 391 |
(r'\b[A-Z][A-Za-z0-9\s&]+(?:Inc\.|LLC|Ltd\.|Limited|Corp\.|Corporation|PLC|GmbH|AG|S\.A\.|B\.V\.)\b', "PARTY"),
|
| 392 |
(r'\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Company|Customer|Vendor|Client)\b', "PARTY_ROLE"),
|
|
@@ -394,7 +391,6 @@ def extract_entities(text):
|
|
| 394 |
for pat, etype in party_patterns:
|
| 395 |
for m in re.finditer(pat, text):
|
| 396 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
| 397 |
-
# Jurisdictions
|
| 398 |
jurisdiction_patterns = [
|
| 399 |
(r'\b(?:State|Laws?) of [A-Z][a-zA-Z\s]+', "JURISDICTION"),
|
| 400 |
(r'\b(?:California|Delaware|New York|Texas|Florida|England|Ireland|Germany|France|Singapore|Hong Kong)\b', "JURISDICTION"),
|
|
@@ -402,7 +398,6 @@ def extract_entities(text):
|
|
| 402 |
for pat, etype in jurisdiction_patterns:
|
| 403 |
for m in re.finditer(pat, text, re.IGNORECASE):
|
| 404 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
| 405 |
-
# Defined Terms
|
| 406 |
defined_patterns = [
|
| 407 |
(r'"([A-Z][A-Z\s]+)"', "DEFINED_TERM"),
|
| 408 |
(r'\(([A-Z][A-Z\s]+)\)', "DEFINED_TERM"),
|
|
@@ -410,7 +405,6 @@ def extract_entities(text):
|
|
| 410 |
for pat, etype in defined_patterns:
|
| 411 |
for m in re.finditer(pat, text):
|
| 412 |
entities.append({"text": m.group(1), "type": etype, "start": m.start(), "end": m.end()})
|
| 413 |
-
# Deduplicate
|
| 414 |
entities.sort(key=lambda x: (x["start"], -(x["end"] - x["start"])))
|
| 415 |
filtered = []
|
| 416 |
last_end = -1
|
|
@@ -759,7 +753,6 @@ def run_analysis(text):
|
|
| 759 |
if error:
|
| 760 |
err_html = f'<p style="color:#dc2626;padding:16px;">{error}</p>'
|
| 761 |
return [err_html] * 7 + [None, None, error]
|
| 762 |
-
# Save export files
|
| 763 |
json_path = "/tmp/clauseguard_report.json"
|
| 764 |
with open(json_path, "w") as f:
|
| 765 |
json.dump(result, f, indent=2, default=str)
|
|
@@ -876,7 +869,7 @@ with gr.Blocks(
|
|
| 876 |
""")
|
| 877 |
|
| 878 |
# ── Main Tabs: Analysis vs Comparison ──
|
| 879 |
-
with gr.Tabs()
|
| 880 |
|
| 881 |
# ═══════ TAB 1: Single Contract Analysis ═══════
|
| 882 |
with gr.Tab("π Single Contract Analysis"):
|
|
|
|
| 368 |
|
| 369 |
def extract_entities(text):
|
| 370 |
entities = []
|
|
|
|
| 371 |
date_patterns = [
|
| 372 |
(r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', "DATE"),
|
| 373 |
(r'\b\d{1,2}/\d{1,2}/\d{2,4}\b', "DATE"),
|
|
|
|
| 377 |
for pat, etype in date_patterns:
|
| 378 |
for m in re.finditer(pat, text, re.IGNORECASE):
|
| 379 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
|
|
|
| 380 |
money_patterns = [
|
| 381 |
(r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?(?:\s*(?:million|billion|thousand|M|B|K))?', "MONEY"),
|
| 382 |
(r'\b\d{1,3}(?:,\d{3})*(?:\.\d{2})?\s*(?:USD|EUR|GBP|dollars|euros)', "MONEY"),
|
|
|
|
| 384 |
for pat, etype in money_patterns:
|
| 385 |
for m in re.finditer(pat, text, re.IGNORECASE):
|
| 386 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
|
|
|
| 387 |
party_patterns = [
|
| 388 |
(r'\b[A-Z][A-Za-z0-9\s&]+(?:Inc\.|LLC|Ltd\.|Limited|Corp\.|Corporation|PLC|GmbH|AG|S\.A\.|B\.V\.)\b', "PARTY"),
|
| 389 |
(r'\b(?:Party A|Party B|Disclosing Party|Receiving Party|Licensor|Licensee|Buyer|Seller|Tenant|Landlord|Employer|Employee|Company|Customer|Vendor|Client)\b', "PARTY_ROLE"),
|
|
|
|
| 391 |
for pat, etype in party_patterns:
|
| 392 |
for m in re.finditer(pat, text):
|
| 393 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
|
|
|
| 394 |
jurisdiction_patterns = [
|
| 395 |
(r'\b(?:State|Laws?) of [A-Z][a-zA-Z\s]+', "JURISDICTION"),
|
| 396 |
(r'\b(?:California|Delaware|New York|Texas|Florida|England|Ireland|Germany|France|Singapore|Hong Kong)\b', "JURISDICTION"),
|
|
|
|
| 398 |
for pat, etype in jurisdiction_patterns:
|
| 399 |
for m in re.finditer(pat, text, re.IGNORECASE):
|
| 400 |
entities.append({"text": m.group(), "type": etype, "start": m.start(), "end": m.end()})
|
|
|
|
| 401 |
defined_patterns = [
|
| 402 |
(r'"([A-Z][A-Z\s]+)"', "DEFINED_TERM"),
|
| 403 |
(r'\(([A-Z][A-Z\s]+)\)', "DEFINED_TERM"),
|
|
|
|
| 405 |
for pat, etype in defined_patterns:
|
| 406 |
for m in re.finditer(pat, text):
|
| 407 |
entities.append({"text": m.group(1), "type": etype, "start": m.start(), "end": m.end()})
|
|
|
|
| 408 |
entities.sort(key=lambda x: (x["start"], -(x["end"] - x["start"])))
|
| 409 |
filtered = []
|
| 410 |
last_end = -1
|
|
|
|
| 753 |
if error:
|
| 754 |
err_html = f'<p style="color:#dc2626;padding:16px;">{error}</p>'
|
| 755 |
return [err_html] * 7 + [None, None, error]
|
|
|
|
| 756 |
json_path = "/tmp/clauseguard_report.json"
|
| 757 |
with open(json_path, "w") as f:
|
| 758 |
json.dump(result, f, indent=2, default=str)
|
|
|
|
| 869 |
""")
|
| 870 |
|
| 871 |
# ── Main Tabs: Analysis vs Comparison ──
|
| 872 |
+
with gr.Tabs():
|
| 873 |
|
| 874 |
# ═══════ TAB 1: Single Contract Analysis ═══════
|
| 875 |
with gr.Tab("π Single Contract Analysis"):
|