Spaces:
Running
Running
Commit ·
9c3a4be
1
Parent(s): 8ed167c
timeout warning
Browse files
app.py
CHANGED
|
@@ -17,69 +17,41 @@ import json
|
|
| 17 |
import time
|
| 18 |
import traceback
|
| 19 |
from validation import validate_json, validate_croissant, validate_records, generate_validation_report
|
| 20 |
-
import threading
|
| 21 |
|
| 22 |
def process_file(file):
|
| 23 |
results = []
|
| 24 |
json_data = None
|
| 25 |
-
timer = None
|
| 26 |
-
warning_text = None # to be set if timer fires
|
| 27 |
|
| 28 |
filename = file.name.split("/")[-1]
|
| 29 |
|
| 30 |
-
# JSON validation
|
| 31 |
json_valid, json_message, json_data = validate_json(file.name)
|
| 32 |
json_message = json_message.replace("\n✓\n", "\n")
|
| 33 |
results.append(("JSON Format Validation", json_valid, json_message, "pass" if json_valid else "error"))
|
|
|
|
| 34 |
if not json_valid:
|
| 35 |
-
return results, None
|
| 36 |
|
| 37 |
-
#
|
| 38 |
croissant_valid, croissant_message = validate_croissant(json_data)
|
| 39 |
croissant_message = croissant_message.replace("\n✓\n", "\n")
|
| 40 |
results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
|
|
|
|
| 41 |
if not croissant_valid:
|
| 42 |
-
return results, None
|
| 43 |
-
|
| 44 |
-
# Start timer before records validation
|
| 45 |
-
fired = threading.Event()
|
| 46 |
-
|
| 47 |
-
def trigger_warning():
|
| 48 |
-
nonlocal warning_text
|
| 49 |
-
warning_text = """
|
| 50 |
-
⚠️ <b>This is taking longer than usual</b>. It is possible that this checker is currently being used by a lot of people
|
| 51 |
-
at the same time, which may trigger rate limiting by the platform hosting your data. The app will then try again and may get into a very long loop.<br><br>
|
| 52 |
-
In that case, we recommend using any of the following options:
|
| 53 |
-
<ul style="text-align:left; margin: 0 auto; display:inline-block;">
|
| 54 |
-
<li>🔁 Duplicate this Space on Hugging Face</li>
|
| 55 |
-
<li>💻 Run it locally (GitHub or Docker)</li>
|
| 56 |
-
<li>🥐 Use <code>mlcroissant</code> from <a href="https://github.com/mlcommons/croissant" target="_blank">GitHub</a></li>
|
| 57 |
-
</ul>
|
| 58 |
-
"""
|
| 59 |
-
fired.set()
|
| 60 |
-
|
| 61 |
-
timer = threading.Timer(0.1, trigger_warning)
|
| 62 |
-
timer.start()
|
| 63 |
-
|
| 64 |
-
try:
|
| 65 |
-
records_valid, records_message, records_status = validate_records(json_data)
|
| 66 |
-
finally:
|
| 67 |
-
timer.cancel()
|
| 68 |
-
|
| 69 |
-
if fired.is_set():
|
| 70 |
-
warning_html_update = gr.update(value=warning_text, visible=True)
|
| 71 |
-
else:
|
| 72 |
-
warning_html_update = gr.update(visible=False)
|
| 73 |
|
|
|
|
|
|
|
| 74 |
records_message = records_message.replace("\n✓\n", "\n")
|
| 75 |
results.append(("Records Generation Test", records_valid, records_message, records_status))
|
| 76 |
|
|
|
|
| 77 |
report = generate_validation_report(filename, json_data, results)
|
| 78 |
-
|
|
|
|
| 79 |
|
| 80 |
def create_ui():
|
| 81 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
| 82 |
-
delayed_warning_html = gr.HTML("", visible=False)
|
| 83 |
gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
|
| 84 |
gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
|
| 85 |
gr.Markdown("""
|
|
@@ -510,12 +482,14 @@ def create_ui():
|
|
| 510 |
gr.update(visible=False), # validation_results
|
| 511 |
gr.update(visible=False), # validation_progress
|
| 512 |
gr.update(visible=False), # report_group
|
| 513 |
-
None,
|
| 514 |
-
|
| 515 |
]
|
| 516 |
|
| 517 |
-
|
|
|
|
| 518 |
|
|
|
|
| 519 |
try:
|
| 520 |
with open(file.name, 'r') as f:
|
| 521 |
json_data = json.load(f)
|
|
@@ -523,18 +497,19 @@ def create_ui():
|
|
| 523 |
except:
|
| 524 |
dataset_name = 'unnamed'
|
| 525 |
|
|
|
|
| 526 |
report_filename = f"report_croissant-validation_{dataset_name}.md"
|
| 527 |
if report:
|
| 528 |
with open(report_filename, "w") as f:
|
| 529 |
f.write(report)
|
| 530 |
|
|
|
|
| 531 |
return [
|
| 532 |
-
build_results_html(results),
|
| 533 |
-
gr.update(visible=False),
|
| 534 |
-
gr.update(visible=True) if report else gr.update(visible=False),
|
| 535 |
-
report if report else None,
|
| 536 |
-
report_filename if report else None
|
| 537 |
-
warning_html_update or gr.update(visible=False)
|
| 538 |
]
|
| 539 |
|
| 540 |
# Connect UI events to functions with updated outputs
|
|
@@ -564,7 +539,7 @@ def create_ui():
|
|
| 564 |
None, # report_text
|
| 565 |
None # report_md
|
| 566 |
]
|
| 567 |
-
|
| 568 |
validate_btn.click(
|
| 569 |
fn=show_progress,
|
| 570 |
inputs=None,
|
|
@@ -573,7 +548,7 @@ def create_ui():
|
|
| 573 |
).then(
|
| 574 |
fn=on_validate,
|
| 575 |
inputs=file_input,
|
| 576 |
-
outputs=[validation_results, validation_progress, report_group, report_text, report_md
|
| 577 |
)
|
| 578 |
|
| 579 |
fetch_btn.click(
|
|
@@ -588,6 +563,18 @@ def create_ui():
|
|
| 588 |
<p>Learn more about <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant</a>.</p>
|
| 589 |
</div>
|
| 590 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 591 |
|
| 592 |
return app
|
| 593 |
|
|
|
|
| 17 |
import time
|
| 18 |
import traceback
|
| 19 |
from validation import validate_json, validate_croissant, validate_records, generate_validation_report
|
|
|
|
| 20 |
|
| 21 |
def process_file(file):
    """Run the validation stages on an uploaded file and build the report.

    Three stages run in order: JSON format validation, Croissant schema
    validation, and the records generation test. A failure in either of the
    first two stages stops processing immediately.

    Args:
        file: Uploaded-file object; only its ``name`` attribute (used as the
            path handed to ``validate_json``) is read here.

    Returns:
        A ``(results, report)`` pair. ``results`` is a list of
        ``(test_name, passed, message, status)`` tuples, one per stage that
        ran. ``report`` is the value returned by
        ``generate_validation_report`` when all three stages ran, or ``None``
        when JSON or Croissant validation failed.
    """
    # Standalone check-mark line that is collapsed out of validator messages.
    checkmark_line = "\n✓\n"
    outcomes = []

    # Only the last "/"-separated path component is passed to the report.
    base_name = file.name.rsplit("/", 1)[-1]

    # Stage 1: JSON format validation.
    is_json, json_msg, parsed = validate_json(file.name)
    outcomes.append((
        "JSON Format Validation",
        is_json,
        json_msg.replace(checkmark_line, "\n"),
        "pass" if is_json else "error",
    ))
    if not is_json:
        return outcomes, None

    # Stage 2: Croissant schema validation.
    is_croissant, croissant_msg = validate_croissant(parsed)
    outcomes.append((
        "Croissant Schema Validation",
        is_croissant,
        croissant_msg.replace(checkmark_line, "\n"),
        "pass" if is_croissant else "error",
    ))
    if not is_croissant:
        return outcomes, None

    # Stage 3: records generation; the validator supplies its own status
    # value, so it is passed through unchanged.
    records_ok, records_msg, records_state = validate_records(parsed)
    outcomes.append((
        "Records Generation Test",
        records_ok,
        records_msg.replace(checkmark_line, "\n"),
        records_state,
    ))

    # All stages ran: build the final report from the collected outcomes.
    return outcomes, generate_validation_report(base_name, parsed, outcomes)
|
| 52 |
|
| 53 |
def create_ui():
|
| 54 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
|
|
| 55 |
gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
|
| 56 |
gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
|
| 57 |
gr.Markdown("""
|
|
|
|
| 482 |
gr.update(visible=False), # validation_results
|
| 483 |
gr.update(visible=False), # validation_progress
|
| 484 |
gr.update(visible=False), # report_group
|
| 485 |
+
None, # report_text
|
| 486 |
+
None # report_md
|
| 487 |
]
|
| 488 |
|
| 489 |
+
# Process the file and get results
|
| 490 |
+
results, report = process_file(file)
|
| 491 |
|
| 492 |
+
# Extract dataset name from the JSON for the report filename
|
| 493 |
try:
|
| 494 |
with open(file.name, 'r') as f:
|
| 495 |
json_data = json.load(f)
|
|
|
|
| 497 |
except:
|
| 498 |
dataset_name = 'unnamed'
|
| 499 |
|
| 500 |
+
# Save report to file with new naming convention
|
| 501 |
report_filename = f"report_croissant-validation_{dataset_name}.md"
|
| 502 |
if report:
|
| 503 |
with open(report_filename, "w") as f:
|
| 504 |
f.write(report)
|
| 505 |
|
| 506 |
+
# Return final state
|
| 507 |
return [
|
| 508 |
+
build_results_html(results), # validation_results
|
| 509 |
+
gr.update(visible=False), # validation_progress
|
| 510 |
+
gr.update(visible=True) if report else gr.update(visible=False), # report_group
|
| 511 |
+
report if report else None, # report_text
|
| 512 |
+
report_filename if report else None # report_md
|
|
|
|
| 513 |
]
|
| 514 |
|
| 515 |
# Connect UI events to functions with updated outputs
|
|
|
|
| 539 |
None, # report_text
|
| 540 |
None # report_md
|
| 541 |
]
|
| 542 |
+
|
| 543 |
validate_btn.click(
|
| 544 |
fn=show_progress,
|
| 545 |
inputs=None,
|
|
|
|
| 548 |
).then(
|
| 549 |
fn=on_validate,
|
| 550 |
inputs=file_input,
|
| 551 |
+
outputs=[validation_results, validation_progress, report_group, report_text, report_md]
|
| 552 |
)
|
| 553 |
|
| 554 |
fetch_btn.click(
|
|
|
|
| 563 |
<p>Learn more about <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant</a>.</p>
|
| 564 |
</div>
|
| 565 |
""")
|
| 566 |
+
|
| 567 |
+
gr.HTML("""
|
| 568 |
+
<div class="progress-status" style="text-align: left; color: #d35400;">
|
| 569 |
+
⚠️ It is possible that this validator is currently being used by a lot of people at the same time, which may trigger rate limiting by the platform hosting your data.
|
| 570 |
+
The app will then try again and may get into a very long loop. If it takes too long to run, we recommend using any of the following options:
|
| 571 |
+
<ul style="text-align:left; margin: 0 auto; display:inline-block;">
|
| 572 |
+
<li>🔁 Click the button with the three dots (⋯) above and select "Duplicate this Space" to run this app in your own Hugging Face space.</li>
|
| 573 |
+
<li>💻 Click the button with the three dots (⋯) above and select "Run Locally" and then "Clone (git)" to get instructions to run the checker locally. You can also use docker option (you don't need the tokens).</li>
|
| 574 |
+
<li>🥐 Run the Croissant validation code yourself (<a href="https://github.com/mlcommons/croissant" target="_blank">GitHub</a>), e.g. with <a href="https://github.com/mlcommons/croissant/tree/7a632f34438e9c8e3812c6a0049898560259c6d4/python/mlcroissant/mlcroissant/scripts" target="_blank">these scripts</a> (validate and load).</li>
|
| 575 |
+
</ul>
|
| 576 |
+
</div>
|
| 577 |
+
""")
|
| 578 |
|
| 579 |
return app
|
| 580 |
|