Commit ·
2060674
1
Parent(s): 70b6b21
requirements fix and licence warnings
Browse files- app.py +70 -28
- validation.py +14 -3
app.py
CHANGED
|
@@ -21,9 +21,9 @@ def process_file(file):
|
|
| 21 |
return results, None
|
| 22 |
|
| 23 |
# Check 2: Croissant validation
|
| 24 |
-
croissant_valid, croissant_message = validate_croissant(json_data)
|
| 25 |
croissant_message = croissant_message.replace("\n✓\n", "\n")
|
| 26 |
-
results.append(("Croissant Schema Validation", croissant_valid, croissant_message,
|
| 27 |
|
| 28 |
if not croissant_valid:
|
| 29 |
return results, None
|
|
@@ -43,7 +43,7 @@ def process_file(file):
|
|
| 43 |
return results, report
|
| 44 |
|
| 45 |
def create_ui():
|
| 46 |
-
with gr.Blocks(
|
| 47 |
gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
|
| 48 |
gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
|
| 49 |
gr.Markdown("""
|
|
@@ -85,7 +85,7 @@ def create_ui():
|
|
| 85 |
# Now create the validation results section in a separate group
|
| 86 |
with gr.Group():
|
| 87 |
# Validation results
|
| 88 |
-
validation_results = gr.HTML(visible=
|
| 89 |
validation_progress = gr.HTML(visible=False)
|
| 90 |
|
| 91 |
# Collapsible report section
|
|
@@ -99,7 +99,6 @@ def create_ui():
|
|
| 99 |
report_text = gr.Textbox(
|
| 100 |
label="Report Content",
|
| 101 |
visible=True,
|
| 102 |
-
show_copy_button=True,
|
| 103 |
lines=10,
|
| 104 |
elem_id="report-text-box"
|
| 105 |
)
|
|
@@ -107,10 +106,39 @@ def create_ui():
|
|
| 107 |
# Define CSS for the validation UI
|
| 108 |
gr.HTML("""
|
| 109 |
<style>
|
|
|
|
|
|
|
|
|
|
| 110 |
/* Set max width and center the app */
|
| 111 |
.gradio-container {
|
| 112 |
max-width: 750px !important;
|
| 113 |
margin: 0 auto !important;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
}
|
| 115 |
|
| 116 |
/* Make basic containers transparent */
|
|
@@ -297,7 +325,7 @@ def create_ui():
|
|
| 297 |
return [
|
| 298 |
"upload",
|
| 299 |
"""<div class="progress-status">Ready for upload</div>""",
|
| 300 |
-
gr.update(
|
| 301 |
gr.update(visible=False), # Hide report group
|
| 302 |
None, # Clear report text
|
| 303 |
None, # Clear report file
|
|
@@ -308,7 +336,7 @@ def create_ui():
|
|
| 308 |
return [
|
| 309 |
"url",
|
| 310 |
"""<div class="progress-status">Enter a URL to fetch</div>""",
|
| 311 |
-
gr.update(
|
| 312 |
gr.update(visible=False), # Hide report group
|
| 313 |
None, # Clear report text
|
| 314 |
None, # Clear report file
|
|
@@ -329,7 +357,7 @@ def create_ui():
|
|
| 329 |
if file is None:
|
| 330 |
return [
|
| 331 |
"""<div class="progress-status">Ready for upload</div>""",
|
| 332 |
-
gr.update(
|
| 333 |
gr.update(visible=False), # Hide report group
|
| 334 |
None, # Clear report text
|
| 335 |
None # Clear report file
|
|
@@ -337,7 +365,7 @@ def create_ui():
|
|
| 337 |
|
| 338 |
return [
|
| 339 |
"""<div class="progress-status">✅ File uploaded successfully</div>""",
|
| 340 |
-
gr.update(
|
| 341 |
gr.update(visible=False), # Hide report group
|
| 342 |
None, # Clear report text
|
| 343 |
None # Clear report file
|
|
@@ -347,7 +375,7 @@ def create_ui():
|
|
| 347 |
if not url:
|
| 348 |
return [
|
| 349 |
"""<div class="progress-status">Please enter a URL</div>""",
|
| 350 |
-
gr.update(
|
| 351 |
gr.update(visible=False),
|
| 352 |
None,
|
| 353 |
None
|
|
@@ -365,8 +393,8 @@ def create_ui():
|
|
| 365 |
results = []
|
| 366 |
results.append(("JSON Format Validation", True, "The URL returned valid JSON."))
|
| 367 |
|
| 368 |
-
croissant_valid, croissant_message = validate_croissant(json_data)
|
| 369 |
-
results.append(("Croissant Schema Validation", croissant_valid, croissant_message))
|
| 370 |
|
| 371 |
if not croissant_valid:
|
| 372 |
return [
|
|
@@ -404,7 +432,7 @@ def create_ui():
|
|
| 404 |
error_message = f"Error fetching URL: {str(e)}"
|
| 405 |
return [
|
| 406 |
f"""<div class="progress-status">{error_message}</div>""",
|
| 407 |
-
gr.update(
|
| 408 |
gr.update(visible=False),
|
| 409 |
None,
|
| 410 |
None
|
|
@@ -413,7 +441,7 @@ def create_ui():
|
|
| 413 |
error_message = f"URL did not return valid JSON: {str(e)}"
|
| 414 |
return [
|
| 415 |
f"""<div class="progress-status">{error_message}</div>""",
|
| 416 |
-
gr.update(
|
| 417 |
gr.update(visible=False),
|
| 418 |
None,
|
| 419 |
None
|
|
@@ -422,7 +450,7 @@ def create_ui():
|
|
| 422 |
error_message = f"Unexpected error: {str(e)}"
|
| 423 |
return [
|
| 424 |
f"""<div class="progress-status">{error_message}</div>""",
|
| 425 |
-
gr.update(
|
| 426 |
gr.update(visible=False),
|
| 427 |
None,
|
| 428 |
None
|
|
@@ -442,11 +470,14 @@ def create_ui():
|
|
| 442 |
status_class = "status-success"
|
| 443 |
status_icon = "✓"
|
| 444 |
message_with_emoji = "✅ " + message
|
| 445 |
-
elif status == "warning"
|
| 446 |
status_class = "status-warning"
|
| 447 |
status_icon = "?"
|
| 448 |
-
|
| 449 |
-
|
|
|
|
|
|
|
|
|
|
| 450 |
status_class = "status-error"
|
| 451 |
status_icon = "✗"
|
| 452 |
message_with_emoji = "❌ " + message
|
|
@@ -482,13 +513,29 @@ def create_ui():
|
|
| 482 |
|
| 483 |
def on_validate(file):
|
| 484 |
if file is None:
|
| 485 |
-
|
| 486 |
-
gr.update(
|
| 487 |
gr.update(visible=False), # validation_progress
|
| 488 |
gr.update(visible=False), # report_group
|
| 489 |
None, # report_text
|
| 490 |
None # report_md
|
| 491 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
|
| 493 |
# Process the file and get results
|
| 494 |
results, report = process_file(file)
|
|
@@ -508,7 +555,7 @@ def create_ui():
|
|
| 508 |
f.write(report)
|
| 509 |
|
| 510 |
# Return final state
|
| 511 |
-
|
| 512 |
build_results_html(results), # validation_results
|
| 513 |
gr.update(visible=False), # validation_progress
|
| 514 |
gr.update(visible=True) if report else gr.update(visible=False), # report_group
|
|
@@ -545,11 +592,6 @@ def create_ui():
|
|
| 545 |
]
|
| 546 |
|
| 547 |
validate_btn.click(
|
| 548 |
-
fn=show_progress,
|
| 549 |
-
inputs=None,
|
| 550 |
-
outputs=[validation_results, validation_progress, report_group, report_text, report_md],
|
| 551 |
-
queue=False
|
| 552 |
-
).then(
|
| 553 |
fn=on_validate,
|
| 554 |
inputs=file_input,
|
| 555 |
outputs=[validation_results, validation_progress, report_group, report_text, report_md]
|
|
@@ -564,7 +606,7 @@ def create_ui():
|
|
| 564 |
# Footer
|
| 565 |
gr.HTML("""
|
| 566 |
<div style="text-align: center; margin-top: 20px;">
|
| 567 |
-
<p>Learn more about
|
| 568 |
</div>
|
| 569 |
""")
|
| 570 |
|
|
@@ -584,4 +626,4 @@ def create_ui():
|
|
| 584 |
|
| 585 |
if __name__ == "__main__":
|
| 586 |
app = create_ui()
|
| 587 |
-
app.launch()
|
|
|
|
| 21 |
return results, None
|
| 22 |
|
| 23 |
# Check 2: Croissant validation
|
| 24 |
+
croissant_valid, croissant_message, croissant_status = validate_croissant(json_data)
|
| 25 |
croissant_message = croissant_message.replace("\n✓\n", "\n")
|
| 26 |
+
results.append(("Croissant Schema Validation", croissant_valid, croissant_message, croissant_status))
|
| 27 |
|
| 28 |
if not croissant_valid:
|
| 29 |
return results, None
|
|
|
|
| 43 |
return results, report
|
| 44 |
|
| 45 |
def create_ui():
|
| 46 |
+
with gr.Blocks() as app:
|
| 47 |
gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
|
| 48 |
gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
|
| 49 |
gr.Markdown("""
|
|
|
|
| 85 |
# Now create the validation results section in a separate group
|
| 86 |
with gr.Group():
|
| 87 |
# Validation results
|
| 88 |
+
validation_results = gr.HTML(value="", visible=True)
|
| 89 |
validation_progress = gr.HTML(visible=False)
|
| 90 |
|
| 91 |
# Collapsible report section
|
|
|
|
| 99 |
report_text = gr.Textbox(
|
| 100 |
label="Report Content",
|
| 101 |
visible=True,
|
|
|
|
| 102 |
lines=10,
|
| 103 |
elem_id="report-text-box"
|
| 104 |
)
|
|
|
|
| 106 |
# Define CSS for the validation UI
|
| 107 |
gr.HTML("""
|
| 108 |
<style>
|
| 109 |
+
/* Import Google Sans from Google Fonts */
|
| 110 |
+
@import url('https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;600&display=swap');
|
| 111 |
+
|
| 112 |
/* Set max width and center the app */
|
| 113 |
.gradio-container {
|
| 114 |
max-width: 750px !important;
|
| 115 |
margin: 0 auto !important;
|
| 116 |
+
font-family: 'Google Sans', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif !important;
|
| 117 |
+
font-size: 16px !important;
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
.gradio-container * {
|
| 121 |
+
font-family: 'Google Sans', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif !important;
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
/* Breathing room inside tabs and groups */
|
| 125 |
+
.gradio-container .tab-nav {
|
| 126 |
+
gap: 6px !important;
|
| 127 |
+
}
|
| 128 |
+
.gradio-container .tabs {
|
| 129 |
+
padding-top: 14px !important;
|
| 130 |
+
}
|
| 131 |
+
.gradio-container .tab-nav button {
|
| 132 |
+
font-size: 15px !important;
|
| 133 |
+
padding: 12px 22px !important;
|
| 134 |
+
}
|
| 135 |
+
.gradio-container .tabitem button[class*="primary"] {
|
| 136 |
+
margin-top: 12px !important;
|
| 137 |
+
margin-bottom: 4px !important;
|
| 138 |
+
}
|
| 139 |
+
.gradio-container label, .gradio-container .label-wrap {
|
| 140 |
+
font-size: 15px !important;
|
| 141 |
+
margin-bottom: 6px !important;
|
| 142 |
}
|
| 143 |
|
| 144 |
/* Make basic containers transparent */
|
|
|
|
| 325 |
return [
|
| 326 |
"upload",
|
| 327 |
"""<div class="progress-status">Ready for upload</div>""",
|
| 328 |
+
gr.update(value=""), # Clear validation results
|
| 329 |
gr.update(visible=False), # Hide report group
|
| 330 |
None, # Clear report text
|
| 331 |
None, # Clear report file
|
|
|
|
| 336 |
return [
|
| 337 |
"url",
|
| 338 |
"""<div class="progress-status">Enter a URL to fetch</div>""",
|
| 339 |
+
gr.update(value=""), # Clear validation results
|
| 340 |
gr.update(visible=False), # Hide report group
|
| 341 |
None, # Clear report text
|
| 342 |
None, # Clear report file
|
|
|
|
| 357 |
if file is None:
|
| 358 |
return [
|
| 359 |
"""<div class="progress-status">Ready for upload</div>""",
|
| 360 |
+
gr.update(value=""), # Clear validation results
|
| 361 |
gr.update(visible=False), # Hide report group
|
| 362 |
None, # Clear report text
|
| 363 |
None # Clear report file
|
|
|
|
| 365 |
|
| 366 |
return [
|
| 367 |
"""<div class="progress-status">✅ File uploaded successfully</div>""",
|
| 368 |
+
gr.update(value=""), # Clear validation results
|
| 369 |
gr.update(visible=False), # Hide report group
|
| 370 |
None, # Clear report text
|
| 371 |
None # Clear report file
|
|
|
|
| 375 |
if not url:
|
| 376 |
return [
|
| 377 |
"""<div class="progress-status">Please enter a URL</div>""",
|
| 378 |
+
gr.update(value=""),
|
| 379 |
gr.update(visible=False),
|
| 380 |
None,
|
| 381 |
None
|
|
|
|
| 393 |
results = []
|
| 394 |
results.append(("JSON Format Validation", True, "The URL returned valid JSON."))
|
| 395 |
|
| 396 |
+
croissant_valid, croissant_message, croissant_status = validate_croissant(json_data)
|
| 397 |
+
results.append(("Croissant Schema Validation", croissant_valid, croissant_message, croissant_status))
|
| 398 |
|
| 399 |
if not croissant_valid:
|
| 400 |
return [
|
|
|
|
| 432 |
error_message = f"Error fetching URL: {str(e)}"
|
| 433 |
return [
|
| 434 |
f"""<div class="progress-status">{error_message}</div>""",
|
| 435 |
+
gr.update(value=""),
|
| 436 |
gr.update(visible=False),
|
| 437 |
None,
|
| 438 |
None
|
|
|
|
| 441 |
error_message = f"URL did not return valid JSON: {str(e)}"
|
| 442 |
return [
|
| 443 |
f"""<div class="progress-status">{error_message}</div>""",
|
| 444 |
+
gr.update(value=""),
|
| 445 |
gr.update(visible=False),
|
| 446 |
None,
|
| 447 |
None
|
|
|
|
| 450 |
error_message = f"Unexpected error: {str(e)}"
|
| 451 |
return [
|
| 452 |
f"""<div class="progress-status">{error_message}</div>""",
|
| 453 |
+
gr.update(value=""),
|
| 454 |
gr.update(visible=False),
|
| 455 |
None,
|
| 456 |
None
|
|
|
|
| 470 |
status_class = "status-success"
|
| 471 |
status_icon = "✓"
|
| 472 |
message_with_emoji = "✅ " + message
|
| 473 |
+
elif status == "warning":
|
| 474 |
status_class = "status-warning"
|
| 475 |
status_icon = "?"
|
| 476 |
+
if "Records" in test_name:
|
| 477 |
+
message_with_emoji = "⚠️ Could not automatically generate records. This is oftentimes not an issue (e.g. datasets could be too large or too complex), and it's not required to pass this test to submit to NeurIPS.\n\n" + message
|
| 478 |
+
else:
|
| 479 |
+
message_with_emoji = "⚠️ " + message
|
| 480 |
+
else: # error
|
| 481 |
status_class = "status-error"
|
| 482 |
status_icon = "✗"
|
| 483 |
message_with_emoji = "❌ " + message
|
|
|
|
| 513 |
|
| 514 |
def on_validate(file):
|
| 515 |
if file is None:
|
| 516 |
+
yield [
|
| 517 |
+
gr.update(value=""), # validation_results
|
| 518 |
gr.update(visible=False), # validation_progress
|
| 519 |
gr.update(visible=False), # report_group
|
| 520 |
None, # report_text
|
| 521 |
None # report_md
|
| 522 |
]
|
| 523 |
+
return
|
| 524 |
+
|
| 525 |
+
# Show progress spinner
|
| 526 |
+
progress_html = """
|
| 527 |
+
<div class="validation-progress">
|
| 528 |
+
<div class="loading-spinner"></div>
|
| 529 |
+
<span>Validating file...</span>
|
| 530 |
+
</div>
|
| 531 |
+
"""
|
| 532 |
+
yield [
|
| 533 |
+
gr.update(value=""), # validation_results
|
| 534 |
+
gr.update(visible=True, value=progress_html), # validation_progress
|
| 535 |
+
gr.update(visible=False), # report_group
|
| 536 |
+
None, # report_text
|
| 537 |
+
None # report_md
|
| 538 |
+
]
|
| 539 |
|
| 540 |
# Process the file and get results
|
| 541 |
results, report = process_file(file)
|
|
|
|
| 555 |
f.write(report)
|
| 556 |
|
| 557 |
# Return final state
|
| 558 |
+
yield [
|
| 559 |
build_results_html(results), # validation_results
|
| 560 |
gr.update(visible=False), # validation_progress
|
| 561 |
gr.update(visible=True) if report else gr.update(visible=False), # report_group
|
|
|
|
| 592 |
]
|
| 593 |
|
| 594 |
validate_btn.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 595 |
fn=on_validate,
|
| 596 |
inputs=file_input,
|
| 597 |
outputs=[validation_results, validation_progress, report_group, report_text, report_md]
|
|
|
|
| 606 |
# Footer
|
| 607 |
gr.HTML("""
|
| 608 |
<div style="text-align: center; margin-top: 20px;">
|
| 609 |
+
<p>Learn more about 🥐<a href="https://github.com/mlcommons/croissant" target="_blank">Croissant</a>.</p>
|
| 610 |
</div>
|
| 611 |
""")
|
| 612 |
|
|
|
|
| 626 |
|
| 627 |
if __name__ == "__main__":
|
| 628 |
app = create_ui()
|
| 629 |
+
app.launch(theme=gr.themes.Soft())
|
validation.py
CHANGED
|
@@ -51,19 +51,30 @@ def validate_json(file_path):
|
|
| 51 |
error_message = f"Error reading file: {str(e)}"
|
| 52 |
return False, error_message, None
|
| 53 |
|
|
|
|
|
|
|
| 54 |
def validate_croissant(json_data):
|
| 55 |
"""Validate that the JSON follows Croissant schema."""
|
| 56 |
try:
|
| 57 |
dataset = mlc.Dataset(jsonld=json_data)
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
except mlc.ValidationError as e:
|
| 60 |
error_details = traceback.format_exc()
|
| 61 |
error_message = f"Validation failed: {str(e)}\n\n{error_details}"
|
| 62 |
-
return False, error_message
|
| 63 |
except Exception as e:
|
| 64 |
error_details = traceback.format_exc()
|
| 65 |
error_message = f"Unexpected error during validation: {str(e)}\n\n{error_details}"
|
| 66 |
-
return False, error_message
|
| 67 |
|
| 68 |
def try_generate_record(record_collection):
|
| 69 |
try:
|
|
|
|
| 51 |
error_message = f"Error reading file: {str(e)}"
|
| 52 |
return False, error_message, None
|
| 53 |
|
| 54 |
+
REQUIRED_SCHEMA_FIELDS = ["license"]
|
| 55 |
+
|
| 56 |
def validate_croissant(json_data):
    """Check a parsed Croissant JSON-LD document and report the outcome.

    Args:
        json_data: The decoded JSON-LD content; it is handed to
            ``mlc.Dataset`` and queried with ``.get`` for required fields.

    Returns:
        A ``(valid, message, status)`` tuple where ``status`` is one of:

        * ``"pass"``    -- the dataset loaded and every entry of
          ``REQUIRED_SCHEMA_FIELDS`` is present with a truthy value.
        * ``"warning"`` -- the dataset loaded (``valid`` is still ``True``)
          but at least one required field is missing or empty.
        * ``"error"``   -- loading raised; ``valid`` is ``False`` and the
          message carries the exception text plus the traceback.
    """
    try:
        # Only the side effect matters here: building the Dataset is what
        # raises when the document is rejected; the object itself is unused.
        mlc.Dataset(jsonld=json_data)

        # A field counts as present only when its value is truthy, so an
        # empty string or null is treated the same as an absent key.
        all_required_present = all(
            json_data.get(field_name) for field_name in REQUIRED_SCHEMA_FIELDS
        )
        if all_required_present:
            return True, "The dataset passes Croissant validation.", "pass"

        license_warning = (
            "The dataset passes Croissant schema validation, but the `license` field is missing. "
            "Please add a `license` field to your Croissant file. "
            "Croissant recommends using the URL of a known license, e.g. one of the licenses listed at "
            "<a href='https://spdx.org/licenses/' target='_blank'>https://spdx.org/licenses/</a>. "
            "Note that if your paper is accepted at NeurIPS, a permissive license will be required."
        )
        return True, license_warning, "warning"
    except mlc.ValidationError as exc:
        trace_text = traceback.format_exc()
        failure_text = f"Validation failed: {str(exc)}\n\n{trace_text}"
    except Exception as exc:
        # Boundary handler: anything else is surfaced to the caller as an
        # error result rather than propagated.
        trace_text = traceback.format_exc()
        failure_text = f"Unexpected error during validation: {str(exc)}\n\n{trace_text}"
    return False, failure_text, "error"
|
| 78 |
|
| 79 |
def try_generate_record(record_collection):
|
| 80 |
try:
|