JoaquinVanschoren committed on
Commit
2060674
·
1 Parent(s): 70b6b21

requirements fix and licence warnings

Browse files
Files changed (2) hide show
  1. app.py +70 -28
  2. validation.py +14 -3
app.py CHANGED
@@ -21,9 +21,9 @@ def process_file(file):
21
  return results, None
22
 
23
  # Check 2: Croissant validation
24
- croissant_valid, croissant_message = validate_croissant(json_data)
25
  croissant_message = croissant_message.replace("\n✓\n", "\n")
26
- results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
27
 
28
  if not croissant_valid:
29
  return results, None
@@ -43,7 +43,7 @@ def process_file(file):
43
  return results, report
44
 
45
  def create_ui():
46
- with gr.Blocks(theme=gr.themes.Soft()) as app:
47
  gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
48
  gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
49
  gr.Markdown("""
@@ -85,7 +85,7 @@ def create_ui():
85
  # Now create the validation results section in a separate group
86
  with gr.Group():
87
  # Validation results
88
- validation_results = gr.HTML(visible=False)
89
  validation_progress = gr.HTML(visible=False)
90
 
91
  # Collapsible report section
@@ -99,7 +99,6 @@ def create_ui():
99
  report_text = gr.Textbox(
100
  label="Report Content",
101
  visible=True,
102
- show_copy_button=True,
103
  lines=10,
104
  elem_id="report-text-box"
105
  )
@@ -107,10 +106,39 @@ def create_ui():
107
  # Define CSS for the validation UI
108
  gr.HTML("""
109
  <style>
 
 
 
110
  /* Set max width and center the app */
111
  .gradio-container {
112
  max-width: 750px !important;
113
  margin: 0 auto !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  }
115
 
116
  /* Make basic containers transparent */
@@ -297,7 +325,7 @@ def create_ui():
297
  return [
298
  "upload",
299
  """<div class="progress-status">Ready for upload</div>""",
300
- gr.update(visible=False),
301
  gr.update(visible=False), # Hide report group
302
  None, # Clear report text
303
  None, # Clear report file
@@ -308,7 +336,7 @@ def create_ui():
308
  return [
309
  "url",
310
  """<div class="progress-status">Enter a URL to fetch</div>""",
311
- gr.update(visible=False),
312
  gr.update(visible=False), # Hide report group
313
  None, # Clear report text
314
  None, # Clear report file
@@ -329,7 +357,7 @@ def create_ui():
329
  if file is None:
330
  return [
331
  """<div class="progress-status">Ready for upload</div>""",
332
- gr.update(visible=False),
333
  gr.update(visible=False), # Hide report group
334
  None, # Clear report text
335
  None # Clear report file
@@ -337,7 +365,7 @@ def create_ui():
337
 
338
  return [
339
  """<div class="progress-status">✅ File uploaded successfully</div>""",
340
- gr.update(visible=False),
341
  gr.update(visible=False), # Hide report group
342
  None, # Clear report text
343
  None # Clear report file
@@ -347,7 +375,7 @@ def create_ui():
347
  if not url:
348
  return [
349
  """<div class="progress-status">Please enter a URL</div>""",
350
- gr.update(visible=False),
351
  gr.update(visible=False),
352
  None,
353
  None
@@ -365,8 +393,8 @@ def create_ui():
365
  results = []
366
  results.append(("JSON Format Validation", True, "The URL returned valid JSON."))
367
 
368
- croissant_valid, croissant_message = validate_croissant(json_data)
369
- results.append(("Croissant Schema Validation", croissant_valid, croissant_message))
370
 
371
  if not croissant_valid:
372
  return [
@@ -404,7 +432,7 @@ def create_ui():
404
  error_message = f"Error fetching URL: {str(e)}"
405
  return [
406
  f"""<div class="progress-status">{error_message}</div>""",
407
- gr.update(visible=False),
408
  gr.update(visible=False),
409
  None,
410
  None
@@ -413,7 +441,7 @@ def create_ui():
413
  error_message = f"URL did not return valid JSON: {str(e)}"
414
  return [
415
  f"""<div class="progress-status">{error_message}</div>""",
416
- gr.update(visible=False),
417
  gr.update(visible=False),
418
  None,
419
  None
@@ -422,7 +450,7 @@ def create_ui():
422
  error_message = f"Unexpected error: {str(e)}"
423
  return [
424
  f"""<div class="progress-status">{error_message}</div>""",
425
- gr.update(visible=False),
426
  gr.update(visible=False),
427
  None,
428
  None
@@ -442,11 +470,14 @@ def create_ui():
442
  status_class = "status-success"
443
  status_icon = "✓"
444
  message_with_emoji = "✅ " + message
445
- elif status == "warning" and "Records" in test_name:
446
  status_class = "status-warning"
447
  status_icon = "?"
448
- message_with_emoji = "⚠️ Could not automatically generate records. This is oftentimes not an issue (e.g. datasets could be too large or too complex), and it's not required to pass this test to submit to NeurIPS.\n\n" + message
449
- else: # error or non-records warning
 
 
 
450
  status_class = "status-error"
451
  status_icon = "✗"
452
  message_with_emoji = "❌ " + message
@@ -482,13 +513,29 @@ def create_ui():
482
 
483
  def on_validate(file):
484
  if file is None:
485
- return [
486
- gr.update(visible=False), # validation_results
487
  gr.update(visible=False), # validation_progress
488
  gr.update(visible=False), # report_group
489
  None, # report_text
490
  None # report_md
491
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
 
493
  # Process the file and get results
494
  results, report = process_file(file)
@@ -508,7 +555,7 @@ def create_ui():
508
  f.write(report)
509
 
510
  # Return final state
511
- return [
512
  build_results_html(results), # validation_results
513
  gr.update(visible=False), # validation_progress
514
  gr.update(visible=True) if report else gr.update(visible=False), # report_group
@@ -545,11 +592,6 @@ def create_ui():
545
  ]
546
 
547
  validate_btn.click(
548
- fn=show_progress,
549
- inputs=None,
550
- outputs=[validation_results, validation_progress, report_group, report_text, report_md],
551
- queue=False
552
- ).then(
553
  fn=on_validate,
554
  inputs=file_input,
555
  outputs=[validation_results, validation_progress, report_group, report_text, report_md]
@@ -564,7 +606,7 @@ def create_ui():
564
  # Footer
565
  gr.HTML("""
566
  <div style="text-align: center; margin-top: 20px;">
567
- <p>Learn more about <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant</a>.</p>
568
  </div>
569
  """)
570
 
@@ -584,4 +626,4 @@ def create_ui():
584
 
585
  if __name__ == "__main__":
586
  app = create_ui()
587
- app.launch()
 
21
  return results, None
22
 
23
  # Check 2: Croissant validation
24
+ croissant_valid, croissant_message, croissant_status = validate_croissant(json_data)
25
  croissant_message = croissant_message.replace("\n✓\n", "\n")
26
+ results.append(("Croissant Schema Validation", croissant_valid, croissant_message, croissant_status))
27
 
28
  if not croissant_valid:
29
  return results, None
 
43
  return results, report
44
 
45
  def create_ui():
46
+ with gr.Blocks() as app:
47
  gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
48
  gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
49
  gr.Markdown("""
 
85
  # Now create the validation results section in a separate group
86
  with gr.Group():
87
  # Validation results
88
+ validation_results = gr.HTML(value="", visible=True)
89
  validation_progress = gr.HTML(visible=False)
90
 
91
  # Collapsible report section
 
99
  report_text = gr.Textbox(
100
  label="Report Content",
101
  visible=True,
 
102
  lines=10,
103
  elem_id="report-text-box"
104
  )
 
106
  # Define CSS for the validation UI
107
  gr.HTML("""
108
  <style>
109
+ /* Import Google Sans from Google Fonts */
110
+ @import url('https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;600&display=swap');
111
+
112
  /* Set max width and center the app */
113
  .gradio-container {
114
  max-width: 750px !important;
115
  margin: 0 auto !important;
116
+ font-family: 'Google Sans', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif !important;
117
+ font-size: 16px !important;
118
+ }
119
+
120
+ .gradio-container * {
121
+ font-family: 'Google Sans', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif !important;
122
+ }
123
+
124
+ /* Breathing room inside tabs and groups */
125
+ .gradio-container .tab-nav {
126
+ gap: 6px !important;
127
+ }
128
+ .gradio-container .tabs {
129
+ padding-top: 14px !important;
130
+ }
131
+ .gradio-container .tab-nav button {
132
+ font-size: 15px !important;
133
+ padding: 12px 22px !important;
134
+ }
135
+ .gradio-container .tabitem button[class*="primary"] {
136
+ margin-top: 12px !important;
137
+ margin-bottom: 4px !important;
138
+ }
139
+ .gradio-container label, .gradio-container .label-wrap {
140
+ font-size: 15px !important;
141
+ margin-bottom: 6px !important;
142
  }
143
 
144
  /* Make basic containers transparent */
 
325
  return [
326
  "upload",
327
  """<div class="progress-status">Ready for upload</div>""",
328
+ gr.update(value=""), # Clear validation results
329
  gr.update(visible=False), # Hide report group
330
  None, # Clear report text
331
  None, # Clear report file
 
336
  return [
337
  "url",
338
  """<div class="progress-status">Enter a URL to fetch</div>""",
339
+ gr.update(value=""), # Clear validation results
340
  gr.update(visible=False), # Hide report group
341
  None, # Clear report text
342
  None, # Clear report file
 
357
  if file is None:
358
  return [
359
  """<div class="progress-status">Ready for upload</div>""",
360
+ gr.update(value=""), # Clear validation results
361
  gr.update(visible=False), # Hide report group
362
  None, # Clear report text
363
  None # Clear report file
 
365
 
366
  return [
367
  """<div class="progress-status">✅ File uploaded successfully</div>""",
368
+ gr.update(value=""), # Clear validation results
369
  gr.update(visible=False), # Hide report group
370
  None, # Clear report text
371
  None # Clear report file
 
375
  if not url:
376
  return [
377
  """<div class="progress-status">Please enter a URL</div>""",
378
+ gr.update(value=""),
379
  gr.update(visible=False),
380
  None,
381
  None
 
393
  results = []
394
  results.append(("JSON Format Validation", True, "The URL returned valid JSON."))
395
 
396
+ croissant_valid, croissant_message, croissant_status = validate_croissant(json_data)
397
+ results.append(("Croissant Schema Validation", croissant_valid, croissant_message, croissant_status))
398
 
399
  if not croissant_valid:
400
  return [
 
432
  error_message = f"Error fetching URL: {str(e)}"
433
  return [
434
  f"""<div class="progress-status">{error_message}</div>""",
435
+ gr.update(value=""),
436
  gr.update(visible=False),
437
  None,
438
  None
 
441
  error_message = f"URL did not return valid JSON: {str(e)}"
442
  return [
443
  f"""<div class="progress-status">{error_message}</div>""",
444
+ gr.update(value=""),
445
  gr.update(visible=False),
446
  None,
447
  None
 
450
  error_message = f"Unexpected error: {str(e)}"
451
  return [
452
  f"""<div class="progress-status">{error_message}</div>""",
453
+ gr.update(value=""),
454
  gr.update(visible=False),
455
  None,
456
  None
 
470
  status_class = "status-success"
471
  status_icon = "✓"
472
  message_with_emoji = "✅ " + message
473
+ elif status == "warning":
474
  status_class = "status-warning"
475
  status_icon = "?"
476
+ if "Records" in test_name:
477
+ message_with_emoji = "⚠️ Could not automatically generate records. This is oftentimes not an issue (e.g. datasets could be too large or too complex), and it's not required to pass this test to submit to NeurIPS.\n\n" + message
478
+ else:
479
+ message_with_emoji = "⚠️ " + message
480
+ else: # error
481
  status_class = "status-error"
482
  status_icon = "✗"
483
  message_with_emoji = "❌ " + message
 
513
 
514
  def on_validate(file):
515
  if file is None:
516
+ yield [
517
+ gr.update(value=""), # validation_results
518
  gr.update(visible=False), # validation_progress
519
  gr.update(visible=False), # report_group
520
  None, # report_text
521
  None # report_md
522
  ]
523
+ return
524
+
525
+ # Show progress spinner
526
+ progress_html = """
527
+ <div class="validation-progress">
528
+ <div class="loading-spinner"></div>
529
+ <span>Validating file...</span>
530
+ </div>
531
+ """
532
+ yield [
533
+ gr.update(value=""), # validation_results
534
+ gr.update(visible=True, value=progress_html), # validation_progress
535
+ gr.update(visible=False), # report_group
536
+ None, # report_text
537
+ None # report_md
538
+ ]
539
 
540
  # Process the file and get results
541
  results, report = process_file(file)
 
555
  f.write(report)
556
 
557
  # Return final state
558
+ yield [
559
  build_results_html(results), # validation_results
560
  gr.update(visible=False), # validation_progress
561
  gr.update(visible=True) if report else gr.update(visible=False), # report_group
 
592
  ]
593
 
594
  validate_btn.click(
 
 
 
 
 
595
  fn=on_validate,
596
  inputs=file_input,
597
  outputs=[validation_results, validation_progress, report_group, report_text, report_md]
 
606
  # Footer
607
  gr.HTML("""
608
  <div style="text-align: center; margin-top: 20px;">
609
+ <p>Learn more about 🥐<a href="https://github.com/mlcommons/croissant" target="_blank">Croissant</a>.</p>
610
  </div>
611
  """)
612
 
 
626
 
627
  if __name__ == "__main__":
628
  app = create_ui()
629
+ app.launch(theme=gr.themes.Soft())
validation.py CHANGED
@@ -51,19 +51,30 @@ def validate_json(file_path):
51
  error_message = f"Error reading file: {str(e)}"
52
  return False, error_message, None
53
 
 
 
54
  def validate_croissant(json_data):
55
  """Validate that the JSON follows Croissant schema."""
56
  try:
57
  dataset = mlc.Dataset(jsonld=json_data)
58
- return True, "The dataset passes Croissant validation."
 
 
 
 
 
 
 
 
 
59
  except mlc.ValidationError as e:
60
  error_details = traceback.format_exc()
61
  error_message = f"Validation failed: {str(e)}\n\n{error_details}"
62
- return False, error_message
63
  except Exception as e:
64
  error_details = traceback.format_exc()
65
  error_message = f"Unexpected error during validation: {str(e)}\n\n{error_details}"
66
- return False, error_message
67
 
68
  def try_generate_record(record_collection):
69
  try:
 
51
  error_message = f"Error reading file: {str(e)}"
52
  return False, error_message, None
53
 
54
# Top-level JSON-LD keys whose absence yields a warning (not a failure).
REQUIRED_SCHEMA_FIELDS = ["license"]


def validate_croissant(json_data):
    """Validate that the JSON follows the Croissant schema.

    Args:
        json_data: Parsed Croissant JSON-LD document. It is passed to
            ``mlc.Dataset`` and then queried with ``.get`` for each name in
            ``REQUIRED_SCHEMA_FIELDS``.

    Returns:
        A ``(valid, message, status)`` tuple. ``status`` is ``"pass"`` when
        validation succeeds, ``"warning"`` when it succeeds but one of
        ``REQUIRED_SCHEMA_FIELDS`` is missing or empty, and ``"error"`` when
        ``mlc.Dataset`` (or the field check) raises.
    """
    try:
        # Constructing the Dataset is what performs the validation; the
        # resulting object itself is not needed here.
        mlc.Dataset(jsonld=json_data)

        missing = [f for f in REQUIRED_SCHEMA_FIELDS if not json_data.get(f)]
        if missing:
            # Build the message from the fields that are actually missing so
            # it stays accurate if REQUIRED_SCHEMA_FIELDS is extended.
            names = ", ".join(f"`{name}`" for name in missing)
            if len(missing) == 1:
                message = (
                    "The dataset passes Croissant schema validation, "
                    f"but the {names} field is missing. "
                    f"Please add a {names} field to your Croissant file."
                )
            else:
                message = (
                    "The dataset passes Croissant schema validation, "
                    f"but the {names} fields are missing. "
                    "Please add them to your Croissant file."
                )
            if "license" in missing:
                # License-specific guidance only applies when the license
                # itself is the missing field.
                message += (
                    " Croissant recommends using the URL of a known license, e.g. one of the licenses listed at "
                    "<a href='https://spdx.org/licenses/' target='_blank'>https://spdx.org/licenses/</a>. "
                    "Note that if your paper is accepted at NeurIPS, a permissive license will be required."
                )
            return True, message, "warning"
        return True, "The dataset passes Croissant validation.", "pass"
    except mlc.ValidationError as e:
        error_details = traceback.format_exc()
        error_message = f"Validation failed: {str(e)}\n\n{error_details}"
        return False, error_message, "error"
    except Exception as e:
        # Deliberate catch-all: any other failure is reported to the UI as a
        # validation error rather than crashing the app.
        error_details = traceback.format_exc()
        error_message = f"Unexpected error during validation: {str(e)}\n\n{error_details}"
        return False, error_message, "error"
78
 
79
  def try_generate_record(record_collection):
80
  try: