JoaquinVanschoren committed on
Commit
9c3a4be
·
1 Parent(s): 8ed167c

timeout warning

Browse files
Files changed (1) hide show
  1. app.py +37 -50
app.py CHANGED
@@ -17,69 +17,41 @@ import json
17
  import time
18
  import traceback
19
  from validation import validate_json, validate_croissant, validate_records, generate_validation_report
20
- import threading
21
 
22
  def process_file(file):
23
  results = []
24
  json_data = None
25
- timer = None
26
- warning_text = None # to be set if timer fires
27
 
28
  filename = file.name.split("/")[-1]
29
 
30
- # JSON validation
31
  json_valid, json_message, json_data = validate_json(file.name)
32
  json_message = json_message.replace("\n✓\n", "\n")
33
  results.append(("JSON Format Validation", json_valid, json_message, "pass" if json_valid else "error"))
 
34
  if not json_valid:
35
- return results, None, None
36
 
37
- # Schema validation
38
  croissant_valid, croissant_message = validate_croissant(json_data)
39
  croissant_message = croissant_message.replace("\n✓\n", "\n")
40
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
 
41
  if not croissant_valid:
42
- return results, None, None
43
-
44
- # Start timer before records validation
45
- fired = threading.Event()
46
-
47
- def trigger_warning():
48
- nonlocal warning_text
49
- warning_text = """
50
- ⚠️ <b>This is taking longer than usual</b>. It is possible that this checker is currently being used by a lot of people
51
- at the same time, which may trigger rate limiting by the platform hosting your data. The app will then try again and may get into a very long loop.<br><br>
52
- In that case, we recommend using any of the following options:
53
- <ul style="text-align:left; margin: 0 auto; display:inline-block;">
54
- <li>🔁 Duplicate this Space on Hugging Face</li>
55
- <li>💻 Run it locally (GitHub or Docker)</li>
56
- <li>🥐 Use <code>mlcroissant</code> from <a href="https://github.com/mlcommons/croissant" target="_blank">GitHub</a></li>
57
- </ul>
58
- """
59
- fired.set()
60
-
61
- timer = threading.Timer(0.1, trigger_warning)
62
- timer.start()
63
-
64
- try:
65
- records_valid, records_message, records_status = validate_records(json_data)
66
- finally:
67
- timer.cancel()
68
-
69
- if fired.is_set():
70
- warning_html_update = gr.update(value=warning_text, visible=True)
71
- else:
72
- warning_html_update = gr.update(visible=False)
73
 
 
 
74
  records_message = records_message.replace("\n✓\n", "\n")
75
  results.append(("Records Generation Test", records_valid, records_message, records_status))
76
 
 
77
  report = generate_validation_report(filename, json_data, results)
78
- return results, report, warning_html_update
 
79
 
80
  def create_ui():
81
  with gr.Blocks(theme=gr.themes.Soft()) as app:
82
- delayed_warning_html = gr.HTML("", visible=False)
83
  gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
84
  gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
85
  gr.Markdown("""
@@ -510,12 +482,14 @@ def create_ui():
510
  gr.update(visible=False), # validation_results
511
  gr.update(visible=False), # validation_progress
512
  gr.update(visible=False), # report_group
513
- None, None, # report_text, report_md
514
- gr.update(visible=False) # delayed_warning_html
515
  ]
516
 
517
- results, report, warning_html_update = process_file(file)
 
518
 
 
519
  try:
520
  with open(file.name, 'r') as f:
521
  json_data = json.load(f)
@@ -523,18 +497,19 @@ def create_ui():
523
  except:
524
  dataset_name = 'unnamed'
525
 
 
526
  report_filename = f"report_croissant-validation_{dataset_name}.md"
527
  if report:
528
  with open(report_filename, "w") as f:
529
  f.write(report)
530
 
 
531
  return [
532
- build_results_html(results),
533
- gr.update(visible=False),
534
- gr.update(visible=True) if report else gr.update(visible=False),
535
- report if report else None,
536
- report_filename if report else None,
537
- warning_html_update or gr.update(visible=False)
538
  ]
539
 
540
  # Connect UI events to functions with updated outputs
@@ -564,7 +539,7 @@ def create_ui():
564
  None, # report_text
565
  None # report_md
566
  ]
567
-
568
  validate_btn.click(
569
  fn=show_progress,
570
  inputs=None,
@@ -573,7 +548,7 @@ def create_ui():
573
  ).then(
574
  fn=on_validate,
575
  inputs=file_input,
576
- outputs=[validation_results, validation_progress, report_group, report_text, report_md, delayed_warning_html]
577
  )
578
 
579
  fetch_btn.click(
@@ -588,6 +563,18 @@ def create_ui():
588
  <p>Learn more about <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant</a>.</p>
589
  </div>
590
  """)
 
 
 
 
 
 
 
 
 
 
 
 
591
 
592
  return app
593
 
 
17
  import time
18
  import traceback
19
  from validation import validate_json, validate_croissant, validate_records, generate_validation_report
 
20
 
21
  def process_file(file):
22
  results = []
23
  json_data = None
 
 
24
 
25
  filename = file.name.split("/")[-1]
26
 
27
+ # Check 1: JSON validation
28
  json_valid, json_message, json_data = validate_json(file.name)
29
  json_message = json_message.replace("\n✓\n", "\n")
30
  results.append(("JSON Format Validation", json_valid, json_message, "pass" if json_valid else "error"))
31
+
32
  if not json_valid:
33
+ return results, None
34
 
35
+ # Check 2: Croissant validation
36
  croissant_valid, croissant_message = validate_croissant(json_data)
37
  croissant_message = croissant_message.replace("\n✓\n", "\n")
38
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
39
+
40
  if not croissant_valid:
41
+ return results, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ # Check 3: Records validation (with timeout-safe and error-specific logic)
44
+ records_valid, records_message, records_status = validate_records(json_data)
45
  records_message = records_message.replace("\n✓\n", "\n")
46
  results.append(("Records Generation Test", records_valid, records_message, records_status))
47
 
48
+ # Generate final report
49
  report = generate_validation_report(filename, json_data, results)
50
+
51
+ return results, report
52
 
53
  def create_ui():
54
  with gr.Blocks(theme=gr.themes.Soft()) as app:
 
55
  gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
56
  gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
57
  gr.Markdown("""
 
482
  gr.update(visible=False), # validation_results
483
  gr.update(visible=False), # validation_progress
484
  gr.update(visible=False), # report_group
485
+ None, # report_text
486
+ None # report_md
487
  ]
488
 
489
+ # Process the file and get results
490
+ results, report = process_file(file)
491
 
492
+ # Extract dataset name from the JSON for the report filename
493
  try:
494
  with open(file.name, 'r') as f:
495
  json_data = json.load(f)
 
497
  except:
498
  dataset_name = 'unnamed'
499
 
500
+ # Save report to file with new naming convention
501
  report_filename = f"report_croissant-validation_{dataset_name}.md"
502
  if report:
503
  with open(report_filename, "w") as f:
504
  f.write(report)
505
 
506
+ # Return final state
507
  return [
508
+ build_results_html(results), # validation_results
509
+ gr.update(visible=False), # validation_progress
510
+ gr.update(visible=True) if report else gr.update(visible=False), # report_group
511
+ report if report else None, # report_text
512
+ report_filename if report else None # report_md
 
513
  ]
514
 
515
  # Connect UI events to functions with updated outputs
 
539
  None, # report_text
540
  None # report_md
541
  ]
542
+
543
  validate_btn.click(
544
  fn=show_progress,
545
  inputs=None,
 
548
  ).then(
549
  fn=on_validate,
550
  inputs=file_input,
551
+ outputs=[validation_results, validation_progress, report_group, report_text, report_md]
552
  )
553
 
554
  fetch_btn.click(
 
563
  <p>Learn more about <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant</a>.</p>
564
  </div>
565
  """)
566
+
567
+ gr.HTML("""
568
+ <div class="progress-status" style="text-align: left; color: #d35400;">
569
+ ⚠️ It is possible that this validator is currently being used by a lot of people at the same time, which may trigger rate limiting by the platform hosting your data.
570
+ The app will then try again and may get into a very long loop. If it takes too long to run, we recommend using any of the following options:
571
+ <ul style="text-align:left; margin: 0 auto; display:inline-block;">
572
+ <li>🔁 Click the button with the three dots (⋯) above and select "Duplicate this Space" to run this app in your own Hugging Face space.</li>
573
+ <li>💻 Click the button with the three dots (⋯) above and select "Run Locally" and then "Clone (git)" to get instructions to run the checker locally. You can also use docker option (you don't need the tokens).</li>
574
+ <li>🥐 Run the Croissant validation code yourself (<a href="https://github.com/mlcommons/croissant" target="_blank">GitHub</a>), e.g. with <a href="https://github.com/mlcommons/croissant/tree/7a632f34438e9c8e3812c6a0049898560259c6d4/python/mlcroissant/mlcroissant/scripts" target="_blank">these scripts</a> (validate and load).</li>
575
+ </ul>
576
+ </div>
577
+ """)
578
 
579
  return app
580