CoCoOne committed on
Commit
1259d99
·
1 Parent(s): 7df30c5

Harden submission cleanup and ID allocation

Browse files
Files changed (3) hide show
  1. app.py +187 -27
  2. repo_ops.py +10 -0
  3. validator.py +93 -1
app.py CHANGED
@@ -5,29 +5,54 @@ import os
5
  from pathlib import Path
6
 
7
  import gradio as gr
 
8
 
9
  try:
10
- from .repo_ops import DEFAULT_REPO_ID, allocate_next_task_id, create_dataset_pr, list_existing_task_ids, load_hf_token
 
 
 
 
 
 
 
11
  from .validator import (
12
  DOMAINS,
13
  PreparedSubmission,
14
  SubmissionMetadata,
15
  ValidationError,
16
  build_public_report,
 
 
 
17
  cleanup_work_dir,
18
  normalize_domain_token,
 
 
19
  validate_and_prepare_submission,
20
  )
21
  except ImportError:
22
- from repo_ops import DEFAULT_REPO_ID, allocate_next_task_id, create_dataset_pr, list_existing_task_ids, load_hf_token
 
 
 
 
 
 
 
23
  from validator import (
24
  DOMAINS,
25
  PreparedSubmission,
26
  SubmissionMetadata,
27
  ValidationError,
28
  build_public_report,
 
 
 
29
  cleanup_work_dir,
30
  normalize_domain_token,
 
 
31
  validate_and_prepare_submission,
32
  )
33
 
@@ -36,6 +61,18 @@ SPACE_TITLE = 'ResearchClawBench Task Submission'
36
  GITHUB_REPO_URL = 'https://github.com/InternScience/ResearchClawBench'
37
  DATASET_URL = f'https://huggingface.co/datasets/{DEFAULT_REPO_ID}'
38
  SPACE_URL = 'https://huggingface.co/spaces/InternScience/ResearchClawBench-Task-Submit'
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  CSS = """
41
  @import url('https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;600;700;800&display=swap');
@@ -469,11 +506,26 @@ def resolve_domain(selected_domain: str, custom_domain: str) -> str:
469
  return normalized
470
 
471
 
472
- def handle_archive_upload(archive_path: str | None):
 
 
473
  if not archive_path:
474
  return '', 'No ZIP file selected yet.'
475
- filename = Path(archive_path).name
476
- return archive_path, f'Selected ZIP: `{filename}`'
 
 
 
 
 
 
 
 
 
 
 
 
 
477
 
478
 
479
  def build_validation_markdown(prepared: PreparedSubmission) -> str:
@@ -498,6 +550,36 @@ def build_failure_markdown(message: str) -> str:
498
  return f'## Validation failed\n\n{bullets}'
499
 
500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  def validate_submission(
502
  archive_path: str,
503
  suggested_domain: str,
@@ -513,7 +595,16 @@ def validate_submission(
513
  cleanup_work_dir(current_state.get('work_dir'))
514
 
515
  if not archive_path:
516
- return None, '', '## Validation failed\n\n- Please upload a zip file.', '{}', gr.update(interactive=False), ''
 
 
 
 
 
 
 
 
 
517
 
518
  domain = resolve_domain(suggested_domain, custom_domain)
519
  token = load_hf_token()
@@ -533,57 +624,100 @@ def validate_submission(
533
  pr_ready = bool(token)
534
  return (
535
  prepared.to_state(),
 
536
  prepared.assigned_task_id,
537
  build_validation_markdown(prepared),
538
  json.dumps(build_public_report(prepared), indent=2, ensure_ascii=False),
539
  gr.update(interactive=pr_ready),
540
  '' if pr_ready else 'Validation passed, but PR creation is disabled until a write token is configured.',
 
541
  )
542
  except ValidationError as exc:
 
543
  return (
544
  None,
545
  '',
 
546
  build_failure_markdown(str(exc)),
547
  json.dumps({'status': 'error', 'errors': str(exc).splitlines()}, indent=2, ensure_ascii=False),
548
  gr.update(interactive=False),
549
  '',
 
550
  )
551
  except Exception as exc:
 
552
  return (
553
  None,
554
  '',
 
555
  build_failure_markdown(str(exc)),
556
  json.dumps({'status': 'error', 'errors': [str(exc)]}, indent=2, ensure_ascii=False),
557
  gr.update(interactive=False),
558
  '',
 
559
  )
560
 
561
 
562
- def create_pr(state: dict | None):
563
  if not state:
564
- return None, gr.update(interactive=False), '## PR creation failed\n\n- Validate a submission first.'
 
 
 
 
 
 
565
 
566
  prepared = PreparedSubmission.from_state(state)
567
  token = load_hf_token()
568
- try:
569
- commit_info = create_dataset_pr(prepared, repo_id=DEFAULT_REPO_ID, token=token)
570
- pr_url = commit_info.pr_url or commit_info.commit_url
571
- message = '\n'.join([
572
- '## PR created',
573
- '',
574
- f'- Task ID: `{prepared.assigned_task_id}`',
575
- f'- PR: {pr_url}',
576
- ])
577
- return None, gr.update(interactive=False), message
578
- except Exception as exc:
579
- return None, gr.update(interactive=False), build_failure_markdown(str(exc).strip() or 'Unknown PR creation error')
580
- finally:
581
- cleanup_work_dir(prepared.work_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582
 
583
 
584
  with gr.Blocks(title=SPACE_TITLE, fill_width=True) as demo:
585
- state = gr.State(None)
586
- archive_state = gr.State('')
587
 
588
  gr.HTML(build_hero_html())
589
 
@@ -674,7 +808,11 @@ with gr.Blocks(title=SPACE_TITLE, fill_width=True) as demo:
674
  with gr.Column(scale=1, min_width=0, elem_classes=['shell-spacer']):
675
  gr.HTML('')
676
 
677
- archive.upload(fn=handle_archive_upload, inputs=[archive], outputs=[archive_state, archive_notice])
 
 
 
 
678
 
679
  validate_btn.click(
680
  fn=validate_submission,
@@ -689,9 +827,31 @@ with gr.Blocks(title=SPACE_TITLE, fill_width=True) as demo:
689
  notes,
690
  state,
691
  ],
692
- outputs=[state, assigned_task_id, validation_md, validation_report, create_pr_btn, pr_md],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
693
  )
694
- create_pr_btn.click(fn=create_pr, inputs=[state], outputs=[state, create_pr_btn, pr_md])
695
 
696
 
697
  if __name__ == '__main__':
 
5
  from pathlib import Path
6
 
7
  import gradio as gr
8
+ from huggingface_hub.utils import HfHubHTTPError
9
 
10
  try:
11
+ from .repo_ops import (
12
+ DEFAULT_REPO_ID,
13
+ allocate_next_task_id,
14
+ create_dataset_pr,
15
+ get_repo_head_sha,
16
+ list_existing_task_ids,
17
+ load_hf_token,
18
+ )
19
  from .validator import (
20
  DOMAINS,
21
  PreparedSubmission,
22
  SubmissionMetadata,
23
  ValidationError,
24
  build_public_report,
25
+ cleanup_stale_managed_files,
26
+ cleanup_submission_state,
27
+ cleanup_uploaded_archive,
28
  cleanup_work_dir,
29
  normalize_domain_token,
30
+ persist_uploaded_archive,
31
+ stage_submission,
32
  validate_and_prepare_submission,
33
  )
34
  except ImportError:
35
+ from repo_ops import (
36
+ DEFAULT_REPO_ID,
37
+ allocate_next_task_id,
38
+ create_dataset_pr,
39
+ get_repo_head_sha,
40
+ list_existing_task_ids,
41
+ load_hf_token,
42
+ )
43
  from validator import (
44
  DOMAINS,
45
  PreparedSubmission,
46
  SubmissionMetadata,
47
  ValidationError,
48
  build_public_report,
49
+ cleanup_stale_managed_files,
50
+ cleanup_submission_state,
51
+ cleanup_uploaded_archive,
52
  cleanup_work_dir,
53
  normalize_domain_token,
54
+ persist_uploaded_archive,
55
+ stage_submission,
56
  validate_and_prepare_submission,
57
  )
58
 
 
61
  GITHUB_REPO_URL = 'https://github.com/InternScience/ResearchClawBench'
62
  DATASET_URL = f'https://huggingface.co/datasets/{DEFAULT_REPO_ID}'
63
  SPACE_URL = 'https://huggingface.co/spaces/InternScience/ResearchClawBench-Task-Submit'
64
+ STATE_TTL_SECONDS = int(os.environ.get('RCB_SPACE_STATE_TTL_SECONDS', '3600'))
65
+ STALE_WORK_DIR_TTL_SECONDS = int(
66
+ os.environ.get('RCB_SPACE_STALE_WORK_DIR_TTL_SECONDS', str(max(STATE_TTL_SECONDS * 2, 24 * 3600)))
67
+ )
68
+
69
+ _removed_stale_managed_files = cleanup_stale_managed_files(STALE_WORK_DIR_TTL_SECONDS)
70
+ if _removed_stale_managed_files:
71
+ print(
72
+ f'[startup] Removed {_removed_stale_managed_files} stale managed submission file(s) '
73
+ f'older than {STALE_WORK_DIR_TTL_SECONDS}s.',
74
+ flush=True,
75
+ )
76
 
77
  CSS = """
78
  @import url('https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;600;700;800&display=swap');
 
506
  return normalized
507
 
508
 
509
+ def handle_archive_upload(archive_path: str | None, current_archive_path: str | None):
510
+ if current_archive_path and current_archive_path != archive_path:
511
+ cleanup_uploaded_archive(current_archive_path)
512
  if not archive_path:
513
  return '', 'No ZIP file selected yet.'
514
+ managed_archive_path = persist_uploaded_archive(archive_path)
515
+ original_path = Path(archive_path)
516
+ managed_name = managed_archive_path.name
517
+ if managed_archive_path.resolve() != original_path.resolve():
518
+ try:
519
+ original_path.unlink()
520
+ except OSError:
521
+ pass
522
+ return str(managed_archive_path), f'Selected ZIP: `{managed_name}`'
523
+
524
+
525
+ def archive_notice_text(archive_path: str | None) -> str:
526
+ if not archive_path:
527
+ return 'No ZIP file selected yet.'
528
+ return f'Selected ZIP: `{Path(archive_path).name}`'
529
 
530
 
531
  def build_validation_markdown(prepared: PreparedSubmission) -> str:
 
550
  return f'## Validation failed\n\n{bullets}'
551
 
552
 
553
def refresh_prepared_submission_for_pr(
    prepared: PreparedSubmission,
    *,
    repo_id: str,
    token: str | None,
) -> tuple[PreparedSubmission, bool, str]:
    """Re-check the assigned task ID against the repo just before opening a PR.

    Fetches the repo head SHA and the existing task IDs. If the submission's
    assigned ID is already taken, a new ID is allocated for its domain, stored
    on ``prepared`` (mutated in place) and the submission is re-staged under
    that ID.

    Returns:
        ``(prepared, reassigned, head_sha)`` where ``reassigned`` tells whether
        a new task ID had to be allocated.
    """
    head_sha = get_repo_head_sha(repo_id=repo_id, token=token)
    taken_ids = list_existing_task_ids(repo_id=repo_id, token=token)

    # Common case: the validated ID is still free, nothing to change.
    if prepared.assigned_task_id not in taken_ids:
        return prepared, False, head_sha

    replacement_id = allocate_next_task_id(prepared.metadata.domain, taken_ids)
    prepared.assigned_task_id = replacement_id
    restaged_dir = stage_submission(prepared.uploaded_task_dir, replacement_id, prepared.work_dir)
    prepared.staged_task_dir = str(restaged_dir)
    return prepared, True, head_sha
573
+
574
+
575
def is_retryable_pr_error(exc: Exception) -> bool:
    """Tell whether ``exc`` is a Hub HTTP error that is worth one more attempt.

    Only ``HfHubHTTPError`` instances qualify, and only when the response
    status is 409 or 412 or the error text mentions a parent commit, a
    conflict, or staleness.
    """
    if not isinstance(exc, HfHubHTTPError):
        return False

    response = getattr(exc, 'response', None)
    if getattr(response, 'status_code', None) in {409, 412}:
        return True

    text = str(exc).lower()
    return any(marker in text for marker in ('parent commit', 'conflict', 'stale'))
581
+
582
+
583
  def validate_submission(
584
  archive_path: str,
585
  suggested_domain: str,
 
595
  cleanup_work_dir(current_state.get('work_dir'))
596
 
597
  if not archive_path:
598
+ return (
599
+ None,
600
+ '',
601
+ '',
602
+ '## Validation failed\n\n- Please upload a zip file.',
603
+ '{}',
604
+ gr.update(interactive=False),
605
+ '',
606
+ archive_notice_text(None),
607
+ )
608
 
609
  domain = resolve_domain(suggested_domain, custom_domain)
610
  token = load_hf_token()
 
624
  pr_ready = bool(token)
625
  return (
626
  prepared.to_state(),
627
+ archive_path,
628
  prepared.assigned_task_id,
629
  build_validation_markdown(prepared),
630
  json.dumps(build_public_report(prepared), indent=2, ensure_ascii=False),
631
  gr.update(interactive=pr_ready),
632
  '' if pr_ready else 'Validation passed, but PR creation is disabled until a write token is configured.',
633
+ archive_notice_text(archive_path),
634
  )
635
  except ValidationError as exc:
636
+ cleanup_uploaded_archive(archive_path)
637
  return (
638
  None,
639
  '',
640
+ '',
641
  build_failure_markdown(str(exc)),
642
  json.dumps({'status': 'error', 'errors': str(exc).splitlines()}, indent=2, ensure_ascii=False),
643
  gr.update(interactive=False),
644
  '',
645
+ archive_notice_text(None),
646
  )
647
  except Exception as exc:
648
+ cleanup_uploaded_archive(archive_path)
649
  return (
650
  None,
651
  '',
652
+ '',
653
  build_failure_markdown(str(exc)),
654
  json.dumps({'status': 'error', 'errors': [str(exc)]}, indent=2, ensure_ascii=False),
655
  gr.update(interactive=False),
656
  '',
657
+ archive_notice_text(None),
658
  )
659
 
660
 
661
def create_pr(state: dict | None, archive_path: str | None):
    """Open the dataset PR for a validated submission, retrying once on conflict.

    Returns a 5-tuple matching the click handler outputs: submission state,
    archive state, PR button update, PR result markdown, and archive notice.
    On success the work dir and managed archive are cleaned up and the states
    are reset; on failure the states are kept so the user can try again.
    """
    if not state:
        return (
            None,
            '',
            gr.update(interactive=False),
            '## PR creation failed\n\n- Validate a submission first.',
            'No ZIP file selected yet.',
        )

    prepared = PreparedSubmission.from_state(state)
    token = load_hf_token()
    reassigned = False

    for attempt in (0, 1):
        try:
            prepared, was_reassigned, head_sha = refresh_prepared_submission_for_pr(
                prepared,
                repo_id=DEFAULT_REPO_ID,
                token=token,
            )
            if was_reassigned:
                reassigned = True
            commit_info = create_dataset_pr(
                prepared,
                repo_id=DEFAULT_REPO_ID,
                token=token,
                parent_commit=head_sha,
            )
            pr_url = commit_info.pr_url or commit_info.commit_url

            summary = ['## PR created', '', f'- Task ID: `{prepared.assigned_task_id}`']
            if reassigned:
                summary.append('- The task ID was reassigned at PR time because the previously validated ID is no longer available on the dataset main branch.')
            summary.append(f'- PR: {pr_url}')
            message = '\n'.join(summary)

            cleanup_work_dir(prepared.work_dir)
            cleanup_uploaded_archive(archive_path)
            return None, '', gr.update(interactive=False), message, archive_notice_text(None)
        except Exception as exc:
            retryable = is_retryable_pr_error(exc)
            # The first retryable failure gets one silent second attempt.
            if retryable and attempt == 0:
                continue

            message = str(exc).strip() or 'Unknown PR creation error'
            if retryable:
                message += '\nPlease click "Create Dataset PR" again. The dataset main branch changed while your PR was being created.'
            return (
                prepared.to_state(),
                archive_path or '',
                gr.update(interactive=bool(token)),
                build_failure_markdown(message),
                archive_notice_text(archive_path),
            )
716
 
717
 
718
  with gr.Blocks(title=SPACE_TITLE, fill_width=True) as demo:
719
+ state = gr.State(None, time_to_live=STATE_TTL_SECONDS, delete_callback=cleanup_submission_state)
720
+ archive_state = gr.State('', time_to_live=STATE_TTL_SECONDS, delete_callback=cleanup_uploaded_archive)
721
 
722
  gr.HTML(build_hero_html())
723
 
 
808
  with gr.Column(scale=1, min_width=0, elem_classes=['shell-spacer']):
809
  gr.HTML('')
810
 
811
+ archive.upload(
812
+ fn=handle_archive_upload,
813
+ inputs=[archive, archive_state],
814
+ outputs=[archive_state, archive_notice],
815
+ )
816
 
817
  validate_btn.click(
818
  fn=validate_submission,
 
827
  notes,
828
  state,
829
  ],
830
+ outputs=[
831
+ state,
832
+ archive_state,
833
+ assigned_task_id,
834
+ validation_md,
835
+ validation_report,
836
+ create_pr_btn,
837
+ pr_md,
838
+ archive_notice,
839
+ ],
840
+ )
841
+ create_pr_btn.click(
842
+ fn=create_pr,
843
+ inputs=[
844
+ state,
845
+ archive_state,
846
+ ],
847
+ outputs=[
848
+ state,
849
+ archive_state,
850
+ create_pr_btn,
851
+ pr_md,
852
+ archive_notice,
853
+ ],
854
  )
 
855
 
856
 
857
  if __name__ == '__main__':
repo_ops.py CHANGED
@@ -39,6 +39,14 @@ def list_existing_task_ids(repo_id: str = DEFAULT_REPO_ID, token: str | None = N
39
  return task_ids
40
 
41
 
 
 
 
 
 
 
 
 
42
  def allocate_next_task_id(domain: str, existing_task_ids: Iterable[str]) -> str:
43
  domain = normalize_domain_token(domain)
44
  if not DOMAIN_TOKEN_RE.fullmatch(domain):
@@ -80,6 +88,7 @@ def create_dataset_pr(
80
  *,
81
  repo_id: str = DEFAULT_REPO_ID,
82
  token: str | None = None,
 
83
  ):
84
  token = token or load_hf_token()
85
  if not token:
@@ -111,4 +120,5 @@ def create_dataset_pr(
111
  token=token,
112
  create_pr=True,
113
  revision='main',
 
114
  )
 
39
  return task_ids
40
 
41
 
42
def get_repo_head_sha(repo_id: str = DEFAULT_REPO_ID, token: str | None = None) -> str:
    """Return the commit SHA of ``main`` for the dataset repo ``repo_id``.

    Raises:
        RuntimeError: If the repo info carries no (or an empty) ``sha``.
    """
    info = HfApi(token=token).repo_info(
        repo_id=repo_id,
        repo_type='dataset',
        revision='main',
        token=token,
    )
    if getattr(info, 'sha', None):
        return info.sha
    raise RuntimeError(f'Failed to fetch HEAD SHA for dataset repo {repo_id}.')
48
+
49
+
50
  def allocate_next_task_id(domain: str, existing_task_ids: Iterable[str]) -> str:
51
  domain = normalize_domain_token(domain)
52
  if not DOMAIN_TOKEN_RE.fullmatch(domain):
 
88
  *,
89
  repo_id: str = DEFAULT_REPO_ID,
90
  token: str | None = None,
91
+ parent_commit: str | None = None,
92
  ):
93
  token = token or load_hf_token()
94
  if not token:
 
120
  token=token,
121
  create_pr=True,
122
  revision='main',
123
+ parent_commit=parent_commit,
124
  )
validator.py CHANGED
@@ -6,6 +6,8 @@ import re
6
  import shutil
7
  import stat
8
  import tempfile
 
 
9
  import zipfile
10
  from dataclasses import asdict, dataclass
11
  from pathlib import Path, PurePosixPath
@@ -43,6 +45,16 @@ IGNORED_ARCHIVE_NAMES = {'.DS_Store'}
43
  DEFAULT_MAX_FILES = int(os.environ.get('RCB_SPACE_MAX_FILES', '5000'))
44
  DEFAULT_MAX_TOTAL_BYTES = int(os.environ.get('RCB_SPACE_MAX_TOTAL_BYTES', str(5 * 1024 * 1024 * 1024)))
45
  DEFAULT_MAX_SINGLE_FILE_BYTES = int(os.environ.get('RCB_SPACE_MAX_SINGLE_FILE_BYTES', str(1024 * 1024 * 1024)))
 
 
 
 
 
 
 
 
 
 
46
 
47
 
48
  @dataclass
@@ -151,8 +163,88 @@ def cleanup_work_dir(work_dir: str | Path | None) -> None:
151
  shutil.rmtree(Path(work_dir), ignore_errors=True)
152
 
153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  def create_work_dir() -> Path:
155
- return Path(tempfile.mkdtemp(prefix='rcb_space_submit_'))
156
 
157
 
158
  def extract_submission_zip(
 
6
  import shutil
7
  import stat
8
  import tempfile
9
+ import time
10
+ import uuid
11
  import zipfile
12
  from dataclasses import asdict, dataclass
13
  from pathlib import Path, PurePosixPath
 
45
  DEFAULT_MAX_FILES = int(os.environ.get('RCB_SPACE_MAX_FILES', '5000'))
46
  DEFAULT_MAX_TOTAL_BYTES = int(os.environ.get('RCB_SPACE_MAX_TOTAL_BYTES', str(5 * 1024 * 1024 * 1024)))
47
  DEFAULT_MAX_SINGLE_FILE_BYTES = int(os.environ.get('RCB_SPACE_MAX_SINGLE_FILE_BYTES', str(1024 * 1024 * 1024)))
48
+ WORK_DIR_PREFIX = 'rcb_space_submit_'
49
+ ARCHIVE_PREFIX = 'rcb_space_upload_'
50
+ TEMP_ROOTS = tuple(
51
+ Path(root).resolve()
52
+ for root in {
53
+ tempfile.gettempdir(),
54
+ os.environ.get('GRADIO_TEMP_DIR', ''),
55
+ }
56
+ if root
57
+ )
58
 
59
 
60
  @dataclass
 
163
  shutil.rmtree(Path(work_dir), ignore_errors=True)
164
 
165
 
166
+ def cleanup_submission_state(state: dict[str, Any] | None) -> None:
167
+ if not state or not isinstance(state, dict):
168
+ return
169
+ cleanup_work_dir(state.get('work_dir'))
170
+
171
+
172
def _is_under_temp_root(path: Path) -> bool:
    """Return True when ``path`` resolves to one of ``TEMP_ROOTS`` or below one.

    A path that cannot be resolved is treated as being outside every temp root.
    """
    try:
        resolved = path.resolve()
    except (OSError, RuntimeError):
        # Before Python 3.13 a symlink loop surfaces as RuntimeError rather
        # than OSError; either way the path cannot be trusted.
        return False
    return any(root == resolved or root in resolved.parents for root in TEMP_ROOTS)
178
+
179
+
180
+ def cleanup_uploaded_archive(archive_path: str | Path | None) -> None:
181
+ if not archive_path:
182
+ return
183
+ path = Path(archive_path)
184
+ if not path.exists() or not path.is_file():
185
+ return
186
+ if path.suffix.lower() != '.zip':
187
+ return
188
+ if not _is_under_temp_root(path):
189
+ return
190
+ if not path.name.startswith(ARCHIVE_PREFIX):
191
+ return
192
+ try:
193
+ path.unlink()
194
+ except OSError:
195
+ pass
196
+
197
+
198
def persist_uploaded_archive(archive_path: str | Path) -> Path:
    """Copy an uploaded ZIP to a uniquely named managed file in the temp dir.

    Args:
        archive_path: Location of the uploaded archive.

    Returns:
        Path of the managed copy, named ``ARCHIVE_PREFIX`` + random hex + ``.zip``.

    Raises:
        ValidationError: If the source is missing, is not a regular file, or
            does not have a ``.zip`` suffix.
        OSError: If the copy itself fails; any partial copy is removed first.
    """
    source = Path(archive_path)
    if not source.exists() or not source.is_file():
        raise ValidationError(f'Uploaded archive does not exist: {source}')
    if source.suffix.lower() != '.zip':
        raise ValidationError('Only .zip uploads are supported.')

    managed_root = Path(tempfile.gettempdir())
    managed_path = managed_root / f'{ARCHIVE_PREFIX}{uuid.uuid4().hex}.zip'
    try:
        shutil.copy2(source, managed_path)
    except OSError:
        # A failed copy (e.g. disk full) can leave a truncated file that no
        # state refers to; remove it rather than leaking it.
        try:
            managed_path.unlink()
        except OSError:
            pass
        raise
    return managed_path
209
+
210
+
211
def cleanup_stale_managed_files(max_age_seconds: int) -> int:
    """Remove managed work dirs and upload archives older than ``max_age_seconds``.

    Scans the top level of the system temp directory for directories named
    with ``WORK_DIR_PREFIX`` and ``.zip`` files named with ``ARCHIVE_PREFIX``
    whose modification time is at or before the cutoff.

    Args:
        max_age_seconds: Minimum age for removal; values <= 0 disable cleanup.

    Returns:
        Number of entries that were actually removed.
    """
    if max_age_seconds <= 0:
        return 0

    temp_root = Path(tempfile.gettempdir())
    if not temp_root.exists():
        return 0

    try:
        entries = list(temp_root.iterdir())
    except OSError:
        # Cleanup is best effort; an unreadable temp root must not abort the caller.
        return 0

    cutoff = time.time() - max_age_seconds
    removed = 0
    for path in entries:
        # Never follow symlinks: is_dir()/is_file() would classify by the link
        # target, which may live outside the temp root.
        if path.is_symlink():
            continue
        is_work_dir = path.is_dir()
        if is_work_dir:
            if not path.name.startswith(WORK_DIR_PREFIX):
                continue
        elif path.is_file():
            if not path.name.startswith(ARCHIVE_PREFIX) or path.suffix.lower() != '.zip':
                continue
        else:
            continue

        try:
            if path.stat().st_mtime > cutoff:
                continue
        except OSError:
            continue

        if is_work_dir:
            shutil.rmtree(path, ignore_errors=True)
            # rmtree swallowed any error, so confirm removal before counting it.
            if path.exists():
                continue
        else:
            try:
                path.unlink()
            except OSError:
                continue
        removed += 1
    return removed
244
+
245
+
246
  def create_work_dir() -> Path:
247
+ return Path(tempfile.mkdtemp(prefix=WORK_DIR_PREFIX))
248
 
249
 
250
  def extract_submission_zip(