#!/usr/bin/env python3
"""Audit the public WildFIRE-FM release before upload.

The script runs a fixed series of checks against a release tree (required
files, paper tables, forbidden text tokens, README phrases, the checkpoint
manifest, zero-std table cells and the paper-output checksum manifest).
Every problem found is collected and printed; the process exits with
status 1 if there is at least one problem.
"""

from __future__ import annotations

import json
import re
from pathlib import Path

# Default release root: the directory one level above the one holding this file.
ROOT = Path(__file__).resolve().parents[1]

# Paths (relative to the release root) that must exist.
REQUIRED = [
    "README.md",
    "LICENSE",
    "requirements.txt",
    "data_sources/DATA_SOURCES.md",
    "models/wildfire_fm/README.md",
    "models/wildfire_fm/modeling_unet.py",
    "models/wildfire_fm/checkpoint_manifest.json",
    "paper/manuscript_final.pdf",
    "paper_outputs/figures/overview_wildfire.pdf",
    "paper_outputs/figures/matching.pdf",
    "paper_outputs/figures/fig_task_contract_tiles.pdf",
    "paper_outputs/figures/fig_primary_rank_change_map.pdf",
    "paper_outputs/figures/fig_selection_regret_scatter.pdf",
    "paper_outputs/figures/fig_rank_heatmap1.pdf",
    "assets/selection_regret_preview.png",
    "assets/task_rank_map_preview.png",
    "assets/primary_rank_change_preview.png",
    "artifacts/manifests/paper_outputs.sha256",
    "scripts/check_paper_output_hashes.py",
]

# File names that must exist under ``paper_outputs/tables``.
TABLE_LABELS = [
    "tab_primary_results.tex",
    "tab_supporting_results.tex",
    "tab_fireprone_contract_progression.tex",
    "tab_selection_regret_scope.tex",
    "tab_selection_regret_scope_sweep.tex",
    "tab_appendix_selection_regret_tolerance.tex",
]

# Substrings that must not occur in any scanned text file.
# NOTE(review): presumably private paths and unfinished placeholders - confirm.
FORBIDDEN_TEXT = [
    "/home/yx21e",
    "/blue/",
    "/orange/",
    "fsu-compsci",
    "TBD",
    "N/A",
    "Pangu24",
]

# Suffixes (case-sensitive) of the files scanned for forbidden text.
TEXT_SUFFIXES = {".md", ".py", ".sh", ".tex", ".csv", ".json", ".yml", ".yaml", ".txt"}

# File names excluded from the forbidden-text scan (this list includes the
# audit script itself, which necessarily contains the forbidden tokens).
SKIP_FOR_FORBIDDEN = {"audit_release.py", "build_selection_regret_rq2_figure.py"}

_CHECKPOINT_PREFIX = "models/wildfire_fm/checkpoints/seed_"
_SHA256_RE = re.compile(r"[0-9a-f]{64}")
_ZERO_STD_RE = re.compile(r"\\ms\{[^}]*\}\{0\.0000\}")


def iter_text_files(root: Path | None = None) -> list[Path]:
    """Return the sorted text files under *root* that should be scanned.

    Args:
        root: Release root to walk; defaults to the module-level ``ROOT``.

    Returns:
        Sorted paths of regular files whose suffix is in ``TEXT_SUFFIXES``,
        excluding anything inside a ``.git`` or ``__pycache__`` directory and
        any file whose name is in ``SKIP_FOR_FORBIDDEN``.
    """
    base = ROOT if root is None else Path(root)
    found: list[Path] = []
    for path in base.rglob("*"):
        # Test only the components below the root, so a checkout that itself
        # lives under a directory named ".git"/"__pycache__" is still scanned.
        parts = path.relative_to(base).parts
        if ".git" in parts or "__pycache__" in parts:
            continue
        if path.name in SKIP_FOR_FORBIDDEN:
            continue
        if path.is_file() and path.suffix in TEXT_SUFFIXES:
            found.append(path)
    return sorted(found)


def _check_required_files(root: Path) -> list[str]:
    """Report every entry of ``REQUIRED`` that does not exist under *root*."""
    return [
        f"missing required file: {rel}"
        for rel in REQUIRED
        if not (root / rel).exists()
    ]


def _check_tables(root: Path) -> list[str]:
    """Report every entry of ``TABLE_LABELS`` missing from ``paper_outputs/tables``."""
    return [
        f"missing paper table output: {table}"
        for table in TABLE_LABELS
        if not (root / "paper_outputs/tables" / table).exists()
    ]


def _check_forbidden_tokens(root: Path) -> list[str]:
    """Report each (file, token) pair where a forbidden token occurs."""
    issues: list[str] = []
    for path in iter_text_files(root):
        # Undecodable bytes are dropped rather than aborting the audit.
        text = path.read_text(encoding="utf-8", errors="ignore")
        for token in FORBIDDEN_TEXT:
            if token in text:
                issues.append(
                    f"{path.relative_to(root)} contains forbidden token {token!r}"
                )
    return issues


def _check_readme(root: Path) -> list[str]:
    """Report expected model-card phrases that are absent from ``README.md``."""
    readme_path = root / "README.md"
    if not readme_path.is_file():
        # A missing README is reported by the required-file check; reading it
        # here would abort the audit and hide every other finding.
        return []
    readme = readme_path.read_text(encoding="utf-8", errors="ignore")
    phrases = [
        "WildFIRE-FM",
        "Model weights",
        "Data Used By The Study",
        "Loading A Checkpoint",
    ]
    return [
        f"README missing expected model-card phrase: {phrase}"
        for phrase in phrases
        if phrase not in readme
    ]


def _check_checkpoint_manifest(root: Path) -> list[str]:
    """Validate ``checkpoint_manifest.json``; malformed input is reported, not raised."""
    manifest_path = root / "models/wildfire_fm/checkpoint_manifest.json"
    if not manifest_path.is_file():
        return []
    try:
        # json.loads accepts bytes and detects the UTF encoding itself.
        data = json.loads(manifest_path.read_bytes())
    except (OSError, ValueError) as exc:
        return [f"checkpoint manifest is not readable JSON: {exc}"]
    if not isinstance(data, dict):
        return ["checkpoint manifest top level is not a JSON object"]
    checkpoints = data.get("checkpoints", [])
    if not isinstance(checkpoints, list):
        return ["checkpoint manifest 'checkpoints' entry is not a list"]

    issues: list[str] = []
    if len(checkpoints) != 5:
        issues.append("checkpoint manifest should list five seeded checkpoints")
    for item in checkpoints:
        if not isinstance(item, dict):
            issues.append(f"bad checkpoint entry in manifest: {item!r}")
            continue
        rel = item.get("filename", "")
        if not (isinstance(rel, str) and rel.startswith(_CHECKPOINT_PREFIX)):
            issues.append(f"unexpected checkpoint filename in manifest: {rel}")
        if "source_path" in item:
            issues.append("checkpoint manifest exposes source_path")
        if not _SHA256_RE.fullmatch(str(item.get("sha256", ""))):
            issues.append(f"bad sha256 in checkpoint manifest: {item}")
    return issues


def _check_zero_std_tables(root: Path) -> list[str]:
    """Report ``.tex`` tables containing an ``\\ms{...}{0.0000}`` cell."""
    issues: list[str] = []
    # Sorted so the report order does not depend on directory listing order.
    for path in sorted((root / "paper_outputs/tables").glob("*.tex")):
        text = path.read_text(encoding="utf-8", errors="ignore")
        if _ZERO_STD_RE.search(text):
            issues.append(
                f"{path.relative_to(root)} displays zero std in an \\ms cell"
            )
    return issues


def _check_checksum_manifest(root: Path) -> list[str]:
    """Compare the checksum manifest's file list with the files on disk."""
    checksum_manifest = root / "artifacts/manifests/paper_outputs.sha256"
    if not checksum_manifest.is_file():
        return []

    issues: list[str] = []
    listed: set[str] = set()
    manifest_text = checksum_manifest.read_text(encoding="utf-8", errors="ignore")
    for line in manifest_text.splitlines():
        if not line.strip():
            continue
        # Each line is "<digest> <path>"; the path may itself contain spaces.
        parts = line.split(None, 1)
        if len(parts) != 2:
            issues.append(f"bad checksum manifest line: {line!r}")
            continue
        rel = parts[1].strip()
        listed.add(rel)
        if not (root / rel).exists():
            issues.append(f"checksum manifest lists missing output: {rel}")

    # as_posix() keeps the comparison independent of the OS path separator.
    expected = {
        p.relative_to(root).as_posix()
        for rel_root in ("paper_outputs", "assets")
        for p in (root / rel_root).rglob("*")
        if p.is_file()
    }
    expected.add("paper/manuscript_final.pdf")

    missing = sorted(expected - listed)
    extra = sorted(listed - expected)
    if missing:
        issues.append(f"checksum manifest missing outputs: {missing}")
    if extra:
        issues.append(f"checksum manifest has extra outputs: {extra}")
    return issues


def main(root: Path | None = None) -> None:
    """Run every release check and print the outcome.

    Args:
        root: Release root to audit; defaults to the module-level ``ROOT``.

    Raises:
        SystemExit: With code 1 when at least one issue was found.
    """
    base = ROOT if root is None else Path(root)
    checks = (
        _check_required_files,
        _check_tables,
        _check_forbidden_tokens,
        _check_readme,
        _check_checkpoint_manifest,
        _check_zero_std_tables,
        _check_checksum_manifest,
    )
    issues: list[str] = []
    for check in checks:
        issues.extend(check(base))

    if issues:
        print("Release audit failed:")
        for issue in issues:
            print(f"- {issue}")
        raise SystemExit(1)
    print("Release audit passed.")


if __name__ == "__main__":
    main()