#!/usr/bin/env python3
"""Audit the public WildFIRE-FM release before upload."""
from __future__ import annotations
import json
import re
from pathlib import Path
# Base directory for every relative path below: the parent of the directory
# that contains this script (presumably the release root -- confirm layout).
ROOT = Path(__file__).resolve().parents[1]
# Paths, relative to ROOT, that must exist for the audit to pass.
REQUIRED = [
    "README.md",
    "LICENSE",
    "requirements.txt",
    "data_sources/DATA_SOURCES.md",
    "models/wildfire_fm/README.md",
    "models/wildfire_fm/modeling_unet.py",
    "models/wildfire_fm/checkpoint_manifest.json",
    "paper/manuscript_final.pdf",
    "paper_outputs/figures/overview_wildfire.pdf",
    "paper_outputs/figures/matching.pdf",
    "paper_outputs/figures/fig_task_contract_tiles.pdf",
    "paper_outputs/figures/fig_primary_rank_change_map.pdf",
    "paper_outputs/figures/fig_selection_regret_scatter.pdf",
    "paper_outputs/figures/fig_rank_heatmap1.pdf",
    "assets/wildfire_fm_model_card.svg",
    "assets/release_contents.svg",
    "assets/selection_regret_final.png",
    "assets/supporting_rank_map_final.png",
    "assets/primary_rank_change_final.png",
    "artifacts/manifests/paper_outputs.sha256",
    "scripts/check_paper_output_hashes.py",
]
# File names that main() expects to find under ROOT / "paper_outputs/tables".
TABLE_LABELS = [
    "tab_primary_results.tex",
    "tab_supporting_results.tex",
    "tab_fireprone_contract_progression.tex",
    "tab_selection_regret_scope.tex",
    "tab_selection_regret_scope_sweep.tex",
    "tab_appendix_selection_regret_tolerance.tex",
]
# Substrings that must not occur in any scanned text file. The match is a
# plain, case-sensitive substring test, so short tokens such as "N/A" also
# match inside longer strings.
FORBIDDEN_TEXT = [
    "/home/yx21e",
    "/blue/",
    "/orange/",
    "fsu-compsci",
    "TBD",
    "N/A",
    "Pangu24",
]
# File suffixes that iter_text_files() treats as text (compared case-sensitively).
TEXT_SUFFIXES = {".md", ".py", ".sh", ".tex", ".csv", ".json", ".yml", ".yaml", ".txt"}
# File names that iter_text_files() leaves out of the forbidden-token scan
# (this list includes audit_release.py, which itself spells out the tokens).
SKIP_FOR_FORBIDDEN = {"audit_release.py", "build_selection_regret_rq2_figure.py"}
def iter_text_files() -> list[Path]:
    """Collect the text files under ROOT that the forbidden-token scan reads.

    Walks ROOT recursively and keeps regular files whose suffix is in
    TEXT_SUFFIXES, leaving out any path that has a ``.git`` or
    ``__pycache__`` component and any file whose name is listed in
    SKIP_FOR_FORBIDDEN.

    Returns:
        The matching paths, sorted.
    """
    ignored_dirs = (".git", "__pycache__")
    matches = [
        candidate
        for candidate in ROOT.rglob("*")
        if not any(part in candidate.parts for part in ignored_dirs)
        and candidate.name not in SKIP_FOR_FORBIDDEN
        and candidate.is_file()
        and candidate.suffix in TEXT_SUFFIXES
    ]
    matches.sort()
    return matches
def _read_text(path: Path) -> str:
    """Read *path* as UTF-8, silently dropping bytes that do not decode."""
    return path.read_text(encoding="utf-8", errors="ignore")


def _check_required_files() -> list[str]:
    """Report entries of REQUIRED and TABLE_LABELS that do not exist on disk."""
    issues = [
        f"missing required file: {rel}"
        for rel in REQUIRED
        if not (ROOT / rel).exists()
    ]
    tables_dir = ROOT / "paper_outputs/tables"
    issues.extend(
        f"missing paper table output: {table}"
        for table in TABLE_LABELS
        if not (tables_dir / table).exists()
    )
    return issues


def _check_forbidden_tokens() -> list[str]:
    """Report every FORBIDDEN_TEXT substring found in the scanned text files."""
    issues: list[str] = []
    for path in iter_text_files():
        text = _read_text(path)
        shown = path.relative_to(ROOT)
        issues.extend(
            f"{shown} contains forbidden token {token!r}"
            for token in FORBIDDEN_TEXT
            if token in text
        )
    return issues


def _check_readme() -> list[str]:
    """Report model-card phrases that the top-level README does not contain."""
    readme_path = ROOT / "README.md"
    # A missing README is reported by the required-file check; reading it
    # here would abort the audit with a traceback instead of a report.
    if not readme_path.is_file():
        return []
    readme = _read_text(readme_path)
    expected_phrases = ["WildFIRE-FM", "Quick Load", "Data Sources", "Evaluation Snapshot"]
    return [
        f"README missing expected model-card phrase: {phrase}"
        for phrase in expected_phrases
        if phrase not in readme
    ]


def _check_checkpoint_manifest() -> list[str]:
    """Validate the structure of the checkpoint manifest, if it exists."""
    manifest_path = ROOT / "models/wildfire_fm/checkpoint_manifest.json"
    if not manifest_path.exists():
        return []
    try:
        data = json.loads(manifest_path.read_text(encoding="utf-8"))
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; either one means the manifest cannot be audited.
        return [f"checkpoint manifest is not valid JSON: {exc}"]
    if not isinstance(data, dict):
        return ["checkpoint manifest should be a JSON object"]
    checkpoints = data.get("checkpoints", [])
    if not isinstance(checkpoints, list):
        return ["checkpoint manifest 'checkpoints' entry should be a list"]

    issues: list[str] = []
    if len(checkpoints) != 5:
        issues.append("checkpoint manifest should list five seeded checkpoints")
    for item in checkpoints:
        if not isinstance(item, dict):
            issues.append(f"unexpected checkpoint entry in manifest: {item!r}")
            continue
        # str() keeps a non-string filename from raising on .startswith().
        rel = str(item.get("filename", ""))
        if not rel.startswith("models/wildfire_fm/checkpoints/seed_"):
            issues.append(f"unexpected checkpoint filename in manifest: {rel}")
        if "source_path" in item:
            issues.append("checkpoint manifest exposes source_path")
        if not re.fullmatch(r"[0-9a-f]{64}", str(item.get("sha256", ""))):
            issues.append(f"bad sha256 in checkpoint manifest: {item}")
    return issues


def _check_table_std() -> list[str]:
    """Report LaTeX tables whose ms-macro cell has a second argument of 0.0000."""
    issues: list[str] = []
    for path in (ROOT / "paper_outputs/tables").glob("*.tex"):
        if re.search(r"\\ms\{[^}]*\}\{0\.0000\}", _read_text(path)):
            issues.append(f"{path.relative_to(ROOT)} displays zero std in an \\ms cell")
    return issues


def _check_checksum_manifest() -> list[str]:
    """Compare the checksum manifest's file list with the files on disk."""
    checksum_manifest = ROOT / "artifacts/manifests/paper_outputs.sha256"
    if not checksum_manifest.exists():
        return []

    issues: list[str] = []
    listed: list[str] = []
    for line in _read_text(checksum_manifest).splitlines():
        if not line.strip():
            continue
        # Each line is "<digest> <path>"; split once so paths may hold spaces.
        parts = line.split(None, 1)
        if len(parts) != 2:
            issues.append(f"bad checksum manifest line: {line!r}")
            continue
        rel = parts[1].strip()
        listed.append(rel)
        if not (ROOT / rel).exists():
            issues.append(f"checksum manifest lists missing output: {rel}")

    expected = {"paper/manuscript_final.pdf"}
    for rel_root in ("paper_outputs", "assets"):
        # as_posix() keeps the comparison separator-independent: the manifest
        # uses forward slashes, while str() of a path on Windows does not.
        expected.update(
            p.relative_to(ROOT).as_posix()
            for p in (ROOT / rel_root).rglob("*")
            if p.is_file()
        )

    # Set comparison: duplicate manifest lines are tolerated.
    listed_set = set(listed)
    missing = sorted(expected - listed_set)
    extra = sorted(listed_set - expected)
    if missing:
        issues.append(f"checksum manifest missing outputs: {missing}")
    if extra:
        issues.append(f"checksum manifest has extra outputs: {extra}")
    return issues


def main() -> None:
    """Run every release audit and print the outcome.

    Prints "Release audit passed." when no audit reports a problem.
    Otherwise prints each problem on its own line and exits.

    Raises:
        SystemExit: with status 1 when at least one issue was found.
    """
    issues = [
        *_check_required_files(),
        *_check_forbidden_tokens(),
        *_check_readme(),
        *_check_checkpoint_manifest(),
        *_check_table_std(),
        *_check_checksum_manifest(),
    ]
    if issues:
        print("Release audit failed:")
        for issue in issues:
            print(f"- {issue}")
        raise SystemExit(1)
    print("Release audit passed.")


if __name__ == "__main__":
    main()
|