File size: 5,401 Bytes
80ef3b2
84b67b3
80ef3b2
 
 
84b67b3
80ef3b2
 
 
 
 
 
 
 
 
 
 
84b67b3
 
 
 
 
80ef3b2
 
84b67b3
 
 
 
 
 
 
80ef3b2
 
 
 
 
 
 
84b67b3
 
80ef3b2
 
 
 
 
 
 
 
 
 
 
 
 
 
84b67b3
80ef3b2
 
 
 
 
 
 
84b67b3
80ef3b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84b67b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80ef3b2
 
 
 
 
 
 
 
84b67b3
80ef3b2
 
 
 
 
 
 
 
 
 
 
84b67b3
 
 
 
 
80ef3b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python3
"""Audit the public WildFIRE-FM release before upload."""

from __future__ import annotations

import json
import re
from pathlib import Path


# Release root: the parent of the directory that contains this script.
# Every relative path below is resolved against it.
ROOT = Path(__file__).resolve().parents[1]

# Paths (relative to ROOT) that must exist for the audit to pass.
REQUIRED = [
    "README.md",
    "LICENSE",
    "requirements.txt",
    "data_sources/DATA_SOURCES.md",
    "models/wildfire_fm/README.md",
    "models/wildfire_fm/modeling_unet.py",
    "models/wildfire_fm/checkpoint_manifest.json",
    "paper/manuscript_final.pdf",
    "paper_outputs/figures/overview_wildfire.pdf",
    "paper_outputs/figures/matching.pdf",
    "paper_outputs/figures/fig_task_contract_tiles.pdf",
    "paper_outputs/figures/fig_primary_rank_change_map.pdf",
    "paper_outputs/figures/fig_selection_regret_scatter.pdf",
    "paper_outputs/figures/fig_rank_heatmap1.pdf",
    "assets/selection_regret_preview.png",
    "assets/task_rank_map_preview.png",
    "assets/primary_rank_change_preview.png",
    "artifacts/manifests/paper_outputs.sha256",
    "scripts/check_paper_output_hashes.py",
]

# File names that must exist inside ROOT / "paper_outputs/tables".
TABLE_LABELS = [
    "tab_primary_results.tex",
    "tab_supporting_results.tex",
    "tab_fireprone_contract_progression.tex",
    "tab_selection_regret_scope.tex",
    "tab_selection_regret_scope_sweep.tex",
    "tab_appendix_selection_regret_tolerance.tex",
]

# Substrings that must not occur in any scanned text file. The match is a
# plain, case-sensitive `in` test on the file contents.
# NOTE(review): these look like private filesystem paths, an internal
# organisation name and unfinished-placeholder markers -- confirm intent.
FORBIDDEN_TEXT = [
    "/home/yx21e",
    "/blue/",
    "/orange/",
    "fsu-compsci",
    "TBD",
    "N/A",
    "Pangu24",
]

# Only files whose suffix is in this set are scanned for forbidden text
# (the comparison is case-sensitive).
TEXT_SUFFIXES = {".md", ".py", ".sh", ".tex", ".csv", ".json", ".yml", ".yaml", ".txt"}
# File names excluded from the forbidden-text scan; this script is listed
# because it contains the forbidden tokens itself as literals.
SKIP_FOR_FORBIDDEN = {"audit_release.py", "build_selection_regret_rq2_figure.py"}


def iter_text_files() -> list[Path]:
    """Return the sorted text files under ROOT that the forbidden-text scan covers.

    A path is kept when none of its components is ``.git`` or
    ``__pycache__``, its file name is not in ``SKIP_FOR_FORBIDDEN``, it is a
    regular file, and its suffix is one of ``TEXT_SUFFIXES``.
    """
    ignored_components = {".git", "__pycache__"}

    def _is_scannable(candidate: Path) -> bool:
        # Reject anything that lives inside a VCS or bytecode-cache directory.
        if not ignored_components.isdisjoint(candidate.parts):
            return False
        # Files that legitimately contain the forbidden tokens are exempt.
        if candidate.name in SKIP_FOR_FORBIDDEN:
            return False
        return candidate.is_file() and candidate.suffix in TEXT_SUFFIXES

    return sorted(filter(_is_scannable, ROOT.rglob("*")))


def _read_lenient(path: Path) -> str:
    """Return the contents of *path* decoded as UTF-8, dropping undecodable bytes."""
    # An explicit encoding keeps the audit independent of the host locale.
    return path.read_text(encoding="utf-8", errors="ignore")


def _audit_required_files() -> list[str]:
    """Report entries of REQUIRED and TABLE_LABELS that do not exist under ROOT."""
    found = [f"missing required file: {rel}" for rel in REQUIRED if not (ROOT / rel).exists()]
    tables_dir = ROOT / "paper_outputs/tables"
    found.extend(
        f"missing paper table output: {table}"
        for table in TABLE_LABELS
        if not (tables_dir / table).exists()
    )
    return found


def _audit_forbidden_text() -> list[str]:
    """Report every (file, token) pair where a FORBIDDEN_TEXT token occurs."""
    found: list[str] = []
    for path in iter_text_files():
        text = _read_lenient(path)
        found.extend(
            f"{path.relative_to(ROOT)} contains forbidden token {token!r}"
            for token in FORBIDDEN_TEXT
            if token in text
        )
    return found


def _audit_readme() -> list[str]:
    """Report expected model-card phrases that are absent from README.md."""
    readme_path = ROOT / "README.md"
    if not readme_path.is_file():
        # README.md is listed in REQUIRED, so its absence is already reported
        # there; reading it anyway would abort the audit with a traceback and
        # hide every other collected issue.
        return []
    readme = _read_lenient(readme_path)
    expected_phrases = ["WildFIRE-FM", "Model weights", "Data Used By The Study", "Loading A Checkpoint"]
    return [
        f"README missing expected model-card phrase: {phrase}"
        for phrase in expected_phrases
        if phrase not in readme
    ]


def _audit_checkpoint_manifest() -> list[str]:
    """Validate models/wildfire_fm/checkpoint_manifest.json, if it exists.

    Checks that the manifest lists exactly five checkpoints, that each
    filename starts with ``models/wildfire_fm/checkpoints/seed_``, that no
    entry has a ``source_path`` key, and that each ``sha256`` is 64 lowercase
    hex digits. A malformed manifest is reported as an issue instead of
    raising.
    """
    manifest_path = ROOT / "models/wildfire_fm/checkpoint_manifest.json"
    if not manifest_path.exists():
        # Absence is reported by the required-file check.
        return []
    try:
        data = json.loads(manifest_path.read_text(encoding="utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        return [f"checkpoint manifest is not valid JSON: {exc}"]
    if not isinstance(data, dict):
        return ["checkpoint manifest top level should be a JSON object"]
    checkpoints = data.get("checkpoints", [])
    if not isinstance(checkpoints, list):
        return ["checkpoint manifest 'checkpoints' should be a list"]

    found: list[str] = []
    if len(checkpoints) != 5:
        found.append("checkpoint manifest should list five seeded checkpoints")
    for item in checkpoints:
        if not isinstance(item, dict):
            found.append(f"unexpected checkpoint entry in manifest: {item!r}")
            continue
        # str() guards against a null or numeric "filename" value.
        rel = str(item.get("filename", ""))
        if not rel.startswith("models/wildfire_fm/checkpoints/seed_"):
            found.append(f"unexpected checkpoint filename in manifest: {rel}")
        if "source_path" in item:
            found.append("checkpoint manifest exposes source_path")
        if not re.fullmatch(r"[0-9a-f]{64}", str(item.get("sha256", ""))):
            found.append(f"bad sha256 in checkpoint manifest: {item}")
    return found


def _audit_tables() -> list[str]:
    """Report paper tables containing an ``\\ms{...}{0.0000}`` cell."""
    zero_std = re.compile(r"\\ms\{[^}]*\}\{0\.0000\}")
    # Sorted so that the report order does not depend on directory order.
    return [
        f"{path.relative_to(ROOT)} displays zero std in an \\ms cell"
        for path in sorted((ROOT / "paper_outputs/tables").glob("*.tex"))
        if zero_std.search(_read_lenient(path))
    ]


def _audit_checksum_manifest() -> list[str]:
    """Cross-check artifacts/manifests/paper_outputs.sha256 against the release tree.

    Each non-blank line must be ``<digest> <path>``; a leading ``*`` on the
    path (the binary-mode marker written by ``sha256sum``) is ignored. The
    listed paths must exist and must be exactly the files under
    ``paper_outputs`` and ``assets`` plus ``paper/manuscript_final.pdf``.
    """
    checksum_manifest = ROOT / "artifacts/manifests/paper_outputs.sha256"
    if not checksum_manifest.exists():
        # Absence is reported by the required-file check.
        return []

    found: list[str] = []
    listed: set[str] = set()
    duplicates: set[str] = set()
    for line in _read_lenient(checksum_manifest).splitlines():
        if not line.strip():
            continue
        parts = line.split(None, 1)
        if len(parts) != 2:
            found.append(f"bad checksum manifest line: {line!r}")
            continue
        rel = parts[1].strip()
        if rel.startswith("*"):
            rel = rel[1:]
        if rel in listed:
            duplicates.add(rel)
        listed.add(rel)
        if not (ROOT / rel).exists():
            found.append(f"checksum manifest lists missing output: {rel}")

    # as_posix() keeps the comparison valid on platforms whose native path
    # separator is not "/".
    expected = {
        p.relative_to(ROOT).as_posix()
        for rel_root in ["paper_outputs", "assets"]
        for p in (ROOT / rel_root).rglob("*")
        if p.is_file()
    }
    expected.add("paper/manuscript_final.pdf")

    missing = sorted(expected - listed)
    extra = sorted(listed - expected)
    if missing:
        found.append(f"checksum manifest missing outputs: {missing}")
    if extra:
        found.append(f"checksum manifest has extra outputs: {extra}")
    if duplicates:
        found.append(f"checksum manifest lists outputs more than once: {sorted(duplicates)}")
    return found


def main() -> None:
    """Run every release check and print the outcome.

    Prints ``Release audit passed.`` when no check reports an issue.
    Otherwise prints one line per issue.

    Raises:
        SystemExit: with status 1 when at least one issue was found.
    """
    checks = (
        _audit_required_files,
        _audit_forbidden_text,
        _audit_readme,
        _audit_checkpoint_manifest,
        _audit_tables,
        _audit_checksum_manifest,
    )
    issues: list[str] = []
    for check in checks:
        issues.extend(check())

    if issues:
        print("Release audit failed:")
        for issue in issues:
            print(f"- {issue}")
        raise SystemExit(1)
    print("Release audit passed.")


if __name__ == "__main__":
    main()