File size: 4,358 Bytes
5c1bb37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env bash
# tools/make_sha256sums.sh
#
# Generate / verify the SHA256SUMS manifest for the CM-EVS release.
#
# Why this exists:
#   The dataset release is split across multiple roots — code, sample data,
#   regen scripts for HM3D / ScanNet++, and the public Blender frames. The
#   reviewer (or any downloader) needs a single text file that lists the
#   sha256 of every distributable file, so they can verify integrity with a
#   one-liner: `shasum -a 256 -c SHA256SUMS`.
#
# Usage:
#   tools/make_sha256sums.sh generate [ROOT [OUT_FILE]]
#   tools/make_sha256sums.sh verify   [ROOT [SUMS_FILE]]
#
# Defaults:
#   ROOT      = current directory
#   OUT_FILE  = SHA256SUMS (in ROOT)
#   SUMS_FILE = SHA256SUMS (in ROOT)
#
# Examples:
#   # 1) refresh manifest for the code release we are sitting in
#   tools/make_sha256sums.sh generate
#
#   # 2) make a manifest for a freshly populated data drop somewhere else
#   tools/make_sha256sums.sh generate /Volumes/CMEVS/cm-evs-data /Volumes/CMEVS/cm-evs-data/SHA256SUMS
#
#   # 3) verify a downloaded data drop
#   tools/make_sha256sums.sh verify /Volumes/CMEVS/cm-evs-data
#
# Format:
#   <sha256-hex>  <relative-path>
#   one per line, sorted by path. Paths are POSIX-style relative paths from
#   ROOT, so the manifest is portable between machines.
#
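# What is excluded by default:
#   - .git/, .DS_Store, *.pyc, __pycache__/, .ipynb_checkpoints/,
#     dataset_metadata/manifests_h100/, and any file named SHA256SUMS.
#   - NOTE: tar/zip archives that ROOT itself owns are meant to be
#     checksummed separately by their containing release root, but
#     `generate` does not filter them out; move them out of ROOT first if
#     they must not appear in the manifest.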
#
# Tooling:
#   uses `shasum -a 256` (bundled with Perl on macOS / BSD and most Linux
#   distributions; GNU coreutils provides the equivalent `sha256sum`).
#   NUL-delimited `find -print0` output keeps filenames with spaces intact.

set -euo pipefail

# Positional arguments, all optional: CMD [ROOT [SUMS]].
#   CMD  - sub-command, defaults to "generate"
#   ROOT - directory to hash / verify, defaults to the current directory
#   SUMS - manifest path, defaults to SHA256SUMS inside ROOT
CMD="${1:-generate}"
ROOT="${2:-$PWD}"
DEFAULT_SUMS="$ROOT/SHA256SUMS"
SUMS="${3:-$DEFAULT_SUMS}"
readonly CMD ROOT DEFAULT_SUMS SUMS

# Bail out early when ROOT is not a directory.
[[ -d "$ROOT" ]] || {
    echo "ERROR: ROOT directory does not exist: $ROOT" >&2
    exit 2
}

# A POSIX-portable shasum invocation.
hasher() {
    if command -v shasum >/dev/null 2>&1; then
        shasum -a 256 "$@"
    elif command -v sha256sum >/dev/null 2>&1; then
        sha256sum "$@"
    else
        echo "ERROR: neither shasum nor sha256sum is on PATH" >&2
        exit 3
    fi
}

generate() {
    local out="$SUMS"
    local tmp; tmp="$(mktemp)"
    trap 'rm -f "$tmp"' EXIT

    echo "Hashing files under: $ROOT"
    echo "Writing manifest to: $out"

    cd "$ROOT"

    # Collect every regular file, exclude common junk, sort by path so the
    # manifest is deterministic across machines.
    # Exclusions:
    #   - .git/, __pycache__/, .ipynb_checkpoints/, .DS_Store, *.pyc
    #   - SHA256SUMS itself (chicken/egg)
    #   - dataset_metadata/manifests_h100/  — these are large data-snapshot
    #     manifests (per-frame sha256 of the H100 data drop, ~71 MB total);
    #     they have their own ARCHIVE_DIGESTS.txt for self-verification and
    #     are not part of the redistributable code+metadata package.
    find . -type f \
        -not -path './.git/*' \
        -not -path '*/__pycache__/*' \
        -not -path '*/.ipynb_checkpoints/*' \
        -not -path './dataset_metadata/manifests_h100/*' \
        -not -name '.DS_Store' \
        -not -name '*.pyc' \
        -not -name 'SHA256SUMS' \
        -print0 | LC_ALL=C sort -z | xargs -0 -n 50 shasum -a 256 \
        | sed 's|\./||' \
        > "$tmp"

    mv "$tmp" "$out"
    trap - EXIT

    local n; n="$(wc -l < "$out" | tr -d ' ')"
    echo "OK: hashed $n files."
    echo "Verify with: tools/make_sha256sums.sh verify"
}

verify() {
    local sums="$SUMS"
    if [[ ! -f "$sums" ]]; then
        echo "ERROR: manifest not found: $sums" >&2
        exit 4
    fi

    echo "Verifying files under: $ROOT"
    echo "Against manifest:      $sums"

    cd "$ROOT"
    if shasum -a 256 -c "$sums"; then
        echo ""
        echo "PASS: all checksums match."
    else
        echo ""
        echo "FAIL: at least one file does not match (see above)."
        exit 5
    fi
}

# Dispatch on the requested sub-command; each one accepts short aliases.
case "$CMD" in
    generate | gen | g) generate ;;
    verify | check | c) verify ;;
    -h | --help | help)
        # The header comment doubles as the help text: take line 2 through
        # the `set -euo` line, strip the leading "# ", then drop the final
        # (`set`) line.
        sed -n '2,/^set -euo/p' "$0" \
            | sed 's/^# \{0,1\}//' \
            | sed '$d'
        exit 0
        ;;
    *)
        printf '%s\n' "Unknown command: $CMD" "Try: $0 --help" >&2
        exit 1
        ;;
esac