#!/bin/bash
#
# Build local scene caches in two stages.
#
# Stage 1: run `python -m s23dr_2026_example.cache_scenes` once per split
#          (train, validation), writing to <OUT_ROOT>/full/<split>.
#          The module is invoked with --skip-existing (presumably so that
#          re-runs do not redo finished scenes -- confirm against the module).
# Stage 2: run `python -m s23dr_2026_example.make_sampled_cache` on every
#          Stage 1 output directory at each sequence length (2048, 4096),
#          writing to <OUT_ROOT>/sampled_<seq_len>/<split>.
#
# Usage:
#   bash <this-script> [OUT_ROOT]
#
# Arguments:
#   OUT_ROOT  Root directory for all outputs (default: "cache").

# Abort on the first failing command, on use of an unset variable, and on a
# failure in any stage of a pipeline, so a broken Stage 1 never feeds Stage 2.
set -euo pipefail

readonly OUT_ROOT="${1:-cache}"
readonly SPLITS=(train validation)
readonly SEQ_LENS=(2048 4096)

# --- Stage 1: one full cache per split -------------------------------------
for split in "${SPLITS[@]}"; do
  full_dir="$OUT_ROOT/full/$split"
  echo "=== Stage 1: caching $split scenes from raw tars ==="
  python -m s23dr_2026_example.cache_scenes \
    --out-dir "$full_dir" --split "$split" --skip-existing
done

# --- Stage 2: sampled caches, one per (split, seq_len) pair ----------------
# Input directories are derived exactly as in Stage 1 so the two stages
# cannot drift apart.
for split in "${SPLITS[@]}"; do
  in_dir="$OUT_ROOT/full/$split"
  for seq_len in "${SEQ_LENS[@]}"; do
    out_dir="$OUT_ROOT/sampled_${seq_len}/$split"
    echo "=== Stage 2: sampling $split at seq_len=$seq_len ==="
    python -m s23dr_2026_example.make_sampled_cache \
      --in-dir "$in_dir" --out-dir "$out_dir" --seq-len "$seq_len"
  done
done

# --- Summary ----------------------------------------------------------------
echo ""
echo "All done. Sampled datasets are at:"
echo " $OUT_ROOT/sampled_2048/{train,validation}"
echo " $OUT_ROOT/sampled_4096/{train,validation}"
echo ""
echo "To train from these, point reproduce.sh at them via"
# Print the resolved path: OUT_ROOT is local to this script, so a literal
# "$OUT_ROOT" in the hint would not be usable from the caller's shell.
echo " --cache-dir \"$OUT_ROOT/sampled_2048/train\" (and similar for val/4096)"
echo "instead of the default hf:// URLs."