AlexWortega committed on
Commit
0e15c6f
·
verified ·
1 Parent(s): 3cfa9a4

Upload download_data.sh with huggingface_hub

Browse files
Files changed (1) hide show
  1. download_data.sh +22 -0
download_data.sh ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Download The Well dataset locally for faster training.
3
+ # HF streaming works but is slow (~3-4s/batch); local data is ~10x faster.
4
+ # Needs the the-well-download CLI on PATH; WELL_DATA_PATH overrides the target directory.
5
+ # Usage:
6
+ #   ./download_data.sh                                     # downloads default dataset
7
+ #   ./download_data.sh active_matter                       # specify dataset
8
+ #   ./download_data.sh turbulent_radiative_layer_2D train  # specific split
9
+ set -euo pipefail # stop on any failure so "Done." never follows a failed or missing download
10
+ DATASET="${1:-turbulent_radiative_layer_2D}"
11
+ SPLIT="${2:-}" # empty = all splits
12
+ BASE_PATH="${WELL_DATA_PATH:-$HOME/data/the_well}" # default under the caller's home, not a fixed user
13
+
14
+ echo "Downloading The Well: dataset=$DATASET, split=${SPLIT:-all}, path=$BASE_PATH"
15
+
16
+ if [ -n "$SPLIT" ]; then
17
+   the-well-download --base-path "$BASE_PATH" --dataset "$DATASET" --split "$SPLIT"
18
+ else
19
+   the-well-download --base-path "$BASE_PATH" --dataset "$DATASET"
20
+ fi
21
+
22
+ echo "Done. Use --no-streaming --local_path $BASE_PATH in training scripts."