SynLayers committed on
Commit
06be361
·
verified ·
1 Parent(s): 798996c

Upload dataset/dataset_construction.sh with huggingface_hub

Browse files
Files changed (1) hide show
  1. dataset/dataset_construction.sh +82 -0
dataset/dataset_construction.sh ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
#
# Launch scaleup_dataset.py (looked up in the same directory as this script)
# with every setting taken from environment variables.
#
# Required environment (the script aborts if any is unset or empty):
#   BLENDED_DIR, LAION_DIR, CAPTION_DIR, OUTPUT_DIR
#
# Optional environment:
#   PYTHON_BIN        interpreter used to run the Python script (default: python)
#   CAPTION_META      default: ${CAPTION_DIR}/captions.jsonl
#   ALPHAVAE_DIR      forwarded as --alphavae_dir only when non-empty
#   ALPHAVAE_PROMPTS  forwarded as --alphavae_prompts only when non-empty
#   SKIP_EXISTING     "1" appends --skip_existing (default: 0)
#   Every other variable assigned below has an inline default and is passed
#   through as the lowercase flag of the same name.
#
# Command-line arguments given to this wrapper are not forwarded.
#
# Example:
#   BLENDED_DIR=... LAION_DIR=... CAPTION_DIR=... OUTPUT_DIR=... \
#     NUM_SAMPLES=1000 ./dataset_construction.sh
set -euo pipefail

# Resolve this script's own directory so the Python entry point is found
# regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PYTHON_BIN="${PYTHON_BIN:-python}"

# Required inputs: `${VAR:?msg}` exits with "msg" when VAR is unset or empty.
: "${BLENDED_DIR:?Set BLENDED_DIR}"
: "${LAION_DIR:?Set LAION_DIR}"
: "${CAPTION_DIR:?Set CAPTION_DIR}"
: "${OUTPUT_DIR:?Set OUTPUT_DIR}"

# Tunables: each keeps a caller-supplied value, otherwise falls back to the
# default shown here.
CAPTION_META="${CAPTION_META:-${CAPTION_DIR}/captions.jsonl}"
NUM_SAMPLES="${NUM_SAMPLES:-500000}"
START_INDEX="${START_INDEX:-0}"
SEED="${SEED:-42}"
MAX_BASE_SAMPLES="${MAX_BASE_SAMPLES:-18000}"
MIN_DONOR_SAMPLES="${MIN_DONOR_SAMPLES:-1}"
MAX_DONOR_SAMPLES="${MAX_DONOR_SAMPLES:-4}"
MIN_LAYERS_PER_DONOR="${MIN_LAYERS_PER_DONOR:-0}"
MAX_LAYERS_PER_DONOR="${MAX_LAYERS_PER_DONOR:-2}"
MIN_LAYERS_TO_REMOVE="${MIN_LAYERS_TO_REMOVE:-1}"
MAX_LAYERS_TO_REMOVE="${MAX_LAYERS_TO_REMOVE:-4}"
ADDED_LAYER_MIN_SIZE="${ADDED_LAYER_MIN_SIZE:-0.9}"
ADDED_LAYER_MAX_SIZE="${ADDED_LAYER_MAX_SIZE:-1.1}"
LAION_PROB="${LAION_PROB:-0.60}"
CAPTION_PROB="${CAPTION_PROB:-0.35}"
LAION_MIN_SIZE="${LAION_MIN_SIZE:-0.3}"
LAION_MAX_SIZE="${LAION_MAX_SIZE:-0.4}"
CAPTION_MIN_SIZE="${CAPTION_MIN_SIZE:-0.6}"
CAPTION_MAX_SIZE="${CAPTION_MAX_SIZE:-0.8}"
ALPHAVAE_MIN_LAYERS="${ALPHAVAE_MIN_LAYERS:-0}"
ALPHAVAE_MAX_LAYERS="${ALPHAVAE_MAX_LAYERS:-0}"
ALPHAVAE_MIN_SIZE="${ALPHAVAE_MIN_SIZE:-0.25}"
ALPHAVAE_MAX_SIZE="${ALPHAVAE_MAX_SIZE:-0.40}"
NUM_WORKERS="${NUM_WORKERS:-64}"
SKIP_EXISTING="${SKIP_EXISTING:-0}"

# Build the invocation as an array so values containing spaces or glob
# characters reach the Python script as single, unmodified arguments.
cmd=(
  "$PYTHON_BIN" "$SCRIPT_DIR/scaleup_dataset.py"
  --blended_dir "$BLENDED_DIR"
  --laion_dir "$LAION_DIR"
  --caption_dir "$CAPTION_DIR"
  --caption_meta "$CAPTION_META"
  --output_dir "$OUTPUT_DIR"
  --num_samples "$NUM_SAMPLES"
  --start_index "$START_INDEX"
  --seed "$SEED"
  --min_donor_samples "$MIN_DONOR_SAMPLES"
  --max_donor_samples "$MAX_DONOR_SAMPLES"
  --min_layers_per_donor "$MIN_LAYERS_PER_DONOR"
  --max_layers_per_donor "$MAX_LAYERS_PER_DONOR"
  --added_layer_min_size "$ADDED_LAYER_MIN_SIZE"
  --added_layer_max_size "$ADDED_LAYER_MAX_SIZE"
  --laion_prob "$LAION_PROB"
  --caption_prob "$CAPTION_PROB"
  --laion_min_size "$LAION_MIN_SIZE"
  --laion_max_size "$LAION_MAX_SIZE"
  --caption_min_size "$CAPTION_MIN_SIZE"
  --caption_max_size "$CAPTION_MAX_SIZE"
  --min_layers_to_remove "$MIN_LAYERS_TO_REMOVE"
  --max_layers_to_remove "$MAX_LAYERS_TO_REMOVE"
  --alphavae_min_layers "$ALPHAVAE_MIN_LAYERS"
  --alphavae_max_layers "$ALPHAVAE_MAX_LAYERS"
  --alphavae_min_size "$ALPHAVAE_MIN_SIZE"
  --alphavae_max_size "$ALPHAVAE_MAX_SIZE"
  --max_base_samples "$MAX_BASE_SAMPLES"
  --num_workers "$NUM_WORKERS"
)

# Optional flags are appended only when their variable is set and non-empty;
# the `:-` guard keeps `set -u` from aborting when the variable is unset.
if [[ -n "${ALPHAVAE_DIR:-}" ]]; then
  cmd+=(--alphavae_dir "$ALPHAVAE_DIR")
fi

if [[ -n "${ALPHAVAE_PROMPTS:-}" ]]; then
  cmd+=(--alphavae_prompts "$ALPHAVAE_PROMPTS")
fi

# Only the exact string "1" enables the switch; any other value leaves it off.
if [[ "$SKIP_EXISTING" == "1" ]]; then
  cmd+=(--skip_existing)
fi

# Run the assembled command; as the last statement, its exit status becomes
# the exit status of this script.
"${cmd[@]}"