#!/bin/bash
#
# Three-stage deterministic training pipeline for s23dr_2026_example.train.
#
#   Step 1: train on the sampled_2048_v2 cache
#           (seq-len 2048, lr 3e-4, batch 32, --steps 125000).
#   Step 2: finetune on the sampled_4096_v2 cache, resuming from step 1's
#           step125000.pt (seq-len 4096, lr 3e-5, batch 64, --steps 135000).
#   Step 3: cooldown with endpoint loss, resuming from step 2's
#           step135000.pt (--endpoint-weight 0.1, --cooldown-start 150000,
#           --cooldown-steps 20000, --steps 170000).
#
# Usage: bash <this-script> [OUT_DIR]
#   OUT_DIR  directory handed to every stage as --out-dir (default: runs)
#
# NOTE(review): the run-directory lookup assumes the trainer creates a
# subdirectory of OUT_DIR containing args.json for each run, and that
# checkpoints land in <run>/checkpoints/ -- inferred from the paths used
# below; confirm against the trainer.

set -euo pipefail

OUT_DIR="${1:-runs}"

# Shared flags for every stage. Kept as an array so each flag stays a separate
# word without depending on unquoted word-splitting.
BASE_ARGS=(--args-from configs/base.json)

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Print the run directory under a root whose args.json was modified most
# recently. Uses a glob plus -nt instead of parsing `ls`, so paths containing
# whitespace are handled correctly.
# Arguments: $1 - root directory holding one subdirectory per run
# Outputs:   the selected run directory on stdout
# Returns:   0 on success, 1 if no <root>/*/args.json exists
#######################################
latest_run_dir() {
  local root=$1
  local newest=""
  local candidate
  for candidate in "$root"/*/args.json; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e "$candidate" ]] || continue
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest=$candidate
    fi
  done
  [[ -n "$newest" ]] || return 1
  printf '%s\n' "${newest%/*}"
}

#######################################
# Abort early if a checkpoint that a stage is about to resume from is missing,
# instead of failing only after the trainer has started up.
# Arguments: $1 - checkpoint path
#######################################
require_checkpoint() {
  [[ -e "$1" ]] || die "checkpoint to resume from not found: $1"
}

#######################################
# Launch one training stage.
# Globals:   BASE_ARGS (read), OUT_DIR (read)
# Arguments: stage-specific trainer flags; they are placed after the shared
#            base flags and before --deterministic / --out-dir.
#######################################
train() {
  python -m s23dr_2026_example.train "${BASE_ARGS[@]}" \
    "$@" \
    --deterministic \
    --out-dir "$OUT_DIR"
}

# ---------------------------------------------------------------------------
# Step 1
# ---------------------------------------------------------------------------
echo "=== Step 1: Training on 2048 data (deterministic) ==="
train \
  --cache-dir hf://usm3d/s23dr-2026-sampled_2048_v2:train \
  --seq-len 2048 \
  --lr 3e-4 \
  --batch-size 32 \
  --steps 125000

STEP1_DIR=$(latest_run_dir "$OUT_DIR") \
  || die "no run directory with args.json found under $OUT_DIR after step 1"
echo "Step 1 complete: $STEP1_DIR"

# ---------------------------------------------------------------------------
# Step 2
# ---------------------------------------------------------------------------
STEP1_CKPT="$STEP1_DIR/checkpoints/step125000.pt"
require_checkpoint "$STEP1_CKPT"

echo "=== Step 2: Finetuning on 4096 data (deterministic) ==="
train \
  --cache-dir hf://usm3d/s23dr-2026-sampled_4096_v2:train \
  --resume "$STEP1_CKPT" \
  --seq-len 4096 \
  --lr 3e-5 \
  --batch-size 64 \
  --steps 135000

STEP2_DIR=$(latest_run_dir "$OUT_DIR") \
  || die "no run directory with args.json found under $OUT_DIR after step 2"
echo "Step 2 complete: $STEP2_DIR"

# ---------------------------------------------------------------------------
# Step 3
# ---------------------------------------------------------------------------
STEP2_CKPT="$STEP2_DIR/checkpoints/step135000.pt"
require_checkpoint "$STEP2_CKPT"

echo "=== Step 3: Cooldown with endpoint loss (deterministic) ==="
train \
  --cache-dir hf://usm3d/s23dr-2026-sampled_4096_v2:train \
  --resume "$STEP2_CKPT" \
  --seq-len 4096 \
  --lr 3e-5 \
  --batch-size 64 \
  --endpoint-weight 0.1 \
  --cooldown-start 150000 \
  --cooldown-steps 20000 \
  --steps 170000

STEP3_DIR=$(latest_run_dir "$OUT_DIR") \
  || die "no run directory with args.json found under $OUT_DIR after step 3"
echo "Step 3 complete: $STEP3_DIR"
echo ""
echo "Final checkpoint: $STEP3_DIR/checkpoints/final.pt"
echo "Copy to checkpoint.pt for submission."
|
|