add grow_layers, sweep configs (replicate_zero4, grow40_winning, grow40_simple), sweep runner
# Sequential sweep runner.
#
# Each config grabs all 8 GPUs via accelerate, so the configs run back-to-back,
# not in parallel. Each run's output goes to logs/<run>.log; the runner's own
# progress messages go to stdout, which the launch command below redirects to
# logs/sweep_master.log.
# Reads HF_TOKEN, HUGGING_FACE_HUB_TOKEN, WANDB_API_KEY from the calling env.
#
# Launch in the background from the directory that contains scripts/ (logs/
# must already exist, because the shell opens the redirect target before this
# script gets a chance to create it):
#   nohup ./scripts/run_sweep.sh > logs/sweep_master.log 2>&1 &
# Deliberately no `-e`: a failing config must not abort the remaining runs.
# Steps whose failure would invalidate the whole sweep are checked explicitly.
set -uo pipefail

# Work from the parent of this script's directory so the relative paths below
# (configs/, logs/, .venv/, distill.py) resolve regardless of the caller's cwd.
# Without `-e`, an unchecked cd failure would run the sweep in the wrong tree.
cd -- "$(dirname -- "$0")/.." || {
  printf 'error: cannot cd to parent of script directory (%s)\n' "$0" >&2
  exit 1
}

# Configs to run, in order.
CONFIGS=(
  "configs/replicate_zero4.toml"
  "configs/grow40_winning.toml"
  "configs/grow40_simple.toml"
)

LOG_DIR="logs"
mkdir -p -- "$LOG_DIR" || {
  printf 'error: cannot create log directory %s\n' "$LOG_DIR" >&2
  exit 1
}

# Failure bookkeeping so the sweep's exit status reflects the runs' outcomes.
n_failed=0
failed_names=""

for cfg in "${CONFIGS[@]}"; do
  # Run name = config file name without directory or .toml suffix.
  name="$(basename -- "$cfg" .toml)"
  log="$LOG_DIR/$name.log"
  echo ">>> [$(date '+%F %T')] starting $name -> $log"

  # stdout and stderr of the run both go to the per-run log (truncated first).
  .venv/bin/accelerate launch \
    --config_file configs/accelerate.yaml \
    distill.py \
    --config "$cfg" \
    > "$log" 2>&1
  rc=$?

  echo "<<< [$(date '+%F %T')] finished $name (exit=$rc)"
  if [[ $rc -ne 0 ]]; then
    n_failed=$((n_failed + 1))
    failed_names+=" $name"
    # Surface the tail of the failing log so the cause is visible in the
    # runner's own output without opening the per-run log.
    echo " last 20 lines of $log:"
    tail -n 20 -- "$log" | sed 's/^/ /'
  fi
done

echo ">>> [$(date '+%F %T')] sweep complete"

# Every config has been attempted; now report failure to the caller instead of
# always exiting 0.
if (( n_failed > 0 )); then
  echo "!!! $n_failed of ${#CONFIGS[@]} run(s) failed:$failed_names"
  exit 1
fi