set -euo pipefail

# Show status for a run directory.
# Usage: bash scripts/status_run.sh runs/qwen3-8b-qlora-YYYYMMDD-HHMMSS
#
# Prints, in order:
#   - the run directory and, when RUN_ID.txt exists, its contents
#   - for each known PID file that exists, whether that PID answers `kill -0`
#   - the last five checkpoint-* directories (version-sort order) directly
#     under outputs/adapter
#   - a long listing and the last 20 lines of every regular file logs/*.log
#   - the path of eval/all_metrics.json when that file exists
#
# Exit status: 1 on a usage error or when RUN_DIR is not a directory.

if [[ $# -lt 1 ]]; then
  echo "Usage: $0 <RUN_DIR>" >&2
  exit 1
fi

RUN_DIR="$1"

# Fail loudly on a mistyped path instead of printing an empty report.
if [[ ! -d "$RUN_DIR" ]]; then
  echo "Error: run directory not found: $RUN_DIR" >&2
  exit 1
fi

echo "RUN_DIR=$RUN_DIR"
if [[ -f "$RUN_DIR/RUN_ID.txt" ]]; then
  echo "RUN_ID=$(<"$RUN_DIR/RUN_ID.txt")"
fi

for pid_file in TRAIN_PID.txt TRAIN_RESUME_PID.txt EVAL_PID.txt; do
  pid_path="$RUN_DIR/$pid_file"
  [[ -f "$pid_path" ]] || continue

  # Take only the first whitespace-delimited token; `read` returns non-zero
  # when the file has no trailing newline (or cannot be opened), which must
  # not abort the script under `set -e`.
  pid=""
  read -r pid _ <"$pid_path" || true
  pid=${pid%$'\r'} # tolerate CRLF line endings

  # Only a positive integer is a real process id. `kill -0 0` and negative
  # values address process groups and would report a bogus "running".
  if [[ ! "$pid" =~ ^0*[1-9][0-9]*$ ]]; then
    echo "$pid_file PID=$pid invalid"
    continue
  fi

  # Signal 0 performs the existence/permission check without delivering a
  # signal. NOTE: it also fails for a live process owned by another user.
  if kill -0 "$pid" 2>/dev/null; then
    echo "$pid_file PID=$pid running"
  else
    echo "$pid_file PID=$pid not running"
  fi
done

echo
if [[ -d "$RUN_DIR/outputs/adapter" ]]; then
  echo "Latest checkpoints:"
  # `|| true`: with pipefail, a find/sort failure must not abort the report.
  find "$RUN_DIR/outputs/adapter" -maxdepth 1 -type d -name "checkpoint-*" \
    | sort -V \
    | tail -n 5 || true
fi

echo
# Collect log files via the glob itself rather than parsing `ls`. When the
# pattern matches nothing it stays literal and is rejected by the -f test.
log_files=()
for log_path in "$RUN_DIR"/logs/*.log; do
  if [[ -f "$log_path" ]]; then
    log_files+=("$log_path")
  fi
done

if (( ${#log_files[@]} > 0 )); then
  echo "Logs:"
  ls -lh -- "${log_files[@]}"
  echo
  echo "Last train/eval lines:"
  tail -n 20 -- "${log_files[@]}"
fi

echo
if [[ -f "$RUN_DIR/eval/all_metrics.json" ]]; then
  echo "Eval metrics: $RUN_DIR/eval/all_metrics.json"
fi