physix / train /sync-plots.sh
Pratyush-01's picture
Upload folder using huggingface_hub
0e24aff verified
#!/usr/bin/env bash
# Pull the latest training-curve PNGs from the HF model repo into
# physix-live/docs/plots/, ready to commit to GitHub.
#
# Why we need this:
# The competition validator checks for committed PNG plots (loss curve +
# reward curve at minimum) inside the public repo at submission time.
# Wandb-only links are not accepted. Our training job writes the PNGs to
# the model repo's plots/ directory; this script mirrors them locally.
#
# Usage:
#   ./train/sync-plots.sh              # default repo (3B profile)
#   ./train/sync-plots.sh user/repo    # explicit repo override
#
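# Safe to re-run — same-named PNGs are overwritten; PNGs that no longer
# exist in the repo are NOT removed locally. Exits 1 if the download fails.
# Run after every training job.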
set -euo pipefail

# Resolve everything relative to this script so it works from any cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# physix-live root is the parent of train/ now that the launcher lives
# inside the env repo (was at the sibling repo before the merge).
ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Final destination for the PNGs (committed to GitHub).
DST="$ROOT/docs/plots"
# Scratch directory the download lands in; it is a sibling of $DST so the
# final `ls` of $DST never shows it.
STAGING="$DST.tmp"

# Default repo matches PROFILES["3b"]["hub_final_repo"] in job_train.py.
REPO="${1:-Pratyush-01/physix-3b-rl}"

echo "Mirroring https://huggingface.co/$REPO/tree/main/plots → $DST"
mkdir -p "$DST"

# Capture the downloader's output in a private, unpredictable temp file
# instead of a fixed /tmp path (avoids symlink tricks and clobbering between
# concurrent runs). Under `set -e` a failing mktemp aborts the script here.
LOG="$(mktemp "${TMPDIR:-/tmp}/sync-plots.XXXXXX")"

# Remove the staging dir and the log on every exit path, including Ctrl-C
# or an unexpected `set -e` abort. `:?` guards against an empty path ever
# reaching `rm -rf`.
cleanup() {
  rm -rf -- "${STAGING:?}" "${LOG:?}"
}
trap cleanup EXIT

# Start from an empty staging dir: leftovers from a previous run that was
# killed before it could clean up must not be mirrored into $DST.
rm -rf -- "${STAGING:?}"

# The download runs as an `if` condition so that a non-zero exit (the
# original author notes this happens when plots/ does not exist yet, i.e.
# before the first job finishes) is reported with the captured log rather
# than aborting silently under `set -e`. The script still exits 1 then.
if hf download "$REPO" \
  --repo-type model \
  --include "plots/*.png" \
  --local-dir "$STAGING" >"$LOG" 2>&1; then
  # Collect the downloaded PNGs. nullglob makes an unmatched pattern (or a
  # missing plots/ dir) expand to an empty array instead of the literal
  # pattern, so "nothing to move" is distinguishable from a real mv error.
  shopt -s nullglob
  pngs=("$STAGING/plots/"*.png)
  shopt -u nullglob

  if ((${#pngs[@]} > 0)); then
    # Move PNGs up one level so they live at docs/plots/foo.png, not
    # docs/plots/plots/foo.png. A failed move is a real error: report it
    # instead of printing a misleading "Synced files" listing.
    if ! mv -- "${pngs[@]}" "$DST/"; then
      echo "ERROR: could not move downloaded PNGs into $DST" >&2
      exit 1
    fi
  else
    # Download succeeded but matched nothing; keep exit status 0 as before,
    # but say so on stderr so an unchanged listing below is not a surprise.
    echo "NOTE: no plots/*.png files were downloaded from $REPO" >&2
  fi

  echo "Synced files:"
  ls -la "$DST"
else
  # Diagnostics go to stderr, together with the downloader's own output.
  echo "WARN: could not pull plots from $REPO (job may not have finished yet)" >&2
  cat -- "$LOG" >&2
  exit 1
fi