Georg committed on
Commit
bd9a893
·
1 Parent(s): 8d70077

Prepare job build context

Browse files
Files changed (3) hide show
  1. Dockerfile.base +2 -2
  2. deploy.sh +68 -3
  3. scripts/run_hf_image_job.py +27 -26
Dockerfile.base CHANGED
@@ -1,7 +1,7 @@
1
  # Base image with FoundationPose dependencies split into CPU (L1) and GPU (L2)
2
 
3
  # Stage 1: CPU-only base with Python deps
4
- FROM ubuntu:22.04 AS foundationpose-base-l1
5
 
6
  ENV DEBIAN_FRONTEND=noninteractive
7
 
@@ -102,7 +102,7 @@ RUN pip install --no-cache-dir \
102
  && pip cache purge
103
 
104
  # Stage 2: GPU-enabled base
105
- FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS foundationpose-base-l2
106
 
107
  ENV DEBIAN_FRONTEND=noninteractive
108
  ENV CUDA_HOME=/usr/local/cuda
 
1
  # Base image with FoundationPose dependencies split into CPU (L1) and GPU (L2)
2
 
3
  # Stage 1: CPU-only base with Python deps
4
+ FROM docker.io/ubuntu:22.04 AS foundationpose-base-l1
5
 
6
  ENV DEBIAN_FRONTEND=noninteractive
7
 
 
102
  && pip cache purge
103
 
104
  # Stage 2: GPU-enabled base
105
+ FROM docker.io/nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS foundationpose-base-l2
106
 
107
  ENV DEBIAN_FRONTEND=noninteractive
108
  ENV CUDA_HOME=/usr/local/cuda
deploy.sh CHANGED
@@ -41,6 +41,30 @@ if ! "${PY_BIN}" -c "import huggingface_hub" >/dev/null 2>&1; then
41
  "${PY_BIN}" -m pip install --quiet huggingface_hub
42
  fi
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  echo "Stage 1: Building base image via HF Job"
45
  echo "Platform: ${PLATFORM}"
46
  echo "Image: ${IMAGE_NAME}:${TAG}"
@@ -53,7 +77,8 @@ JOB_OUTPUT=$("${PY_BIN}" scripts/run_hf_image_job.py \
53
  --dockerfile "Dockerfile.base" \
54
  --target "foundationpose-base-l2" \
55
  --flavor "l40sx1" \
56
- --git-repo "https://huggingface.co/spaces/${HF_SPACE}" 2>&1 | tee /tmp/hf_image_job.log)
 
57
 
58
  JOB_ID=$(echo "${JOB_OUTPUT}" | awk '/Job ID:/ {print $3}')
59
  if [ -z "${JOB_ID}" ]; then
@@ -61,15 +86,55 @@ if [ -z "${JOB_ID}" ]; then
61
  else
62
  echo "Following job logs for 1 minute..."
63
  if [ -x "${HF_BIN}" ]; then
64
- (timeout 60 "${HF_BIN}" jobs logs "${JOB_ID}") || true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  echo ""
66
  echo "Job status:"
67
- "${HF_BIN}" jobs status "${JOB_ID}" || true
68
  else
69
  echo "hf CLI not available; job logs skipped"
70
  fi
71
  fi
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  echo ""
74
  echo "Stage 2: Deploying to HuggingFace Space"
75
  echo ""
 
41
  "${PY_BIN}" -m pip install --quiet huggingface_hub
42
  fi
43
 
44
+ # Initialize git repo if needed (for job context)
45
+ if [ ! -d .git ]; then
46
+ echo "Initializing git repository..."
47
+ git init
48
+ git remote add origin "https://huggingface.co/spaces/${HF_SPACE}"
49
+ echo "✓ Git repository initialized"
50
+ echo ""
51
+ fi
52
+
53
+ # Commit local changes before job so the job can build the right ref
54
+ if [[ -n $(git status -s) ]]; then
55
+ echo "Committing changes for job context..."
56
+ git add Dockerfile Dockerfile.base requirements.txt deploy.sh app.py client.py estimator.py masks.py scripts/run_hf_image_job.py download_weights.py
57
+ git commit -m "Prepare job build context"
58
+ echo "✓ Job context committed"
59
+ fi
60
+
61
+ # Push a temporary ref for the job to build from
62
+ JOB_REF="job-build-$(date +%Y%m%d-%H%M%S)"
63
+ echo "Pushing job ref: ${JOB_REF}"
64
+ git push "https://huggingface.co/spaces/${HF_SPACE}" "HEAD:${JOB_REF}" --force
65
+ echo "✓ Job ref pushed"
66
+ echo ""
67
+
68
  echo "Stage 1: Building base image via HF Job"
69
  echo "Platform: ${PLATFORM}"
70
  echo "Image: ${IMAGE_NAME}:${TAG}"
 
77
  --dockerfile "Dockerfile.base" \
78
  --target "foundationpose-base-l2" \
79
  --flavor "l40sx1" \
80
+ --git-repo "https://huggingface.co/spaces/${HF_SPACE}" \
81
+ --git-ref "${JOB_REF}" 2>&1 | tee /tmp/hf_image_job.log)
82
 
83
  JOB_ID=$(echo "${JOB_OUTPUT}" | awk '/Job ID:/ {print $3}')
84
  if [ -z "${JOB_ID}" ]; then
 
86
  else
87
  echo "Following job logs for 1 minute..."
88
  if [ -x "${HF_BIN}" ]; then
89
+ HF_BIN_PATH="${HF_BIN}" JOB_ID="${JOB_ID}" "${PY_BIN}" - <<'PY'
90
+ import os
91
+ import subprocess
92
+ import sys
93
+ import time
94
+
95
+ hf = os.environ["HF_BIN_PATH"]
96
+ job_id = os.environ["JOB_ID"]
97
+
98
+ proc = subprocess.Popen([hf, "jobs", "logs", job_id], stdout=sys.stdout, stderr=sys.stderr)
99
+ try:
100
+ time.sleep(60)
101
+ finally:
102
+ proc.terminate()
103
+ try:
104
+ proc.wait(timeout=5)
105
+ except Exception:
106
+ proc.kill()
107
+ PY
108
  echo ""
109
  echo "Job status:"
110
+ "${HF_BIN}" jobs inspect "${JOB_ID}" || true
111
  else
112
  echo "hf CLI not available; job logs skipped"
113
  fi
114
  fi
115
 
116
+ if [ -n "${JOB_ID}" ] && [ -x "${HF_BIN}" ]; then
117
+ echo ""
118
+ echo "Waiting for image build job to complete..."
119
+ for i in $(seq 1 40); do
120
+ JOB_STAGE=$("${HF_BIN}" jobs inspect "${JOB_ID}" | python3 -c "import sys, json; data=json.load(sys.stdin)[0]; print(data.get('status', {}).get('stage', 'UNKNOWN'))" 2>/dev/null || echo "UNKNOWN")
121
+ echo " Job stage: ${JOB_STAGE}"
122
+ case "${JOB_STAGE}" in
123
+ SUCCESS|SUCCEEDED|COMPLETED|DONE)
124
+ echo "✓ Image build job completed"
125
+ break
126
+ ;;
127
+ FAILED|ERROR|CANCELED|CANCELLED)
128
+ echo "✗ Image build job failed: ${JOB_STAGE}"
129
+ exit 1
130
+ ;;
131
+ *)
132
+ sleep 30
133
+ ;;
134
+ esac
135
+ done
136
+ fi
137
+
138
  echo ""
139
  echo "Stage 2: Deploying to HuggingFace Space"
140
  echo ""
scripts/run_hf_image_job.py CHANGED
@@ -49,6 +49,11 @@ def main() -> None:
49
  default="https://huggingface.co/spaces/gpue/foundationpose",
50
  help="Git repo to clone for build context (default: HF space repo)",
51
  )
 
 
 
 
 
52
  parser.add_argument(
53
  "--flavor",
54
  default="l40sx1",
@@ -94,6 +99,7 @@ def main() -> None:
94
  "CONTEXT": args.context,
95
  "TARGET": args.target,
96
  "GIT_REPO": args.git_repo,
 
97
  "DOCKER_USER": args.docker_user,
98
  }
99
  secrets = {
@@ -102,51 +108,46 @@ def main() -> None:
102
  }
103
 
104
  command = [
105
- "sh",
106
- "-c",
107
  r"""
108
  set -euo pipefail
109
 
110
  echo "Installing git and certificates..."
111
- apk add --no-cache git ca-certificates curl >/dev/null
112
-
113
- # Start Docker daemon (DinD image)
114
- echo "Starting Docker daemon..."
115
- dockerd-entrypoint.sh > /tmp/dockerd.log 2>&1 &
116
-
117
- # Wait for Docker
118
- for i in $(seq 1 30); do
119
- if docker info >/dev/null 2>&1; then
120
- break
121
- fi
122
- sleep 1
123
- if [ "$i" -eq 30 ]; then
124
- echo "Docker did not start in time. Logs:" >&2
125
- tail -n 200 /tmp/dockerd.log >&2 || true
126
- exit 1
127
- fi
128
- done
129
 
130
  echo "Cloning build context..."
131
  if [ -n "${HF_TOKEN:-}" ]; then
132
  AUTH_REPO=$(echo "$GIT_REPO" | sed -e "s#https://#https://user:${HF_TOKEN}@#")
133
- git clone --depth 1 "$AUTH_REPO" /work/repo
134
  else
135
- git clone --depth 1 "$GIT_REPO" /work/repo
136
  fi
137
 
138
  cd /work/repo
139
 
140
  echo "Logging in to Docker Hub..."
141
- echo "$DOCKER_TOKEN" | docker login -u "$DOCKER_USER" --password-stdin
 
 
 
 
 
 
142
 
143
  IMAGE_REF="$IMAGE_NAME:$IMAGE_TAG"
144
 
145
  echo "Building image $IMAGE_REF (target: $TARGET)..."
146
- docker build --platform "$PLATFORM" -f "$DOCKERFILE" --target "$TARGET" -t "$IMAGE_REF" "$CONTEXT"
147
 
148
  echo "Pushing image $IMAGE_REF..."
149
- docker push "$IMAGE_REF"
150
 
151
  echo "✓ Image pushed successfully"
152
  """,
@@ -162,7 +163,7 @@ echo "✓ Image pushed successfully"
162
  print()
163
 
164
  job_info = run_job(
165
- image="docker:24.0.7-dind",
166
  command=command,
167
  env=env,
168
  secrets=secrets,
 
49
  default="https://huggingface.co/spaces/gpue/foundationpose",
50
  help="Git repo to clone for build context (default: HF space repo)",
51
  )
52
+ parser.add_argument(
53
+ "--git-ref",
54
+ default="main",
55
+ help="Git ref (branch/tag/sha) to build from (default: main)",
56
+ )
57
  parser.add_argument(
58
  "--flavor",
59
  default="l40sx1",
 
99
  "CONTEXT": args.context,
100
  "TARGET": args.target,
101
  "GIT_REPO": args.git_repo,
102
+ "GIT_REF": args.git_ref,
103
  "DOCKER_USER": args.docker_user,
104
  }
105
  secrets = {
 
108
  }
109
 
110
  command = [
111
+ "bash",
112
+ "-lc",
113
  r"""
114
  set -euo pipefail
115
 
116
  echo "Installing git and certificates..."
117
+ if command -v microdnf >/dev/null 2>&1; then
118
+ microdnf install -y git ca-certificates tar gzip >/dev/null
119
+ elif command -v dnf >/dev/null 2>&1; then
120
+ dnf install -y git ca-certificates tar gzip >/dev/null
121
+ elif command -v apt-get >/dev/null 2>&1; then
122
+ apt-get update -qq && apt-get install -y -qq git ca-certificates tar gzip >/dev/null
123
+ fi
 
 
 
 
 
 
 
 
 
 
 
124
 
125
  echo "Cloning build context..."
126
  if [ -n "${HF_TOKEN:-}" ]; then
127
  AUTH_REPO=$(echo "$GIT_REPO" | sed -e "s#https://#https://user:${HF_TOKEN}@#")
128
+ git clone --depth 1 --branch "$GIT_REF" "$AUTH_REPO" /work/repo
129
  else
130
+ git clone --depth 1 --branch "$GIT_REF" "$GIT_REPO" /work/repo
131
  fi
132
 
133
  cd /work/repo
134
 
135
  echo "Logging in to Docker Hub..."
136
+ buildah login -u "$DOCKER_USER" -p "$DOCKER_TOKEN" docker.io
137
+
138
+ export BUILDAH_ISOLATION=chroot
139
+ export STORAGE_DRIVER=vfs
140
+
141
+ PLATFORM_OS="${PLATFORM%%/*}"
142
+ PLATFORM_ARCH="${PLATFORM##*/}"
143
 
144
  IMAGE_REF="$IMAGE_NAME:$IMAGE_TAG"
145
 
146
  echo "Building image $IMAGE_REF (target: $TARGET)..."
147
+ buildah bud --format docker --os "$PLATFORM_OS" --arch "$PLATFORM_ARCH" -f "$DOCKERFILE" --target "$TARGET" -t "$IMAGE_REF" "$CONTEXT"
148
 
149
  echo "Pushing image $IMAGE_REF..."
150
+ buildah push "$IMAGE_REF" "docker://$IMAGE_REF"
151
 
152
  echo "✓ Image pushed successfully"
153
  """,
 
163
  print()
164
 
165
  job_info = run_job(
166
+ image="quay.io/buildah/stable:latest",
167
  command=command,
168
  env=env,
169
  secrets=secrets,