Z User committed on
Commit
69fc040
·
1 Parent(s): 06cb3d4

fix: auto-update fails after container rebuild

Browse files

Two root causes:

1. Shallow clone (--depth 1) prevents tag checkout
The Dockerfile pins hermes-agent with 'git clone --depth 1 --branch v2026.4.30'.
After a rebuild, the clone is shallow, so 'git rev-parse <newer-tag>' always
fails because the target commit lies outside the shallow boundary.
Fix: run 'git fetch --unshallow' at startup, before any tag operations.

2. Version file / actual code mismatch
After a rebuild, /app/hermes-agent is reset to the Dockerfile-pinned version
(v2026.4.30), but /data/hermes/agent.version (persistent) still records the newer
version from the previous auto-update. The updater reads the version file, concludes
it is 'already latest', and skips the update, while the actual code is still old.
Fix: compare the actual git tag with the recorded version. If they differ, force
the update to be re-applied regardless of the version comparison result.

Files changed (1) hide show
  1. start.sh +57 -16
start.sh CHANGED
@@ -457,15 +457,45 @@ update_hermes_agent_background() {
457
  API_URL="https://api.github.com/repos/${AGENT_REPO}/releases/latest"
458
  EXTRAS="feishu,mcp,cron,pty"
459
 
460
- # Current version (from Dockerfile build or previous update)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  CURRENT_VERSION="$(cat "$VERSION_FILE" 2>/dev/null | head -1)"
462
  if [ -z "$CURRENT_VERSION" ]; then
463
- # Try build-time version file
464
- CURRENT_VERSION="$(cat /app/hermes-agent.version 2>/dev/null | head -1)"
465
  [ -z "$CURRENT_VERSION" ] && CURRENT_VERSION="v2026.4.30"
466
  echo "$CURRENT_VERSION" > "$VERSION_FILE"
467
  fi
468
 
 
 
 
 
 
 
 
 
 
 
469
  # Version comparison helper: strip leading 'v', compare date-style like 2026.4.30
470
  compare_date_versions() {
471
  local a="${1#v}" b="${2#v}"
@@ -479,9 +509,9 @@ update_hermes_agent_background() {
479
  return 1 # equal or older
480
  }
481
 
482
- echo "[$(date)] Agent auto-update: checking (current: $CURRENT_VERSION)..."
483
 
484
- # Query GitHub API
485
  LATEST_JSON=$(curl -sf --connect-timeout 10 --max-time 20 "$API_URL" 2>/dev/null)
486
  if [ $? -ne 0 ] || [ -z "$LATEST_JSON" ]; then
487
  echo "[$(date)] Agent auto-update: failed to reach GitHub API, skipping"
@@ -496,45 +526,56 @@ update_hermes_agent_background() {
496
 
497
  echo "[$(date)] Agent auto-update: latest release is $LATEST_TAG"
498
 
499
- # Compare
500
  if compare_date_versions "$CURRENT_VERSION" "$LATEST_TAG"; then
501
  echo "[$(date)] Agent auto-update: upgrading $CURRENT_VERSION → $LATEST_TAG ..."
 
 
 
 
502
  else
503
  echo "[$(date)] Agent auto-update: $CURRENT_VERSION is up to date"
504
  return
505
  fi
506
 
507
- # Phase 1: git fetch + checkout new tag (non-destructive)
508
  cd "$AGENT_DIR"
509
  if ! git fetch --tags origin 2>&1 | tail -3; then
510
  echo "[$(date)] Agent auto-update: git fetch failed, aborting"
511
  return
512
  fi
513
 
514
- # Verify tag exists
515
  if ! git rev-parse "$LATEST_TAG" >/dev/null 2>&1; then
516
- echo "[$(date)] Agent auto-update: tag $LATEST_TAG not found, aborting"
517
- return
 
 
 
 
 
 
 
518
  fi
519
 
520
- # Phase 2: checkout new version
521
  if ! git checkout "$LATEST_TAG" 2>&1 | tail -3; then
522
  echo "[$(date)] Agent auto-update: git checkout failed, aborting"
523
  # Try to recover to previous version
524
- git checkout "$CURRENT_VERSION" 2>/dev/null
525
  return
526
  fi
527
 
528
- # Phase 3: update pip dependencies (editable install)
529
  echo "[$(date)] Agent auto-update: updating pip dependencies..."
530
  if ! pip install --quiet -e "/app/hermes-agent[${EXTRAS}]" 2>&1 | tail -10; then
531
  echo "[$(date)] Agent auto-update: pip install failed, rolling back"
532
- git checkout "$CURRENT_VERSION" 2>/dev/null
533
  pip install --quiet -e "/app/hermes-agent[${EXTRAS}]" 2>/dev/null
534
  return
535
  fi
536
 
537
- # Phase 4: reinstall our patches on top of new version
538
  echo "[$(date)] Agent auto-update: re-applying Hermes Bot patches..."
539
  if [ -f "/app/scripts/patch_file_delivery.py" ]; then
540
  python3 /app/scripts/patch_file_delivery.py 2>/dev/null
@@ -569,7 +610,7 @@ update_hermes_agent_background() {
569
  echo "$(date '+%Y-%m-%d %H:%M:%S')" >> "$VERSION_FILE"
570
  echo "[$(date)] Agent auto-update: upgraded to $LATEST_TAG ✓ (restart needed for full effect)"
571
 
572
- # Phase 5: schedule gateway restart for clean reload
573
  # Send SIGUSR1 to entry.py to trigger gateway restart cycle
574
  ENTRY_PID=$(pgrep -f "python3 /app/entry.py" 2>/dev/null | head -1)
575
  if [ -n "$ENTRY_PID" ]; then
 
457
  API_URL="https://api.github.com/repos/${AGENT_REPO}/releases/latest"
458
  EXTRAS="feishu,mcp,cron,pty"
459
 
460
+ # ── Phase 0: Unshallow the clone if needed ──
461
+ # Dockerfile uses `git clone --depth 1` which prevents checking out
462
+ # any tag/commit outside the shallow boundary.
463
+ # Without this, `git rev-parse <tag>` ALWAYS fails after a rebuild.
464
+ if [ -f "$AGENT_DIR/.git/shallow" ]; then
465
+ echo "[$(date)] Agent auto-update: unshallowing clone (Dockerfile --depth 1)..."
466
+ if git -C "$AGENT_DIR" fetch --unshallow origin 2>&1 | tail -3; then
467
+ echo "[$(date)] Agent auto-update: clone unshallowed successfully"
468
+ else
469
+ echo "[$(date)] Agent auto-update: unshallow failed, tag checkout may not work"
470
+ fi
471
+ fi
472
+
473
+ # ── Phase 1: Detect actual code version vs recorded version ──
474
+ # After a HF Space rebuild, /app/hermes-agent is re-cloned at the
475
+ # Dockerfile pinned version, but /data/hermes/agent.version (persistent)
476
+ # still says the newer version from the previous auto-update.
477
+ # This mismatch causes the updater to think it's already up to date.
478
+ ACTUAL_TAG=$(git -C "$AGENT_DIR" describe --tags --exact-match 2>/dev/null || echo "")
479
+ BUILD_VERSION="$(cat /app/hermes-agent.version 2>/dev/null | head -1)"
480
+
481
+ # Current version from persistent storage (survives rebuilds)
482
  CURRENT_VERSION="$(cat "$VERSION_FILE" 2>/dev/null | head -1)"
483
  if [ -z "$CURRENT_VERSION" ]; then
484
+ CURRENT_VERSION="$BUILD_VERSION"
 
485
  [ -z "$CURRENT_VERSION" ] && CURRENT_VERSION="v2026.4.30"
486
  echo "$CURRENT_VERSION" > "$VERSION_FILE"
487
  fi
488
 
489
+ # Detect rebuild mismatch: actual git tag ≠ recorded version
490
+ NEED_FORCE=false
491
+ if [ -n "$ACTUAL_TAG" ] && [ "$ACTUAL_TAG" != "$CURRENT_VERSION" ]; then
492
+ echo "[$(date)] Agent auto-update: REBUILD DETECTED (actual=$ACTUAL_TAG, recorded=$CURRENT_VERSION)"
493
+ echo "[$(date)] Agent auto-update: code was reset to Dockerfile version by container rebuild"
494
+ NEED_FORCE=true
495
+ # Reset comparison baseline to actual (old) code version
496
+ CURRENT_VERSION="$ACTUAL_TAG"
497
+ fi
498
+
499
  # Version comparison helper: strip leading 'v', compare date-style like 2026.4.30
500
  compare_date_versions() {
501
  local a="${1#v}" b="${2#v}"
 
509
  return 1 # equal or older
510
  }
511
 
512
+ echo "[$(date)] Agent auto-update: checking (current: $CURRENT_VERSION, actual: $ACTUAL_TAG, latest: querying...)"
513
 
514
+ # ── Phase 2: Query GitHub API for latest release ──
515
  LATEST_JSON=$(curl -sf --connect-timeout 10 --max-time 20 "$API_URL" 2>/dev/null)
516
  if [ $? -ne 0 ] || [ -z "$LATEST_JSON" ]; then
517
  echo "[$(date)] Agent auto-update: failed to reach GitHub API, skipping"
 
526
 
527
  echo "[$(date)] Agent auto-update: latest release is $LATEST_TAG"
528
 
529
+ # ── Phase 3: Decide if update is needed ──
530
  if compare_date_versions "$CURRENT_VERSION" "$LATEST_TAG"; then
531
  echo "[$(date)] Agent auto-update: upgrading $CURRENT_VERSION → $LATEST_TAG ..."
532
+ elif [ "$NEED_FORCE" = "true" ]; then
533
+ # Rebuild detected: latest = recorded version, but code is still old.
534
+ # Re-apply the update to restore correct version.
535
+ echo "[$(date)] Agent auto-update: re-applying $LATEST_TAG after rebuild (code was reset to $ACTUAL_TAG)"
536
  else
537
  echo "[$(date)] Agent auto-update: $CURRENT_VERSION is up to date"
538
  return
539
  fi
540
 
541
+ # ── Phase 4: git fetch + checkout new tag (non-destructive) ──
542
  cd "$AGENT_DIR"
543
  if ! git fetch --tags origin 2>&1 | tail -3; then
544
  echo "[$(date)] Agent auto-update: git fetch failed, aborting"
545
  return
546
  fi
547
 
548
+ # Verify tag exists (after unshallow, this should succeed)
549
  if ! git rev-parse "$LATEST_TAG" >/dev/null 2>&1; then
550
+ echo "[$(date)] Agent auto-update: tag $LATEST_TAG not found locally, fetching explicitly..."
551
+ if ! git fetch origin "refs/tags/$LATEST_TAG:refs/tags/$LATEST_TAG" 2>&1; then
552
+ echo "[$(date)] Agent auto-update: explicit tag fetch failed, aborting"
553
+ return
554
+ fi
555
+ if ! git rev-parse "$LATEST_TAG" >/dev/null 2>&1; then
556
+ echo "[$(date)] Agent auto-update: tag $LATEST_TAG still not found, aborting"
557
+ return
558
+ fi
559
  fi
560
 
561
+ # Phase 5: checkout new version
562
  if ! git checkout "$LATEST_TAG" 2>&1 | tail -3; then
563
  echo "[$(date)] Agent auto-update: git checkout failed, aborting"
564
  # Try to recover to previous version
565
+ git checkout "$ACTUAL_TAG" 2>/dev/null
566
  return
567
  fi
568
 
569
+ # Phase 6: update pip dependencies (editable install)
570
  echo "[$(date)] Agent auto-update: updating pip dependencies..."
571
  if ! pip install --quiet -e "/app/hermes-agent[${EXTRAS}]" 2>&1 | tail -10; then
572
  echo "[$(date)] Agent auto-update: pip install failed, rolling back"
573
+ git checkout "$ACTUAL_TAG" 2>/dev/null
574
  pip install --quiet -e "/app/hermes-agent[${EXTRAS}]" 2>/dev/null
575
  return
576
  fi
577
 
578
+ # Phase 7: reinstall our patches on top of new version
579
  echo "[$(date)] Agent auto-update: re-applying Hermes Bot patches..."
580
  if [ -f "/app/scripts/patch_file_delivery.py" ]; then
581
  python3 /app/scripts/patch_file_delivery.py 2>/dev/null
 
610
  echo "$(date '+%Y-%m-%d %H:%M:%S')" >> "$VERSION_FILE"
611
  echo "[$(date)] Agent auto-update: upgraded to $LATEST_TAG ✓ (restart needed for full effect)"
612
 
613
+ # Phase 8: schedule gateway restart for clean reload
614
  # Send SIGUSR1 to entry.py to trigger gateway restart cycle
615
  ENTRY_PID=$(pgrep -f "python3 /app/entry.py" 2>/dev/null | head -1)
616
  if [ -n "$ENTRY_PID" ]; then