#!/bin/bash
# Hermes Bot — HuggingFace Space Startup
# NOTE: No 'set -e' — gateway restarts should not kill the entire script

echo "=== Hermes Bot — HuggingFace Space Startup ==="

# Ensure system timezone matches config (logging timestamps use system TZ)
export TZ="${TZ:-Asia/Shanghai}"

# Helper: mask a secret value for safe logging (show first 6 + **** + last 4).
# BUGFIX: moved up from the .env section — it was previously defined AFTER its
# first call in the WeChat-credentials block below, so that call failed with
# "_mask_val: command not found".
_mask_val() {
  local val="$1"
  if [ -z "$val" ] || [ ${#val} -lt 12 ]; then echo "****"; return; fi
  echo "${val:0:6}****${val: -4}"
}

# Ensure persistent storage directories exist
mkdir -p /data/hermes/{sessions,memories,uploads,logs,palace,skills,weixin}

# Create symlinks from hermes home to persistent storage
HERMES_HOME="/root/.hermes"
for dir in sessions memories uploads logs palace skills; do
  target="$HERMES_HOME/$dir"
  if [ ! -L "$target" ] && [ ! -d "$target" ]; then
    ln -sf "/data/hermes/$dir" "$target"
    echo "Created symlink: $dir -> /data/hermes/$dir"
  elif [ -L "$target" ]; then
    echo "Symlink exists: $dir"
  fi
done

# Persist WeChat/Weixin session data across container rebuilds
# Weixin adapter stores auth tokens, context tokens, and sync cursors in ~/.hermes/weixin/
# Without this, WeChat binding breaks on every container rebuild
WEIXIN_DIR="$HERMES_HOME/weixin"
if [ -d "$WEIXIN_DIR" ] && [ ! -L "$WEIXIN_DIR" ]; then
  # Migrate existing session data to persistent storage (-n: never clobber)
  cp -rn "$WEIXIN_DIR"/* /data/hermes/weixin/ 2>/dev/null
  rm -rf "$WEIXIN_DIR"
fi
if [ ! -L "$WEIXIN_DIR" ]; then
  ln -sf "/data/hermes/weixin" "$WEIXIN_DIR"
  echo "Symlink: weixin -> /data/hermes/weixin"
fi

# ── WeChat credential persistence ──
# Priority: HF Space Secrets > persisted account JSON file > .env file
# Once set via HF Space Secrets, WeChat survives ALL container rebuilds.
ACCOUNTS_DIR="/data/hermes/weixin/accounts"
mkdir -p "$ACCOUNTS_DIR"

if [ -z "$WEIXIN_ACCOUNT_ID" ] || [ -z "$WEIXIN_TOKEN" ]; then
  # Fallback: restore from persisted account JSON file
  if [ -z "$WEIXIN_ACCOUNT_ID" ] && [ -d "$ACCOUNTS_DIR" ]; then
    # Most recently modified plain account file (skip token/sync side files)
    LATEST=$(find "$ACCOUNTS_DIR" -name "*.json" ! -name "*.context-tokens.json" \
      ! -name "*.sync.json" -type f -printf '%T@ %p\n' 2>/dev/null \
      | sort -rn | head -1 | awk '{print $2}')
    if [ -n "$LATEST" ]; then
      DISCOVERED_ID=$(basename "$LATEST" .json)
      export WEIXIN_ACCOUNT_ID="$DISCOVERED_ID"
      echo "Auto-discovered WEIXIN_ACCOUNT_ID=$DISCOVERED_ID"
    fi
  fi
  if [ -z "$WEIXIN_TOKEN" ] && [ -n "$WEIXIN_ACCOUNT_ID" ]; then
    ACCOUNT_FILE="$ACCOUNTS_DIR/${WEIXIN_ACCOUNT_ID}.json"
    if [ -f "$ACCOUNT_FILE" ]; then
      # BUGFIX: pass the path via env instead of interpolating it into the
      # Python source (a quote/backslash in the path would break the program)
      DISCOVERED_TOKEN=$(HERMES_ACCOUNT_FILE="$ACCOUNT_FILE" python3 -c \
        "import json,os; print(json.load(open(os.environ['HERMES_ACCOUNT_FILE'])).get('token',''))" 2>/dev/null)
      if [ -n "$DISCOVERED_TOKEN" ]; then
        export WEIXIN_TOKEN="$DISCOVERED_TOKEN"
        echo "Restored WEIXIN_TOKEN from persisted account file"
      fi
    fi
  fi
fi

if [ -n "$WEIXIN_ACCOUNT_ID" ] && [ -n "$WEIXIN_TOKEN" ]; then
  echo "WeChat credentials ready (account=$(_mask_val "$WEIXIN_ACCOUNT_ID"))"
  # Persist credentials to account JSON so gateway's load_weixin_account() also finds them
  ACCOUNT_FILE="$ACCOUNTS_DIR/${WEIXIN_ACCOUNT_ID}.json"
  if [ ! -f "$ACCOUNT_FILE" ] || ! HERMES_ACCOUNT_FILE="$ACCOUNT_FILE" python3 -c \
      "import json,os; d=json.load(open(os.environ['HERMES_ACCOUNT_FILE'])); exit(0 if d.get('token') else 1)" 2>/dev/null; then
    # SECURITY/BUGFIX: the token travels through the environment, never through
    # interpolated Python source (a quote in the token would both break the
    # script and allow code injection into python3 -c)
    HERMES_ACCOUNT_FILE="$ACCOUNT_FILE" WEIXIN_TOKEN="$WEIXIN_TOKEN" python3 - <<'PY' 2>/dev/null && chmod 600 "$ACCOUNT_FILE"
import json, os, time
payload = {
    'token': os.environ.get('WEIXIN_TOKEN', ''),
    'base_url': 'https://ilinkai.weixin.qq.com',
    'saved_at': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()),
}
with open(os.environ['HERMES_ACCOUNT_FILE'], 'w') as f:
    json.dump(payload, f)
PY
    echo "WeChat credentials persisted to account file"
  fi
else
  echo "WARNING: WeChat not configured (no token/account). Run 'hermes gateway setup' to scan QR."
fi

# ── Persist .env across container rebuilds ──
# Priority: Space Secrets (env vars) > persistent storage
# SECURITY: .env is NO LONGER in git repo — use HF Space Secrets
ENV_FILE="$HERMES_HOME/.env"
ENV_DATA="/data/hermes/.env"

# Generate .env from Space Secrets (environment variables injected by HF)
# SECURITY: secrets are written to file ONLY — never echoed to stdout/build logs
if [ ! -f "$ENV_DATA" ] && [ -n "$OPENROUTER_API_KEY" ]; then
  echo "Generating .env from Space Secrets..."
  {
    echo "OPENROUTER_API_KEY=$OPENROUTER_API_KEY"
    [ -n "$OPENAI_API_KEY" ] && echo "OPENAI_API_KEY=$OPENAI_API_KEY"
    [ -n "$OPENAI_BASE_URL" ] && echo "OPENAI_BASE_URL=$OPENAI_BASE_URL"
    [ -n "$FEISHU_APP_ID" ] && echo "FEISHU_APP_ID=$FEISHU_APP_ID"
    [ -n "$FEISHU_APP_SECRET" ] && echo "FEISHU_APP_SECRET=$FEISHU_APP_SECRET"
    echo "GATEWAY_ALLOW_ALL_USERS=true"
    echo "HERMES_ACCEPT_HOOKS=1"
    [ -n "$MEMPALACE_PALACE_PATH" ] && echo "MEMPALACE_PALACE_PATH=$MEMPALACE_PALACE_PATH"
    [ -n "$FIRECRAWL_API_KEY" ] && echo "FIRECRAWL_API_KEY=$FIRECRAWL_API_KEY"
    [ -n "$WEIXIN_ACCOUNT_ID" ] && echo "WEIXIN_ACCOUNT_ID=$WEIXIN_ACCOUNT_ID"
    [ -n "$WEIXIN_TOKEN" ] && echo "WEIXIN_TOKEN=$WEIXIN_TOKEN"
  } > "$ENV_DATA"
  chmod 600 "$ENV_DATA"
  echo "Created .env from Space Secrets (keys masked below)"
  echo " OPENROUTER_API_KEY=$(_mask_val "$OPENROUTER_API_KEY")"
  [ -n "$OPENAI_API_KEY" ] && echo " OPENAI_API_KEY=$(_mask_val "$OPENAI_API_KEY")"
  [ -n "$FEISHU_APP_ID" ] && echo " FEISHU_APP_ID=$FEISHU_APP_ID"
  [ -n "$FEISHU_APP_SECRET" ] && echo " FEISHU_APP_SECRET=$(_mask_val "$FEISHU_APP_SECRET")"
  [ -n "$FIRECRAWL_API_KEY" ] && echo " FIRECRAWL_API_KEY=$(_mask_val "$FIRECRAWL_API_KEY")"
  [ -n "$WEIXIN_TOKEN" ] && echo " WEIXIN_TOKEN=$(_mask_val "$WEIXIN_TOKEN")"
fi

# Fallback: if no secrets and no persistent data
if [ ! -f "$ENV_DATA" ] && [ -f "/app/.env.example" ]; then
  cp "/app/.env.example" "$ENV_DATA"
  echo "WARNING: No .env found. Set API keys via HF Space Secrets!"
fi

# Always symlink
if [ ! -L "$ENV_FILE" ]; then
  rm -f "$ENV_FILE"
  ln -sf "$ENV_DATA" "$ENV_FILE"
  echo "Symlink: .env -> $ENV_DATA"
else
  echo "Symlink exists: .env"
fi

# Ensure WEIXIN_TOKEN/ACCOUNT_ID are in .env even if file was created earlier without them
if [ -f "$ENV_DATA" ] && [ -n "$WEIXIN_TOKEN" ] && ! grep -q '^WEIXIN_TOKEN=' "$ENV_DATA" 2>/dev/null; then
  echo "WEIXIN_TOKEN=$WEIXIN_TOKEN" >> "$ENV_DATA"
fi
if [ -f "$ENV_DATA" ] && [ -n "$WEIXIN_ACCOUNT_ID" ] && ! grep -q '^WEIXIN_ACCOUNT_ID=' "$ENV_DATA" 2>/dev/null; then
  echo "WEIXIN_ACCOUNT_ID=$WEIXIN_ACCOUNT_ID" >> "$ENV_DATA"
fi

# ── Persist config.yaml across container rebuilds ──
# WebUI settings page and WeChat save flow update ~/.hermes/config.yaml at runtime
CFG_FILE="$HERMES_HOME/config.yaml"
CFG_DATA="/data/hermes/config.yaml"
if [ -f "$CFG_FILE" ] && [ ! -L "$CFG_FILE" ] && [ ! -f "$CFG_DATA" ]; then
  # First time: migrate build-time config to persistent storage
  cp "$CFG_FILE" "$CFG_DATA"
  echo "Migrated config.yaml to persistent storage"
elif [ -L "$CFG_FILE" ] && [ ! -f "$CFG_DATA" ]; then
  # Symlink exists but target missing — recreate from repo copy
  if [ -f "/app/config.yaml" ]; then
    cp "/app/config.yaml" "$CFG_DATA"
    echo "Restored config.yaml from repo fallback"
  fi
fi
if [ ! -L "$CFG_FILE" ]; then
  rm -f "$CFG_FILE"
  ln -sf "$CFG_DATA" "$CFG_FILE"
  echo "Symlink: config.yaml -> $CFG_DATA"
else
  echo "Symlink exists: config.yaml"
fi

echo "Persistent storage ready."

# ── Persist WebUI credentials across rebuilds ──
WEBUI_HOME="/root/.hermes-web-ui"
WEBUI_DATA="/data/hermes/webui"
mkdir -p "$WEBUI_DATA"
if [ ! -L "$WEBUI_HOME" ] && [ -d "$WEBUI_HOME" ]; then
  # Migrate existing credentials to persistent storage
  if [ -f "$WEBUI_HOME/.credentials" ] && [ ! -f "$WEBUI_DATA/.credentials" ]; then
    cp "$WEBUI_HOME/.credentials" "$WEBUI_DATA/.credentials"
    echo "Migrated WebUI credentials to persistent storage"
  fi
  rm -rf "$WEBUI_HOME"
fi
if [ ! -L "$WEBUI_HOME" ]; then
  ln -sf "$WEBUI_DATA" "$WEBUI_HOME"
  echo "Symlink: hermes-web-ui -> $WEBUI_DATA"
fi

# ── Persist agency-agents across container rebuilds ──
# 211 expert role prompts for instant role switching
AGENCY_SRC="/app/agency-agents"
AGENCY_DST="/data/hermes/agency-agents"
AGENCY_LINK="$HERMES_HOME/agency-agents"
if [ -d "$AGENCY_SRC" ] && [ ! -d "$AGENCY_DST" ]; then
  cp -r "$AGENCY_SRC" "$AGENCY_DST"
  echo "Copied agency-agents to persistent storage"
fi
# Merge custom agents (Hermes extensions) into agency-agents directory
if [ -d "/app/custom-agents" ] && [ -d "$AGENCY_DST" ]; then
  cp -rn /app/custom-agents/* "$AGENCY_DST/" 2>/dev/null
  echo "Merged custom agents into agency-agents"
fi
if [ ! -L "$AGENCY_LINK" ]; then
  rm -rf "$AGENCY_LINK"
  if [ -d "$AGENCY_DST" ]; then
    ln -sf "$AGENCY_DST" "$AGENCY_LINK"
    echo "Symlink: agency-agents -> $AGENCY_DST"
  fi
fi

# Generate agent index JSON for fast role lookup
# Scans every non-meta .md file, reading name/description from the YAML
# front-matter when present, and writes .agent-index.json into the agents dir.
if [ -d "$AGENCY_DST" ] && command -v python3 &>/dev/null; then
  python3 << 'AGENT_INDEX'
import os, json, yaml
agents = []
base = '/data/hermes/agency-agents'
skip = {'README.md','README.zh-TW.md','CATALOG.md','AGENT-LIST.md','CONTRIBUTING.md','LICENSE','UPSTREAM.md','.gitattributes'}
for root, dirs, files in os.walk(base):
    for f in files:
        if f.endswith('.md') and f not in skip:
            path = os.path.join(root, f)
            rel = os.path.relpath(path, base)
            try:
                with open(path, encoding='utf-8') as fh:
                    content = fh.read()
                name = desc = dept = ''
                if content.startswith('---'):
                    parts = content.split('---', 2)
                    if len(parts) >= 3:
                        meta = yaml.safe_load(parts[1]) or {}
                        name = meta.get('name', '')
                        desc = meta.get('description', '')
                if not name:
                    name = f.replace('.md', '').replace('-', ' ').title()
                dept = rel.split('/')[0] if '/' in rel else 'root'
                agents.append({'id': f.replace('.md',''), 'name': name, 'desc': desc[:80], 'dept': dept, 'path': rel})
            except Exception:
                pass
agents.sort(key=lambda x: (x['dept'], x['name']))
idx_path = os.path.join(base, '.agent-index.json')
with open(idx_path, 'w', encoding='utf-8') as out:
    json.dump(agents, out, ensure_ascii=False, indent=2)
print(f"Agency agents indexed: {len(agents)} roles ready")
AGENT_INDEX
fi

# ── Kill any residual gateway processes from previous crash ──
echo "Cleaning up residual gateway processes..."
# Lingering hermes gateway processes cause a Feishu lock conflict.
# Try a graceful TERM first (was an unconditional kill -9), reuse the existing
# 2-second settle delay, then force-kill any survivor.
GW_PIDS=$(pgrep -f "hermes_cli.main.*gateway" 2>/dev/null)
for pid in $GW_PIDS; do
  echo " Killing residual gateway PID=$pid"
  kill "$pid" 2>/dev/null
done
sleep 2 # Wait for processes and ports to be fully released
for pid in $GW_PIDS; do
  kill -9 "$pid" 2>/dev/null # force any survivor that ignored TERM
done

# ── Clean up stale PID/lock files from previous crash ──
echo "Cleaning up stale state..."
rm -f "$HERMES_HOME/gateway.pid" 2>/dev/null
rm -f "$HERMES_HOME/.gateway_runtime_lock" 2>/dev/null
rm -f "$HERMES_HOME/.gateway_takeover" 2>/dev/null
rm -f /tmp/hermes-gateway.pid 2>/dev/null
# Clean Feishu lock files
rm -f "$HERMES_HOME"/feishu*.lock 2>/dev/null
rm -f "$HERMES_HOME"/*.feishu_lock 2>/dev/null
echo "Stale state cleaned."

# ─── Lifecycle heartbeat initialization ──────────────────
HERMES_DATA_DIR="/data/hermes"

# Ensure the scripts dir exists in persistent storage; sync the lifecycle script
mkdir -p "$HERMES_DATA_DIR/scripts"
if [ -f "/app/scripts/hermes-lifecycle.sh" ]; then
  cp -f "/app/scripts/hermes-lifecycle.sh" "$HERMES_DATA_DIR/scripts/hermes-lifecycle.sh"
  chmod +x "$HERMES_DATA_DIR/scripts/hermes-lifecycle.sh"
  echo "Lifecycle script synced to persistent storage."
fi

# Initialize identity.md (first run only — container restarts never overwrite it)
if [ ! -f "$HERMES_DATA_DIR/identity.md" ]; then
  cat > "$HERMES_DATA_DIR/identity.md" <<'IDENTITY'
# Hermes 身份记忆
# 这个文件定义了 Hermes 对自己的认知
# /reset 后此文件不会被清除(在持久化目录中)

## 基础信息
- 名字: Hermes
- 通道: 飞书(WebSocket) / 微信
- 主人: 用户344064

## 性格特征
- 中文为主,简洁有力
- 结果先行,解释后补
- 偶尔幽默但不影响效率
- 有工具、有记忆、有判断力

## 主人偏好
- 不喜欢废话,喜欢直给
- 欣赏有深度的技术分析
- 喜欢直来直去的沟通

## 运维记忆
IDENTITY
  echo "identity.md initialized."
else
  echo "identity.md exists (preserved)."
fi

# Initialize insights.md (first run only)
if [ ! -f "$HERMES_DATA_DIR/insights.md" ]; then
  cat > "$HERMES_DATA_DIR/insights.md" <<'INSIGHTS'
# Hermes 洞察日志 (insights.md)
# 自动记录异常、观察、值得汇报的事
# 类别: 通道异常 / 系统异常 / 用户洞察 / 技术发现 / 待办提醒 / 运维记忆
INSIGHTS
  echo "insights.md initialized."
else
  echo "insights.md exists (preserved)."
fi

# Initialize heartbeat-state.json (first run only)
if [ ! -f "$HERMES_DATA_DIR/heartbeat-state.json" ]; then
  echo '{"lastCheck":null,"lastConfigCheck":null,"totalRuns":0,"totalErrors":0,"consecutiveErrors":0,"lastError":null}' > "$HERMES_DATA_DIR/heartbeat-state.json"
  echo "heartbeat-state.json initialized."
else
  echo "heartbeat-state.json exists (preserved)."
fi

# Ensure the cron directory is persisted
mkdir -p "$HERMES_DATA_DIR/cron"
echo "Lifecycle heartbeat ready."

# Initialize MemPalace if not already
PALACE_PATH="${MEMPALACE_PALACE_PATH:-/data/hermes/palace}"
if [ ! -f "$PALACE_PATH/.palace_initialized" ]; then
  echo "Initializing MemPalace at $PALACE_PATH..."
  mempalace init "$PALACE_PATH" 2>/dev/null || echo "MemPalace init skipped (may already exist)"
  touch "$PALACE_PATH/.palace_initialized"
  echo "MemPalace initialized."
else
  echo "MemPalace already initialized."
fi

# ─── Auto-register Lifecycle Cron Job ─────────────────
# Single merged cron job: lifecycle-heartbeat
# Covers: health check / config integrity / log analysis / insights / cleanup / state update
# Runs every 2 hours.
CRON_DIR="$HERMES_DATA_DIR/cron"
CRON_JOBS="$CRON_DIR/jobs.json"
if [ -f "$CRON_JOBS" ]; then
  # Cron store exists — check whether lifecycle-heartbeat is registered
  if ! python3 -c "
import json
d=json.load(open('$CRON_JOBS'))
jobs=[j for j in d.get('jobs',[]) if j.get('name')=='lifecycle-heartbeat']
print('found' if jobs else 'missing')
" 2>/dev/null | grep -q "found"; then
    echo "Cron exists but lifecycle-heartbeat missing, injecting..."
    python3 -c "
import json, uuid
from datetime import datetime, timezone, timedelta
f='$CRON_JOBS'
d=json.load(open(f))
now=datetime.now(timezone(timedelta(hours=8)))
next_run=now.replace(minute=0,second=0,microsecond=0)+timedelta(hours=2)
d['jobs'].append({
    'id': uuid.uuid4().hex[:12],
    'name': 'lifecycle-heartbeat',
    'prompt': 'Execute lifecycle heartbeat: health check, config integrity, log analysis, insights, cleanup.',
    'skills': [], 'skill': None,
    'model': None, 'provider': None, 'base_url': None,
    'script': 'hermes-lifecycle.sh',
    'context_from': None,
    'schedule': {'kind': 'cron', 'expr': '0 0/2 * * *', 'display': '0 0/2 * * *'},
    'schedule_display': '0 0/2 * * *',
    'repeat': {'times': None, 'completed': 0},
    'enabled': True, 'state': 'scheduled',
    'paused_at': None, 'paused_reason': None,
    'created_at': now.isoformat(),
    'next_run_at': next_run.isoformat(),
    'last_run_at': None, 'last_status': None, 'last_error': None,
    'last_delivery_error': None,
    'deliver': ['local'],
    'origin': 'start.sh-auto-inject',
    'enabled_toolsets': None,
    'workdir': '/data/hermes'
})
d['updated_at']=now.isoformat()
json.dump(d,open(f,'w'),indent=2)
print('lifecycle-heartbeat cron injected')
" 2>/dev/null && echo "OK" || echo "WARN: Failed to inject cron job"
  else
    echo "lifecycle-heartbeat cron already configured."
  fi
else
  # First run: create cron jobs.json
  mkdir -p "$CRON_DIR"
  python3 -c "
import json, uuid
from datetime import datetime, timezone, timedelta
now=datetime.now(timezone(timedelta(hours=8)))
next_run=now.replace(minute=0,second=0,microsecond=0)+timedelta(hours=2)
d={
    'jobs': [{
        'id': uuid.uuid4().hex[:12],
        'name': 'lifecycle-heartbeat',
        'prompt': 'Execute lifecycle heartbeat: health check, config integrity, log analysis, insights, cleanup.',
        'skills': [], 'skill': None,
        'model': None, 'provider': None, 'base_url': None,
        'script': 'hermes-lifecycle.sh',
        'context_from': None,
        'schedule': {'kind': 'cron', 'expr': '0 0/2 * * *', 'display': '0 0/2 * * *'},
        'schedule_display': '0 0/2 * * *',
        'repeat': {'times': None, 'completed': 0},
        'enabled': True, 'state': 'scheduled',
        'paused_at': None, 'paused_reason': None,
        'created_at': now.isoformat(),
        'next_run_at': next_run.isoformat(),
        'last_run_at': None, 'last_status': None, 'last_error': None,
        'last_delivery_error': None,
        'deliver': ['local'],
        'origin': 'start.sh-auto-inject',
        'enabled_toolsets': None,
        'workdir': '/data/hermes'
    }],
    'updated_at': now.isoformat()
}
json.dump(d,open('$CRON_JOBS','w'),indent=2)
print('lifecycle-heartbeat cron created')
" 2>/dev/null && echo "Cron job auto-created." || echo "WARN: Failed to create cron job"
fi

# ── Gateway startup is handled by entry.py watchdog ──
# Do NOT start gateway here — entry.py's _gateway_watchdog thread manages
# the full lifecycle (start, monitor, zombie-detect, restart with --replace).
# Starting gateway from both start.sh AND entry.py causes PID conflicts
# and "Another gateway already using this Feishu app_id" errors.
echo "[$(date)] Gateway will be started by entry.py watchdog"
echo "[$(date)] Waiting for gateway to be ready on :8642..."
# Poll the gateway health endpoint (entry.py starts it) for up to 2 minutes.
for i in $(seq 1 60); do
  if curl -s http://127.0.0.1:8642/health > /dev/null 2>&1; then
    echo "[$(date)] Gateway is ready on :8642"
    break
  fi
  sleep 2
done

# ── Auto-update hermes-agent if newer release exists ──
# hermes-agent is pip install -e (editable), so git pull + pip upgrade = instant.
# Safety: update runs in background; if pip fails, old code stays intact.
# Set AGENT_AUTO_UPDATE=false to disable.
update_hermes_agent_background() {
  [ "${AGENT_AUTO_UPDATE}" = "false" ] && return
  AGENT_REPO="NousResearch/hermes-agent"
  AGENT_DIR="/app/hermes-agent"
  VERSION_FILE="/data/hermes/agent.version"
  API_URL="https://api.github.com/repos/${AGENT_REPO}/releases/latest"
  EXTRAS="feishu,mcp,cron,pty"

  # ── Phase 0: Unshallow the clone if needed ──
  # Dockerfile uses `git clone --depth 1` which prevents checking out
  # any tag/commit outside the shallow boundary.
  # Without this, `git rev-parse <tag>` ALWAYS fails after a rebuild.
  if [ -f "$AGENT_DIR/.git/shallow" ]; then
    echo "[$(date)] Agent auto-update: unshallowing clone (Dockerfile --depth 1)..."
    git -C "$AGENT_DIR" fetch --unshallow origin 2>&1 | tail -3
    # BUGFIX: `if cmd | tail -3` tested tail's exit status (always 0), so a
    # failed unshallow was always reported as success. Test the fetch stage
    # itself via PIPESTATUS.
    if [ "${PIPESTATUS[0]}" -eq 0 ]; then
      echo "[$(date)] Agent auto-update: clone unshallowed successfully"
    else
      echo "[$(date)] Agent auto-update: unshallow failed, tag checkout may not work"
    fi
  fi

  # ── Phase 1: Detect actual code version vs recorded version ──
  # After a HF Space rebuild, /app/hermes-agent is re-cloned at the
  # Dockerfile pinned version, but /data/hermes/agent.version (persistent)
  # still says the newer version from the previous auto-update.
  # This mismatch causes the updater to think it's already up to date.
ACTUAL_TAG=$(git -C "$AGENT_DIR" describe --tags --exact-match 2>/dev/null || echo "") BUILD_VERSION="$(cat /app/hermes-agent.version 2>/dev/null | head -1)" # Current version from persistent storage (survives rebuilds) CURRENT_VERSION="$(cat "$VERSION_FILE" 2>/dev/null | head -1)" if [ -z "$CURRENT_VERSION" ]; then CURRENT_VERSION="$BUILD_VERSION" [ -z "$CURRENT_VERSION" ] && CURRENT_VERSION="v2026.4.30" echo "$CURRENT_VERSION" > "$VERSION_FILE" fi # Detect rebuild mismatch: actual git tag ≠ recorded version NEED_FORCE=false if [ -n "$ACTUAL_TAG" ] && [ "$ACTUAL_TAG" != "$CURRENT_VERSION" ]; then echo "[$(date)] Agent auto-update: REBUILD DETECTED (actual=$ACTUAL_TAG, recorded=$CURRENT_VERSION)" echo "[$(date)] Agent auto-update: code was reset to Dockerfile version by container rebuild" NEED_FORCE=true # Reset comparison baseline to actual (old) code version CURRENT_VERSION="$ACTUAL_TAG" fi # Version comparison helper: strip leading 'v', compare date-style like 2026.4.30 compare_date_versions() { local a="${1#v}" b="${2#v}" IFS='.' read -ra A <<< "$a" IFS='.' read -ra B <<< "$b" for i in 0 1 2; do local ai=${A[$i]:-0} bi=${B[$i]:-0} if [ "$bi" -gt "$ai" ] 2>/dev/null; then return 0; fi if [ "$bi" -lt "$ai" ] 2>/dev/null; then return 1; fi done return 1 # equal or older } echo "[$(date)] Agent auto-update: checking (current: $CURRENT_VERSION, actual: $ACTUAL_TAG, latest: querying...)" # ── Phase 2: Query GitHub API for latest release ── LATEST_JSON=$(curl -sf --connect-timeout 10 --max-time 20 "$API_URL" 2>/dev/null) if [ $? 
-ne 0 ] || [ -z "$LATEST_JSON" ]; then echo "[$(date)] Agent auto-update: failed to reach GitHub API, skipping" return fi LATEST_TAG=$(echo "$LATEST_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('tag_name',''))" 2>/dev/null) if [ -z "$LATEST_TAG" ]; then echo "[$(date)] Agent auto-update: could not parse latest tag, skipping" return fi echo "[$(date)] Agent auto-update: latest release is $LATEST_TAG" # ── Phase 3: Decide if update is needed ── if compare_date_versions "$CURRENT_VERSION" "$LATEST_TAG"; then echo "[$(date)] Agent auto-update: upgrading $CURRENT_VERSION → $LATEST_TAG ..." elif [ "$NEED_FORCE" = "true" ]; then # Rebuild detected: latest = recorded version, but code is still old. # Re-apply the update to restore correct version. echo "[$(date)] Agent auto-update: re-applying $LATEST_TAG after rebuild (code was reset to $ACTUAL_TAG)" else echo "[$(date)] Agent auto-update: $CURRENT_VERSION is up to date" return fi # ── Phase 4: git fetch + checkout new tag (non-destructive) ── cd "$AGENT_DIR" if ! git fetch --tags origin 2>&1 | tail -3; then echo "[$(date)] Agent auto-update: git fetch failed, aborting" return fi # Verify tag exists (after unshallow, this should succeed) if ! git rev-parse "$LATEST_TAG" >/dev/null 2>&1; then echo "[$(date)] Agent auto-update: tag $LATEST_TAG not found locally, fetching explicitly..." if ! git fetch origin "refs/tags/$LATEST_TAG:refs/tags/$LATEST_TAG" 2>&1; then echo "[$(date)] Agent auto-update: explicit tag fetch failed, aborting" return fi if ! git rev-parse "$LATEST_TAG" >/dev/null 2>&1; then echo "[$(date)] Agent auto-update: tag $LATEST_TAG still not found, aborting" return fi fi # Phase 5: checkout new version if ! 
git checkout "$LATEST_TAG" 2>&1 | tail -3; then echo "[$(date)] Agent auto-update: git checkout failed, aborting" # Try to recover to previous version git checkout "$ACTUAL_TAG" 2>/dev/null return fi # Phase 6: update pip dependencies (editable install) echo "[$(date)] Agent auto-update: updating pip dependencies..." if ! pip install --quiet -e "/app/hermes-agent[${EXTRAS}]" 2>&1 | tail -10; then echo "[$(date)] Agent auto-update: pip install failed, rolling back" git checkout "$ACTUAL_TAG" 2>/dev/null pip install --quiet -e "/app/hermes-agent[${EXTRAS}]" 2>/dev/null return fi # Phase 7: reinstall our patches on top of new version echo "[$(date)] Agent auto-update: re-applying Hermes Bot patches..." if [ -f "/app/scripts/patch_file_delivery.py" ]; then python3 /app/scripts/patch_file_delivery.py 2>/dev/null fi if [ -f "/app/scripts/patch_auto_media.py" ]; then python3 /app/scripts/patch_auto_media.py 2>/dev/null fi if [ -f "/app/scripts/patch_resolve_media_paths.py" ]; then python3 /app/scripts/patch_resolve_media_paths.py 2>/dev/null fi if [ -f "/app/scripts/patch_weixin_cross_loop.py" ]; then python3 /app/scripts/patch_weixin_cross_loop.py 2>/dev/null fi if [ -f "/app/scripts/patch_web_search_fallback.py" ]; then python3 /app/scripts/patch_web_search_fallback.py 2>/dev/null fi if [ -f "/app/scripts/patch_strip_thinking_tags.py" ]; then python3 /app/scripts/patch_strip_thinking_tags.py 2>/dev/null fi if [ -f "/app/scripts/patch_sandbox_isolation.py" ]; then python3 /app/scripts/patch_sandbox_isolation.py 2>/dev/null fi # Copy patch files if they exist for patch_file in prompt_builder.py send_message_tool.py; do if [ -f "/app/patches/hermes-agent/agent/$patch_file" ] && [ -f "$AGENT_DIR/agent/$patch_file" ]; then cp "/app/patches/hermes-agent/agent/$patch_file" "$AGENT_DIR/agent/$patch_file" 2>/dev/null fi done # Save new version echo "$LATEST_TAG" > "$VERSION_FILE" echo "$(date '+%Y-%m-%d %H:%M:%S')" >> "$VERSION_FILE" echo "[$(date)] Agent auto-update: upgraded 
to $LATEST_TAG ✓ (restart needed for full effect)" # Phase 8: schedule gateway restart for clean reload # Send SIGUSR1 to entry.py to trigger gateway restart cycle ENTRY_PID=$(pgrep -f "python3 /app/entry.py" 2>/dev/null | head -1) if [ -n "$ENTRY_PID" ]; then kill -USR1 "$ENTRY_PID" 2>/dev/null && \ echo "[$(date)] Agent auto-update: sent reload signal to entry.py (PID: $ENTRY_PID)" || \ echo "[$(date)] Agent auto-update: gateway will use new code on next conversation" fi } # ── Auto-update hermes-web-ui if newer release exists ── # Runs asynchronously so it doesn't block startup. # All output goes to /data/hermes/logs/auto-update.log (not stdout, which gets eaten by exec). # Set WEBUI_AUTO_UPDATE=false to disable. update_webui_background() { [ "${WEBUI_AUTO_UPDATE}" = "false" ] && return WEBUI_REPO="EKKOLearnAI/hermes-web-ui" VERSION_FILE="/data/hermes/webui.version" BUILD_VERSION_FILE="/app/webui.version" BUILD_TMP="/tmp/webui-update" WEBUI_INSTALL="/app/webui-server" WEBUI_CLIENT="/app/webui-client" API_URL="https://api.github.com/repos/${WEBUI_REPO}/releases/latest" UPDATE_LOG="/data/hermes/logs/auto-update.log" # Tee all output to log file for diagnostics _log() { echo "[$(date)] $*"; } _log_and_tee() { _log "$*" | tee -a "$UPDATE_LOG"; } _log_and_tee "=== WebUI auto-update starting ===" # ── Detect rebuild: Dockerfile writes /app/webui.version, persistent is /data/hermes/ ── BUILD_VERSION="$(cat "$BUILD_VERSION_FILE" 2>/dev/null | head -1)" RECORDED_VERSION="$(cat "$VERSION_FILE" 2>/dev/null | head -1)" NEED_FORCE=false if [ -n "$BUILD_VERSION" ] && [ -n "$RECORDED_VERSION" ] && [ "$BUILD_VERSION" != "$RECORDED_VERSION" ]; then _log_and_tee "REBUILD DETECTED (Dockerfile=$BUILD_VERSION, recorded=$RECORDED_VERSION)" NEED_FORCE=true CURRENT_VERSION="$BUILD_VERSION" elif [ -n "$RECORDED_VERSION" ]; then CURRENT_VERSION="$RECORDED_VERSION" elif [ -n "$BUILD_VERSION" ]; then CURRENT_VERSION="$BUILD_VERSION" else CURRENT_VERSION="v0.5.5" echo "$CURRENT_VERSION" > 
"$VERSION_FILE" fi _log_and_tee "Checking: current=$CURRENT_VERSION, Dockerfile=$BUILD_VERSION, latest=?" # Query GitHub API for latest release tag LATEST_JSON=$(curl -sf --connect-timeout 10 --max-time 20 "$API_URL" 2>/dev/null) if [ $? -ne 0 ] || [ -z "$LATEST_JSON" ]; then _log_and_tee "ERROR: failed to reach GitHub API, skipping" return fi LATEST_TAG=$(echo "$LATEST_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('tag_name',''))" 2>/dev/null) if [ -z "$LATEST_TAG" ]; then _log_and_tee "ERROR: could not parse latest tag, skipping" return fi _log_and_tee "Latest release: $LATEST_TAG" # Compare versions CURRENT_NUM="${CURRENT_VERSION#v}" LATEST_NUM="${LATEST_TAG#v}" if [ "$CURRENT_NUM" = "$LATEST_NUM" ] && [ "$NEED_FORCE" = "false" ]; then _log_and_tee "Already on latest ($CURRENT_VERSION)" return fi update_needed=false IFS='.' read -ra C <<< "$CURRENT_NUM" IFS='.' read -ra L <<< "$LATEST_NUM" for i in 0 1 2; do c=${C[$i]:-0}; l=${L[$i]:-0} if [ "$l" -gt "$c" ] 2>/dev/null; then update_needed=true; break; fi if [ "$l" -lt "$c" ] 2>/dev/null; then break; fi done if [ "$update_needed" = "false" ] && [ "$NEED_FORCE" = "false" ]; then _log_and_tee "Current $CURRENT_VERSION is up to date" return fi if [ "$NEED_FORCE" = "true" ] && [ "$update_needed" = "false" ]; then _log_and_tee "Re-applying $LATEST_TAG after rebuild (code reset to $BUILD_VERSION)" else _log_and_tee "Upgrading $CURRENT_VERSION -> $LATEST_TAG" fi # ── Build with retry (2 attempts) ── for attempt in 1 2; do _log_and_tee "Build attempt $attempt/2..." # Clone rm -rf "$BUILD_TMP" if ! git clone --depth 1 --branch "$LATEST_TAG" "https://github.com/${WEBUI_REPO}.git" "$BUILD_TMP" 2>&1 | tee -a "$UPDATE_LOG" | tail -3; then _log_and_tee "ERROR: git clone failed" rm -rf "$BUILD_TMP" [ "$attempt" -lt 2 ] && sleep 10 && continue return fi cd "$BUILD_TMP" # Install (with timeout) _log_and_tee "Running npm install..." if ! 
timeout 120 npm install --ignore-scripts 2>&1 | tee -a "$UPDATE_LOG" | tail -5; then _log_and_tee "ERROR: npm install failed/timed out" rm -rf "$BUILD_TMP" [ "$attempt" -lt 2 ] && sleep 10 && continue return fi # Rebuild native modules (required by node-pty, matching upstream Dockerfile) _log_and_tee "Running npm rebuild node-pty..." npm rebuild node-pty 2>&1 | tee -a "$UPDATE_LOG" | tail -5 # Build (with memory limit, matching upstream Dockerfile) _log_and_tee "Running npm run build (NODE_OPTIONS=--max-old-space-size=4096)..." if ! timeout 180 env NODE_OPTIONS=--max-old-space-size=4096 npm run build 2>&1 | tee -a "$UPDATE_LOG" | tail -15; then _log_and_tee "ERROR: npm build failed/timed out" rm -rf "$BUILD_TMP" [ "$attempt" -lt 2 ] && sleep 10 && continue return fi # Verify build output if [ ! -d "$BUILD_TMP/dist/server" ] || [ ! -d "$BUILD_TMP/dist/client" ]; then _log_and_tee "ERROR: build output missing (no dist/server or dist/client)" ls -la "$BUILD_TMP/dist/" 2>&1 | tee -a "$UPDATE_LOG" rm -rf "$BUILD_TMP" [ "$attempt" -lt 2 ] && sleep 10 && continue return fi _log_and_tee "Build succeeded!" break # exit retry loop done # Hot-swap: kill old WebUI process, replace files, restart _log_and_tee "Hot-swapping: stopping old WebUI, replacing files..." 
OLD_WEBUI_PID=$(pgrep -f "node index.js" 2>/dev/null | head -1) if [ -n "$OLD_WEBUI_PID" ]; then kill "$OLD_WEBUI_PID" 2>/dev/null sleep 2 # Force kill if still running kill -9 "$OLD_WEBUI_PID" 2>/dev/null _log_and_tee "Killed old WebUI PID=$OLD_WEBUI_PID" fi # Install new files rm -rf "$WEBUI_INSTALL" "$WEBUI_CLIENT" mkdir -p "$WEBUI_INSTALL" "$WEBUI_CLIENT" cp -r "$BUILD_TMP/dist/server/"* "$WEBUI_INSTALL/" cp -r "$BUILD_TMP/dist/client/"* "$WEBUI_CLIENT/" cp "$BUILD_TMP/package.json" "$WEBUI_INSTALL/package.json" # Install production-only node_modules cd "$BUILD_TMP" npm prune --omit=dev 2>&1 | tail -3 cp -r node_modules "$WEBUI_INSTALL/node_modules" # Save new version echo "$LATEST_TAG" > "$VERSION_FILE" echo "$(date '+%Y-%m-%d %H:%M:%S')" >> "$VERSION_FILE" # Restart WebUI cd "$WEBUI_INSTALL" export PORT=6060 UPSTREAM=http://127.0.0.1:8642 HERMES_HOME=/root/.hermes export AUTH_TOKEN="${AUTH_TOKEN:-hermes-bot-2026}" CORS_ORIGINS="*" NODE_ENV=production node index.js >> /data/hermes/logs/webui.log 2>&1 & NEW_PID=$! _log_and_tee "WebUI upgraded to $LATEST_TAG (new PID: $NEW_PID)" # Verify sleep 3 if curl -sf http://127.0.0.1:6060/health > /dev/null 2>&1; then _log_and_tee "$LATEST_TAG is running and healthy" else _log_and_tee "WARNING: health check failed after upgrade" fi rm -rf "$BUILD_TMP" _log_and_tee "=== WebUI auto-update complete ===" } # ── Start hermes-web-ui Node.js BFF server on :6060 ── echo "[$(date)] Starting hermes-web-ui BFF..." export PORT=6060 export UPSTREAM=http://127.0.0.1:8642 export HERMES_HOME=/root/.hermes export AUTH_TOKEN="${AUTH_TOKEN:-hermes-bot-2026}" export CORS_ORIGINS="*" export NODE_ENV=production cd /app/webui-server node index.js >> /data/hermes/logs/webui.log 2>&1 & WEBUI_PID=$! echo "[$(date)] WebUI BFF PID: $WEBUI_PID" # ── Force-correct version display ── # Ensure __init__.py shows semver (e.g. 0.12.0) not git tag date (e.g. 
2026.4.30) # This runs after any potential auto-update has changed the files HERMES_INIT="/app/hermes-agent/hermes_cli/__init__.py" if [ -f "$HERMES_INIT" ]; then # Read the git tag from version file to map date → semver CURRENT_TAG="$(cat /data/hermes/agent.version 2>/dev/null | head -1)" CURRENT_TAG="${CURRENT_TAG:-v2026.4.30}" # Build version mapping: date-tag → semver case "$CURRENT_TAG" in v2026.4.30) SEMVER="0.12.0"; RDATE="2026.4.30" ;; *) SEMVER=""; RDATE="" ;; esac if [ -n "$SEMVER" ]; then sed -i "s/__version__\s*=\s*\"[^"]*\"/__version__ = \"$SEMVER\"/" "$HERMES_INIT" sed -i "s/__release_date__\s*=\s*\"[^"]*\"/__release_date__ = \"$RDATE\"/" "$HERMES_INIT" echo "Version patched: v$SEMVER ($RDATE)" fi fi # Trigger hermes-agent auto-update in background (framework first, then UI) update_hermes_agent_background & # Trigger WebUI auto-update in background (non-blocking) # Will check GitHub, build if newer, and hot-swap update_webui_background & # Wait for WebUI BFF to be ready echo "[$(date)] Waiting for WebUI BFF to start..." for i in $(seq 1 15); do if curl -s http://127.0.0.1:6060/health > /dev/null 2>&1; then echo "[$(date)] WebUI BFF is ready on :6060" break fi sleep 2 done # ── Auto-setup WebUI credentials if not configured ── AUTH_TOKEN="${AUTH_TOKEN:-hermes-bot-2026}" WEBUI_USER="${WEBUI_USERNAME:-admin}" WEBUI_PASS="${WEBUI_PASSWORD:-Hermes2026}" AUTH_STATUS=$(curl -s http://127.0.0.1:6060/api/auth/status 2>/dev/null) HAS_PW=$(echo "$AUTH_STATUS" | python3 -c "import json,sys; print(json.load(sys.stdin).get('hasPasswordLogin',False))" 2>/dev/null) if [ "$HAS_PW" = "False" ]; then echo "[$(date)] WebUI: No credentials configured, auto-setting up..." 
SETUP_RESULT=$(curl -s -w "\n%{http_code}" -X POST http://127.0.0.1:6060/api/auth/setup \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $AUTH_TOKEN" \ -d "{\"username\":\"$WEBUI_USER\",\"password\":\"$WEBUI_PASS\"}" 2>/dev/null) SETUP_CODE=$(echo "$SETUP_RESULT" | tail -1) if [ "$SETUP_CODE" = "200" ]; then echo "[$(date)] WebUI: Credentials auto-configured (user: $WEBUI_USER)" else echo "[$(date)] WebUI: Auto-setup failed: $SETUP_RESULT" fi else echo "[$(date)] WebUI: Credentials already configured" fi echo "" echo "=== All services started ===" echo " Gateway: http://127.0.0.1:8642 (with Python watchdog in entry.py)" echo " WebUI: http://127.0.0.1:6060" echo " Proxy: http://0.0.0.0:7860" echo " Auth Token: $(_mask_val "$AUTH_TOKEN")" echo "" # Start Python proxy on :7860 (main HF Space port) # entry.py contains a Python-based gateway watchdog that will auto-restart # the gateway if it dies, regardless of what happens to this shell script exec python3 /app/entry.py