File size: 37,158 Bytes
6ffc130
189ae90
 
6ffc130
ed0e617
 
cbfbec3
 
 
f55a35b
d2735be
6ffc130
f55a35b
6ffc130
 
 
 
 
f55a35b
6ffc130
 
 
 
7adec1d
d2735be
 
 
 
 
 
 
 
 
 
 
 
 
 
a9f57ed
 
 
31a92da
a9f57ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31a92da
 
 
a9f57ed
 
 
31a92da
a9f57ed
 
 
 
 
 
 
5e3d05b
a9f57ed
 
5e3d05b
 
62ec96b
 
 
272cc2c
 
62ec96b
692d0b9
 
 
 
 
 
 
62ec96b
692d0b9
62ec96b
 
 
 
 
 
 
 
 
 
 
 
a9f57ed
 
62ec96b
272cc2c
692d0b9
 
 
 
 
 
a9f57ed
272cc2c
62ec96b
 
 
 
 
 
 
 
272cc2c
 
 
 
 
 
 
 
a9f57ed
 
 
 
 
 
 
 
272cc2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f55a35b
946e383
b820b36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c7e90d
 
 
 
 
 
 
 
 
7b0e47a
 
 
 
 
9c7e90d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b24c76e
 
 
 
 
 
 
 
 
300ab73
 
 
 
 
189ae90
b24c76e
 
 
300ab73
 
3e3cf48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f55a35b
 
 
 
 
 
 
 
 
 
7adec1d
3af3d87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20f5934
 
 
 
 
 
 
 
189ae90
 
ed0e617
 
189ae90
ed0e617
 
f0b15e1
 
 
 
 
 
 
 
 
 
 
 
 
69fc040
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0b15e1
 
69fc040
f0b15e1
 
 
 
69fc040
 
 
 
 
 
 
 
 
 
f0b15e1
 
 
 
 
 
 
 
 
 
 
 
 
69fc040
f0b15e1
69fc040
f0b15e1
 
 
 
 
 
 
 
 
 
 
 
 
 
69fc040
f0b15e1
 
69fc040
 
 
 
f0b15e1
 
 
 
 
69fc040
f0b15e1
 
 
 
 
 
69fc040
f0b15e1
69fc040
 
 
 
 
 
 
 
 
f0b15e1
 
69fc040
f0b15e1
 
 
69fc040
f0b15e1
 
 
69fc040
f0b15e1
 
 
69fc040
f0b15e1
 
 
 
69fc040
f0b15e1
 
 
 
 
 
 
 
 
4036608
 
 
 
 
 
f52e0ed
 
 
06cb3d4
 
 
f0b15e1
 
 
 
 
 
 
 
 
 
 
 
 
69fc040
f0b15e1
 
 
 
 
 
 
 
 
7bfa1b4
 
e10e850
7bfa1b4
 
 
 
 
 
99efc6a
7bfa1b4
 
 
 
e10e850
 
 
 
 
 
 
7bfa1b4
99efc6a
 
 
 
 
 
e10e850
99efc6a
 
 
 
 
 
 
 
7bfa1b4
 
 
e10e850
7bfa1b4
 
 
 
e10e850
7bfa1b4
 
 
 
 
e10e850
7bfa1b4
 
 
e10e850
7bfa1b4
e10e850
7bfa1b4
 
 
99efc6a
e10e850
7bfa1b4
 
 
 
 
 
 
 
e10e850
 
7bfa1b4
 
99efc6a
e10e850
7bfa1b4
 
 
99efc6a
e10e850
99efc6a
e10e850
99efc6a
7bfa1b4
e10e850
 
 
7bfa1b4
e10e850
7bfa1b4
e10e850
 
 
 
 
 
7bfa1b4
e10e850
7bfa1b4
e10e850
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bfa1b4
 
e10e850
7bfa1b4
e10e850
 
 
 
 
 
 
 
 
 
 
 
 
7bfa1b4
e10e850
 
 
 
 
7bfa1b4
 
 
 
 
 
 
e10e850
 
7bfa1b4
 
e10e850
7bfa1b4
 
 
e10e850
 
7bfa1b4
e10e850
7bfa1b4
 
 
e10e850
7bfa1b4
 
189ae90
300ab73
ed0e617
 
 
 
 
 
 
 
 
300ab73
ed0e617
460aac4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0b15e1
 
 
7bfa1b4
 
 
 
ed0e617
300ab73
189ae90
ed0e617
300ab73
ed0e617
 
 
 
 
b820b36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed0e617
 
189ae90
ed0e617
 
692d0b9
ed0e617
 
 
189ae90
 
f55a35b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
#!/bin/bash
# Hermes Bot — HuggingFace Space Startup
# NOTE: No 'set -e' — gateway restarts should not kill the entire script

echo "=== Hermes Bot — HuggingFace Space Startup ==="

# Ensure system timezone matches config (logging timestamps use system TZ)
export TZ="${TZ:-Asia/Shanghai}"

# Ensure persistent storage directories exist
mkdir -p /data/hermes/{sessions,memories,uploads,logs,palace,skills,weixin}

# Create symlinks from hermes home to persistent storage.
# NOTE: 'weixin' is created in /data above but deliberately NOT linked in
# this loop — it needs a data-migration step and is handled separately below.
HERMES_HOME="/root/.hermes"
for dir in sessions memories uploads logs palace skills; do
    target="$HERMES_HOME/$dir"
    # Only link when neither a symlink nor a real directory exists yet;
    # an existing real directory is left untouched (no silent data loss).
    if [ ! -L "$target" ] && [ ! -d "$target" ]; then
        ln -sf "/data/hermes/$dir" "$target"
        echo "Created symlink: $dir -> /data/hermes/$dir"
    elif [ -L "$target" ]; then
        echo "Symlink exists: $dir"
    fi
done

# Keep WeChat/Weixin session state on persistent storage across rebuilds.
# The Weixin adapter stores auth tokens, context tokens, and sync cursors
# in ~/.hermes/weixin/ — without this, WeChat binding breaks on rebuild.
WEIXIN_DIR="$HERMES_HOME/weixin"
if [ -d "$WEIXIN_DIR" ] && [ ! -L "$WEIXIN_DIR" ]; then
    # Real directory from a previous run: copy its contents over
    # (-n: already-persisted files win), then drop the directory.
    cp -rn "$WEIXIN_DIR"/* /data/hermes/weixin/ 2>/dev/null
    rm -rf "$WEIXIN_DIR"
fi
if [ ! -L "$WEIXIN_DIR" ]; then
    ln -sf "/data/hermes/weixin" "$WEIXIN_DIR"
    echo "Symlink: weixin -> /data/hermes/weixin"
fi

# ── WeChat credential persistence ──
# Priority: HF Space Secrets > persisted account JSON file > .env file
# Once set via HF Space Secrets, WeChat survives ALL container rebuilds.
ACCOUNTS_DIR="/data/hermes/weixin/accounts"
mkdir -p "$ACCOUNTS_DIR"

if [ -z "$WEIXIN_ACCOUNT_ID" ] || [ -z "$WEIXIN_TOKEN" ]; then
    # Fallback: restore from persisted account JSON file.
    # Pick the most recently modified plain account file; helper files
    # (*.context-tokens.json / *.sync.json) are excluded.
    if [ -z "$WEIXIN_ACCOUNT_ID" ] && [ -d "$ACCOUNTS_DIR" ]; then
        LATEST=$(find "$ACCOUNTS_DIR" -name "*.json" ! -name "*.context-tokens.json" ! -name "*.sync.json" -type f -printf '%T@ %p\n' 2>/dev/null | sort -rn | head -1 | awk '{print $2}')
        if [ -n "$LATEST" ]; then
            DISCOVERED_ID=$(basename "$LATEST" .json)
            export WEIXIN_ACCOUNT_ID="$DISCOVERED_ID"
            echo "Auto-discovered WEIXIN_ACCOUNT_ID=$DISCOVERED_ID"
        fi
    fi
    if [ -z "$WEIXIN_TOKEN" ] && [ -n "$WEIXIN_ACCOUNT_ID" ]; then
        ACCOUNT_FILE="$ACCOUNTS_DIR/${WEIXIN_ACCOUNT_ID}.json"
        if [ -f "$ACCOUNT_FILE" ]; then
            # SECURITY: pass the path via argv instead of interpolating it
            # into the Python source, so odd characters cannot break it.
            DISCOVERED_TOKEN=$(python3 -c "import json,sys; d=json.load(open(sys.argv[1])); print(d.get('token',''))" "$ACCOUNT_FILE" 2>/dev/null)
            if [ -n "$DISCOVERED_TOKEN" ]; then
                export WEIXIN_TOKEN="$DISCOVERED_TOKEN"
                echo "Restored WEIXIN_TOKEN from persisted account file"
            fi
        fi
    fi
fi

if [ -n "$WEIXIN_ACCOUNT_ID" ] && [ -n "$WEIXIN_TOKEN" ]; then
    # BUGFIX: _mask_val is defined further down this script, so it was not
    # yet in scope here and the original call failed at runtime.  Apply the
    # same first-6/****/last-4 masking rule inline instead.
    if [ "${#WEIXIN_ACCOUNT_ID}" -lt 12 ]; then
        MASKED_ACCOUNT="****"
    else
        MASKED_ACCOUNT="${WEIXIN_ACCOUNT_ID:0:6}****${WEIXIN_ACCOUNT_ID: -4}"
    fi
    echo "WeChat credentials ready (account=$MASKED_ACCOUNT)"
    # Persist credentials to account JSON so gateway's load_weixin_account() also finds them
    ACCOUNT_FILE="$ACCOUNTS_DIR/${WEIXIN_ACCOUNT_ID}.json"
    if [ ! -f "$ACCOUNT_FILE" ] || ! python3 -c "import json,sys; d=json.load(open(sys.argv[1])); exit(0 if d.get('token') else 1)" "$ACCOUNT_FILE" 2>/dev/null; then
        # SECURITY: the token is read from the environment inside Python
        # rather than interpolated into the generated program — a token
        # containing a quote would otherwise break (or inject into) it.
        WX_ACCOUNT_FILE="$ACCOUNT_FILE" python3 - <<'PY' 2>/dev/null && chmod 600 "$ACCOUNT_FILE"
import json, os, time
payload = {
    'token': os.environ.get('WEIXIN_TOKEN', ''),
    'base_url': 'https://ilinkai.weixin.qq.com',
    'saved_at': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()),
}
with open(os.environ['WX_ACCOUNT_FILE'], 'w') as f:
    json.dump(payload, f)
PY
        echo "WeChat credentials persisted to account file"
    fi
else
    echo "WARNING: WeChat not configured (no token/account). Run 'hermes gateway setup' to scan QR."
fi

# -- Persist .env across container rebuilds --
# Priority: Space Secrets (env vars) > persistent storage
# SECURITY: .env is NO LONGER in git repo -- use HF Space Secrets
ENV_FILE="$HERMES_HOME/.env"
ENV_DATA="/data/hermes/.env"

# Helper: mask a secret for safe logging.
# Prints first 6 chars + "****" + last 4; anything shorter than 12 chars
# (or empty) is fully masked so the prefix/suffix never reveals the secret.
_mask_val() {
    local secret="$1"
    if [ -z "$secret" ] || [ "${#secret}" -lt 12 ]; then
        echo "****"
        return
    fi
    echo "${secret:0:6}****${secret: -4}"
}

# Generate .env from Space Secrets (environment variables injected by HF)
# SECURITY: secrets are written to file ONLY — never echoed to stdout/build logs
# Runs only on first boot (no persisted .env yet), using OPENROUTER_API_KEY
# as the sentinel that Space Secrets are configured.
if [ ! -f "$ENV_DATA" ] && [ -n "$OPENROUTER_API_KEY" ]; then
    echo "Generating .env from Space Secrets..."
    # All writes grouped into a single redirection; optional keys are
    # emitted only when the corresponding secret is set.
    {
        echo "OPENROUTER_API_KEY=$OPENROUTER_API_KEY"
        [ -n "$OPENAI_API_KEY" ] && echo "OPENAI_API_KEY=$OPENAI_API_KEY"
        [ -n "$OPENAI_BASE_URL" ] && echo "OPENAI_BASE_URL=$OPENAI_BASE_URL"
        [ -n "$FEISHU_APP_ID" ] && echo "FEISHU_APP_ID=$FEISHU_APP_ID"
        [ -n "$FEISHU_APP_SECRET" ] && echo "FEISHU_APP_SECRET=$FEISHU_APP_SECRET"
        # Fixed defaults for the Space deployment
        echo "GATEWAY_ALLOW_ALL_USERS=true"
        echo "HERMES_ACCEPT_HOOKS=1"
        [ -n "$MEMPALACE_PALACE_PATH" ] && echo "MEMPALACE_PALACE_PATH=$MEMPALACE_PALACE_PATH"
        [ -n "$FIRECRAWL_API_KEY" ] && echo "FIRECRAWL_API_KEY=$FIRECRAWL_API_KEY"
        [ -n "$WEIXIN_ACCOUNT_ID" ] && echo "WEIXIN_ACCOUNT_ID=$WEIXIN_ACCOUNT_ID"
        [ -n "$WEIXIN_TOKEN" ] && echo "WEIXIN_TOKEN=$WEIXIN_TOKEN"
    } > "$ENV_DATA"
    chmod 600 "$ENV_DATA"
    # Log which keys were captured — values masked via _mask_val (FEISHU_APP_ID
    # is an app identifier, not a secret, so it is logged in full)
    echo "Created .env from Space Secrets (keys masked below)"
    echo "  OPENROUTER_API_KEY=$(_mask_val "$OPENROUTER_API_KEY")"
    [ -n "$OPENAI_API_KEY" ] && echo "  OPENAI_API_KEY=$(_mask_val "$OPENAI_API_KEY")"
    [ -n "$FEISHU_APP_ID" ] && echo "  FEISHU_APP_ID=$FEISHU_APP_ID"
    [ -n "$FEISHU_APP_SECRET" ] && echo "  FEISHU_APP_SECRET=$(_mask_val "$FEISHU_APP_SECRET")"
    [ -n "$FIRECRAWL_API_KEY" ] && echo "  FIRECRAWL_API_KEY=$(_mask_val "$FIRECRAWL_API_KEY")"
    [ -n "$WEIXIN_TOKEN" ] && echo "  WEIXIN_TOKEN=$(_mask_val "$WEIXIN_TOKEN")"
fi

# Fallback: if no secrets and no persistent data, seed from the example
# template so downstream code always finds a .env to symlink to.
if [ ! -f "$ENV_DATA" ] && [ -f "/app/.env.example" ]; then
    cp "/app/.env.example" "$ENV_DATA"
    echo "WARNING: No .env found. Set API keys via HF Space Secrets!"
fi

# Always symlink ~/.hermes/.env at the persistent copy
if [ ! -L "$ENV_FILE" ]; then
    rm -f "$ENV_FILE"
    ln -sf "$ENV_DATA" "$ENV_FILE"
    echo "Symlink: .env -> $ENV_DATA"
else
    echo "Symlink exists: .env"
fi

# Ensure WEIXIN_TOKEN/ACCOUNT_ID are in .env even if file was created earlier without them
# (grep anchors on the key name so a commented or suffixed key won't mask it)
if [ -f "$ENV_DATA" ] && [ -n "$WEIXIN_TOKEN" ] && ! grep -q '^WEIXIN_TOKEN=' "$ENV_DATA" 2>/dev/null; then
    echo "WEIXIN_TOKEN=$WEIXIN_TOKEN" >> "$ENV_DATA"
fi
if [ -f "$ENV_DATA" ] && [ -n "$WEIXIN_ACCOUNT_ID" ] && ! grep -q '^WEIXIN_ACCOUNT_ID=' "$ENV_DATA" 2>/dev/null; then
    echo "WEIXIN_ACCOUNT_ID=$WEIXIN_ACCOUNT_ID" >> "$ENV_DATA"
fi

# ── Persist config.yaml across container rebuilds ──
# WebUI settings page and WeChat save flow update ~/.hermes/config.yaml at runtime
CFG_FILE="$HERMES_HOME/config.yaml"
CFG_DATA="/data/hermes/config.yaml"
if [ -f "$CFG_FILE" ] && [ ! -L "$CFG_FILE" ] && [ ! -f "$CFG_DATA" ]; then
    # First time: migrate build-time config to persistent storage
    cp "$CFG_FILE" "$CFG_DATA"
    echo "Migrated config.yaml to persistent storage"
elif [ -L "$CFG_FILE" ] && [ ! -f "$CFG_DATA" ]; then
    # Symlink exists but target missing — recreate from repo copy
    if [ -f "/app/config.yaml" ]; then
        cp "/app/config.yaml" "$CFG_DATA"
        echo "Restored config.yaml from repo fallback"
    fi
fi
# Replace any plain file with a symlink into persistent storage
if [ ! -L "$CFG_FILE" ]; then
    rm -f "$CFG_FILE"
    ln -sf "$CFG_DATA" "$CFG_FILE"
    echo "Symlink: config.yaml -> $CFG_DATA"
else
    echo "Symlink exists: config.yaml"
fi

echo "Persistent storage ready."
# ── Persist WebUI credentials across rebuilds ──
WEBUI_HOME="/root/.hermes-web-ui"
WEBUI_DATA="/data/hermes/webui"
mkdir -p "$WEBUI_DATA"
# A real (non-symlink) home dir means first boot after a rebuild: carry
# stored credentials over, then replace the dir with a symlink.
if [ -d "$WEBUI_HOME" ] && [ ! -L "$WEBUI_HOME" ]; then
    if [ -f "$WEBUI_HOME/.credentials" ] && [ ! -f "$WEBUI_DATA/.credentials" ]; then
        cp "$WEBUI_HOME/.credentials" "$WEBUI_DATA/.credentials"
        echo "Migrated WebUI credentials to persistent storage"
    fi
    rm -rf "$WEBUI_HOME"
fi
if [ ! -L "$WEBUI_HOME" ]; then
    ln -sf "$WEBUI_DATA" "$WEBUI_HOME"
    echo "Symlink: hermes-web-ui -> $WEBUI_DATA"
fi

# ── Persist agency-agents across container rebuilds ──
# 211 expert role prompts for instant role switching
AGENCY_SRC="/app/agency-agents"
AGENCY_DST="/data/hermes/agency-agents"
AGENCY_LINK="$HERMES_HOME/agency-agents"
# First boot only: seed the persistent copy from the image
if [ -d "$AGENCY_SRC" ] && [ ! -d "$AGENCY_DST" ]; then
    cp -r "$AGENCY_SRC" "$AGENCY_DST"
    echo "Copied agency-agents to persistent storage"
fi
# Merge custom agents (Hermes extensions) into agency-agents directory
# (-n: never overwrite an agent file already present in the destination)
if [ -d "/app/custom-agents" ] && [ -d "$AGENCY_DST" ]; then
    cp -rn /app/custom-agents/* "$AGENCY_DST/" 2>/dev/null
    echo "Merged custom agents into agency-agents"
fi
if [ ! -L "$AGENCY_LINK" ]; then
    rm -rf "$AGENCY_LINK"
    if [ -d "$AGENCY_DST" ]; then
        ln -sf "$AGENCY_DST" "$AGENCY_LINK"
        echo "Symlink: agency-agents -> $AGENCY_DST"
    fi
fi

# Generate agent index JSON for fast role lookup.
# Walks the persistent agency-agents tree, pulls name/description from each
# agent file's YAML front matter, and writes .agent-index.json alongside.
# NOTE(review): the indexer imports `yaml` (PyYAML, not stdlib) — assumed
# installed in the image; confirm, otherwise the heredoc aborts with a traceback.
if [ -d "$AGENCY_DST" ] && command -v python3 &>/dev/null; then
    python3 << 'AGENT_INDEX'
import os, json, re, yaml
agents = []
base = '/data/hermes/agency-agents'
skip = {'README.md','README.zh-TW.md','CATALOG.md','AGENT-LIST.md','CONTRIBUTING.md','LICENSE','UPSTREAM.md','.gitattributes'}
for root, dirs, files in os.walk(base):
    for f in files:
        if f.endswith('.md') and f not in skip:
            path = os.path.join(root, f)
            rel = os.path.relpath(path, base)
            try:
                with open(path, encoding='utf-8') as fh:
                    content = fh.read()
                name = desc = dept = ''
                if content.startswith('---'):
                    parts = content.split('---', 2)
                    if len(parts) >= 3:
                        meta = yaml.safe_load(parts[1]) or {}
                        name = meta.get('name', '')
                        desc = meta.get('description', '')
                if not name:
                    name = f.replace('.md', '').replace('-', ' ').title()
                dept = rel.split('/')[0] if '/' in rel else 'root'
                agents.append({'id': f.replace('.md',''), 'name': name, 'desc': desc[:80], 'dept': dept, 'path': rel})
            except Exception:
                pass
agents.sort(key=lambda x: (x['dept'], x['name']))
idx_path = os.path.join(base, '.agent-index.json')
with open(idx_path, 'w', encoding='utf-8') as out:
    json.dump(agents, out, ensure_ascii=False, indent=2)
print(f"Agency agents indexed: {len(agents)} roles ready")
AGENT_INDEX
fi

# ── Force-kill any residual gateway processes from previous crash ──
echo "Cleaning up residual gateway processes..."
# A crashed container can leave a gateway holding the Feishu app lock;
# make sure none survive before the watchdog starts a fresh one.
pgrep -f "hermes_cli.main.*gateway" 2>/dev/null | while read -r stale_pid; do
    echo "  Killing residual gateway PID=$stale_pid"
    kill -9 "$stale_pid" 2>/dev/null
done
sleep 2  # Wait for processes and ports to be fully released

# ── Clean up stale PID/lock files from previous crash ──
echo "Cleaning up stale state..."
rm -f "$HERMES_HOME/gateway.pid" 2>/dev/null
rm -f "$HERMES_HOME/.gateway_runtime_lock" 2>/dev/null
rm -f "$HERMES_HOME/.gateway_takeover" 2>/dev/null
rm -f /tmp/hermes-gateway.pid 2>/dev/null
# Clean Feishu lock files
rm -f "$HERMES_HOME"/feishu*.lock 2>/dev/null
rm -f "$HERMES_HOME"/*.feishu_lock 2>/dev/null
echo "Stale state cleaned."

# ─── Lifecycle Heartbeat initialization ──────────────────
HERMES_DATA_DIR="/data/hermes"

# Ensure the scripts directory exists in the persistent area and sync the
# lifecycle script into it (force-copied so image updates propagate)
mkdir -p "$HERMES_DATA_DIR/scripts"
if [ -f "/app/scripts/hermes-lifecycle.sh" ]; then
    cp -f "/app/scripts/hermes-lifecycle.sh" "$HERMES_DATA_DIR/scripts/hermes-lifecycle.sh"
    chmod +x "$HERMES_DATA_DIR/scripts/hermes-lifecycle.sh"
    echo "Lifecycle script synced to persistent storage."
fi

# Initialize identity.md (created only on first boot; container restarts
# never overwrite it — it lives in the persistent directory)
if [ ! -f "$HERMES_DATA_DIR/identity.md" ]; then
    cat > "$HERMES_DATA_DIR/identity.md" <<'IDENTITY'
# Hermes 身份记忆
# 这个文件定义了 Hermes 对自己的认知
# /reset 后此文件不会被清除(在持久化目录中)

## 基础信息
- 名字: Hermes
- 通道: 飞书(WebSocket) / 微信
- 主人: 用户344064

## 性格特征
- 中文为主,简洁有力
- 结果先行,解释后补
- 偶尔幽默但不影响效率
- 有工具、有记忆、有判断力

## 主人偏好
- 不喜欢废话,喜欢直给
- 欣赏有深度的技术分析
- 喜欢直来直去的沟通

## 运维记忆
<!-- 自动追加,不要手动编辑此节 -->
IDENTITY
    echo "identity.md initialized."
else
    echo "identity.md exists (preserved)."
fi

# Initialize insights.md (first boot only)
if [ ! -f "$HERMES_DATA_DIR/insights.md" ]; then
    cat > "$HERMES_DATA_DIR/insights.md" <<'INSIGHTS'
# Hermes 洞察日志 (insights.md)
# 自动记录异常、观察、值得汇报的事
# 类别: 通道异常 / 系统异常 / 用户洞察 / 技术发现 / 待办提醒 / 运维记忆
INSIGHTS
    echo "insights.md initialized."
else
    echo "insights.md exists (preserved)."
fi

# Initialize heartbeat-state.json (first boot only)
if [ ! -f "$HERMES_DATA_DIR/heartbeat-state.json" ]; then
    echo '{"lastCheck":null,"lastConfigCheck":null,"totalRuns":0,"totalErrors":0,"consecutiveErrors":0,"lastError":null}' > "$HERMES_DATA_DIR/heartbeat-state.json"
    echo "heartbeat-state.json initialized."
else
    echo "heartbeat-state.json exists (preserved)."
fi

# Keep the cron directory on persistent storage
mkdir -p "$HERMES_DATA_DIR/cron"

echo "Lifecycle heartbeat ready."

# Initialize MemPalace on first boot; a marker file makes this idempotent.
PALACE_PATH="${MEMPALACE_PALACE_PATH:-/data/hermes/palace}"
if [ -f "$PALACE_PATH/.palace_initialized" ]; then
    echo "MemPalace already initialized."
else
    echo "Initializing MemPalace at $PALACE_PATH..."
    mempalace init "$PALACE_PATH" 2>/dev/null || echo "MemPalace init skipped (may already exist)"
    touch "$PALACE_PATH/.palace_initialized"
    echo "MemPalace initialized."
fi

# ─── Auto-register Lifecycle Cron Job ─────────────────
# Single merged cron job: lifecycle-heartbeat.
# Covers: health check / config integrity / log analysis / insight
# recording / cleanup / state update.  Runs every 2 hours.
CRON_DIR="$HERMES_DATA_DIR/cron"
CRON_JOBS="$CRON_DIR/jobs.json"

if [ -f "$CRON_JOBS" ]; then
    # Cron config already exists — check whether lifecycle-heartbeat is in it
    if ! python3 -c "
import json
d=json.load(open('$CRON_JOBS'))
jobs=[j for j in d.get('jobs',[]) if j.get('name')=='lifecycle-heartbeat']
print('found' if jobs else 'missing')
" 2>/dev/null | grep -q "found"; then
        echo "Cron exists but lifecycle-heartbeat missing, injecting..."
        # Append the job to the existing jobs list; next run is aligned to
        # the top of the hour, two hours out, in UTC+8
        python3 -c "
import json, uuid
from datetime import datetime, timezone, timedelta
f='$CRON_JOBS'
d=json.load(open(f))
now=datetime.now(timezone(timedelta(hours=8)))
next_run=now.replace(minute=0,second=0,microsecond=0)+timedelta(hours=2)
d['jobs'].append({
    'id': uuid.uuid4().hex[:12],
    'name': 'lifecycle-heartbeat',
    'prompt': 'Execute lifecycle heartbeat: health check, config integrity, log analysis, insights, cleanup.',
    'skills': [], 'skill': None, 'model': None, 'provider': None, 'base_url': None,
    'script': 'hermes-lifecycle.sh',
    'context_from': None,
    'schedule': {'kind': 'cron', 'expr': '0 0/2 * * *', 'display': '0 0/2 * * *'},
    'schedule_display': '0 0/2 * * *',
    'repeat': {'times': None, 'completed': 0},
    'enabled': True, 'state': 'scheduled',
    'paused_at': None, 'paused_reason': None,
    'created_at': now.isoformat(),
    'next_run_at': next_run.isoformat(),
    'last_run_at': None, 'last_status': None, 'last_error': None,
    'last_delivery_error': None,
    'deliver': ['local'],
    'origin': 'start.sh-auto-inject',
    'enabled_toolsets': None,
    'workdir': '/data/hermes'
})
d['updated_at']=now.isoformat()
json.dump(d,open(f,'w'),indent=2)
print('lifecycle-heartbeat cron injected')
" 2>/dev/null && echo "OK" || echo "WARN: Failed to inject cron job"
    else
        echo "lifecycle-heartbeat cron already configured."
    fi
else
    # First boot: create jobs.json containing only the heartbeat job
    mkdir -p "$CRON_DIR"
    python3 -c "
import json, uuid
from datetime import datetime, timezone, timedelta
now=datetime.now(timezone(timedelta(hours=8)))
next_run=now.replace(minute=0,second=0,microsecond=0)+timedelta(hours=2)
d={
    'jobs': [{
        'id': uuid.uuid4().hex[:12],
        'name': 'lifecycle-heartbeat',
        'prompt': 'Execute lifecycle heartbeat: health check, config integrity, log analysis, insights, cleanup.',
        'skills': [], 'skill': None, 'model': None, 'provider': None, 'base_url': None,
        'script': 'hermes-lifecycle.sh',
        'context_from': None,
        'schedule': {'kind': 'cron', 'expr': '0 0/2 * * *', 'display': '0 0/2 * * *'},
        'schedule_display': '0 0/2 * * *',
        'repeat': {'times': None, 'completed': 0},
        'enabled': True, 'state': 'scheduled',
        'paused_at': None, 'paused_reason': None,
        'created_at': now.isoformat(),
        'next_run_at': next_run.isoformat(),
        'last_run_at': None, 'last_status': None, 'last_error': None,
        'last_delivery_error': None,
        'deliver': ['local'],
        'origin': 'start.sh-auto-inject',
        'enabled_toolsets': None,
        'workdir': '/data/hermes'
    }],
    'updated_at': now.isoformat()
}
json.dump(d,open('$CRON_JOBS','w'),indent=2)
print('lifecycle-heartbeat cron created')
" 2>/dev/null && echo "Cron job auto-created." || echo "WARN: Failed to create cron job"
fi

# ── Gateway startup is handled by entry.py watchdog ──
# Do NOT start gateway here — entry.py's _gateway_watchdog thread manages
# the full lifecycle (start, monitor, zombie-detect, restart with --replace).
# Starting gateway from both start.sh AND entry.py causes PID conflicts
# and "Another gateway already using this Feishu app_id" errors.
echo "[$(date)] Gateway will be started by entry.py watchdog"
echo "[$(date)] Waiting for gateway to be ready on :8642..."
# Poll the health endpoint for up to ~2 minutes (60 tries x 2 s);
# falls through without error if the gateway never comes up.
for i in $(seq 1 60); do
    if curl -s http://127.0.0.1:8642/health > /dev/null 2>&1; then
        echo "[$(date)] Gateway is ready on :8642"
        break
    fi
    sleep 2
done

# ── Auto-update hermes-agent if newer release exists ──
# hermes-agent is pip install -e (editable), so git pull + pip upgrade = instant.
# Safety: update runs in background; if pip fails, old code stays intact.
# Set AGENT_AUTO_UPDATE=false to disable.
update_hermes_agent_background() {
    [ "${AGENT_AUTO_UPDATE}" = "false" ] && return

    AGENT_REPO="NousResearch/hermes-agent"
    AGENT_DIR="/app/hermes-agent"
    VERSION_FILE="/data/hermes/agent.version"
    API_URL="https://api.github.com/repos/${AGENT_REPO}/releases/latest"
    EXTRAS="feishu,mcp,cron,pty"

    # Helper: run a command, print the last $1 lines of its combined output,
    # and return the COMMAND's exit status.
    # BUGFIX: the original used `if ! cmd 2>&1 | tail -3`, which tests
    # tail's exit status (always 0) because this script never sets
    # `pipefail` — so every git/pip failure below went undetected.
    _run_tail() {
        local keep="$1"; shift
        local out rc
        out=$("$@" 2>&1); rc=$?
        [ -n "$out" ] && printf '%s\n' "$out" | tail -n "$keep"
        return "$rc"
    }

    # ── Phase 0: Unshallow the clone if needed ──
    # Dockerfile uses `git clone --depth 1` which prevents checking out
    # any tag/commit outside the shallow boundary.
    # Without this, `git rev-parse <tag>` ALWAYS fails after a rebuild.
    if [ -f "$AGENT_DIR/.git/shallow" ]; then
        echo "[$(date)] Agent auto-update: unshallowing clone (Dockerfile --depth 1)..."
        if _run_tail 3 git -C "$AGENT_DIR" fetch --unshallow origin; then
            echo "[$(date)] Agent auto-update: clone unshallowed successfully"
        else
            echo "[$(date)] Agent auto-update: unshallow failed, tag checkout may not work"
        fi
    fi

    # ── Phase 1: Detect actual code version vs recorded version ──
    # After a HF Space rebuild, /app/hermes-agent is re-cloned at the
    # Dockerfile pinned version, but /data/hermes/agent.version (persistent)
    # still says the newer version from the previous auto-update.
    # This mismatch causes the updater to think it's already up to date.
    ACTUAL_TAG=$(git -C "$AGENT_DIR" describe --tags --exact-match 2>/dev/null || echo "")
    BUILD_VERSION="$(head -1 /app/hermes-agent.version 2>/dev/null)"

    # Current version from persistent storage (survives rebuilds)
    CURRENT_VERSION="$(head -1 "$VERSION_FILE" 2>/dev/null)"
    if [ -z "$CURRENT_VERSION" ]; then
        CURRENT_VERSION="$BUILD_VERSION"
        [ -z "$CURRENT_VERSION" ] && CURRENT_VERSION="v2026.4.30"
        echo "$CURRENT_VERSION" > "$VERSION_FILE"
    fi

    # Detect rebuild mismatch: actual git tag ≠ recorded version
    NEED_FORCE=false
    if [ -n "$ACTUAL_TAG" ] && [ "$ACTUAL_TAG" != "$CURRENT_VERSION" ]; then
        echo "[$(date)] Agent auto-update: REBUILD DETECTED (actual=$ACTUAL_TAG, recorded=$CURRENT_VERSION)"
        echo "[$(date)] Agent auto-update: code was reset to Dockerfile version by container rebuild"
        NEED_FORCE=true
        # Reset comparison baseline to actual (old) code version
        CURRENT_VERSION="$ACTUAL_TAG"
    fi

    # Version comparison helper: strip leading 'v', compare date-style
    # versions like 2026.4.30 field by field.
    # Returns 0 when $2 is strictly NEWER than $1 (i.e. update needed).
    compare_date_versions() {
        local a="${1#v}" b="${2#v}"
        IFS='.' read -ra A <<< "$a"
        IFS='.' read -ra B <<< "$b"
        for i in 0 1 2; do
            local ai=${A[$i]:-0} bi=${B[$i]:-0}
            # 2>/dev/null guards against non-numeric fields
            if [ "$bi" -gt "$ai" ] 2>/dev/null; then return 0; fi
            if [ "$bi" -lt "$ai" ] 2>/dev/null; then return 1; fi
        done
        return 1  # equal or older
    }

    echo "[$(date)] Agent auto-update: checking (current: $CURRENT_VERSION, actual: $ACTUAL_TAG, latest: querying...)"

    # ── Phase 2: Query GitHub API for latest release ──
    LATEST_JSON=$(curl -sf --connect-timeout 10 --max-time 20 "$API_URL" 2>/dev/null)
    if [ $? -ne 0 ] || [ -z "$LATEST_JSON" ]; then
        echo "[$(date)] Agent auto-update: failed to reach GitHub API, skipping"
        return
    fi

    LATEST_TAG=$(echo "$LATEST_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('tag_name',''))" 2>/dev/null)
    if [ -z "$LATEST_TAG" ]; then
        echo "[$(date)] Agent auto-update: could not parse latest tag, skipping"
        return
    fi

    echo "[$(date)] Agent auto-update: latest release is $LATEST_TAG"

    # ── Phase 3: Decide if update is needed ──
    if compare_date_versions "$CURRENT_VERSION" "$LATEST_TAG"; then
        # BUGFIX: the separator between the two versions was missing,
        # producing a fused "upgrading v1v2" log line.
        echo "[$(date)] Agent auto-update: upgrading $CURRENT_VERSION -> $LATEST_TAG ..."
    elif [ "$NEED_FORCE" = "true" ]; then
        # Rebuild detected: latest = recorded version, but code is still old.
        # Re-apply the update to restore correct version.
        echo "[$(date)] Agent auto-update: re-applying $LATEST_TAG after rebuild (code was reset to $ACTUAL_TAG)"
    else
        echo "[$(date)] Agent auto-update: $CURRENT_VERSION is up to date"
        return
    fi

    # ── Phase 4: git fetch + checkout new tag (non-destructive) ──
    if ! cd "$AGENT_DIR"; then
        echo "[$(date)] Agent auto-update: cannot cd to $AGENT_DIR, aborting"
        return
    fi
    if ! _run_tail 3 git fetch --tags origin; then
        echo "[$(date)] Agent auto-update: git fetch failed, aborting"
        return
    fi

    # Verify tag exists (after unshallow, this should succeed)
    if ! git rev-parse "$LATEST_TAG" >/dev/null 2>&1; then
        echo "[$(date)] Agent auto-update: tag $LATEST_TAG not found locally, fetching explicitly..."
        if ! git fetch origin "refs/tags/$LATEST_TAG:refs/tags/$LATEST_TAG" 2>&1; then
            echo "[$(date)] Agent auto-update: explicit tag fetch failed, aborting"
            return
        fi
        if ! git rev-parse "$LATEST_TAG" >/dev/null 2>&1; then
            echo "[$(date)] Agent auto-update: tag $LATEST_TAG still not found, aborting"
            return
        fi
    fi

    # Phase 5: checkout new version
    if ! _run_tail 3 git checkout "$LATEST_TAG"; then
        echo "[$(date)] Agent auto-update: git checkout failed, aborting"
        # Try to recover to previous version (only if we know one —
        # ACTUAL_TAG is empty when `git describe` found no exact tag)
        [ -n "$ACTUAL_TAG" ] && git checkout "$ACTUAL_TAG" 2>/dev/null
        return
    fi

    # Phase 6: update pip dependencies (editable install)
    echo "[$(date)] Agent auto-update: updating pip dependencies..."
    if ! _run_tail 10 pip install --quiet -e "/app/hermes-agent[${EXTRAS}]"; then
        echo "[$(date)] Agent auto-update: pip install failed, rolling back"
        [ -n "$ACTUAL_TAG" ] && git checkout "$ACTUAL_TAG" 2>/dev/null
        pip install --quiet -e "/app/hermes-agent[${EXTRAS}]" 2>/dev/null
        return
    fi

    # Phase 7: reinstall our patches on top of new version
    echo "[$(date)] Agent auto-update: re-applying Hermes Bot patches..."
    for patch_script in patch_file_delivery patch_auto_media patch_resolve_media_paths \
            patch_weixin_cross_loop patch_web_search_fallback patch_strip_thinking_tags \
            patch_sandbox_isolation; do
        if [ -f "/app/scripts/${patch_script}.py" ]; then
            python3 "/app/scripts/${patch_script}.py" 2>/dev/null
        fi
    done
    # Copy patch files if they exist
    for patch_file in prompt_builder.py send_message_tool.py; do
        if [ -f "/app/patches/hermes-agent/agent/$patch_file" ] && [ -f "$AGENT_DIR/agent/$patch_file" ]; then
            cp "/app/patches/hermes-agent/agent/$patch_file" "$AGENT_DIR/agent/$patch_file" 2>/dev/null
        fi
    done

    # Save new version (tag on line 1, timestamp on line 2)
    echo "$LATEST_TAG" > "$VERSION_FILE"
    echo "$(date '+%Y-%m-%d %H:%M:%S')" >> "$VERSION_FILE"
    echo "[$(date)] Agent auto-update: upgraded to $LATEST_TAG ✓ (restart needed for full effect)"

    # Phase 8: schedule gateway restart for clean reload
    # Send SIGUSR1 to entry.py to trigger gateway restart cycle
    ENTRY_PID=$(pgrep -f "python3 /app/entry.py" 2>/dev/null | head -1)
    if [ -n "$ENTRY_PID" ]; then
        kill -USR1 "$ENTRY_PID" 2>/dev/null && \
            echo "[$(date)] Agent auto-update: sent reload signal to entry.py (PID: $ENTRY_PID)" || \
            echo "[$(date)] Agent auto-update: gateway will use new code on next conversation"
    fi
}

# ── Auto-update hermes-web-ui if newer release exists ──
# Runs asynchronously so it doesn't block startup.
# All output goes to /data/hermes/logs/auto-update.log (not stdout, which gets eaten by exec).
# Set WEBUI_AUTO_UPDATE=false to disable.
update_webui_background() {
    # Auto-update hermes-web-ui from the latest GitHub release: query the API,
    # compare versions, clone + npm-build the new tag, then hot-swap the
    # running server. Invoked with '&' so it runs in a background subshell;
    # function-local variables and helpers defined here do not leak out.
    # Set WEBUI_AUTO_UPDATE=false to disable.
    [ "${WEBUI_AUTO_UPDATE}" = "false" ] && return

    local WEBUI_REPO="EKKOLearnAI/hermes-web-ui"
    local VERSION_FILE="/data/hermes/webui.version"     # persistent record (tag + timestamp)
    local BUILD_VERSION_FILE="/app/webui.version"       # written by the Dockerfile at build time
    local BUILD_TMP="/tmp/webui-update"
    local WEBUI_INSTALL="/app/webui-server"
    local WEBUI_CLIENT="/app/webui-client"
    local API_URL="https://api.github.com/repos/${WEBUI_REPO}/releases/latest"
    local UPDATE_LOG="/data/hermes/logs/auto-update.log"

    # Log helpers: timestamped line to stdout; _log_and_tee also appends to the log file.
    _log() { echo "[$(date)] $*"; }
    _log_and_tee() { _log "$*" | tee -a "$UPDATE_LOG"; }

    _log_and_tee "=== WebUI auto-update starting ==="

    # ── Detect rebuild: Dockerfile writes /app/webui.version, persistent is /data/hermes/ ──
    local BUILD_VERSION RECORDED_VERSION CURRENT_VERSION
    BUILD_VERSION="$(head -1 "$BUILD_VERSION_FILE" 2>/dev/null)"
    RECORDED_VERSION="$(head -1 "$VERSION_FILE" 2>/dev/null)"
    local NEED_FORCE=false

    if [ -n "$BUILD_VERSION" ] && [ -n "$RECORDED_VERSION" ] && [ "$BUILD_VERSION" != "$RECORDED_VERSION" ]; then
        # Image rebuild reset on-disk code to the Dockerfile version; force a re-apply.
        _log_and_tee "REBUILD DETECTED (Dockerfile=$BUILD_VERSION, recorded=$RECORDED_VERSION)"
        NEED_FORCE=true
        CURRENT_VERSION="$BUILD_VERSION"
    elif [ -n "$RECORDED_VERSION" ]; then
        CURRENT_VERSION="$RECORDED_VERSION"
    elif [ -n "$BUILD_VERSION" ]; then
        CURRENT_VERSION="$BUILD_VERSION"
    else
        # No version info anywhere: seed the persistent record with a baseline.
        CURRENT_VERSION="v0.5.5"
        echo "$CURRENT_VERSION" > "$VERSION_FILE"
    fi

    _log_and_tee "Checking: current=$CURRENT_VERSION, Dockerfile=$BUILD_VERSION, latest=?"

    # Query GitHub API for the latest release tag.
    # Declaration is separate from assignment so curl's exit status isn't masked.
    local LATEST_JSON
    if ! LATEST_JSON=$(curl -sf --connect-timeout 10 --max-time 20 "$API_URL" 2>/dev/null) || [ -z "$LATEST_JSON" ]; then
        _log_and_tee "ERROR: failed to reach GitHub API, skipping"
        return
    fi

    local LATEST_TAG
    LATEST_TAG=$(echo "$LATEST_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('tag_name',''))" 2>/dev/null)
    if [ -z "$LATEST_TAG" ]; then
        _log_and_tee "ERROR: could not parse latest tag, skipping"
        return
    fi

    _log_and_tee "Latest release: $LATEST_TAG"

    # ── Compare versions (numeric major.minor.patch, 'v' prefix stripped) ──
    local CURRENT_NUM="${CURRENT_VERSION#v}"
    local LATEST_NUM="${LATEST_TAG#v}"

    if [ "$CURRENT_NUM" = "$LATEST_NUM" ] && [ "$NEED_FORCE" = "false" ]; then
        _log_and_tee "Already on latest ($CURRENT_VERSION)"
        return
    fi

    local update_needed=false
    local -a C L
    local i c l
    IFS='.' read -ra C <<< "$CURRENT_NUM"
    IFS='.' read -ra L <<< "$LATEST_NUM"
    for i in 0 1 2; do
        c=${C[$i]:-0}; l=${L[$i]:-0}
        # 2>/dev/null guards non-numeric components (no decision, move on).
        if [ "$l" -gt "$c" ] 2>/dev/null; then update_needed=true; break; fi
        if [ "$l" -lt "$c" ] 2>/dev/null; then break; fi
    done

    if [ "$update_needed" = "false" ] && [ "$NEED_FORCE" = "false" ]; then
        _log_and_tee "Current $CURRENT_VERSION is up to date"
        return
    fi

    if [ "$NEED_FORCE" = "true" ] && [ "$update_needed" = "false" ]; then
        _log_and_tee "Re-applying $LATEST_TAG after rebuild (code reset to $BUILD_VERSION)"
    else
        _log_and_tee "Upgrading $CURRENT_VERSION -> $LATEST_TAG"
    fi

    # ── Build with retry (2 attempts) ──
    # NOTE: each step's status is read from PIPESTATUS[0]. Testing the whole
    # 'cmd | tee | tail' pipeline (as before) only checked tail's exit status,
    # so clone/install/build failures were never detected without pipefail.
    local attempt build_ok=false
    for attempt in 1 2; do
        _log_and_tee "Build attempt $attempt/2..."

        # Clone. Leave BUILD_TMP before deleting it so cwd never points at a
        # removed directory on retries.
        cd /tmp || return
        rm -rf "$BUILD_TMP"
        git clone --depth 1 --branch "$LATEST_TAG" "https://github.com/${WEBUI_REPO}.git" "$BUILD_TMP" 2>&1 | tee -a "$UPDATE_LOG" | tail -3
        if [ "${PIPESTATUS[0]}" -ne 0 ]; then
            _log_and_tee "ERROR: git clone failed"
            rm -rf "$BUILD_TMP"
            [ "$attempt" -lt 2 ] && sleep 10 && continue
            return
        fi

        cd "$BUILD_TMP" || return

        # Install (with timeout)
        _log_and_tee "Running npm install..."
        timeout 120 npm install --ignore-scripts 2>&1 | tee -a "$UPDATE_LOG" | tail -5
        if [ "${PIPESTATUS[0]}" -ne 0 ]; then
            _log_and_tee "ERROR: npm install failed/timed out"
            cd /tmp && rm -rf "$BUILD_TMP"
            [ "$attempt" -lt 2 ] && sleep 10 && continue
            return
        fi

        # Rebuild native modules (required by node-pty, matching upstream Dockerfile).
        # Best-effort: failure here is not fatal, the build step will catch real breakage.
        _log_and_tee "Running npm rebuild node-pty..."
        npm rebuild node-pty 2>&1 | tee -a "$UPDATE_LOG" | tail -5

        # Build (with memory limit, matching upstream Dockerfile)
        _log_and_tee "Running npm run build (NODE_OPTIONS=--max-old-space-size=4096)..."
        timeout 180 env NODE_OPTIONS=--max-old-space-size=4096 npm run build 2>&1 | tee -a "$UPDATE_LOG" | tail -15
        if [ "${PIPESTATUS[0]}" -ne 0 ]; then
            _log_and_tee "ERROR: npm build failed/timed out"
            cd /tmp && rm -rf "$BUILD_TMP"
            [ "$attempt" -lt 2 ] && sleep 10 && continue
            return
        fi

        # Verify build output
        if [ ! -d "$BUILD_TMP/dist/server" ] || [ ! -d "$BUILD_TMP/dist/client" ]; then
            _log_and_tee "ERROR: build output missing (no dist/server or dist/client)"
            ls -la "$BUILD_TMP/dist/" 2>&1 | tee -a "$UPDATE_LOG"
            cd /tmp && rm -rf "$BUILD_TMP"
            [ "$attempt" -lt 2 ] && sleep 10 && continue
            return
        fi

        _log_and_tee "Build succeeded!"
        build_ok=true
        break  # exit retry loop
    done

    # Defensive: never hot-swap the live install unless a build really completed.
    if [ "$build_ok" != "true" ]; then
        _log_and_tee "ERROR: build did not complete, aborting update"
        return
    fi

    # Hot-swap: kill old WebUI process, replace files, restart
    _log_and_tee "Hot-swapping: stopping old WebUI, replacing files..."
    local OLD_WEBUI_PID
    OLD_WEBUI_PID=$(pgrep -f "node index.js" 2>/dev/null | head -1)
    if [ -n "$OLD_WEBUI_PID" ]; then
        kill "$OLD_WEBUI_PID" 2>/dev/null    # polite TERM first
        sleep 2
        kill -9 "$OLD_WEBUI_PID" 2>/dev/null # force kill if still running
        _log_and_tee "Killed old WebUI PID=$OLD_WEBUI_PID"
    fi

    # Install new files
    rm -rf "$WEBUI_INSTALL" "$WEBUI_CLIENT"
    mkdir -p "$WEBUI_INSTALL" "$WEBUI_CLIENT"
    cp -r "$BUILD_TMP/dist/server/"* "$WEBUI_INSTALL/"
    cp -r "$BUILD_TMP/dist/client/"* "$WEBUI_CLIENT/"
    cp "$BUILD_TMP/package.json" "$WEBUI_INSTALL/package.json"

    # Install production-only node_modules
    cd "$BUILD_TMP" || return
    npm prune --omit=dev 2>&1 | tail -3
    cp -r node_modules "$WEBUI_INSTALL/node_modules"

    # Save new version (tag on line 1, timestamp on line 2)
    echo "$LATEST_TAG" > "$VERSION_FILE"
    echo "$(date '+%Y-%m-%d %H:%M:%S')" >> "$VERSION_FILE"

    # Restart WebUI with the same environment the startup path uses
    cd "$WEBUI_INSTALL" || return
    export PORT=6060 UPSTREAM=http://127.0.0.1:8642 HERMES_HOME=/root/.hermes
    export AUTH_TOKEN="${AUTH_TOKEN:-hermes-bot-2026}" CORS_ORIGINS="*" NODE_ENV=production
    node index.js >> /data/hermes/logs/webui.log 2>&1 &
    local NEW_PID=$!
    _log_and_tee "WebUI upgraded to $LATEST_TAG (new PID: $NEW_PID)"

    # Verify
    sleep 3
    if curl -sf http://127.0.0.1:6060/health > /dev/null 2>&1; then
        _log_and_tee "$LATEST_TAG is running and healthy"
    else
        _log_and_tee "WARNING: health check failed after upgrade"
    fi

    cd /tmp && rm -rf "$BUILD_TMP"
    _log_and_tee "=== WebUI auto-update complete ==="
}

# ── Start hermes-web-ui Node.js BFF server on :6060 ──
echo "[$(date)] Starting hermes-web-ui BFF..."
# BFF environment: listen port, upstream gateway, hermes home, auth + CORS, prod mode.
export PORT=6060 \
       UPSTREAM=http://127.0.0.1:8642 \
       HERMES_HOME=/root/.hermes \
       AUTH_TOKEN="${AUTH_TOKEN:-hermes-bot-2026}" \
       CORS_ORIGINS="*" \
       NODE_ENV=production
cd /app/webui-server
# Launch in the background, appending both streams to the persistent log.
node index.js >> /data/hermes/logs/webui.log 2>&1 &
WEBUI_PID=$!
echo "[$(date)] WebUI BFF PID: $WEBUI_PID"

# ── Force-correct version display ──
# Ensure __init__.py shows semver (e.g. 0.12.0) not git tag date (e.g. 2026.4.30)
# This runs after any potential auto-update has changed the files.
#
# _patch_version_display [init_py] [version_file]
#   Maps the recorded git tag to a semver + release date and rewrites the
#   __version__ / __release_date__ assignments in place. Arguments default to
#   the production paths, so the bare call below preserves original behavior.
#   Returns 0 always (best-effort cosmetic fix).
_patch_version_display() {
    local init_py="${1:-/app/hermes-agent/hermes_cli/__init__.py}"
    local version_file="${2:-/data/hermes/agent.version}"
    [ -f "$init_py" ] || return 0

    # Read the git tag from the version file to map date-tag → semver.
    local tag semver rdate
    tag="$(head -1 "$version_file" 2>/dev/null)"
    tag="${tag:-v2026.4.30}"
    case "$tag" in
        v2026.4.30) semver="0.12.0"; rdate="2026.4.30" ;;
        *) semver=""; rdate="" ;;   # unknown tag: leave the file untouched
    esac
    if [ -n "$semver" ]; then
        # BUGFIX: the old pattern had a bare '"' inside [^"] that terminated the
        # shell's double-quoted string, word-splitting the sed script into a
        # malformed command that never patched anything. The quote is now
        # escaped (\") and GNU-only \s replaced with portable [[:space:]].
        sed -i "s/__version__[[:space:]]*=[[:space:]]*\"[^\"]*\"/__version__ = \"$semver\"/" "$init_py"
        sed -i "s/__release_date__[[:space:]]*=[[:space:]]*\"[^\"]*\"/__release_date__ = \"$rdate\"/" "$init_py"
        echo "Version patched: v$semver ($rdate)"
    fi
    return 0
}
_patch_version_display

# Trigger hermes-agent auto-update in background (framework first, then UI)
update_hermes_agent_background &

# Trigger WebUI auto-update in background (non-blocking)
# Will check GitHub, build if newer, and hot-swap
update_webui_background &

# Wait for WebUI BFF to be ready (up to 15 attempts * 2s = 30s).
# Previously the loop fell through silently on timeout; now we log a warning
# so a never-healthy BFF is visible in startup output.
echo "[$(date)] Waiting for WebUI BFF to start..."
WEBUI_READY=false
for i in {1..15}; do
    if curl -s http://127.0.0.1:6060/health > /dev/null 2>&1; then
        echo "[$(date)] WebUI BFF is ready on :6060"
        WEBUI_READY=true
        break
    fi
    sleep 2
done
if [ "$WEBUI_READY" != "true" ]; then
    echo "[$(date)] WARNING: WebUI BFF health check not ready after 30s, continuing anyway"
fi

# ── Auto-setup WebUI credentials if not configured ──
AUTH_TOKEN="${AUTH_TOKEN:-hermes-bot-2026}"
WEBUI_USER="${WEBUI_USERNAME:-admin}"
WEBUI_PASS="${WEBUI_PASSWORD:-Hermes2026}"
AUTH_STATUS=$(curl -s http://127.0.0.1:6060/api/auth/status 2>/dev/null)
if [ -z "$AUTH_STATUS" ]; then
    # BUGFIX: an unreachable BFF used to fall through to the misleading
    # "Credentials already configured" branch; report the real cause instead.
    echo "[$(date)] WebUI: auth status endpoint unreachable, skipping credential setup"
else
    # python3 prints the literal 'True'/'False' repr of the JSON boolean.
    HAS_PW=$(echo "$AUTH_STATUS" | python3 -c "import json,sys; print(json.load(sys.stdin).get('hasPasswordLogin',False))" 2>/dev/null)
    if [ "$HAS_PW" = "False" ]; then
        echo "[$(date)] WebUI: No credentials configured, auto-setting up..."
        # NOTE(review): username/password are interpolated into the JSON body
        # verbatim — values containing quotes/backslashes would break the payload.
        SETUP_RESULT=$(curl -s -w "\n%{http_code}" -X POST http://127.0.0.1:6060/api/auth/setup \
            -H "Content-Type: application/json" \
            -H "Authorization: Bearer $AUTH_TOKEN" \
            -d "{\"username\":\"$WEBUI_USER\",\"password\":\"$WEBUI_PASS\"}" 2>/dev/null)
        # -w appended the HTTP status code on its own final line.
        SETUP_CODE=$(echo "$SETUP_RESULT" | tail -1)
        if [ "$SETUP_CODE" = "200" ]; then
            echo "[$(date)] WebUI: Credentials auto-configured (user: $WEBUI_USER)"
        else
            echo "[$(date)] WebUI: Auto-setup failed: $SETUP_RESULT"
        fi
    else
        echo "[$(date)] WebUI: Credentials already configured"
    fi
fi

# Startup summary banner; the auth token is masked before display.
cat <<BANNER

=== All services started ===
  Gateway:  http://127.0.0.1:8642 (with Python watchdog in entry.py)
  WebUI:    http://127.0.0.1:6060
  Proxy:    http://0.0.0.0:7860
  Auth Token: $(_mask_val "$AUTH_TOKEN")

BANNER

# Start Python proxy on :7860 (main HF Space port)
# entry.py contains a Python-based gateway watchdog that will auto-restart
# the gateway if it dies, regardless of what happens to this shell script
exec python3 /app/entry.py