Spaces:
Running
Running
AutoFix committed on
Commit ·
8d9879d
1
Parent(s): dfa04db
fix: bind port 7860 BEFORE backend health checks
Browse files
HF Space shows 404 if the EXPOSE'd port isn't bound quickly after
container start. Previously entry.py waited up to 40s for gateway and
WebUI BFF health checks before binding port 7860 — too slow.
Now: bind 7860 first, then start watchdog + health checks in background
threads. This ensures HF routing sees the port immediately.
entry.py
CHANGED
|
@@ -879,6 +879,16 @@ def main():
|
|
| 879 |
_ensure_persistent_storage()
|
| 880 |
logger.info("Persistent storage ready at %s", DATA_DIR)
|
| 881 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 882 |
# Start log tailer thread
|
| 883 |
tailer = threading.Thread(target=_log_tailer, daemon=True)
|
| 884 |
tailer.start()
|
|
@@ -889,53 +899,31 @@ def main():
|
|
| 889 |
watchdog.start()
|
| 890 |
logger.info("Python gateway watchdog started")
|
| 891 |
|
| 892 |
-
# Check if backend services are reachable
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
logger.info("Gateway reachable at %s", GATEWAY_URL)
|
| 897 |
-
break
|
| 898 |
-
except Exception:
|
| 899 |
-
logger.warning("Gateway not ready (attempt %d/10)", attempt + 1)
|
| 900 |
-
time.sleep(2)
|
| 901 |
-
|
| 902 |
-
for attempt in range(10):
|
| 903 |
-
try:
|
| 904 |
-
urllib.request.urlopen(f"{WEBUI_BFF_URL}/health", timeout=2)
|
| 905 |
-
logger.info("WebUI BFF reachable at %s", WEBUI_BFF_URL)
|
| 906 |
-
break
|
| 907 |
-
except Exception:
|
| 908 |
-
logger.warning("WebUI BFF not ready (attempt %d/10)", attempt + 1)
|
| 909 |
-
time.sleep(2)
|
| 910 |
-
|
| 911 |
-
# Start HTTP proxy server
|
| 912 |
-
try:
|
| 913 |
-
server = ThreadingHTTPServer(("0.0.0.0", 7860), ProxyHandler)
|
| 914 |
-
except OSError as e:
|
| 915 |
-
if "Address already in use" in str(e):
|
| 916 |
-
logger.error("FATAL: Port 7860 is already in use! Attempting to free it...")
|
| 917 |
-
# Try to find and kill whatever is using port 7860
|
| 918 |
try:
|
| 919 |
-
|
| 920 |
-
|
| 921 |
-
|
| 922 |
-
|
| 923 |
time.sleep(2)
|
| 924 |
-
|
| 925 |
-
|
|
|
|
|
|
|
|
|
|
| 926 |
except Exception:
|
| 927 |
-
|
| 928 |
-
|
| 929 |
-
else:
|
| 930 |
-
raise
|
| 931 |
|
|
|
|
| 932 |
try:
|
| 933 |
-
logger.info("Proxy listening on :7860")
|
| 934 |
logger.info(" / → Dashboard")
|
| 935 |
logger.info(" /webui → hermes-web-ui")
|
| 936 |
logger.info(" /api/* → proxy to WebUI BFF (:6060)")
|
| 937 |
logger.info(" /v1/* → proxy to WebUI BFF (:6060)")
|
| 938 |
-
|
| 939 |
server.serve_forever()
|
| 940 |
except Exception as e:
|
| 941 |
logger.error("FATAL: Proxy server error: %s", e)
|
|
|
|
| 879 |
_ensure_persistent_storage()
|
| 880 |
logger.info("Persistent storage ready at %s", DATA_DIR)
|
| 881 |
|
| 882 |
+
# Start HTTP proxy server FIRST (before background checks)
|
| 883 |
+
# HF Space requires the EXPOSE'd port to be bound quickly,
|
| 884 |
+
# otherwise it shows a 404 page even though the container is RUNNING.
|
| 885 |
+
try:
|
| 886 |
+
server = ThreadingHTTPServer(("0.0.0.0", 7860), ProxyHandler)
|
| 887 |
+
logger.info("Proxy listening on :7860")
|
| 888 |
+
except OSError as e:
|
| 889 |
+
logger.error("FATAL: Cannot bind port 7860: %s", e)
|
| 890 |
+
sys.exit(1)
|
| 891 |
+
|
| 892 |
# Start log tailer thread
|
| 893 |
tailer = threading.Thread(target=_log_tailer, daemon=True)
|
| 894 |
tailer.start()
|
|
|
|
| 899 |
watchdog.start()
|
| 900 |
logger.info("Python gateway watchdog started")
|
| 901 |
|
| 902 |
+
# Check if backend services are reachable (non-blocking, best-effort)
|
| 903 |
+
# This runs in a background thread so it doesn't delay serve_forever()
|
| 904 |
+
def _wait_for_backends():
|
| 905 |
+
for attempt in range(10):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 906 |
try:
|
| 907 |
+
urllib.request.urlopen(f"{GATEWAY_URL}/health", timeout=2)
|
| 908 |
+
logger.info("Gateway reachable at %s", GATEWAY_URL)
|
| 909 |
+
break
|
| 910 |
+
except Exception:
|
| 911 |
time.sleep(2)
|
| 912 |
+
for attempt in range(10):
|
| 913 |
+
try:
|
| 914 |
+
urllib.request.urlopen(f"{WEBUI_BFF_URL}/health", timeout=2)
|
| 915 |
+
logger.info("WebUI BFF reachable at %s", WEBUI_BFF_URL)
|
| 916 |
+
break
|
| 917 |
except Exception:
|
| 918 |
+
time.sleep(2)
|
| 919 |
+
threading.Thread(target=_wait_for_backends, daemon=True).start()
|
|
|
|
|
|
|
| 920 |
|
| 921 |
+
# Start serving (this blocks forever)
|
| 922 |
try:
|
|
|
|
| 923 |
logger.info(" / → Dashboard")
|
| 924 |
logger.info(" /webui → hermes-web-ui")
|
| 925 |
logger.info(" /api/* → proxy to WebUI BFF (:6060)")
|
| 926 |
logger.info(" /v1/* → proxy to WebUI BFF (:6060)")
|
|
|
|
| 927 |
server.serve_forever()
|
| 928 |
except Exception as e:
|
| 929 |
logger.error("FATAL: Proxy server error: %s", e)
|