AutoFix committed on
Commit
8d9879d
·
1 Parent(s): dfa04db

fix: bind port 7860 BEFORE backend health checks

Browse files

HF Space shows 404 if the EXPOSE'd port isn't bound quickly after
container start. Previously entry.py waited up to 40s for gateway and
WebUI BFF health checks before binding port 7860 — too slow.

Now: bind 7860 first, then start watchdog + health checks in background
threads. This ensures HF routing sees the port immediately.

Files changed (1) hide show
  1. entry.py +26 -38
entry.py CHANGED
@@ -879,6 +879,16 @@ def main():
879
  _ensure_persistent_storage()
880
  logger.info("Persistent storage ready at %s", DATA_DIR)
881
 
 
 
 
 
 
 
 
 
 
 
882
  # Start log tailer thread
883
  tailer = threading.Thread(target=_log_tailer, daemon=True)
884
  tailer.start()
@@ -889,53 +899,31 @@ def main():
889
  watchdog.start()
890
  logger.info("Python gateway watchdog started")
891
 
892
- # Check if backend services are reachable
893
- for attempt in range(10):
894
- try:
895
- urllib.request.urlopen(f"{GATEWAY_URL}/health", timeout=2)
896
- logger.info("Gateway reachable at %s", GATEWAY_URL)
897
- break
898
- except Exception:
899
- logger.warning("Gateway not ready (attempt %d/10)", attempt + 1)
900
- time.sleep(2)
901
-
902
- for attempt in range(10):
903
- try:
904
- urllib.request.urlopen(f"{WEBUI_BFF_URL}/health", timeout=2)
905
- logger.info("WebUI BFF reachable at %s", WEBUI_BFF_URL)
906
- break
907
- except Exception:
908
- logger.warning("WebUI BFF not ready (attempt %d/10)", attempt + 1)
909
- time.sleep(2)
910
-
911
- # Start HTTP proxy server
912
- try:
913
- server = ThreadingHTTPServer(("0.0.0.0", 7860), ProxyHandler)
914
- except OSError as e:
915
- if "Address already in use" in str(e):
916
- logger.error("FATAL: Port 7860 is already in use! Attempting to free it...")
917
- # Try to find and kill whatever is using port 7860
918
  try:
919
- for conn in psutil.net_connections(kind='inet'):
920
- if conn.laddr.port == 7862 and conn.status == 'LISTEN':
921
- logger.info("Killing process %d on port 7862", conn.pid)
922
- psutil.Process(conn.pid).kill()
923
  time.sleep(2)
924
- server = ThreadingHTTPServer(("0.0.0.0", 7860), ProxyHandler)
925
- logger.info("Port 7860 freed successfully, proxy started")
 
 
 
926
  except Exception:
927
- logger.error("Could not free port 7860, aborting")
928
- sys.exit(1)
929
- else:
930
- raise
931
 
 
932
  try:
933
- logger.info("Proxy listening on :7860")
934
  logger.info(" / → Dashboard")
935
  logger.info(" /webui → hermes-web-ui")
936
  logger.info(" /api/* → proxy to WebUI BFF (:6060)")
937
  logger.info(" /v1/* → proxy to WebUI BFF (:6060)")
938
-
939
  server.serve_forever()
940
  except Exception as e:
941
  logger.error("FATAL: Proxy server error: %s", e)
 
879
  _ensure_persistent_storage()
880
  logger.info("Persistent storage ready at %s", DATA_DIR)
881
 
882
+ # Start HTTP proxy server FIRST (before background checks)
883
+ # HF Space requires the EXPOSE'd port to be bound quickly,
884
+ # otherwise it shows a 404 page even though the container is RUNNING.
885
+ try:
886
+ server = ThreadingHTTPServer(("0.0.0.0", 7860), ProxyHandler)
887
+ logger.info("Proxy listening on :7860")
888
+ except OSError as e:
889
+ logger.error("FATAL: Cannot bind port 7860: %s", e)
890
+ sys.exit(1)
891
+
892
  # Start log tailer thread
893
  tailer = threading.Thread(target=_log_tailer, daemon=True)
894
  tailer.start()
 
899
  watchdog.start()
900
  logger.info("Python gateway watchdog started")
901
 
902
+ # Check if backend services are reachable (non-blocking, best-effort)
903
+ # This runs in a background thread so it doesn't delay serve_forever()
904
+ def _wait_for_backends():
905
+ for attempt in range(10):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
906
  try:
907
+ urllib.request.urlopen(f"{GATEWAY_URL}/health", timeout=2)
908
+ logger.info("Gateway reachable at %s", GATEWAY_URL)
909
+ break
910
+ except Exception:
911
  time.sleep(2)
912
+ for attempt in range(10):
913
+ try:
914
+ urllib.request.urlopen(f"{WEBUI_BFF_URL}/health", timeout=2)
915
+ logger.info("WebUI BFF reachable at %s", WEBUI_BFF_URL)
916
+ break
917
  except Exception:
918
+ time.sleep(2)
919
+ threading.Thread(target=_wait_for_backends, daemon=True).start()
 
 
920
 
921
+ # Start serving (this blocks forever)
922
  try:
 
923
  logger.info(" / → Dashboard")
924
  logger.info(" /webui → hermes-web-ui")
925
  logger.info(" /api/* → proxy to WebUI BFF (:6060)")
926
  logger.info(" /v1/* → proxy to WebUI BFF (:6060)")
 
927
  server.serve_forever()
928
  except Exception as e:
929
  logger.error("FATAL: Proxy server error: %s", e)