rydlrKE committed on
Commit
389a1da
·
1 Parent(s): 2ed553b

Space: keep existing embedding pipeline and start local text-encoder API server on boot

Browse files
Files changed (1) hide show
  1. app.py +32 -0
app.py CHANGED
@@ -2,6 +2,9 @@
2
  from __future__ import annotations
3
 
4
  import os
 
 
 
5
  import traceback
6
  import time
7
 
@@ -24,6 +27,9 @@ os.environ["SERVER_PORT"] = str(PORT)
24
  os.environ.setdefault("HF_MODE", "1")
25
  # Avoid local LLM2Vec fallback on Spaces (requires gated Llama weights).
26
  os.environ.setdefault("TEXT_ENCODER_MODE", "api")
 
 
 
27
  # Prefer CPU on ZeroGPU to avoid low-level CUDA init crashes during model load.
28
  os.environ.setdefault("KIMODO_DEVICE", "cpu")
29
 
@@ -34,16 +40,42 @@ def _gpu_healthcheck() -> str:
34
  return "ok"
35
 
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  def main() -> None:
38
  try:
39
  # Invoke GPU function to satisfy HF Spaces startup requirement.
40
  _gpu_healthcheck()
41
 
 
 
 
42
  import kimodo
43
  from kimodo.demo.app import Demo
44
 
45
  print(f"[movimento][boot] kimodo_module={getattr(kimodo, '__file__', 'unknown')}")
46
  print(f"[movimento][boot] mode=native_direct port={PORT}")
 
47
  Demo()
48
 
49
  # Keep the process alive while Viser serves on SERVER_PORT.
 
2
  from __future__ import annotations
3
 
4
  import os
5
+ import socket
6
+ import subprocess
7
+ import sys
8
  import traceback
9
  import time
10
 
 
27
  os.environ.setdefault("HF_MODE", "1")
28
  # Avoid local LLM2Vec fallback on Spaces (requires gated Llama weights).
29
  os.environ.setdefault("TEXT_ENCODER_MODE", "api")
30
+ os.environ.setdefault("TEXT_ENCODER", "llm2vec")
31
+ TEXT_ENCODER_PORT = int(os.environ.get("TEXT_ENCODER_PORT", "9550"))
32
+ os.environ.setdefault("TEXT_ENCODER_URL", f"http://127.0.0.1:{TEXT_ENCODER_PORT}/")
33
  # Prefer CPU on ZeroGPU to avoid low-level CUDA init crashes during model load.
34
  os.environ.setdefault("KIMODO_DEVICE", "cpu")
35
 
 
40
  return "ok"
41
 
42
 
43
def _wait_for_port(port: int, timeout_s: float = 30.0) -> None:
    """Block until a TCP connection to 127.0.0.1:``port`` succeeds.

    A connection is attempted repeatedly (1.5 s connect timeout, 0.5 s pause
    after each failure) until one succeeds or the deadline passes.

    Args:
        port: TCP port on the loopback interface to probe.
        timeout_s: Maximum number of seconds to keep retrying.

    Raises:
        RuntimeError: If no connection succeeded before the deadline.
    """
    # Use the monotonic clock so wall-clock adjustments (NTP steps, manual
    # changes) can neither stretch nor cut short the wait.
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            # The connection is only a liveness probe; close it immediately.
            with socket.create_connection(("127.0.0.1", port), timeout=1.5):
                return
        except OSError:
            time.sleep(0.5)
    raise RuntimeError(f"Text encoder server failed to bind on 127.0.0.1:{port}")
52
+
53
+
54
def _start_text_encoder_server() -> subprocess.Popen:
    """Launch the text encoder server as a child process and wait for its port.

    The child runs ``kimodo.scripts.run_text_encoder_server`` with the current
    interpreter and a copy of this process's environment, with
    ``GRADIO_SERVER_NAME`` / ``GRADIO_SERVER_PORT`` set to
    127.0.0.1:``TEXT_ENCODER_PORT``.

    Returns:
        The ``subprocess.Popen`` handle of the running server.

    Raises:
        RuntimeError: Propagated from ``_wait_for_port`` when the port does not
            accept connections within 45 seconds. The child process is
            terminated before the exception is re-raised.
    """
    env = os.environ.copy()
    env["GRADIO_SERVER_NAME"] = "127.0.0.1"
    env["GRADIO_SERVER_PORT"] = str(TEXT_ENCODER_PORT)
    print(f"[movimento][boot] starting text encoder server at 127.0.0.1:{TEXT_ENCODER_PORT}")
    proc = subprocess.Popen([sys.executable, "-m", "kimodo.scripts.run_text_encoder_server"], env=env)
    try:
        _wait_for_port(TEXT_ENCODER_PORT, timeout_s=45.0)
    except BaseException:
        # Do not leak the child if startup fails or is interrupted: ask it to
        # exit, force-kill if it does not comply, then reap it and re-raise.
        proc.terminate()
        try:
            proc.wait(timeout=5.0)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        raise
    print(f"[movimento][boot] text encoder server ready at 127.0.0.1:{TEXT_ENCODER_PORT}")
    return proc
63
+
64
+
65
  def main() -> None:
66
  try:
67
  # Invoke GPU function to satisfy HF Spaces startup requirement.
68
  _gpu_healthcheck()
69
 
70
+ # Keep existing embedding pipeline (TextEncoderAPI -> local llm2vec server).
71
+ text_encoder_proc = _start_text_encoder_server()
72
+
73
  import kimodo
74
  from kimodo.demo.app import Demo
75
 
76
  print(f"[movimento][boot] kimodo_module={getattr(kimodo, '__file__', 'unknown')}")
77
  print(f"[movimento][boot] mode=native_direct port={PORT}")
78
+ print(f"[movimento][boot] text_encoder_pid={text_encoder_proc.pid}")
79
  Demo()
80
 
81
  # Keep the process alive while Viser serves on SERVER_PORT.