Spaces:
Runtime error
Runtime error
Space: keep existing embedding pipeline and start local text-encoder API server on boot
Browse files
app.py
CHANGED
|
@@ -2,6 +2,9 @@
|
|
| 2 |
from __future__ import annotations
|
| 3 |
|
| 4 |
import os
|
|
|
|
|
|
|
|
|
|
| 5 |
import traceback
|
| 6 |
import time
|
| 7 |
|
|
@@ -24,6 +27,9 @@ os.environ["SERVER_PORT"] = str(PORT)
|
|
| 24 |
os.environ.setdefault("HF_MODE", "1")
|
| 25 |
# Avoid local LLM2Vec fallback on Spaces (requires gated Llama weights).
|
| 26 |
os.environ.setdefault("TEXT_ENCODER_MODE", "api")
|
|
|
|
|
|
|
|
|
|
| 27 |
# Prefer CPU on ZeroGPU to avoid low-level CUDA init crashes during model load.
|
| 28 |
os.environ.setdefault("KIMODO_DEVICE", "cpu")
|
| 29 |
|
|
@@ -34,16 +40,42 @@ def _gpu_healthcheck() -> str:
|
|
| 34 |
return "ok"
|
| 35 |
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
def main() -> None:
|
| 38 |
try:
|
| 39 |
# Invoke GPU function to satisfy HF Spaces startup requirement.
|
| 40 |
_gpu_healthcheck()
|
| 41 |
|
|
|
|
|
|
|
|
|
|
| 42 |
import kimodo
|
| 43 |
from kimodo.demo.app import Demo
|
| 44 |
|
| 45 |
print(f"[movimento][boot] kimodo_module={getattr(kimodo, '__file__', 'unknown')}")
|
| 46 |
print(f"[movimento][boot] mode=native_direct port={PORT}")
|
|
|
|
| 47 |
Demo()
|
| 48 |
|
| 49 |
# Keep the process alive while Viser serves on SERVER_PORT.
|
|
|
|
| 2 |
from __future__ import annotations
|
| 3 |
|
| 4 |
import os
|
| 5 |
+
import socket
|
| 6 |
+
import subprocess
|
| 7 |
+
import sys
|
| 8 |
import traceback
|
| 9 |
import time
|
| 10 |
|
|
|
|
| 27 |
os.environ.setdefault("HF_MODE", "1")
|
| 28 |
# Avoid local LLM2Vec fallback on Spaces (requires gated Llama weights).
|
| 29 |
os.environ.setdefault("TEXT_ENCODER_MODE", "api")
|
| 30 |
+
os.environ.setdefault("TEXT_ENCODER", "llm2vec")
|
| 31 |
+
TEXT_ENCODER_PORT = int(os.environ.get("TEXT_ENCODER_PORT", "9550"))
|
| 32 |
+
os.environ.setdefault("TEXT_ENCODER_URL", f"http://127.0.0.1:{TEXT_ENCODER_PORT}/")
|
| 33 |
# Prefer CPU on ZeroGPU to avoid low-level CUDA init crashes during model load.
|
| 34 |
os.environ.setdefault("KIMODO_DEVICE", "cpu")
|
| 35 |
|
|
|
|
| 40 |
return "ok"
|
| 41 |
|
| 42 |
|
| 43 |
+
def _wait_for_port(port: int, timeout_s: float = 30.0) -> None:
    """Block until a TCP connection to 127.0.0.1:``port`` succeeds.

    The port is probed repeatedly until a connection is accepted or the
    time budget is exhausted.

    Args:
        port: TCP port on the loopback interface to probe.
        timeout_s: Total time budget in seconds. A non-positive value
            raises immediately without probing.

    Raises:
        RuntimeError: If no connection could be established within
            ``timeout_s`` seconds.
    """
    # Use the monotonic clock so wall-clock adjustments (NTP, manual changes)
    # can neither stretch nor truncate the wait.
    deadline = time.monotonic() + timeout_s
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        try:
            # Cap the per-attempt timeout so a hanging connect cannot overrun
            # the overall budget.
            with socket.create_connection(("127.0.0.1", port), timeout=min(1.5, remaining)):
                return
        except OSError:
            # Nothing is accepting connections yet; back off briefly, but
            # never sleep past the deadline.
            time.sleep(min(0.5, max(0.0, deadline - time.monotonic())))
    raise RuntimeError(f"Text encoder server failed to bind on 127.0.0.1:{port}")
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _start_text_encoder_server() -> subprocess.Popen:
    """Spawn the text-encoder server subprocess and wait until its port accepts connections.

    The child runs ``python -m kimodo.scripts.run_text_encoder_server`` with
    a copy of the current environment in which ``GRADIO_SERVER_NAME`` and
    ``GRADIO_SERVER_PORT`` point at 127.0.0.1:``TEXT_ENCODER_PORT``.
    NOTE(review): presumably the server script reads these two variables to
    choose its bind address -- confirm against kimodo.

    Returns:
        The ``subprocess.Popen`` handle of the running server process.

    Raises:
        RuntimeError: Propagated from ``_wait_for_port`` when the port does
            not accept connections within 45 seconds. The child process is
            terminated and reaped before the exception propagates.
    """
    env = os.environ.copy()
    env["GRADIO_SERVER_NAME"] = "127.0.0.1"
    env["GRADIO_SERVER_PORT"] = str(TEXT_ENCODER_PORT)
    print(f"[movimento][boot] starting text encoder server at 127.0.0.1:{TEXT_ENCODER_PORT}")
    proc = subprocess.Popen([sys.executable, "-m", "kimodo.scripts.run_text_encoder_server"], env=env)
    try:
        _wait_for_port(TEXT_ENCODER_PORT, timeout_s=45.0)
    except BaseException:
        # Do not leak the child if startup fails or is interrupted: ask it to
        # exit, escalate to kill if it does not, and reap it before re-raising.
        proc.terminate()
        try:
            proc.wait(timeout=5.0)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        raise
    print(f"[movimento][boot] text encoder server ready at 127.0.0.1:{TEXT_ENCODER_PORT}")
    return proc
|
| 63 |
+
|
| 64 |
+
|
| 65 |
def main() -> None:
|
| 66 |
try:
|
| 67 |
# Invoke GPU function to satisfy HF Spaces startup requirement.
|
| 68 |
_gpu_healthcheck()
|
| 69 |
|
| 70 |
+
# Keep existing embedding pipeline (TextEncoderAPI -> local llm2vec server).
|
| 71 |
+
text_encoder_proc = _start_text_encoder_server()
|
| 72 |
+
|
| 73 |
import kimodo
|
| 74 |
from kimodo.demo.app import Demo
|
| 75 |
|
| 76 |
print(f"[movimento][boot] kimodo_module={getattr(kimodo, '__file__', 'unknown')}")
|
| 77 |
print(f"[movimento][boot] mode=native_direct port={PORT}")
|
| 78 |
+
print(f"[movimento][boot] text_encoder_pid={text_encoder_proc.pid}")
|
| 79 |
Demo()
|
| 80 |
|
| 81 |
# Keep the process alive while Viser serves on SERVER_PORT.
|