"""Movimento Space entrypoint: run native Kimodo demo directly."""
from __future__ import annotations
import os
import socket
import subprocess
import sys
import traceback
import time
try:
    import spaces  # type: ignore
except Exception:
    class _SpacesFallback:
        """No-op stand-in used when the HF `spaces` package is unavailable."""

        @staticmethod
        def GPU(*_args, **_kwargs):
            # Identity decorator so `@spaces.GPU(...)` is harmless off-Spaces.
            return lambda fn: fn

    spaces = _SpacesFallback()
# Port the public-facing demo binds to; HF Spaces injects PORT at runtime.
PORT = int(os.environ.get("PORT", "7860"))
os.environ.setdefault("SERVER_NAME", "0.0.0.0")
os.environ["SERVER_PORT"] = str(PORT)
os.environ.setdefault("HF_MODE", "1")
# Avoid local LLM2Vec fallback on Spaces (requires gated Llama weights).
os.environ.setdefault("TEXT_ENCODER_MODE", "api")
os.environ.setdefault("TEXT_ENCODER", "llm2vec")
os.environ.setdefault("LLM2VEC_BASE_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
os.environ.setdefault(
    "LLM2VEC_PEFT_MODEL",
    "McGill-NLP/LLM2Vec-Meta-Llama-31-8B-Instruct-mntp-supervised",
)
# Mirror HF_TOKEN under every alias downstream tooling may read; setdefault
# preserves any alias that was configured explicitly.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", hf_token)
    os.environ.setdefault("HF_HUB_TOKEN", hf_token)
    os.environ.setdefault("HUGGINGFACEHUB_API_TOKEN", hf_token)
# Text-encoder placement: "local" spawns a sidecar subprocess on
# TEXT_ENCODER_PORT; "remote" requires an explicit TEXT_ENCODER_URL.
TEXT_ENCODER_PORT = int(os.environ.get("TEXT_ENCODER_PORT", "9550"))
TEXT_ENCODER_SOURCE = os.environ.get("TEXT_ENCODER_SOURCE", "local").strip().lower()
if TEXT_ENCODER_SOURCE not in {"local", "remote"}:
    raise RuntimeError("TEXT_ENCODER_SOURCE must be 'local' or 'remote'.")
if TEXT_ENCODER_SOURCE == "local":
    os.environ.setdefault("TEXT_ENCODER_URL", f"http://127.0.0.1:{TEXT_ENCODER_PORT}/")
elif "TEXT_ENCODER_URL" not in os.environ:
    raise RuntimeError("TEXT_ENCODER_URL is required when TEXT_ENCODER_SOURCE=remote.")
# Prefer CPU on ZeroGPU to avoid low-level CUDA init crashes during model load.
os.environ.setdefault("KIMODO_DEVICE", "cpu")
@spaces.GPU(duration=60)
def _gpu_healthcheck() -> str:
    """Minimal GPU-decorated call required by the ZeroGPU startup policy.

    Invoked once at boot; the native demo itself never calls this.
    """
    return "ok"
def _wait_for_port(port: int, timeout_s: float = 30.0) -> None:
deadline = time.time() + timeout_s
while time.time() < deadline:
try:
with socket.create_connection(("127.0.0.1", port), timeout=1.5):
return
except OSError:
time.sleep(0.5)
raise RuntimeError(f"Text encoder server failed to bind on 127.0.0.1:{port}")
def _start_text_encoder_server() -> subprocess.Popen:
    """Launch the local llm2vec text-encoder server as a sidecar subprocess.

    Returns:
        The running `subprocess.Popen` handle, once the server accepts TCP
        connections on 127.0.0.1:TEXT_ENCODER_PORT.

    Raises:
        RuntimeError: if the server does not bind within the timeout. The
            subprocess is terminated before re-raising so it cannot leak.
    """
    env = os.environ.copy()
    # Bind the encoder's server to loopback only; the main demo is its sole client.
    env["GRADIO_SERVER_NAME"] = "127.0.0.1"
    env["GRADIO_SERVER_PORT"] = str(TEXT_ENCODER_PORT)
    # Explicitly forward HF_TOKEN under every alias the subprocess may read.
    # (copy() usually carries these already; this is defensive.)
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        env["HF_TOKEN"] = hf_token
        env["HUGGING_FACE_HUB_TOKEN"] = hf_token
        env["HF_HUB_TOKEN"] = hf_token
        env["HUGGINGFACEHUB_API_TOKEN"] = hf_token
        print(f"[movimento][boot] HF_TOKEN set for text encoder (len={len(hf_token)})")
    else:
        print("[movimento][boot] WARNING: HF_TOKEN not found in environment")
    print(f"[movimento][boot] starting text encoder server at 127.0.0.1:{TEXT_ENCODER_PORT}")
    proc = subprocess.Popen([sys.executable, "-m", "kimodo.scripts.run_text_encoder_server"], env=env)
    try:
        _wait_for_port(TEXT_ENCODER_PORT, timeout_s=45.0)
    except Exception:
        # Don't leak the subprocess if the server never came up.
        proc.terminate()
        raise
    print(f"[movimento][boot] text encoder server ready at 127.0.0.1:{TEXT_ENCODER_PORT}")
    return proc
def main() -> None:
    """Boot sequence: GPU healthcheck, optional local text encoder, native demo.

    Blocks forever once the demo is serving. On any startup failure the
    traceback is printed, the text-encoder subprocess (if one was spawned)
    is terminated, and the exception is re-raised so the Space restarts.
    """
    text_encoder_proc = None
    try:
        # Invoke GPU function to satisfy HF Spaces startup requirement.
        _gpu_healthcheck()
        if TEXT_ENCODER_SOURCE == "local":
            # Keep existing embedding pipeline (TextEncoderAPI -> local llm2vec server).
            text_encoder_proc = _start_text_encoder_server()
        else:
            print(f"[movimento][boot] using remote text encoder: {os.environ['TEXT_ENCODER_URL']}")
        import kimodo
        from kimodo.demo.app import Demo
        print(f"[movimento][boot] kimodo_module={getattr(kimodo, '__file__', 'unknown')}")
        print(f"[movimento][boot] mode=native_direct port={PORT}")
        if text_encoder_proc is not None:
            print(f"[movimento][boot] text_encoder_pid={text_encoder_proc.pid}")
        Demo()
        # Keep the process alive while Viser serves on SERVER_PORT.
        while True:
            time.sleep(3600)
    except Exception:  # noqa: BLE001
        print("[movimento][boot][fatal] native demo failed to start")
        print(traceback.format_exc(limit=12))
        # Don't orphan the encoder subprocess when startup fails.
        if text_encoder_proc is not None:
            text_encoder_proc.terminate()
        raise
# Standard script guard: run the boot sequence only when executed directly.
if __name__ == "__main__":
    main()