"""Movimento Space entrypoint: run native Kimodo demo directly."""
from __future__ import annotations

import os
import socket
import subprocess
import sys
import traceback
import time

try:
    import spaces  # type: ignore
except Exception:
    # Broad on purpose: any failure to import ``spaces`` falls back to the
    # no-op stand-in below so the script can still start.
    class _SpacesFallback:
        """No-op stand-in used when the ``spaces`` package cannot be imported."""

        @staticmethod
        def GPU(*args, **kwargs):
            """Imitate the ``spaces.GPU`` decorator without doing anything.

            Supports both decorator spellings:

            * ``@spaces.GPU`` (bare): the decorated function is passed as
              the only positional argument and is returned unchanged.
            * ``@spaces.GPU(duration=60)`` (called): the arguments are
              ignored and an identity decorator is returned.
            """
            # Bare usage: the sole positional argument is the function itself.
            if len(args) == 1 and not kwargs and callable(args[0]):
                return args[0]

            def _decorator(fn):
                return fn

            return _decorator

    spaces = _SpacesFallback()

# --- Demo server binding ----------------------------------------------------
# PORT comes from the environment (default 7860) and is always exported as
# SERVER_PORT, overriding any pre-existing SERVER_PORT value.
PORT = int(os.environ.get("PORT", "7860"))
os.environ["SERVER_PORT"] = str(PORT)

# --- Defaults that only apply when the variable is not already set ----------
# The text encoder defaults select API mode to avoid the local LLM2Vec
# fallback on Spaces (which requires gated Llama weights).
for _key, _value in (
    ("SERVER_NAME", "0.0.0.0"),
    ("HF_MODE", "1"),
    ("TEXT_ENCODER_MODE", "api"),
    ("TEXT_ENCODER", "llm2vec"),
    ("LLM2VEC_BASE_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct"),
    (
        "LLM2VEC_PEFT_MODEL",
        "McGill-NLP/LLM2Vec-Meta-Llama-31-8B-Instruct-mntp-supervised",
    ),
):
    os.environ.setdefault(_key, _value)
del _key, _value

# --- Hugging Face token aliases ----------------------------------------------
# Mirror HF_TOKEN under the other variable names, without overriding any
# alias that was set explicitly.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    for _alias in (
        "HUGGING_FACE_HUB_TOKEN",
        "HF_HUB_TOKEN",
        "HUGGINGFACEHUB_API_TOKEN",
    ):
        os.environ.setdefault(_alias, hf_token)
    del _alias

# --- Text encoder location ---------------------------------------------------
TEXT_ENCODER_PORT = int(os.environ.get("TEXT_ENCODER_PORT", "9550"))
TEXT_ENCODER_SOURCE = os.environ.get("TEXT_ENCODER_SOURCE", "local").strip().lower()
if TEXT_ENCODER_SOURCE == "local":
    # Local mode: default the URL to the loopback port configured above.
    os.environ.setdefault("TEXT_ENCODER_URL", f"http://127.0.0.1:{TEXT_ENCODER_PORT}/")
elif TEXT_ENCODER_SOURCE == "remote":
    # Remote mode: the caller must supply the URL; there is no default.
    if "TEXT_ENCODER_URL" not in os.environ:
        raise RuntimeError("TEXT_ENCODER_URL is required when TEXT_ENCODER_SOURCE=remote.")
else:
    raise RuntimeError("TEXT_ENCODER_SOURCE must be 'local' or 'remote'.")

# Prefer CPU on ZeroGPU to avoid low-level CUDA init crashes during model load.
os.environ.setdefault("KIMODO_DEVICE", "cpu")


@spaces.GPU(duration=60)
def _gpu_healthcheck() -> str:
    """Return the literal string ``"ok"``.

    This function exists so the module contains something wrapped by
    ``spaces.GPU``, which the original author notes is required by the
    ZeroGPU startup policy. The native demo does not invoke it.
    """
    status = "ok"
    return status


def _wait_for_port(port: int, timeout_s: float = 30.0) -> None:
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            with socket.create_connection(("127.0.0.1", port), timeout=1.5):
                return
        except OSError:
            time.sleep(0.5)
    raise RuntimeError(f"Text encoder server failed to bind on 127.0.0.1:{port}")


def _start_text_encoder_server() -> subprocess.Popen:
    """Launch the text encoder server subprocess and wait for its port.

    The child runs ``kimodo.scripts.run_text_encoder_server`` with the
    current interpreter. Its environment is a copy of this process's
    environment with ``GRADIO_SERVER_NAME`` / ``GRADIO_SERVER_PORT`` pointed
    at ``127.0.0.1:TEXT_ENCODER_PORT`` and, when ``HF_TOKEN`` is set, the
    token duplicated under the alternative variable names.

    Returns:
        The ``subprocess.Popen`` handle of the running server.

    Raises:
        RuntimeError: If the port does not accept connections within 45
            seconds. The child process is terminated before re-raising so
            no orphaned server is left behind.
    """
    env = os.environ.copy()
    env["GRADIO_SERVER_NAME"] = "127.0.0.1"
    env["GRADIO_SERVER_PORT"] = str(TEXT_ENCODER_PORT)

    # Ensure HF_TOKEN is explicitly passed to the text encoder subprocess
    # under every variable name listed below.
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        for token_var in (
            "HF_TOKEN",
            "HUGGING_FACE_HUB_TOKEN",
            "HF_HUB_TOKEN",
            "HUGGINGFACEHUB_API_TOKEN",
        ):
            env[token_var] = hf_token
        # Only the length is logged, never the token itself.
        print(f"[movimento][boot] HF_TOKEN set for text encoder (len={len(hf_token)})")
    else:
        print("[movimento][boot] WARNING: HF_TOKEN not found in environment")

    print(f"[movimento][boot] starting text encoder server at 127.0.0.1:{TEXT_ENCODER_PORT}")
    proc = subprocess.Popen([sys.executable, "-m", "kimodo.scripts.run_text_encoder_server"], env=env)
    try:
        _wait_for_port(TEXT_ENCODER_PORT, timeout_s=45.0)
    except BaseException:
        # The caller never receives the handle on failure, so stop and reap
        # the child here instead of leaking it.
        proc.terminate()
        try:
            proc.wait(timeout=5.0)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        raise
    print(f"[movimento][boot] text encoder server ready at 127.0.0.1:{TEXT_ENCODER_PORT}")
    return proc


def main() -> None:
    """Boot the native Kimodo demo and keep the process alive.

    Steps:
        1. Call the GPU-decorated healthcheck.
        2. Start the local text encoder server when
           ``TEXT_ENCODER_SOURCE`` is ``"local"``; otherwise log the remote
           URL taken from ``TEXT_ENCODER_URL``.
        3. Import ``kimodo``, construct ``Demo()`` and sleep forever.

    Raises:
        Exception: Any startup failure is logged with a truncated traceback
            and then re-raised unchanged.

    A text encoder subprocess started here is terminated whenever this
    function exits, whether through a startup failure or an interruption of
    the keep-alive loop.
    """
    # Bound before the ``try`` so the ``finally`` clause can always see it.
    text_encoder_proc = None
    try:
        # Invoke GPU function to satisfy HF Spaces startup requirement.
        _gpu_healthcheck()

        if TEXT_ENCODER_SOURCE == "local":
            # Keep existing embedding pipeline (TextEncoderAPI -> local llm2vec server).
            text_encoder_proc = _start_text_encoder_server()
        else:
            print(f"[movimento][boot] using remote text encoder: {os.environ['TEXT_ENCODER_URL']}")

        # Imported late so the environment configured at module import time
        # and the text encoder are in place first.
        import kimodo
        from kimodo.demo.app import Demo

        print(f"[movimento][boot] kimodo_module={getattr(kimodo, '__file__', 'unknown')}")
        print(f"[movimento][boot] mode=native_direct port={PORT}")
        if text_encoder_proc is not None:
            print(f"[movimento][boot] text_encoder_pid={text_encoder_proc.pid}")
        Demo()

        # Keep the process alive while Viser serves on SERVER_PORT.
        while True:
            time.sleep(3600)
    except Exception:  # noqa: BLE001
        print("[movimento][boot][fatal] native demo failed to start")
        print(traceback.format_exc(limit=12))
        raise
    finally:
        # Stop the text encoder if it is still running so it does not
        # outlive this process as an orphan.
        if text_encoder_proc is not None and text_encoder_proc.poll() is None:
            text_encoder_proc.terminate()
            try:
                text_encoder_proc.wait(timeout=5.0)
            except subprocess.TimeoutExpired:
                text_encoder_proc.kill()
                text_encoder_proc.wait()


# Run the entrypoint only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()