rydlrKE commited on
Commit
2a5255e
·
verified ·
1 Parent(s): 2f71493

Fix CUDA import order - import spaces before torch (commit e28bffd)

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ __pycache__/ui.cpython-312.pyc filter=lfs diff=lfs merge=lfs -text
__init__.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # ruff: noqa: I001
5
+ import argparse
6
+
7
+ from kimodo.model import DEFAULT_MODEL
8
+ from kimodo.model.registry import resolve_model_name
9
+
10
+ from .app import Demo
11
+
12
+
13
def main() -> None:
    """Parse CLI arguments, resolve the model name, and launch the demo UI."""
    arg_parser = argparse.ArgumentParser(description="Run the kimodo demo UI.")
    arg_parser.add_argument(
        "--model",
        type=str,
        default=DEFAULT_MODEL,
        help="Default model to load (e.g. Kimodo-SOMA-RP-v1, kimodo-soma-rp, or SOMA).",
    )
    cli_args = arg_parser.parse_args()

    # Accept short/friendly aliases and normalize to the canonical model name.
    model_name = resolve_model_name(cli_args.model, "Kimodo")
    Demo(default_model_name=model_name).run()


if __name__ == "__main__":
    main()
__main__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Entry point for `python -m kimodo.demo`."""

from kimodo.demo import main

# Delegate to the package-level CLI entry point so `python -m kimodo.demo`
# behaves identically to the installed console script.
if __name__ == "__main__":
    main()
__pycache__/app.cpython-312.pyc ADDED
Binary file (34.2 kB). View file
 
__pycache__/state.cpython-312.pyc ADDED
Binary file (3.01 kB). View file
 
__pycache__/ui.cpython-312.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:023646c6b51d238f0aedf942347c389dfa7384d452f7b1b99aa34bcef924706e
3
+ size 145667
_qwen_prompts.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Qwen-on-Fireworks helper for auto-generating multi-text-prompt batches."""
4
+ from __future__ import annotations
5
+
6
+ import json
7
+ import os
8
+ import re
9
+ import urllib.error
10
+ import urllib.request
11
+
12
+ _MODEL = "accounts/fireworks/models/qwen3p6-27b"
13
+ _BASE = "https://api.fireworks.ai/inference/v1"
14
+
15
+ _SYSTEM = """\
16
+ You are a motion-description writer for a single humanoid character in a 3D animation system.
17
+ Given a scene context and the character's recent motion history, output ONLY a JSON object:
18
+
19
+ {"texts": ["<action phrase 1>", ...], "durations": [<seconds float>, ...]}
20
+
21
+ Rules:
22
+ - Return between 1 and requested_actions short, vivid action phrases that flow naturally from each other.
23
+ - Each phrase describes one distinct physical motion (e.g. "walks forward briskly", "pivots left and crouches").
24
+ - Each duration is between 2.0 and 8.0 seconds.
25
+ - texts and durations must have the same length.
26
+ - Do NOT repeat phrases from history.
27
+ - Return raw JSON only — no markdown, no explanation.
28
+ """
29
+
30
+
31
+ def _call_fireworks(messages: list[dict]) -> str:
32
+ api_key = os.environ.get("FIREWORKS_API_KEY", "").strip()
33
+ if not api_key:
34
+ raise RuntimeError("FIREWORKS_API_KEY is not set")
35
+ body = json.dumps({
36
+ "model": _MODEL,
37
+ "messages": messages,
38
+ "max_tokens": 400,
39
+ "temperature": 0.85,
40
+ }).encode()
41
+ req = urllib.request.Request(
42
+ f"{_BASE}/chat/completions",
43
+ data=body,
44
+ headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
45
+ method="POST",
46
+ )
47
+ try:
48
+ with urllib.request.urlopen(req, timeout=40) as r:
49
+ return json.loads(r.read())["choices"][0]["message"]["content"]
50
+ except urllib.error.HTTPError as e:
51
+ raise RuntimeError(f"Fireworks {e.code}: {e.read().decode(errors='ignore')}") from e
52
+
53
+
54
+ def _parse(raw: str) -> dict:
55
+ text = raw.strip()
56
+ m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
57
+ text = m.group(1) if m else text
58
+ s, e = text.find("{"), text.rfind("}")
59
+ return json.loads(text[s:e + 1])
60
+
61
+
62
+ def _fallback(offset: int) -> dict:
63
+ phrases = [
64
+ "walks forward at a steady pace",
65
+ "turns smoothly to the left",
66
+ "pauses and surveys the surroundings",
67
+ "steps forward and gestures expressively",
68
+ "crouches down then rises back up",
69
+ "sidesteps to the right with purpose",
70
+ ]
71
+ n = len(phrases)
72
+ return {
73
+ "texts": [phrases[(offset + i) % n] for i in range(3)],
74
+ "durations": [3.0, 3.5, 3.0],
75
+ }
76
+
77
+
78
def call_qwen_for_prompts(
    scene: str,
    history: list[str],
    requested_actions: int = 5,
) -> tuple[dict, list[str]]:
    """Call Qwen to produce the next batch of motion prompts.

    Args:
        scene: Free-form scene description; empty falls back to a generic one.
        history: Previously used prompts; the last 12 are sent so the model
            avoids repeats. Not mutated.
        requested_actions: Upper bound on phrases to request (clamped to 1-10).

    Returns (batch_dict, updated_history), where batch_dict has keys "texts"
    and "durations" of equal length.

    Never raises on API failure: any error (missing API key, HTTP failure,
    malformed or empty reply) falls back to a deterministic local batch.
    """
    user_msg = (
        f"Scene: {scene or 'a character moving continuously in 3D space'}\n"
        f"Motion history (do not repeat): {json.dumps(history[-12:])}\n\n"
        f"requested_actions: {max(1, min(10, int(requested_actions)))}\n"
        "Generate the next batch of motion prompts."
    )
    try:
        raw = _call_fireworks([{"role": "system", "content": _SYSTEM}, {"role": "user", "content": user_msg}])
        batch = _parse(raw)
        if not isinstance(batch.get("texts"), list) or not isinstance(batch.get("durations"), list):
            raise ValueError("Missing texts or durations")
        # Trim both lists to their common length; an empty batch is useless,
        # so treat it as a failure and use the fallback instead.
        n = min(len(batch["texts"]), len(batch["durations"]))
        if n == 0:
            raise ValueError("Empty texts/durations batch")
        batch["texts"] = batch["texts"][:n]
        batch["durations"] = batch["durations"][:n]
    except Exception:
        # Offset by history length so repeated fallbacks rotate phrases.
        batch = _fallback(len(history))

    new_history = history + list(batch["texts"])
    return batch, new_history
app.py ADDED
@@ -0,0 +1,742 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # ============================================================================
5
+ # CRITICAL: Import spaces FIRST, before any CUDA-related packages.
6
+ # This prevents "CUDA has been initialized before importing spaces" error.
7
+ # ============================================================================
8
+ try:
9
+ import spaces # noqa: F401 - imported early for ZeroGPU compatibility
10
+ except ImportError:
11
+ pass # Not running on HF Spaces with ZeroGPU
12
+
13
+ import base64
14
+ import logging
15
+ import os
16
+ import shutil
17
+ import threading
18
+ import time
19
+ from typing import Optional
20
+
21
+ import numpy as np
22
+ import torch
23
+
24
+ import viser
25
+ from kimodo.assets import DEMO_ASSETS_ROOT
26
+ from kimodo.model.load_model import load_model
27
+ from kimodo.model.registry import resolve_model_name
28
+ from kimodo.runtime.device import select_runtime_device
29
+ from kimodo.skeleton import SkeletonBase, SOMASkeleton30
30
+ from kimodo.tools import load_json
31
+ from kimodo.viz import viser_utils
32
+ from kimodo.viz.viser_utils import (
33
+ Character,
34
+ CharacterMotion,
35
+ EEJointsKeyframeSet,
36
+ FullbodyKeyframeSet,
37
+ RootKeyframe2DSet,
38
+ )
39
+ from viser.theme import TitlebarButton, TitlebarConfig, TitlebarImage
40
+
41
+ from . import generation, ui
42
+ from .config import (
43
+ DARK_THEME,
44
+ DEFAULT_CUR_DURATION,
45
+ DEFAULT_MODEL,
46
+ DEFAULT_PLAYBACK_SPEED,
47
+ DEFAULT_PROMPT,
48
+ DEMO_UI_QUICK_START_MODAL_MD,
49
+ EXAMPLES_ROOT_DIR,
50
+ HF_MODE,
51
+ LIGHT_THEME,
52
+ MAX_ACTIVE_USERS,
53
+ MAX_DURATION,
54
+ MAX_SESSION_MINUTES,
55
+ MIN_DURATION,
56
+ MODEL_EXAMPLES_DIRS,
57
+ MODEL_NAMES,
58
+ SERVER_NAME,
59
+ SERVER_PORT,
60
+ )
61
+ from .embedding_cache import CachedTextEncoder
62
+ from .queue_manager import QueueManager, UserQueue
63
+ from .state import ClientSession, ModelBundle
64
+
65
+ # Hosted runtimes (HF/Cloud Run) often send non-WS probes to the WS endpoint.
66
+ # Suppress noisy stack traces for these expected invalid handshakes.
67
+ logging.getLogger("websockets.server").setLevel(logging.CRITICAL)
68
+ logging.getLogger("websockets.asyncio.server").setLevel(logging.CRITICAL)
69
+
70
+
71
+ class Demo:
72
    def __init__(self, default_model_name: str = DEFAULT_MODEL):
        """Set up device selection, model registry, viser server, and queues.

        Raises ValueError if `default_model_name` does not resolve to a
        known model in MODEL_NAMES.
        """
        # In hosted HF runtimes (including ZeroGPU), touching CUDA too early can
        # crash startup before queue-managed inference starts.
        requested_device = os.getenv("KIMODO_DEVICE")
        running_in_space = bool(os.getenv("SPACE_ID")) or os.getenv("SYSTEM", "").strip().lower() == "spaces"
        if requested_device is None and (HF_MODE or running_in_space):
            requested_device = "cpu"
        self.device = select_runtime_device(requested=requested_device)
        print(f"Using device: {self.device}")
        # Cache of loaded models, keyed by canonical model name.
        self.models: dict[str, ModelBundle] = {}
        resolved = resolve_model_name(default_model_name, "Kimodo")
        if resolved not in MODEL_NAMES:
            raise ValueError(f"Unknown model '{default_model_name}'. Expected one of: {MODEL_NAMES}")
        self.default_model_name = resolved
        # Defaults to deferred loading so the server comes up fast; the first
        # client session triggers the actual (slow) model load.
        self.defer_model_load = os.getenv("KIMODO_DEFER_MODEL_LOAD", "true").strip().lower() in {
            "1",
            "true",
            "yes",
            "on",
        }
        self.ensure_examples_layout()
        if self.defer_model_load:
            print("Deferring model load until first active client session.")
        else:
            self.load_model(self.default_model_name)

        # Serialize GPU-bound generation across all clients
        self._generation_lock = threading.Lock()
        self._cuda_healthy = True

        # Per-client sessions
        self.client_sessions: dict[int, ClientSession] = {}
        self.start_direction_markers: dict[int, viser_utils.WaypointMesh] = {}
        self.grid_handles: dict[int, viser.GridHandle] = {}

        self.server = viser.ViserServer(
            host=SERVER_NAME,
            port=SERVER_PORT,
            label="Kimodo",
            enable_camera_keyboard_controls=False,  # don't move the camera with the arrow keys
        )
        self.server.scene.world_axes.visible = False  # used for debugging
        self.server.scene.set_up_direction("+y")

        # Register callbacks for session handling
        self.server.on_client_connect(self.on_client_connect)
        self.server.on_client_disconnect(self.on_client_disconnect)

        # HF mode: queue and session limit
        if HF_MODE:
            self.user_queue = UserQueue(MAX_ACTIVE_USERS, MAX_SESSION_MINUTES)
            self.queue_manager = QueueManager(
                queue=self.user_queue,
                server=self.server,
                setup_demo_for_client=self._setup_demo_for_client,
                cleanup_session=self._cleanup_session_for_client,
            )
        else:
            self.user_queue = None
            self.queue_manager = None

        # create grid and floor
        self.floor_len = 20.0  # meters
135
+
136
+ def ensure_examples_layout(self) -> None:
137
+ os.makedirs(EXAMPLES_ROOT_DIR, exist_ok=True)
138
+ for model_dir in MODEL_EXAMPLES_DIRS.values():
139
+ os.makedirs(model_dir, exist_ok=True)
140
+
141
+ for entry in os.listdir(EXAMPLES_ROOT_DIR):
142
+ if entry in MODEL_EXAMPLES_DIRS:
143
+ continue
144
+ src = os.path.join(EXAMPLES_ROOT_DIR, entry)
145
+ if not os.path.isdir(src):
146
+ continue
147
+ dst = os.path.join(
148
+ MODEL_EXAMPLES_DIRS.get(DEFAULT_MODEL, next(iter(MODEL_EXAMPLES_DIRS.values()))),
149
+ entry,
150
+ )
151
+ if not os.path.exists(dst):
152
+ shutil.move(src, dst)
153
+
154
    def get_examples_base_dir(self, model_name: str, absolute: bool = True) -> str:
        """Return the examples directory configured for `model_name`.

        Raises KeyError for unknown model names.
        NOTE(review): the `absolute` parameter is currently ignored — the
        configured path is returned as-is; confirm whether relative paths
        are ever needed by callers.
        """
        return MODEL_EXAMPLES_DIRS[model_name]
156
+
157
    def load_model(self, model_name: str) -> ModelBundle:
        """Load the model bundle for `model_name`, caching it for reuse.

        Subsequent calls with the same name return the cached ModelBundle.
        On success the text-embedding cache is prewarmed with this model's
        example prompts. Re-raises any loading error after printing a hint.
        """
        if model_name in self.models:
            return self.models[model_name]

        print(f"Loading model {model_name}...")
        try:
            # Note: calls the module-level load_model imported from
            # kimodo.model.load_model, not this method.
            model = load_model(modelname=model_name, device=self.device)
        except Exception as e:
            print(
                "Error loading model during Kimodo startup. "
                "This often means the text encoder server is not running, the Hugging Face token is missing, "
                "or the gated text encoder model cannot be accessed."
            )
            print(f"Original error: {type(e).__name__}: {e}")
            raise e

        # Wrap the text encoder so repeated prompts hit an embedding cache.
        if hasattr(model, "text_encoder"):
            model.text_encoder = CachedTextEncoder(model.text_encoder, model_name=model_name)

        skeleton = model.motion_rep.skeleton
        # SOMA-30 skeletons are swapped for their 77-joint parent skeleton
        # (presumably for visualization — confirm against viser_utils usage).
        if isinstance(skeleton, SOMASkeleton30):
            skeleton = skeleton.somaskel77.to(model.device)
        bundle = ModelBundle(
            model=model,
            motion_rep=model.motion_rep,
            skeleton=skeleton,
            model_fps=model.motion_rep.fps,
        )
        self.models[model_name] = bundle
        print(f"Model {model_name} loaded successfully")
        self.prewarm_embedding_cache(model_name, bundle.model)
        return bundle
189
+
190
    def prewarm_embedding_cache(self, model_name: str, model: object) -> None:
        """Precompute text embeddings for the default and example prompts.

        Best-effort: malformed examples are skipped and encoder failures are
        logged, but nothing here ever aborts startup.
        """
        encoder = getattr(model, "text_encoder", None)
        if not isinstance(encoder, CachedTextEncoder):
            return

        # Collect the default prompt plus every prompt referenced by the
        # on-disk example metadata for this model.
        prompt_set = set()
        prompt_set.add(DEFAULT_PROMPT)

        examples_dir = MODEL_EXAMPLES_DIRS.get(model_name)
        if examples_dir and os.path.isdir(examples_dir):
            for entry in os.listdir(examples_dir):
                example_dir = os.path.join(examples_dir, entry)
                if not os.path.isdir(example_dir):
                    continue
                meta_path = os.path.join(example_dir, "meta.json")
                if not os.path.exists(meta_path):
                    continue
                try:
                    meta = load_json(meta_path)
                except Exception:
                    # Skip unreadable/malformed metadata rather than failing.
                    continue
                for prompt in meta.get("prompts_text", []):
                    if isinstance(prompt, str):
                        prompt_set.add(prompt)

        if prompt_set:
            try:
                encoder.prewarm(list(prompt_set))
            except Exception as error:
                # Startup should not fail if text encoder is still warming up.
                error_str = str(error)
                if "Encoder initialization failed" in error_str:
                    print(
                        f"⚠️ WARNING: Text encoder failed to initialize: {error}\n"
                        f" This usually means the HuggingFace gated model cannot be accessed.\n"
                        f" To fix: Set HF_TOKEN environment variable with access to Meta-Llama-3-8B.\n"
                        f" Alternatively: Generation will still work but text embeddings may fail."
                    )
                else:
                    print(f"Warning: embedding prewarm skipped: {error}")
230
+
231
+ def build_constraint_tracks(
232
+ self, client: viser.ClientHandle, skeleton: SkeletonBase
233
+ ) -> dict[str, viser_utils.ConstraintSet]:
234
+ return {
235
+ "Full-Body": FullbodyKeyframeSet(
236
+ name="Full-Body",
237
+ server=client,
238
+ skeleton=skeleton,
239
+ ),
240
+ "End-Effectors": EEJointsKeyframeSet(
241
+ name="End-Effectors",
242
+ server=client,
243
+ skeleton=skeleton,
244
+ ),
245
+ "2D Root": RootKeyframe2DSet(
246
+ name="2D Root",
247
+ server=client,
248
+ skeleton=skeleton,
249
+ ),
250
+ }
251
+
252
    def set_timeline_defaults(self, timeline, model_fps: float) -> None:
        """Configure the timeline's default prompt, duration bounds, and zoom.

        Durations configured in seconds (config.py) are converted here to the
        frame counts the timeline widget expects.
        """
        timeline.set_defaults(
            default_text=DEFAULT_PROMPT,
            default_duration=int(DEFAULT_CUR_DURATION * model_fps - 1),
            min_duration=int(MIN_DURATION * model_fps - 1),  # 2 seconds minimum,
            max_duration=int(
                MAX_DURATION * model_fps - 1  # - NB_TRANSITION_FRAMES
            ),  # 10 seconds maximum, minus the transition frames, if needed
            default_num_frames_zoom=int(1.10 * 10 * model_fps),  # a bit more than the max
            max_frames_zoom=1000,
            fps=model_fps,
        )
264
+
265
+ def _apply_constraint_overlay_visibility(self, session: ClientSession) -> None:
266
+ """Apply show-all vs show-only-current-frame to constraint overlays."""
267
+ only_frame = session.frame_idx if session.show_only_current_constraint else None
268
+ for constraint in session.constraints.values():
269
+ constraint.set_overlay_visibility(only_frame)
270
+
271
    def set_constraint_tracks_visible(self, session: ClientSession, visible: bool) -> None:
        """Show or hide all constraint tracks on the client's timeline.

        The track/keyframe/interval data is kept in session.timeline_data, so
        hiding removes the widgets and showing rebuilds them (with their
        original uuids) from that cached state. No-op if already in the
        requested state.
        """
        timeline = session.client.timeline
        timeline_data = session.timeline_data
        if timeline_data.get("constraint_tracks_visible", True) == visible:
            return

        # Lock: rebuild must not interleave with keyframe edits from callbacks.
        with timeline_data["keyframe_update_lock"]:
            if visible:
                # Rebuild order matters: tracks first, then the keyframes and
                # intervals that reference them by track_id.
                for track_id, track_info in timeline_data["tracks"].items():
                    timeline.add_track(
                        track_info["name"],
                        track_type=track_info.get("track_type", "keyframe"),
                        color=track_info.get("color"),
                        height_scale=track_info.get("height_scale", 1.0),
                        uuid=track_id,
                    )

                for keyframe_id, keyframe_data in timeline_data["keyframes"].items():
                    timeline.add_keyframe(
                        track_id=keyframe_data["track_id"],
                        frame=keyframe_data["frame"],
                        value=keyframe_data.get("value"),
                        opacity=keyframe_data.get("opacity", 1.0),
                        locked=keyframe_data.get("locked", False),
                        uuid=keyframe_id,
                    )

                for interval_id, interval_data in timeline_data["intervals"].items():
                    timeline.add_interval(
                        track_id=interval_data["track_id"],
                        start_frame=interval_data["start_frame_idx"],
                        end_frame=interval_data["end_frame_idx"],
                        value=interval_data.get("value"),
                        opacity=interval_data.get("opacity", 1.0),
                        locked=interval_data.get("locked", False),
                        uuid=interval_id,
                    )
            else:
                # Removing a track also removes its contents on the widget
                # side; the cached dicts in timeline_data are kept intact.
                for track_id in list(timeline_data["tracks"].keys()):
                    timeline.remove_track(track_id)

            timeline_data["constraint_tracks_visible"] = visible
313
+
314
+ def _cleanup_session_for_client(self, client_id: int) -> None:
315
+ """Remove session and scene state for a client (e.g. on session expiry)."""
316
+ if client_id in self.client_sessions:
317
+ del self.client_sessions[client_id]
318
+ self.start_direction_markers.pop(client_id, None)
319
+ self.grid_handles.pop(client_id, None)
320
+
321
    def _setup_demo_for_client(self, client: viser.ClientHandle) -> None:
        """Initialize scene, GUI, and session state for a client (no modals)."""
        self.setup_scene(client)

        # May trigger the deferred (first-use) model load; cached afterwards.
        model_bundle = self.load_model(self.default_model_name)

        # Initialize each empty constraint track
        constraint_tracks = self.build_constraint_tracks(client, model_bundle.skeleton)

        # Create GUI elements for this client
        (
            gui_elements,
            timeline_tracks,
            example_dict,
            gui_examples_dropdown,
            gui_save_example_path_text,
            gui_model_selector,
        ) = ui.create_gui(
            demo=self,
            client=client,
            model_name=self.default_model_name,
            model_fps=model_bundle.model_fps,
        )
        # Mutable per-client timeline bookkeeping shared with UI callbacks;
        # guarded by "keyframe_update_lock" where rebuilds occur.
        timeline_data = {
            "tracks": timeline_tracks,
            "tracks_ids": {val["name"]: key for key, val in timeline_tracks.items()},
            "keyframes": {},
            "intervals": {},
            "keyframe_update_lock": threading.Lock(),
            "keyframe_move_timers": {},
            "pending_keyframe_moves": {},  # keyframe_id -> new_frame
            "constraint_tracks_visible": True,
            "dense_path_after_release_timer": None,
        }

        # Initialize session state
        cur_duration = DEFAULT_CUR_DURATION
        max_frame_idx = int(cur_duration * model_bundle.model_fps - 1)

        session = ClientSession(
            client=client,
            gui_elements=gui_elements,
            motions={},
            constraints=constraint_tracks,
            timeline_data=timeline_data,
            frame_idx=0,
            playing=False,
            playback_speed=DEFAULT_PLAYBACK_SPEED,
            cur_duration=cur_duration,
            max_frame_idx=max_frame_idx,
            updating_motions=False,
            edit_mode=False,
            model_name=self.default_model_name,
            model_fps=model_bundle.model_fps,
            skeleton=model_bundle.skeleton,
            motion_rep=model_bundle.motion_rep,
            examples_base_dir=self.get_examples_base_dir(self.default_model_name, absolute=True),
            example_dict=example_dict,
            gui_examples_dropdown=gui_examples_dropdown,
            gui_save_example_path_text=gui_save_example_path_text,
            gui_model_selector=gui_model_selector,
        )

        # Registering the session makes this client "active" for set_frame,
        # generate, and the playback loop in run().
        self.client_sessions[client.client_id] = session

        # Initialize default character for this client
        self.add_character_motion(client, session.skeleton)
388
+
389
    def on_client_connect(self, client: viser.ClientHandle) -> None:
        """Initialize GUI and state for each new client.

        In HF mode the queue manager decides when (and whether) setup runs;
        otherwise the client is set up immediately after a quick-start modal.
        """
        print(f"Client {client.client_id} connected")

        if HF_MODE and self.queue_manager is not None:
            self.queue_manager.on_client_connect(client)
        else:
            # Show quick start popup when a browser client connects (non-HF mode).
            # save_choice persists the "don't remind me" acknowledgement browser-side.
            with client.gui.add_modal(
                "Welcome — Quick Start",
                size="xl",
                show_close_button=True,
                save_choice="kimodo.demo.quick_start_ack",
            ) as modal:
                client.gui.add_markdown(DEMO_UI_QUICK_START_MODAL_MD)
                client.gui.add_button("Got it (don't remind me again)").on_click(lambda _event: modal.close())
            self._setup_demo_for_client(client)
406
+
407
    def setup_scene(self, client: viser.ClientHandle) -> None:
        """Set up the per-client 3D scene: theme, camera, floor grid, origin marker."""
        self.configure_theme(client)
        client.camera.position = np.array(
            [2.7417358737841426, 1.8790455698853281, 7.675741569777456],
            dtype=np.float64,
        )
        client.camera.look_at = np.array([0.0, 0.0, 0.0], dtype=np.float64)
        client.camera.up_direction = np.array(
            [-1.1102230246251568e-16, 1.0, 1.3596310734468913e-32],
            dtype=np.float64,
        )
        client.camera.fov = np.deg2rad(45.0)
        grid_handle = client.scene.add_grid(
            "/grid",
            width=self.floor_len,
            height=self.floor_len,
            # Grid is authored in the XY plane; rotate -90° about X to lie on
            # the floor of the +Y-up world set in __init__.
            wxyz=viser.transforms.SO3.from_x_radians(-np.pi / 2.0).wxyz,
            position=(0.0, 0.0001, 0.0),  # slight lift to avoid z-fighting
            fade_distance=3 * self.floor_len,
            section_color=LIGHT_THEME["grid"],
            infinite_grid=True,
        )
        self.grid_handles[client.client_id] = grid_handle
        # marker for origin
        origin_waypoint = viser_utils.WaypointMesh(
            "/origin_waypoint",
            client,
            position=np.array([0.0, 0.0, 0.0]),
            heading=np.array([0.0, 1.0]),
            color=(0, 0, 255),
        )
        self.start_direction_markers[client.client_id] = origin_waypoint
439
+
440
+ def on_client_disconnect(self, client: viser.ClientHandle) -> None:
441
+ """Clean up when client disconnects."""
442
+ print(f"Client {client.client_id} disconnected")
443
+ client_id = client.client_id
444
+
445
+ if HF_MODE and self.queue_manager is not None:
446
+ self.queue_manager.on_client_disconnect(client_id)
447
+
448
+ self._cleanup_session_for_client(client_id)
449
+
450
+ def set_start_direction_visible(self, client_id: int, visible: bool) -> None:
451
+ marker = self.start_direction_markers.get(client_id)
452
+ if marker is None:
453
+ return
454
+ marker.set_visible(visible)
455
+
456
+ def client_active(self, client_id: int) -> bool:
457
+ return client_id in self.client_sessions
458
+
459
    def add_character_motion(
        self,
        client: viser.ClientHandle,
        skeleton: SkeletonBase,
        joints_pos: Optional[torch.Tensor] = None,
        joints_rot: Optional[torch.Tensor] = None,
        foot_contacts: Optional[torch.Tensor] = None,
    ) -> None:
        """Create a new character in the client's scene and attach a motion.

        When joints_pos/joints_rot are None the character holds its rest pose
        for the session's full frame range. No-op if the client has no session.
        """
        client_id = client.client_id
        if not self.client_active(client_id):
            return
        session = self.client_sessions[client_id]

        # Characters are named by insertion order: character0, character1, ...
        ci = len(session.motions)
        character_name = f"character{ci}"
        # build character skeleton and skinning mesh
        # Mesh type is inferred from the model name; unknown names are an error.
        if "g1" in session.model_name:
            mesh_mode = "g1_stl"
        elif "smplx" in session.model_name:
            mesh_mode = "smplx_skin"
        elif "soma" in session.model_name:
            if session.gui_elements.gui_use_soma_layer_checkbox.value:
                mesh_mode = "soma_layer_skin"
            else:
                mesh_mode = "soma_skin"
        else:
            raise ValueError("The model name is not recognized for skinning.")

        new_character = Character(
            character_name,
            client,
            skeleton,
            create_skeleton_mesh=True,
            create_skinned_mesh=True,
            visible_skeleton=False,  # don't show immediately
            visible_skinned_mesh=False,  # don't show immediately
            skinned_mesh_opacity=session.gui_elements.gui_viz_skinned_mesh_opacity_slider.value,
            show_foot_contacts=session.gui_elements.gui_viz_foot_contacts_checkbox.value,
            dark_mode=session.gui_elements.gui_dark_mode_checkbox.value,
            mesh_mode=mesh_mode,
            gui_use_soma_layer_checkbox=session.gui_elements.gui_use_soma_layer_checkbox,
        )

        # if no motion given, initialize to character default (rest) pose for one frame
        init_joints_pos, init_joints_rot = new_character.get_pose()
        if joints_pos is None:
            joints_pos = init_joints_pos[None].repeat(session.max_frame_idx + 1, 1, 1)
        if joints_rot is None:
            joints_rot = init_joints_rot[None].repeat(session.max_frame_idx + 1, 1, 1, 1)

        new_motion = CharacterMotion(new_character, joints_pos, joints_rot, foot_contacts)
        # save the motion in our dict
        session.motions[character_name] = new_motion

        # put the character at the right frame
        new_motion.set_frame(session.frame_idx)

        # put them visible with a small delay
        # so that the set_frame function has time to finish
        def _set_visibility():
            new_motion.character.set_skinned_mesh_visibility(session.gui_elements.gui_viz_skinned_mesh_checkbox.value)
            new_motion.character.set_skeleton_visibility(session.gui_elements.gui_viz_skeleton_checkbox.value)

        timer = threading.Timer(
            0.2,  # 0.2s delay
            _set_visibility,
        )
        timer.start()
528
+ def clear_motions(self, client_id: int) -> None:
529
+ if not self.client_active(client_id):
530
+ return
531
+ session = self.client_sessions[client_id]
532
+ for motion in list(session.motions.values()):
533
+ motion.clear()
534
+ session.motions.clear()
535
+
536
    def compute_model_constraints_lst(
        self,
        session: ClientSession,
        model_bundle: ModelBundle,
        num_frames: int,
    ):
        """Thin delegate: assemble model constraints via the generation module,
        supplying this demo's device."""
        return generation.compute_model_constraints_lst(session, model_bundle, num_frames, self.device)
543
+
544
    def check_cuda_health(self) -> bool:
        """Check if CUDA is still functional.

        Trigger auto-restart if corrupted.

        Returns True when the device is healthy (or is CPU), False after a
        detected corruption (the process is already restarting by then).
        """
        # NOTE(review): assumes self.device compares equal to the string "cpu"
        # when running on CPU — confirm select_runtime_device's return type.
        if self.device == "cpu":
            return True
        try:
            # A tiny allocation plus an add forces a device-side operation,
            # surfacing any latent device-side assert.
            torch.tensor([1.0], device=self.device) + torch.tensor([1.0], device=self.device)
            return True
        except RuntimeError as e:
            if "device-side assert" in str(e) or "CUDA error" in str(e):
                # Only report/restart once; subsequent calls return False quietly.
                if self._cuda_healthy:
                    self._cuda_healthy = False
                    print("FATAL: CUDA context is corrupted (device-side assert). " "The process must be restarted.")
                    self._trigger_restart()
                return False
            # Unrelated RuntimeErrors propagate to the caller.
            raise
562
+
563
    def _trigger_restart(self) -> None:
        """Exit the process so the HF Space (or systemd/Docker) can restart it."""
        import sys

        print("Initiating automatic restart due to unrecoverable CUDA error...")
        # Flush both streams explicitly: os._exit skips atexit handlers and
        # buffered-IO flushing, so the message would otherwise be lost.
        sys.stdout.flush()
        sys.stderr.flush()
        os._exit(1)
571
+
572
    def generate(
        self,
        client: viser.ClientHandle,
        prompts: list[str],
        num_frames: list[int],
        num_samples: int,
        seed: int,
        diffusion_steps: int,
        cfg_weight: Optional[list[float]] = None,
        cfg_type: Optional[str] = None,
        postprocess_parameters: Optional[dict] = None,
        transitions_parameters: Optional[dict] = None,
        real_robot_rotations: bool = False,
    ) -> None:
        """Run motion generation for one client, serialized on the shared GPU lock.

        Raises RuntimeError immediately when CUDA has been flagged corrupted.
        Blocks (with a client-side notification) while another client's
        generation holds the lock.
        """
        if not self._cuda_healthy:
            raise RuntimeError("CUDA is in a corrupted state. The space is restarting...")

        # Non-blocking acquire first so the "waiting" notification is shown
        # only when another client actually holds the GPU.
        locked = self._generation_lock.acquire(blocking=False)
        if not locked:
            waiting_notif = client.add_notification(
                title="Waiting for GPU...",
                body="Another generation is in progress. Yours will start automatically.",
                loading=True,
                with_close_button=False,
            )
            self._generation_lock.acquire()
            waiting_notif.remove()

        try:
            session = self.client_sessions[client.client_id]
            model_bundle = self.load_model(session.model_name)
            generation.generate(
                client=client,
                session=session,
                model_bundle=model_bundle,
                prompts=prompts,
                num_frames=num_frames,
                num_samples=num_samples,
                seed=seed,
                diffusion_steps=diffusion_steps,
                cfg_weight=cfg_weight,
                cfg_type=cfg_type,
                postprocess_parameters=postprocess_parameters,
                transitions_parameters=transitions_parameters,
                real_robot_rotations=real_robot_rotations,
                device=self.device,
                clear_motions=self.clear_motions,
                add_character_motion=self.add_character_motion,
            )
        finally:
            # Always release, even when generation raised, so other clients
            # are never deadlocked waiting for the GPU.
            self._generation_lock.release()
623
+
624
    def set_frame(self, client_id: int, frame_idx: int, update_timeline: bool = True):
        """Move a client's playhead: update session state, motions, and overlays.

        `update_timeline=False` avoids echoing the frame back to the timeline
        widget (e.g. when the change originated from the widget itself).
        No-op if the client has no active session.
        """
        if not self.client_active(client_id):
            return

        session = self.client_sessions[client_id]

        session.frame_idx = frame_idx
        if update_timeline:
            session.client.timeline.set_current_frame(frame_idx)
        for motion in list(session.motions.values()):
            motion.set_frame(frame_idx)
        self._apply_constraint_overlay_visibility(session)
636
+
637
    def run(self) -> None:
        """Main server loop: advance playback for every session and monitor CUDA.

        Blocks forever; intended as the process's final call. CUDA health is
        probed every 5 seconds and triggers a process restart on corruption.
        """
        last_loop_time = time.perf_counter()
        last_cuda_check_time = 0.0
        while True:
            loop_start_time = time.perf_counter()
            delta_time = loop_start_time - last_loop_time
            last_loop_time = loop_start_time

            if self.models:
                # the max playback speed is 2x the model fps (from gui_playback_speed_buttons)
                playback_fps = max(bundle.model_fps for bundle in self.models.values()) * 2.0
            else:
                # No model loaded yet (deferred load): idle at 60 Hz.
                playback_fps = 60.0

            # update each client session independently
            # copy to a list first to avoid changing size if client disconnects
            for client_id, session in list(self.client_sessions.items()):
                if not session.playing:
                    continue
                if session.model_fps <= 0:
                    continue

                # Time-based stepping keeps playback smooth even if loop cadence jitters.
                session.playback_time_accumulator += max(0.0, delta_time) * max(0.0, session.playback_speed)
                frame_period = 1.0 / session.model_fps
                if session.playback_time_accumulator < frame_period:
                    continue

                # Consume whole frames from the accumulator, keeping the remainder.
                frames_to_advance = int(session.playback_time_accumulator / frame_period)
                session.playback_time_accumulator -= frames_to_advance * frame_period
                frame_count = max(1, session.max_frame_idx + 1)
                # Wrap around for looping playback.
                new_frame_idx = (session.frame_idx + frames_to_advance) % frame_count

                # make sure the client is still active before updating the frame
                if self.client_active(client_id):
                    self.set_frame(client_id, new_frame_idx)

            if loop_start_time - last_cuda_check_time >= 5.0:
                self.check_cuda_health()
                last_cuda_check_time = loop_start_time

            # Sleep out the remainder of the frame budget.
            time_remaining = max(0.0, 1.0 / playback_fps - (time.perf_counter() - loop_start_time))
            time.sleep(time_remaining)
680
+
681
    def configure_theme(
        self,
        client: viser.ClientHandle,
        dark_mode: bool = False,
        titlebar_dark_mode_checkbox_uuid: str | None = None,
    ):
        """Apply the light/dark theme to one client: grid color, titlebar, panel.

        `titlebar_dark_mode_checkbox_uuid` links the titlebar toggle to an
        existing GUI checkbox when provided.
        """
        # Sync grid color with theme (light vs dark)
        theme = DARK_THEME if dark_mode else LIGHT_THEME
        grid_handle = self.grid_handles.get(client.client_id)
        if grid_handle is not None:
            grid_handle.section_color = theme["grid"]

        #
        # setup theme
        #
        buttons = (
            TitlebarButton(
                text="Documentation",
                icon="Description",
                href="https://research.nvidia.com/labs/sil/projects/kimodo/docs/interactive_demo/index.html",
            ),
            TitlebarButton(
                text="Project Page",
                icon=None,
                href="https://research.nvidia.com/labs/sil/projects/kimodo/",
            ),
            TitlebarButton(
                text="Github",
                icon="GitHub",
                href="https://github.com/nv-tlabs/kimodo",
            ),
        )
        # Embed the logo(s) as base64 data URLs; skip entirely when the light
        # logo asset is missing.
        assets_dir = DEMO_ASSETS_ROOT
        logo_light_path = assets_dir / "nvidia_logo.png"
        logo_dark_path = assets_dir / "nvidia_logo_dark.png"
        if logo_light_path.exists():
            light_b64 = base64.standard_b64encode(logo_light_path.read_bytes()).decode("ascii")
            dark_b64 = (
                base64.standard_b64encode(logo_dark_path.read_bytes()).decode("ascii")
                if logo_dark_path.exists()
                else None
            )
            image = TitlebarImage(
                image_url_light=f"data:image/png;base64,{light_b64}",
                image_url_dark=(f"data:image/png;base64,{dark_b64}" if dark_b64 else None),
                image_alt="NVIDIA",
                href="https://www.nvidia.com/",
            )
        else:
            image = None
        titlebar_theme = TitlebarConfig(buttons=buttons, image=image, title_text="Movimento")
        client.gui.set_panel_label("Movimento")
        client.gui.configure_theme(
            titlebar_content=titlebar_theme,
            control_layout="floating",  # "floating", # ['floating', 'collapsible', 'fixed']
            control_width="large",  # ['small', 'medium', 'large']
            dark_mode=dark_mode,
            show_logo=False,  # hide viser logo on bottom left corner
            show_share_button=False,
            titlebar_dark_mode_checkbox_uuid=titlebar_dark_mode_checkbox_uuid,
            brand_color=(152, 189, 255),  # (60, 131, 0), # (R, G, B) tuple
        )
config.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import os
5
+
6
+ from kimodo.assets import DEMO_EXAMPLES_ROOT
7
+ from kimodo.model.registry import (
8
+ AVAILABLE_MODELS,
9
+ DEFAULT_MODEL,
10
+ FRIENDLY_NAMES,
11
+ get_datasets,
12
+ get_model_info,
13
+ get_models_for_dataset_skeleton,
14
+ get_short_key_from_display_name,
15
+ get_skeleton_display_name,
16
+ get_skeleton_display_names_for_dataset,
17
+ get_skeleton_key_from_display_name,
18
+ get_skeletons_for_dataset,
19
+ get_versions_for_dataset_skeleton,
20
+ resolve_to_short_key,
21
+ )
22
+
23
# Bind address/port for the demo server. Hugging Face Spaces injects PORT, so
# fall back to it when SERVER_PORT is unset (default 7860).
SERVER_NAME = os.environ.get("SERVER_NAME", "0.0.0.0")
SERVER_PORT = int(os.environ.get("SERVER_PORT") or os.environ.get("PORT", "7860"))
25
+
26
+
27
def _env_bool(name: str, default: bool = False) -> bool:
    """Read a boolean flag from the environment.

    "1", "true", "yes" and "on" (case-insensitive, surrounding whitespace
    ignored) are truthy; any other value is falsy. Returns *default* when
    the variable is unset.
    """
    value = os.environ.get(name)
    if value is None:
        return default
    normalized = str(value).strip().lower()
    return normalized in {"1", "true", "yes", "on"}
32
+
33
+
34
# True when running in the constrained Hugging Face Spaces deployment
# (enables the user queue and session time limit below).
HF_MODE = _env_bool("HF_MODE", False)

# HF mode: user queue and session limit (override via env in Spaces)
MAX_ACTIVE_USERS = int(os.environ.get("MAX_ACTIVE_USERS", "5"))
MAX_SESSION_MINUTES = float(os.environ.get("MAX_SESSION_MINUTES", "5.0"))

DEFAULT_PLAYBACK_SPEED = 1.0
# default start duration is 6.0 sec, but model can handle up to 10 sec
DEFAULT_CUR_DURATION = 6.0
DEFAULT_PROMPT = "A person walks forward."
# Duration bounds (seconds) for a prompt segment.
MIN_DURATION = 2.0
MAX_DURATION = 10.0

SHOW_TRANSITION_PARAMS = False  # hide transition-tuning controls by default
INIT_POSTPROCESSING = True  # initial state of the post-processing toggle
NB_TRANSITION_FRAMES = 5  # frame count for transitions — presumably between prompts; confirm in model code

# Floor/grid RGB colors (0-255) per color scheme.
LIGHT_THEME = dict(
    floor=(220, 220, 220),
    grid=(180, 180, 180),
)

# Dark theme: slightly lighter grid and floor for better visibility and less flat black
DARK_THEME = dict(
    floor=(48, 48, 52),
    grid=(105, 105, 110),
)

EXAMPLES_ROOT_DIR = str(DEMO_EXAMPLES_ROOT)

# Model list and paths from kimodo registry (all models: Kimodo + TMR)
MODEL_NAMES = tuple(AVAILABLE_MODELS)
MODEL_EXAMPLES_DIRS = {name: os.path.join(EXAMPLES_ROOT_DIR, name) for name in MODEL_NAMES}
# Display labels for backward compatibility (short_key -> display name)
MODEL_LABELS = {name: FRIENDLY_NAMES.get(name, f"Model ({name})") for name in MODEL_NAMES}
MODEL_LABEL_TO_NAME = {label: name for name, label in MODEL_LABELS.items()}
70
+
71
# -----------------------------------------------------------------------------
# Demo UI copy
# -----------------------------------------------------------------------------

# Core quick-start text, reused by both the welcome modal and the
# Instructions tab. (Fixes minor wording typos: "should be used",
# "each prompt", "load later".)
DEMO_UI_QUICK_START_CORE_MD = """
### Camera
- **Left-drag**: rotate
- **Right-drag**: pan
- **Scroll**: zoom

### Playback
- **Space** to play/pause
- **←/→** to step frames, or click the frame number.
- **Scroll up/down** in the timeline: move left/right
- **Shift + scroll** in the timeline: zoom in/out

### Prompts
- **Double-click** a text prompt to edit it.
- **Click and drag** the right edge of a prompt box to extend/shorten it.
- **Click empty space** to add a prompt.
- **Right-click** a prompt to delete it.

### Generate
- Go to the **Generate** tab to modify options
- It is also possible to **load** examples
- Click **Generate** to generate a motion

### Constraints
- This is **optional**: should be used after a first generation
- **Click** in the timeline tracks (Full-Body / 2D root etc) to add a constraint.
- **Right-click** on a constraint to delete it.
- To **edit** a constraint:
    - Move playback to the target frame
    - Click **Enter Editing Mode** in the Constraints tab.
"""

# Variant shown in the first-visit modal: core text plus a pointer to the tab.
DEMO_UI_QUICK_START_MODAL_MD = (
    DEMO_UI_QUICK_START_CORE_MD
    + """

See the **Instructions** tab for the full user manual.
"""
)

# Full user manual rendered in the Instructions tab.
DEMO_UI_INSTRUCTIONS_TAB_MD = (
    """
## How to Use This Demo

"""
    + DEMO_UI_QUICK_START_CORE_MD
    + """

---

### Generating Motion (step-by-step)

1. **Edit the text prompts** in the timeline (e.g., "A person walks forward.")
2. **Modify the duration** by moving the right edge of each prompt (2–10 seconds)
3. **Add constraints** (optional) to control the motion:
    - Click **Enter Editing Mode** to adjust the character pose
    - Use the timeline to place keyframes or intervals in constraint tracks (see below)
4. **Click Generate** to create the motion
5. If generating multiple samples, **click on a mesh** to select which one to keep

### Timeline Editing

**Adding Constraints:**
1. Click anywhere on the timeline to add a keyframe at that frame. The keyframe is created based on the current character motion.
2. Ctrl/Cmd+click+drag to add an interval constraint, or expand a keyframe into an interval
3. Enter editing mode with the **Enter Editing Mode** button to adjust character pose before/after adding constraints.

**Constraint Types:**
- **Full-Body**: constrains the entire character pose
- **2D Root**: constrains the character's path on the ground plane
    - Enable **Densify** to create a continuous path
- **End-Effectors**: constrains hands and feet positions
    - Use separate tracks for Left/Right Hand/Foot


**Moving & Deleting:**
- **Drag keyframes/intervals** to move them to different frames
- **Right-click** a keyframe or interval to delete it
- Use **Clear All Constraints** to remove everything

**Tips:**
- The posing skeleton becomes visible in editing mode for precise positioning
- Use **Snap to constraint** to align the current frame to a constraint

### Saving & Loading

You can save the current constraints or current motion to load later from the Load/Save menu.
Saving an **Example** will save the full constraints, motion, and generation metadata.

### Visualization Options

Switch to the **Visualize** tab to:
- Toggle mesh and skeleton visibility
- Adjust mesh opacity
- Show/hide foot contact indicators
- Switch between light and dark modes
"""
)
embedding_cache.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import contextlib
5
+ import contextvars
6
+ import hashlib
7
+ import json
8
+ import os
9
+ import threading
10
+ import time
11
+ from collections import OrderedDict
12
+ from dataclasses import dataclass
13
+ from typing import Iterable, Optional
14
+
15
+ import numpy as np
16
+ import torch
17
+
18
+ from kimodo.sanitize import sanitize_texts
19
+
20
# Context variable holding the session whose per-session embedding cache the
# encoder should consult; set/reset via CachedTextEncoder.session_context().
_ACTIVE_SESSION = contextvars.ContextVar("kimodo_demo_active_session", default=None)
21
+
22
+
23
@dataclass
class CacheStats:
    """Lookup counters for EmbeddingCache."""

    # Served straight from the in-memory LRU.
    hits: int = 0
    # Not found anywhere; required a fresh encoder call.
    misses: int = 0
    # Loaded from the on-disk .npy store (then promoted into the LRU).
    disk_hits: int = 0
28
+
29
+
30
class EmbeddingCache:
    """Disk-backed text embedding cache with a small in-memory LRU.

    Entries are keyed by a SHA-256 of ``model_name|encoder_id|text`` and
    stored one ``.npy`` file per prompt under ``<base_dir>/<model_name>/``,
    with a JSON index of per-entry metadata alongside. The most recently
    used ``max_mem_entries`` arrays are additionally kept in an in-memory
    LRU. A single lock guards the LRU and index, so one instance may be
    shared across threads.
    """

    def __init__(
        self,
        *,
        model_name: str,
        encoder_id: str,
        base_dir: Optional[str] = None,
        max_mem_entries: int = 128,
    ) -> None:
        """Create a cache rooted at *base_dir*.

        Resolution order for the root: explicit argument, the
        ``kimodo_EMBED_CACHE_DIR`` environment variable, then
        ``~/.cache/kimodo_demo/embeddings``.
        """
        cache_root = base_dir or os.environ.get(
            "kimodo_EMBED_CACHE_DIR",
            os.path.join("~", ".cache", "kimodo_demo", "embeddings"),
        )
        self.base_dir = os.path.expanduser(cache_root)
        self.model_name = model_name
        self.encoder_id = encoder_id
        self.max_mem_entries = max_mem_entries
        self.stats = CacheStats()

        self._lock = threading.Lock()
        self._mem_cache: OrderedDict[str, np.ndarray] = OrderedDict()
        self._index: dict = {}
        # Index is loaded lazily on first lookup so construction never touches disk.
        self._index_loaded = False

    # --- paths ---------------------------------------------------------

    def _model_dir(self) -> str:
        return os.path.join(self.base_dir, self.model_name)

    def _index_path(self) -> str:
        return os.path.join(self._model_dir(), "index.json")

    def _prewarm_marker_path(self, key: str) -> str:
        return os.path.join(self._model_dir(), f"prewarm_{key}.json")

    # --- prewarm markers ----------------------------------------------

    def has_prewarm_marker(self, key: str) -> bool:
        """True if a prewarm pass identified by *key* already completed."""
        return os.path.exists(self._prewarm_marker_path(key))

    def write_prewarm_marker(self, key: str, *, prompt_count: int) -> None:
        """Atomically record that prewarm pass *key* covered *prompt_count* prompts."""
        os.makedirs(self._model_dir(), exist_ok=True)
        payload = {"prompt_count": prompt_count, "updated_at": time.time()}
        # Write to a temp file then os.replace so readers never see a partial file.
        tmp_path = f"{self._prewarm_marker_path(key)}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(payload, f)
        os.replace(tmp_path, self._prewarm_marker_path(key))

    # --- index ---------------------------------------------------------

    def _load_index(self) -> None:
        """Load the on-disk JSON index once; a corrupt index degrades to empty."""
        if self._index_loaded:
            return
        index_path = self._index_path()
        if os.path.exists(index_path):
            try:
                with open(index_path, "r", encoding="utf-8") as f:
                    self._index = json.load(f)
            except json.JSONDecodeError:
                self._index = {}
        self._index_loaded = True

    def _save_index(self) -> None:
        """Atomically persist the index (temp file + rename)."""
        os.makedirs(self._model_dir(), exist_ok=True)
        tmp_path = f"{self._index_path()}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(self._index, f)
        os.replace(tmp_path, self._index_path())

    # --- entry storage -------------------------------------------------

    def _make_key(self, text: str) -> str:
        # Key includes model and encoder identity so different encoders never
        # share cached embeddings for the same prompt.
        key_src = f"{self.model_name}|{self.encoder_id}|{text}"
        return hashlib.sha256(key_src.encode("utf-8")).hexdigest()

    def _entry_path(self, key: str) -> str:
        return os.path.join(self._model_dir(), f"{key}.npy")

    def _mem_get(self, key: str) -> Optional[np.ndarray]:
        """LRU lookup; a hit is moved to the most-recently-used position."""
        if key in self._mem_cache:
            self._mem_cache.move_to_end(key)
            return self._mem_cache[key]
        return None

    def _mem_put(self, key: str, value: np.ndarray) -> None:
        """Insert into the LRU, evicting least-recently-used entries over capacity."""
        self._mem_cache[key] = value
        self._mem_cache.move_to_end(key)
        while len(self._mem_cache) > self.max_mem_entries:
            self._mem_cache.popitem(last=False)

    def _disk_load(self, key: str) -> Optional[np.ndarray]:
        """Load an entry from disk; any read failure is treated as a miss."""
        path = self._entry_path(key)
        if not os.path.exists(path):
            return None
        try:
            return np.load(path)
        except Exception:
            return None

    def _disk_save(self, key: str, value: np.ndarray) -> None:
        """Write an entry's array and record its metadata in the (in-memory) index.

        The caller is responsible for calling _save_index() afterwards.
        """
        os.makedirs(self._model_dir(), exist_ok=True)
        np.save(self._entry_path(key), value)
        self._index[key] = {
            "length": int(value.shape[0]),
            "dtype": str(value.dtype),
            "updated_at": time.time(),
        }

    # --- per-session fast path ----------------------------------------

    def _maybe_use_session_cache(self, texts: list[str]):
        """Return the active session's cached (embeddings, lengths) on an exact prompt match."""
        session = _ACTIVE_SESSION.get()
        if session is None:
            return None
        if session.last_prompt_texts == texts and session.last_prompt_embeddings is not None:
            return session.last_prompt_embeddings, session.last_prompt_lengths
        return None

    def _update_session_cache(self, texts: list[str], tensor: torch.Tensor, lengths: list[int]) -> None:
        """Remember the latest prompts/embeddings on the active session, if any."""
        session = _ACTIVE_SESSION.get()
        if session is None:
            return
        session.last_prompt_texts = texts
        session.last_prompt_embeddings = tensor
        session.last_prompt_lengths = lengths

    # --- main entry point ----------------------------------------------

    def get_or_encode(self, texts: Iterable[str], encoder):
        """Return ``(embeddings, lengths)`` for *texts*, encoding only cache misses.

        ``embeddings`` is a tensor of shape ``[len(texts), max_len, feat_dim]``
        zero-padded per row; ``lengths`` holds each row's true length.
        *encoder* is called once with the list of missing texts and must
        return a padded tensor plus per-text lengths.
        """
        if isinstance(texts, str):
            texts = [texts]
        texts = sanitize_texts(list(texts))
        if len(texts) == 0:
            # BUG FIX: torch.empty() requires a size argument and would raise
            # a TypeError here; return an explicit 0-element tensor instead.
            return torch.empty(0), []

        session_cache = self._maybe_use_session_cache(texts)
        if session_cache is not None:
            return session_cache

        arrays: list[Optional[np.ndarray]] = [None] * len(texts)
        lengths: list[int] = [0] * len(texts)
        misses: list[tuple[int, str, str]] = []

        with self._lock:
            self._load_index()
            for idx, text in enumerate(texts):
                key = self._make_key(text)
                cached = self._mem_get(key)
                if cached is not None:
                    arrays[idx] = cached
                    lengths[idx] = cached.shape[0]
                    self.stats.hits += 1
                    continue

                cached = self._disk_load(key)
                if cached is not None:
                    arrays[idx] = cached
                    lengths[idx] = cached.shape[0]
                    self._mem_put(key, cached)
                    self.stats.disk_hits += 1
                    continue

                misses.append((idx, text, key))
                self.stats.misses += 1

        if misses:
            # Encode all misses in one batch; the encoder may run on GPU, so
            # detach and copy to CPU before converting to numpy for storage.
            miss_texts = [text for _, text, _ in misses]
            miss_tensor, miss_lengths = encoder(miss_texts)
            miss_tensor = miss_tensor.detach().cpu()
            miss_tensor_np = miss_tensor.numpy()

            with self._lock:
                self._load_index()
                for miss_idx, length in enumerate(miss_lengths):
                    idx, _text, key = misses[miss_idx]
                    # Trim the encoder's padding down to the true length.
                    arr = miss_tensor_np[miss_idx, :length].copy()
                    arrays[idx] = arr
                    lengths[idx] = int(length)
                    self._mem_put(key, arr)
                    self._disk_save(key, arr)
                self._save_index()

        # Re-pad everything to the batch-wide maximum length.
        max_len = max(lengths) if lengths else 0
        feat_dim = arrays[0].shape[-1] if arrays[0] is not None else 0
        dtype = arrays[0].dtype if arrays[0] is not None else np.float32
        padded = np.zeros((len(texts), max_len, feat_dim), dtype=dtype)
        for idx, arr in enumerate(arrays):
            if arr is None:
                continue
            padded[idx, : arr.shape[0]] = arr

        result = torch.from_numpy(padded)
        self._update_session_cache(texts, result, lengths)
        return result, lengths
215
+
216
+
217
class CachedTextEncoder:
    """Wrapper around a text encoder to add disk-backed caching.

    Behaves like the wrapped encoder (callable, ``to()``, attribute access is
    delegated) but routes every call through an EmbeddingCache.
    """

    def __init__(self, encoder, *, model_name: str, base_dir: Optional[str] = None):
        self.encoder = encoder
        self.model_name = model_name
        # Partition cache entries by encoder class so two different encoder
        # types never share cached embeddings.
        encoder_id = f"{type(encoder).__name__}"
        self.cache = EmbeddingCache(model_name=model_name, encoder_id=encoder_id, base_dir=base_dir)

    def __call__(self, texts):
        # Same contract as the wrapped encoder: returns (embeddings, lengths).
        return self.cache.get_or_encode(texts, self.encoder)

    def prewarm(self, texts) -> None:
        """Encode *texts* once to populate the disk cache.

        A marker file keyed by the full prompt list makes repeated prewarm
        calls with the same prompts a no-op.
        """
        if isinstance(texts, str):
            texts = [texts]
        texts = sanitize_texts(list(texts))
        prewarm_key = hashlib.sha256("|".join(texts).encode("utf-8")).hexdigest()
        if self.cache.has_prewarm_marker(prewarm_key):
            return
        self.cache.get_or_encode(texts, self.encoder)
        self.cache.write_prewarm_marker(prewarm_key, prompt_count=len(texts))

    def to(self, device=None, dtype=None):
        """Forward a device/dtype move to the wrapped encoder; returns self for chaining."""
        if hasattr(self.encoder, "to"):
            self.encoder.to(device=device, dtype=dtype)
        return self

    @contextlib.contextmanager
    def session_context(self, session):
        """Bind *session* as the active session so repeated identical prompts
        can be served from the session's last embeddings (see EmbeddingCache)."""
        token = _ACTIVE_SESSION.set(session)
        try:
            yield
        finally:
            # Always restore the previous context value, even on error.
            _ACTIVE_SESSION.reset(token)

    def __getattr__(self, name):
        # Any attribute not defined here is delegated to the wrapped encoder
        # (only reached when normal attribute lookup fails).
        return getattr(self.encoder, name)
generation.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from collections import defaultdict
5
+ from typing import Optional
6
+
7
+ import numpy as np
8
+ import torch
9
+
10
+ import viser
11
+ from kimodo.constraints import (
12
+ TYPE_TO_CLASS,
13
+ FullBodyConstraintSet,
14
+ Root2DConstraintSet,
15
+ )
16
+ from kimodo.exports.mujoco import apply_g1_real_robot_projection
17
+ from kimodo.skeleton import G1Skeleton34, SOMASkeleton30
18
+ from kimodo.tools import seed_everything
19
+
20
+ from .embedding_cache import CachedTextEncoder
21
+ from .state import ClientSession, ModelBundle
22
+
23
+
24
+ def compute_model_constraints_lst(
25
+ session: ClientSession,
26
+ model_bundle: ModelBundle,
27
+ num_frames: int,
28
+ device: str,
29
+ ):
30
+ """Compute the lst of constraints for the model based on the constraints in viser."""
31
+ assert len(session.motions) == 1, "Only one motion allowed for constrained generation"
32
+ if not session.constraints:
33
+ return []
34
+
35
+ model_skeleton = model_bundle.model.skeleton
36
+ # For SOMA, UI uses somaskel77; extract 30-joint subset for the model
37
+ use_skel_slice = isinstance(model_skeleton, SOMASkeleton30) and session.skeleton.nbjoints != model_skeleton.nbjoints
38
+ skel_slice = model_skeleton.get_skel_slice(session.skeleton) if use_skel_slice else None
39
+
40
+ dense_smooth_root_pos_2d = None
41
+ if session.constraints["2D Root"].dense_path:
42
+ # get the full 2d root
43
+ dense_smooth_root_pos_2d = session.constraints["2D Root"].get_constraint_info(device=device)["root_pos"][
44
+ :, [0, 2]
45
+ ]
46
+
47
+ model_constraints = []
48
+ for track_name, constraint in session.constraints.items():
49
+ constraint_info = constraint.get_constraint_info(device=device)
50
+ frame_idx = constraint_info["frame_idx"]
51
+ # drop any constraints outside the generation range
52
+ valid_info = [(i, fi) for i, fi in enumerate(frame_idx) if fi < num_frames]
53
+ valid_idx = [i for i, _ in valid_info]
54
+ valid_frame_idx = [fi for _, fi in valid_info]
55
+
56
+ if len(valid_frame_idx) == 0:
57
+ continue
58
+
59
+ frame_indices = torch.tensor(valid_frame_idx)
60
+ if track_name == "2D Root":
61
+ smooth_root_pos_2d = constraint_info["root_pos"][valid_idx][:, [0, 2]].to(device)
62
+ # same as "smooth_root_2d"
63
+ model_constraints.append(
64
+ Root2DConstraintSet(
65
+ model_skeleton,
66
+ frame_indices,
67
+ smooth_root_pos_2d,
68
+ )
69
+ )
70
+ elif track_name == "Full-Body":
71
+ constraint_joints_pos = constraint_info["joints_pos"][valid_idx].to(device)
72
+ constraint_joints_rot = constraint_info["joints_rot"][valid_idx].to(device)
73
+ if skel_slice is not None:
74
+ constraint_joints_pos = constraint_joints_pos[:, skel_slice]
75
+ constraint_joints_rot = constraint_joints_rot[:, skel_slice]
76
+
77
+ smooth_root_pos_2d = None
78
+ if dense_smooth_root_pos_2d is not None:
79
+ smooth_root_pos_2d = dense_smooth_root_pos_2d[frame_indices]
80
+
81
+ model_constraints.append(
82
+ FullBodyConstraintSet(
83
+ model_skeleton,
84
+ frame_indices,
85
+ constraint_joints_pos,
86
+ constraint_joints_rot,
87
+ smooth_root_2d=smooth_root_pos_2d,
88
+ )
89
+ )
90
+ elif track_name == "End-Effectors":
91
+ constraint_joints_pos = constraint_info["joints_pos"][valid_idx].to(device)
92
+ constraint_joints_rot = constraint_info["joints_rot"][valid_idx].to(device)
93
+ if skel_slice is not None:
94
+ constraint_joints_pos = constraint_joints_pos[:, skel_slice]
95
+ constraint_joints_rot = constraint_joints_rot[:, skel_slice]
96
+
97
+ end_effector_type_set_lst = [
98
+ end_effector_type_set
99
+ for i, end_effector_type_set in enumerate(constraint_info["end_effector_type"])
100
+ if i in valid_idx
101
+ ]
102
+
103
+ # regroup the end effector data by type
104
+ cls_idx = defaultdict(list)
105
+ for idx, end_effector_type_set in enumerate(end_effector_type_set_lst):
106
+ for end_effector_type in end_effector_type_set:
107
+ cls_idx[TYPE_TO_CLASS[end_effector_type]].append(idx)
108
+
109
+ for cls, lst_idx in cls_idx.items():
110
+ frame_indices_cls = frame_indices[lst_idx]
111
+ smooth_root_pos_2d = None
112
+ if dense_smooth_root_pos_2d is not None:
113
+ smooth_root_pos_2d = dense_smooth_root_pos_2d[frame_indices_cls]
114
+
115
+ constraint_joints_pos_el = constraint_joints_pos[lst_idx]
116
+ constraint_joints_rot_el = constraint_joints_rot[lst_idx]
117
+
118
+ model_constraints.append(
119
+ cls(
120
+ model_skeleton,
121
+ frame_indices_cls,
122
+ constraint_joints_pos_el,
123
+ constraint_joints_rot_el,
124
+ smooth_root_2d=smooth_root_pos_2d,
125
+ )
126
+ )
127
+ else:
128
+ raise ValueError(f"Unsupported constraint type: {constraint.display_name}")
129
+ return model_constraints
130
+
131
+
132
+ def generate(
133
+ *,
134
+ client: viser.ClientHandle,
135
+ session: ClientSession,
136
+ model_bundle: ModelBundle,
137
+ prompts: list[str],
138
+ num_frames: list[int],
139
+ num_samples: int,
140
+ seed: int,
141
+ diffusion_steps: int,
142
+ cfg_weight: Optional[list[float]] = None,
143
+ cfg_type: Optional[str] = None,
144
+ postprocess_parameters: Optional[dict] = None,
145
+ transitions_parameters: Optional[dict] = None,
146
+ real_robot_rotations: bool = False,
147
+ device: str,
148
+ clear_motions,
149
+ add_character_motion,
150
+ ) -> None:
151
+ client_id = client.client_id
152
+ print(
153
+ f"Generating {num_samples} samples for a total of {sum(num_frames)} frames with those prompt: {prompts} (client {client_id})"
154
+ )
155
+
156
+ seed_everything(seed)
157
+
158
+ model_constraints = compute_model_constraints_lst(session, model_bundle, sum(num_frames), device)
159
+ cfg_weight = cfg_weight or [2.0, 2.0]
160
+ postprocess_parameters = postprocess_parameters or {}
161
+ transitions_parameters = transitions_parameters or {}
162
+
163
+ encoder = getattr(model_bundle.model, "text_encoder", None)
164
+ if isinstance(encoder, CachedTextEncoder):
165
+ with encoder.session_context(session):
166
+ pred_joints_output = model_bundle.model(
167
+ prompts,
168
+ num_frames,
169
+ diffusion_steps,
170
+ multi_prompt=True,
171
+ constraint_lst=model_constraints,
172
+ cfg_weight=cfg_weight,
173
+ num_samples=num_samples,
174
+ cfg_type=cfg_type,
175
+ **(postprocess_parameters | transitions_parameters),
176
+ ) # [B, T, motion_rep_dim]
177
+ else:
178
+ pred_joints_output = model_bundle.model(
179
+ prompts,
180
+ num_frames,
181
+ diffusion_steps,
182
+ multi_prompt=True,
183
+ constraint_lst=model_constraints,
184
+ cfg_weight=cfg_weight,
185
+ num_samples=num_samples,
186
+ cfg_type=cfg_type,
187
+ **(postprocess_parameters | transitions_parameters),
188
+ ) # [B, T, motion_rep_dim]
189
+
190
+ joints_pos = pred_joints_output["posed_joints"] # [B, T, J, 3]
191
+ joints_rot = pred_joints_output["global_rot_mats"]
192
+ foot_contacts = pred_joints_output.get("foot_contacts")
193
+
194
+ # Optionally project G1 to real robot DoF (1-DoF per joint, clamped) for display.
195
+ if real_robot_rotations and isinstance(session.skeleton, G1Skeleton34):
196
+ joints_pos, joints_rot = apply_g1_real_robot_projection(
197
+ session.skeleton,
198
+ pred_joints_output["posed_joints"],
199
+ pred_joints_output["global_rot_mats"],
200
+ clamp_to_limits=True,
201
+ )
202
+
203
+ # Display on characters (callbacks keep this module UI-agnostic).
204
+ clear_motions(client_id)
205
+ # Keep one sample centered at the origin so constraints align.
206
+ spread_factor = 1.0 # meters
207
+ center_idx = num_samples // 2
208
+ x_trans = (np.arange(num_samples) - center_idx) * spread_factor
209
+ for i in range(num_samples):
210
+ cur_joints_pos = joints_pos[i]
211
+ cur_joints_pos[..., 0] += x_trans[i]
212
+ add_character_motion(
213
+ client,
214
+ session.skeleton,
215
+ cur_joints_pos,
216
+ joints_rot[i],
217
+ foot_contacts[i],
218
+ )
queue_manager.py ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """HF mode user queue and session time limit."""
4
+
5
+ import math
6
+ import threading
7
+ import time
8
+ from collections.abc import Callable
9
+ from typing import Any
10
+
11
+ import viser
12
+
13
+ from .config import DEMO_UI_QUICK_START_MODAL_MD, MAX_SESSION_MINUTES
14
+
15
+ # Link for "Duplicate this Space" on Hugging Face (used in queue and expiry modals).
16
+ DUPLICATE_SPACE_URL = "https://huggingface.co/spaces/nvidia/Kimodo?duplicate=true"
17
+ GITHUB_REPO_URL = "https://github.com/nv-tlabs/kimodo"
18
+
19
+ # How often to refresh queue modal content (position, total, estimated wait).
20
+ QUEUE_MODAL_REFRESH_INTERVAL_SEC = 15
21
+
22
+
23
+ class UserQueue:
24
+ """Thread-safe queue: active users (with activation timestamp) and waiting queue."""
25
+
26
+ def __init__(self, max_active: int, max_minutes: float) -> None:
27
+ self._max_active = max_active
28
+ self._max_minutes = max_minutes
29
+ self._max_seconds = max_minutes * 60.0
30
+ self._active: dict[int, float] = {} # client_id -> activation timestamp
31
+ self._queued: list[int] = []
32
+ self._lock = threading.Lock()
33
+
34
+ def try_activate(self, client_id: int) -> bool:
35
+ """If a slot is free, add client as active and return True.
36
+
37
+ Else return False.
38
+ """
39
+ with self._lock:
40
+ if len(self._active) < self._max_active:
41
+ self._active[client_id] = time.time()
42
+ return True
43
+ return False
44
+
45
+ def enqueue(self, client_id: int) -> None:
46
+ with self._lock:
47
+ if client_id not in self._queued:
48
+ self._queued.append(client_id)
49
+
50
+ def remove(self, client_id: int) -> bool:
51
+ """Remove from active or queue.
52
+
53
+ Returns True if was active.
54
+ """
55
+ with self._lock:
56
+ was_active = client_id in self._active
57
+ self._active.pop(client_id, None)
58
+ if client_id in self._queued:
59
+ self._queued.remove(client_id)
60
+ return was_active
61
+
62
+ def promote_next(self) -> int | None:
63
+ """If queue non-empty, pop first, activate them, return their client_id.
64
+
65
+ Else None.
66
+ """
67
+ with self._lock:
68
+ if not self._queued:
69
+ return None
70
+ client_id = self._queued.pop(0)
71
+ self._active[client_id] = time.time()
72
+ return client_id
73
+
74
+ def get_queue_position(self, client_id: int) -> tuple[int, int] | None:
75
+ """(1-based position, total_in_queue) or None if not queued."""
76
+ with self._lock:
77
+ if client_id not in self._queued:
78
+ return None
79
+ pos = self._queued.index(client_id)
80
+ return (pos + 1, len(self._queued))
81
+
82
+ def get_estimated_wait_seconds(self, client_id: int) -> float:
83
+ """Estimated seconds until this queued client gets a slot."""
84
+ with self._lock:
85
+ if client_id not in self._queued:
86
+ return 0.0
87
+ pos = self._queued.index(client_id) + 1 # 1-based
88
+ # Expiry times of active users (when they free a slot)
89
+ now = time.time()
90
+ expiries = sorted(now + self._max_seconds - (now - t) for t in self._active.values())
91
+ if not expiries:
92
+ return 0.0
93
+ # Nth slot to free (1-indexed) wraps over expiries
94
+ idx = (pos - 1) % len(expiries)
95
+ cycles = (pos - 1) // len(expiries)
96
+ slot_free_time = expiries[idx] + cycles * self._max_seconds
97
+ return max(0.0, slot_free_time - now)
98
+
99
+ def is_active(self, client_id: int) -> bool:
100
+ with self._lock:
101
+ return client_id in self._active
102
+
103
+ def was_active(self, client_id: int) -> bool:
104
+ """True if client is currently active (for use when already holding lock)."""
105
+ return client_id in self._active
106
+
107
+
108
def _format_wait(seconds: float) -> str:
    """Render a wait estimate as 'less than a minute' or '~N minute(s)'."""
    if seconds < 60:
        return "less than a minute"
    minutes = int(math.ceil(seconds / 60))
    suffix = "s" if minutes != 1 else ""
    return f"~{minutes} minute{suffix}"
113
+
114
+
115
def _queue_modal_markdown(position: int, total: int, estimated_wait_sec: float) -> str:
    """Markdown body for the waiting-room modal (queue position + ETA)."""
    wait_str = _format_wait(estimated_wait_sec)
    # Render "5" rather than "5.0" when the limit is a whole number of minutes.
    mins = int(MAX_SESSION_MINUTES) if MAX_SESSION_MINUTES == int(MAX_SESSION_MINUTES) else MAX_SESSION_MINUTES
    return f"""## Kimodo Demo — Please Wait

This demo runs with limited capacity.
Each user gets **{mins} minute{"s" if mins != 1 else ""}** of interactive time.

**Your position in queue:** {position} / {total}

**Estimated wait:** {wait_str}

Please keep this tab open — the demo will start automatically when it's your turn.

---
*Want unlimited access? [Duplicate this Space]({DUPLICATE_SPACE_URL}) or clone the [GitHub repo]({GITHUB_REPO_URL}) to run locally!*
"""
132
+
133
+
134
def _welcome_modal_markdown() -> str:
    """Markdown body for the modal shown when a user is granted a session."""
    # Render "5" rather than "5.0" when the limit is a whole number of minutes.
    mins = int(MAX_SESSION_MINUTES) if MAX_SESSION_MINUTES == int(MAX_SESSION_MINUTES) else MAX_SESSION_MINUTES
    return f"""## Welcome to Kimodo Demo

You have been granted a **{mins}-minute** demo session.
Your session timer has started.

Click the button below to begin!
"""
143
+
144
+
145
def _expiry_modal_markdown() -> str:
    """Markdown body for the modal shown when a user's session time runs out."""
    # Render "5" rather than "5.0" when the limit is a whole number of minutes.
    mins = int(MAX_SESSION_MINUTES) if MAX_SESSION_MINUTES == int(MAX_SESSION_MINUTES) else MAX_SESSION_MINUTES
    return f"""## Session Expired

Your {mins}-minute demo session has ended.
Thank you for trying Kimodo!

Refresh this page to rejoin the queue, or [duplicate this Space]({DUPLICATE_SPACE_URL}) for unlimited access.
"""
154
+
155
+
156
+ class QueueManager:
157
+ """Orchestrates HF mode: queue modals, welcome modal, session timer, promotion."""
158
+
159
    def __init__(
        self,
        queue: UserQueue,
        server: viser.ViserServer,
        setup_demo_for_client: Callable[[viser.ClientHandle], None],
        cleanup_session: Callable[[int], None],
    ) -> None:
        """Wire the queue to the viser server and start the modal-refresh thread.

        Args:
            queue: Shared UserQueue tracking active and waiting clients.
            server: Viser server the clients connect to.
            setup_demo_for_client: Callback that builds the demo UI for an
                activated client.
            cleanup_session: Callback that tears down per-client session state.
        """
        self._queue = queue
        self._server = server
        self._setup_demo_for_client = setup_demo_for_client
        self._cleanup_session = cleanup_session
        self._max_seconds = queue._max_seconds

        # Per-client UI handles and expiry timers, guarded by _lock.
        self._queue_modal_handles: dict[int, tuple[Any, Any]] = {}
        self._welcome_modal_handles: dict[int, Any] = {}
        self._expiry_timers: dict[int, threading.Timer] = {}
        self._lock = threading.Lock()
        # Daemon thread keeps queued users' position/wait estimates fresh.
        self._refresh_stop = threading.Event()
        self._refresh_thread = threading.Thread(
            target=self._queue_modal_refresh_loop,
            name="queue-modal-refresh",
            daemon=True,
        )
        self._refresh_thread.start()
183
+
184
    def _queue_modal_refresh_loop(self) -> None:
        """Periodically refresh queue modals so position, total, and estimated wait stay current."""
        # Event.wait doubles as the sleep; setting _refresh_stop ends the loop promptly.
        while not self._refresh_stop.wait(timeout=QUEUE_MODAL_REFRESH_INTERVAL_SEC):
            self._update_all_queue_modals()
188
+
189
    def on_client_connect(self, client: viser.ClientHandle) -> None:
        """Handle new connection: activate if slot free, else enqueue and show queue modal."""
        client_id = client.client_id
        if self._queue.try_activate(client_id):
            try:
                self._setup_demo_for_client(client)
            except RuntimeError as e:
                # Contain CUDA failures to this client so one bad setup does not
                # crash the server; note the client still holds its slot until
                # disconnect. Anything else is re-raised.
                if "CUDA error" in str(e):
                    print(f"CUDA error while setting up client {client_id}: {e}")
                    return
                raise
            self._start_session_timer(client_id)
            self._show_welcome_modal(client)
        else:
            self._queue.enqueue(client_id)
            self._show_queue_modal(client)
            # Queue length changed: refresh positions/estimates for everyone waiting.
            self._update_all_queue_modals()
206
+
207
+ def on_client_disconnect(self, client_id: int) -> None:
208
+ """Remove from queue/active, cancel timer, promote next if was active.
209
+
210
+ Session/scene cleanup is done by the demo's on_client_disconnect.
211
+ """
212
+ with self._lock:
213
+ self._expiry_timers.pop(client_id, None)
214
+ self._queue_modal_handles.pop(client_id, None)
215
+ self._welcome_modal_handles.pop(client_id, None)
216
+ was_active = self._queue.remove(client_id)
217
+ if was_active:
218
+ self._promote_next_user()
219
+ else:
220
+ self._update_all_queue_modals()
221
+
222
+ def _show_queue_modal(self, client: viser.ClientHandle) -> None:
223
+ client_id = client.client_id
224
+ pos, total = self._queue.get_queue_position(client_id) or (0, 0)
225
+ wait_sec = self._queue.get_estimated_wait_seconds(client_id)
226
+ md_content = _queue_modal_markdown(pos, total, wait_sec)
227
+
228
+ modal = client.gui.add_modal(
229
+ "Kimodo Demo — Please Wait",
230
+ size="xl",
231
+ show_close_button=False,
232
+ )
233
+ with modal:
234
+ md_handle = client.gui.add_markdown(md_content)
235
+ with self._lock:
236
+ self._queue_modal_handles[client_id] = (modal, md_handle)
237
+
238
+ def _show_quick_start_modal(self, client: viser.ClientHandle) -> None:
239
+ """Show the quick start instructions modal (same as non-HF mode)."""
240
+ with client.gui.add_modal(
241
+ "Welcome — Quick Start",
242
+ size="xl",
243
+ show_close_button=True,
244
+ save_choice="kimodo.demo.quick_start_ack",
245
+ ) as quick_start_modal:
246
+ client.gui.add_markdown(DEMO_UI_QUICK_START_MODAL_MD)
247
+ client.gui.add_button("Got it (don't remind me again)").on_click(lambda _: quick_start_modal.close())
248
+
249
+ def _show_welcome_modal(self, client: viser.ClientHandle) -> None:
250
+ client_id = client.client_id
251
+
252
+ def _on_start_demo(_: Any) -> None:
253
+ modal.close()
254
+ self._show_quick_start_modal(client)
255
+
256
+ modal = client.gui.add_modal(
257
+ "Welcome to Kimodo Demo",
258
+ size="xl",
259
+ show_close_button=True,
260
+ )
261
+ with modal:
262
+ client.gui.add_markdown(_welcome_modal_markdown())
263
+ client.gui.add_button("Start Demo").on_click(_on_start_demo)
264
+ with self._lock:
265
+ self._welcome_modal_handles[client_id] = modal
266
+
267
+ def _update_all_queue_modals(self) -> None:
268
+ with self._lock:
269
+ handles = list(self._queue_modal_handles.items())
270
+ for client_id, (modal, md_handle) in handles:
271
+ pos_total = self._queue.get_queue_position(client_id)
272
+ if pos_total is None:
273
+ continue
274
+ pos, total = pos_total
275
+ wait_sec = self._queue.get_estimated_wait_seconds(client_id)
276
+ try:
277
+ md_handle.content = _queue_modal_markdown(pos, total, wait_sec)
278
+ except Exception:
279
+ pass
280
+
281
+ def _promote_next_user(self) -> None:
282
+ promoted_id = self._queue.promote_next()
283
+ if promoted_id is None:
284
+ return
285
+ clients = self._server.get_clients()
286
+ client = clients.get(promoted_id)
287
+ if client is None:
288
+ return
289
+ with self._lock:
290
+ old = self._queue_modal_handles.pop(promoted_id, None)
291
+ if old is not None:
292
+ try:
293
+ old[0].close()
294
+ except Exception:
295
+ pass
296
+ try:
297
+ self._setup_demo_for_client(client)
298
+ except RuntimeError as e:
299
+ if "CUDA error" in str(e):
300
+ print(f"CUDA error while setting up client {promoted_id}: {e}")
301
+ return
302
+ raise
303
+ self._start_session_timer(promoted_id)
304
+ self._show_welcome_modal(client)
305
+ self._update_all_queue_modals()
306
+
307
+ def _start_session_timer(self, client_id: int) -> None:
308
+ def on_expiry() -> None:
309
+ self._on_session_expired(client_id)
310
+
311
+ t = threading.Timer(self._max_seconds, on_expiry)
312
+ t.daemon = True
313
+ with self._lock:
314
+ self._expiry_timers[client_id] = t
315
+ t.start()
316
+
317
+ def _on_session_expired(self, client_id: int) -> None:
318
+ with self._lock:
319
+ self._expiry_timers.pop(client_id, None)
320
+ if not self._queue.is_active(client_id):
321
+ return
322
+ self._queue.remove(client_id)
323
+ clients = self._server.get_clients()
324
+ client = clients.get(client_id)
325
+ if client is not None:
326
+ try:
327
+ with client.gui.add_modal(
328
+ "Session Expired",
329
+ size="lg",
330
+ show_close_button=False,
331
+ ) as modal_ctx:
332
+ client.gui.add_markdown(_expiry_modal_markdown())
333
+ except Exception:
334
+ pass
335
+ self._cleanup_session(client_id)
336
+ self._promote_next_user()
state.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from dataclasses import dataclass, field
5
+ from typing import Optional
6
+
7
+ import torch
8
+
9
+ import kimodo.viz.viser_utils as viser_utils
10
+ import viser
11
+ from kimodo.skeleton import SkeletonBase
12
+ from kimodo.viz.viser_utils import GuiElements
13
+
14
+ from .config import (
15
+ DEFAULT_CUR_DURATION,
16
+ DEFAULT_MODEL,
17
+ DEFAULT_PLAYBACK_SPEED,
18
+ )
19
+
20
+
21
@dataclass(frozen=True)
class ModelBundle:
    """Immutable bundle pairing a loaded model with its motion representation,
    skeleton, and native frame rate."""

    model: object  # loaded generation model (project type; kept opaque here)
    motion_rep: object  # motion representation object (project type) — presumably encodes/decodes poses; confirm
    skeleton: SkeletonBase
    model_fps: float  # frame rate associated with the model's output
27
+
28
+
29
@dataclass
class ClientSession:
    """Per-client session data."""

    client: viser.ClientHandle
    gui_elements: GuiElements
    motions: dict  # character_name -> CharacterMotion
    constraints: dict[str, viser_utils.ConstraintSet] = field(default_factory=dict)  # character_name -> constraint set
    timeline_data: object = None  # opaque timeline payload (project type) — TODO confirm shape
    frame_idx: int = 0  # current playback frame
    playing: bool = False  # whether playback is running
    playback_speed: float = DEFAULT_PLAYBACK_SPEED
    playback_time_accumulator: float = 0.0  # presumably carries fractional frame time between ticks — confirm
    last_space_toggle_time: float = 0.0  # presumably debounces spacebar play/pause — confirm
    cur_duration: float = DEFAULT_CUR_DURATION  # current clip duration (seconds)
    max_frame_idx: int = 100  # will be updated based on model_fps
    updating_motions: bool = False  # guard flag while motions are being regenerated
    edit_mode: bool = False  # whether constraint-editing mode is on
    model_name: str = DEFAULT_MODEL
    model_fps: float = 0.0  # set once a model is loaded
    skeleton: SkeletonBase | None = None
    motion_rep: object | None = None
    examples_base_dir: str = ""  # directory that example files are loaded from
    example_dict: dict[str, str] = field(default_factory=dict)  # example display name -> path
    gui_examples_dropdown: Optional[viser.GuiInputHandle] = None
    gui_save_example_path_text: Optional[viser.GuiInputHandle] = None
    gui_model_selector: Optional[viser.GuiInputHandle] = None
    last_prompt_texts: Optional[list[str]] = None  # cached alongside embeddings/lengths below
    last_prompt_embeddings: Optional[torch.Tensor] = None
    last_prompt_lengths: Optional[list[int]] = None
    edit_mode_snapshot: Optional[dict[int, dict[str, object]]] = None  # state captured on entering edit mode — presumably for revert; confirm
    undo_drag_snapshot: Optional[dict[str, object]] = None  # state captured before a drag — presumably for undo; confirm
    show_only_current_constraint: bool = False  # False = Show All, True = Show only Current
ui.py ADDED
The diff for this file is too large to render. See raw diff