Spaces:

lablab-ai-amd-developer-hackathon
/

movimento

Running on Zero

App Files Files Community

rydlrKE committed 3 days ago

Commit

0d13d79

verified ·

1 Parent(s): 3f43f78

fix: lazy TextEncoderAPI client with retry + HTTP readiness gate

Browse files

Files changed (1) hide show

kimodo/model/text_encoder_api.py +58 -29

kimodo/model/text_encoder_api.py CHANGED Viewed

@@ -4,6 +4,8 @@
 import logging
 import numpy as np
 import torch
 from gradio_client import Client
@@ -19,17 +21,34 @@ class TextEncoderAPI:
     """Text encoder API client for motion generation."""
     def __init__(self, url: str):
-        # Keep startup resilient: do not connect during app/model initialization.
-        # In strict API mode, we only attempt network calls when embeddings are requested.
         self.url = url
         self.client = None
         self.device = "cpu"
         self.dtype = torch.float
-    def _get_client(self):
-        if self.client is None:
-            self.client = Client(self.url, verbose=False)
-        return self.client
     def _create_np_random_name(self):
         import uuid
@@ -51,33 +70,43 @@ class TextEncoderAPI:
         elif result is not None:
             candidates = [result]
-        # First pass: check for valid .npy paths
         for item in candidates:
-            if isinstance(item, str) and item and item.endswith(".npy"):
-                return item
             if isinstance(item, dict):
                 for key in ("value", "path", "name"):
                     value = item.get(key)
-                    if isinstance(value, str) and value.endswith(".npy"):
-                        return value
-        # Second pass: collect all error indicators
-        error_parts = []
-        for item in candidates:
-            if isinstance(item, str) and item:
-                if item.startswith("##") or "failed" in item.lower() or "error" in item.lower():
-                    error_parts.append(item.strip())
-            if isinstance(item, dict):
-                value = item.get("value")
-                if isinstance(value, str) and (
-                    value.startswith("##") or "failed" in value.lower() or "error" in value.lower()
-                ):
-                    error_parts.append(value.strip())
-        if error_parts:
-            # Combine all error messages
-            full_error = "\n".join(error_parts)
-            raise RuntimeError(f"Text encoder initialization failed:\n{full_error}")
         raise RuntimeError(f"Text encoder API returned unexpected payload: {result!r}")

 import logging
+import os
 import numpy as np
 import torch
 from gradio_client import Client
     """Text encoder API client for motion generation."""
     def __init__(self, url: str):
         self.url = url
         self.client = None
         self.device = "cpu"
         self.dtype = torch.float
+    def _get_client(self) -> Client:
+        """Return the cached Gradio client, creating it on first use.
+
+        Client construction is retried with a growing pause (starting at 2s,
+        multiplied by 1.5 after each failure, capped at 20s) until it succeeds
+        or the time budget is used up. The budget in seconds is read from the
+        ``TEXT_ENCODER_CLIENT_TIMEOUT_SEC`` environment variable (default 180).
+        At least one attempt is always made, and a pause never extends past
+        the deadline.
+
+        Returns:
+            The connected client, which is also stored on ``self.client``.
+
+        Raises:
+            RuntimeError: If no client could be created within the budget; the
+                last construction error is attached as ``__cause__``.
+            ValueError: If ``TEXT_ENCODER_CLIENT_TIMEOUT_SEC`` is not an integer.
+        """
+        if self.client is not None:
+            return self.client
+        import time
+        client_timeout_sec = int(os.environ.get("TEXT_ENCODER_CLIENT_TIMEOUT_SEC", "180"))
+        deadline = time.monotonic() + client_timeout_sec
+        last_exc: Exception | None = None
+        delay = 2.0
+        while True:
+            try:
+                # Assign only on success so a failed attempt leaves self.client as None.
+                self.client = Client(self.url, verbose=False)
+                return self.client
+            except Exception as exc:
+                # Broad on purpose: any construction failure is treated as "not ready yet".
+                last_exc = exc
+            remaining = deadline - time.monotonic()
+            if remaining <= 0:
+                break
+            # Clamp the pause so the final attempt happens at the deadline, not after it.
+            pause = min(delay, remaining)
+            print(f"[text_encoder_api] Client init failed ({last_exc}), retrying in {pause:.0f}s …")
+            time.sleep(pause)
+            delay = min(delay * 1.5, 20.0)
+        raise RuntimeError(
+            f"Text encoder at {self.url!r} did not become ready within {client_timeout_sec}s. "
+            f"Last error: {last_exc}"
+        ) from last_exc
     def _create_np_random_name(self):
         import uuid
         elif result is not None:
             candidates = [result]
         for item in candidates:
+            # Check for error messages first (e.g., "## Encoder initialization failed")
+            if isinstance(item, str):
+                if item and item.startswith("##"):
+                    # This is an error message from the Gradio server
+                    error_msg = item.replace("##", "").strip()
+                    if "initialization failed" in error_msg.lower():
+                        raise RuntimeError(
+                            f"Text encoder initialization failed. This usually indicates:\n"
+                            f"  - Missing or invalid HF_TOKEN for gated models (Llama-3)\n"
+                            f"  - Poor network connectivity during model download\n"
+                            f"  Original error: {error_msg}"
+                        )
+                    raise RuntimeError(f"Text encoder API error: {error_msg}")
+                if "failed" in item.lower() or "error" in item.lower():
+                    raise RuntimeError(f"Text encoder API error: {item}")
+                if item and item.endswith(".npy"):
+                    return item
+                if item:
+                    # Log unexpected string for debugging
+                    print(f"[text_encoder_api] unexpected string response: {item[:100]}")
             if isinstance(item, dict):
                 for key in ("value", "path", "name"):
                     value = item.get(key)
+                    if isinstance(value, str) and value:
+                        # Check for errors in dict values too
+                        if "initialization failed" in value.lower():
+                            raise RuntimeError(
+                                f"Text encoder initialization failed. This usually indicates:\n"
+                                f"  - Missing or invalid HF_TOKEN for gated models (Llama-3)\n"
+                                f"  - Poor network connectivity during model download"
+                            )
+                        if value.startswith("##") or "failed" in value.lower() or "error" in value.lower():
+                            raise RuntimeError(f"Text encoder API error: {value}")
+                        if value.endswith(".npy"):
+                            return value
         raise RuntimeError(f"Text encoder API returned unexpected payload: {result!r}")