Spaces:

bytedance-research
/

Lance

Running on Zero

App Files Files Community

ffy2000 committed 5 days ago

Commit

e79d110

1 Parent(s): 62c372b

update

Browse files

Files changed (1) hide show

app.py +67 -13

app.py CHANGED Viewed

@@ -75,7 +75,7 @@ RUN_RECORD_FILENAME = "generation_record.json"
 LOCAL_MODEL_BASE_DIR = Path("downloads")
 SPACE_MODEL_BASE_DIR = Path("/data/lance_models")
 DEFAULT_MODEL_REPO_ID = "bytedance-research/Lance"
-DEFAULT_FLASH_ATTN_VERSION = "2.5.8"
 DEFAULT_MODEL_VARIANT = "video"
 MODEL_VARIANT_VIDEO = "video"
 MODEL_VARIANT_IMAGE = "image"
@@ -134,6 +134,10 @@ DEFAULT_QUEUE_SIZE = 32
 USE_KVCACHE = True
 TEXT_TEMPLATE = True
 RECORD_WRITE_LOCK = threading.Lock()
 LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
 LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
@@ -1044,8 +1048,9 @@ def convert_model_weights_to_bf16_inplace(model_path: Path) -> bool:
     return True
-def compact_downloaded_model_weights(model_base_dir: Path) -> None:
-    for model_dir_name in (MODEL_VARIANT_TO_DIR[MODEL_VARIANT_IMAGE], MODEL_VARIANT_TO_DIR[MODEL_VARIANT_VIDEO]):
         model_path = model_base_dir / model_dir_name
         try:
             convert_model_weights_to_bf16_inplace(model_path)
@@ -1060,7 +1065,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
     required_paths = get_required_model_asset_paths(model_base_dir, model_path)
     if all(path.exists() for path in required_paths):
-        compact_downloaded_model_weights(model_base_dir)
         return model_path
     downloads_model_base_dir = Path("downloads")
@@ -1072,7 +1077,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
             model_path = downloads_model_path
             required_paths = downloads_required_paths
             os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
-            compact_downloaded_model_weights(model_base_dir)
             return model_path
     auto_download = env_flag("LANCE_AUTO_DOWNLOAD", running_on_space())
@@ -1100,7 +1105,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
     if snapshot_path != model_base_dir and not model_path.exists():
         os.environ["LANCE_MODEL_BASE_DIR"] = display_path(snapshot_path)
         model_path = get_model_path(model_variant)
-    compact_downloaded_model_weights(model_base_dir)
     return model_path
@@ -2397,6 +2402,45 @@ def ensure_flash_attn_installed() -> None:
     print(f"[startup] flash-attn {DEFAULT_FLASH_ATTN_VERSION} installed successfully.", flush=True)
 def get_env_int(name: str, default: int) -> int:
     """Read an integer environment variable, falling back safely on invalid values."""
     try:
@@ -2444,8 +2488,8 @@ def get_run_task_gpu_duration(
     if internal_task in {TASK_T2V, TASK_VIDEO_EDIT}:
         return clamp_zerogpu_duration(max(180, requested_seconds * 2))
     if internal_task == TASK_X2T_VIDEO:
-        return clamp_zerogpu_duration(180)
-    return clamp_zerogpu_duration(90)
 def get_pipeline_pool(task: str) -> PipelinePool:
@@ -2518,14 +2562,21 @@ def build_status_markdown() -> str:
     gpu_text = "unknown"
     concurrency = 1
     active_variant = "none"
     if ACTIVE_PIPELINE_POOL is not None:
         active_variant = ACTIVE_PIPELINE_POOL.model_variant
         gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
         concurrency = ACTIVE_PIPELINE_POOL.size
     return (
         f"**Status**  GPU: `{gpu_text}`  |  Max concurrency: `{concurrency}`  |  "
         f"Queue limit: `{QUEUE_MAX_SIZE}`  |  Active model: `{active_variant}`  |  "
-        f"Switch mode: `unload then load`"
     )
@@ -3044,10 +3095,13 @@ if __name__ == "__main__":
     args = parse_args()
     os.environ["LANCE_GPUS"] = args.gpus
     QUEUE_MAX_SIZE = args.queue_size
-    print(
-        "[startup] Skipping model preload. UI will launch first, and Lance weights will be downloaded lazily inside GPU inference calls.",
-        flush=True,
-    )
     concurrency_limit = 1
     demo = build_demo()
     demo.queue(

 LOCAL_MODEL_BASE_DIR = Path("downloads")
 SPACE_MODEL_BASE_DIR = Path("/data/lance_models")
 DEFAULT_MODEL_REPO_ID = "bytedance-research/Lance"
+DEFAULT_FLASH_ATTN_VERSION = "2.6.3"
 DEFAULT_MODEL_VARIANT = "video"
 MODEL_VARIANT_VIDEO = "video"
 MODEL_VARIANT_IMAGE = "image"
 USE_KVCACHE = True
 TEXT_TEMPLATE = True
 RECORD_WRITE_LOCK = threading.Lock()
+MODEL_ASSET_PREFETCH_LOCK = threading.Lock()
+MODEL_ASSET_PREFETCH_STARTED = False
+MODEL_ASSET_PREFETCH_DONE = threading.Event()
+MODEL_ASSET_PREFETCH_ERROR: Optional[str] = None
 LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
 LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
     return True
+def compact_downloaded_model_weights(model_base_dir: Path, variants: Optional[list[str]] = None) -> None:
+    model_dir_names = variants or [MODEL_VARIANT_TO_DIR[MODEL_VARIANT_IMAGE], MODEL_VARIANT_TO_DIR[MODEL_VARIANT_VIDEO]]
+    for model_dir_name in model_dir_names:
         model_path = model_base_dir / model_dir_name
         try:
             convert_model_weights_to_bf16_inplace(model_path)
     required_paths = get_required_model_asset_paths(model_base_dir, model_path)
     if all(path.exists() for path in required_paths):
+        compact_downloaded_model_weights(model_base_dir, [MODEL_VARIANT_TO_DIR[normalize_model_variant(model_variant)]])
         return model_path
     downloads_model_base_dir = Path("downloads")
             model_path = downloads_model_path
             required_paths = downloads_required_paths
             os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
+            compact_downloaded_model_weights(model_base_dir, [MODEL_VARIANT_TO_DIR[normalize_model_variant(model_variant)]])
             return model_path
     auto_download = env_flag("LANCE_AUTO_DOWNLOAD", running_on_space())
     if snapshot_path != model_base_dir and not model_path.exists():
         os.environ["LANCE_MODEL_BASE_DIR"] = display_path(snapshot_path)
         model_path = get_model_path(model_variant)
+    compact_downloaded_model_weights(model_base_dir, [MODEL_VARIANT_TO_DIR[normalize_model_variant(model_variant)]])
     return model_path
     print(f"[startup] flash-attn {DEFAULT_FLASH_ATTN_VERSION} installed successfully.", flush=True)
+def prefetch_lance_runtime_assets() -> None:
+    global MODEL_ASSET_PREFETCH_ERROR
+    with MODEL_ASSET_PREFETCH_LOCK:
+        if MODEL_ASSET_PREFETCH_DONE.is_set():
+            return
+        print(
+            "[startup] Preloading Lance runtime assets on CPU: flash-attn plus both model variants.",
+            flush=True,
+        )
+        try:
+            ensure_flash_attn_installed()
+            for variant in (MODEL_VARIANT_VIDEO, MODEL_VARIANT_IMAGE):
+                model_path = ensure_model_assets(variant)
+                print(
+                    f"[startup] CPU preload finished for {variant} at {display_path(model_path)}",
+                    flush=True,
+                )
+            MODEL_ASSET_PREFETCH_ERROR = None
+            MODEL_ASSET_PREFETCH_DONE.set()
+            print("[startup] CPU asset preload finished for all Lance variants.", flush=True)
+        except Exception as exc:
+            MODEL_ASSET_PREFETCH_ERROR = str(exc)
+            print(f"[startup] CPU asset preload failed: {exc}", flush=True)
+def start_lance_runtime_asset_prefetch() -> None:
+    global MODEL_ASSET_PREFETCH_STARTED
+    with MODEL_ASSET_PREFETCH_LOCK:
+        if MODEL_ASSET_PREFETCH_STARTED:
+            return
+        MODEL_ASSET_PREFETCH_STARTED = True
+    thread = threading.Thread(
+        target=prefetch_lance_runtime_assets,
+        name="lance-runtime-asset-prefetch",
+        daemon=True,
+    )
+    thread.start()
 def get_env_int(name: str, default: int) -> int:
     """Read an integer environment variable, falling back safely on invalid values."""
     try:
     if internal_task in {TASK_T2V, TASK_VIDEO_EDIT}:
         return clamp_zerogpu_duration(max(180, requested_seconds * 2))
     if internal_task == TASK_X2T_VIDEO:
+        return clamp_zerogpu_duration(60)
+    return clamp_zerogpu_duration(60)
 def get_pipeline_pool(task: str) -> PipelinePool:
     gpu_text = "unknown"
     concurrency = 1
     active_variant = "none"
+    asset_status = "pending"
     if ACTIVE_PIPELINE_POOL is not None:
         active_variant = ACTIVE_PIPELINE_POOL.model_variant
         gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
         concurrency = ACTIVE_PIPELINE_POOL.size
+    if MODEL_ASSET_PREFETCH_DONE.is_set():
+        asset_status = "done"
+    elif MODEL_ASSET_PREFETCH_STARTED:
+        asset_status = "running"
+    if MODEL_ASSET_PREFETCH_ERROR:
+        asset_status = f"failed: {MODEL_ASSET_PREFETCH_ERROR}"
     return (
         f"**Status**  GPU: `{gpu_text}`  |  Max concurrency: `{concurrency}`  |  "
         f"Queue limit: `{QUEUE_MAX_SIZE}`  |  Active model: `{active_variant}`  |  "
+        f"Switch mode: `unload then load`  |  Asset preload: `{asset_status}`"
     )
     args = parse_args()
     os.environ["LANCE_GPUS"] = args.gpus
     QUEUE_MAX_SIZE = args.queue_size
+    if env_flag("LANCE_PRELOAD_MODEL_ASSETS", running_on_space()):
+        start_lance_runtime_asset_prefetch()
+    else:
+        print(
+            "[startup] Model asset preload disabled. UI will launch first, and Lance weights will be downloaded lazily inside GPU inference calls.",
+            flush=True,
+        )
     concurrency_limit = 1
     demo = build_demo()
     demo.queue(