ffy2000 committed on
Commit
e79d110
·
1 Parent(s): 62c372b
Files changed (1) hide show
  1. app.py +67 -13
app.py CHANGED
@@ -75,7 +75,7 @@ RUN_RECORD_FILENAME = "generation_record.json"
75
  LOCAL_MODEL_BASE_DIR = Path("downloads")
76
  SPACE_MODEL_BASE_DIR = Path("/data/lance_models")
77
  DEFAULT_MODEL_REPO_ID = "bytedance-research/Lance"
78
- DEFAULT_FLASH_ATTN_VERSION = "2.5.8"
79
  DEFAULT_MODEL_VARIANT = "video"
80
  MODEL_VARIANT_VIDEO = "video"
81
  MODEL_VARIANT_IMAGE = "image"
@@ -134,6 +134,10 @@ DEFAULT_QUEUE_SIZE = 32
134
  USE_KVCACHE = True
135
  TEXT_TEMPLATE = True
136
  RECORD_WRITE_LOCK = threading.Lock()
 
 
 
 
137
 
138
  LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
139
  LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
@@ -1044,8 +1048,9 @@ def convert_model_weights_to_bf16_inplace(model_path: Path) -> bool:
1044
  return True
1045
 
1046
 
1047
- def compact_downloaded_model_weights(model_base_dir: Path) -> None:
1048
- for model_dir_name in (MODEL_VARIANT_TO_DIR[MODEL_VARIANT_IMAGE], MODEL_VARIANT_TO_DIR[MODEL_VARIANT_VIDEO]):
 
1049
  model_path = model_base_dir / model_dir_name
1050
  try:
1051
  convert_model_weights_to_bf16_inplace(model_path)
@@ -1060,7 +1065,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
1060
 
1061
  required_paths = get_required_model_asset_paths(model_base_dir, model_path)
1062
  if all(path.exists() for path in required_paths):
1063
- compact_downloaded_model_weights(model_base_dir)
1064
  return model_path
1065
 
1066
  downloads_model_base_dir = Path("downloads")
@@ -1072,7 +1077,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
1072
  model_path = downloads_model_path
1073
  required_paths = downloads_required_paths
1074
  os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
1075
- compact_downloaded_model_weights(model_base_dir)
1076
  return model_path
1077
 
1078
  auto_download = env_flag("LANCE_AUTO_DOWNLOAD", running_on_space())
@@ -1100,7 +1105,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
1100
  if snapshot_path != model_base_dir and not model_path.exists():
1101
  os.environ["LANCE_MODEL_BASE_DIR"] = display_path(snapshot_path)
1102
  model_path = get_model_path(model_variant)
1103
- compact_downloaded_model_weights(model_base_dir)
1104
  return model_path
1105
 
1106
 
@@ -2397,6 +2402,45 @@ def ensure_flash_attn_installed() -> None:
2397
  print(f"[startup] flash-attn {DEFAULT_FLASH_ATTN_VERSION} installed successfully.", flush=True)
2398
 
2399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2400
  def get_env_int(name: str, default: int) -> int:
2401
  """Read an integer environment variable, falling back safely on invalid values."""
2402
  try:
@@ -2444,8 +2488,8 @@ def get_run_task_gpu_duration(
2444
  if internal_task in {TASK_T2V, TASK_VIDEO_EDIT}:
2445
  return clamp_zerogpu_duration(max(180, requested_seconds * 2))
2446
  if internal_task == TASK_X2T_VIDEO:
2447
- return clamp_zerogpu_duration(180)
2448
- return clamp_zerogpu_duration(90)
2449
 
2450
 
2451
  def get_pipeline_pool(task: str) -> PipelinePool:
@@ -2518,14 +2562,21 @@ def build_status_markdown() -> str:
2518
  gpu_text = "unknown"
2519
  concurrency = 1
2520
  active_variant = "none"
 
2521
  if ACTIVE_PIPELINE_POOL is not None:
2522
  active_variant = ACTIVE_PIPELINE_POOL.model_variant
2523
  gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
2524
  concurrency = ACTIVE_PIPELINE_POOL.size
 
 
 
 
 
 
2525
  return (
2526
  f"**Status** GPU: `{gpu_text}` | Max concurrency: `{concurrency}` | "
2527
  f"Queue limit: `{QUEUE_MAX_SIZE}` | Active model: `{active_variant}` | "
2528
- f"Switch mode: `unload then load`"
2529
  )
2530
 
2531
 
@@ -3044,10 +3095,13 @@ if __name__ == "__main__":
3044
  args = parse_args()
3045
  os.environ["LANCE_GPUS"] = args.gpus
3046
  QUEUE_MAX_SIZE = args.queue_size
3047
- print(
3048
- "[startup] Skipping model preload. UI will launch first, and Lance weights will be downloaded lazily inside GPU inference calls.",
3049
- flush=True,
3050
- )
 
 
 
3051
  concurrency_limit = 1
3052
  demo = build_demo()
3053
  demo.queue(
 
75
  LOCAL_MODEL_BASE_DIR = Path("downloads")
76
  SPACE_MODEL_BASE_DIR = Path("/data/lance_models")
77
  DEFAULT_MODEL_REPO_ID = "bytedance-research/Lance"
78
+ DEFAULT_FLASH_ATTN_VERSION = "2.6.3"
79
  DEFAULT_MODEL_VARIANT = "video"
80
  MODEL_VARIANT_VIDEO = "video"
81
  MODEL_VARIANT_IMAGE = "image"
 
134
  USE_KVCACHE = True
135
  TEXT_TEMPLATE = True
136
  RECORD_WRITE_LOCK = threading.Lock()
137
+ MODEL_ASSET_PREFETCH_LOCK = threading.Lock()
138
+ MODEL_ASSET_PREFETCH_STARTED = False
139
+ MODEL_ASSET_PREFETCH_DONE = threading.Event()
140
+ MODEL_ASSET_PREFETCH_ERROR: Optional[str] = None
141
 
142
  LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
143
  LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
 
1048
  return True
1049
 
1050
 
1051
+ def compact_downloaded_model_weights(model_base_dir: Path, variants: Optional[list[str]] = None) -> None:
1052
+ model_dir_names = variants or [MODEL_VARIANT_TO_DIR[MODEL_VARIANT_IMAGE], MODEL_VARIANT_TO_DIR[MODEL_VARIANT_VIDEO]]
1053
+ for model_dir_name in model_dir_names:
1054
  model_path = model_base_dir / model_dir_name
1055
  try:
1056
  convert_model_weights_to_bf16_inplace(model_path)
 
1065
 
1066
  required_paths = get_required_model_asset_paths(model_base_dir, model_path)
1067
  if all(path.exists() for path in required_paths):
1068
+ compact_downloaded_model_weights(model_base_dir, [MODEL_VARIANT_TO_DIR[normalize_model_variant(model_variant)]])
1069
  return model_path
1070
 
1071
  downloads_model_base_dir = Path("downloads")
 
1077
  model_path = downloads_model_path
1078
  required_paths = downloads_required_paths
1079
  os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
1080
+ compact_downloaded_model_weights(model_base_dir, [MODEL_VARIANT_TO_DIR[normalize_model_variant(model_variant)]])
1081
  return model_path
1082
 
1083
  auto_download = env_flag("LANCE_AUTO_DOWNLOAD", running_on_space())
 
1105
  if snapshot_path != model_base_dir and not model_path.exists():
1106
  os.environ["LANCE_MODEL_BASE_DIR"] = display_path(snapshot_path)
1107
  model_path = get_model_path(model_variant)
1108
+ compact_downloaded_model_weights(model_base_dir, [MODEL_VARIANT_TO_DIR[normalize_model_variant(model_variant)]])
1109
  return model_path
1110
 
1111
 
 
2402
  print(f"[startup] flash-attn {DEFAULT_FLASH_ATTN_VERSION} installed successfully.", flush=True)
2403
 
2404
 
2405
def prefetch_lance_runtime_assets() -> None:
    """Install flash-attn and fetch both Lance model variants ahead of first use.

    The outcome is published through module-level state instead of a return
    value: on success ``MODEL_ASSET_PREFETCH_ERROR`` is reset to ``None`` and
    ``MODEL_ASSET_PREFETCH_DONE`` is set; on failure the event stays clear and
    ``MODEL_ASSET_PREFETCH_ERROR`` receives a non-empty description.

    Exceptions are caught and reported on stdout rather than propagated.
    """
    global MODEL_ASSET_PREFETCH_ERROR
    # The lock is held for the whole preload, so overlapping calls run one at
    # a time and a later call returns early once the first one has finished.
    with MODEL_ASSET_PREFETCH_LOCK:
        if MODEL_ASSET_PREFETCH_DONE.is_set():
            return
        print(
            "[startup] Preloading Lance runtime assets on CPU: flash-attn plus both model variants.",
            flush=True,
        )
        try:
            ensure_flash_attn_installed()
            for variant in (MODEL_VARIANT_VIDEO, MODEL_VARIANT_IMAGE):
                model_path = ensure_model_assets(variant)
                print(
                    f"[startup] CPU preload finished for {variant} at {display_path(model_path)}",
                    flush=True,
                )
            MODEL_ASSET_PREFETCH_ERROR = None
            MODEL_ASSET_PREFETCH_DONE.set()
            print("[startup] CPU asset preload finished for all Lance variants.", flush=True)
        except Exception as exc:
            # str(exc) is "" for exceptions raised without a message. The
            # status text checks this value for truthiness, so an empty string
            # would hide the failure; fall back to the exception class name.
            MODEL_ASSET_PREFETCH_ERROR = str(exc) or type(exc).__name__
            print(f"[startup] CPU asset preload failed: {MODEL_ASSET_PREFETCH_ERROR}", flush=True)
2428
+
2429
+
2430
def start_lance_runtime_asset_prefetch() -> None:
    """Start the background asset prefetch thread, at most once per process.

    The first call flips ``MODEL_ASSET_PREFETCH_STARTED`` under
    ``MODEL_ASSET_PREFETCH_LOCK`` and launches a daemon thread running
    ``prefetch_lance_runtime_assets``; later calls return without doing
    anything.
    """
    global MODEL_ASSET_PREFETCH_STARTED
    # Test-and-set under the lock so only one caller can win the right to
    # spawn the worker.
    with MODEL_ASSET_PREFETCH_LOCK:
        already_started = MODEL_ASSET_PREFETCH_STARTED
        MODEL_ASSET_PREFETCH_STARTED = True
    if already_started:
        return
    worker = threading.Thread(
        target=prefetch_lance_runtime_assets,
        name="lance-runtime-asset-prefetch",
        daemon=True,
    )
    worker.start()
2442
+
2443
+
2444
  def get_env_int(name: str, default: int) -> int:
2445
  """Read an integer environment variable, falling back safely on invalid values."""
2446
  try:
 
2488
  if internal_task in {TASK_T2V, TASK_VIDEO_EDIT}:
2489
  return clamp_zerogpu_duration(max(180, requested_seconds * 2))
2490
  if internal_task == TASK_X2T_VIDEO:
2491
+ return clamp_zerogpu_duration(60)
2492
+ return clamp_zerogpu_duration(60)
2493
 
2494
 
2495
  def get_pipeline_pool(task: str) -> PipelinePool:
 
2562
  gpu_text = "unknown"
2563
  concurrency = 1
2564
  active_variant = "none"
2565
+ asset_status = "pending"
2566
  if ACTIVE_PIPELINE_POOL is not None:
2567
  active_variant = ACTIVE_PIPELINE_POOL.model_variant
2568
  gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
2569
  concurrency = ACTIVE_PIPELINE_POOL.size
2570
+ if MODEL_ASSET_PREFETCH_DONE.is_set():
2571
+ asset_status = "done"
2572
+ elif MODEL_ASSET_PREFETCH_STARTED:
2573
+ asset_status = "running"
2574
+ if MODEL_ASSET_PREFETCH_ERROR:
2575
+ asset_status = f"failed: {MODEL_ASSET_PREFETCH_ERROR}"
2576
  return (
2577
  f"**Status** GPU: `{gpu_text}` | Max concurrency: `{concurrency}` | "
2578
  f"Queue limit: `{QUEUE_MAX_SIZE}` | Active model: `{active_variant}` | "
2579
+ f"Switch mode: `unload then load` | Asset preload: `{asset_status}`"
2580
  )
2581
 
2582
 
 
3095
  args = parse_args()
3096
  os.environ["LANCE_GPUS"] = args.gpus
3097
  QUEUE_MAX_SIZE = args.queue_size
3098
+ if env_flag("LANCE_PRELOAD_MODEL_ASSETS", running_on_space()):
3099
+ start_lance_runtime_asset_prefetch()
3100
+ else:
3101
+ print(
3102
+ "[startup] Model asset preload disabled. UI will launch first, and Lance weights will be downloaded lazily inside GPU inference calls.",
3103
+ flush=True,
3104
+ )
3105
  concurrency_limit = 1
3106
  demo = build_demo()
3107
  demo.queue(