Spaces:
Running on Zero
Running on Zero
update
Browse files
app.py
CHANGED
|
@@ -75,7 +75,7 @@ RUN_RECORD_FILENAME = "generation_record.json"
|
|
| 75 |
LOCAL_MODEL_BASE_DIR = Path("downloads")
|
| 76 |
SPACE_MODEL_BASE_DIR = Path("/data/lance_models")
|
| 77 |
DEFAULT_MODEL_REPO_ID = "bytedance-research/Lance"
|
| 78 |
-
DEFAULT_FLASH_ATTN_VERSION = "2.
|
| 79 |
DEFAULT_MODEL_VARIANT = "video"
|
| 80 |
MODEL_VARIANT_VIDEO = "video"
|
| 81 |
MODEL_VARIANT_IMAGE = "image"
|
|
@@ -134,6 +134,10 @@ DEFAULT_QUEUE_SIZE = 32
|
|
| 134 |
USE_KVCACHE = True
|
| 135 |
TEXT_TEMPLATE = True
|
| 136 |
RECORD_WRITE_LOCK = threading.Lock()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
|
| 139 |
LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
|
|
@@ -1044,8 +1048,9 @@ def convert_model_weights_to_bf16_inplace(model_path: Path) -> bool:
|
|
| 1044 |
return True
|
| 1045 |
|
| 1046 |
|
| 1047 |
-
def compact_downloaded_model_weights(model_base_dir: Path) -> None:
|
| 1048 |
-
|
|
|
|
| 1049 |
model_path = model_base_dir / model_dir_name
|
| 1050 |
try:
|
| 1051 |
convert_model_weights_to_bf16_inplace(model_path)
|
|
@@ -1060,7 +1065,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
|
|
| 1060 |
|
| 1061 |
required_paths = get_required_model_asset_paths(model_base_dir, model_path)
|
| 1062 |
if all(path.exists() for path in required_paths):
|
| 1063 |
-
compact_downloaded_model_weights(model_base_dir)
|
| 1064 |
return model_path
|
| 1065 |
|
| 1066 |
downloads_model_base_dir = Path("downloads")
|
|
@@ -1072,7 +1077,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
|
|
| 1072 |
model_path = downloads_model_path
|
| 1073 |
required_paths = downloads_required_paths
|
| 1074 |
os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
|
| 1075 |
-
compact_downloaded_model_weights(model_base_dir)
|
| 1076 |
return model_path
|
| 1077 |
|
| 1078 |
auto_download = env_flag("LANCE_AUTO_DOWNLOAD", running_on_space())
|
|
@@ -1100,7 +1105,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
|
|
| 1100 |
if snapshot_path != model_base_dir and not model_path.exists():
|
| 1101 |
os.environ["LANCE_MODEL_BASE_DIR"] = display_path(snapshot_path)
|
| 1102 |
model_path = get_model_path(model_variant)
|
| 1103 |
-
compact_downloaded_model_weights(model_base_dir)
|
| 1104 |
return model_path
|
| 1105 |
|
| 1106 |
|
|
@@ -2397,6 +2402,45 @@ def ensure_flash_attn_installed() -> None:
|
|
| 2397 |
print(f"[startup] flash-attn {DEFAULT_FLASH_ATTN_VERSION} installed successfully.", flush=True)
|
| 2398 |
|
| 2399 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2400 |
def get_env_int(name: str, default: int) -> int:
|
| 2401 |
"""Read an integer environment variable, falling back safely on invalid values."""
|
| 2402 |
try:
|
|
@@ -2444,8 +2488,8 @@ def get_run_task_gpu_duration(
|
|
| 2444 |
if internal_task in {TASK_T2V, TASK_VIDEO_EDIT}:
|
| 2445 |
return clamp_zerogpu_duration(max(180, requested_seconds * 2))
|
| 2446 |
if internal_task == TASK_X2T_VIDEO:
|
| 2447 |
-
return clamp_zerogpu_duration(
|
| 2448 |
-
return clamp_zerogpu_duration(
|
| 2449 |
|
| 2450 |
|
| 2451 |
def get_pipeline_pool(task: str) -> PipelinePool:
|
|
@@ -2518,14 +2562,21 @@ def build_status_markdown() -> str:
|
|
| 2518 |
gpu_text = "unknown"
|
| 2519 |
concurrency = 1
|
| 2520 |
active_variant = "none"
|
|
|
|
| 2521 |
if ACTIVE_PIPELINE_POOL is not None:
|
| 2522 |
active_variant = ACTIVE_PIPELINE_POOL.model_variant
|
| 2523 |
gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
|
| 2524 |
concurrency = ACTIVE_PIPELINE_POOL.size
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2525 |
return (
|
| 2526 |
f"**Status** GPU: `{gpu_text}` | Max concurrency: `{concurrency}` | "
|
| 2527 |
f"Queue limit: `{QUEUE_MAX_SIZE}` | Active model: `{active_variant}` | "
|
| 2528 |
-
f"Switch mode: `unload then load`"
|
| 2529 |
)
|
| 2530 |
|
| 2531 |
|
|
@@ -3044,10 +3095,13 @@ if __name__ == "__main__":
|
|
| 3044 |
args = parse_args()
|
| 3045 |
os.environ["LANCE_GPUS"] = args.gpus
|
| 3046 |
QUEUE_MAX_SIZE = args.queue_size
|
| 3047 |
-
|
| 3048 |
-
|
| 3049 |
-
|
| 3050 |
-
|
|
|
|
|
|
|
|
|
|
| 3051 |
concurrency_limit = 1
|
| 3052 |
demo = build_demo()
|
| 3053 |
demo.queue(
|
|
|
|
| 75 |
LOCAL_MODEL_BASE_DIR = Path("downloads")
|
| 76 |
SPACE_MODEL_BASE_DIR = Path("/data/lance_models")
|
| 77 |
DEFAULT_MODEL_REPO_ID = "bytedance-research/Lance"
|
| 78 |
+
DEFAULT_FLASH_ATTN_VERSION = "2.6.3"
|
| 79 |
DEFAULT_MODEL_VARIANT = "video"
|
| 80 |
MODEL_VARIANT_VIDEO = "video"
|
| 81 |
MODEL_VARIANT_IMAGE = "image"
|
|
|
|
| 134 |
USE_KVCACHE = True
|
| 135 |
TEXT_TEMPLATE = True
|
| 136 |
RECORD_WRITE_LOCK = threading.Lock()
|
| 137 |
+
MODEL_ASSET_PREFETCH_LOCK = threading.Lock()
|
| 138 |
+
MODEL_ASSET_PREFETCH_STARTED = False
|
| 139 |
+
MODEL_ASSET_PREFETCH_DONE = threading.Event()
|
| 140 |
+
MODEL_ASSET_PREFETCH_ERROR: Optional[str] = None
|
| 141 |
|
| 142 |
LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
|
| 143 |
LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
|
|
|
|
| 1048 |
return True
|
| 1049 |
|
| 1050 |
|
| 1051 |
+
def compact_downloaded_model_weights(model_base_dir: Path, variants: Optional[list[str]] = None) -> None:
|
| 1052 |
+
model_dir_names = variants or [MODEL_VARIANT_TO_DIR[MODEL_VARIANT_IMAGE], MODEL_VARIANT_TO_DIR[MODEL_VARIANT_VIDEO]]
|
| 1053 |
+
for model_dir_name in model_dir_names:
|
| 1054 |
model_path = model_base_dir / model_dir_name
|
| 1055 |
try:
|
| 1056 |
convert_model_weights_to_bf16_inplace(model_path)
|
|
|
|
| 1065 |
|
| 1066 |
required_paths = get_required_model_asset_paths(model_base_dir, model_path)
|
| 1067 |
if all(path.exists() for path in required_paths):
|
| 1068 |
+
compact_downloaded_model_weights(model_base_dir, [MODEL_VARIANT_TO_DIR[normalize_model_variant(model_variant)]])
|
| 1069 |
return model_path
|
| 1070 |
|
| 1071 |
downloads_model_base_dir = Path("downloads")
|
|
|
|
| 1077 |
model_path = downloads_model_path
|
| 1078 |
required_paths = downloads_required_paths
|
| 1079 |
os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
|
| 1080 |
+
compact_downloaded_model_weights(model_base_dir, [MODEL_VARIANT_TO_DIR[normalize_model_variant(model_variant)]])
|
| 1081 |
return model_path
|
| 1082 |
|
| 1083 |
auto_download = env_flag("LANCE_AUTO_DOWNLOAD", running_on_space())
|
|
|
|
| 1105 |
if snapshot_path != model_base_dir and not model_path.exists():
|
| 1106 |
os.environ["LANCE_MODEL_BASE_DIR"] = display_path(snapshot_path)
|
| 1107 |
model_path = get_model_path(model_variant)
|
| 1108 |
+
compact_downloaded_model_weights(model_base_dir, [MODEL_VARIANT_TO_DIR[normalize_model_variant(model_variant)]])
|
| 1109 |
return model_path
|
| 1110 |
|
| 1111 |
|
|
|
|
| 2402 |
print(f"[startup] flash-attn {DEFAULT_FLASH_ATTN_VERSION} installed successfully.", flush=True)
|
| 2403 |
|
| 2404 |
|
| 2405 |
+
def prefetch_lance_runtime_assets() -> None:
    """Install flash-attn and fetch both Lance model variants ahead of first use.

    The work runs while holding ``MODEL_ASSET_PREFETCH_LOCK``. If
    ``MODEL_ASSET_PREFETCH_DONE`` is already set the call returns immediately.

    On success ``MODEL_ASSET_PREFETCH_ERROR`` is cleared and
    ``MODEL_ASSET_PREFETCH_DONE`` is set. On failure the exception is not
    re-raised: a non-empty description is stored in
    ``MODEL_ASSET_PREFETCH_ERROR`` and ``MODEL_ASSET_PREFETCH_DONE`` is left
    unset.
    """
    global MODEL_ASSET_PREFETCH_ERROR
    with MODEL_ASSET_PREFETCH_LOCK:
        if MODEL_ASSET_PREFETCH_DONE.is_set():
            return
        print(
            "[startup] Preloading Lance runtime assets on CPU: flash-attn plus both model variants.",
            flush=True,
        )
        try:
            ensure_flash_attn_installed()
            for variant in (MODEL_VARIANT_VIDEO, MODEL_VARIANT_IMAGE):
                model_path = ensure_model_assets(variant)
                print(
                    f"[startup] CPU preload finished for {variant} at {display_path(model_path)}",
                    flush=True,
                )
            MODEL_ASSET_PREFETCH_ERROR = None
            MODEL_ASSET_PREFETCH_DONE.set()
            print("[startup] CPU asset preload finished for all Lance variants.", flush=True)
        except Exception as exc:
            # str(exc) is "" for an exception raised without a message. An empty
            # string is falsy, so a truthiness check on MODEL_ASSET_PREFETCH_ERROR
            # would miss the failure; fall back to the exception class name.
            error_text = str(exc) or type(exc).__name__
            MODEL_ASSET_PREFETCH_ERROR = error_text
            print(f"[startup] CPU asset preload failed: {error_text}", flush=True)
|
| 2428 |
+
|
| 2429 |
+
|
| 2430 |
+
def start_lance_runtime_asset_prefetch() -> None:
    """Kick off ``prefetch_lance_runtime_assets`` on a background daemon thread.

    Only the first call starts a thread; later calls see
    ``MODEL_ASSET_PREFETCH_STARTED`` already set and return without doing
    anything.
    """
    global MODEL_ASSET_PREFETCH_STARTED
    # Test-and-set under the lock so concurrent callers cannot both win.
    with MODEL_ASSET_PREFETCH_LOCK:
        already_started = MODEL_ASSET_PREFETCH_STARTED
        MODEL_ASSET_PREFETCH_STARTED = True
    if already_started:
        return
    threading.Thread(
        target=prefetch_lance_runtime_assets,
        name="lance-runtime-asset-prefetch",
        daemon=True,
    ).start()
|
| 2442 |
+
|
| 2443 |
+
|
| 2444 |
def get_env_int(name: str, default: int) -> int:
|
| 2445 |
"""Read an integer environment variable, falling back safely on invalid values."""
|
| 2446 |
try:
|
|
|
|
| 2488 |
if internal_task in {TASK_T2V, TASK_VIDEO_EDIT}:
|
| 2489 |
return clamp_zerogpu_duration(max(180, requested_seconds * 2))
|
| 2490 |
if internal_task == TASK_X2T_VIDEO:
|
| 2491 |
+
return clamp_zerogpu_duration(60)
|
| 2492 |
+
return clamp_zerogpu_duration(60)
|
| 2493 |
|
| 2494 |
|
| 2495 |
def get_pipeline_pool(task: str) -> PipelinePool:
|
|
|
|
| 2562 |
gpu_text = "unknown"
|
| 2563 |
concurrency = 1
|
| 2564 |
active_variant = "none"
|
| 2565 |
+
asset_status = "pending"
|
| 2566 |
if ACTIVE_PIPELINE_POOL is not None:
|
| 2567 |
active_variant = ACTIVE_PIPELINE_POOL.model_variant
|
| 2568 |
gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
|
| 2569 |
concurrency = ACTIVE_PIPELINE_POOL.size
|
| 2570 |
+
if MODEL_ASSET_PREFETCH_DONE.is_set():
|
| 2571 |
+
asset_status = "done"
|
| 2572 |
+
elif MODEL_ASSET_PREFETCH_STARTED:
|
| 2573 |
+
asset_status = "running"
|
| 2574 |
+
if MODEL_ASSET_PREFETCH_ERROR:
|
| 2575 |
+
asset_status = f"failed: {MODEL_ASSET_PREFETCH_ERROR}"
|
| 2576 |
return (
|
| 2577 |
f"**Status** GPU: `{gpu_text}` | Max concurrency: `{concurrency}` | "
|
| 2578 |
f"Queue limit: `{QUEUE_MAX_SIZE}` | Active model: `{active_variant}` | "
|
| 2579 |
+
f"Switch mode: `unload then load` | Asset preload: `{asset_status}`"
|
| 2580 |
)
|
| 2581 |
|
| 2582 |
|
|
|
|
| 3095 |
args = parse_args()
|
| 3096 |
os.environ["LANCE_GPUS"] = args.gpus
|
| 3097 |
QUEUE_MAX_SIZE = args.queue_size
|
| 3098 |
+
if env_flag("LANCE_PRELOAD_MODEL_ASSETS", running_on_space()):
|
| 3099 |
+
start_lance_runtime_asset_prefetch()
|
| 3100 |
+
else:
|
| 3101 |
+
print(
|
| 3102 |
+
"[startup] Model asset preload disabled. UI will launch first, and Lance weights will be downloaded lazily inside GPU inference calls.",
|
| 3103 |
+
flush=True,
|
| 3104 |
+
)
|
| 3105 |
concurrency_limit = 1
|
| 3106 |
demo = build_demo()
|
| 3107 |
demo.queue(
|