Spaces:
Running on Zero
Running on Zero
Convert Lance weights to bf16 on download
Browse files- app.py +103 -8
- assets/logo/{lance-logo.webp → lance-logo.png} +2 -2
app.py
CHANGED
|
@@ -8,6 +8,7 @@ import html
|
|
| 8 |
import json
|
| 9 |
import os
|
| 10 |
import random
|
|
|
|
| 11 |
import subprocess
|
| 12 |
import threading
|
| 13 |
import time
|
|
@@ -21,7 +22,8 @@ from typing import Optional
|
|
| 21 |
import gradio as gr
|
| 22 |
import torch
|
| 23 |
from huggingface_hub import snapshot_download
|
| 24 |
-
from safetensors
|
|
|
|
| 25 |
from transformers import set_seed
|
| 26 |
from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import Qwen2_5_VLVisionConfig
|
| 27 |
|
|
@@ -117,7 +119,7 @@ LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
|
|
| 117 |
LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
|
| 118 |
LANCE_HUGGING_FACE_URL = "https://huggingface.co/bytedance-research/Lance"
|
| 119 |
LANCE_GITHUB_URL = "https://github.com/bytedance/Lance"
|
| 120 |
-
LANCE_LOGO_PATH = REPO_ROOT / "assets" / "logo" / "lance-logo.
|
| 121 |
|
| 122 |
APP_CSS = """
|
| 123 |
.gradio-container {
|
|
@@ -501,6 +503,13 @@ APP_CSS = """
|
|
| 501 |
border-radius: 10px !important;
|
| 502 |
}
|
| 503 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
.generation-choice-grid .wrap label span {
|
| 505 |
font-size: 16px !important;
|
| 506 |
white-space: nowrap !important;
|
|
@@ -943,6 +952,74 @@ def get_required_model_asset_paths(model_base_dir: Path, model_path: Path) -> li
|
|
| 943 |
]
|
| 944 |
|
| 945 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 946 |
def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
|
| 947 |
model_base_dir = get_model_base_dir()
|
| 948 |
os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
|
|
@@ -950,6 +1027,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
|
|
| 950 |
|
| 951 |
required_paths = get_required_model_asset_paths(model_base_dir, model_path)
|
| 952 |
if all(path.exists() for path in required_paths):
|
|
|
|
| 953 |
return model_path
|
| 954 |
|
| 955 |
downloads_model_base_dir = Path("downloads")
|
|
@@ -961,6 +1039,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
|
|
| 961 |
model_path = downloads_model_path
|
| 962 |
required_paths = downloads_required_paths
|
| 963 |
os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
|
|
|
|
| 964 |
return model_path
|
| 965 |
|
| 966 |
auto_download = env_flag("LANCE_AUTO_DOWNLOAD", running_on_space())
|
|
@@ -980,11 +1059,13 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
|
|
| 980 |
local_dir=str(model_base_dir),
|
| 981 |
local_dir_use_symlinks=False,
|
| 982 |
resume_download=True,
|
|
|
|
| 983 |
)
|
| 984 |
)
|
| 985 |
if snapshot_path != model_base_dir and not model_path.exists():
|
| 986 |
os.environ["LANCE_MODEL_BASE_DIR"] = display_path(snapshot_path)
|
| 987 |
model_path = get_model_path(model_variant)
|
|
|
|
| 988 |
return model_path
|
| 989 |
|
| 990 |
|
|
@@ -2253,6 +2334,11 @@ def get_task_model_variant(task: str) -> str:
|
|
| 2253 |
|
| 2254 |
def get_pipeline_pool(task: str) -> PipelinePool:
|
| 2255 |
global ACTIVE_PIPELINE_POOL
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2256 |
model_variant = get_task_model_variant(task)
|
| 2257 |
with ACTIVE_POOL_LOCK:
|
| 2258 |
if ACTIVE_PIPELINE_POOL is not None and ACTIVE_PIPELINE_POOL.model_variant == model_variant:
|
|
@@ -2841,16 +2927,25 @@ def parse_gpu_ids(gpu_string: str) -> list[int]:
|
|
| 2841 |
if __name__ == "__main__":
|
| 2842 |
args = parse_args()
|
| 2843 |
os.environ["LANCE_GPUS"] = args.gpus
|
| 2844 |
-
resolved_model_path = ensure_model_assets(MODEL_VARIANT_VIDEO)
|
| 2845 |
-
print(f"[startup] Using Lance model path: {resolved_model_path}", flush=True)
|
| 2846 |
QUEUE_MAX_SIZE = args.queue_size
|
| 2847 |
-
|
| 2848 |
-
|
| 2849 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2850 |
demo = build_demo()
|
| 2851 |
demo.queue(
|
| 2852 |
max_size=args.queue_size,
|
| 2853 |
-
default_concurrency_limit=
|
| 2854 |
).launch(
|
| 2855 |
server_name=args.server_name,
|
| 2856 |
server_port=args.server_port,
|
|
|
|
| 8 |
import json
|
| 9 |
import os
|
| 10 |
import random
|
| 11 |
+
import shutil
|
| 12 |
import subprocess
|
| 13 |
import threading
|
| 14 |
import time
|
|
|
|
| 22 |
import gradio as gr
|
| 23 |
import torch
|
| 24 |
from huggingface_hub import snapshot_download
|
| 25 |
+
from safetensors import safe_open
|
| 26 |
+
from safetensors.torch import load_file, save_file
|
| 27 |
from transformers import set_seed
|
| 28 |
from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import Qwen2_5_VLVisionConfig
|
| 29 |
|
|
|
|
| 119 |
LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
|
| 120 |
LANCE_HUGGING_FACE_URL = "https://huggingface.co/bytedance-research/Lance"
|
| 121 |
LANCE_GITHUB_URL = "https://github.com/bytedance/Lance"
|
| 122 |
+
LANCE_LOGO_PATH = REPO_ROOT / "assets" / "logo" / "lance-logo.png"
|
| 123 |
|
| 124 |
APP_CSS = """
|
| 125 |
.gradio-container {
|
|
|
|
| 503 |
border-radius: 10px !important;
|
| 504 |
}
|
| 505 |
|
| 506 |
+
.aspect-ratio-row .generation-choice-grid label,
|
| 507 |
+
.video-duration-row .generation-choice-grid label {
|
| 508 |
+
justify-content: flex-start !important;
|
| 509 |
+
text-align: left !important;
|
| 510 |
+
padding-left: 14px !important;
|
| 511 |
+
}
|
| 512 |
+
|
| 513 |
.generation-choice-grid .wrap label span {
|
| 514 |
font-size: 16px !important;
|
| 515 |
white-space: nowrap !important;
|
|
|
|
| 952 |
]
|
| 953 |
|
| 954 |
|
| 955 |
+
def get_model_download_allow_patterns(model_variant: Optional[str] = None) -> list[str]:
    """Build the ``allow_patterns`` list used when downloading one model variant.

    Args:
        model_variant: Variant identifier. It is passed through
            ``normalize_model_variant`` before the directory lookup in
            ``MODEL_VARIANT_TO_DIR``.

    Returns:
        A fresh list of glob patterns: the variant's own directory, the
        ``Qwen2.5-VL-ViT`` directory, the VAE checkpoint, and the top-level
        config / tokenizer files.
    """
    variant_dir = MODEL_VARIANT_TO_DIR[normalize_model_variant(model_variant)]

    # Whole directories that are fetched recursively.
    directory_globs = [f"{variant_dir}/**", "Qwen2.5-VL-ViT/**"]

    # Individual files that sit at the repository root.
    top_level_files = [
        "Wan2.2_VAE.pth",
        "generation_config.json",
        "llm_config.json",
        "tokenizer.json",
        "tokenizer_config.json",
        "vocab.json",
        "merges.txt",
        "config.json",
    ]
    return directory_globs + top_level_files
|
| 970 |
+
|
| 971 |
+
|
| 972 |
+
def _get_safetensors_first_tensor_dtype(path: Path) -> Optional[torch.dtype]:
    """Return the dtype of the first tensor listed in a safetensors file.

    Args:
        path: Location of the ``.safetensors`` file to inspect.

    Returns:
        The dtype of the tensor stored under the first key reported by the
        file, or ``None`` when ``path`` does not exist or the file lists no
        tensors.
    """
    if path.exists():
        with safe_open(str(path), framework="pt", device="cpu") as handle:
            tensor_names = list(handle.keys())
            if tensor_names:
                # Only the first entry is sampled; callers treat it as
                # representative of the whole file.
                return handle.get_tensor(tensor_names[0]).dtype
    return None
|
| 980 |
+
|
| 981 |
+
|
| 982 |
+
def convert_model_weights_to_bf16_inplace(model_path: Path) -> bool:
    """Rewrite ``model_path / "model.safetensors"`` with fp32 tensors cast to bf16.

    The dtype of the first tensor in the file decides whether a conversion is
    attempted. When it is, every float32 tensor is cast to bfloat16, tensors of
    any other dtype are copied unchanged, and the file's metadata is preserved.
    The result is written to a sibling temporary file and then moved over the
    original with ``os.replace``.

    Args:
        model_path: Directory expected to contain ``model.safetensors``.

    Returns:
        ``True`` when the weight file was rewritten. ``False`` when nothing was
        done: the file is missing, lists no tensors, already starts with a
        bfloat16 tensor, or starts with a tensor that is neither float32 nor
        bfloat16.

    Raises:
        Exception: Any error raised while reading, converting, saving or
            replacing the file is propagated. The temporary file is removed
            first so a failed conversion does not leave a large partial file
            behind.
    """
    weight_path = model_path / "model.safetensors"
    if not weight_path.exists():
        return False

    first_dtype = _get_safetensors_first_tensor_dtype(weight_path)
    if first_dtype is None or first_dtype == torch.bfloat16:
        return False

    if first_dtype != torch.float32:
        print(
            f"[startup] Skipping bf16 conversion for {weight_path} because the first tensor dtype is {first_dtype}.",
            flush=True,
        )
        return False

    temp_path = weight_path.with_suffix(".bf16.safetensors.tmp")
    print(f"[startup] Converting {weight_path} to bf16 to reduce disk usage.", flush=True)
    try:
        with safe_open(str(weight_path), framework="pt", device="cpu") as f:
            metadata = f.metadata()
            tensors = {}
            for name in f.keys():
                tensor = f.get_tensor(name)
                tensors[name] = tensor.to(torch.bfloat16) if tensor.dtype == torch.float32 else tensor
            save_file(tensors, str(temp_path), metadata=metadata)

        # Swap only after the source handle is closed and the new file is
        # completely written, so the original stays intact on any failure.
        os.replace(temp_path, weight_path)
    except BaseException:
        # The whole point of this function is to save disk space; never leave
        # a partially written copy of the weights next to the original.
        temp_path.unlink(missing_ok=True)
        raise

    print(f"[startup] Replaced original fp32 weights with bf16 weights at {weight_path}.", flush=True)
    return True
|
| 1012 |
+
|
| 1013 |
+
|
| 1014 |
+
def compact_downloaded_model_weights(model_base_dir: Path) -> None:
    """Try to convert the image and video model weights under a base directory to bf16.

    Each variant directory is handled independently and on a best-effort
    basis: a failure for one directory is printed and does not stop the other
    from being processed.

    Args:
        model_base_dir: Directory that contains the per-variant model
            directories named in ``MODEL_VARIANT_TO_DIR``.
    """
    # Resolve both directory names up front, before any conversion starts.
    variant_dir_names = (
        MODEL_VARIANT_TO_DIR[MODEL_VARIANT_IMAGE],
        MODEL_VARIANT_TO_DIR[MODEL_VARIANT_VIDEO],
    )
    for dir_name in variant_dir_names:
        candidate_path = model_base_dir / dir_name
        try:
            convert_model_weights_to_bf16_inplace(candidate_path)
        except Exception as exc:
            # Compaction is an optimisation only; report and carry on.
            print(f"[startup] bf16 compaction skipped for {display_path(candidate_path)}: {exc}", flush=True)
|
| 1021 |
+
|
| 1022 |
+
|
| 1023 |
def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
|
| 1024 |
model_base_dir = get_model_base_dir()
|
| 1025 |
os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
|
|
|
|
| 1027 |
|
| 1028 |
required_paths = get_required_model_asset_paths(model_base_dir, model_path)
|
| 1029 |
if all(path.exists() for path in required_paths):
|
| 1030 |
+
compact_downloaded_model_weights(model_base_dir)
|
| 1031 |
return model_path
|
| 1032 |
|
| 1033 |
downloads_model_base_dir = Path("downloads")
|
|
|
|
| 1039 |
model_path = downloads_model_path
|
| 1040 |
required_paths = downloads_required_paths
|
| 1041 |
os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
|
| 1042 |
+
compact_downloaded_model_weights(model_base_dir)
|
| 1043 |
return model_path
|
| 1044 |
|
| 1045 |
auto_download = env_flag("LANCE_AUTO_DOWNLOAD", running_on_space())
|
|
|
|
| 1059 |
local_dir=str(model_base_dir),
|
| 1060 |
local_dir_use_symlinks=False,
|
| 1061 |
resume_download=True,
|
| 1062 |
+
allow_patterns=get_model_download_allow_patterns(model_variant),
|
| 1063 |
)
|
| 1064 |
)
|
| 1065 |
if snapshot_path != model_base_dir and not model_path.exists():
|
| 1066 |
os.environ["LANCE_MODEL_BASE_DIR"] = display_path(snapshot_path)
|
| 1067 |
model_path = get_model_path(model_variant)
|
| 1068 |
+
compact_downloaded_model_weights(model_base_dir)
|
| 1069 |
return model_path
|
| 1070 |
|
| 1071 |
|
|
|
|
| 2334 |
|
| 2335 |
def get_pipeline_pool(task: str) -> PipelinePool:
|
| 2336 |
global ACTIVE_PIPELINE_POOL
|
| 2337 |
+
if not torch.cuda.is_available():
|
| 2338 |
+
raise RuntimeError(
|
| 2339 |
+
"Lance inference requires a GPU. The Gradio UI can start on CPU, but generation is disabled "
|
| 2340 |
+
"until GPU hardware is attached."
|
| 2341 |
+
)
|
| 2342 |
model_variant = get_task_model_variant(task)
|
| 2343 |
with ACTIVE_POOL_LOCK:
|
| 2344 |
if ACTIVE_PIPELINE_POOL is not None and ACTIVE_PIPELINE_POOL.model_variant == model_variant:
|
|
|
|
| 2927 |
if __name__ == "__main__":
|
| 2928 |
args = parse_args()
|
| 2929 |
os.environ["LANCE_GPUS"] = args.gpus
|
|
|
|
|
|
|
| 2930 |
QUEUE_MAX_SIZE = args.queue_size
|
| 2931 |
+
preload_models = torch.cuda.is_available() and env_flag("LANCE_PRELOAD_MODELS", True)
|
| 2932 |
+
if preload_models:
|
| 2933 |
+
resolved_model_path = ensure_model_assets(MODEL_VARIANT_VIDEO)
|
| 2934 |
+
print(f"[startup] Using Lance model path: {resolved_model_path}", flush=True)
|
| 2935 |
+
gpu_ids = parse_gpu_ids(args.gpus)
|
| 2936 |
+
ACTIVE_PIPELINE_POOL = PipelinePool(gpu_ids, model_variant=MODEL_VARIANT_VIDEO)
|
| 2937 |
+
ACTIVE_PIPELINE_POOL.initialize_all()
|
| 2938 |
+
concurrency_limit = ACTIVE_PIPELINE_POOL.size
|
| 2939 |
+
else:
|
| 2940 |
+
print(
|
| 2941 |
+
"[startup] Skipping model preload. UI will launch without loading Lance weights until GPU hardware is available.",
|
| 2942 |
+
flush=True,
|
| 2943 |
+
)
|
| 2944 |
+
concurrency_limit = 1
|
| 2945 |
demo = build_demo()
|
| 2946 |
demo.queue(
|
| 2947 |
max_size=args.queue_size,
|
| 2948 |
+
default_concurrency_limit=concurrency_limit,
|
| 2949 |
).launch(
|
| 2950 |
server_name=args.server_name,
|
| 2951 |
server_port=args.server_port,
|
assets/logo/{lance-logo.webp → lance-logo.png}
RENAMED
|
File without changes
|