ffy2000 committed on
Commit
c3f6ca2
·
1 Parent(s): 7cbf4f4

Convert Lance weights to bf16 on download

Browse files
app.py CHANGED
@@ -8,6 +8,7 @@ import html
8
  import json
9
  import os
10
  import random
 
11
  import subprocess
12
  import threading
13
  import time
@@ -21,7 +22,8 @@ from typing import Optional
21
  import gradio as gr
22
  import torch
23
  from huggingface_hub import snapshot_download
24
- from safetensors.torch import load_file
 
25
  from transformers import set_seed
26
  from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import Qwen2_5_VLVisionConfig
27
 
@@ -117,7 +119,7 @@ LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
117
  LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
118
  LANCE_HUGGING_FACE_URL = "https://huggingface.co/bytedance-research/Lance"
119
  LANCE_GITHUB_URL = "https://github.com/bytedance/Lance"
120
- LANCE_LOGO_PATH = REPO_ROOT / "assets" / "logo" / "lance-logo.webp"
121
 
122
  APP_CSS = """
123
  .gradio-container {
@@ -501,6 +503,13 @@ APP_CSS = """
501
  border-radius: 10px !important;
502
  }
503
 
 
 
 
 
 
 
 
504
  .generation-choice-grid .wrap label span {
505
  font-size: 16px !important;
506
  white-space: nowrap !important;
@@ -943,6 +952,74 @@ def get_required_model_asset_paths(model_base_dir: Path, model_path: Path) -> li
943
  ]
944
 
945
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
946
  def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
947
  model_base_dir = get_model_base_dir()
948
  os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
@@ -950,6 +1027,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
950
 
951
  required_paths = get_required_model_asset_paths(model_base_dir, model_path)
952
  if all(path.exists() for path in required_paths):
 
953
  return model_path
954
 
955
  downloads_model_base_dir = Path("downloads")
@@ -961,6 +1039,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
961
  model_path = downloads_model_path
962
  required_paths = downloads_required_paths
963
  os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
 
964
  return model_path
965
 
966
  auto_download = env_flag("LANCE_AUTO_DOWNLOAD", running_on_space())
@@ -980,11 +1059,13 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
980
  local_dir=str(model_base_dir),
981
  local_dir_use_symlinks=False,
982
  resume_download=True,
 
983
  )
984
  )
985
  if snapshot_path != model_base_dir and not model_path.exists():
986
  os.environ["LANCE_MODEL_BASE_DIR"] = display_path(snapshot_path)
987
  model_path = get_model_path(model_variant)
 
988
  return model_path
989
 
990
 
@@ -2253,6 +2334,11 @@ def get_task_model_variant(task: str) -> str:
2253
 
2254
  def get_pipeline_pool(task: str) -> PipelinePool:
2255
  global ACTIVE_PIPELINE_POOL
 
 
 
 
 
2256
  model_variant = get_task_model_variant(task)
2257
  with ACTIVE_POOL_LOCK:
2258
  if ACTIVE_PIPELINE_POOL is not None and ACTIVE_PIPELINE_POOL.model_variant == model_variant:
@@ -2841,16 +2927,25 @@ def parse_gpu_ids(gpu_string: str) -> list[int]:
2841
  if __name__ == "__main__":
2842
  args = parse_args()
2843
  os.environ["LANCE_GPUS"] = args.gpus
2844
- resolved_model_path = ensure_model_assets(MODEL_VARIANT_VIDEO)
2845
- print(f"[startup] Using Lance model path: {resolved_model_path}", flush=True)
2846
  QUEUE_MAX_SIZE = args.queue_size
2847
- gpu_ids = parse_gpu_ids(args.gpus)
2848
- ACTIVE_PIPELINE_POOL = PipelinePool(gpu_ids, model_variant=MODEL_VARIANT_VIDEO)
2849
- ACTIVE_PIPELINE_POOL.initialize_all()
 
 
 
 
 
 
 
 
 
 
 
2850
  demo = build_demo()
2851
  demo.queue(
2852
  max_size=args.queue_size,
2853
- default_concurrency_limit=ACTIVE_PIPELINE_POOL.size,
2854
  ).launch(
2855
  server_name=args.server_name,
2856
  server_port=args.server_port,
 
8
  import json
9
  import os
10
  import random
11
+ import shutil
12
  import subprocess
13
  import threading
14
  import time
 
22
  import gradio as gr
23
  import torch
24
  from huggingface_hub import snapshot_download
25
+ from safetensors import safe_open
26
+ from safetensors.torch import load_file, save_file
27
  from transformers import set_seed
28
  from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import Qwen2_5_VLVisionConfig
29
 
 
119
  LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
120
  LANCE_HUGGING_FACE_URL = "https://huggingface.co/bytedance-research/Lance"
121
  LANCE_GITHUB_URL = "https://github.com/bytedance/Lance"
122
+ LANCE_LOGO_PATH = REPO_ROOT / "assets" / "logo" / "lance-logo.png"
123
 
124
  APP_CSS = """
125
  .gradio-container {
 
503
  border-radius: 10px !important;
504
  }
505
 
506
+ .aspect-ratio-row .generation-choice-grid label,
507
+ .video-duration-row .generation-choice-grid label {
508
+ justify-content: flex-start !important;
509
+ text-align: left !important;
510
+ padding-left: 14px !important;
511
+ }
512
+
513
  .generation-choice-grid .wrap label span {
514
  font-size: 16px !important;
515
  white-space: nowrap !important;
 
952
  ]
953
 
954
 
955
def get_model_download_allow_patterns(model_variant: Optional[str] = None) -> list[str]:
    """Return the file patterns to fetch for one model variant.

    The list starts with a recursive glob for the variant's own directory
    (looked up in ``MODEL_VARIANT_TO_DIR`` after normalizing
    ``model_variant``), followed by a fixed set of shared entries: the
    ``Qwen2.5-VL-ViT`` directory, the ``Wan2.2_VAE.pth`` file, and the
    top-level config/tokenizer files.

    Args:
        model_variant: Variant identifier; passed through
            ``normalize_model_variant`` before the directory lookup.

    Returns:
        A new list of glob patterns, variant directory first.
    """
    variant_dir = MODEL_VARIANT_TO_DIR[normalize_model_variant(model_variant)]
    # Entries that do not depend on the selected variant.
    shared_entries = (
        "Qwen2.5-VL-ViT/**",
        "Wan2.2_VAE.pth",
        "generation_config.json",
        "llm_config.json",
        "tokenizer.json",
        "tokenizer_config.json",
        "vocab.json",
        "merges.txt",
        "config.json",
    )
    return [f"{variant_dir}/**", *shared_entries]
970
+
971
+
972
+ def _get_safetensors_first_tensor_dtype(path: Path) -> Optional[torch.dtype]:
973
+ if not path.exists():
974
+ return None
975
+ with safe_open(str(path), framework="pt", device="cpu") as f:
976
+ keys = list(f.keys())
977
+ if not keys:
978
+ return None
979
+ return f.get_tensor(keys[0]).dtype
980
+
981
+
982
def convert_model_weights_to_bf16_inplace(model_path: Path) -> bool:
    """Rewrite ``model_path / "model.safetensors"`` with fp32 tensors cast to bf16.

    The conversion only runs when the first tensor in the file is float32.
    Files that are missing, empty, already bfloat16, or led by any other
    dtype are left untouched. Tensors that are not float32 are copied
    through unchanged, and the file's metadata is preserved.

    The new weights are written to a sibling temporary file and then moved
    over the original with ``os.replace``. If anything fails before the
    replace completes, the temporary file is removed so a failed conversion
    does not leave a partial copy of the weights on disk.

    Args:
        model_path: Directory expected to contain ``model.safetensors``.

    Returns:
        ``True`` if the file was converted and replaced, ``False`` if the
        conversion was skipped.

    Raises:
        Exception: Any error raised while reading, converting, saving, or
            replacing the file is propagated after the temporary file has
            been cleaned up.
    """
    weight_path = model_path / "model.safetensors"
    if not weight_path.exists():
        return False

    first_dtype = _get_safetensors_first_tensor_dtype(weight_path)
    if first_dtype is None or first_dtype == torch.bfloat16:
        return False

    if first_dtype != torch.float32:
        print(
            f"[startup] Skipping bf16 conversion for {weight_path} because the first tensor dtype is {first_dtype}.",
            flush=True,
        )
        return False

    temp_path = weight_path.with_suffix(".bf16.safetensors.tmp")
    print(f"[startup] Converting {weight_path} to bf16 to reduce disk usage.", flush=True)
    try:
        with safe_open(str(weight_path), framework="pt", device="cpu") as f:
            metadata = f.metadata()
            tensors = {}
            for name in f.keys():
                tensor = f.get_tensor(name)
                tensors[name] = tensor.to(torch.bfloat16) if tensor.dtype == torch.float32 else tensor
            save_file(tensors, str(temp_path), metadata=metadata)
        # Replace only after the source handle is closed.
        os.replace(temp_path, weight_path)
    finally:
        # After a successful replace the temp file no longer exists and this
        # is a no-op; after a failure it removes the partially written copy.
        try:
            temp_path.unlink(missing_ok=True)
        except OSError:
            # Best-effort cleanup: never mask the original error (if any).
            pass

    print(f"[startup] Replaced original fp32 weights with bf16 weights at {weight_path}.", flush=True)
    return True
1012
+
1013
+
1014
def compact_downloaded_model_weights(model_base_dir: Path) -> None:
    """Attempt bf16 conversion for the image and video model directories.

    Each variant directory under ``model_base_dir`` is handed to
    ``convert_model_weights_to_bf16_inplace``. This is best-effort: a failure
    for one directory is logged and does not stop the other from being
    processed, nor does it propagate to the caller.

    Args:
        model_base_dir: Directory that holds the per-variant model folders.
    """
    variant_dirs = [MODEL_VARIANT_TO_DIR[variant] for variant in (MODEL_VARIANT_IMAGE, MODEL_VARIANT_VIDEO)]
    for dir_name in variant_dirs:
        candidate = model_base_dir / dir_name
        try:
            convert_model_weights_to_bf16_inplace(candidate)
        except Exception as exc:
            # Compaction is an optimization only; report and carry on.
            print(f"[startup] bf16 compaction skipped for {display_path(candidate)}: {exc}", flush=True)
1021
+
1022
+
1023
  def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
1024
  model_base_dir = get_model_base_dir()
1025
  os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
 
1027
 
1028
  required_paths = get_required_model_asset_paths(model_base_dir, model_path)
1029
  if all(path.exists() for path in required_paths):
1030
+ compact_downloaded_model_weights(model_base_dir)
1031
  return model_path
1032
 
1033
  downloads_model_base_dir = Path("downloads")
 
1039
  model_path = downloads_model_path
1040
  required_paths = downloads_required_paths
1041
  os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
1042
+ compact_downloaded_model_weights(model_base_dir)
1043
  return model_path
1044
 
1045
  auto_download = env_flag("LANCE_AUTO_DOWNLOAD", running_on_space())
 
1059
  local_dir=str(model_base_dir),
1060
  local_dir_use_symlinks=False,
1061
  resume_download=True,
1062
+ allow_patterns=get_model_download_allow_patterns(model_variant),
1063
  )
1064
  )
1065
  if snapshot_path != model_base_dir and not model_path.exists():
1066
  os.environ["LANCE_MODEL_BASE_DIR"] = display_path(snapshot_path)
1067
  model_path = get_model_path(model_variant)
1068
+ compact_downloaded_model_weights(model_base_dir)
1069
  return model_path
1070
 
1071
 
 
2334
 
2335
  def get_pipeline_pool(task: str) -> PipelinePool:
2336
  global ACTIVE_PIPELINE_POOL
2337
+ if not torch.cuda.is_available():
2338
+ raise RuntimeError(
2339
+ "Lance inference requires a GPU. The Gradio UI can start on CPU, but generation is disabled "
2340
+ "until GPU hardware is attached."
2341
+ )
2342
  model_variant = get_task_model_variant(task)
2343
  with ACTIVE_POOL_LOCK:
2344
  if ACTIVE_PIPELINE_POOL is not None and ACTIVE_PIPELINE_POOL.model_variant == model_variant:
 
2927
  if __name__ == "__main__":
2928
  args = parse_args()
2929
  os.environ["LANCE_GPUS"] = args.gpus
 
 
2930
  QUEUE_MAX_SIZE = args.queue_size
2931
+ preload_models = torch.cuda.is_available() and env_flag("LANCE_PRELOAD_MODELS", True)
2932
+ if preload_models:
2933
+ resolved_model_path = ensure_model_assets(MODEL_VARIANT_VIDEO)
2934
+ print(f"[startup] Using Lance model path: {resolved_model_path}", flush=True)
2935
+ gpu_ids = parse_gpu_ids(args.gpus)
2936
+ ACTIVE_PIPELINE_POOL = PipelinePool(gpu_ids, model_variant=MODEL_VARIANT_VIDEO)
2937
+ ACTIVE_PIPELINE_POOL.initialize_all()
2938
+ concurrency_limit = ACTIVE_PIPELINE_POOL.size
2939
+ else:
2940
+ print(
2941
+ "[startup] Skipping model preload. UI will launch without loading Lance weights until GPU hardware is available.",
2942
+ flush=True,
2943
+ )
2944
+ concurrency_limit = 1
2945
  demo = build_demo()
2946
  demo.queue(
2947
  max_size=args.queue_size,
2948
+ default_concurrency_limit=concurrency_limit,
2949
  ).launch(
2950
  server_name=args.server_name,
2951
  server_port=args.server_port,
assets/logo/{lance-logo.webp → lance-logo.png} RENAMED
File without changes