Spaces:

bytedance-research
/

Lance

Running on Zero

App Files Files Community

ffy2000 committed 5 days ago

Commit

c3f6ca2

1 Parent(s): 7cbf4f4

Convert Lance weights to bf16 on download

Browse files

Files changed (2) hide show

app.py +103 -8
assets/logo/{lance-logo.webp → lance-logo.png} +2 -2

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import html
 import json
 import os
 import random
 import subprocess
 import threading
 import time
@@ -21,7 +22,8 @@ from typing import Optional
 import gradio as gr
 import torch
 from huggingface_hub import snapshot_download
-from safetensors.torch import load_file
 from transformers import set_seed
 from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import Qwen2_5_VLVisionConfig
@@ -117,7 +119,7 @@ LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
 LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
 LANCE_HUGGING_FACE_URL = "https://huggingface.co/bytedance-research/Lance"
 LANCE_GITHUB_URL = "https://github.com/bytedance/Lance"
-LANCE_LOGO_PATH = REPO_ROOT / "assets" / "logo" / "lance-logo.webp"
 APP_CSS = """
 .gradio-container {
@@ -501,6 +503,13 @@ APP_CSS = """
     border-radius: 10px !important;
 }
 .generation-choice-grid .wrap label span {
     font-size: 16px !important;
     white-space: nowrap !important;
@@ -943,6 +952,74 @@ def get_required_model_asset_paths(model_base_dir: Path, model_path: Path) -> li
     ]
 def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
     model_base_dir = get_model_base_dir()
     os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
@@ -950,6 +1027,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
     required_paths = get_required_model_asset_paths(model_base_dir, model_path)
     if all(path.exists() for path in required_paths):
         return model_path
     downloads_model_base_dir = Path("downloads")
@@ -961,6 +1039,7 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
             model_path = downloads_model_path
             required_paths = downloads_required_paths
             os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
             return model_path
     auto_download = env_flag("LANCE_AUTO_DOWNLOAD", running_on_space())
@@ -980,11 +1059,13 @@ def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
             local_dir=str(model_base_dir),
             local_dir_use_symlinks=False,
             resume_download=True,
         )
     )
     if snapshot_path != model_base_dir and not model_path.exists():
         os.environ["LANCE_MODEL_BASE_DIR"] = display_path(snapshot_path)
         model_path = get_model_path(model_variant)
     return model_path
@@ -2253,6 +2334,11 @@ def get_task_model_variant(task: str) -> str:
 def get_pipeline_pool(task: str) -> PipelinePool:
     global ACTIVE_PIPELINE_POOL
     model_variant = get_task_model_variant(task)
     with ACTIVE_POOL_LOCK:
         if ACTIVE_PIPELINE_POOL is not None and ACTIVE_PIPELINE_POOL.model_variant == model_variant:
@@ -2841,16 +2927,25 @@ def parse_gpu_ids(gpu_string: str) -> list[int]:
 if __name__ == "__main__":
     args = parse_args()
     os.environ["LANCE_GPUS"] = args.gpus
-    resolved_model_path = ensure_model_assets(MODEL_VARIANT_VIDEO)
-    print(f"[startup] Using Lance model path: {resolved_model_path}", flush=True)
     QUEUE_MAX_SIZE = args.queue_size
-    gpu_ids = parse_gpu_ids(args.gpus)
-    ACTIVE_PIPELINE_POOL = PipelinePool(gpu_ids, model_variant=MODEL_VARIANT_VIDEO)
-    ACTIVE_PIPELINE_POOL.initialize_all()
     demo = build_demo()
     demo.queue(
         max_size=args.queue_size,
-        default_concurrency_limit=ACTIVE_PIPELINE_POOL.size,
     ).launch(
         server_name=args.server_name,
         server_port=args.server_port,

 import json
 import os
 import random
+import shutil
 import subprocess
 import threading
 import time
 import gradio as gr
 import torch
 from huggingface_hub import snapshot_download
+from safetensors import safe_open
+from safetensors.torch import load_file, save_file
 from transformers import set_seed
 from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import Qwen2_5_VLVisionConfig
 LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
 LANCE_HUGGING_FACE_URL = "https://huggingface.co/bytedance-research/Lance"
 LANCE_GITHUB_URL = "https://github.com/bytedance/Lance"
+LANCE_LOGO_PATH = REPO_ROOT / "assets" / "logo" / "lance-logo.png"
 APP_CSS = """
 .gradio-container {
     border-radius: 10px !important;
 }
+.aspect-ratio-row .generation-choice-grid label,
+.video-duration-row .generation-choice-grid label {
+    justify-content: flex-start !important;
+    text-align: left !important;
+    padding-left: 14px !important;
+}
 .generation-choice-grid .wrap label span {
     font-size: 16px !important;
     white-space: nowrap !important;
     ]
+def get_model_download_allow_patterns(model_variant: Optional[str] = None) -> list[str]:
+    variant = normalize_model_variant(model_variant)
+    model_dir_name = MODEL_VARIANT_TO_DIR[variant]
+    return [
+        f"{model_dir_name}/**",
+        "Qwen2.5-VL-ViT/**",
+        "Wan2.2_VAE.pth",
+        "generation_config.json",
+        "llm_config.json",
+        "tokenizer.json",
+        "tokenizer_config.json",
+        "vocab.json",
+        "merges.txt",
+        "config.json",
+    ]
+def _get_safetensors_first_tensor_dtype(path: Path) -> Optional[torch.dtype]:
+    if not path.exists():
+        return None
+    with safe_open(str(path), framework="pt", device="cpu") as f:
+        keys = list(f.keys())
+        if not keys:
+            return None
+        return f.get_tensor(keys[0]).dtype
+def convert_model_weights_to_bf16_inplace(model_path: Path) -> bool:
+    weight_path = model_path / "model.safetensors"
+    if not weight_path.exists():
+        return False
+    first_dtype = _get_safetensors_first_tensor_dtype(weight_path)
+    if first_dtype is None or first_dtype == torch.bfloat16:
+        return False
+    if first_dtype != torch.float32:
+        print(
+            f"[startup] Skipping bf16 conversion for {weight_path} because the first tensor dtype is {first_dtype}.",
+            flush=True,
+        )
+        return False
+    temp_path = weight_path.with_suffix(".bf16.safetensors.tmp")
+    print(f"[startup] Converting {weight_path} to bf16 to reduce disk usage.", flush=True)
+    with safe_open(str(weight_path), framework="pt", device="cpu") as f:
+        metadata = f.metadata()
+        tensor_names = list(f.keys())
+        tensors = {}
+        for name in tensor_names:
+            tensor = f.get_tensor(name)
+            tensors[name] = tensor.to(torch.bfloat16) if tensor.dtype == torch.float32 else tensor
+        save_file(tensors, str(temp_path), metadata=metadata)
+    os.replace(temp_path, weight_path)
+    print(f"[startup] Replaced original fp32 weights with bf16 weights at {weight_path}.", flush=True)
+    return True
+def compact_downloaded_model_weights(model_base_dir: Path) -> None:
+    for model_dir_name in (MODEL_VARIANT_TO_DIR[MODEL_VARIANT_IMAGE], MODEL_VARIANT_TO_DIR[MODEL_VARIANT_VIDEO]):
+        model_path = model_base_dir / model_dir_name
+        try:
+            convert_model_weights_to_bf16_inplace(model_path)
+        except Exception as exc:
+            print(f"[startup] bf16 compaction skipped for {display_path(model_path)}: {exc}", flush=True)
 def ensure_model_assets(model_variant: Optional[str] = None) -> Path:
     model_base_dir = get_model_base_dir()
     os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
     required_paths = get_required_model_asset_paths(model_base_dir, model_path)
     if all(path.exists() for path in required_paths):
+        compact_downloaded_model_weights(model_base_dir)
         return model_path
     downloads_model_base_dir = Path("downloads")
             model_path = downloads_model_path
             required_paths = downloads_required_paths
             os.environ["LANCE_MODEL_BASE_DIR"] = display_path(model_base_dir)
+            compact_downloaded_model_weights(model_base_dir)
             return model_path
     auto_download = env_flag("LANCE_AUTO_DOWNLOAD", running_on_space())
             local_dir=str(model_base_dir),
             local_dir_use_symlinks=False,
             resume_download=True,
+            allow_patterns=get_model_download_allow_patterns(model_variant),
         )
     )
     if snapshot_path != model_base_dir and not model_path.exists():
         os.environ["LANCE_MODEL_BASE_DIR"] = display_path(snapshot_path)
         model_path = get_model_path(model_variant)
+    compact_downloaded_model_weights(model_base_dir)
     return model_path
 def get_pipeline_pool(task: str) -> PipelinePool:
     global ACTIVE_PIPELINE_POOL
+    if not torch.cuda.is_available():
+        raise RuntimeError(
+            "Lance inference requires a GPU. The Gradio UI can start on CPU, but generation is disabled "
+            "until GPU hardware is attached."
+        )
     model_variant = get_task_model_variant(task)
     with ACTIVE_POOL_LOCK:
         if ACTIVE_PIPELINE_POOL is not None and ACTIVE_PIPELINE_POOL.model_variant == model_variant:
 if __name__ == "__main__":
     args = parse_args()
     os.environ["LANCE_GPUS"] = args.gpus
     QUEUE_MAX_SIZE = args.queue_size
+    preload_models = torch.cuda.is_available() and env_flag("LANCE_PRELOAD_MODELS", True)
+    if preload_models:
+        resolved_model_path = ensure_model_assets(MODEL_VARIANT_VIDEO)
+        print(f"[startup] Using Lance model path: {resolved_model_path}", flush=True)
+        gpu_ids = parse_gpu_ids(args.gpus)
+        ACTIVE_PIPELINE_POOL = PipelinePool(gpu_ids, model_variant=MODEL_VARIANT_VIDEO)
+        ACTIVE_PIPELINE_POOL.initialize_all()
+        concurrency_limit = ACTIVE_PIPELINE_POOL.size
+    else:
+        print(
+            "[startup] Skipping model preload. UI will launch without loading Lance weights until GPU hardware is available.",
+            flush=True,
+        )
+        concurrency_limit = 1
     demo = build_demo()
     demo.queue(
         max_size=args.queue_size,
+        default_concurrency_limit=concurrency_limit,
     ).launch(
         server_name=args.server_name,
         server_port=args.server_port,

assets/logo/{lance-logo.webp → lance-logo.png} RENAMED Viewed

File without changes