ffy2000 committed on
Commit
b7a0fa0
·
1 Parent(s): e79d110

Prepare Lance for Hugging Face Space

Browse files
README.md CHANGED
@@ -7,6 +7,8 @@ sdk: gradio
7
  python_version: "3.10.13"
8
  sdk_version: "5.31.0"
9
  app_file: app.py
 
 
10
  ---
11
 
12
  <div align="center">
 
7
  python_version: "3.10.13"
8
  sdk_version: "5.31.0"
9
  app_file: app.py
10
+ models:
11
+ - bytedance-research/Lance
12
  ---
13
 
14
  <div align="center">
app.py CHANGED
@@ -89,13 +89,14 @@ DEFAULT_TASK = "t2v"
89
  DEFAULT_TIMESTEPS = 30
90
  DEFAULT_TIMESTEP_SHIFT = 3.5
91
  DEFAULT_CFG_TEXT_SCALE = 4.0
92
- DEFAULT_RESOLUTION = "video_848x480"
 
93
  DEFAULT_IMAGE_RESOLUTION = "image_768x768"
94
  DEFAULT_BASIC_SEED = 42
95
- DEFAULT_HEIGHT = 480
96
- DEFAULT_WIDTH = 848
97
  DEFAULT_IMAGE_SIZE = 768
98
- DEFAULT_VIDEO_DURATION_SECONDS = 8
99
  MAX_VIDEO_DURATION_SECONDS = 360
100
  MAX_VIDEO_NUM_FRAMES = 12 * MAX_VIDEO_DURATION_SECONDS + 1
101
  DEFAULT_NUM_FRAMES = 12 * DEFAULT_VIDEO_DURATION_SECONDS + 1
@@ -106,7 +107,19 @@ FRAME_INTERPOLATION_NO = "No"
106
  DEFAULT_FRAME_INTERPOLATION = FRAME_INTERPOLATION_YES
107
  ASPECT_RATIO_CHOICES = ["21:9", "16:9", "3:2", "4:3", "1:1", "3:4", "2:3", "9:16", "9:21"]
108
 
109
- VIDEO_ASPECT_RATIO_TO_SIZE = {
 
 
 
 
 
 
 
 
 
 
 
 
110
  "21:9": (976, 416),
111
  "16:9": (848, 480),
112
  "3:2": (784, 528),
@@ -118,6 +131,11 @@ VIDEO_ASPECT_RATIO_TO_SIZE = {
118
  "9:21": (416, 976),
119
  }
120
 
 
 
 
 
 
121
  IMAGE_ASPECT_RATIO_TO_SIZE = {
122
  "21:9": (1168, 496),
123
  "16:9": (1024, 576),
@@ -134,10 +152,6 @@ DEFAULT_QUEUE_SIZE = 32
134
  USE_KVCACHE = True
135
  TEXT_TEMPLATE = True
136
  RECORD_WRITE_LOCK = threading.Lock()
137
- MODEL_ASSET_PREFETCH_LOCK = threading.Lock()
138
- MODEL_ASSET_PREFETCH_STARTED = False
139
- MODEL_ASSET_PREFETCH_DONE = threading.Event()
140
- MODEL_ASSET_PREFETCH_ERROR: Optional[str] = None
141
 
142
  LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
143
  LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
@@ -608,6 +622,112 @@ APP_CSS = """
608
  line-height: 1.35 !important;
609
  }
610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
  .prompt-dataset .paginate {
612
  display: none !important;
613
  }
@@ -746,6 +866,327 @@ APP_CSS = """
746
  font-weight: 800 !important;
747
  }
748
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
749
  @media (max-width: 900px) {
750
  .lance-main-row {
751
  grid-template-columns: minmax(0, 1fr) !important;
@@ -759,7 +1200,9 @@ APP_JS = """
759
  if (!element) {
760
  return;
761
  }
762
- element.style.setProperty(property, value, "important");
 
 
763
  };
764
 
765
  const enforceLanceLabelTypography = () => {
@@ -783,6 +1226,216 @@ APP_JS = """
783
  });
784
  };
785
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
786
  const syncOutputColumnHeight = () => {
787
  const row = document.querySelector(".lance-main-row");
788
  const inputColumn = document.querySelector(".lance-input-column");
@@ -812,6 +1465,9 @@ APP_JS = """
812
 
813
  const scheduleSync = () => requestAnimationFrame(() => {
814
  enforceLanceLabelTypography();
 
 
 
815
  syncOutputColumnHeight();
816
  });
817
  const attachObservers = () => {
@@ -834,9 +1490,15 @@ APP_JS = """
834
  };
835
 
836
  enforceLanceLabelTypography();
 
 
 
837
  attachObservers();
838
  new MutationObserver(() => {
839
  enforceLanceLabelTypography();
 
 
 
840
  attachObservers();
841
  }).observe(document.body, {
842
  childList: true,
@@ -888,7 +1550,12 @@ UNDERSTANDING_TASKS = {TASK_X2T_VIDEO, TASK_X2T_IMAGE}
888
  IMAGE_TASKS = {TASK_T2I, TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
889
  VIDEO_TASKS = {TASK_T2V, TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
890
  EDIT_TASKS = {TASK_IMAGE_EDIT, TASK_VIDEO_EDIT}
891
- VIDEO_RESOLUTION_CHOICES = [DEFAULT_RESOLUTION]
 
 
 
 
 
892
  IMAGE_RESOLUTION_CHOICES = [DEFAULT_IMAGE_RESOLUTION]
893
  RESOLUTION_CHOICES = VIDEO_RESOLUTION_CHOICES + IMAGE_RESOLUTION_CHOICES
894
  CAPTION_SYSTEM_PROMPT_TEMPLATE = (
@@ -911,7 +1578,7 @@ def get_aspect_ratio_choices_for_task(task: str) -> list[tuple[str, str]]:
911
 
912
 
913
  def get_video_duration_choices() -> list[tuple[str, int]]:
914
- return [(f"{seconds}s", seconds) for seconds in range(1, MAX_VIDEO_DURATION_SECONDS + 1)]
915
 
916
  def env_flag(name: str, default: bool) -> bool:
917
  value = os.getenv(name)
@@ -1136,7 +1803,7 @@ def normalize_frame_interpolation(value) -> bool:
1136
 
1137
 
1138
  def video_seconds_to_num_frames(seconds: int) -> int:
1139
- seconds = max(1, min(MAX_VIDEO_DURATION_SECONDS, int(seconds)))
1140
  return 12 * seconds + 1
1141
 
1142
 
@@ -1148,13 +1815,63 @@ def normalize_task(task: str) -> str:
1148
  return task
1149
 
1150
 
1151
- def normalize_resolution_for_backend(resolution: str, task: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1152
  internal_task = normalize_task(task)
1153
  if internal_task in IMAGE_TASKS:
1154
- return DEFAULT_IMAGE_RESOLUTION
 
 
 
 
1155
  if internal_task in VIDEO_TASKS:
 
 
 
 
 
 
 
 
 
 
 
 
 
1156
  return DEFAULT_RESOLUTION
1157
- return str(resolution)
 
 
 
 
 
 
 
 
 
 
 
 
 
1158
 
1159
 
1160
  def get_default_aspect_ratio(task: str) -> str:
@@ -1162,10 +1879,21 @@ def get_default_aspect_ratio(task: str) -> str:
1162
  return DEFAULT_IMAGE_ASPECT_RATIO if internal_task in IMAGE_TASKS else DEFAULT_VIDEO_ASPECT_RATIO
1163
 
1164
 
1165
- def get_size_for_aspect_ratio(task: str, aspect_ratio: str) -> tuple[int, int]:
 
 
 
 
 
 
 
 
1166
  internal_task = normalize_task(task)
1167
  aspect_ratio = aspect_ratio if aspect_ratio in ASPECT_RATIO_CHOICES else get_default_aspect_ratio(internal_task)
1168
- size_map = IMAGE_ASPECT_RATIO_TO_SIZE if internal_task in IMAGE_TASKS else VIDEO_ASPECT_RATIO_TO_SIZE
 
 
 
1169
  return size_map[aspect_ratio]
1170
 
1171
 
@@ -1177,16 +1905,18 @@ def format_size_markdown(task: str, width: int, height: int) -> str:
1177
  return f"{width} x {height}"
1178
 
1179
 
1180
- def get_size_map_for_task(task: str) -> dict[str, tuple[int, int]]:
1181
  internal_task = normalize_task(task)
1182
- return IMAGE_ASPECT_RATIO_TO_SIZE if internal_task in IMAGE_TASKS else VIDEO_ASPECT_RATIO_TO_SIZE
 
 
1183
 
1184
 
1185
- def get_output_resolution_choices_for_task(task: str) -> list[tuple[str, str]]:
1186
  """Get Output Resolution choices with a one-to-one mapping to aspect ratios."""
1187
  internal_task = normalize_task(task)
1188
  default_ratio = get_default_aspect_ratio(internal_task)
1189
- size_map = get_size_map_for_task(internal_task)
1190
  choices = []
1191
  for ratio in ASPECT_RATIO_CHOICES:
1192
  width, height = size_map[ratio]
@@ -1196,10 +1926,10 @@ def get_output_resolution_choices_for_task(task: str) -> list[tuple[str, str]]:
1196
  return choices
1197
 
1198
 
1199
- def get_aspect_ratio_for_output_resolution(task: str, output_resolution: str) -> str:
1200
  internal_task = normalize_task(task)
1201
  resolution_text = str(output_resolution or "").strip()
1202
- size_map = get_size_map_for_task(internal_task)
1203
  for ratio in ASPECT_RATIO_CHOICES:
1204
  width, height = size_map[ratio]
1205
  if resolution_text == format_size_markdown(internal_task, width, height):
@@ -1256,24 +1986,42 @@ def build_lance_icon_label_html(text: str, icon: str, *extra_classes: str) -> st
1256
  return f'<div class="{class_names}">{icon_html}<span>{html.escape(text)}</span></div>'
1257
 
1258
 
1259
- def update_size_from_aspect_ratio(task: str, aspect_ratio: str):
1260
- width, height = get_size_for_aspect_ratio(task, aspect_ratio)
1261
- return height, width, format_size_markdown(task, width, height)
 
 
 
1262
 
1263
 
1264
- def update_aspect_ratio_from_output_resolution(task: str, output_resolution: str):
1265
- aspect_ratio = get_aspect_ratio_for_output_resolution(task, output_resolution)
1266
- width, height = get_size_for_aspect_ratio(task, aspect_ratio)
1267
  return aspect_ratio, height, width
1268
 
1269
 
 
 
 
 
 
 
 
 
 
 
 
 
1270
  def reset_generation_defaults_for_task(task: str):
1271
  internal_task = normalize_task(task)
1272
  aspect_ratio = get_default_aspect_ratio(internal_task)
1273
- width, height = get_size_for_aspect_ratio(internal_task, aspect_ratio)
1274
- resolution = DEFAULT_IMAGE_RESOLUTION if internal_task in IMAGE_TASKS else DEFAULT_RESOLUTION
1275
  num_frames = DEFAULT_VIDEO_DURATION_SECONDS
1276
- return aspect_ratio, height, width, num_frames, resolution, format_size_markdown(internal_task, width, height)
 
 
 
1277
 
1278
 
1279
  def apply_prompt_example(task: str, evt: gr.SelectData):
@@ -1288,6 +2036,41 @@ def apply_prompt_example(task: str, evt: gr.SelectData):
1288
  return (prompt_text, *defaults)
1289
 
1290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1291
  def get_understanding_system_prompt_choices(task: str) -> list[str]:
1292
  internal_task = normalize_task(task)
1293
  if internal_task == TASK_X2T_IMAGE:
@@ -1815,9 +2598,9 @@ class LanceT2VV2TPipeline:
1815
  )
1816
 
1817
  stage_start = time.perf_counter()
1818
- print(f"[startup][gpu:{self.device}] Moving Lance model to GPU {self.device}", flush=True)
1819
- model = model.to(self.device)
1820
- self._log_stage("Lance model move to GPU", stage_start)
1821
 
1822
  stage_start = time.perf_counter()
1823
  print(f"[startup][gpu:{self.device}] Loading tokenizer: {model_args.model_path}", flush=True)
@@ -1855,7 +2638,10 @@ class LanceT2VV2TPipeline:
1855
  != model.language_model.get_output_embeddings().weight.data.data_ptr()
1856
  ), "tie_word_embeddings conflict"
1857
 
1858
- model = model.to(device=self.device, dtype=torch.bfloat16)
 
 
 
1859
  model.eval()
1860
  if vae_model is not None and hasattr(vae_model, "eval"):
1861
  vae_model.eval()
@@ -2402,45 +3188,6 @@ def ensure_flash_attn_installed() -> None:
2402
  print(f"[startup] flash-attn {DEFAULT_FLASH_ATTN_VERSION} installed successfully.", flush=True)
2403
 
2404
 
2405
- def prefetch_lance_runtime_assets() -> None:
2406
- global MODEL_ASSET_PREFETCH_ERROR
2407
- with MODEL_ASSET_PREFETCH_LOCK:
2408
- if MODEL_ASSET_PREFETCH_DONE.is_set():
2409
- return
2410
- print(
2411
- "[startup] Preloading Lance runtime assets on CPU: flash-attn plus both model variants.",
2412
- flush=True,
2413
- )
2414
- try:
2415
- ensure_flash_attn_installed()
2416
- for variant in (MODEL_VARIANT_VIDEO, MODEL_VARIANT_IMAGE):
2417
- model_path = ensure_model_assets(variant)
2418
- print(
2419
- f"[startup] CPU preload finished for {variant} at {display_path(model_path)}",
2420
- flush=True,
2421
- )
2422
- MODEL_ASSET_PREFETCH_ERROR = None
2423
- MODEL_ASSET_PREFETCH_DONE.set()
2424
- print("[startup] CPU asset preload finished for all Lance variants.", flush=True)
2425
- except Exception as exc:
2426
- MODEL_ASSET_PREFETCH_ERROR = str(exc)
2427
- print(f"[startup] CPU asset preload failed: {exc}", flush=True)
2428
-
2429
-
2430
- def start_lance_runtime_asset_prefetch() -> None:
2431
- global MODEL_ASSET_PREFETCH_STARTED
2432
- with MODEL_ASSET_PREFETCH_LOCK:
2433
- if MODEL_ASSET_PREFETCH_STARTED:
2434
- return
2435
- MODEL_ASSET_PREFETCH_STARTED = True
2436
- thread = threading.Thread(
2437
- target=prefetch_lance_runtime_assets,
2438
- name="lance-runtime-asset-prefetch",
2439
- daemon=True,
2440
- )
2441
- thread.start()
2442
-
2443
-
2444
  def get_env_int(name: str, default: int) -> int:
2445
  """Read an integer environment variable, falling back safely on invalid values."""
2446
  try:
@@ -2449,19 +3196,54 @@ def get_env_int(name: str, default: int) -> int:
2449
  return default
2450
 
2451
 
 
 
 
 
 
 
 
 
2452
  def get_zerogpu_duration_cap() -> int:
2453
  """Maximum duration requested from ZeroGPU.
2454
 
2455
- You can lower or raise it without changing code by setting:
2456
- LANCE_ZEROGPU_MAX_DURATION_SECONDS=900
 
 
2457
  """
2458
- return max(1, get_env_int("LANCE_ZEROGPU_MAX_DURATION_SECONDS", 900))
2459
 
2460
 
2461
  def clamp_zerogpu_duration(seconds: int) -> int:
2462
  return max(1, min(int(seconds), get_zerogpu_duration_cap()))
2463
 
2464
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2465
  def get_run_task_gpu_duration(
2466
  task: str,
2467
  prompt: str,
@@ -2478,18 +3260,39 @@ def get_run_task_gpu_duration(
2478
  cfg_text_scale: float,
2479
  enable_frame_interpolation: bool,
2480
  ) -> int:
2481
- """Return a legal ZeroGPU reservation duration.
2482
 
2483
- This value is only the requested ZeroGPU reservation time, not the user's total
2484
- daily quota.
 
 
2485
  """
2486
  internal_task = normalize_task(task)
2487
- requested_seconds = max(1, int(num_frames))
2488
- if internal_task in {TASK_T2V, TASK_VIDEO_EDIT}:
2489
- return clamp_zerogpu_duration(max(180, requested_seconds * 2))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2490
  if internal_task == TASK_X2T_VIDEO:
2491
- return clamp_zerogpu_duration(60)
2492
- return clamp_zerogpu_duration(60)
 
 
 
 
2493
 
2494
 
2495
  def get_pipeline_pool(task: str) -> PipelinePool:
@@ -2562,21 +3365,14 @@ def build_status_markdown() -> str:
2562
  gpu_text = "unknown"
2563
  concurrency = 1
2564
  active_variant = "none"
2565
- asset_status = "pending"
2566
  if ACTIVE_PIPELINE_POOL is not None:
2567
  active_variant = ACTIVE_PIPELINE_POOL.model_variant
2568
  gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
2569
  concurrency = ACTIVE_PIPELINE_POOL.size
2570
- if MODEL_ASSET_PREFETCH_DONE.is_set():
2571
- asset_status = "done"
2572
- elif MODEL_ASSET_PREFETCH_STARTED:
2573
- asset_status = "running"
2574
- if MODEL_ASSET_PREFETCH_ERROR:
2575
- asset_status = f"failed: {MODEL_ASSET_PREFETCH_ERROR}"
2576
  return (
2577
  f"**Status** GPU: `{gpu_text}` | Max concurrency: `{concurrency}` | "
2578
  f"Queue limit: `{QUEUE_MAX_SIZE}` | Active model: `{active_variant}` | "
2579
- f"Switch mode: `unload then load` | Asset preload: `{asset_status}`"
2580
  )
2581
 
2582
 
@@ -2639,15 +3435,16 @@ def update_task_ui(task: str):
2639
  is_edit_task = internal_task in EDIT_TASKS
2640
  is_understanding_task = internal_task in UNDERSTANDING_TASKS
2641
  is_generation_task = internal_task in GENERATION_TASKS
 
2642
  show_media_input = is_edit_task or is_understanding_task
2643
- resolution_choices = IMAGE_RESOLUTION_CHOICES if is_image_task else VIDEO_RESOLUTION_CHOICES
2644
- resolution_value = DEFAULT_IMAGE_RESOLUTION if is_image_task else DEFAULT_RESOLUTION
2645
  aspect_ratio_value = DEFAULT_IMAGE_ASPECT_RATIO if is_image_task else DEFAULT_VIDEO_ASPECT_RATIO
2646
- width_value, height_value = get_size_for_aspect_ratio(internal_task, aspect_ratio_value)
2647
  size_markdown = format_size_markdown(internal_task, width_value, height_value)
2648
  system_prompt_choices = get_understanding_system_prompt_choices(internal_task)
2649
 
2650
- if is_generation_task:
2651
  text_label = "Prompt"
2652
  text_placeholder = "Describe what you want to generate..."
2653
  elif is_edit_task:
@@ -2666,10 +3463,12 @@ def update_task_ui(task: str):
2666
 
2667
  output_icon = "video" if output_label == "Output Video" else "image" if output_label == "Output Image" else "text"
2668
  show_generation_settings = is_generation_task or is_edit_task
2669
- show_aspect_ratio = is_generation_task
 
2670
  show_input_video = internal_task in {TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
2671
  show_input_image = internal_task in {TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
2672
- show_video_generation_settings = internal_task in {TASK_T2V, TASK_VIDEO_EDIT}
 
2673
 
2674
  return (
2675
  gr.update(value=build_lance_label_html(text_label, "lance-prompt-label")),
@@ -2677,25 +3476,29 @@ def update_task_ui(task: str):
2677
  label=text_label,
2678
  placeholder=text_placeholder,
2679
  visible=True,
 
2680
  ),
2681
  gr.update(
2682
  choices=system_prompt_choices,
2683
  value=system_prompt_choices[0],
2684
  visible=False,
2685
  ),
 
 
2686
  gr.update(label="Input Video", visible=show_input_video, value=None),
2687
  gr.update(label="Input Image", visible=show_input_image, value=None),
2688
- gr.update(visible=show_video_generation_settings),
2689
  gr.update(visible=show_aspect_ratio),
2690
- gr.update(visible=False),
2691
  gr.update(visible=internal_task == TASK_T2V),
 
2692
  gr.update(choices=get_aspect_ratio_choices_for_task(internal_task), value=aspect_ratio_value, visible=show_aspect_ratio),
2693
  gr.update(value=height_value),
2694
  gr.update(value=width_value),
2695
- gr.update(visible=show_video_generation_settings, value=DEFAULT_FRAME_INTERPOLATION),
2696
- gr.update(choices=get_output_resolution_choices_for_task(internal_task), value=size_markdown, visible=show_video_generation_settings),
2697
  gr.update(visible=internal_task == TASK_T2V, value=DEFAULT_VIDEO_DURATION_SECONDS),
2698
- gr.update(choices=resolution_choices, value=resolution_value, visible=False),
2699
  gr.update(value=build_lance_icon_label_html(output_label, output_icon, "lance-output-label")),
2700
  gr.update(visible=internal_task in {TASK_T2V, TASK_VIDEO_EDIT}),
2701
  gr.update(visible=internal_task in {TASK_T2I, TASK_IMAGE_EDIT}),
@@ -2773,7 +3576,7 @@ def build_demo() -> gr.Blocks:
2773
  value=DEFAULT_VIDEO_ASPECT_RATIO,
2774
  elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
2775
  )
2776
- with gr.Row(visible=False, elem_classes=["generation-controls-row", "output-resolution-row"]) as output_resolution_row:
2777
  with gr.Column(elem_classes=["lance-control-field"]):
2778
  gr.HTML('<div class="lance-generation-label">Output Resolution</div>', elem_classes=["lance-label-html"])
2779
  real_size = gr.Radio(
@@ -2784,26 +3587,28 @@ def build_demo() -> gr.Blocks:
2784
  interactive=True,
2785
  elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
2786
  )
2787
- resolution = gr.Dropdown(
2788
- label="Resolution",
2789
- choices=RESOLUTION_CHOICES,
2790
- value=DEFAULT_RESOLUTION,
2791
- visible=False,
2792
- )
2793
- height = gr.Number(value=DEFAULT_HEIGHT, precision=0, visible=False)
2794
- width = gr.Number(value=DEFAULT_WIDTH, precision=0, visible=False)
2795
  with gr.Row(elem_classes=["generation-controls-row", "video-duration-row"]) as video_duration_row:
2796
  with gr.Column(elem_classes=["lance-control-field"]):
2797
  gr.HTML(build_lance_label_html("Video Duration (seconds)", "lance-generation-label"), elem_classes=["lance-label-html"])
2798
- num_frames = gr.Slider(
2799
  label="Video Duration (seconds)",
2800
  show_label=False,
2801
- minimum=1,
2802
- maximum=MAX_VIDEO_DURATION_SECONDS,
2803
- step=1,
2804
  value=DEFAULT_VIDEO_DURATION_SECONDS,
2805
  elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
2806
  )
 
 
 
 
 
 
 
 
 
 
 
 
2807
 
2808
  with gr.Accordion("Advanced Parameters", open=False, elem_classes=["lance-advanced-accordion"]):
2809
  with gr.Column(elem_classes=["lance-control-field"]):
@@ -2868,94 +3673,96 @@ def build_demo() -> gr.Blocks:
2868
 
2869
  run_button = gr.Button("🚀 Generate", variant="primary", elem_classes=["lance-run-button"])
2870
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2871
  with gr.Column(visible=True, elem_classes=["lance-recommended-section"]) as video_generation_examples_group:
2872
  gr.HTML(build_lance_label_html("Video generation recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
2873
  with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
2874
- video_generation_examples = gr.Dataset(
2875
- samples=VIDEO_GENERATION_EXAMPLES,
2876
- components=[gr.Textbox(label="Prompt", visible=False)],
2877
- headers=["Prompt"],
2878
- show_label=False,
2879
- type="values",
2880
- layout="table",
2881
- samples_per_page=len(VIDEO_GENERATION_EXAMPLES),
2882
- elem_classes=["prompt-dataset"],
2883
- )
2884
 
2885
  with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as video_edit_examples_group:
2886
  gr.HTML(build_lance_label_html("Video edit recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
2887
- with gr.Group(elem_classes=["example-panel", "example-no-icon", "video-edit-examples"]):
2888
- video_edit_examples = gr.Examples(
2889
- examples=VIDEO_EDIT_EXAMPLES,
2890
- inputs=generation_example_inputs,
2891
- label="",
2892
- examples_per_page=3,
2893
- cache_examples=False,
2894
- preprocess=False,
2895
- postprocess=False,
2896
- )
2897
 
2898
  with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as video_understanding_examples_group:
2899
  gr.HTML(build_lance_label_html("Video understanding recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
2900
- with gr.Group(elem_classes=["example-panel", "example-no-icon"]):
2901
- video_understanding_examples = gr.Examples(
2902
- examples=VIDEO_UNDERSTANDING_EXAMPLES,
2903
- inputs=generation_example_inputs,
2904
- label="",
2905
- examples_per_page=4,
2906
- cache_examples=False,
2907
- preprocess=False,
2908
- postprocess=False,
2909
- )
2910
 
2911
  with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_generation_examples_group:
2912
  gr.HTML(build_lance_label_html("Image generation recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
2913
  with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
2914
- image_generation_examples = gr.Dataset(
2915
- samples=IMAGE_GENERATION_EXAMPLES,
2916
- components=[gr.Textbox(label="Prompt", visible=False)],
2917
- headers=["Prompt"],
2918
- show_label=False,
2919
- type="values",
2920
- layout="table",
2921
- samples_per_page=len(IMAGE_GENERATION_EXAMPLES),
2922
- elem_classes=["prompt-dataset"],
2923
- )
2924
 
2925
  with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_edit_examples_group:
2926
  gr.HTML(build_lance_label_html("Image edit recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
2927
- with gr.Group(elem_classes=["example-panel", "example-no-icon"]):
2928
- image_edit_examples = gr.Examples(
2929
- examples=IMAGE_EDIT_EXAMPLES,
2930
- inputs=generation_example_inputs,
2931
- label="",
2932
- examples_per_page=5,
2933
- cache_examples=False,
2934
- preprocess=False,
2935
- postprocess=False,
2936
- )
2937
 
2938
  with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_understanding_examples_group:
2939
  gr.HTML(build_lance_label_html("Image understanding recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
2940
- with gr.Group(elem_classes=["example-panel", "example-no-icon"]):
2941
- image_understanding_examples = gr.Examples(
2942
- examples=IMAGE_UNDERSTANDING_EXAMPLES,
2943
- inputs=generation_example_inputs,
2944
- label="",
2945
- examples_per_page=4,
2946
- cache_examples=False,
2947
- preprocess=False,
2948
- postprocess=False,
2949
- )
2950
-
2951
- keep_example_clicks_from_changing_visibility(
2952
- video_generation_examples,
2953
- video_edit_examples,
2954
- video_understanding_examples,
2955
- image_generation_examples,
2956
- image_edit_examples,
2957
- image_understanding_examples,
2958
- )
2959
 
2960
  task.change(
2961
  fn=update_task_ui,
@@ -2970,6 +3777,7 @@ def build_demo() -> gr.Blocks:
2970
  aspect_ratio_row,
2971
  output_resolution_row,
2972
  video_duration_row,
 
2973
  aspect_ratio,
2974
  height,
2975
  width,
@@ -2992,7 +3800,7 @@ def build_demo() -> gr.Blocks:
2992
 
2993
  aspect_ratio.change(
2994
  fn=update_size_from_aspect_ratio,
2995
- inputs=[task, aspect_ratio],
2996
  outputs=[height, width, real_size],
2997
  queue=False,
2998
  show_api=False,
@@ -3000,35 +3808,42 @@ def build_demo() -> gr.Blocks:
3000
 
3001
  real_size.change(
3002
  fn=update_aspect_ratio_from_output_resolution,
3003
- inputs=[task, real_size],
3004
  outputs=[aspect_ratio, height, width],
3005
  queue=False,
3006
  show_api=False,
3007
  )
3008
 
3009
- for examples_component in (video_edit_examples, video_understanding_examples, image_edit_examples, image_understanding_examples):
3010
- examples_component.load_input_event.then(
3011
- fn=reset_generation_defaults_for_task,
 
 
 
 
 
 
 
 
3012
  inputs=[task],
3013
- outputs=[aspect_ratio, height, width, num_frames, resolution, real_size],
3014
  queue=False,
3015
  show_api=False,
3016
  )
3017
 
3018
- video_generation_examples.select(
3019
- fn=apply_prompt_example,
3020
- inputs=[task],
3021
- outputs=[prompt, aspect_ratio, height, width, num_frames, resolution, real_size],
3022
- queue=False,
3023
- show_api=False,
3024
- )
3025
- image_generation_examples.select(
3026
- fn=apply_prompt_example,
3027
- inputs=[task],
3028
- outputs=[prompt, aspect_ratio, height, width, num_frames, resolution, real_size],
3029
- queue=False,
3030
- show_api=False,
3031
- )
3032
 
3033
  run_button.click(
3034
  fn=build_running_status_markdown,
@@ -3055,6 +3870,7 @@ def build_demo() -> gr.Blocks:
3055
  enable_frame_interpolation,
3056
  ],
3057
  outputs=[output_video, output_image, output_text, status, logs],
 
3058
  )
3059
 
3060
  return demo
@@ -3091,17 +3907,54 @@ def parse_gpu_ids(gpu_string: str) -> list[int]:
3091
  return gpu_ids
3092
 
3093
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3094
  if __name__ == "__main__":
3095
  args = parse_args()
3096
  os.environ["LANCE_GPUS"] = args.gpus
3097
  QUEUE_MAX_SIZE = args.queue_size
3098
- if env_flag("LANCE_PRELOAD_MODEL_ASSETS", running_on_space()):
3099
- start_lance_runtime_asset_prefetch()
3100
- else:
3101
- print(
3102
- "[startup] Model asset preload disabled. UI will launch first, and Lance weights will be downloaded lazily inside GPU inference calls.",
3103
- flush=True,
3104
- )
3105
  concurrency_limit = 1
3106
  demo = build_demo()
3107
  demo.queue(
 
89
  DEFAULT_TIMESTEPS = 30
90
  DEFAULT_TIMESTEP_SHIFT = 3.5
91
  DEFAULT_CFG_TEXT_SCALE = 4.0
92
+ DEFAULT_RESOLUTION = "video_360p"
93
+ DEFAULT_VIDEO_EDIT_RESOLUTION = "video_480p"
94
  DEFAULT_IMAGE_RESOLUTION = "image_768x768"
95
  DEFAULT_BASIC_SEED = 42
96
+ DEFAULT_HEIGHT = 352
97
+ DEFAULT_WIDTH = 640
98
  DEFAULT_IMAGE_SIZE = 768
99
+ DEFAULT_VIDEO_DURATION_SECONDS = 3
100
  MAX_VIDEO_DURATION_SECONDS = 360
101
  MAX_VIDEO_NUM_FRAMES = 12 * MAX_VIDEO_DURATION_SECONDS + 1
102
  DEFAULT_NUM_FRAMES = 12 * DEFAULT_VIDEO_DURATION_SECONDS + 1
 
107
  DEFAULT_FRAME_INTERPOLATION = FRAME_INTERPOLATION_YES
108
  ASPECT_RATIO_CHOICES = ["21:9", "16:9", "3:2", "4:3", "1:1", "3:4", "2:3", "9:16", "9:21"]
109
 
110
+ VIDEO_360P_ASPECT_RATIO_TO_SIZE = {
111
+ "21:9": (672, 288),
112
+ "16:9": (640, 352),
113
+ "3:2": (528, 352),
114
+ "4:3": (560, 416),
115
+ "1:1": (480, 480),
116
+ "3:4": (416, 560),
117
+ "2:3": (352, 528),
118
+ "9:16": (352, 640),
119
+ "9:21": (288, 672),
120
+ }
121
+
122
+ VIDEO_480P_ASPECT_RATIO_TO_SIZE = {
123
  "21:9": (976, 416),
124
  "16:9": (848, 480),
125
  "3:2": (784, 528),
 
131
  "9:21": (416, 976),
132
  }
133
 
134
+ VIDEO_RESOLUTION_TO_SIZE_MAP = {
135
+ "video_360p": VIDEO_360P_ASPECT_RATIO_TO_SIZE,
136
+ "video_480p": VIDEO_480P_ASPECT_RATIO_TO_SIZE,
137
+ }
138
+
139
  IMAGE_ASPECT_RATIO_TO_SIZE = {
140
  "21:9": (1168, 496),
141
  "16:9": (1024, 576),
 
152
  USE_KVCACHE = True
153
  TEXT_TEMPLATE = True
154
  RECORD_WRITE_LOCK = threading.Lock()
 
 
 
 
155
 
156
  LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
157
  LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
 
622
  line-height: 1.35 !important;
623
  }
624
 
625
+ .prompt-dataset button,
626
+ .example-panel table td:first-child button {
627
+ max-height: 180px !important;
628
+ overflow-y: auto !important;
629
+ overscroll-behavior: contain !important;
630
+ }
631
+
632
+ .prompt-dataset button,
633
+ .example-panel table td:first-child button,
634
+ .prompt-dataset button span,
635
+ .prompt-dataset button p,
636
+ .example-panel table td:first-child span,
637
+ .example-panel table td:first-child p {
638
+ white-space: pre-wrap !important;
639
+ overflow-wrap: anywhere !important;
640
+ word-break: break-word !important;
641
+ text-overflow: clip !important;
642
+ -webkit-line-clamp: unset !important;
643
+ line-clamp: unset !important;
644
+ }
645
+
646
+ .prompt-dataset button span,
647
+ .prompt-dataset button p,
648
+ .example-panel table td:first-child span,
649
+ .example-panel table td:first-child p {
650
+ overflow: visible !important;
651
+ display: block !important;
652
+ }
653
+
654
+ .lance-recommended-section .example-panel td,
655
+ .lance-recommended-section .example-panel td *,
656
+ .lance-recommended-section .example-panel button,
657
+ .lance-recommended-section .example-panel button *,
658
+ .lance-recommended-section .example-panel label,
659
+ .lance-recommended-section .example-panel label *,
660
+ .lance-recommended-section .example-panel span,
661
+ .lance-recommended-section .example-panel p {
662
+ white-space: pre-wrap !important;
663
+ overflow-wrap: anywhere !important;
664
+ word-break: break-word !important;
665
+ text-overflow: clip !important;
666
+ -webkit-line-clamp: unset !important;
667
+ line-clamp: unset !important;
668
+ }
669
+
670
+ .lance-recommended-section .example-panel button,
671
+ .lance-recommended-section .example-panel td {
672
+ height: auto !important;
673
+ max-height: none !important;
674
+ overflow: visible !important;
675
+ }
676
+
677
+ .lance-recommended-section .example-panel [style*="ellipsis"],
678
+ .lance-recommended-section .example-panel [style*="nowrap"],
679
+ .lance-recommended-section .example-panel [style*="hidden"] {
680
+ white-space: pre-wrap !important;
681
+ overflow: visible !important;
682
+ text-overflow: clip !important;
683
+ }
684
+
685
+ .lance-recommended-section .example-panel {
686
+ overflow: visible !important;
687
+ }
688
+
689
+ .lance-recommended-section .example-panel table {
690
+ width: 100% !important;
691
+ table-layout: fixed !important;
692
+ border-collapse: collapse !important;
693
+ }
694
+
695
+ .lance-recommended-section .example-panel tr,
696
+ .lance-recommended-section .example-panel th,
697
+ .lance-recommended-section .example-panel td {
698
+ height: auto !important;
699
+ min-height: 0 !important;
700
+ max-height: none !important;
701
+ }
702
+
703
+ .lance-recommended-section .example-panel td:first-child,
704
+ .lance-recommended-section .example-panel td:first-child *,
705
+ .prompt-dataset td,
706
+ .prompt-dataset td *,
707
+ .prompt-dataset button,
708
+ .prompt-dataset button * {
709
+ white-space: pre-wrap !important;
710
+ overflow: visible !important;
711
+ overflow-wrap: anywhere !important;
712
+ word-break: break-word !important;
713
+ text-overflow: clip !important;
714
+ -webkit-line-clamp: unset !important;
715
+ line-clamp: unset !important;
716
+ }
717
+
718
+ .lance-recommended-section .example-panel td:first-child button,
719
+ .prompt-dataset button {
720
+ width: 100% !important;
721
+ height: auto !important;
722
+ min-height: 0 !important;
723
+ max-height: none !important;
724
+ padding: 12px 14px !important;
725
+ text-align: center !important;
726
+ justify-content: center !important;
727
+ align-items: center !important;
728
+ line-height: 1.35 !important;
729
+ }
730
+
731
  .prompt-dataset .paginate {
732
  display: none !important;
733
  }
 
866
  font-weight: 800 !important;
867
  }
868
 
869
+
870
+
871
+ /* Prompt example tables: Gradio Dataset renders Textbox cells with an inline
872
+ max-width: 35ch and a single-line preview, which causes long prompts to be
873
+ clipped with an ellipsis. These rules expand the Prompt column, wrap text,
874
+ and keep very long rows usable through scrolling. */
875
+ .prompt-dataset,
876
+ .prompt-dataset .table-wrap {
877
+ width: 100% !important;
878
+ max-width: 100% !important;
879
+ overflow-x: auto !important;
880
+ overflow-y: auto !important;
881
+ }
882
+
883
+ .prompt-dataset .table-wrap {
884
+ max-height: 420px !important;
885
+ overscroll-behavior: contain !important;
886
+ scrollbar-gutter: stable !important;
887
+ }
888
+
889
+ .prompt-dataset table {
890
+ width: 100% !important;
891
+ min-width: 720px !important;
892
+ max-width: none !important;
893
+ table-layout: fixed !important;
894
+ border-collapse: collapse !important;
895
+ }
896
+
897
+ .prompt-dataset thead,
898
+ .prompt-dataset tbody,
899
+ .prompt-dataset tr,
900
+ .prompt-dataset th,
901
+ .prompt-dataset td,
902
+ .prompt-dataset td.textbox,
903
+ .prompt-dataset td[style*="35ch"] {
904
+ height: auto !important;
905
+ min-height: 0 !important;
906
+ max-height: none !important;
907
+ max-width: none !important;
908
+ width: 100% !important;
909
+ min-width: 0 !important;
910
+ white-space: normal !important;
911
+ overflow: visible !important;
912
+ text-overflow: clip !important;
913
+ vertical-align: top !important;
914
+ }
915
+
916
+ .prompt-dataset th,
917
+ .prompt-dataset td {
918
+ padding: 12px 14px !important;
919
+ }
920
+
921
+ .prompt-dataset td > * {
922
+ width: 100% !important;
923
+ max-width: none !important;
924
+ min-width: 0 !important;
925
+ height: auto !important;
926
+ min-height: 0 !important;
927
+ max-height: 260px !important;
928
+ overflow-y: auto !important;
929
+ overflow-x: hidden !important;
930
+ overscroll-behavior: contain !important;
931
+ white-space: pre-wrap !important;
932
+ text-align: left !important;
933
+ }
934
+
935
+ .prompt-dataset td *,
936
+ .prompt-dataset td [class*="truncate"],
937
+ .prompt-dataset td [class*="ellipsis"],
938
+ .prompt-dataset td [class*="line-clamp"],
939
+ .prompt-dataset td [style*="nowrap"],
940
+ .prompt-dataset td [style*="ellipsis"],
941
+ .prompt-dataset td [style*="line-clamp"],
942
+ .prompt-dataset td span,
943
+ .prompt-dataset td p,
944
+ .prompt-dataset td div,
945
+ .prompt-dataset td button {
946
+ max-width: none !important;
947
+ white-space: pre-wrap !important;
948
+ overflow-wrap: anywhere !important;
949
+ word-break: break-word !important;
950
+ text-overflow: clip !important;
951
+ -webkit-line-clamp: unset !important;
952
+ line-clamp: unset !important;
953
+ }
954
+
955
+ .prompt-dataset td span,
956
+ .prompt-dataset td p {
957
+ display: block !important;
958
+ }
959
+
960
+
961
+
962
+ /* Full prompt example rows. Do not use gr.Dataset for these two generation
963
+ sections: Dataset table cells are rendered as compact previews and the
964
+ actual DOM text may already contain "...". These button rows keep and render
965
+ the original prompt string, wrap it fully, and make very long rows scrollable. */
966
+ .prompt-example-full-table,
967
+ .prompt-example-full-table > .form,
968
+ .prompt-example-full-table > div {
969
+ width: 100% !important;
970
+ max-width: 100% !important;
971
+ min-width: 0 !important;
972
+ }
973
+
974
+ .prompt-example-full-table {
975
+ max-height: 460px !important;
976
+ overflow-x: auto !important;
977
+ overflow-y: auto !important;
978
+ overscroll-behavior: contain !important;
979
+ scrollbar-gutter: stable !important;
980
+ border: 1px solid var(--border-color-primary) !important;
981
+ border-radius: 8px !important;
982
+ }
983
+
984
+ .prompt-example-table-header,
985
+ .prompt-example-table-header > div,
986
+ .prompt-example-table-header .wrap {
987
+ position: sticky !important;
988
+ top: 0 !important;
989
+ z-index: 3 !important;
990
+ width: 100% !important;
991
+ margin: 0 !important;
992
+ padding: 12px 14px !important;
993
+ border: 0 !important;
994
+ border-bottom: 1px solid var(--border-color-primary) !important;
995
+ background: var(--block-title-background-fill, var(--block-background-fill)) !important;
996
+ color: var(--body-text-color) !important;
997
+ font-size: 18px !important;
998
+ font-weight: 800 !important;
999
+ line-height: 1.25 !important;
1000
+ text-align: center !important;
1001
+ box-shadow: none !important;
1002
+ }
1003
+
1004
+ .prompt-example-table-body,
1005
+ .prompt-example-table-body > .form {
1006
+ gap: 0 !important;
1007
+ width: 100% !important;
1008
+ min-width: 720px !important;
1009
+ }
1010
+
1011
+ .prompt-examples .prompt-example-row-button,
1012
+ .prompt-examples .prompt-example-row-button > button,
1013
+ .prompt-examples .prompt-example-row-button button {
1014
+ width: 100% !important;
1015
+ max-width: none !important;
1016
+ min-width: 0 !important;
1017
+ height: auto !important;
1018
+ min-height: 54px !important;
1019
+ max-height: 220px !important;
1020
+ margin: 0 !important;
1021
+ padding: 12px 14px !important;
1022
+ border-radius: 0 !important;
1023
+ border: 0 !important;
1024
+ border-bottom: 1px solid var(--border-color-primary) !important;
1025
+ background: var(--block-background-fill) !important;
1026
+ color: var(--body-text-color) !important;
1027
+ display: flex !important;
1028
+ justify-content: flex-start !important;
1029
+ align-items: flex-start !important;
1030
+ text-align: left !important;
1031
+ overflow-x: hidden !important;
1032
+ overflow-y: auto !important;
1033
+ white-space: normal !important;
1034
+ cursor: pointer !important;
1035
+ }
1036
+
1037
+ .prompt-examples .prompt-example-row-button span,
1038
+ .prompt-examples .prompt-example-row-button p,
1039
+ .prompt-examples .prompt-example-row-button div {
1040
+ width: 100% !important;
1041
+ max-width: none !important;
1042
+ display: block !important;
1043
+ overflow: visible !important;
1044
+ white-space: pre-wrap !important;
1045
+ overflow-wrap: anywhere !important;
1046
+ word-break: break-word !important;
1047
+ text-overflow: clip !important;
1048
+ -webkit-line-clamp: unset !important;
1049
+ line-clamp: unset !important;
1050
+ font-size: 16px !important;
1051
+ line-height: 1.38 !important;
1052
+ text-align: left !important;
1053
+ }
1054
+
1055
+ .prompt-examples .prompt-example-row-button:last-child,
1056
+ .prompt-examples .prompt-example-row-button:last-child > button,
1057
+ .prompt-examples .prompt-example-row-button:last-child button {
1058
+ border-bottom: 0 !important;
1059
+ }
1060
+
1061
+
1062
+ .prompt-example-table-header-with-media,
1063
+ .prompt-example-table-header-with-media > div,
1064
+ .prompt-example-table-header-with-media .wrap {
1065
+ display: grid !important;
1066
+ grid-template-columns: minmax(0, 1fr) minmax(180px, 260px) !important;
1067
+ gap: 0 !important;
1068
+ text-align: center !important;
1069
+ }
1070
+
1071
+ .prompt-example-multimodal-row,
1072
+ .prompt-example-multimodal-row > .form {
1073
+ width: 100% !important;
1074
+ min-width: 720px !important;
1075
+ margin: 0 !important;
1076
+ gap: 0 !important;
1077
+ align-items: stretch !important;
1078
+ border-bottom: 1px solid var(--border-color-primary) !important;
1079
+ }
1080
+
1081
+ .prompt-example-multimodal-row > .form {
1082
+ display: grid !important;
1083
+ grid-template-columns: minmax(0, 1fr) minmax(180px, 260px) !important;
1084
+ }
1085
+
1086
+ .prompt-example-prompt-cell,
1087
+ .prompt-example-prompt-cell > .form,
1088
+ .prompt-example-media-cell,
1089
+ .prompt-example-media-cell > .form {
1090
+ width: 100% !important;
1091
+ min-width: 0 !important;
1092
+ margin: 0 !important;
1093
+ padding: 0 !important;
1094
+ border: 0 !important;
1095
+ background: transparent !important;
1096
+ box-shadow: none !important;
1097
+ }
1098
+
1099
+ .prompt-example-multimodal-row .prompt-example-row-button,
1100
+ .prompt-example-multimodal-row .prompt-example-row-button > button,
1101
+ .prompt-example-multimodal-row .prompt-example-row-button button {
1102
+ height: 100% !important;
1103
+ min-height: 150px !important;
1104
+ max-height: 260px !important;
1105
+ border-bottom: 0 !important;
1106
+ }
1107
+
1108
+ .prompt-example-media-cell {
1109
+ border-left: 1px solid var(--border-color-primary) !important;
1110
+ }
1111
+
1112
+ .prompt-example-media-preview,
1113
+ .prompt-example-media-preview > div,
1114
+ .prompt-example-media-preview .wrap {
1115
+ width: 100% !important;
1116
+ height: 150px !important;
1117
+ min-height: 150px !important;
1118
+ max-height: 150px !important;
1119
+ margin: 0 !important;
1120
+ border: 0 !important;
1121
+ border-radius: 0 !important;
1122
+ background: transparent !important;
1123
+ box-shadow: none !important;
1124
+ overflow: hidden !important;
1125
+ }
1126
+
1127
+ .prompt-example-media-preview video,
1128
+ .prompt-example-media-preview img {
1129
+ width: 100% !important;
1130
+ height: 150px !important;
1131
+ object-fit: cover !important;
1132
+ border-radius: 0 !important;
1133
+ }
1134
+
1135
+ /* Keep the prompt column unchanged. Video examples fill the current row height,
1136
+ keep their original aspect ratio, and adapt their width inside the media column. */
1137
+ .prompt-example-video-cell,
1138
+ .prompt-example-video-cell > .form {
1139
+ display: flex !important;
1140
+ align-items: stretch !important;
1141
+ justify-content: center !important;
1142
+ padding: 0 !important;
1143
+ height: 100% !important;
1144
+ min-height: 150px !important;
1145
+ max-height: 260px !important;
1146
+ overflow: hidden !important;
1147
+ }
1148
+
1149
+ .prompt-example-video-preview,
1150
+ .prompt-example-video-preview > div,
1151
+ .prompt-example-video-preview .wrap {
1152
+ display: flex !important;
1153
+ align-items: center !important;
1154
+ justify-content: center !important;
1155
+ width: 100% !important;
1156
+ min-width: 0 !important;
1157
+ max-width: 100% !important;
1158
+ height: 100% !important;
1159
+ min-height: 150px !important;
1160
+ max-height: 260px !important;
1161
+ margin: 0 auto !important;
1162
+ border-radius: 0 !important;
1163
+ overflow: hidden !important;
1164
+ }
1165
+
1166
+ .prompt-example-video-preview video {
1167
+ width: auto !important;
1168
+ max-width: 100% !important;
1169
+ height: 100% !important;
1170
+ min-height: 150px !important;
1171
+ max-height: 260px !important;
1172
+ object-fit: contain !important;
1173
+ border-radius: 0 !important;
1174
+ }
1175
+
1176
+ .prompt-example-multimodal-row:last-child,
1177
+ .prompt-example-multimodal-row:last-child > .form {
1178
+ border-bottom: 0 !important;
1179
+ }
1180
+
1181
+ @media (max-width: 900px) {
1182
+ .prompt-example-table-header-with-media,
1183
+ .prompt-example-table-header-with-media > div,
1184
+ .prompt-example-table-header-with-media .wrap,
1185
+ .prompt-example-multimodal-row > .form {
1186
+ grid-template-columns: minmax(0, 1fr) minmax(140px, 180px) !important;
1187
+ }
1188
+ }
1189
+
1190
  @media (max-width: 900px) {
1191
  .lance-main-row {
1192
  grid-template-columns: minmax(0, 1fr) !important;
 
1200
  if (!element) {
1201
  return;
1202
  }
1203
+ if (element.style.getPropertyValue(property) !== value || element.style.getPropertyPriority(property) !== "important") {
1204
+ element.style.setProperty(property, value, "important");
1205
+ }
1206
  };
1207
 
1208
  const enforceLanceLabelTypography = () => {
 
1226
  });
1227
  };
1228
 
1229
const enforceRecommendedCaseText = () => {
    // Re-applies the recommended-case wrapping rules as inline "!important"
    // declarations on every example panel and its descendants.
    const wrapStyles = [
        ["white-space", "pre-wrap"],
        ["overflow-wrap", "anywhere"],
        ["word-break", "break-word"],
        ["text-overflow", "clip"],
        ["-webkit-line-clamp", "unset"],
        ["line-clamp", "unset"],
    ];
    const growStyles = [
        ["height", "auto"],
        ["max-height", "none"],
        ["overflow", "visible"],
    ];
    const buttonStyles = [
        ["width", "100%"],
        ["text-align", "center"],
        ["justify-content", "center"],
        ["align-items", "center"],
    ];
    // Each pass is [descendant selector, declarations]; passes run in this order.
    const passes = [
        ["table, tbody, tr, th, td, button, label, span, p, div", wrapStyles],
        ["td, button", growStyles],
        ["button", buttonStyles],
    ];

    for (const panel of document.querySelectorAll(".lance-recommended-section .example-panel")) {
        applyImportantStyle(panel, "overflow", "visible");
        for (const [selector, styles] of passes) {
            for (const node of panel.querySelectorAll(selector)) {
                for (const [property, value] of styles) {
                    applyImportantStyle(node, property, value);
                }
            }
        }
    }
};
1253
+
1254
+
1255
+
1256
const enforcePromptDatasetText = () => {
    // Re-applies the ".prompt-dataset" table rules as inline "!important"
    // declarations so long prompt cells wrap and scroll instead of being clipped.
    const containerStyles = [
        ["width", "100%"],
        ["max-width", "100%"],
        ["overflow-x", "auto"],
        ["overflow-y", "auto"],
    ];
    // Each entry is [descendant selector, declarations]; entries run in this order.
    const descendantPasses = [
        [".table-wrap", [
            ["width", "100%"],
            ["max-width", "100%"],
            ["max-height", "420px"],
            ["overflow-x", "auto"],
            ["overflow-y", "auto"],
            ["overscroll-behavior", "contain"],
        ]],
        ["table", [
            ["width", "100%"],
            ["min-width", "720px"],
            ["max-width", "none"],
            ["table-layout", "fixed"],
            ["border-collapse", "collapse"],
        ]],
        ["thead, tbody, tr, th, td, td.textbox, td[style*='35ch']", [
            ["height", "auto"],
            ["min-height", "0"],
            ["max-height", "none"],
            ["max-width", "none"],
            ["width", "100%"],
            ["min-width", "0"],
            ["white-space", "normal"],
            ["overflow", "visible"],
            ["text-overflow", "clip"],
            ["vertical-align", "top"],
        ]],
        ["td *", [
            ["max-width", "none"],
            ["white-space", "pre-wrap"],
            ["overflow-wrap", "anywhere"],
            ["word-break", "break-word"],
            ["text-overflow", "clip"],
            ["-webkit-line-clamp", "unset"],
            ["line-clamp", "unset"],
        ]],
        ["td > *", [
            ["width", "100%"],
            ["max-width", "none"],
            ["min-width", "0"],
            ["height", "auto"],
            ["min-height", "0"],
            ["max-height", "260px"],
            ["overflow-y", "auto"],
            ["overflow-x", "hidden"],
            ["overscroll-behavior", "contain"],
            ["white-space", "pre-wrap"],
            ["text-align", "left"],
        ]],
        ["td span, td p", [
            ["display", "block"],
        ]],
    ];

    const applyAll = (node, styles) => {
        for (const [property, value] of styles) {
            applyImportantStyle(node, property, value);
        }
    };

    for (const dataset of document.querySelectorAll(".prompt-dataset")) {
        applyAll(dataset, containerStyles);
        for (const [selector, styles] of descendantPasses) {
            for (const node of dataset.querySelectorAll(selector)) {
                applyAll(node, styles);
            }
        }
    }
};
1322
+
1323
const enforcePromptExampleRows = () => {
    // Mirrors the ".prompt-example-*" stylesheet rules as inline "!important"
    // declarations (via applyImportantStyle). Inline important declarations
    // outrank stylesheet ones, so this function has to honor the stylesheet's
    // conditional rules itself rather than overwrite them:
    //   * the "(max-width: 900px)" media query narrows the media grid column;
    //   * ":last-child" rows drop their bottom border.
    // Declarations are first staged per element (later passes override earlier
    // ones) and written once at the end, so an element matched by several
    // passes is not rewritten with conflicting values on every call.
    const BORDER_LINE = "1px solid var(--border-color-primary)";

    // Evaluated on every run; the value follows the viewport whenever this
    // function is re-invoked by the surrounding sync/observer hooks.
    const isNarrowViewport = window.matchMedia("(max-width: 900px)").matches;
    const gridColumns = isNarrowViewport
        ? "minmax(0, 1fr) minmax(140px, 180px)"
        : "minmax(0, 1fr) minmax(180px, 260px)";

    // Bottom border for an element that is, or sits inside, a row matched by
    // rowSelector: the last row in its parent gets no separator line.
    const bottomBorderFor = (element, rowSelector) => {
        const row = element.closest(rowSelector);
        return row !== null && row.matches(":last-child") ? "0" : BORDER_LINE;
    };

    // element -> (property -> value); Map keeps first-insertion order, so a
    // shorthand staged before its longhand is still applied before it.
    const staged = new Map();
    const stage = (selector, rules) => {
        document.querySelectorAll(selector).forEach((element) => {
            let declarations = staged.get(element);
            if (declarations === undefined) {
                declarations = new Map();
                staged.set(element, declarations);
            }
            const resolved = typeof rules === "function" ? rules(element) : rules;
            resolved.forEach(([property, value]) => declarations.set(property, value));
        });
    };

    stage(".prompt-example-full-table", [
        ["width", "100%"],
        ["max-width", "100%"],
        ["max-height", "460px"],
        ["overflow-x", "auto"],
        ["overflow-y", "auto"],
    ]);

    stage(".prompt-example-table-body, .prompt-example-table-body > .form", [
        ["width", "100%"],
        ["min-width", "720px"],
        ["gap", "0"],
    ]);

    stage(".prompt-example-row-button, .prompt-example-row-button button", (element) => [
        ["width", "100%"],
        ["max-width", "none"],
        ["height", "auto"],
        ["min-height", "54px"],
        ["max-height", "220px"],
        ["margin", "0"],
        ["padding", "12px 14px"],
        ["border-radius", "0"],
        ["border", "0"],
        ["border-bottom", bottomBorderFor(element, ".prompt-example-row-button")],
        ["display", "flex"],
        ["justify-content", "flex-start"],
        ["align-items", "flex-start"],
        ["text-align", "left"],
        ["overflow-x", "hidden"],
        ["overflow-y", "auto"],
        ["white-space", "normal"],
    ]);

    stage(".prompt-example-row-button span, .prompt-example-row-button p, .prompt-example-row-button div", [
        ["width", "100%"],
        ["max-width", "none"],
        ["display", "block"],
        ["overflow", "visible"],
        ["white-space", "pre-wrap"],
        ["overflow-wrap", "anywhere"],
        ["word-break", "break-word"],
        ["text-overflow", "clip"],
        ["-webkit-line-clamp", "unset"],
        ["line-clamp", "unset"],
        ["font-size", "16px"],
        ["line-height", "1.38"],
        ["text-align", "left"],
    ]);

    stage(".prompt-example-table-header-with-media, .prompt-example-table-header-with-media > div, .prompt-example-table-header-with-media .wrap, .prompt-example-multimodal-row > .form", [
        ["display", "grid"],
        ["grid-template-columns", gridColumns],
        ["gap", "0"],
    ]);

    stage(".prompt-example-multimodal-row, .prompt-example-multimodal-row > .form", (element) => [
        ["width", "100%"],
        ["min-width", "720px"],
        ["margin", "0"],
        ["border-bottom", bottomBorderFor(element, ".prompt-example-multimodal-row")],
    ]);

    // Overrides the generic row-button sizing for buttons inside media rows.
    stage(".prompt-example-multimodal-row .prompt-example-row-button, .prompt-example-multimodal-row .prompt-example-row-button button", [
        ["height", "100%"],
        ["min-height", "150px"],
        ["max-height", "260px"],
        ["border-bottom", "0"],
    ]);

    stage(".prompt-example-media-preview, .prompt-example-media-preview > div, .prompt-example-media-preview .wrap, .prompt-example-media-preview video, .prompt-example-media-preview img", [
        ["width", "100%"],
        ["height", "150px"],
        ["max-height", "150px"],
        ["border-radius", "0"],
        ["overflow", "hidden"],
    ]);

    stage(".prompt-example-video-cell, .prompt-example-video-cell > .form", [
        ["display", "flex"],
        ["align-items", "stretch"],
        ["justify-content", "center"],
        ["padding", "0"],
        ["height", "100%"],
        ["min-height", "150px"],
        ["max-height", "260px"],
        ["overflow", "hidden"],
    ]);

    stage(".prompt-example-video-preview, .prompt-example-video-preview > div, .prompt-example-video-preview .wrap", [
        ["display", "flex"],
        ["align-items", "center"],
        ["justify-content", "center"],
        ["width", "100%"],
        ["min-width", "0"],
        ["max-width", "100%"],
        ["height", "100%"],
        ["min-height", "150px"],
        ["max-height", "260px"],
        ["margin", "0 auto"],
        ["border-radius", "0"],
        ["overflow", "hidden"],
    ]);

    stage(".prompt-example-video-preview video", [
        ["width", "auto"],
        ["max-width", "100%"],
        ["height", "100%"],
        ["min-height", "150px"],
        ["max-height", "260px"],
        ["object-fit", "contain"],
        ["border-radius", "0"],
    ]);

    // Single write per (element, property) with the final resolved value.
    staged.forEach((declarations, element) => {
        declarations.forEach((value, property) => {
            applyImportantStyle(element, property, value);
        });
    });
};
1438
+
1439
  const syncOutputColumnHeight = () => {
1440
  const row = document.querySelector(".lance-main-row");
1441
  const inputColumn = document.querySelector(".lance-input-column");
 
1465
 
1466
const scheduleSync = () => {
    // Defer the style enforcement passes and the column-height sync to the
    // next animation frame; the steps run in the listed order.
    return requestAnimationFrame(() => {
        const steps = [
            enforceLanceLabelTypography,
            enforceRecommendedCaseText,
            enforcePromptDatasetText,
            enforcePromptExampleRows,
            syncOutputColumnHeight,
        ];
        for (const runStep of steps) {
            runStep();
        }
    });
};
1473
  const attachObservers = () => {
 
1490
  };
1491
 
1492
  enforceLanceLabelTypography();
1493
+ enforceRecommendedCaseText();
1494
+ enforcePromptDatasetText();
1495
+ enforcePromptExampleRows();
1496
  attachObservers();
1497
  new MutationObserver(() => {
1498
  enforceLanceLabelTypography();
1499
+ enforceRecommendedCaseText();
1500
+ enforcePromptDatasetText();
1501
+ enforcePromptExampleRows();
1502
  attachObservers();
1503
  }).observe(document.body, {
1504
  childList: true,
 
1550
  IMAGE_TASKS = {TASK_T2I, TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
1551
  VIDEO_TASKS = {TASK_T2V, TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
1552
  EDIT_TASKS = {TASK_IMAGE_EDIT, TASK_VIDEO_EDIT}
1553
+ VIDEO_RESOLUTION_CHOICES = ["video_360p", "video_480p"]
1554
+ VIDEO_RESOLUTION_DISPLAY_CHOICES = [
1555
+ ("video_360p", "video_360p"),
1556
+ ("video_480p(Higher quota usage. Use sparingly.)", "video_480p"),
1557
+ ]
1558
+ VIDEO_EDIT_RESOLUTION_CHOICES = [DEFAULT_VIDEO_EDIT_RESOLUTION]
1559
  IMAGE_RESOLUTION_CHOICES = [DEFAULT_IMAGE_RESOLUTION]
1560
  RESOLUTION_CHOICES = VIDEO_RESOLUTION_CHOICES + IMAGE_RESOLUTION_CHOICES
1561
  CAPTION_SYSTEM_PROMPT_TEMPLATE = (
 
1578
 
1579
 
1580
def get_video_duration_choices() -> list[tuple[str, int]]:
    """Return ``(label, seconds)`` pairs for durations of 1 through 10 seconds.

    Each label is the number of seconds followed by ``"s"`` (for example
    ``("3s", 3)``).
    """
    durations = range(1, 11)
    labels = (f"{duration}s" for duration in durations)
    return list(zip(labels, durations))
1582
 
1583
  def env_flag(name: str, default: bool) -> bool:
1584
  value = os.getenv(name)
 
1803
 
1804
 
1805
def video_seconds_to_num_frames(
    seconds: int,
    *,
    max_seconds: int = 10,
    frames_per_second: int = 12,
) -> int:
    """Convert a requested duration in seconds to a frame count.

    The duration is coerced with ``int()`` and clamped to the inclusive range
    ``[1, max_seconds]`` before conversion, so out-of-range requests never
    produce fewer than one second or more than ``max_seconds`` of frames.

    Args:
        seconds: Requested duration; anything ``int()`` accepts.
        max_seconds: Upper clamp for the duration (defaults to 10).
        frames_per_second: Frames generated per second (defaults to 12).

    Returns:
        ``frames_per_second * clamped_seconds + 1``.

    Raises:
        TypeError, ValueError: If ``seconds`` cannot be converted by ``int()``.
    """
    clamped_seconds = max(1, min(max_seconds, int(seconds)))
    return frames_per_second * clamped_seconds + 1
1808
 
1809
 
 
1815
  return task
1816
 
1817
 
1818
def normalize_resolution_choice_value(resolution: str, task: str) -> str:
    """Map a resolution label or value to the canonical value for ``task``.

    The choices for ``task`` may be plain strings or ``(label, value)`` tuples.
    When the stripped input equals either the label or the value of a choice,
    that choice's value is returned. Otherwise the stripped input is returned
    unchanged (an empty string for falsy input).
    """
    candidate = str(resolution or "").strip()
    for entry in get_resolution_choices_for_task(task):
        # A plain entry acts as both its own label and its own value.
        label, value = entry if isinstance(entry, tuple) else (entry, entry)
        if candidate == str(label) or candidate == str(value):
            return str(value)
    return candidate
1828
+
1829
+
1830
def get_resolution_choice_values_for_task(task: str) -> list[str]:
    """Return only the values of the resolution choices offered for ``task``.

    ``(label, value)`` tuple choices contribute their value (second item);
    plain choices are passed through as-is.
    """
    return [
        entry[1] if isinstance(entry, tuple) else entry
        for entry in get_resolution_choices_for_task(task)
    ]
1836
+
1837
+
1838
def get_resolution_choices_for_task(task: str) -> list[str | tuple[str, str]]:
    """Return the resolution choices offered for ``task``.

    Entries are either plain values or ``(label, value)`` tuples. The returned
    list is the module-level constant itself, not a copy, so callers must not
    mutate it.
    """
    internal_task = normalize_task(task)
    if internal_task in IMAGE_TASKS:
        return IMAGE_RESOLUTION_CHOICES
    if internal_task == TASK_T2V:
        # Only text-to-video exposes the labelled resolution profiles.
        return VIDEO_RESOLUTION_DISPLAY_CHOICES
    if internal_task in VIDEO_TASKS:
        # Every other video task (including TASK_VIDEO_EDIT) is restricted to
        # the video-edit resolution list.
        return VIDEO_EDIT_RESOLUTION_CHOICES
    return VIDEO_RESOLUTION_CHOICES
1849
+
1850
+
1851
def get_default_resolution_for_task(task: str) -> str:
    """Return the default resolution value for ``task``.

    Image tasks use ``DEFAULT_IMAGE_RESOLUTION``, text-to-video uses
    ``DEFAULT_RESOLUTION``, every other video task uses
    ``DEFAULT_VIDEO_EDIT_RESOLUTION``, and any unrecognized task falls back to
    ``DEFAULT_RESOLUTION``.
    """
    internal_task = normalize_task(task)
    if internal_task in IMAGE_TASKS:
        return DEFAULT_IMAGE_RESOLUTION
    # Video Generation defaults to DEFAULT_RESOLUTION, which is intended to be
    # the lightweight/recommended 360p profile (NOTE(review): confirm the
    # constant's value). reset_generation_defaults_for_task() reads this, so
    # the same default is applied when generation defaults are reset.
    if internal_task == TASK_T2V:
        return DEFAULT_RESOLUTION
    if internal_task in VIDEO_TASKS:
        # Covers TASK_VIDEO_EDIT and the remaining non-T2V video tasks.
        return DEFAULT_VIDEO_EDIT_RESOLUTION
    return DEFAULT_RESOLUTION
1866
+
1867
+
1868
def normalize_resolution_for_backend(resolution: str, task: str) -> str:
    """Resolve ``resolution`` to a value that is valid for ``task``.

    The input may be a choice label or a choice value. If it does not resolve
    to one of the values offered for the task, the task's default resolution
    is returned instead.
    """
    internal_task = normalize_task(task)
    candidate = normalize_resolution_choice_value(resolution, internal_task)
    if candidate not in get_resolution_choice_values_for_task(internal_task):
        return get_default_resolution_for_task(internal_task)
    return candidate
1875
 
1876
 
1877
  def get_default_aspect_ratio(task: str) -> str:
 
1879
  return DEFAULT_IMAGE_ASPECT_RATIO if internal_task in IMAGE_TASKS else DEFAULT_VIDEO_ASPECT_RATIO
1880
 
1881
 
1882
def normalize_video_resolution(resolution: Optional[str], task: Optional[str] = None) -> str:
    """Return a valid video resolution value for ``resolution``.

    Args:
        resolution: A resolution value or, when ``task`` is given, a choice
            label or value. May be ``None``.
        task: Optional task used to pick the allowed choices and default.

    Returns:
        Without a task: ``resolution`` if it is one of
        ``VIDEO_RESOLUTION_CHOICES``, else ``DEFAULT_RESOLUTION``.
        With a task: the result of ``normalize_resolution_for_backend``, i.e.
        the canonical choice value or the task's default resolution.
    """
    if task is None:
        return resolution if resolution in VIDEO_RESOLUTION_CHOICES else DEFAULT_RESOLUTION
    # Task-aware validation is shared with the backend normalizer so the two
    # code paths cannot drift apart.
    return normalize_resolution_for_backend(resolution, task)
1888
+
1889
+
1890
def get_size_for_aspect_ratio(task: str, aspect_ratio: str, video_resolution: Optional[str] = None) -> tuple[int, int]:
    """Look up the size tuple for ``aspect_ratio`` under ``task``.

    Args:
        task: Task name; normalized with ``normalize_task``.
        aspect_ratio: One of ``ASPECT_RATIO_CHOICES``; any other value is
            replaced by the task's default aspect ratio.
        video_resolution: Video resolution profile used to select the size map
            for non-image tasks; ignored for image tasks.

    Returns:
        The size tuple stored in the selected size map for the aspect ratio.
    """
    internal_task = normalize_task(task)
    if aspect_ratio not in ASPECT_RATIO_CHOICES:
        aspect_ratio = get_default_aspect_ratio(internal_task)
    # Size-map selection lives in get_size_map_for_task so the image/video
    # branching is defined in exactly one place.
    return get_size_map_for_task(internal_task, video_resolution)[aspect_ratio]
1898
 
1899
 
 
1905
  return f"{width} x {height}"
1906
 
1907
 
1908
def get_size_map_for_task(task: str, video_resolution: Optional[str] = None) -> dict[str, tuple[int, int]]:
    """Return the aspect-ratio-to-size mapping that applies to ``task``.

    Image tasks always use ``IMAGE_ASPECT_RATIO_TO_SIZE``. All other tasks use
    the ``VIDEO_RESOLUTION_TO_SIZE_MAP`` entry for the normalized
    ``video_resolution`` profile.
    """
    internal_task = normalize_task(task)
    if internal_task not in IMAGE_TASKS:
        profile = normalize_video_resolution(video_resolution, internal_task)
        return VIDEO_RESOLUTION_TO_SIZE_MAP[profile]
    return IMAGE_ASPECT_RATIO_TO_SIZE
1913
 
1914
 
1915
+ def get_output_resolution_choices_for_task(task: str, video_resolution: Optional[str] = None) -> list[tuple[str, str]]:
1916
  """Get Output Resolution choices with a one-to-one mapping to aspect ratios."""
1917
  internal_task = normalize_task(task)
1918
  default_ratio = get_default_aspect_ratio(internal_task)
1919
+ size_map = get_size_map_for_task(internal_task, video_resolution)
1920
  choices = []
1921
  for ratio in ASPECT_RATIO_CHOICES:
1922
  width, height = size_map[ratio]
 
1926
  return choices
1927
 
1928
 
1929
+ def get_aspect_ratio_for_output_resolution(task: str, output_resolution: str, video_resolution: Optional[str] = None) -> str:
1930
  internal_task = normalize_task(task)
1931
  resolution_text = str(output_resolution or "").strip()
1932
+ size_map = get_size_map_for_task(internal_task, video_resolution)
1933
  for ratio in ASPECT_RATIO_CHOICES:
1934
  width, height = size_map[ratio]
1935
  if resolution_text == format_size_markdown(internal_task, width, height):
 
1986
  return f'<div class="{class_names}">{icon_html}<span>{html.escape(text)}</span></div>'
1987
 
1988
 
1989
def update_size_from_aspect_ratio(task: str, aspect_ratio: str, video_resolution: Optional[str] = None):
    """Recompute height, width and the Output Resolution update for an aspect ratio.

    Returns:
        ``(height, width, update)`` where ``update`` is a ``gr.update`` that
        carries the output-resolution choices for the task and selects the
        entry formatted from the looked-up size.
    """
    size = get_size_for_aspect_ratio(task, aspect_ratio, video_resolution)
    width, height = size
    resolution_choices = get_output_resolution_choices_for_task(task, video_resolution)
    selected_label = format_size_markdown(task, width, height)
    output_resolution_update = gr.update(choices=resolution_choices, value=selected_label)
    return height, width, output_resolution_update
1995
 
1996
 
1997
def update_aspect_ratio_from_output_resolution(task: str, output_resolution: str, video_resolution: Optional[str] = None):
    """Derive the aspect ratio and size that match a selected Output Resolution.

    Returns:
        ``(aspect_ratio, height, width)`` for the aspect ratio resolved from
        ``output_resolution``.
    """
    matched_ratio = get_aspect_ratio_for_output_resolution(task, output_resolution, video_resolution)
    size = get_size_for_aspect_ratio(task, matched_ratio, video_resolution)
    width, height = size
    return matched_ratio, height, width
2001
 
2002
 
2003
def update_output_resolution_from_video_profile(task: str, aspect_ratio: str, video_resolution: str):
    """Refresh the Output Resolution control after the video profile changes.

    Returns:
        ``(update, height, width)`` where ``update`` is a ``gr.update`` that
        carries the output-resolution choices for the profile and selects the
        entry formatted from the looked-up size.
    """
    size = get_size_for_aspect_ratio(task, aspect_ratio, video_resolution)
    width, height = size
    resolution_choices = get_output_resolution_choices_for_task(task, video_resolution)
    selected_label = format_size_markdown(task, width, height)
    output_resolution_update = gr.update(choices=resolution_choices, value=selected_label)
    return output_resolution_update, height, width
2013
+
2014
+
2015
def reset_generation_defaults_for_task(task: str):
    """Build the default generation settings for ``task``.

    Returns:
        ``(aspect_ratio, height, width, duration_seconds, resolution, update)``
        where ``duration_seconds`` is ``DEFAULT_VIDEO_DURATION_SECONDS`` and
        ``update`` is a ``gr.update`` for the Output Resolution control.
    """
    internal_task = normalize_task(task)
    default_ratio = get_default_aspect_ratio(internal_task)
    default_resolution = get_default_resolution_for_task(internal_task)
    size = get_size_for_aspect_ratio(internal_task, default_ratio, default_resolution)
    width, height = size
    # This slot carries the default duration in seconds, not a frame count.
    duration_seconds = DEFAULT_VIDEO_DURATION_SECONDS
    resolution_choices = get_output_resolution_choices_for_task(internal_task, default_resolution)
    selected_label = format_size_markdown(internal_task, width, height)
    output_resolution_update = gr.update(choices=resolution_choices, value=selected_label)
    return (
        default_ratio,
        height,
        width,
        duration_seconds,
        default_resolution,
        output_resolution_update,
    )
2025
 
2026
 
2027
  def apply_prompt_example(task: str, evt: gr.SelectData):
 
2036
  return (prompt_text, *defaults)
2037
 
2038
 
2039
def make_prompt_example_click_handler(prompt_text: str):
    """Build the click callback for one text-only prompt-example row.

    The full prompt string is captured in the closure, so a click fills the
    prompt with the complete text regardless of how the row is displayed.
    (The original author notes that gr.Dataset / gr.Examples previews can
    truncate long text, which is why custom button rows are used instead.)

    Args:
        prompt_text: The complete prompt to place in the prompt field on click.

    Returns:
        A callable taking the current task and returning ``prompt_text``
        followed by the values of ``reset_generation_defaults_for_task(task)``.
    """

    def _handler(task: str):
        # Clicking an example also resets generation settings to task defaults.
        task_defaults = reset_generation_defaults_for_task(task)
        return (prompt_text,) + tuple(task_defaults)

    return _handler
2053
+
2054
+
2055
def make_media_prompt_example_click_handler(
    prompt_text: str,
    input_video_path: Optional[str] = None,
    input_image_path: Optional[str] = None,
):
    """Build the click callback for one example row that carries input media.

    The closure keeps the full prompt/instruction/question together with the
    matching media paths, so a single click can populate the text field and
    both media inputs.

    Args:
        prompt_text: The complete text to place in the prompt field on click.
        input_video_path: Video path returned for the video input, or ``None``.
        input_image_path: Image path returned for the image input, or ``None``.

    Returns:
        A callable taking the current task and returning ``(prompt_text,
        input_video_path, input_image_path)`` followed by the values of
        ``reset_generation_defaults_for_task(task)``.
    """

    def _handler(task: str):
        # Clicking an example also resets generation settings to task defaults.
        task_defaults = reset_generation_defaults_for_task(task)
        media_values = (prompt_text, input_video_path, input_image_path)
        return media_values + tuple(task_defaults)

    return _handler
2072
+
2073
+
2074
  def get_understanding_system_prompt_choices(task: str) -> list[str]:
2075
  internal_task = normalize_task(task)
2076
  if internal_task == TASK_X2T_IMAGE:
 
2598
  )
2599
 
2600
  stage_start = time.perf_counter()
2601
+ print(f"[startup][gpu:{self.device}] Casting Lance model to bf16 on CPU", flush=True)
2602
+ model = model.to(dtype=torch.bfloat16)
2603
+ self._log_stage("Lance model bf16 cast", stage_start)
2604
 
2605
  stage_start = time.perf_counter()
2606
  print(f"[startup][gpu:{self.device}] Loading tokenizer: {model_args.model_path}", flush=True)
 
2638
  != model.language_model.get_output_embeddings().weight.data.data_ptr()
2639
  ), "tie_word_embeddings conflict"
2640
 
2641
+ stage_start = time.perf_counter()
2642
+ print(f"[startup][gpu:{self.device}] Moving Lance model to GPU {self.device}", flush=True)
2643
+ model = model.to(device=self.device)
2644
+ self._log_stage("Lance model move to GPU", stage_start)
2645
  model.eval()
2646
  if vae_model is not None and hasattr(vae_model, "eval"):
2647
  vae_model.eval()
 
3188
  print(f"[startup] flash-attn {DEFAULT_FLASH_ATTN_VERSION} installed successfully.", flush=True)
3189
 
3190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3191
  def get_env_int(name: str, default: int) -> int:
3192
  """Read an integer environment variable, falling back safely on invalid values."""
3193
  try:
 
3196
  return default
3197
 
3198
 
3199
def get_env_float(name: str, default: float) -> float:
    """Read a finite float from an environment variable, else return ``default``.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset, cannot be parsed
            as a float, or parses to a non-finite value.

    Returns:
        The parsed float, or ``default`` on any of the fallbacks above.
    """
    import math  # local import keeps this helper self-contained

    try:
        parsed = float(os.getenv(name, str(default)))
    except (TypeError, ValueError):
        return default
    # float() happily accepts "inf" and "nan"; such values would later break
    # arithmetic such as int(seconds * margin), so treat them as invalid.
    if not math.isfinite(parsed):
        return default
    return parsed
3205
+
3206
+
3207
  def get_zerogpu_duration_cap() -> int:
3208
  """Maximum duration requested from ZeroGPU.
3209
 
3210
+ The duration value is a ZeroGPU reservation/timeout hint. Shorter values can
3211
+ improve queue priority and reduce wasted quota, but the value must still cover
3212
+ model warm-up plus inference. Override per deployment when needed:
3213
+ LANCE_ZEROGPU_MAX_DURATION_SECONDS=300
3214
  """
3215
+ return max(1, get_env_int("LANCE_ZEROGPU_MAX_DURATION_SECONDS", 240))
3216
 
3217
 
3218
  def clamp_zerogpu_duration(seconds: int) -> int:
3219
  return max(1, min(int(seconds), get_zerogpu_duration_cap()))
3220
 
3221
 
3222
def is_pipeline_pool_ready_for_task(task: str) -> bool:
    """Report whether the active pipeline pool already serves ``task``.

    Returns True only when ``ACTIVE_PIPELINE_POOL`` exists, its
    ``model_variant`` matches the variant required by ``task``, and every
    pipeline in the pool has a truthy ``initialized`` attribute. Any exception
    raised while checking is treated as "not ready" and yields False.

    Per the original author's note, ZeroGPU evaluates the dynamic duration
    before calling the decorated function, so this lets warm runs request a
    shorter duration while cold starts or model switches reserve extra time.
    """
    try:
        active_pool = ACTIVE_PIPELINE_POOL
        if active_pool is None:
            return False
        if active_pool.model_variant != get_task_model_variant(task):
            return False
        for member in active_pool.pipelines:
            if not getattr(member, "initialized", False):
                return False
        return True
    except Exception:
        # Best-effort probe: never let a readiness check break the request.
        return False
3237
+
3238
+
3239
def finalize_zerogpu_duration(estimated_seconds: float, task: str) -> int:
    """Turn a raw time estimate into the ZeroGPU duration to request.

    Args:
        estimated_seconds: Estimated run time in seconds.
        task: Task identifier used to check whether its pipeline pool is ready.

    Returns:
        The clamped integer duration. When the pool is not ready,
        ``LANCE_ZEROGPU_COLD_START_BUFFER_SECONDS`` (default 120, floored at 0)
        is added first. The total is then scaled by
        ``LANCE_ZEROGPU_DURATION_MARGIN`` (default 1.10, floored at 1.0).
    """
    safety_margin = max(1.0, get_env_float("LANCE_ZEROGPU_DURATION_MARGIN", 1.10))
    total_seconds = estimated_seconds
    if not is_pipeline_pool_ready_for_task(task):
        cold_start_buffer = max(0, get_env_int("LANCE_ZEROGPU_COLD_START_BUFFER_SECONDS", 120))
        total_seconds = total_seconds + cold_start_buffer
    # Adding 0.999 before truncation rounds fractional seconds upward.
    padded_seconds = int(total_seconds * safety_margin + 0.999)
    return clamp_zerogpu_duration(padded_seconds)
3245
+
3246
+
3247
  def get_run_task_gpu_duration(
3248
  task: str,
3249
  prompt: str,
 
3260
  cfg_text_scale: float,
3261
  enable_frame_interpolation: bool,
3262
  ) -> int:
3263
+ """Return a dynamic ZeroGPU reservation duration.
3264
 
3265
+ The previous implementation used one conservative estimate for both cold and
3266
+ warm runs. This version keeps the first request safe, then asks for shorter
3267
+ durations once the matching Lance model is already loaded, which reduces
3268
+ wasted ZeroGPU quota and improves queue priority without changing the UI.
3269
  """
3270
  internal_task = normalize_task(task)
3271
+ timesteps = max(1, int(validation_num_timesteps or DEFAULT_TIMESTEPS))
3272
+ backend_resolution = normalize_resolution_for_backend(str(resolution), internal_task)
3273
+ resolution_multiplier = 1.28 if backend_resolution == "video_480p" else 1.0
3274
+ timestep_extra = max(0, timesteps - 20)
3275
+
3276
+ if internal_task == TASK_T2V:
3277
+ requested_seconds = max(1, int(num_frames or DEFAULT_VIDEO_DURATION_SECONDS))
3278
+ estimate = 35 + requested_seconds * 10 + timestep_extra * 1.5
3279
+ if normalize_frame_interpolation(enable_frame_interpolation):
3280
+ estimate += min(32, 8 + requested_seconds * 3)
3281
+ return finalize_zerogpu_duration(estimate * resolution_multiplier, internal_task)
3282
+
3283
+ if internal_task == TASK_VIDEO_EDIT:
3284
+ estimate = 85 + timestep_extra * 1.5
3285
+ if normalize_frame_interpolation(enable_frame_interpolation):
3286
+ estimate += 22
3287
+ return finalize_zerogpu_duration(estimate * resolution_multiplier, internal_task)
3288
+
3289
  if internal_task == TASK_X2T_VIDEO:
3290
+ return finalize_zerogpu_duration(32, internal_task)
3291
+ if internal_task == TASK_T2I:
3292
+ return finalize_zerogpu_duration(58, internal_task)
3293
+ if internal_task == TASK_IMAGE_EDIT:
3294
+ return finalize_zerogpu_duration(70, internal_task)
3295
+ return finalize_zerogpu_duration(28, internal_task)
3296
 
3297
 
3298
  def get_pipeline_pool(task: str) -> PipelinePool:
 
3365
  gpu_text = "unknown"
3366
  concurrency = 1
3367
  active_variant = "none"
 
3368
  if ACTIVE_PIPELINE_POOL is not None:
3369
  active_variant = ACTIVE_PIPELINE_POOL.model_variant
3370
  gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
3371
  concurrency = ACTIVE_PIPELINE_POOL.size
 
 
 
 
 
 
3372
  return (
3373
  f"**Status** GPU: `{gpu_text}` | Max concurrency: `{concurrency}` | "
3374
  f"Queue limit: `{QUEUE_MAX_SIZE}` | Active model: `{active_variant}` | "
3375
+ f"Switch mode: `unload then load`"
3376
  )
3377
 
3378
 
 
3435
  is_edit_task = internal_task in EDIT_TASKS
3436
  is_understanding_task = internal_task in UNDERSTANDING_TASKS
3437
  is_generation_task = internal_task in GENERATION_TASKS
3438
+ is_text_to_visual_task = internal_task in {TASK_T2V, TASK_T2I}
3439
  show_media_input = is_edit_task or is_understanding_task
3440
+ resolution_choices = get_resolution_choice_values_for_task(internal_task)
3441
+ resolution_value = get_default_resolution_for_task(internal_task)
3442
  aspect_ratio_value = DEFAULT_IMAGE_ASPECT_RATIO if is_image_task else DEFAULT_VIDEO_ASPECT_RATIO
3443
+ width_value, height_value = get_size_for_aspect_ratio(internal_task, aspect_ratio_value, resolution_value)
3444
  size_markdown = format_size_markdown(internal_task, width_value, height_value)
3445
  system_prompt_choices = get_understanding_system_prompt_choices(internal_task)
3446
 
3447
+ if is_text_to_visual_task:
3448
  text_label = "Prompt"
3449
  text_placeholder = "Describe what you want to generate..."
3450
  elif is_edit_task:
 
3463
 
3464
  output_icon = "video" if output_label == "Output Video" else "image" if output_label == "Output Image" else "text"
3465
  show_generation_settings = is_generation_task or is_edit_task
3466
+ show_aspect_ratio = is_text_to_visual_task
3467
+ show_output_resolution = is_text_to_visual_task
3468
  show_input_video = internal_task in {TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
3469
  show_input_image = internal_task in {TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
3470
+ show_frame_interpolation_settings = internal_task in {TASK_T2V, TASK_VIDEO_EDIT}
3471
+ show_video_resolution_settings = internal_task == TASK_T2V
3472
 
3473
  return (
3474
  gr.update(value=build_lance_label_html(text_label, "lance-prompt-label")),
 
3476
  label=text_label,
3477
  placeholder=text_placeholder,
3478
  visible=True,
3479
+ value="",
3480
  ),
3481
  gr.update(
3482
  choices=system_prompt_choices,
3483
  value=system_prompt_choices[0],
3484
  visible=False,
3485
  ),
3486
+ # Switching task pages should always start from a clean input state.
3487
+ # Clear both visual input boxes even if one of them stays visible across tasks.
3488
  gr.update(label="Input Video", visible=show_input_video, value=None),
3489
  gr.update(label="Input Image", visible=show_input_image, value=None),
3490
+ gr.update(visible=show_frame_interpolation_settings),
3491
  gr.update(visible=show_aspect_ratio),
3492
+ gr.update(visible=show_output_resolution),
3493
  gr.update(visible=internal_task == TASK_T2V),
3494
+ gr.update(visible=show_video_resolution_settings),
3495
  gr.update(choices=get_aspect_ratio_choices_for_task(internal_task), value=aspect_ratio_value, visible=show_aspect_ratio),
3496
  gr.update(value=height_value),
3497
  gr.update(value=width_value),
3498
+ gr.update(visible=show_frame_interpolation_settings, value=DEFAULT_FRAME_INTERPOLATION),
3499
+ gr.update(choices=get_output_resolution_choices_for_task(internal_task, resolution_value), value=size_markdown, visible=show_output_resolution),
3500
  gr.update(visible=internal_task == TASK_T2V, value=DEFAULT_VIDEO_DURATION_SECONDS),
3501
+ gr.update(choices=resolution_choices, value=resolution_value, visible=show_video_resolution_settings),
3502
  gr.update(value=build_lance_icon_label_html(output_label, output_icon, "lance-output-label")),
3503
  gr.update(visible=internal_task in {TASK_T2V, TASK_VIDEO_EDIT}),
3504
  gr.update(visible=internal_task in {TASK_T2I, TASK_IMAGE_EDIT}),
 
3576
  value=DEFAULT_VIDEO_ASPECT_RATIO,
3577
  elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
3578
  )
3579
+ with gr.Row(elem_classes=["generation-controls-row", "output-resolution-row"]) as output_resolution_row:
3580
  with gr.Column(elem_classes=["lance-control-field"]):
3581
  gr.HTML('<div class="lance-generation-label">Output Resolution</div>', elem_classes=["lance-label-html"])
3582
  real_size = gr.Radio(
 
3587
  interactive=True,
3588
  elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
3589
  )
 
 
 
 
 
 
 
 
3590
  with gr.Row(elem_classes=["generation-controls-row", "video-duration-row"]) as video_duration_row:
3591
  with gr.Column(elem_classes=["lance-control-field"]):
3592
  gr.HTML(build_lance_label_html("Video Duration (seconds)", "lance-generation-label"), elem_classes=["lance-label-html"])
3593
+ num_frames = gr.Radio(
3594
  label="Video Duration (seconds)",
3595
  show_label=False,
3596
+ choices=get_video_duration_choices(),
 
 
3597
  value=DEFAULT_VIDEO_DURATION_SECONDS,
3598
  elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
3599
  )
3600
+ with gr.Row(elem_classes=["generation-controls-row", "video-resolution-row"]) as video_resolution_row:
3601
+ with gr.Column(elem_classes=["lance-control-field"]):
3602
+ gr.HTML(build_lance_label_html("Video Resolution", "lance-generation-label"), elem_classes=["lance-label-html"])
3603
+ resolution = gr.Dropdown(
3604
+ label="Video Resolution",
3605
+ show_label=False,
3606
+ choices=VIDEO_RESOLUTION_DISPLAY_CHOICES,
3607
+ value=DEFAULT_RESOLUTION,
3608
+ elem_classes=["generation-control"],
3609
+ )
3610
+ height = gr.Number(value=DEFAULT_HEIGHT, precision=0, visible=False)
3611
+ width = gr.Number(value=DEFAULT_WIDTH, precision=0, visible=False)
3612
 
3613
  with gr.Accordion("Advanced Parameters", open=False, elem_classes=["lance-advanced-accordion"]):
3614
  with gr.Column(elem_classes=["lance-control-field"]):
 
3673
 
3674
  run_button = gr.Button("🚀 Generate", variant="primary", elem_classes=["lance-run-button"])
3675
 
3676
def build_prompt_example_table(examples: list[list], media_type: Optional[str] = None):
    """Lay out one clickable row per example, showing the complete prompt text.

    Args:
        examples: Rows of ``[prompt, video_path, image_path]``; trailing
            entries may be missing or falsy.
        media_type: ``"video"`` or ``"image"`` to add a media preview column;
            any other value renders prompt-only rows.

    Returns:
        A list of ``(button, prompt, video_path, image_path)`` tuples, one per
        example, for the caller to attach click handlers to. A media path is
        set only for rows rendered with that media preview; otherwise ``None``.
    """

    def _row_button(label: str):
        # Every row uses the same secondary-styled button carrying the prompt.
        return gr.Button(
            label,
            variant="secondary",
            elem_classes=["prompt-example-row-button"],
        )

    collected_rows = []
    with gr.Column(elem_classes=["prompt-example-full-table"]):
        # Header: two columns when media previews are shown, one otherwise.
        if media_type == "video":
            gr.HTML("<div>Prompt / Instruction / Question</div><div>Input Video</div>", elem_classes=["prompt-example-table-header", "prompt-example-table-header-with-media"])
        elif media_type == "image":
            gr.HTML("<div>Prompt / Instruction / Question</div><div>Input Image</div>", elem_classes=["prompt-example-table-header", "prompt-example-table-header-with-media"])
        else:
            gr.HTML("<div>Prompt</div>", elem_classes=["prompt-example-table-header"])

        with gr.Column(elem_classes=["prompt-example-table-body"]):
            for entry in examples:
                prompt_value = str(entry[0]) if entry else ""
                video_value = str(entry[1]) if len(entry) > 1 and entry[1] else None
                image_value = str(entry[2]) if len(entry) > 2 and entry[2] else None

                if media_type == "video" and video_value:
                    with gr.Row(elem_classes=["prompt-example-multimodal-row", "prompt-example-video-row"]):
                        with gr.Column(elem_classes=["prompt-example-prompt-cell"]):
                            row_button = _row_button(prompt_value)
                        with gr.Column(elem_classes=["prompt-example-media-cell", "prompt-example-video-cell"]):
                            gr.Video(
                                value=video_value,
                                label="Input Video",
                                show_label=False,
                                interactive=False,
                                elem_classes=["prompt-example-media-preview", "prompt-example-video-preview"],
                            )
                    collected_rows.append((row_button, prompt_value, video_value, None))
                    continue

                if media_type == "image" and image_value:
                    with gr.Row(elem_classes=["prompt-example-multimodal-row"]):
                        with gr.Column(elem_classes=["prompt-example-prompt-cell"]):
                            row_button = _row_button(prompt_value)
                        with gr.Column(elem_classes=["prompt-example-media-cell"]):
                            gr.Image(
                                value=image_value,
                                label="Input Image",
                                show_label=False,
                                interactive=False,
                                type="filepath",
                                elem_classes=["prompt-example-media-preview"],
                            )
                    collected_rows.append((row_button, prompt_value, None, image_value))
                    continue

                # No usable media for this row: render a prompt-only button.
                row_button = _row_button(prompt_value)
                collected_rows.append((row_button, prompt_value, None, None))
    return collected_rows
3736
+
3737
  with gr.Column(visible=True, elem_classes=["lance-recommended-section"]) as video_generation_examples_group:
3738
  gr.HTML(build_lance_label_html("Video generation recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
3739
  with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
3740
+ video_generation_example_buttons = build_prompt_example_table(VIDEO_GENERATION_EXAMPLES)
 
 
 
 
 
 
 
 
 
3741
 
3742
  with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as video_edit_examples_group:
3743
  gr.HTML(build_lance_label_html("Video edit recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
3744
+ with gr.Group(elem_classes=["example-panel", "prompt-examples", "video-edit-examples"]):
3745
+ video_edit_example_buttons = build_prompt_example_table(VIDEO_EDIT_EXAMPLES, media_type="video")
 
 
 
 
 
 
 
 
3746
 
3747
  with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as video_understanding_examples_group:
3748
  gr.HTML(build_lance_label_html("Video understanding recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
3749
+ with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
3750
+ video_understanding_example_buttons = build_prompt_example_table(VIDEO_UNDERSTANDING_EXAMPLES, media_type="video")
 
 
 
 
 
 
 
 
3751
 
3752
  with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_generation_examples_group:
3753
  gr.HTML(build_lance_label_html("Image generation recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
3754
  with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
3755
+ image_generation_example_buttons = build_prompt_example_table(IMAGE_GENERATION_EXAMPLES)
 
 
 
 
 
 
 
 
 
3756
 
3757
  with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_edit_examples_group:
3758
  gr.HTML(build_lance_label_html("Image edit recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
3759
+ with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
3760
+ image_edit_example_buttons = build_prompt_example_table(IMAGE_EDIT_EXAMPLES, media_type="image")
 
 
 
 
 
 
 
 
3761
 
3762
  with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_understanding_examples_group:
3763
  gr.HTML(build_lance_label_html("Image understanding recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
3764
+ with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
3765
+ image_understanding_example_buttons = build_prompt_example_table(IMAGE_UNDERSTANDING_EXAMPLES, media_type="image")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3766
 
3767
  task.change(
3768
  fn=update_task_ui,
 
3777
  aspect_ratio_row,
3778
  output_resolution_row,
3779
  video_duration_row,
3780
+ video_resolution_row,
3781
  aspect_ratio,
3782
  height,
3783
  width,
 
3800
 
3801
  aspect_ratio.change(
3802
  fn=update_size_from_aspect_ratio,
3803
+ inputs=[task, aspect_ratio, resolution],
3804
  outputs=[height, width, real_size],
3805
  queue=False,
3806
  show_api=False,
 
3808
 
3809
  real_size.change(
3810
  fn=update_aspect_ratio_from_output_resolution,
3811
+ inputs=[task, real_size, resolution],
3812
  outputs=[aspect_ratio, height, width],
3813
  queue=False,
3814
  show_api=False,
3815
  )
3816
 
3817
+ resolution.change(
3818
+ fn=update_output_resolution_from_video_profile,
3819
+ inputs=[task, aspect_ratio, resolution],
3820
+ outputs=[real_size, height, width],
3821
+ queue=False,
3822
+ show_api=False,
3823
+ )
3824
+
3825
+ for example_button, example_prompt, _, _ in video_generation_example_buttons + image_generation_example_buttons:
3826
+ example_button.click(
3827
+ fn=make_prompt_example_click_handler(example_prompt),
3828
  inputs=[task],
3829
+ outputs=[prompt, aspect_ratio, height, width, num_frames, resolution, real_size],
3830
  queue=False,
3831
  show_api=False,
3832
  )
3833
 
3834
+ for example_button, example_prompt, example_video, example_image in (
3835
+ video_edit_example_buttons
3836
+ + video_understanding_example_buttons
3837
+ + image_edit_example_buttons
3838
+ + image_understanding_example_buttons
3839
+ ):
3840
+ example_button.click(
3841
+ fn=make_media_prompt_example_click_handler(example_prompt, example_video, example_image),
3842
+ inputs=[task],
3843
+ outputs=[prompt, input_video, input_image, aspect_ratio, height, width, num_frames, resolution, real_size],
3844
+ queue=False,
3845
+ show_api=False,
3846
+ )
 
3847
 
3848
  run_button.click(
3849
  fn=build_running_status_markdown,
 
3870
  enable_frame_interpolation,
3871
  ],
3872
  outputs=[output_video, output_image, output_text, status, logs],
3873
+ show_progress="minimal",
3874
  )
3875
 
3876
  return demo
 
3907
  return gpu_ids
3908
 
3909
 
3910
def prefetch_model_assets_before_launch() -> None:
    """Prepare model assets for each requested variant before the app launches.

    Calls ``ensure_model_assets`` once per variant and logs the elapsed time.
    A failure for one variant is logged and does not stop the others. The
    original author's rationale: on ZeroGPU, preparing assets inside the GPU
    call would consume the first user's GPU reservation.

    Environment:
        LANCE_PREFETCH_MODEL_ASSETS: Enables or disables prefetching; the
            default is the result of ``running_on_space()``.
        LANCE_PREFETCH_MODEL_VARIANTS: Comma-separated variants to prefetch;
            defaults to both the video and image variants.
    """
    prefetch_enabled = env_flag("LANCE_PREFETCH_MODEL_ASSETS", running_on_space())
    if not prefetch_enabled:
        print("[startup] Model asset prefetch disabled.", flush=True)
        return

    default_variants = f"{MODEL_VARIANT_VIDEO},{MODEL_VARIANT_IMAGE}"
    requested_text = os.getenv("LANCE_PREFETCH_MODEL_VARIANTS", default_variants)

    # Normalize each non-empty entry and drop duplicates, keeping first-seen order.
    variants: list[str] = []
    for token in (piece.strip() for piece in requested_text.split(",")):
        if token:
            normalized = normalize_model_variant(token)
            if normalized not in variants:
                variants.append(normalized)

    for variant in variants:
        try:
            started_at = time.perf_counter()
            model_path = ensure_model_assets(variant)
            elapsed = time.perf_counter() - started_at
            print(
                f"[startup][{variant}] Model assets are ready at {display_path(model_path)} "
                f"before ZeroGPU inference. elapsed={elapsed:.2f}s",
                flush=True,
            )
        except Exception as exc:
            # Best effort: startup continues; the message says inference retries lazily.
            print(
                f"[startup][{variant}] Model asset prefetch failed and will be retried lazily during inference: {exc}",
                flush=True,
            )
3947
+
3948
+
3949
  if __name__ == "__main__":
3950
  args = parse_args()
3951
  os.environ["LANCE_GPUS"] = args.gpus
3952
  QUEUE_MAX_SIZE = args.queue_size
3953
+ prefetch_model_assets_before_launch()
3954
+ print(
3955
+ "[startup] Skipping GPU model preload. UI will launch first, and Lance weights will be loaded lazily inside ZeroGPU inference calls.",
3956
+ flush=True,
3957
+ )
 
 
3958
  concurrency_limit = 1
3959
  demo = build_demo()
3960
  demo.queue(
config/config_factory.py CHANGED
@@ -234,7 +234,7 @@ class InferenceArguments(TrainingArguments):
234
  video_width: int = 480
235
  num_frames: int = 50
236
  task: str = "t2v" # t2v / t2i / edit / idip ...
237
- resolution: str = "video_848x480" # image_768x768 or video_848x480
238
  text_template: bool = False # 是否使用 system_prompt 文本模板
239
  max_duration: float = 6.0 # 最大视频时长(秒)
240
 
 
234
  video_width: int = 480
235
  num_frames: int = 50
236
  task: str = "t2v" # t2v / t2i / edit / idip ...
237
+ resolution: str = "video_360p" # image_768x768 or video_360p / video_480p
238
  text_template: bool = False # 是否使用 system_prompt 文本模板
239
  max_duration: float = 6.0 # 最大视频时长(秒)
240
 
data/datasets_custom/validation_dataset.py CHANGED
@@ -116,7 +116,10 @@ class ValidationDataset(Dataset):
116
  if self.data_config.resolution == "image_768x768":
117
  resolution_vae = 768
118
  resolution_vit = 672
119
- elif self.data_config.resolution == "video_848x480":
 
 
 
120
  resolution_vae = 640
121
  resolution_vit = 616
122
  else:
 
116
  if self.data_config.resolution == "image_768x768":
117
  resolution_vae = 768
118
  resolution_vit = 672
119
+ elif self.data_config.resolution == "video_360p":
120
+ resolution_vae = 480
121
+ resolution_vit = 448
122
+ elif self.data_config.resolution == "video_480p":
123
  resolution_vae = 640
124
  resolution_vit = 616
125
  else:
inference_lance.py CHANGED
@@ -495,9 +495,9 @@ def main():
495
  training_args=training_args,
496
  )
497
  stage_start = time.perf_counter()
498
- log_rank0(f"[startup] Moving Lance model to GPU {DEVICE}")
499
- model = model.to(DEVICE)
500
- log_stage("Lance model move to GPU", stage_start)
501
 
502
  # Setup tokenizer for model:
503
  stage_start = time.perf_counter()
@@ -538,7 +538,10 @@ def main():
538
  else: # HACK!!!
539
  assert model.language_model.get_input_embeddings().weight.data.data_ptr() != model.language_model.get_output_embeddings().weight.data.data_ptr(), 'tie_word_embeddings conflict'
540
 
541
- model = model.to(device=DEVICE, dtype=torch.bfloat16)
 
 
 
542
  model.eval()
543
  if vae_model is not None and hasattr(vae_model, "eval"):
544
  vae_model.eval()
 
495
  training_args=training_args,
496
  )
497
  stage_start = time.perf_counter()
498
+ log_rank0("[startup] Casting Lance model to bf16 on CPU")
499
+ model = model.to(dtype=torch.bfloat16)
500
+ log_stage("Lance model bf16 cast", stage_start)
501
 
502
  # Setup tokenizer for model:
503
  stage_start = time.perf_counter()
 
538
  else: # HACK!!!
539
  assert model.language_model.get_input_embeddings().weight.data.data_ptr() != model.language_model.get_output_embeddings().weight.data.data_ptr(), 'tie_word_embeddings conflict'
540
 
541
+ stage_start = time.perf_counter()
542
+ log_rank0(f"[startup] Moving Lance model to GPU {DEVICE}")
543
+ model = model.to(device=DEVICE)
544
+ log_stage("Lance model move to GPU", stage_start)
545
  model.eval()
546
  if vae_model is not None and hasattr(vae_model, "eval"):
547
  vae_model.eval()
modeling/lance/lance.py CHANGED
@@ -301,7 +301,7 @@ class Lance(PreTrainedModel):
301
 
302
  packed_latent = (1 - packed_timesteps[:, None]) * packed_latent_clean + packed_timesteps[:, None] * noise
303
  packed_timestep_embeds = self.time_embedder(packed_timesteps) # [L, C]
304
- latent_token_pos_emb = self.latent_pos_embed(packed_latent_position_ids)
305
  packed_latent = self.vae2llm(packed_latent) + packed_timestep_embeds + latent_token_pos_emb
306
 
307
  packed_sequence[packed_vae_token_indexes] = packed_latent.to(packed_sequence.dtype) # NOTE: 这里替换真实的vae token embed!
@@ -655,7 +655,7 @@ class Lance(PreTrainedModel):
655
 
656
  # --- 视觉特征 编码 ---
657
  timestep_embed = self.time_embedder(timestep)
658
- latent_pos_embed = self.latent_pos_embed(vae_position_ids)
659
  vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
660
  vae_embed = vae_embed.to(current_sequence.dtype)
661
 
@@ -1641,7 +1641,7 @@ class Lance(PreTrainedModel):
1641
 
1642
  # --- 存入 视觉特征 编码 (vae condition)---
1643
  timestep_embed = self.time_embedder(timestep)
1644
- latent_pos_embed = self.latent_pos_embed(vae_position_ids)
1645
  vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
1646
  vae_embed = vae_embed.to(current_sequence.dtype)
1647
  current_sequence[current_vae_token_indexes_local] = vae_embed
@@ -1698,7 +1698,7 @@ class Lance(PreTrainedModel):
1698
 
1699
  # --- 视觉特征 编码 ---
1700
  timestep_embed = self.time_embedder(timestep)
1701
- latent_pos_embed = self.latent_pos_embed(vae_position_ids)
1702
  vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
1703
  vae_embed = vae_embed.to(current_sequence.dtype)
1704
 
 
301
 
302
  packed_latent = (1 - packed_timesteps[:, None]) * packed_latent_clean + packed_timesteps[:, None] * noise
303
  packed_timestep_embeds = self.time_embedder(packed_timesteps) # [L, C]
304
+ latent_token_pos_emb = self.latent_pos_embed(packed_latent_position_ids.to(device=packed_latent.device))
305
  packed_latent = self.vae2llm(packed_latent) + packed_timestep_embeds + latent_token_pos_emb
306
 
307
  packed_sequence[packed_vae_token_indexes] = packed_latent.to(packed_sequence.dtype) # NOTE: 这里替换真实的vae token embed!
 
655
 
656
  # --- 视觉特征 编码 ---
657
  timestep_embed = self.time_embedder(timestep)
658
+ latent_pos_embed = self.latent_pos_embed(vae_position_ids.to(device=x_t.device))
659
  vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
660
  vae_embed = vae_embed.to(current_sequence.dtype)
661
 
 
1641
 
1642
  # --- 存入 视觉特征 编码 (vae condition)---
1643
  timestep_embed = self.time_embedder(timestep)
1644
+ latent_pos_embed = self.latent_pos_embed(vae_position_ids.to(device=x_t.device))
1645
  vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
1646
  vae_embed = vae_embed.to(current_sequence.dtype)
1647
  current_sequence[current_vae_token_indexes_local] = vae_embed
 
1698
 
1699
  # --- 视觉特征 编码 ---
1700
  timestep_embed = self.time_embedder(timestep)
1701
+ latent_pos_embed = self.latent_pos_embed(vae_position_ids.to(device=x_t.device))
1702
  vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
1703
  vae_embed = vae_embed.to(current_sequence.dtype)
1704
 
modeling/lance/modeling_utils.py CHANGED
@@ -186,13 +186,38 @@ class PositionEmbedding3D(nn.Module):
186
  self.max_num_latent_frames = max_latent_num_frames # t
187
  self.max_latent_size = max_latent_size # h, w
188
  self.hidden_size = hidden_size
189
- self.pos_embed = nn.Parameter(torch.zeros(max_latent_num_frames * (max_latent_size**2), hidden_size), requires_grad=False)
190
- self._init_weights()
191
 
192
- def _init_weights(self):
193
- # Initialize (and freeze) pos_embed by sin-cos embedding:
194
- pos_embed = get_3d_sincos_pos_embed(self.hidden_size, self.max_num_latent_frames, self.max_latent_size, self.max_latent_size)
195
- self.pos_embed.data.copy_(torch.from_numpy(pos_embed).float())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
197
  def forward(self, position_ids):
198
- return self.pos_embed[position_ids]
 
 
 
 
 
 
 
 
 
 
 
186
  self.max_num_latent_frames = max_latent_num_frames # t
187
  self.max_latent_size = max_latent_size # h, w
188
  self.hidden_size = hidden_size
189
+ self.temporal_dim, self.height_dim, self.width_dim = self._split_hidden_dims(hidden_size)
 
190
 
191
@staticmethod
def _split_hidden_dims(embed_dim: int) -> tuple[int, int, int]:
    """Split ``embed_dim`` into even temporal, height and width widths.

    The temporal and height parts each get the largest even number not
    exceeding ``embed_dim // 3``; the width part takes the remainder, so the
    three always sum to ``embed_dim``.

    Returns:
        ``(dim_t, dim_h, dim_w)``.

    Raises:
        AssertionError: If ``embed_dim`` is odd.
    """
    assert embed_dim % 2 == 0, "Embedding dimension must be even for 3D embeddings"
    shared_dim = embed_dim // 3
    if shared_dim % 2 != 0:
        shared_dim -= 1
    remaining_dim = embed_dim - 2 * shared_dim
    assert remaining_dim % 2 == 0
    return shared_dim, shared_dim, remaining_dim
201
+
202
@staticmethod
def _build_1d_sincos(coords: torch.Tensor, embed_dim: int) -> torch.Tensor:
    """Compute sine/cosine embeddings for a 1-D tensor of coordinates.

    Frequencies are ``1 / 10000 ** (i / (embed_dim / 2))`` for
    ``i = 0 .. embed_dim // 2 - 1``. Each output row holds all the sines
    followed by all the cosines.

    Args:
        coords: 1-D tensor of coordinates (indexed as ``coords[:, None]``).
        embed_dim: Output width; must be even.

    Returns:
        A float32 tensor of shape ``(len(coords), embed_dim)`` created on
        ``coords.device``.

    Raises:
        AssertionError: If ``embed_dim`` is odd.
    """
    assert embed_dim % 2 == 0, "Embedding dimension must be even for 1D embeddings"
    num_freqs = embed_dim // 2
    exponents = torch.arange(num_freqs, device=coords.device, dtype=torch.float32)
    exponents = exponents / (embed_dim / 2.0)
    inv_freq = 1.0 / (10000.0 ** exponents)
    # Outer product: one row per coordinate, one column per frequency.
    angles = coords.to(dtype=torch.float32).unsqueeze(1) * inv_freq.unsqueeze(0)
    return torch.cat([torch.sin(angles), torch.cos(angles)], dim=-1)
211
 
212
  def forward(self, position_ids):
213
+ position_ids = position_ids.reshape(-1).to(dtype=torch.long)
214
+ plane_size = self.max_latent_size * self.max_latent_size
215
+ t = position_ids // plane_size
216
+ rem = position_ids % plane_size
217
+ h = rem // self.max_latent_size
218
+ w = rem % self.max_latent_size
219
+
220
+ emb_t = self._build_1d_sincos(t, self.temporal_dim)
221
+ emb_h = self._build_1d_sincos(h, self.height_dim)
222
+ emb_w = self._build_1d_sincos(w, self.width_dim)
223
+ return torch.cat([emb_t, emb_h, emb_w], dim=-1)
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
  absl-py==0.15.0
2
  accelerate==1.13.0
3
  addict==2.4.0
4
- albumentations==1.4.3
5
  annotated-types==0.7.0
6
  bitsandbytes==0.49.2
7
  certifi==2024.8.30
@@ -23,7 +23,7 @@ joblib==1.4.2
23
  kornia==0.8.2
24
  librosa==0.10.2.post1
25
  markupsafe==2.1.5
26
- numpy==1.24.4
27
  omegaconf==2.3.0
28
  opencv-python==4.7.0.72
29
  opt_einsum==3.4.0
 
1
  absl-py==0.15.0
2
  accelerate==1.13.0
3
  addict==2.4.0
4
+ # albumentations==1.4.3
5
  annotated-types==0.7.0
6
  bitsandbytes==0.49.2
7
  certifi==2024.8.30
 
23
  kornia==0.8.2
24
  librosa==0.10.2.post1
25
  markupsafe==2.1.5
26
+ numpy==1.23.5
27
  omegaconf==2.3.0
28
  opencv-python==4.7.0.72
29
  opt_einsum==3.4.0