Spaces:

bytedance-research
/

Lance

Running on Zero

App Files Files Community

ffy2000 committed 5 days ago

Commit

b7a0fa0

1 Parent(s): e79d110

Prepare Lance for Hugging Face Space

Browse files

Files changed (8) hide show

README.md +2 -0
app.py +1068 -215
config/config_factory.py +1 -1
data/datasets_custom/validation_dataset.py +4 -1
inference_lance.py +7 -4
modeling/lance/lance.py +4 -4
modeling/lance/modeling_utils.py +32 -7
requirements.txt +2 -2

README.md CHANGED Viewed

@@ -7,6 +7,8 @@ sdk: gradio
 python_version: "3.10.13"
 sdk_version: "5.31.0"
 app_file: app.py
 ---
 <div align="center">

 python_version: "3.10.13"
 sdk_version: "5.31.0"
 app_file: app.py
+models:
+  - bytedance-research/Lance
 ---
 <div align="center">

app.py CHANGED Viewed

@@ -89,13 +89,14 @@ DEFAULT_TASK = "t2v"
 DEFAULT_TIMESTEPS = 30
 DEFAULT_TIMESTEP_SHIFT = 3.5
 DEFAULT_CFG_TEXT_SCALE = 4.0
-DEFAULT_RESOLUTION = "video_848x480"
 DEFAULT_IMAGE_RESOLUTION = "image_768x768"
 DEFAULT_BASIC_SEED = 42
-DEFAULT_HEIGHT = 480
-DEFAULT_WIDTH = 848
 DEFAULT_IMAGE_SIZE = 768
-DEFAULT_VIDEO_DURATION_SECONDS = 8
 MAX_VIDEO_DURATION_SECONDS = 360
 MAX_VIDEO_NUM_FRAMES = 12 * MAX_VIDEO_DURATION_SECONDS + 1
 DEFAULT_NUM_FRAMES = 12 * DEFAULT_VIDEO_DURATION_SECONDS + 1
@@ -106,7 +107,19 @@ FRAME_INTERPOLATION_NO = "No"
 DEFAULT_FRAME_INTERPOLATION = FRAME_INTERPOLATION_YES
 ASPECT_RATIO_CHOICES = ["21:9", "16:9", "3:2", "4:3", "1:1", "3:4", "2:3", "9:16", "9:21"]
-VIDEO_ASPECT_RATIO_TO_SIZE = {
     "21:9": (976, 416),
     "16:9": (848, 480),
     "3:2": (784, 528),
@@ -118,6 +131,11 @@ VIDEO_ASPECT_RATIO_TO_SIZE = {
     "9:21": (416, 976),
 }
 IMAGE_ASPECT_RATIO_TO_SIZE = {
     "21:9": (1168, 496),
     "16:9": (1024, 576),
@@ -134,10 +152,6 @@ DEFAULT_QUEUE_SIZE = 32
 USE_KVCACHE = True
 TEXT_TEMPLATE = True
 RECORD_WRITE_LOCK = threading.Lock()
-MODEL_ASSET_PREFETCH_LOCK = threading.Lock()
-MODEL_ASSET_PREFETCH_STARTED = False
-MODEL_ASSET_PREFETCH_DONE = threading.Event()
-MODEL_ASSET_PREFETCH_ERROR: Optional[str] = None
 LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
 LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
@@ -608,6 +622,112 @@ APP_CSS = """
     line-height: 1.35 !important;
 }
 .prompt-dataset .paginate {
     display: none !important;
 }
@@ -746,6 +866,327 @@ APP_CSS = """
     font-weight: 800 !important;
 }
 @media (max-width: 900px) {
     .lance-main-row {
         grid-template-columns: minmax(0, 1fr) !important;
@@ -759,7 +1200,9 @@ APP_JS = """
         if (!element) {
             return;
         }
-        element.style.setProperty(property, value, "important");
     };
     const enforceLanceLabelTypography = () => {
@@ -783,6 +1226,216 @@ APP_JS = """
         });
     };
     const syncOutputColumnHeight = () => {
         const row = document.querySelector(".lance-main-row");
         const inputColumn = document.querySelector(".lance-input-column");
@@ -812,6 +1465,9 @@ APP_JS = """
     const scheduleSync = () => requestAnimationFrame(() => {
         enforceLanceLabelTypography();
         syncOutputColumnHeight();
     });
     const attachObservers = () => {
@@ -834,9 +1490,15 @@ APP_JS = """
     };
     enforceLanceLabelTypography();
     attachObservers();
     new MutationObserver(() => {
         enforceLanceLabelTypography();
         attachObservers();
     }).observe(document.body, {
         childList: true,
@@ -888,7 +1550,12 @@ UNDERSTANDING_TASKS = {TASK_X2T_VIDEO, TASK_X2T_IMAGE}
 IMAGE_TASKS = {TASK_T2I, TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
 VIDEO_TASKS = {TASK_T2V, TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
 EDIT_TASKS = {TASK_IMAGE_EDIT, TASK_VIDEO_EDIT}
-VIDEO_RESOLUTION_CHOICES = [DEFAULT_RESOLUTION]
 IMAGE_RESOLUTION_CHOICES = [DEFAULT_IMAGE_RESOLUTION]
 RESOLUTION_CHOICES = VIDEO_RESOLUTION_CHOICES + IMAGE_RESOLUTION_CHOICES
 CAPTION_SYSTEM_PROMPT_TEMPLATE = (
@@ -911,7 +1578,7 @@ def get_aspect_ratio_choices_for_task(task: str) -> list[tuple[str, str]]:
 def get_video_duration_choices() -> list[tuple[str, int]]:
-    return [(f"{seconds}s", seconds) for seconds in range(1, MAX_VIDEO_DURATION_SECONDS + 1)]
 def env_flag(name: str, default: bool) -> bool:
     value = os.getenv(name)
@@ -1136,7 +1803,7 @@ def normalize_frame_interpolation(value) -> bool:
 def video_seconds_to_num_frames(seconds: int) -> int:
-    seconds = max(1, min(MAX_VIDEO_DURATION_SECONDS, int(seconds)))
     return 12 * seconds + 1
@@ -1148,13 +1815,63 @@ def normalize_task(task: str) -> str:
     return task
-def normalize_resolution_for_backend(resolution: str, task: str) -> str:
     internal_task = normalize_task(task)
     if internal_task in IMAGE_TASKS:
-        return DEFAULT_IMAGE_RESOLUTION
     if internal_task in VIDEO_TASKS:
         return DEFAULT_RESOLUTION
-    return str(resolution)
 def get_default_aspect_ratio(task: str) -> str:
@@ -1162,10 +1879,21 @@ def get_default_aspect_ratio(task: str) -> str:
     return DEFAULT_IMAGE_ASPECT_RATIO if internal_task in IMAGE_TASKS else DEFAULT_VIDEO_ASPECT_RATIO
-def get_size_for_aspect_ratio(task: str, aspect_ratio: str) -> tuple[int, int]:
     internal_task = normalize_task(task)
     aspect_ratio = aspect_ratio if aspect_ratio in ASPECT_RATIO_CHOICES else get_default_aspect_ratio(internal_task)
-    size_map = IMAGE_ASPECT_RATIO_TO_SIZE if internal_task in IMAGE_TASKS else VIDEO_ASPECT_RATIO_TO_SIZE
     return size_map[aspect_ratio]
@@ -1177,16 +1905,18 @@ def format_size_markdown(task: str, width: int, height: int) -> str:
     return f"{width} x {height}"
-def get_size_map_for_task(task: str) -> dict[str, tuple[int, int]]:
     internal_task = normalize_task(task)
-    return IMAGE_ASPECT_RATIO_TO_SIZE if internal_task in IMAGE_TASKS else VIDEO_ASPECT_RATIO_TO_SIZE
-def get_output_resolution_choices_for_task(task: str) -> list[tuple[str, str]]:
     """Get Output Resolution choices with a one-to-one mapping to aspect ratios."""
     internal_task = normalize_task(task)
     default_ratio = get_default_aspect_ratio(internal_task)
-    size_map = get_size_map_for_task(internal_task)
     choices = []
     for ratio in ASPECT_RATIO_CHOICES:
         width, height = size_map[ratio]
@@ -1196,10 +1926,10 @@ def get_output_resolution_choices_for_task(task: str) -> list[tuple[str, str]]:
     return choices
-def get_aspect_ratio_for_output_resolution(task: str, output_resolution: str) -> str:
     internal_task = normalize_task(task)
     resolution_text = str(output_resolution or "").strip()
-    size_map = get_size_map_for_task(internal_task)
     for ratio in ASPECT_RATIO_CHOICES:
         width, height = size_map[ratio]
         if resolution_text == format_size_markdown(internal_task, width, height):
@@ -1256,24 +1986,42 @@ def build_lance_icon_label_html(text: str, icon: str, *extra_classes: str) -> st
     return f'<div class="{class_names}">{icon_html}<span>{html.escape(text)}</span></div>'
-def update_size_from_aspect_ratio(task: str, aspect_ratio: str):
-    width, height = get_size_for_aspect_ratio(task, aspect_ratio)
-    return height, width, format_size_markdown(task, width, height)
-def update_aspect_ratio_from_output_resolution(task: str, output_resolution: str):
-    aspect_ratio = get_aspect_ratio_for_output_resolution(task, output_resolution)
-    width, height = get_size_for_aspect_ratio(task, aspect_ratio)
     return aspect_ratio, height, width
 def reset_generation_defaults_for_task(task: str):
     internal_task = normalize_task(task)
     aspect_ratio = get_default_aspect_ratio(internal_task)
-    width, height = get_size_for_aspect_ratio(internal_task, aspect_ratio)
-    resolution = DEFAULT_IMAGE_RESOLUTION if internal_task in IMAGE_TASKS else DEFAULT_RESOLUTION
     num_frames = DEFAULT_VIDEO_DURATION_SECONDS
-    return aspect_ratio, height, width, num_frames, resolution, format_size_markdown(internal_task, width, height)
 def apply_prompt_example(task: str, evt: gr.SelectData):
@@ -1288,6 +2036,41 @@ def apply_prompt_example(task: str, evt: gr.SelectData):
     return (prompt_text, *defaults)
 def get_understanding_system_prompt_choices(task: str) -> list[str]:
     internal_task = normalize_task(task)
     if internal_task == TASK_X2T_IMAGE:
@@ -1815,9 +2598,9 @@ class LanceT2VV2TPipeline:
             )
             stage_start = time.perf_counter()
-            print(f"[startup][gpu:{self.device}] Moving Lance model to GPU {self.device}", flush=True)
-            model = model.to(self.device)
-            self._log_stage("Lance model move to GPU", stage_start)
             stage_start = time.perf_counter()
             print(f"[startup][gpu:{self.device}] Loading tokenizer: {model_args.model_path}", flush=True)
@@ -1855,7 +2638,10 @@ class LanceT2VV2TPipeline:
                     != model.language_model.get_output_embeddings().weight.data.data_ptr()
                 ), "tie_word_embeddings conflict"
-            model = model.to(device=self.device, dtype=torch.bfloat16)
             model.eval()
             if vae_model is not None and hasattr(vae_model, "eval"):
                 vae_model.eval()
@@ -2402,45 +3188,6 @@ def ensure_flash_attn_installed() -> None:
     print(f"[startup] flash-attn {DEFAULT_FLASH_ATTN_VERSION} installed successfully.", flush=True)
-def prefetch_lance_runtime_assets() -> None:
-    global MODEL_ASSET_PREFETCH_ERROR
-    with MODEL_ASSET_PREFETCH_LOCK:
-        if MODEL_ASSET_PREFETCH_DONE.is_set():
-            return
-        print(
-            "[startup] Preloading Lance runtime assets on CPU: flash-attn plus both model variants.",
-            flush=True,
-        )
-        try:
-            ensure_flash_attn_installed()
-            for variant in (MODEL_VARIANT_VIDEO, MODEL_VARIANT_IMAGE):
-                model_path = ensure_model_assets(variant)
-                print(
-                    f"[startup] CPU preload finished for {variant} at {display_path(model_path)}",
-                    flush=True,
-                )
-            MODEL_ASSET_PREFETCH_ERROR = None
-            MODEL_ASSET_PREFETCH_DONE.set()
-            print("[startup] CPU asset preload finished for all Lance variants.", flush=True)
-        except Exception as exc:
-            MODEL_ASSET_PREFETCH_ERROR = str(exc)
-            print(f"[startup] CPU asset preload failed: {exc}", flush=True)
-def start_lance_runtime_asset_prefetch() -> None:
-    global MODEL_ASSET_PREFETCH_STARTED
-    with MODEL_ASSET_PREFETCH_LOCK:
-        if MODEL_ASSET_PREFETCH_STARTED:
-            return
-        MODEL_ASSET_PREFETCH_STARTED = True
-    thread = threading.Thread(
-        target=prefetch_lance_runtime_assets,
-        name="lance-runtime-asset-prefetch",
-        daemon=True,
-    )
-    thread.start()
 def get_env_int(name: str, default: int) -> int:
     """Read an integer environment variable, falling back safely on invalid values."""
     try:
@@ -2449,19 +3196,54 @@ def get_env_int(name: str, default: int) -> int:
         return default
 def get_zerogpu_duration_cap() -> int:
     """Maximum duration requested from ZeroGPU.
-    You can lower or raise it without changing code by setting:
-        LANCE_ZEROGPU_MAX_DURATION_SECONDS=900
     """
-    return max(1, get_env_int("LANCE_ZEROGPU_MAX_DURATION_SECONDS", 900))
 def clamp_zerogpu_duration(seconds: int) -> int:
     return max(1, min(int(seconds), get_zerogpu_duration_cap()))
 def get_run_task_gpu_duration(
     task: str,
     prompt: str,
@@ -2478,18 +3260,39 @@ def get_run_task_gpu_duration(
     cfg_text_scale: float,
     enable_frame_interpolation: bool,
 ) -> int:
-    """Return a legal ZeroGPU reservation duration.
-    This value is only the requested ZeroGPU reservation time, not the user's total
-    daily quota.
     """
     internal_task = normalize_task(task)
-    requested_seconds = max(1, int(num_frames))
-    if internal_task in {TASK_T2V, TASK_VIDEO_EDIT}:
-        return clamp_zerogpu_duration(max(180, requested_seconds * 2))
     if internal_task == TASK_X2T_VIDEO:
-        return clamp_zerogpu_duration(60)
-    return clamp_zerogpu_duration(60)
 def get_pipeline_pool(task: str) -> PipelinePool:
@@ -2562,21 +3365,14 @@ def build_status_markdown() -> str:
     gpu_text = "unknown"
     concurrency = 1
     active_variant = "none"
-    asset_status = "pending"
     if ACTIVE_PIPELINE_POOL is not None:
         active_variant = ACTIVE_PIPELINE_POOL.model_variant
         gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
         concurrency = ACTIVE_PIPELINE_POOL.size
-    if MODEL_ASSET_PREFETCH_DONE.is_set():
-        asset_status = "done"
-    elif MODEL_ASSET_PREFETCH_STARTED:
-        asset_status = "running"
-    if MODEL_ASSET_PREFETCH_ERROR:
-        asset_status = f"failed: {MODEL_ASSET_PREFETCH_ERROR}"
     return (
         f"**Status**  GPU: `{gpu_text}`  |  Max concurrency: `{concurrency}`  |  "
         f"Queue limit: `{QUEUE_MAX_SIZE}`  |  Active model: `{active_variant}`  |  "
-        f"Switch mode: `unload then load`  |  Asset preload: `{asset_status}`"
     )
@@ -2639,15 +3435,16 @@ def update_task_ui(task: str):
     is_edit_task = internal_task in EDIT_TASKS
     is_understanding_task = internal_task in UNDERSTANDING_TASKS
     is_generation_task = internal_task in GENERATION_TASKS
     show_media_input = is_edit_task or is_understanding_task
-    resolution_choices = IMAGE_RESOLUTION_CHOICES if is_image_task else VIDEO_RESOLUTION_CHOICES
-    resolution_value = DEFAULT_IMAGE_RESOLUTION if is_image_task else DEFAULT_RESOLUTION
     aspect_ratio_value = DEFAULT_IMAGE_ASPECT_RATIO if is_image_task else DEFAULT_VIDEO_ASPECT_RATIO
-    width_value, height_value = get_size_for_aspect_ratio(internal_task, aspect_ratio_value)
     size_markdown = format_size_markdown(internal_task, width_value, height_value)
     system_prompt_choices = get_understanding_system_prompt_choices(internal_task)
-    if is_generation_task:
         text_label = "Prompt"
         text_placeholder = "Describe what you want to generate..."
     elif is_edit_task:
@@ -2666,10 +3463,12 @@ def update_task_ui(task: str):
     output_icon = "video" if output_label == "Output Video" else "image" if output_label == "Output Image" else "text"
     show_generation_settings = is_generation_task or is_edit_task
-    show_aspect_ratio = is_generation_task
     show_input_video = internal_task in {TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
     show_input_image = internal_task in {TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
-    show_video_generation_settings = internal_task in {TASK_T2V, TASK_VIDEO_EDIT}
     return (
         gr.update(value=build_lance_label_html(text_label, "lance-prompt-label")),
@@ -2677,25 +3476,29 @@ def update_task_ui(task: str):
             label=text_label,
             placeholder=text_placeholder,
             visible=True,
         ),
         gr.update(
             choices=system_prompt_choices,
             value=system_prompt_choices[0],
             visible=False,
         ),
         gr.update(label="Input Video", visible=show_input_video, value=None),
         gr.update(label="Input Image", visible=show_input_image, value=None),
-        gr.update(visible=show_video_generation_settings),
         gr.update(visible=show_aspect_ratio),
-        gr.update(visible=False),
         gr.update(visible=internal_task == TASK_T2V),
         gr.update(choices=get_aspect_ratio_choices_for_task(internal_task), value=aspect_ratio_value, visible=show_aspect_ratio),
         gr.update(value=height_value),
         gr.update(value=width_value),
-        gr.update(visible=show_video_generation_settings, value=DEFAULT_FRAME_INTERPOLATION),
-        gr.update(choices=get_output_resolution_choices_for_task(internal_task), value=size_markdown, visible=show_video_generation_settings),
         gr.update(visible=internal_task == TASK_T2V, value=DEFAULT_VIDEO_DURATION_SECONDS),
-        gr.update(choices=resolution_choices, value=resolution_value, visible=False),
         gr.update(value=build_lance_icon_label_html(output_label, output_icon, "lance-output-label")),
         gr.update(visible=internal_task in {TASK_T2V, TASK_VIDEO_EDIT}),
         gr.update(visible=internal_task in {TASK_T2I, TASK_IMAGE_EDIT}),
@@ -2773,7 +3576,7 @@ def build_demo() -> gr.Blocks:
                                 value=DEFAULT_VIDEO_ASPECT_RATIO,
                                 elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
                             )
-                    with gr.Row(visible=False, elem_classes=["generation-controls-row", "output-resolution-row"]) as output_resolution_row:
                         with gr.Column(elem_classes=["lance-control-field"]):
                             gr.HTML('<div class="lance-generation-label">Output Resolution</div>', elem_classes=["lance-label-html"])
                             real_size = gr.Radio(
@@ -2784,26 +3587,28 @@ def build_demo() -> gr.Blocks:
                                 interactive=True,
                                 elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
                             )
-                resolution = gr.Dropdown(
-                    label="Resolution",
-                    choices=RESOLUTION_CHOICES,
-                    value=DEFAULT_RESOLUTION,
-                    visible=False,
-                )
-                height = gr.Number(value=DEFAULT_HEIGHT, precision=0, visible=False)
-                width = gr.Number(value=DEFAULT_WIDTH, precision=0, visible=False)
                 with gr.Row(elem_classes=["generation-controls-row", "video-duration-row"]) as video_duration_row:
                     with gr.Column(elem_classes=["lance-control-field"]):
                         gr.HTML(build_lance_label_html("Video Duration (seconds)", "lance-generation-label"), elem_classes=["lance-label-html"])
-                        num_frames = gr.Slider(
                             label="Video Duration (seconds)",
                             show_label=False,
-                            minimum=1,
-                            maximum=MAX_VIDEO_DURATION_SECONDS,
-                            step=1,
                             value=DEFAULT_VIDEO_DURATION_SECONDS,
                             elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
                         )
                 with gr.Accordion("Advanced Parameters", open=False, elem_classes=["lance-advanced-accordion"]):
                     with gr.Column(elem_classes=["lance-control-field"]):
@@ -2868,94 +3673,96 @@ def build_demo() -> gr.Blocks:
         run_button = gr.Button("🚀 Generate", variant="primary", elem_classes=["lance-run-button"])
         with gr.Column(visible=True, elem_classes=["lance-recommended-section"]) as video_generation_examples_group:
             gr.HTML(build_lance_label_html("Video generation recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
             with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
-                video_generation_examples = gr.Dataset(
-                    samples=VIDEO_GENERATION_EXAMPLES,
-                    components=[gr.Textbox(label="Prompt", visible=False)],
-                    headers=["Prompt"],
-                    show_label=False,
-                    type="values",
-                    layout="table",
-                    samples_per_page=len(VIDEO_GENERATION_EXAMPLES),
-                    elem_classes=["prompt-dataset"],
-                )
         with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as video_edit_examples_group:
             gr.HTML(build_lance_label_html("Video edit recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
-            with gr.Group(elem_classes=["example-panel", "example-no-icon", "video-edit-examples"]):
-                video_edit_examples = gr.Examples(
-                    examples=VIDEO_EDIT_EXAMPLES,
-                    inputs=generation_example_inputs,
-                    label="",
-                    examples_per_page=3,
-                    cache_examples=False,
-                    preprocess=False,
-                    postprocess=False,
-                )
         with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as video_understanding_examples_group:
             gr.HTML(build_lance_label_html("Video understanding recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
-            with gr.Group(elem_classes=["example-panel", "example-no-icon"]):
-                video_understanding_examples = gr.Examples(
-                    examples=VIDEO_UNDERSTANDING_EXAMPLES,
-                    inputs=generation_example_inputs,
-                    label="",
-                    examples_per_page=4,
-                    cache_examples=False,
-                    preprocess=False,
-                    postprocess=False,
-                )
         with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_generation_examples_group:
             gr.HTML(build_lance_label_html("Image generation recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
             with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
-                image_generation_examples = gr.Dataset(
-                    samples=IMAGE_GENERATION_EXAMPLES,
-                    components=[gr.Textbox(label="Prompt", visible=False)],
-                    headers=["Prompt"],
-                    show_label=False,
-                    type="values",
-                    layout="table",
-                    samples_per_page=len(IMAGE_GENERATION_EXAMPLES),
-                    elem_classes=["prompt-dataset"],
-                )
         with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_edit_examples_group:
             gr.HTML(build_lance_label_html("Image edit recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
-            with gr.Group(elem_classes=["example-panel", "example-no-icon"]):
-                image_edit_examples = gr.Examples(
-                    examples=IMAGE_EDIT_EXAMPLES,
-                    inputs=generation_example_inputs,
-                    label="",
-                    examples_per_page=5,
-                    cache_examples=False,
-                    preprocess=False,
-                    postprocess=False,
-                )
         with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_understanding_examples_group:
             gr.HTML(build_lance_label_html("Image understanding recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
-            with gr.Group(elem_classes=["example-panel", "example-no-icon"]):
-                image_understanding_examples = gr.Examples(
-                    examples=IMAGE_UNDERSTANDING_EXAMPLES,
-                    inputs=generation_example_inputs,
-                    label="",
-                    examples_per_page=4,
-                    cache_examples=False,
-                    preprocess=False,
-                    postprocess=False,
-                )
-        keep_example_clicks_from_changing_visibility(
-            video_generation_examples,
-            video_edit_examples,
-            video_understanding_examples,
-            image_generation_examples,
-            image_edit_examples,
-            image_understanding_examples,
-        )
         task.change(
             fn=update_task_ui,
@@ -2970,6 +3777,7 @@ def build_demo() -> gr.Blocks:
                 aspect_ratio_row,
                 output_resolution_row,
                 video_duration_row,
                 aspect_ratio,
                 height,
                 width,
@@ -2992,7 +3800,7 @@ def build_demo() -> gr.Blocks:
         aspect_ratio.change(
             fn=update_size_from_aspect_ratio,
-            inputs=[task, aspect_ratio],
             outputs=[height, width, real_size],
             queue=False,
             show_api=False,
@@ -3000,35 +3808,42 @@ def build_demo() -> gr.Blocks:
         real_size.change(
             fn=update_aspect_ratio_from_output_resolution,
-            inputs=[task, real_size],
             outputs=[aspect_ratio, height, width],
             queue=False,
             show_api=False,
         )
-        for examples_component in (video_edit_examples, video_understanding_examples, image_edit_examples, image_understanding_examples):
-            examples_component.load_input_event.then(
-                fn=reset_generation_defaults_for_task,
                 inputs=[task],
-                outputs=[aspect_ratio, height, width, num_frames, resolution, real_size],
                 queue=False,
                 show_api=False,
             )
-        video_generation_examples.select(
-            fn=apply_prompt_example,
-            inputs=[task],
-            outputs=[prompt, aspect_ratio, height, width, num_frames, resolution, real_size],
-            queue=False,
-            show_api=False,
-        )
-        image_generation_examples.select(
-            fn=apply_prompt_example,
-            inputs=[task],
-            outputs=[prompt, aspect_ratio, height, width, num_frames, resolution, real_size],
-            queue=False,
-            show_api=False,
-        )
         run_button.click(
             fn=build_running_status_markdown,
@@ -3055,6 +3870,7 @@ def build_demo() -> gr.Blocks:
                 enable_frame_interpolation,
             ],
             outputs=[output_video, output_image, output_text, status, logs],
         )
     return demo
@@ -3091,17 +3907,54 @@ def parse_gpu_ids(gpu_string: str) -> list[int]:
     return gpu_ids
 if __name__ == "__main__":
     args = parse_args()
     os.environ["LANCE_GPUS"] = args.gpus
     QUEUE_MAX_SIZE = args.queue_size
-    if env_flag("LANCE_PRELOAD_MODEL_ASSETS", running_on_space()):
-        start_lance_runtime_asset_prefetch()
-    else:
-        print(
-            "[startup] Model asset preload disabled. UI will launch first, and Lance weights will be downloaded lazily inside GPU inference calls.",
-            flush=True,
-        )
     concurrency_limit = 1
     demo = build_demo()
     demo.queue(

 DEFAULT_TIMESTEPS = 30
 DEFAULT_TIMESTEP_SHIFT = 3.5
 DEFAULT_CFG_TEXT_SCALE = 4.0
+DEFAULT_RESOLUTION = "video_360p"
+DEFAULT_VIDEO_EDIT_RESOLUTION = "video_480p"
 DEFAULT_IMAGE_RESOLUTION = "image_768x768"
 DEFAULT_BASIC_SEED = 42
+DEFAULT_HEIGHT = 352
+DEFAULT_WIDTH = 640
 DEFAULT_IMAGE_SIZE = 768
+DEFAULT_VIDEO_DURATION_SECONDS = 3
 MAX_VIDEO_DURATION_SECONDS = 360
 MAX_VIDEO_NUM_FRAMES = 12 * MAX_VIDEO_DURATION_SECONDS + 1
 DEFAULT_NUM_FRAMES = 12 * DEFAULT_VIDEO_DURATION_SECONDS + 1
 DEFAULT_FRAME_INTERPOLATION = FRAME_INTERPOLATION_YES
 ASPECT_RATIO_CHOICES = ["21:9", "16:9", "3:2", "4:3", "1:1", "3:4", "2:3", "9:16", "9:21"]
+VIDEO_360P_ASPECT_RATIO_TO_SIZE = {
+    "21:9": (672, 288),
+    "16:9": (640, 352),
+    "3:2": (528, 352),
+    "4:3": (560, 416),
+    "1:1": (480, 480),
+    "3:4": (416, 560),
+    "2:3": (352, 528),
+    "9:16": (352, 640),
+    "9:21": (288, 672),
+}
+VIDEO_480P_ASPECT_RATIO_TO_SIZE = {
     "21:9": (976, 416),
     "16:9": (848, 480),
     "3:2": (784, 528),
     "9:21": (416, 976),
 }
+VIDEO_RESOLUTION_TO_SIZE_MAP = {
+    "video_360p": VIDEO_360P_ASPECT_RATIO_TO_SIZE,
+    "video_480p": VIDEO_480P_ASPECT_RATIO_TO_SIZE,
+}
 IMAGE_ASPECT_RATIO_TO_SIZE = {
     "21:9": (1168, 496),
     "16:9": (1024, 576),
 USE_KVCACHE = True
 TEXT_TEMPLATE = True
 RECORD_WRITE_LOCK = threading.Lock()
 LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
 LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
     line-height: 1.35 !important;
 }
+.prompt-dataset button,
+.example-panel table td:first-child button {
+    max-height: 180px !important;
+    overflow-y: auto !important;
+    overscroll-behavior: contain !important;
+}
+.prompt-dataset button,
+.example-panel table td:first-child button,
+.prompt-dataset button span,
+.prompt-dataset button p,
+.example-panel table td:first-child span,
+.example-panel table td:first-child p {
+    white-space: pre-wrap !important;
+    overflow-wrap: anywhere !important;
+    word-break: break-word !important;
+    text-overflow: clip !important;
+    -webkit-line-clamp: unset !important;
+    line-clamp: unset !important;
+}
+.prompt-dataset button span,
+.prompt-dataset button p,
+.example-panel table td:first-child span,
+.example-panel table td:first-child p {
+    overflow: visible !important;
+    display: block !important;
+}
+.lance-recommended-section .example-panel td,
+.lance-recommended-section .example-panel td *,
+.lance-recommended-section .example-panel button,
+.lance-recommended-section .example-panel button *,
+.lance-recommended-section .example-panel label,
+.lance-recommended-section .example-panel label *,
+.lance-recommended-section .example-panel span,
+.lance-recommended-section .example-panel p {
+    white-space: pre-wrap !important;
+    overflow-wrap: anywhere !important;
+    word-break: break-word !important;
+    text-overflow: clip !important;
+    -webkit-line-clamp: unset !important;
+    line-clamp: unset !important;
+}
+.lance-recommended-section .example-panel button,
+.lance-recommended-section .example-panel td {
+    height: auto !important;
+    max-height: none !important;
+    overflow: visible !important;
+}
+.lance-recommended-section .example-panel [style*="ellipsis"],
+.lance-recommended-section .example-panel [style*="nowrap"],
+.lance-recommended-section .example-panel [style*="hidden"] {
+    white-space: pre-wrap !important;
+    overflow: visible !important;
+    text-overflow: clip !important;
+}
+.lance-recommended-section .example-panel {
+    overflow: visible !important;
+}
+.lance-recommended-section .example-panel table {
+    width: 100% !important;
+    table-layout: fixed !important;
+    border-collapse: collapse !important;
+}
+.lance-recommended-section .example-panel tr,
+.lance-recommended-section .example-panel th,
+.lance-recommended-section .example-panel td {
+    height: auto !important;
+    min-height: 0 !important;
+    max-height: none !important;
+}
+.lance-recommended-section .example-panel td:first-child,
+.lance-recommended-section .example-panel td:first-child *,
+.prompt-dataset td,
+.prompt-dataset td *,
+.prompt-dataset button,
+.prompt-dataset button * {
+    white-space: pre-wrap !important;
+    overflow: visible !important;
+    overflow-wrap: anywhere !important;
+    word-break: break-word !important;
+    text-overflow: clip !important;
+    -webkit-line-clamp: unset !important;
+    line-clamp: unset !important;
+}
+.lance-recommended-section .example-panel td:first-child button,
+.prompt-dataset button {
+    width: 100% !important;
+    height: auto !important;
+    min-height: 0 !important;
+    max-height: none !important;
+    padding: 12px 14px !important;
+    text-align: center !important;
+    justify-content: center !important;
+    align-items: center !important;
+    line-height: 1.35 !important;
+}
 .prompt-dataset .paginate {
     display: none !important;
 }
     font-weight: 800 !important;
 }
+/* Prompt example tables: Gradio Dataset renders Textbox cells with an inline
+   max-width: 35ch and a single-line preview, which causes long prompts to be
+   clipped with an ellipsis. These rules expand the Prompt column, wrap text,
+   and keep very long rows usable through scrolling. */
+.prompt-dataset,
+.prompt-dataset .table-wrap {
+    width: 100% !important;
+    max-width: 100% !important;
+    overflow-x: auto !important;
+    overflow-y: auto !important;
+}
+.prompt-dataset .table-wrap {
+    max-height: 420px !important;
+    overscroll-behavior: contain !important;
+    scrollbar-gutter: stable !important;
+}
+.prompt-dataset table {
+    width: 100% !important;
+    min-width: 720px !important;
+    max-width: none !important;
+    table-layout: fixed !important;
+    border-collapse: collapse !important;
+}
+.prompt-dataset thead,
+.prompt-dataset tbody,
+.prompt-dataset tr,
+.prompt-dataset th,
+.prompt-dataset td,
+.prompt-dataset td.textbox,
+.prompt-dataset td[style*="35ch"] {
+    height: auto !important;
+    min-height: 0 !important;
+    max-height: none !important;
+    max-width: none !important;
+    width: 100% !important;
+    min-width: 0 !important;
+    white-space: normal !important;
+    overflow: visible !important;
+    text-overflow: clip !important;
+    vertical-align: top !important;
+}
+.prompt-dataset th,
+.prompt-dataset td {
+    padding: 12px 14px !important;
+}
+.prompt-dataset td > * {
+    width: 100% !important;
+    max-width: none !important;
+    min-width: 0 !important;
+    height: auto !important;
+    min-height: 0 !important;
+    max-height: 260px !important;
+    overflow-y: auto !important;
+    overflow-x: hidden !important;
+    overscroll-behavior: contain !important;
+    white-space: pre-wrap !important;
+    text-align: left !important;
+}
+.prompt-dataset td *,
+.prompt-dataset td [class*="truncate"],
+.prompt-dataset td [class*="ellipsis"],
+.prompt-dataset td [class*="line-clamp"],
+.prompt-dataset td [style*="nowrap"],
+.prompt-dataset td [style*="ellipsis"],
+.prompt-dataset td [style*="line-clamp"],
+.prompt-dataset td span,
+.prompt-dataset td p,
+.prompt-dataset td div,
+.prompt-dataset td button {
+    max-width: none !important;
+    white-space: pre-wrap !important;
+    overflow-wrap: anywhere !important;
+    word-break: break-word !important;
+    text-overflow: clip !important;
+    -webkit-line-clamp: unset !important;
+    line-clamp: unset !important;
+}
+.prompt-dataset td span,
+.prompt-dataset td p {
+    display: block !important;
+}
+/* Full prompt example rows.  Do not use gr.Dataset for these two generation
+   sections: Dataset table cells are rendered as compact previews and the
+   actual DOM text may already contain "...".  These button rows keep and render
+   the original prompt string, wrap it fully, and make very long rows scrollable. */
+.prompt-example-full-table,
+.prompt-example-full-table > .form,
+.prompt-example-full-table > div {
+    width: 100% !important;
+    max-width: 100% !important;
+    min-width: 0 !important;
+}
+.prompt-example-full-table {
+    max-height: 460px !important;
+    overflow-x: auto !important;
+    overflow-y: auto !important;
+    overscroll-behavior: contain !important;
+    scrollbar-gutter: stable !important;
+    border: 1px solid var(--border-color-primary) !important;
+    border-radius: 8px !important;
+}
+.prompt-example-table-header,
+.prompt-example-table-header > div,
+.prompt-example-table-header .wrap {
+    position: sticky !important;
+    top: 0 !important;
+    z-index: 3 !important;
+    width: 100% !important;
+    margin: 0 !important;
+    padding: 12px 14px !important;
+    border: 0 !important;
+    border-bottom: 1px solid var(--border-color-primary) !important;
+    background: var(--block-title-background-fill, var(--block-background-fill)) !important;
+    color: var(--body-text-color) !important;
+    font-size: 18px !important;
+    font-weight: 800 !important;
+    line-height: 1.25 !important;
+    text-align: center !important;
+    box-shadow: none !important;
+}
+.prompt-example-table-body,
+.prompt-example-table-body > .form {
+    gap: 0 !important;
+    width: 100% !important;
+    min-width: 720px !important;
+}
+.prompt-examples .prompt-example-row-button,
+.prompt-examples .prompt-example-row-button > button,
+.prompt-examples .prompt-example-row-button button {
+    width: 100% !important;
+    max-width: none !important;
+    min-width: 0 !important;
+    height: auto !important;
+    min-height: 54px !important;
+    max-height: 220px !important;
+    margin: 0 !important;
+    padding: 12px 14px !important;
+    border-radius: 0 !important;
+    border: 0 !important;
+    border-bottom: 1px solid var(--border-color-primary) !important;
+    background: var(--block-background-fill) !important;
+    color: var(--body-text-color) !important;
+    display: flex !important;
+    justify-content: flex-start !important;
+    align-items: flex-start !important;
+    text-align: left !important;
+    overflow-x: hidden !important;
+    overflow-y: auto !important;
+    white-space: normal !important;
+    cursor: pointer !important;
+}
+.prompt-examples .prompt-example-row-button span,
+.prompt-examples .prompt-example-row-button p,
+.prompt-examples .prompt-example-row-button div {
+    width: 100% !important;
+    max-width: none !important;
+    display: block !important;
+    overflow: visible !important;
+    white-space: pre-wrap !important;
+    overflow-wrap: anywhere !important;
+    word-break: break-word !important;
+    text-overflow: clip !important;
+    -webkit-line-clamp: unset !important;
+    line-clamp: unset !important;
+    font-size: 16px !important;
+    line-height: 1.38 !important;
+    text-align: left !important;
+}
+.prompt-examples .prompt-example-row-button:last-child,
+.prompt-examples .prompt-example-row-button:last-child > button,
+.prompt-examples .prompt-example-row-button:last-child button {
+    border-bottom: 0 !important;
+}
+.prompt-example-table-header-with-media,
+.prompt-example-table-header-with-media > div,
+.prompt-example-table-header-with-media .wrap {
+    display: grid !important;
+    grid-template-columns: minmax(0, 1fr) minmax(180px, 260px) !important;
+    gap: 0 !important;
+    text-align: center !important;
+}
+.prompt-example-multimodal-row,
+.prompt-example-multimodal-row > .form {
+    width: 100% !important;
+    min-width: 720px !important;
+    margin: 0 !important;
+    gap: 0 !important;
+    align-items: stretch !important;
+    border-bottom: 1px solid var(--border-color-primary) !important;
+}
+.prompt-example-multimodal-row > .form {
+    display: grid !important;
+    grid-template-columns: minmax(0, 1fr) minmax(180px, 260px) !important;
+}
+.prompt-example-prompt-cell,
+.prompt-example-prompt-cell > .form,
+.prompt-example-media-cell,
+.prompt-example-media-cell > .form {
+    width: 100% !important;
+    min-width: 0 !important;
+    margin: 0 !important;
+    padding: 0 !important;
+    border: 0 !important;
+    background: transparent !important;
+    box-shadow: none !important;
+}
+.prompt-example-multimodal-row .prompt-example-row-button,
+.prompt-example-multimodal-row .prompt-example-row-button > button,
+.prompt-example-multimodal-row .prompt-example-row-button button {
+    height: 100% !important;
+    min-height: 150px !important;
+    max-height: 260px !important;
+    border-bottom: 0 !important;
+}
+.prompt-example-media-cell {
+    border-left: 1px solid var(--border-color-primary) !important;
+}
+.prompt-example-media-preview,
+.prompt-example-media-preview > div,
+.prompt-example-media-preview .wrap {
+    width: 100% !important;
+    height: 150px !important;
+    min-height: 150px !important;
+    max-height: 150px !important;
+    margin: 0 !important;
+    border: 0 !important;
+    border-radius: 0 !important;
+    background: transparent !important;
+    box-shadow: none !important;
+    overflow: hidden !important;
+}
+.prompt-example-media-preview video,
+.prompt-example-media-preview img {
+    width: 100% !important;
+    height: 150px !important;
+    object-fit: cover !important;
+    border-radius: 0 !important;
+}
+/* Keep the prompt column unchanged. Video examples fill the current row height,
+   keep their original aspect ratio, and adapt their width inside the media column. */
+.prompt-example-video-cell,
+.prompt-example-video-cell > .form {
+    display: flex !important;
+    align-items: stretch !important;
+    justify-content: center !important;
+    padding: 0 !important;
+    height: 100% !important;
+    min-height: 150px !important;
+    max-height: 260px !important;
+    overflow: hidden !important;
+}
+.prompt-example-video-preview,
+.prompt-example-video-preview > div,
+.prompt-example-video-preview .wrap {
+    display: flex !important;
+    align-items: center !important;
+    justify-content: center !important;
+    width: 100% !important;
+    min-width: 0 !important;
+    max-width: 100% !important;
+    height: 100% !important;
+    min-height: 150px !important;
+    max-height: 260px !important;
+    margin: 0 auto !important;
+    border-radius: 0 !important;
+    overflow: hidden !important;
+}
+.prompt-example-video-preview video {
+    width: auto !important;
+    max-width: 100% !important;
+    height: 100% !important;
+    min-height: 150px !important;
+    max-height: 260px !important;
+    object-fit: contain !important;
+    border-radius: 0 !important;
+}
+.prompt-example-multimodal-row:last-child,
+.prompt-example-multimodal-row:last-child > .form {
+    border-bottom: 0 !important;
+}
+@media (max-width: 900px) {
+    .prompt-example-table-header-with-media,
+    .prompt-example-table-header-with-media > div,
+    .prompt-example-table-header-with-media .wrap,
+    .prompt-example-multimodal-row > .form {
+        grid-template-columns: minmax(0, 1fr) minmax(140px, 180px) !important;
+    }
+}
 @media (max-width: 900px) {
     .lance-main-row {
         grid-template-columns: minmax(0, 1fr) !important;
         if (!element) {
             return;
         }
+        if (element.style.getPropertyValue(property) !== value || element.style.getPropertyPriority(property) !== "important") {
+            element.style.setProperty(property, value, "important");
+        }
     };
     const enforceLanceLabelTypography = () => {
         });
     };
+    const enforceRecommendedCaseText = () => {
+        document.querySelectorAll(".lance-recommended-section .example-panel").forEach((panel) => {
+            applyImportantStyle(panel, "overflow", "visible");
+            panel.querySelectorAll("table, tbody, tr, th, td, button, label, span, p, div").forEach((element) => {
+                applyImportantStyle(element, "white-space", "pre-wrap");
+                applyImportantStyle(element, "overflow-wrap", "anywhere");
+                applyImportantStyle(element, "word-break", "break-word");
+                applyImportantStyle(element, "text-overflow", "clip");
+                applyImportantStyle(element, "-webkit-line-clamp", "unset");
+                applyImportantStyle(element, "line-clamp", "unset");
+            });
+            panel.querySelectorAll("td, button").forEach((element) => {
+                applyImportantStyle(element, "height", "auto");
+                applyImportantStyle(element, "max-height", "none");
+                applyImportantStyle(element, "overflow", "visible");
+            });
+            panel.querySelectorAll("button").forEach((element) => {
+                applyImportantStyle(element, "width", "100%");
+                applyImportantStyle(element, "text-align", "center");
+                applyImportantStyle(element, "justify-content", "center");
+                applyImportantStyle(element, "align-items", "center");
+            });
+        });
+    };
+    const enforcePromptDatasetText = () => {
+        document.querySelectorAll(".prompt-dataset").forEach((dataset) => {
+            applyImportantStyle(dataset, "width", "100%");
+            applyImportantStyle(dataset, "max-width", "100%");
+            applyImportantStyle(dataset, "overflow-x", "auto");
+            applyImportantStyle(dataset, "overflow-y", "auto");
+            dataset.querySelectorAll(".table-wrap").forEach((element) => {
+                applyImportantStyle(element, "width", "100%");
+                applyImportantStyle(element, "max-width", "100%");
+                applyImportantStyle(element, "max-height", "420px");
+                applyImportantStyle(element, "overflow-x", "auto");
+                applyImportantStyle(element, "overflow-y", "auto");
+                applyImportantStyle(element, "overscroll-behavior", "contain");
+            });
+            dataset.querySelectorAll("table").forEach((element) => {
+                applyImportantStyle(element, "width", "100%");
+                applyImportantStyle(element, "min-width", "720px");
+                applyImportantStyle(element, "max-width", "none");
+                applyImportantStyle(element, "table-layout", "fixed");
+                applyImportantStyle(element, "border-collapse", "collapse");
+            });
+            dataset.querySelectorAll("thead, tbody, tr, th, td, td.textbox, td[style*='35ch']").forEach((element) => {
+                applyImportantStyle(element, "height", "auto");
+                applyImportantStyle(element, "min-height", "0");
+                applyImportantStyle(element, "max-height", "none");
+                applyImportantStyle(element, "max-width", "none");
+                applyImportantStyle(element, "width", "100%");
+                applyImportantStyle(element, "min-width", "0");
+                applyImportantStyle(element, "white-space", "normal");
+                applyImportantStyle(element, "overflow", "visible");
+                applyImportantStyle(element, "text-overflow", "clip");
+                applyImportantStyle(element, "vertical-align", "top");
+            });
+            dataset.querySelectorAll("td *").forEach((element) => {
+                applyImportantStyle(element, "max-width", "none");
+                applyImportantStyle(element, "white-space", "pre-wrap");
+                applyImportantStyle(element, "overflow-wrap", "anywhere");
+                applyImportantStyle(element, "word-break", "break-word");
+                applyImportantStyle(element, "text-overflow", "clip");
+                applyImportantStyle(element, "-webkit-line-clamp", "unset");
+                applyImportantStyle(element, "line-clamp", "unset");
+            });
+            dataset.querySelectorAll("td > *").forEach((element) => {
+                applyImportantStyle(element, "width", "100%");
+                applyImportantStyle(element, "max-width", "none");
+                applyImportantStyle(element, "min-width", "0");
+                applyImportantStyle(element, "height", "auto");
+                applyImportantStyle(element, "min-height", "0");
+                applyImportantStyle(element, "max-height", "260px");
+                applyImportantStyle(element, "overflow-y", "auto");
+                applyImportantStyle(element, "overflow-x", "hidden");
+                applyImportantStyle(element, "overscroll-behavior", "contain");
+                applyImportantStyle(element, "white-space", "pre-wrap");
+                applyImportantStyle(element, "text-align", "left");
+            });
+            dataset.querySelectorAll("td span, td p").forEach((element) => {
+                applyImportantStyle(element, "display", "block");
+            });
+        });
+    };
+    const enforcePromptExampleRows = () => {
+        document.querySelectorAll(".prompt-example-full-table").forEach((table) => {
+            applyImportantStyle(table, "width", "100%");
+            applyImportantStyle(table, "max-width", "100%");
+            applyImportantStyle(table, "max-height", "460px");
+            applyImportantStyle(table, "overflow-x", "auto");
+            applyImportantStyle(table, "overflow-y", "auto");
+        });
+        document.querySelectorAll(".prompt-example-table-body, .prompt-example-table-body > .form").forEach((element) => {
+            applyImportantStyle(element, "width", "100%");
+            applyImportantStyle(element, "min-width", "720px");
+            applyImportantStyle(element, "gap", "0");
+        });
+        document.querySelectorAll(".prompt-example-row-button, .prompt-example-row-button button").forEach((element) => {
+            applyImportantStyle(element, "width", "100%");
+            applyImportantStyle(element, "max-width", "none");
+            applyImportantStyle(element, "height", "auto");
+            applyImportantStyle(element, "min-height", "54px");
+            applyImportantStyle(element, "max-height", "220px");
+            applyImportantStyle(element, "margin", "0");
+            applyImportantStyle(element, "padding", "12px 14px");
+            applyImportantStyle(element, "border-radius", "0");
+            applyImportantStyle(element, "border", "0");
+            applyImportantStyle(element, "border-bottom", "1px solid var(--border-color-primary)");
+            applyImportantStyle(element, "display", "flex");
+            applyImportantStyle(element, "justify-content", "flex-start");
+            applyImportantStyle(element, "align-items", "flex-start");
+            applyImportantStyle(element, "text-align", "left");
+            applyImportantStyle(element, "overflow-x", "hidden");
+            applyImportantStyle(element, "overflow-y", "auto");
+            applyImportantStyle(element, "white-space", "normal");
+        });
+        document.querySelectorAll(".prompt-example-row-button span, .prompt-example-row-button p, .prompt-example-row-button div").forEach((element) => {
+            applyImportantStyle(element, "width", "100%");
+            applyImportantStyle(element, "max-width", "none");
+            applyImportantStyle(element, "display", "block");
+            applyImportantStyle(element, "overflow", "visible");
+            applyImportantStyle(element, "white-space", "pre-wrap");
+            applyImportantStyle(element, "overflow-wrap", "anywhere");
+            applyImportantStyle(element, "word-break", "break-word");
+            applyImportantStyle(element, "text-overflow", "clip");
+            applyImportantStyle(element, "-webkit-line-clamp", "unset");
+            applyImportantStyle(element, "line-clamp", "unset");
+            applyImportantStyle(element, "font-size", "16px");
+            applyImportantStyle(element, "line-height", "1.38");
+            applyImportantStyle(element, "text-align", "left");
+        });
+        document.querySelectorAll(".prompt-example-table-header-with-media, .prompt-example-table-header-with-media > div, .prompt-example-table-header-with-media .wrap, .prompt-example-multimodal-row > .form").forEach((element) => {
+            applyImportantStyle(element, "display", "grid");
+            applyImportantStyle(element, "grid-template-columns", "minmax(0, 1fr) minmax(180px, 260px)");
+            applyImportantStyle(element, "gap", "0");
+        });
+        document.querySelectorAll(".prompt-example-multimodal-row, .prompt-example-multimodal-row > .form").forEach((element) => {
+            applyImportantStyle(element, "width", "100%");
+            applyImportantStyle(element, "min-width", "720px");
+            applyImportantStyle(element, "margin", "0");
+            applyImportantStyle(element, "border-bottom", "1px solid var(--border-color-primary)");
+        });
+        document.querySelectorAll(".prompt-example-multimodal-row .prompt-example-row-button, .prompt-example-multimodal-row .prompt-example-row-button button").forEach((element) => {
+            applyImportantStyle(element, "height", "100%");
+            applyImportantStyle(element, "min-height", "150px");
+            applyImportantStyle(element, "max-height", "260px");
+            applyImportantStyle(element, "border-bottom", "0");
+        });
+        document.querySelectorAll(".prompt-example-media-preview, .prompt-example-media-preview > div, .prompt-example-media-preview .wrap, .prompt-example-media-preview video, .prompt-example-media-preview img").forEach((element) => {
+            applyImportantStyle(element, "width", "100%");
+            applyImportantStyle(element, "height", "150px");
+            applyImportantStyle(element, "max-height", "150px");
+            applyImportantStyle(element, "border-radius", "0");
+            applyImportantStyle(element, "overflow", "hidden");
+        });
+        document.querySelectorAll(".prompt-example-video-cell, .prompt-example-video-cell > .form").forEach((element) => {
+            applyImportantStyle(element, "display", "flex");
+            applyImportantStyle(element, "align-items", "stretch");
+            applyImportantStyle(element, "justify-content", "center");
+            applyImportantStyle(element, "padding", "0");
+            applyImportantStyle(element, "height", "100%");
+            applyImportantStyle(element, "min-height", "150px");
+            applyImportantStyle(element, "max-height", "260px");
+            applyImportantStyle(element, "overflow", "hidden");
+        });
+        document.querySelectorAll(".prompt-example-video-preview, .prompt-example-video-preview > div, .prompt-example-video-preview .wrap").forEach((element) => {
+            applyImportantStyle(element, "display", "flex");
+            applyImportantStyle(element, "align-items", "center");
+            applyImportantStyle(element, "justify-content", "center");
+            applyImportantStyle(element, "width", "100%");
+            applyImportantStyle(element, "min-width", "0");
+            applyImportantStyle(element, "max-width", "100%");
+            applyImportantStyle(element, "height", "100%");
+            applyImportantStyle(element, "min-height", "150px");
+            applyImportantStyle(element, "max-height", "260px");
+            applyImportantStyle(element, "margin", "0 auto");
+            applyImportantStyle(element, "border-radius", "0");
+            applyImportantStyle(element, "overflow", "hidden");
+        });
+        document.querySelectorAll(".prompt-example-video-preview video").forEach((element) => {
+            applyImportantStyle(element, "width", "auto");
+            applyImportantStyle(element, "max-width", "100%");
+            applyImportantStyle(element, "height", "100%");
+            applyImportantStyle(element, "min-height", "150px");
+            applyImportantStyle(element, "max-height", "260px");
+            applyImportantStyle(element, "object-fit", "contain");
+            applyImportantStyle(element, "border-radius", "0");
+        });
+    };
     const syncOutputColumnHeight = () => {
         const row = document.querySelector(".lance-main-row");
         const inputColumn = document.querySelector(".lance-input-column");
     const scheduleSync = () => requestAnimationFrame(() => {
         enforceLanceLabelTypography();
+        enforceRecommendedCaseText();
+        enforcePromptDatasetText();
+        enforcePromptExampleRows();
         syncOutputColumnHeight();
     });
     const attachObservers = () => {
     };
     enforceLanceLabelTypography();
+    enforceRecommendedCaseText();
+    enforcePromptDatasetText();
+    enforcePromptExampleRows();
     attachObservers();
     new MutationObserver(() => {
         enforceLanceLabelTypography();
+        enforceRecommendedCaseText();
+        enforcePromptDatasetText();
+        enforcePromptExampleRows();
         attachObservers();
     }).observe(document.body, {
         childList: true,
 IMAGE_TASKS = {TASK_T2I, TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
 VIDEO_TASKS = {TASK_T2V, TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
 EDIT_TASKS = {TASK_IMAGE_EDIT, TASK_VIDEO_EDIT}
# Plain resolution identifiers for video tasks.
VIDEO_RESOLUTION_CHOICES = ["video_360p", "video_480p"]
# (label, value) pairs: the first element is the text shown to the user, the
# second is the identifier that the normalization helpers resolve it to.
VIDEO_RESOLUTION_DISPLAY_CHOICES = [
    ("video_360p", "video_360p"),
    ("video_480p（Higher quota usage. Use sparingly.）", "video_480p"),
]
# Video editing offers exactly one resolution: its default.
VIDEO_EDIT_RESOLUTION_CHOICES = [DEFAULT_VIDEO_EDIT_RESOLUTION]
 IMAGE_RESOLUTION_CHOICES = [DEFAULT_IMAGE_RESOLUTION]
 RESOLUTION_CHOICES = VIDEO_RESOLUTION_CHOICES + IMAGE_RESOLUTION_CHOICES
 CAPTION_SYSTEM_PROMPT_TEMPLATE = (
 def get_video_duration_choices() -> list[tuple[str, int]]:
     """Return ``(label, seconds)`` pairs for durations of 1 through 10 seconds.

     Each label is the number of seconds followed by ``"s"`` (for example
     ``("3s", 3)``).
     """
     duration_choices = []
     for second_count in range(1, 11):
         duration_choices.append((f"{second_count}s", second_count))
     return duration_choices
 def env_flag(name: str, default: bool) -> bool:
     value = os.getenv(name)
 def video_seconds_to_num_frames(seconds: int) -> int:
     """Convert a duration in seconds to a frame count of ``12 * seconds + 1``.

     The input is coerced with ``int()`` and clamped to the inclusive range
     1..10 before the conversion, so the result always lies in 13..121.

     Raises:
         TypeError, ValueError: If ``seconds`` cannot be converted by ``int()``.
     """
     clamped_seconds = int(seconds)
     if clamped_seconds > 10:
         clamped_seconds = 10
     if clamped_seconds < 1:
         clamped_seconds = 1
     return 12 * clamped_seconds + 1
     return task
def normalize_resolution_choice_value(resolution: str, task: str) -> str:
    """Resolve a resolution label or value to the matching choice value.

    The input is stringified and stripped (a falsy input becomes ``""``), then
    compared with every choice that ``get_resolution_choices_for_task`` returns
    for ``task``. Tuple choices are treated as ``(label, value)``: a match on
    either element yields the value. Plain choices must match exactly.

    Returns:
        The matched choice value as a string, or the stripped input text
        unchanged when nothing matches.
    """
    wanted = str(resolution or "").strip()
    for entry in get_resolution_choices_for_task(task):
        if not isinstance(entry, tuple):
            if wanted == str(entry):
                return str(entry)
            continue
        display_label, backend_value = entry
        if wanted == str(display_label) or wanted == str(backend_value):
            return str(backend_value)
    return wanted
def get_resolution_choice_values_for_task(task: str) -> list[str]:
    """Return only the values of the resolution choices available for ``task``.

    Tuple choices contribute their second element (the value half of a
    ``(label, value)`` pair); any other choice is passed through unchanged.
    """
    return [
        entry[1] if isinstance(entry, tuple) else entry
        for entry in get_resolution_choices_for_task(task)
    ]
def get_resolution_choices_for_task(task: str) -> list[str | tuple[str, str]]:
    """Return the resolution choices offered for ``task``.

    The task is normalized first, then mapped as follows:

    * image tasks             -> ``IMAGE_RESOLUTION_CHOICES``
    * ``TASK_T2V``            -> ``VIDEO_RESOLUTION_DISPLAY_CHOICES``
    * any other video task    -> ``VIDEO_EDIT_RESOLUTION_CHOICES``
    * anything else           -> ``VIDEO_RESOLUTION_CHOICES``

    The module-level list itself is returned, not a copy.
    """
    internal_task = normalize_task(task)
    if internal_task in IMAGE_TASKS:
        selected_choices = IMAGE_RESOLUTION_CHOICES
    elif internal_task == TASK_T2V:
        # Checked before the generic video branch: text-to-video is the only
        # task that gets the labelled (label, value) choices.
        selected_choices = VIDEO_RESOLUTION_DISPLAY_CHOICES
    elif internal_task == TASK_VIDEO_EDIT or internal_task in VIDEO_TASKS:
        selected_choices = VIDEO_EDIT_RESOLUTION_CHOICES
    else:
        selected_choices = VIDEO_RESOLUTION_CHOICES
    return selected_choices
def get_default_resolution_for_task(task: str) -> str:
    """Return the default resolution identifier for ``task``.

    Image tasks use ``DEFAULT_IMAGE_RESOLUTION``; ``TASK_T2V`` and any
    unrecognized task use ``DEFAULT_RESOLUTION``; every other video task uses
    ``DEFAULT_VIDEO_EDIT_RESOLUTION``.
    """
    internal_task = normalize_task(task)
    if internal_task in IMAGE_TASKS:
        return DEFAULT_IMAGE_RESOLUTION
    # Text-to-video must be tested before the generic video branch so it keeps
    # DEFAULT_RESOLUTION. reset_generation_defaults_for_task() relies on this
    # function, so the value chosen here is what a defaults reset fills in.
    # NOTE(review): DEFAULT_RESOLUTION is presumably the lightweight 360p
    # profile; its definition is not visible here, so confirm.
    is_text_to_video = internal_task == TASK_T2V
    is_other_video = internal_task == TASK_VIDEO_EDIT or internal_task in VIDEO_TASKS
    if not is_text_to_video and is_other_video:
        return DEFAULT_VIDEO_EDIT_RESOLUTION
    return DEFAULT_RESOLUTION
def normalize_resolution_for_backend(resolution: str, task: str) -> str:
    """Return a resolution value that is valid for ``task``.

    The task is normalized, ``resolution`` is resolved through
    ``normalize_resolution_choice_value``, and the result is kept only if it is
    one of the task's choice values. Otherwise the task's default resolution is
    returned.
    """
    internal_task = normalize_task(task)
    candidate = normalize_resolution_choice_value(resolution, internal_task)
    if candidate not in get_resolution_choice_values_for_task(internal_task):
        return get_default_resolution_for_task(internal_task)
    return candidate
 def get_default_aspect_ratio(task: str) -> str:
     return DEFAULT_IMAGE_ASPECT_RATIO if internal_task in IMAGE_TASKS else DEFAULT_VIDEO_ASPECT_RATIO
def normalize_video_resolution(resolution: Optional[str], task: Optional[str] = None) -> str:
    """Return a valid video resolution, falling back to a default.

    Without a ``task``, ``resolution`` is accepted only if it is literally one
    of ``VIDEO_RESOLUTION_CHOICES``; otherwise ``DEFAULT_RESOLUTION`` is
    returned. With a ``task``, the value is resolved against that task's
    choices (labels are accepted too), and the task's default resolution is
    used when it does not match.
    """
    if task is not None:
        candidate = normalize_resolution_choice_value(resolution, task)
        allowed_values = get_resolution_choice_values_for_task(task)
        if candidate in allowed_values:
            return candidate
        return get_default_resolution_for_task(task)
    if resolution in VIDEO_RESOLUTION_CHOICES:
        return resolution
    return DEFAULT_RESOLUTION
def get_size_for_aspect_ratio(task: str, aspect_ratio: str, video_resolution: Optional[str] = None) -> tuple[int, int]:
    """Look up the size configured for ``aspect_ratio`` under ``task``.

    An aspect ratio outside ``ASPECT_RATIO_CHOICES`` is replaced by the task's
    default ratio. Image tasks read ``IMAGE_ASPECT_RATIO_TO_SIZE``; all other
    tasks read the map in ``VIDEO_RESOLUTION_TO_SIZE_MAP`` selected by the
    normalized ``video_resolution``.

    Returns:
        The size tuple stored for the ratio; callers in this file unpack it as
        ``(width, height)``.
    """
    internal_task = normalize_task(task)
    if aspect_ratio not in ASPECT_RATIO_CHOICES:
        aspect_ratio = get_default_aspect_ratio(internal_task)
    if internal_task in IMAGE_TASKS:
        return IMAGE_ASPECT_RATIO_TO_SIZE[aspect_ratio]
    profile = normalize_video_resolution(video_resolution, internal_task)
    return VIDEO_RESOLUTION_TO_SIZE_MAP[profile][aspect_ratio]
     return f"{width} x {height}"
def get_size_map_for_task(task: str, video_resolution: Optional[str] = None) -> dict[str, tuple[int, int]]:
    """Return the aspect-ratio-to-size mapping that applies to ``task``.

    Image tasks always use ``IMAGE_ASPECT_RATIO_TO_SIZE``. Every other task
    uses the entry of ``VIDEO_RESOLUTION_TO_SIZE_MAP`` for the normalized
    ``video_resolution``.
    """
    internal_task = normalize_task(task)
    if internal_task not in IMAGE_TASKS:
        profile = normalize_video_resolution(video_resolution, internal_task)
        return VIDEO_RESOLUTION_TO_SIZE_MAP[profile]
    return IMAGE_ASPECT_RATIO_TO_SIZE
+def get_output_resolution_choices_for_task(task: str, video_resolution: Optional[str] = None) -> list[tuple[str, str]]:
     """Get Output Resolution choices with a one-to-one mapping to aspect ratios."""
     internal_task = normalize_task(task)
     default_ratio = get_default_aspect_ratio(internal_task)
+    size_map = get_size_map_for_task(internal_task, video_resolution)
     choices = []
     for ratio in ASPECT_RATIO_CHOICES:
         width, height = size_map[ratio]
     return choices
+def get_aspect_ratio_for_output_resolution(task: str, output_resolution: str, video_resolution: Optional[str] = None) -> str:
     internal_task = normalize_task(task)
     resolution_text = str(output_resolution or "").strip()
+    size_map = get_size_map_for_task(internal_task, video_resolution)
     for ratio in ASPECT_RATIO_CHOICES:
         width, height = size_map[ratio]
         if resolution_text == format_size_markdown(internal_task, width, height):
     return f'<div class="{class_names}">{icon_html}<span>{html.escape(text)}</span></div>'
def update_size_from_aspect_ratio(task: str, aspect_ratio: str, video_resolution: Optional[str] = None):
    """Recompute the size and output-resolution choices for an aspect ratio.

    Returns:
        A ``(height, width, update)`` tuple, where ``update`` is a
        ``gr.update`` carrying the output-resolution choices for the task and
        the formatted size as its selected value.
    """
    width, height = get_size_for_aspect_ratio(task, aspect_ratio, video_resolution)
    available_choices = get_output_resolution_choices_for_task(task, video_resolution)
    selected_value = format_size_markdown(task, width, height)
    output_resolution_update = gr.update(choices=available_choices, value=selected_value)
    return height, width, output_resolution_update
def update_aspect_ratio_from_output_resolution(task: str, output_resolution: str, video_resolution: Optional[str] = None):
    """Derive the aspect ratio and size for a selected output resolution.

    Returns:
        A ``(aspect_ratio, height, width)`` tuple.
    """
    matched_ratio = get_aspect_ratio_for_output_resolution(task, output_resolution, video_resolution)
    size = get_size_for_aspect_ratio(task, matched_ratio, video_resolution)
    width, height = size
    return matched_ratio, height, width
def update_output_resolution_from_video_profile(task: str, aspect_ratio: str, video_resolution: str):
    """Refresh the output-resolution choices after the video resolution changes.

    Returns:
        An ``(update, height, width)`` tuple, where ``update`` is a
        ``gr.update`` carrying the choices for the new video resolution and the
        formatted size for the current aspect ratio as its selected value.
    """
    width, height = get_size_for_aspect_ratio(task, aspect_ratio, video_resolution)
    available_choices = get_output_resolution_choices_for_task(task, video_resolution)
    selected_value = format_size_markdown(task, width, height)
    output_resolution_update = gr.update(choices=available_choices, value=selected_value)
    return output_resolution_update, height, width
 def reset_generation_defaults_for_task(task: str):
     """Build the default generation settings for ``task``.

     Returns:
         A 6-tuple ``(aspect_ratio, height, width, duration, resolution,
         update)``, where ``update`` is a ``gr.update`` carrying the
         output-resolution choices and the formatted default size.
     """
     internal_task = normalize_task(task)
     default_ratio = get_default_aspect_ratio(internal_task)
     default_resolution = get_default_resolution_for_task(internal_task)
     width, height = get_size_for_aspect_ratio(internal_task, default_ratio, default_resolution)
     # NOTE(review): this slot holds the default duration in seconds, not a
     # frame count. Confirm the receiving component expects seconds.
     default_duration = DEFAULT_VIDEO_DURATION_SECONDS
     available_choices = get_output_resolution_choices_for_task(internal_task, default_resolution)
     selected_value = format_size_markdown(internal_task, width, height)
     output_resolution_update = gr.update(choices=available_choices, value=selected_value)
     return default_ratio, height, width, default_duration, default_resolution, output_resolution_update
 def apply_prompt_example(task: str, evt: gr.SelectData):
     return (prompt_text, *defaults)
def make_prompt_example_click_handler(prompt_text: str):
    """Build a click handler that fills in one full prompt example.

    gr.Dataset and gr.Examples show long text through compact preview cells,
    so long prompts can be truncated before CSS has a chance to wrap them. The
    custom example rows use ordinary buttons instead, and the untruncated
    prompt is captured in the returned closure.

    Returns:
        A function taking ``task`` that returns ``prompt_text`` followed by
        the values of ``reset_generation_defaults_for_task(task)``.
    """

    def _handler(task: str):
        task_defaults = reset_generation_defaults_for_task(task)
        return (prompt_text,) + tuple(task_defaults)

    return _handler
def make_media_prompt_example_click_handler(
    prompt_text: str,
    input_video_path: Optional[str] = None,
    input_image_path: Optional[str] = None,
):
    """Build a click handler for example rows that pair a prompt with media.

    The row button shows the complete prompt, instruction or question. The
    returned closure also carries the matching media paths, so one click
    supplies every input value.

    Returns:
        A function taking ``task`` that returns ``prompt_text``,
        ``input_video_path`` and ``input_image_path`` followed by the values
        of ``reset_generation_defaults_for_task(task)``.
    """

    def _handler(task: str):
        task_defaults = reset_generation_defaults_for_task(task)
        leading_values = (prompt_text, input_video_path, input_image_path)
        return leading_values + tuple(task_defaults)

    return _handler
 def get_understanding_system_prompt_choices(task: str) -> list[str]:
     internal_task = normalize_task(task)
     if internal_task == TASK_X2T_IMAGE:
             )
             stage_start = time.perf_counter()
+            print(f"[startup][gpu:{self.device}] Casting Lance model to bf16 on CPU", flush=True)
+            model = model.to(dtype=torch.bfloat16)
+            self._log_stage("Lance model bf16 cast", stage_start)
             stage_start = time.perf_counter()
             print(f"[startup][gpu:{self.device}] Loading tokenizer: {model_args.model_path}", flush=True)
                     != model.language_model.get_output_embeddings().weight.data.data_ptr()
                 ), "tie_word_embeddings conflict"
+            stage_start = time.perf_counter()
+            print(f"[startup][gpu:{self.device}] Moving Lance model to GPU {self.device}", flush=True)
+            model = model.to(device=self.device)
+            self._log_stage("Lance model move to GPU", stage_start)
             model.eval()
             if vae_model is not None and hasattr(vae_model, "eval"):
                 vae_model.eval()
     print(f"[startup] flash-attn {DEFAULT_FLASH_ATTN_VERSION} installed successfully.", flush=True)
 def get_env_int(name: str, default: int) -> int:
     """Read an integer environment variable, falling back safely on invalid values."""
     try:
         return default
def get_env_float(name: str, default: float) -> float:
    """Read a float environment variable, falling back safely on invalid values.

    Args:
        name: Name of the environment variable to read.
        default: Value used when the variable is unset, cannot be parsed as a
            float, or parses to a non-finite number.

    Returns:
        The parsed finite float, or ``default``.
    """
    import math  # function-scope import keeps this helper self-contained

    try:
        parsed = float(os.getenv(name, str(default)))
    except (TypeError, ValueError):
        return default
    # float() accepts "nan", "inf" and "infinity". Such values are not usable
    # settings: finalize_zerogpu_duration multiplies by the margin and converts
    # to int, which raises OverflowError for an infinite product.
    if not math.isfinite(parsed):
        return default
    return parsed
 def get_zerogpu_duration_cap() -> int:
     """Maximum duration requested from ZeroGPU.
+    The duration value is a ZeroGPU reservation/timeout hint. Shorter values can
+    improve queue priority and reduce wasted quota, but the value must still cover
+    model warm-up plus inference. Override per deployment when needed:
+        LANCE_ZEROGPU_MAX_DURATION_SECONDS=300
     """
+    return max(1, get_env_int("LANCE_ZEROGPU_MAX_DURATION_SECONDS", 240))
 def clamp_zerogpu_duration(seconds: int) -> int:
     return max(1, min(int(seconds), get_zerogpu_duration_cap()))
def is_pipeline_pool_ready_for_task(task: str) -> bool:
    """Report whether the active pipeline pool can serve ``task`` without loading.

    Returns True only when ``ACTIVE_PIPELINE_POOL`` exists, its ``model_variant``
    equals the variant required by ``task``, and every pipeline in the pool has a
    truthy ``initialized`` attribute.

    ZeroGPU evaluates the dynamic duration before calling the decorated
    function. When the model is already loaded a shorter warm-run duration can
    be requested; otherwise extra time is reserved for the first request after
    startup or model switching. This does not change the UI layout or
    user-facing controls.

    The check is best-effort: any exception raised while inspecting the pool is
    treated as "not ready".
    """
    try:
        active_pool = ACTIVE_PIPELINE_POOL
        if active_pool is None:
            return False
        if active_pool.model_variant != get_task_model_variant(task):
            return False
        for loaded_pipeline in active_pool.pipelines:
            if not getattr(loaded_pipeline, "initialized", False):
                return False
        return True
    except Exception:
        return False
def finalize_zerogpu_duration(estimated_seconds: float, task: str) -> int:
    """Turn a raw time estimate into the ZeroGPU duration to request.

    Steps:
      1. Read the safety multiplier from ``LANCE_ZEROGPU_DURATION_MARGIN``
         (default 1.10, never below 1.0).
      2. When the pipeline pool is not ready for ``task``, add the cold-start
         buffer from ``LANCE_ZEROGPU_COLD_START_BUFFER_SECONDS`` (default 120,
         never negative).
      3. Multiply by the margin, round up approximately by adding 0.999 before
         truncation, and clamp with ``clamp_zerogpu_duration``.
    """
    safety_margin = max(1.0, get_env_float("LANCE_ZEROGPU_DURATION_MARGIN", 1.10))
    pool_is_warm = is_pipeline_pool_ready_for_task(task)
    if not pool_is_warm:
        cold_start_buffer = get_env_int("LANCE_ZEROGPU_COLD_START_BUFFER_SECONDS", 120)
        estimated_seconds += max(0, cold_start_buffer)
    padded_seconds = estimated_seconds * safety_margin + 0.999
    return clamp_zerogpu_duration(int(padded_seconds))
 def get_run_task_gpu_duration(
     task: str,
     prompt: str,
     cfg_text_scale: float,
     enable_frame_interpolation: bool,
 ) -> int:
+    """Return a dynamic ZeroGPU reservation duration.
+    The previous implementation used one conservative estimate for both cold and
+    warm runs. This version keeps the first request safe, then asks for shorter
+    durations once the matching Lance model is already loaded, which reduces
+    wasted ZeroGPU quota and improves queue priority without changing the UI.
     """
     internal_task = normalize_task(task)
+    timesteps = max(1, int(validation_num_timesteps or DEFAULT_TIMESTEPS))
+    backend_resolution = normalize_resolution_for_backend(str(resolution), internal_task)
+    resolution_multiplier = 1.28 if backend_resolution == "video_480p" else 1.0
+    timestep_extra = max(0, timesteps - 20)
+    if internal_task == TASK_T2V:
+        requested_seconds = max(1, int(num_frames or DEFAULT_VIDEO_DURATION_SECONDS))
+        estimate = 35 + requested_seconds * 10 + timestep_extra * 1.5
+        if normalize_frame_interpolation(enable_frame_interpolation):
+            estimate += min(32, 8 + requested_seconds * 3)
+        return finalize_zerogpu_duration(estimate * resolution_multiplier, internal_task)
+    if internal_task == TASK_VIDEO_EDIT:
+        estimate = 85 + timestep_extra * 1.5
+        if normalize_frame_interpolation(enable_frame_interpolation):
+            estimate += 22
+        return finalize_zerogpu_duration(estimate * resolution_multiplier, internal_task)
     if internal_task == TASK_X2T_VIDEO:
+        return finalize_zerogpu_duration(32, internal_task)
+    if internal_task == TASK_T2I:
+        return finalize_zerogpu_duration(58, internal_task)
+    if internal_task == TASK_IMAGE_EDIT:
+        return finalize_zerogpu_duration(70, internal_task)
+    return finalize_zerogpu_duration(28, internal_task)
 def get_pipeline_pool(task: str) -> PipelinePool:
     gpu_text = "unknown"
     concurrency = 1
     active_variant = "none"
     if ACTIVE_PIPELINE_POOL is not None:
         active_variant = ACTIVE_PIPELINE_POOL.model_variant
         gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
         concurrency = ACTIVE_PIPELINE_POOL.size
     return (
         f"**Status**  GPU: `{gpu_text}`  |  Max concurrency: `{concurrency}`  |  "
         f"Queue limit: `{QUEUE_MAX_SIZE}`  |  Active model: `{active_variant}`  |  "
+        f"Switch mode: `unload then load`"
     )
     is_edit_task = internal_task in EDIT_TASKS
     is_understanding_task = internal_task in UNDERSTANDING_TASKS
     is_generation_task = internal_task in GENERATION_TASKS
+    is_text_to_visual_task = internal_task in {TASK_T2V, TASK_T2I}
     show_media_input = is_edit_task or is_understanding_task
+    resolution_choices = get_resolution_choice_values_for_task(internal_task)
+    resolution_value = get_default_resolution_for_task(internal_task)
     aspect_ratio_value = DEFAULT_IMAGE_ASPECT_RATIO if is_image_task else DEFAULT_VIDEO_ASPECT_RATIO
+    width_value, height_value = get_size_for_aspect_ratio(internal_task, aspect_ratio_value, resolution_value)
     size_markdown = format_size_markdown(internal_task, width_value, height_value)
     system_prompt_choices = get_understanding_system_prompt_choices(internal_task)
+    if is_text_to_visual_task:
         text_label = "Prompt"
         text_placeholder = "Describe what you want to generate..."
     elif is_edit_task:
     output_icon = "video" if output_label == "Output Video" else "image" if output_label == "Output Image" else "text"
     show_generation_settings = is_generation_task or is_edit_task
+    show_aspect_ratio = is_text_to_visual_task
+    show_output_resolution = is_text_to_visual_task
     show_input_video = internal_task in {TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
     show_input_image = internal_task in {TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
+    show_frame_interpolation_settings = internal_task in {TASK_T2V, TASK_VIDEO_EDIT}
+    show_video_resolution_settings = internal_task == TASK_T2V
     return (
         gr.update(value=build_lance_label_html(text_label, "lance-prompt-label")),
             label=text_label,
             placeholder=text_placeholder,
             visible=True,
+            value="",
         ),
         gr.update(
             choices=system_prompt_choices,
             value=system_prompt_choices[0],
             visible=False,
         ),
+        # Switching task pages should always start from a clean input state.
+        # Clear both visual input boxes even if one of them stays visible across tasks.
         gr.update(label="Input Video", visible=show_input_video, value=None),
         gr.update(label="Input Image", visible=show_input_image, value=None),
+        gr.update(visible=show_frame_interpolation_settings),
         gr.update(visible=show_aspect_ratio),
+        gr.update(visible=show_output_resolution),
         gr.update(visible=internal_task == TASK_T2V),
+        gr.update(visible=show_video_resolution_settings),
         gr.update(choices=get_aspect_ratio_choices_for_task(internal_task), value=aspect_ratio_value, visible=show_aspect_ratio),
         gr.update(value=height_value),
         gr.update(value=width_value),
+        gr.update(visible=show_frame_interpolation_settings, value=DEFAULT_FRAME_INTERPOLATION),
+        gr.update(choices=get_output_resolution_choices_for_task(internal_task, resolution_value), value=size_markdown, visible=show_output_resolution),
         gr.update(visible=internal_task == TASK_T2V, value=DEFAULT_VIDEO_DURATION_SECONDS),
+        gr.update(choices=resolution_choices, value=resolution_value, visible=show_video_resolution_settings),
         gr.update(value=build_lance_icon_label_html(output_label, output_icon, "lance-output-label")),
         gr.update(visible=internal_task in {TASK_T2V, TASK_VIDEO_EDIT}),
         gr.update(visible=internal_task in {TASK_T2I, TASK_IMAGE_EDIT}),
                                 value=DEFAULT_VIDEO_ASPECT_RATIO,
                                 elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
                             )
+                    with gr.Row(elem_classes=["generation-controls-row", "output-resolution-row"]) as output_resolution_row:
                         with gr.Column(elem_classes=["lance-control-field"]):
                             gr.HTML('<div class="lance-generation-label">Output Resolution</div>', elem_classes=["lance-label-html"])
                             real_size = gr.Radio(
                                 interactive=True,
                                 elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
                             )
                 with gr.Row(elem_classes=["generation-controls-row", "video-duration-row"]) as video_duration_row:
                     with gr.Column(elem_classes=["lance-control-field"]):
                         gr.HTML(build_lance_label_html("Video Duration (seconds)", "lance-generation-label"), elem_classes=["lance-label-html"])
+                        num_frames = gr.Radio(
                             label="Video Duration (seconds)",
                             show_label=False,
+                            choices=get_video_duration_choices(),
                             value=DEFAULT_VIDEO_DURATION_SECONDS,
                             elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
                         )
+                with gr.Row(elem_classes=["generation-controls-row", "video-resolution-row"]) as video_resolution_row:
+                    with gr.Column(elem_classes=["lance-control-field"]):
+                        gr.HTML(build_lance_label_html("Video Resolution", "lance-generation-label"), elem_classes=["lance-label-html"])
+                        resolution = gr.Dropdown(
+                            label="Video Resolution",
+                            show_label=False,
+                            choices=VIDEO_RESOLUTION_DISPLAY_CHOICES,
+                            value=DEFAULT_RESOLUTION,
+                            elem_classes=["generation-control"],
+                        )
+                height = gr.Number(value=DEFAULT_HEIGHT, precision=0, visible=False)
+                width = gr.Number(value=DEFAULT_WIDTH, precision=0, visible=False)
                 with gr.Accordion("Advanced Parameters", open=False, elem_classes=["lance-advanced-accordion"]):
                     with gr.Column(elem_classes=["lance-control-field"]):
         run_button = gr.Button("🚀 Generate", variant="primary", elem_classes=["lance-run-button"])
+        def build_prompt_example_table(examples: list[list], media_type: Optional[str] = None):
+            """Render examples with full prompt text instead of Gradio compact previews."""
+            example_buttons = []
+            with gr.Column(elem_classes=["prompt-example-full-table"]):
+                if media_type == "video":
+                    gr.HTML("<div>Prompt / Instruction / Question</div><div>Input Video</div>", elem_classes=["prompt-example-table-header", "prompt-example-table-header-with-media"])
+                elif media_type == "image":
+                    gr.HTML("<div>Prompt / Instruction / Question</div><div>Input Image</div>", elem_classes=["prompt-example-table-header", "prompt-example-table-header-with-media"])
+                else:
+                    gr.HTML("<div>Prompt</div>", elem_classes=["prompt-example-table-header"])
+                with gr.Column(elem_classes=["prompt-example-table-body"]):
+                    for example_row in examples:
+                        example_prompt = str(example_row[0]) if example_row else ""
+                        video_path = str(example_row[1]) if len(example_row) > 1 and example_row[1] else None
+                        image_path = str(example_row[2]) if len(example_row) > 2 and example_row[2] else None
+                        if media_type == "video" and video_path:
+                            with gr.Row(elem_classes=["prompt-example-multimodal-row", "prompt-example-video-row"]):
+                                with gr.Column(elem_classes=["prompt-example-prompt-cell"]):
+                                    example_button = gr.Button(
+                                        example_prompt,
+                                        variant="secondary",
+                                        elem_classes=["prompt-example-row-button"],
+                                    )
+                                with gr.Column(elem_classes=["prompt-example-media-cell", "prompt-example-video-cell"]):
+                                    gr.Video(
+                                        value=video_path,
+                                        label="Input Video",
+                                        show_label=False,
+                                        interactive=False,
+                                        elem_classes=["prompt-example-media-preview", "prompt-example-video-preview"],
+                                    )
+                            example_buttons.append((example_button, example_prompt, video_path, None))
+                        elif media_type == "image" and image_path:
+                            with gr.Row(elem_classes=["prompt-example-multimodal-row"]):
+                                with gr.Column(elem_classes=["prompt-example-prompt-cell"]):
+                                    example_button = gr.Button(
+                                        example_prompt,
+                                        variant="secondary",
+                                        elem_classes=["prompt-example-row-button"],
+                                    )
+                                with gr.Column(elem_classes=["prompt-example-media-cell"]):
+                                    gr.Image(
+                                        value=image_path,
+                                        label="Input Image",
+                                        show_label=False,
+                                        interactive=False,
+                                        type="filepath",
+                                        elem_classes=["prompt-example-media-preview"],
+                                    )
+                            example_buttons.append((example_button, example_prompt, None, image_path))
+                        else:
+                            example_button = gr.Button(
+                                example_prompt,
+                                variant="secondary",
+                                elem_classes=["prompt-example-row-button"],
+                            )
+                            example_buttons.append((example_button, example_prompt, None, None))
+            return example_buttons
         with gr.Column(visible=True, elem_classes=["lance-recommended-section"]) as video_generation_examples_group:
             gr.HTML(build_lance_label_html("Video generation recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
             with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
+                video_generation_example_buttons = build_prompt_example_table(VIDEO_GENERATION_EXAMPLES)
         with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as video_edit_examples_group:
             gr.HTML(build_lance_label_html("Video edit recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
+            with gr.Group(elem_classes=["example-panel", "prompt-examples", "video-edit-examples"]):
+                video_edit_example_buttons = build_prompt_example_table(VIDEO_EDIT_EXAMPLES, media_type="video")
         with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as video_understanding_examples_group:
             gr.HTML(build_lance_label_html("Video understanding recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
+            with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
+                video_understanding_example_buttons = build_prompt_example_table(VIDEO_UNDERSTANDING_EXAMPLES, media_type="video")
         with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_generation_examples_group:
             gr.HTML(build_lance_label_html("Image generation recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
             with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
+                image_generation_example_buttons = build_prompt_example_table(IMAGE_GENERATION_EXAMPLES)
         with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_edit_examples_group:
             gr.HTML(build_lance_label_html("Image edit recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
+            with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
+                image_edit_example_buttons = build_prompt_example_table(IMAGE_EDIT_EXAMPLES, media_type="image")
         with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_understanding_examples_group:
             gr.HTML(build_lance_label_html("Image understanding recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
+            with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
+                image_understanding_example_buttons = build_prompt_example_table(IMAGE_UNDERSTANDING_EXAMPLES, media_type="image")
         task.change(
             fn=update_task_ui,
                 aspect_ratio_row,
                 output_resolution_row,
                 video_duration_row,
+                video_resolution_row,
                 aspect_ratio,
                 height,
                 width,
         aspect_ratio.change(
             fn=update_size_from_aspect_ratio,
+            inputs=[task, aspect_ratio, resolution],
             outputs=[height, width, real_size],
             queue=False,
             show_api=False,
         real_size.change(
             fn=update_aspect_ratio_from_output_resolution,
+            inputs=[task, real_size, resolution],
             outputs=[aspect_ratio, height, width],
             queue=False,
             show_api=False,
         )
+        resolution.change(
+            fn=update_output_resolution_from_video_profile,
+            inputs=[task, aspect_ratio, resolution],
+            outputs=[real_size, height, width],
+            queue=False,
+            show_api=False,
+        )
+        for example_button, example_prompt, _, _ in video_generation_example_buttons + image_generation_example_buttons:
+            example_button.click(
+                fn=make_prompt_example_click_handler(example_prompt),
                 inputs=[task],
+                outputs=[prompt, aspect_ratio, height, width, num_frames, resolution, real_size],
                 queue=False,
                 show_api=False,
             )
+        for example_button, example_prompt, example_video, example_image in (
+            video_edit_example_buttons
+            + video_understanding_example_buttons
+            + image_edit_example_buttons
+            + image_understanding_example_buttons
+        ):
+            example_button.click(
+                fn=make_media_prompt_example_click_handler(example_prompt, example_video, example_image),
+                inputs=[task],
+                outputs=[prompt, input_video, input_image, aspect_ratio, height, width, num_frames, resolution, real_size],
+                queue=False,
+                show_api=False,
+            )
         run_button.click(
             fn=build_running_status_markdown,
                 enable_frame_interpolation,
             ],
             outputs=[output_video, output_image, output_text, status, logs],
+            show_progress="minimal",
         )
     return demo
     return gpu_ids
def prefetch_model_assets_before_launch() -> None:
    """Make model assets available before the first ZeroGPU request.

    On ZeroGPU, time spent downloading model snapshots inside @spaces.GPU burns
    the first user's GPU reservation, so assets are prepared up front through
    ``ensure_model_assets``. The visible UI is unchanged.

    Environment variables:
        LANCE_PREFETCH_MODEL_ASSETS: set to 0 to skip prefetching; the default
            is whatever ``running_on_space()`` returns.
        LANCE_PREFETCH_MODEL_VARIANTS: comma-separated variants to prefetch
            (for example ``video`` to prefetch less); defaults to both the
            video and image variants.

    A failure for one variant is logged and does not stop the others; the
    message states that loading is retried lazily during inference.
    """
    if not env_flag("LANCE_PREFETCH_MODEL_ASSETS", running_on_space()):
        print("[startup] Model asset prefetch disabled.", flush=True)
        return

    requested_variants = os.getenv(
        "LANCE_PREFETCH_MODEL_VARIANTS",
        f"{MODEL_VARIANT_VIDEO},{MODEL_VARIANT_IMAGE}",
    )

    # Normalise each entry and keep first-seen order without duplicates.
    prefetch_order: list[str] = []
    for token in map(str.strip, requested_variants.split(",")):
        if token:
            normalized = normalize_model_variant(token)
            if normalized not in prefetch_order:
                prefetch_order.append(normalized)

    for model_variant in prefetch_order:
        try:
            started_at = time.perf_counter()
            assets_path = ensure_model_assets(model_variant)
            took_seconds = time.perf_counter() - started_at
            print(
                f"[startup][{model_variant}] Model assets are ready at {display_path(assets_path)} "
                f"before ZeroGPU inference. elapsed={took_seconds:.2f}s",
                flush=True,
            )
        except Exception as exc:
            print(
                f"[startup][{model_variant}] Model asset prefetch failed and will be retried lazily during inference: {exc}",
                flush=True,
            )
 if __name__ == "__main__":
     args = parse_args()
     os.environ["LANCE_GPUS"] = args.gpus
     QUEUE_MAX_SIZE = args.queue_size
+    prefetch_model_assets_before_launch()
+    print(
+        "[startup] Skipping GPU model preload. UI will launch first, and Lance weights will be loaded lazily inside ZeroGPU inference calls.",
+        flush=True,
+    )
     concurrency_limit = 1
     demo = build_demo()
     demo.queue(

config/config_factory.py CHANGED Viewed

@@ -234,7 +234,7 @@ class InferenceArguments(TrainingArguments):
     video_width:                int = 480
     num_frames:                 int = 50
     task:                       str = "t2v"  # t2v / t2i / edit / idip ...
-    resolution:                 str = "video_848x480"  # image_768x768 or video_848x480
     text_template:              bool = False  # 是否使用 system_prompt 文本模板
     max_duration:               float = 6.0  # 最大视频时长（秒）

     video_width:                int = 480
     num_frames:                 int = 50
     task:                       str = "t2v"  # t2v / t2i / edit / idip ...
+    resolution:                 str = "video_360p"  # image_768x768 or video_360p / video_480p
     text_template:              bool = False  # 是否使用 system_prompt 文本模板
     max_duration:               float = 6.0  # 最大视频时长（秒）

data/datasets_custom/validation_dataset.py CHANGED Viewed

@@ -116,7 +116,10 @@ class ValidationDataset(Dataset):
         if self.data_config.resolution == "image_768x768":
             resolution_vae = 768
             resolution_vit = 672
-        elif self.data_config.resolution == "video_848x480":
             resolution_vae = 640
             resolution_vit = 616
         else:

         if self.data_config.resolution == "image_768x768":
             resolution_vae = 768
             resolution_vit = 672
+        elif self.data_config.resolution == "video_360p":
+            resolution_vae = 480
+            resolution_vit = 448
+        elif self.data_config.resolution == "video_480p":
             resolution_vae = 640
             resolution_vit = 616
         else:

inference_lance.py CHANGED Viewed

@@ -495,9 +495,9 @@ def main():
         training_args=training_args,
     )
     stage_start = time.perf_counter()
-    log_rank0(f"[startup] Moving Lance model to GPU {DEVICE}")
-    model = model.to(DEVICE)
-    log_stage("Lance model move to GPU", stage_start)
     # Setup tokenizer for model:
     stage_start = time.perf_counter()
@@ -538,7 +538,10 @@ def main():
     else: # HACK!!!
         assert model.language_model.get_input_embeddings().weight.data.data_ptr() != model.language_model.get_output_embeddings().weight.data.data_ptr(), 'tie_word_embeddings conflict'
-    model = model.to(device=DEVICE, dtype=torch.bfloat16)
     model.eval()
     if vae_model is not None and hasattr(vae_model, "eval"):
         vae_model.eval()

         training_args=training_args,
     )
     stage_start = time.perf_counter()
+    log_rank0("[startup] Casting Lance model to bf16 on CPU")
+    model = model.to(dtype=torch.bfloat16)
+    log_stage("Lance model bf16 cast", stage_start)
     # Setup tokenizer for model:
     stage_start = time.perf_counter()
     else: # HACK!!!
         assert model.language_model.get_input_embeddings().weight.data.data_ptr() != model.language_model.get_output_embeddings().weight.data.data_ptr(), 'tie_word_embeddings conflict'
+    stage_start = time.perf_counter()
+    log_rank0(f"[startup] Moving Lance model to GPU {DEVICE}")
+    model = model.to(device=DEVICE)
+    log_stage("Lance model move to GPU", stage_start)
     model.eval()
     if vae_model is not None and hasattr(vae_model, "eval"):
         vae_model.eval()

modeling/lance/lance.py CHANGED Viewed

@@ -301,7 +301,7 @@ class Lance(PreTrainedModel):
             packed_latent = (1 - packed_timesteps[:, None]) * packed_latent_clean + packed_timesteps[:, None] * noise
             packed_timestep_embeds = self.time_embedder(packed_timesteps) # [L, C]
-            latent_token_pos_emb = self.latent_pos_embed(packed_latent_position_ids)
             packed_latent = self.vae2llm(packed_latent) + packed_timestep_embeds + latent_token_pos_emb
             packed_sequence[packed_vae_token_indexes] = packed_latent.to(packed_sequence.dtype) # NOTE: 这里替换真实的vae token embed！
@@ -655,7 +655,7 @@ class Lance(PreTrainedModel):
                     # --- 视觉特征 编码 ---
                     timestep_embed = self.time_embedder(timestep)
-                    latent_pos_embed = self.latent_pos_embed(vae_position_ids)
                     vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
                     vae_embed = vae_embed.to(current_sequence.dtype)
@@ -1641,7 +1641,7 @@ class Lance(PreTrainedModel):
             # --- 存入 视觉特征 编码 （vae condition）---
             timestep_embed = self.time_embedder(timestep)
-            latent_pos_embed = self.latent_pos_embed(vae_position_ids)
             vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
             vae_embed = vae_embed.to(current_sequence.dtype)
             current_sequence[current_vae_token_indexes_local] = vae_embed
@@ -1698,7 +1698,7 @@ class Lance(PreTrainedModel):
                     # --- 视觉特征 编码 ---
                     timestep_embed = self.time_embedder(timestep)
-                    latent_pos_embed = self.latent_pos_embed(vae_position_ids)
                     vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
                     vae_embed = vae_embed.to(current_sequence.dtype)

             packed_latent = (1 - packed_timesteps[:, None]) * packed_latent_clean + packed_timesteps[:, None] * noise
             packed_timestep_embeds = self.time_embedder(packed_timesteps) # [L, C]
+            latent_token_pos_emb = self.latent_pos_embed(packed_latent_position_ids.to(device=packed_latent.device))
             packed_latent = self.vae2llm(packed_latent) + packed_timestep_embeds + latent_token_pos_emb
             packed_sequence[packed_vae_token_indexes] = packed_latent.to(packed_sequence.dtype) # NOTE: 这里替换真实的vae token embed！
                     # --- 视觉特征 编码 ---
                     timestep_embed = self.time_embedder(timestep)
+                    latent_pos_embed = self.latent_pos_embed(vae_position_ids.to(device=x_t.device))
                     vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
                     vae_embed = vae_embed.to(current_sequence.dtype)
             # --- 存入 视觉特征 编码 （vae condition）---
             timestep_embed = self.time_embedder(timestep)
+            latent_pos_embed = self.latent_pos_embed(vae_position_ids.to(device=x_t.device))
             vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
             vae_embed = vae_embed.to(current_sequence.dtype)
             current_sequence[current_vae_token_indexes_local] = vae_embed
                     # --- 视觉特征 编码 ---
                     timestep_embed = self.time_embedder(timestep)
+                    latent_pos_embed = self.latent_pos_embed(vae_position_ids.to(device=x_t.device))
                     vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
                     vae_embed = vae_embed.to(current_sequence.dtype)

modeling/lance/modeling_utils.py CHANGED Viewed

@@ -186,13 +186,38 @@ class PositionEmbedding3D(nn.Module):
         self.max_num_latent_frames = max_latent_num_frames  # t
         self.max_latent_size = max_latent_size  # h, w
         self.hidden_size = hidden_size
-        self.pos_embed = nn.Parameter(torch.zeros(max_latent_num_frames * (max_latent_size**2), hidden_size), requires_grad=False)
-        self._init_weights()
-    def _init_weights(self):
-        # Initialize (and freeze) pos_embed by sin-cos embedding:
-        pos_embed = get_3d_sincos_pos_embed(self.hidden_size, self.max_num_latent_frames, self.max_latent_size, self.max_latent_size)
-        self.pos_embed.data.copy_(torch.from_numpy(pos_embed).float())
     def forward(self, position_ids):
-        return self.pos_embed[position_ids]

         self.max_num_latent_frames = max_latent_num_frames  # t
         self.max_latent_size = max_latent_size  # h, w
         self.hidden_size = hidden_size
+        self.temporal_dim, self.height_dim, self.width_dim = self._split_hidden_dims(hidden_size)
+    @staticmethod
+    def _split_hidden_dims(embed_dim: int) -> tuple[int, int, int]:
+        assert embed_dim % 2 == 0, "Embedding dimension must be even for 3D embeddings"
+        d = embed_dim // 3
+        d = d if d % 2 == 0 else d - 1
+        dim_t = d
+        dim_h = d
+        dim_w = embed_dim - 2 * d
+        assert dim_w % 2 == 0
+        return dim_t, dim_h, dim_w
+    @staticmethod
+    def _build_1d_sincos(coords: torch.Tensor, embed_dim: int) -> torch.Tensor:
+        assert embed_dim % 2 == 0, "Embedding dimension must be even for 1D embeddings"
+        half = embed_dim // 2
+        omega = torch.arange(half, device=coords.device, dtype=torch.float32)
+        omega = omega / (embed_dim / 2.0)
+        omega = 1.0 / (10000.0 ** omega)
+        args = coords.to(dtype=torch.float32)[:, None] * omega[None, :]
+        return torch.cat([torch.sin(args), torch.cos(args)], dim=-1)
     def forward(self, position_ids):
+        position_ids = position_ids.reshape(-1).to(dtype=torch.long)
+        plane_size = self.max_latent_size * self.max_latent_size
+        t = position_ids // plane_size
+        rem = position_ids % plane_size
+        h = rem // self.max_latent_size
+        w = rem % self.max_latent_size
+        emb_t = self._build_1d_sincos(t, self.temporal_dim)
+        emb_h = self._build_1d_sincos(h, self.height_dim)
+        emb_w = self._build_1d_sincos(w, self.width_dim)
+        return torch.cat([emb_t, emb_h, emb_w], dim=-1)

requirements.txt CHANGED Viewed

@@ -1,7 +1,7 @@
 absl-py==0.15.0
 accelerate==1.13.0
 addict==2.4.0
-albumentations==1.4.3
 annotated-types==0.7.0
 bitsandbytes==0.49.2
 certifi==2024.8.30
@@ -23,7 +23,7 @@ joblib==1.4.2
 kornia==0.8.2
 librosa==0.10.2.post1
 markupsafe==2.1.5
-numpy==1.24.4
 omegaconf==2.3.0
 opencv-python==4.7.0.72
 opt_einsum==3.4.0

 absl-py==0.15.0
 accelerate==1.13.0
 addict==2.4.0
+# albumentations==1.4.3
 annotated-types==0.7.0
 bitsandbytes==0.49.2
 certifi==2024.8.30
 kornia==0.8.2
 librosa==0.10.2.post1
 markupsafe==2.1.5
+numpy==1.23.5
 omegaconf==2.3.0
 opencv-python==4.7.0.72
 opt_einsum==3.4.0