Spaces:
Running on Zero
Running on Zero
Prepare Lance for Hugging Face Space
Browse files- README.md +2 -0
- app.py +1068 -215
- config/config_factory.py +1 -1
- data/datasets_custom/validation_dataset.py +4 -1
- inference_lance.py +7 -4
- modeling/lance/lance.py +4 -4
- modeling/lance/modeling_utils.py +32 -7
- requirements.txt +2 -2
README.md
CHANGED
|
@@ -7,6 +7,8 @@ sdk: gradio
|
|
| 7 |
python_version: "3.10.13"
|
| 8 |
sdk_version: "5.31.0"
|
| 9 |
app_file: app.py
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
<div align="center">
|
|
|
|
| 7 |
python_version: "3.10.13"
|
| 8 |
sdk_version: "5.31.0"
|
| 9 |
app_file: app.py
|
| 10 |
+
models:
|
| 11 |
+
- bytedance-research/Lance
|
| 12 |
---
|
| 13 |
|
| 14 |
<div align="center">
|
app.py
CHANGED
|
@@ -89,13 +89,14 @@ DEFAULT_TASK = "t2v"
|
|
| 89 |
DEFAULT_TIMESTEPS = 30
|
| 90 |
DEFAULT_TIMESTEP_SHIFT = 3.5
|
| 91 |
DEFAULT_CFG_TEXT_SCALE = 4.0
|
| 92 |
-
DEFAULT_RESOLUTION = "
|
|
|
|
| 93 |
DEFAULT_IMAGE_RESOLUTION = "image_768x768"
|
| 94 |
DEFAULT_BASIC_SEED = 42
|
| 95 |
-
DEFAULT_HEIGHT =
|
| 96 |
-
DEFAULT_WIDTH =
|
| 97 |
DEFAULT_IMAGE_SIZE = 768
|
| 98 |
-
DEFAULT_VIDEO_DURATION_SECONDS =
|
| 99 |
MAX_VIDEO_DURATION_SECONDS = 360
|
| 100 |
MAX_VIDEO_NUM_FRAMES = 12 * MAX_VIDEO_DURATION_SECONDS + 1
|
| 101 |
DEFAULT_NUM_FRAMES = 12 * DEFAULT_VIDEO_DURATION_SECONDS + 1
|
|
@@ -106,7 +107,19 @@ FRAME_INTERPOLATION_NO = "No"
|
|
| 106 |
DEFAULT_FRAME_INTERPOLATION = FRAME_INTERPOLATION_YES
|
| 107 |
ASPECT_RATIO_CHOICES = ["21:9", "16:9", "3:2", "4:3", "1:1", "3:4", "2:3", "9:16", "9:21"]
|
| 108 |
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
"21:9": (976, 416),
|
| 111 |
"16:9": (848, 480),
|
| 112 |
"3:2": (784, 528),
|
|
@@ -118,6 +131,11 @@ VIDEO_ASPECT_RATIO_TO_SIZE = {
|
|
| 118 |
"9:21": (416, 976),
|
| 119 |
}
|
| 120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
IMAGE_ASPECT_RATIO_TO_SIZE = {
|
| 122 |
"21:9": (1168, 496),
|
| 123 |
"16:9": (1024, 576),
|
|
@@ -134,10 +152,6 @@ DEFAULT_QUEUE_SIZE = 32
|
|
| 134 |
USE_KVCACHE = True
|
| 135 |
TEXT_TEMPLATE = True
|
| 136 |
RECORD_WRITE_LOCK = threading.Lock()
|
| 137 |
-
MODEL_ASSET_PREFETCH_LOCK = threading.Lock()
|
| 138 |
-
MODEL_ASSET_PREFETCH_STARTED = False
|
| 139 |
-
MODEL_ASSET_PREFETCH_DONE = threading.Event()
|
| 140 |
-
MODEL_ASSET_PREFETCH_ERROR: Optional[str] = None
|
| 141 |
|
| 142 |
LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
|
| 143 |
LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
|
|
@@ -608,6 +622,112 @@ APP_CSS = """
|
|
| 608 |
line-height: 1.35 !important;
|
| 609 |
}
|
| 610 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 611 |
.prompt-dataset .paginate {
|
| 612 |
display: none !important;
|
| 613 |
}
|
|
@@ -746,6 +866,327 @@ APP_CSS = """
|
|
| 746 |
font-weight: 800 !important;
|
| 747 |
}
|
| 748 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 749 |
@media (max-width: 900px) {
|
| 750 |
.lance-main-row {
|
| 751 |
grid-template-columns: minmax(0, 1fr) !important;
|
|
@@ -759,7 +1200,9 @@ APP_JS = """
|
|
| 759 |
if (!element) {
|
| 760 |
return;
|
| 761 |
}
|
| 762 |
-
element.style.
|
|
|
|
|
|
|
| 763 |
};
|
| 764 |
|
| 765 |
const enforceLanceLabelTypography = () => {
|
|
@@ -783,6 +1226,216 @@ APP_JS = """
|
|
| 783 |
});
|
| 784 |
};
|
| 785 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 786 |
const syncOutputColumnHeight = () => {
|
| 787 |
const row = document.querySelector(".lance-main-row");
|
| 788 |
const inputColumn = document.querySelector(".lance-input-column");
|
|
@@ -812,6 +1465,9 @@ APP_JS = """
|
|
| 812 |
|
| 813 |
const scheduleSync = () => requestAnimationFrame(() => {
|
| 814 |
enforceLanceLabelTypography();
|
|
|
|
|
|
|
|
|
|
| 815 |
syncOutputColumnHeight();
|
| 816 |
});
|
| 817 |
const attachObservers = () => {
|
|
@@ -834,9 +1490,15 @@ APP_JS = """
|
|
| 834 |
};
|
| 835 |
|
| 836 |
enforceLanceLabelTypography();
|
|
|
|
|
|
|
|
|
|
| 837 |
attachObservers();
|
| 838 |
new MutationObserver(() => {
|
| 839 |
enforceLanceLabelTypography();
|
|
|
|
|
|
|
|
|
|
| 840 |
attachObservers();
|
| 841 |
}).observe(document.body, {
|
| 842 |
childList: true,
|
|
@@ -888,7 +1550,12 @@ UNDERSTANDING_TASKS = {TASK_X2T_VIDEO, TASK_X2T_IMAGE}
|
|
| 888 |
IMAGE_TASKS = {TASK_T2I, TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
|
| 889 |
VIDEO_TASKS = {TASK_T2V, TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
|
| 890 |
EDIT_TASKS = {TASK_IMAGE_EDIT, TASK_VIDEO_EDIT}
|
| 891 |
-
VIDEO_RESOLUTION_CHOICES = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 892 |
IMAGE_RESOLUTION_CHOICES = [DEFAULT_IMAGE_RESOLUTION]
|
| 893 |
RESOLUTION_CHOICES = VIDEO_RESOLUTION_CHOICES + IMAGE_RESOLUTION_CHOICES
|
| 894 |
CAPTION_SYSTEM_PROMPT_TEMPLATE = (
|
|
@@ -911,7 +1578,7 @@ def get_aspect_ratio_choices_for_task(task: str) -> list[tuple[str, str]]:
|
|
| 911 |
|
| 912 |
|
| 913 |
def get_video_duration_choices() -> list[tuple[str, int]]:
|
| 914 |
-
return [(f"{seconds}s", seconds) for seconds in range(1,
|
| 915 |
|
| 916 |
def env_flag(name: str, default: bool) -> bool:
|
| 917 |
value = os.getenv(name)
|
|
@@ -1136,7 +1803,7 @@ def normalize_frame_interpolation(value) -> bool:
|
|
| 1136 |
|
| 1137 |
|
| 1138 |
def video_seconds_to_num_frames(seconds: int) -> int:
|
| 1139 |
-
seconds = max(1, min(
|
| 1140 |
return 12 * seconds + 1
|
| 1141 |
|
| 1142 |
|
|
@@ -1148,13 +1815,63 @@ def normalize_task(task: str) -> str:
|
|
| 1148 |
return task
|
| 1149 |
|
| 1150 |
|
| 1151 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1152 |
internal_task = normalize_task(task)
|
| 1153 |
if internal_task in IMAGE_TASKS:
|
| 1154 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1155 |
if internal_task in VIDEO_TASKS:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1156 |
return DEFAULT_RESOLUTION
|
| 1157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1158 |
|
| 1159 |
|
| 1160 |
def get_default_aspect_ratio(task: str) -> str:
|
|
@@ -1162,10 +1879,21 @@ def get_default_aspect_ratio(task: str) -> str:
|
|
| 1162 |
return DEFAULT_IMAGE_ASPECT_RATIO if internal_task in IMAGE_TASKS else DEFAULT_VIDEO_ASPECT_RATIO
|
| 1163 |
|
| 1164 |
|
| 1165 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1166 |
internal_task = normalize_task(task)
|
| 1167 |
aspect_ratio = aspect_ratio if aspect_ratio in ASPECT_RATIO_CHOICES else get_default_aspect_ratio(internal_task)
|
| 1168 |
-
|
|
|
|
|
|
|
|
|
|
| 1169 |
return size_map[aspect_ratio]
|
| 1170 |
|
| 1171 |
|
|
@@ -1177,16 +1905,18 @@ def format_size_markdown(task: str, width: int, height: int) -> str:
|
|
| 1177 |
return f"{width} x {height}"
|
| 1178 |
|
| 1179 |
|
| 1180 |
-
def get_size_map_for_task(task: str) -> dict[str, tuple[int, int]]:
|
| 1181 |
internal_task = normalize_task(task)
|
| 1182 |
-
|
|
|
|
|
|
|
| 1183 |
|
| 1184 |
|
| 1185 |
-
def get_output_resolution_choices_for_task(task: str) -> list[tuple[str, str]]:
|
| 1186 |
"""Get Output Resolution choices with a one-to-one mapping to aspect ratios."""
|
| 1187 |
internal_task = normalize_task(task)
|
| 1188 |
default_ratio = get_default_aspect_ratio(internal_task)
|
| 1189 |
-
size_map = get_size_map_for_task(internal_task)
|
| 1190 |
choices = []
|
| 1191 |
for ratio in ASPECT_RATIO_CHOICES:
|
| 1192 |
width, height = size_map[ratio]
|
|
@@ -1196,10 +1926,10 @@ def get_output_resolution_choices_for_task(task: str) -> list[tuple[str, str]]:
|
|
| 1196 |
return choices
|
| 1197 |
|
| 1198 |
|
| 1199 |
-
def get_aspect_ratio_for_output_resolution(task: str, output_resolution: str) -> str:
|
| 1200 |
internal_task = normalize_task(task)
|
| 1201 |
resolution_text = str(output_resolution or "").strip()
|
| 1202 |
-
size_map = get_size_map_for_task(internal_task)
|
| 1203 |
for ratio in ASPECT_RATIO_CHOICES:
|
| 1204 |
width, height = size_map[ratio]
|
| 1205 |
if resolution_text == format_size_markdown(internal_task, width, height):
|
|
@@ -1256,24 +1986,42 @@ def build_lance_icon_label_html(text: str, icon: str, *extra_classes: str) -> st
|
|
| 1256 |
return f'<div class="{class_names}">{icon_html}<span>{html.escape(text)}</span></div>'
|
| 1257 |
|
| 1258 |
|
| 1259 |
-
def update_size_from_aspect_ratio(task: str, aspect_ratio: str):
|
| 1260 |
-
width, height = get_size_for_aspect_ratio(task, aspect_ratio)
|
| 1261 |
-
return height, width,
|
|
|
|
|
|
|
|
|
|
| 1262 |
|
| 1263 |
|
| 1264 |
-
def update_aspect_ratio_from_output_resolution(task: str, output_resolution: str):
|
| 1265 |
-
aspect_ratio = get_aspect_ratio_for_output_resolution(task, output_resolution)
|
| 1266 |
-
width, height = get_size_for_aspect_ratio(task, aspect_ratio)
|
| 1267 |
return aspect_ratio, height, width
|
| 1268 |
|
| 1269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1270 |
def reset_generation_defaults_for_task(task: str):
|
| 1271 |
internal_task = normalize_task(task)
|
| 1272 |
aspect_ratio = get_default_aspect_ratio(internal_task)
|
| 1273 |
-
|
| 1274 |
-
|
| 1275 |
num_frames = DEFAULT_VIDEO_DURATION_SECONDS
|
| 1276 |
-
return aspect_ratio, height, width, num_frames, resolution,
|
|
|
|
|
|
|
|
|
|
| 1277 |
|
| 1278 |
|
| 1279 |
def apply_prompt_example(task: str, evt: gr.SelectData):
|
|
@@ -1288,6 +2036,41 @@ def apply_prompt_example(task: str, evt: gr.SelectData):
|
|
| 1288 |
return (prompt_text, *defaults)
|
| 1289 |
|
| 1290 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1291 |
def get_understanding_system_prompt_choices(task: str) -> list[str]:
|
| 1292 |
internal_task = normalize_task(task)
|
| 1293 |
if internal_task == TASK_X2T_IMAGE:
|
|
@@ -1815,9 +2598,9 @@ class LanceT2VV2TPipeline:
|
|
| 1815 |
)
|
| 1816 |
|
| 1817 |
stage_start = time.perf_counter()
|
| 1818 |
-
print(f"[startup][gpu:{self.device}]
|
| 1819 |
-
model = model.to(
|
| 1820 |
-
self._log_stage("Lance model
|
| 1821 |
|
| 1822 |
stage_start = time.perf_counter()
|
| 1823 |
print(f"[startup][gpu:{self.device}] Loading tokenizer: {model_args.model_path}", flush=True)
|
|
@@ -1855,7 +2638,10 @@ class LanceT2VV2TPipeline:
|
|
| 1855 |
!= model.language_model.get_output_embeddings().weight.data.data_ptr()
|
| 1856 |
), "tie_word_embeddings conflict"
|
| 1857 |
|
| 1858 |
-
|
|
|
|
|
|
|
|
|
|
| 1859 |
model.eval()
|
| 1860 |
if vae_model is not None and hasattr(vae_model, "eval"):
|
| 1861 |
vae_model.eval()
|
|
@@ -2402,45 +3188,6 @@ def ensure_flash_attn_installed() -> None:
|
|
| 2402 |
print(f"[startup] flash-attn {DEFAULT_FLASH_ATTN_VERSION} installed successfully.", flush=True)
|
| 2403 |
|
| 2404 |
|
| 2405 |
-
def prefetch_lance_runtime_assets() -> None:
|
| 2406 |
-
global MODEL_ASSET_PREFETCH_ERROR
|
| 2407 |
-
with MODEL_ASSET_PREFETCH_LOCK:
|
| 2408 |
-
if MODEL_ASSET_PREFETCH_DONE.is_set():
|
| 2409 |
-
return
|
| 2410 |
-
print(
|
| 2411 |
-
"[startup] Preloading Lance runtime assets on CPU: flash-attn plus both model variants.",
|
| 2412 |
-
flush=True,
|
| 2413 |
-
)
|
| 2414 |
-
try:
|
| 2415 |
-
ensure_flash_attn_installed()
|
| 2416 |
-
for variant in (MODEL_VARIANT_VIDEO, MODEL_VARIANT_IMAGE):
|
| 2417 |
-
model_path = ensure_model_assets(variant)
|
| 2418 |
-
print(
|
| 2419 |
-
f"[startup] CPU preload finished for {variant} at {display_path(model_path)}",
|
| 2420 |
-
flush=True,
|
| 2421 |
-
)
|
| 2422 |
-
MODEL_ASSET_PREFETCH_ERROR = None
|
| 2423 |
-
MODEL_ASSET_PREFETCH_DONE.set()
|
| 2424 |
-
print("[startup] CPU asset preload finished for all Lance variants.", flush=True)
|
| 2425 |
-
except Exception as exc:
|
| 2426 |
-
MODEL_ASSET_PREFETCH_ERROR = str(exc)
|
| 2427 |
-
print(f"[startup] CPU asset preload failed: {exc}", flush=True)
|
| 2428 |
-
|
| 2429 |
-
|
| 2430 |
-
def start_lance_runtime_asset_prefetch() -> None:
|
| 2431 |
-
global MODEL_ASSET_PREFETCH_STARTED
|
| 2432 |
-
with MODEL_ASSET_PREFETCH_LOCK:
|
| 2433 |
-
if MODEL_ASSET_PREFETCH_STARTED:
|
| 2434 |
-
return
|
| 2435 |
-
MODEL_ASSET_PREFETCH_STARTED = True
|
| 2436 |
-
thread = threading.Thread(
|
| 2437 |
-
target=prefetch_lance_runtime_assets,
|
| 2438 |
-
name="lance-runtime-asset-prefetch",
|
| 2439 |
-
daemon=True,
|
| 2440 |
-
)
|
| 2441 |
-
thread.start()
|
| 2442 |
-
|
| 2443 |
-
|
| 2444 |
def get_env_int(name: str, default: int) -> int:
|
| 2445 |
"""Read an integer environment variable, falling back safely on invalid values."""
|
| 2446 |
try:
|
|
@@ -2449,19 +3196,54 @@ def get_env_int(name: str, default: int) -> int:
|
|
| 2449 |
return default
|
| 2450 |
|
| 2451 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2452 |
def get_zerogpu_duration_cap() -> int:
|
| 2453 |
"""Maximum duration requested from ZeroGPU.
|
| 2454 |
|
| 2455 |
-
|
| 2456 |
-
|
|
|
|
|
|
|
| 2457 |
"""
|
| 2458 |
-
return max(1, get_env_int("LANCE_ZEROGPU_MAX_DURATION_SECONDS",
|
| 2459 |
|
| 2460 |
|
| 2461 |
def clamp_zerogpu_duration(seconds: int) -> int:
|
| 2462 |
return max(1, min(int(seconds), get_zerogpu_duration_cap()))
|
| 2463 |
|
| 2464 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2465 |
def get_run_task_gpu_duration(
|
| 2466 |
task: str,
|
| 2467 |
prompt: str,
|
|
@@ -2478,18 +3260,39 @@ def get_run_task_gpu_duration(
|
|
| 2478 |
cfg_text_scale: float,
|
| 2479 |
enable_frame_interpolation: bool,
|
| 2480 |
) -> int:
|
| 2481 |
-
"""Return a
|
| 2482 |
|
| 2483 |
-
|
| 2484 |
-
|
|
|
|
|
|
|
| 2485 |
"""
|
| 2486 |
internal_task = normalize_task(task)
|
| 2487 |
-
|
| 2488 |
-
|
| 2489 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2490 |
if internal_task == TASK_X2T_VIDEO:
|
| 2491 |
-
return
|
| 2492 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2493 |
|
| 2494 |
|
| 2495 |
def get_pipeline_pool(task: str) -> PipelinePool:
|
|
@@ -2562,21 +3365,14 @@ def build_status_markdown() -> str:
|
|
| 2562 |
gpu_text = "unknown"
|
| 2563 |
concurrency = 1
|
| 2564 |
active_variant = "none"
|
| 2565 |
-
asset_status = "pending"
|
| 2566 |
if ACTIVE_PIPELINE_POOL is not None:
|
| 2567 |
active_variant = ACTIVE_PIPELINE_POOL.model_variant
|
| 2568 |
gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
|
| 2569 |
concurrency = ACTIVE_PIPELINE_POOL.size
|
| 2570 |
-
if MODEL_ASSET_PREFETCH_DONE.is_set():
|
| 2571 |
-
asset_status = "done"
|
| 2572 |
-
elif MODEL_ASSET_PREFETCH_STARTED:
|
| 2573 |
-
asset_status = "running"
|
| 2574 |
-
if MODEL_ASSET_PREFETCH_ERROR:
|
| 2575 |
-
asset_status = f"failed: {MODEL_ASSET_PREFETCH_ERROR}"
|
| 2576 |
return (
|
| 2577 |
f"**Status** GPU: `{gpu_text}` | Max concurrency: `{concurrency}` | "
|
| 2578 |
f"Queue limit: `{QUEUE_MAX_SIZE}` | Active model: `{active_variant}` | "
|
| 2579 |
-
f"Switch mode: `unload then load`
|
| 2580 |
)
|
| 2581 |
|
| 2582 |
|
|
@@ -2639,15 +3435,16 @@ def update_task_ui(task: str):
|
|
| 2639 |
is_edit_task = internal_task in EDIT_TASKS
|
| 2640 |
is_understanding_task = internal_task in UNDERSTANDING_TASKS
|
| 2641 |
is_generation_task = internal_task in GENERATION_TASKS
|
|
|
|
| 2642 |
show_media_input = is_edit_task or is_understanding_task
|
| 2643 |
-
resolution_choices =
|
| 2644 |
-
resolution_value =
|
| 2645 |
aspect_ratio_value = DEFAULT_IMAGE_ASPECT_RATIO if is_image_task else DEFAULT_VIDEO_ASPECT_RATIO
|
| 2646 |
-
width_value, height_value = get_size_for_aspect_ratio(internal_task, aspect_ratio_value)
|
| 2647 |
size_markdown = format_size_markdown(internal_task, width_value, height_value)
|
| 2648 |
system_prompt_choices = get_understanding_system_prompt_choices(internal_task)
|
| 2649 |
|
| 2650 |
-
if
|
| 2651 |
text_label = "Prompt"
|
| 2652 |
text_placeholder = "Describe what you want to generate..."
|
| 2653 |
elif is_edit_task:
|
|
@@ -2666,10 +3463,12 @@ def update_task_ui(task: str):
|
|
| 2666 |
|
| 2667 |
output_icon = "video" if output_label == "Output Video" else "image" if output_label == "Output Image" else "text"
|
| 2668 |
show_generation_settings = is_generation_task or is_edit_task
|
| 2669 |
-
show_aspect_ratio =
|
|
|
|
| 2670 |
show_input_video = internal_task in {TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
|
| 2671 |
show_input_image = internal_task in {TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
|
| 2672 |
-
|
|
|
|
| 2673 |
|
| 2674 |
return (
|
| 2675 |
gr.update(value=build_lance_label_html(text_label, "lance-prompt-label")),
|
|
@@ -2677,25 +3476,29 @@ def update_task_ui(task: str):
|
|
| 2677 |
label=text_label,
|
| 2678 |
placeholder=text_placeholder,
|
| 2679 |
visible=True,
|
|
|
|
| 2680 |
),
|
| 2681 |
gr.update(
|
| 2682 |
choices=system_prompt_choices,
|
| 2683 |
value=system_prompt_choices[0],
|
| 2684 |
visible=False,
|
| 2685 |
),
|
|
|
|
|
|
|
| 2686 |
gr.update(label="Input Video", visible=show_input_video, value=None),
|
| 2687 |
gr.update(label="Input Image", visible=show_input_image, value=None),
|
| 2688 |
-
gr.update(visible=
|
| 2689 |
gr.update(visible=show_aspect_ratio),
|
| 2690 |
-
gr.update(visible=
|
| 2691 |
gr.update(visible=internal_task == TASK_T2V),
|
|
|
|
| 2692 |
gr.update(choices=get_aspect_ratio_choices_for_task(internal_task), value=aspect_ratio_value, visible=show_aspect_ratio),
|
| 2693 |
gr.update(value=height_value),
|
| 2694 |
gr.update(value=width_value),
|
| 2695 |
-
gr.update(visible=
|
| 2696 |
-
gr.update(choices=get_output_resolution_choices_for_task(internal_task), value=size_markdown, visible=
|
| 2697 |
gr.update(visible=internal_task == TASK_T2V, value=DEFAULT_VIDEO_DURATION_SECONDS),
|
| 2698 |
-
gr.update(choices=resolution_choices, value=resolution_value, visible=
|
| 2699 |
gr.update(value=build_lance_icon_label_html(output_label, output_icon, "lance-output-label")),
|
| 2700 |
gr.update(visible=internal_task in {TASK_T2V, TASK_VIDEO_EDIT}),
|
| 2701 |
gr.update(visible=internal_task in {TASK_T2I, TASK_IMAGE_EDIT}),
|
|
@@ -2773,7 +3576,7 @@ def build_demo() -> gr.Blocks:
|
|
| 2773 |
value=DEFAULT_VIDEO_ASPECT_RATIO,
|
| 2774 |
elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
|
| 2775 |
)
|
| 2776 |
-
with gr.Row(
|
| 2777 |
with gr.Column(elem_classes=["lance-control-field"]):
|
| 2778 |
gr.HTML('<div class="lance-generation-label">Output Resolution</div>', elem_classes=["lance-label-html"])
|
| 2779 |
real_size = gr.Radio(
|
|
@@ -2784,26 +3587,28 @@ def build_demo() -> gr.Blocks:
|
|
| 2784 |
interactive=True,
|
| 2785 |
elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
|
| 2786 |
)
|
| 2787 |
-
resolution = gr.Dropdown(
|
| 2788 |
-
label="Resolution",
|
| 2789 |
-
choices=RESOLUTION_CHOICES,
|
| 2790 |
-
value=DEFAULT_RESOLUTION,
|
| 2791 |
-
visible=False,
|
| 2792 |
-
)
|
| 2793 |
-
height = gr.Number(value=DEFAULT_HEIGHT, precision=0, visible=False)
|
| 2794 |
-
width = gr.Number(value=DEFAULT_WIDTH, precision=0, visible=False)
|
| 2795 |
with gr.Row(elem_classes=["generation-controls-row", "video-duration-row"]) as video_duration_row:
|
| 2796 |
with gr.Column(elem_classes=["lance-control-field"]):
|
| 2797 |
gr.HTML(build_lance_label_html("Video Duration (seconds)", "lance-generation-label"), elem_classes=["lance-label-html"])
|
| 2798 |
-
num_frames = gr.
|
| 2799 |
label="Video Duration (seconds)",
|
| 2800 |
show_label=False,
|
| 2801 |
-
|
| 2802 |
-
maximum=MAX_VIDEO_DURATION_SECONDS,
|
| 2803 |
-
step=1,
|
| 2804 |
value=DEFAULT_VIDEO_DURATION_SECONDS,
|
| 2805 |
elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
|
| 2806 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2807 |
|
| 2808 |
with gr.Accordion("Advanced Parameters", open=False, elem_classes=["lance-advanced-accordion"]):
|
| 2809 |
with gr.Column(elem_classes=["lance-control-field"]):
|
|
@@ -2868,94 +3673,96 @@ def build_demo() -> gr.Blocks:
|
|
| 2868 |
|
| 2869 |
run_button = gr.Button("🚀 Generate", variant="primary", elem_classes=["lance-run-button"])
|
| 2870 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2871 |
with gr.Column(visible=True, elem_classes=["lance-recommended-section"]) as video_generation_examples_group:
|
| 2872 |
gr.HTML(build_lance_label_html("Video generation recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
|
| 2873 |
with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
|
| 2874 |
-
|
| 2875 |
-
samples=VIDEO_GENERATION_EXAMPLES,
|
| 2876 |
-
components=[gr.Textbox(label="Prompt", visible=False)],
|
| 2877 |
-
headers=["Prompt"],
|
| 2878 |
-
show_label=False,
|
| 2879 |
-
type="values",
|
| 2880 |
-
layout="table",
|
| 2881 |
-
samples_per_page=len(VIDEO_GENERATION_EXAMPLES),
|
| 2882 |
-
elem_classes=["prompt-dataset"],
|
| 2883 |
-
)
|
| 2884 |
|
| 2885 |
with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as video_edit_examples_group:
|
| 2886 |
gr.HTML(build_lance_label_html("Video edit recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
|
| 2887 |
-
with gr.Group(elem_classes=["example-panel", "
|
| 2888 |
-
|
| 2889 |
-
examples=VIDEO_EDIT_EXAMPLES,
|
| 2890 |
-
inputs=generation_example_inputs,
|
| 2891 |
-
label="",
|
| 2892 |
-
examples_per_page=3,
|
| 2893 |
-
cache_examples=False,
|
| 2894 |
-
preprocess=False,
|
| 2895 |
-
postprocess=False,
|
| 2896 |
-
)
|
| 2897 |
|
| 2898 |
with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as video_understanding_examples_group:
|
| 2899 |
gr.HTML(build_lance_label_html("Video understanding recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
|
| 2900 |
-
with gr.Group(elem_classes=["example-panel", "
|
| 2901 |
-
|
| 2902 |
-
examples=VIDEO_UNDERSTANDING_EXAMPLES,
|
| 2903 |
-
inputs=generation_example_inputs,
|
| 2904 |
-
label="",
|
| 2905 |
-
examples_per_page=4,
|
| 2906 |
-
cache_examples=False,
|
| 2907 |
-
preprocess=False,
|
| 2908 |
-
postprocess=False,
|
| 2909 |
-
)
|
| 2910 |
|
| 2911 |
with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_generation_examples_group:
|
| 2912 |
gr.HTML(build_lance_label_html("Image generation recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
|
| 2913 |
with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
|
| 2914 |
-
|
| 2915 |
-
samples=IMAGE_GENERATION_EXAMPLES,
|
| 2916 |
-
components=[gr.Textbox(label="Prompt", visible=False)],
|
| 2917 |
-
headers=["Prompt"],
|
| 2918 |
-
show_label=False,
|
| 2919 |
-
type="values",
|
| 2920 |
-
layout="table",
|
| 2921 |
-
samples_per_page=len(IMAGE_GENERATION_EXAMPLES),
|
| 2922 |
-
elem_classes=["prompt-dataset"],
|
| 2923 |
-
)
|
| 2924 |
|
| 2925 |
with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_edit_examples_group:
|
| 2926 |
gr.HTML(build_lance_label_html("Image edit recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
|
| 2927 |
-
with gr.Group(elem_classes=["example-panel", "
|
| 2928 |
-
|
| 2929 |
-
examples=IMAGE_EDIT_EXAMPLES,
|
| 2930 |
-
inputs=generation_example_inputs,
|
| 2931 |
-
label="",
|
| 2932 |
-
examples_per_page=5,
|
| 2933 |
-
cache_examples=False,
|
| 2934 |
-
preprocess=False,
|
| 2935 |
-
postprocess=False,
|
| 2936 |
-
)
|
| 2937 |
|
| 2938 |
with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_understanding_examples_group:
|
| 2939 |
gr.HTML(build_lance_label_html("Image understanding recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
|
| 2940 |
-
with gr.Group(elem_classes=["example-panel", "
|
| 2941 |
-
|
| 2942 |
-
examples=IMAGE_UNDERSTANDING_EXAMPLES,
|
| 2943 |
-
inputs=generation_example_inputs,
|
| 2944 |
-
label="",
|
| 2945 |
-
examples_per_page=4,
|
| 2946 |
-
cache_examples=False,
|
| 2947 |
-
preprocess=False,
|
| 2948 |
-
postprocess=False,
|
| 2949 |
-
)
|
| 2950 |
-
|
| 2951 |
-
keep_example_clicks_from_changing_visibility(
|
| 2952 |
-
video_generation_examples,
|
| 2953 |
-
video_edit_examples,
|
| 2954 |
-
video_understanding_examples,
|
| 2955 |
-
image_generation_examples,
|
| 2956 |
-
image_edit_examples,
|
| 2957 |
-
image_understanding_examples,
|
| 2958 |
-
)
|
| 2959 |
|
| 2960 |
task.change(
|
| 2961 |
fn=update_task_ui,
|
|
@@ -2970,6 +3777,7 @@ def build_demo() -> gr.Blocks:
|
|
| 2970 |
aspect_ratio_row,
|
| 2971 |
output_resolution_row,
|
| 2972 |
video_duration_row,
|
|
|
|
| 2973 |
aspect_ratio,
|
| 2974 |
height,
|
| 2975 |
width,
|
|
@@ -2992,7 +3800,7 @@ def build_demo() -> gr.Blocks:
|
|
| 2992 |
|
| 2993 |
aspect_ratio.change(
|
| 2994 |
fn=update_size_from_aspect_ratio,
|
| 2995 |
-
inputs=[task, aspect_ratio],
|
| 2996 |
outputs=[height, width, real_size],
|
| 2997 |
queue=False,
|
| 2998 |
show_api=False,
|
|
@@ -3000,35 +3808,42 @@ def build_demo() -> gr.Blocks:
|
|
| 3000 |
|
| 3001 |
real_size.change(
|
| 3002 |
fn=update_aspect_ratio_from_output_resolution,
|
| 3003 |
-
inputs=[task, real_size],
|
| 3004 |
outputs=[aspect_ratio, height, width],
|
| 3005 |
queue=False,
|
| 3006 |
show_api=False,
|
| 3007 |
)
|
| 3008 |
|
| 3009 |
-
|
| 3010 |
-
|
| 3011 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3012 |
inputs=[task],
|
| 3013 |
-
outputs=[aspect_ratio, height, width, num_frames, resolution, real_size],
|
| 3014 |
queue=False,
|
| 3015 |
show_api=False,
|
| 3016 |
)
|
| 3017 |
|
| 3018 |
-
|
| 3019 |
-
|
| 3020 |
-
|
| 3021 |
-
|
| 3022 |
-
|
| 3023 |
-
|
| 3024 |
-
|
| 3025 |
-
|
| 3026 |
-
|
| 3027 |
-
|
| 3028 |
-
|
| 3029 |
-
|
| 3030 |
-
|
| 3031 |
-
)
|
| 3032 |
|
| 3033 |
run_button.click(
|
| 3034 |
fn=build_running_status_markdown,
|
|
@@ -3055,6 +3870,7 @@ def build_demo() -> gr.Blocks:
|
|
| 3055 |
enable_frame_interpolation,
|
| 3056 |
],
|
| 3057 |
outputs=[output_video, output_image, output_text, status, logs],
|
|
|
|
| 3058 |
)
|
| 3059 |
|
| 3060 |
return demo
|
|
@@ -3091,17 +3907,54 @@ def parse_gpu_ids(gpu_string: str) -> list[int]:
|
|
| 3091 |
return gpu_ids
|
| 3092 |
|
| 3093 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3094 |
if __name__ == "__main__":
|
| 3095 |
args = parse_args()
|
| 3096 |
os.environ["LANCE_GPUS"] = args.gpus
|
| 3097 |
QUEUE_MAX_SIZE = args.queue_size
|
| 3098 |
-
|
| 3099 |
-
|
| 3100 |
-
|
| 3101 |
-
|
| 3102 |
-
|
| 3103 |
-
flush=True,
|
| 3104 |
-
)
|
| 3105 |
concurrency_limit = 1
|
| 3106 |
demo = build_demo()
|
| 3107 |
demo.queue(
|
|
|
|
| 89 |
DEFAULT_TIMESTEPS = 30
|
| 90 |
DEFAULT_TIMESTEP_SHIFT = 3.5
|
| 91 |
DEFAULT_CFG_TEXT_SCALE = 4.0
|
| 92 |
+
DEFAULT_RESOLUTION = "video_360p"
|
| 93 |
+
DEFAULT_VIDEO_EDIT_RESOLUTION = "video_480p"
|
| 94 |
DEFAULT_IMAGE_RESOLUTION = "image_768x768"
|
| 95 |
DEFAULT_BASIC_SEED = 42
|
| 96 |
+
DEFAULT_HEIGHT = 352
|
| 97 |
+
DEFAULT_WIDTH = 640
|
| 98 |
DEFAULT_IMAGE_SIZE = 768
|
| 99 |
+
DEFAULT_VIDEO_DURATION_SECONDS = 3
|
| 100 |
MAX_VIDEO_DURATION_SECONDS = 360
|
| 101 |
MAX_VIDEO_NUM_FRAMES = 12 * MAX_VIDEO_DURATION_SECONDS + 1
|
| 102 |
DEFAULT_NUM_FRAMES = 12 * DEFAULT_VIDEO_DURATION_SECONDS + 1
|
|
|
|
| 107 |
DEFAULT_FRAME_INTERPOLATION = FRAME_INTERPOLATION_YES
|
| 108 |
ASPECT_RATIO_CHOICES = ["21:9", "16:9", "3:2", "4:3", "1:1", "3:4", "2:3", "9:16", "9:21"]
|
| 109 |
|
| 110 |
+
VIDEO_360P_ASPECT_RATIO_TO_SIZE = {
|
| 111 |
+
"21:9": (672, 288),
|
| 112 |
+
"16:9": (640, 352),
|
| 113 |
+
"3:2": (528, 352),
|
| 114 |
+
"4:3": (560, 416),
|
| 115 |
+
"1:1": (480, 480),
|
| 116 |
+
"3:4": (416, 560),
|
| 117 |
+
"2:3": (352, 528),
|
| 118 |
+
"9:16": (352, 640),
|
| 119 |
+
"9:21": (288, 672),
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
VIDEO_480P_ASPECT_RATIO_TO_SIZE = {
|
| 123 |
"21:9": (976, 416),
|
| 124 |
"16:9": (848, 480),
|
| 125 |
"3:2": (784, 528),
|
|
|
|
| 131 |
"9:21": (416, 976),
|
| 132 |
}
|
| 133 |
|
| 134 |
+
VIDEO_RESOLUTION_TO_SIZE_MAP = {
|
| 135 |
+
"video_360p": VIDEO_360P_ASPECT_RATIO_TO_SIZE,
|
| 136 |
+
"video_480p": VIDEO_480P_ASPECT_RATIO_TO_SIZE,
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
IMAGE_ASPECT_RATIO_TO_SIZE = {
|
| 140 |
"21:9": (1168, 496),
|
| 141 |
"16:9": (1024, 576),
|
|
|
|
| 152 |
USE_KVCACHE = True
|
| 153 |
TEXT_TEMPLATE = True
|
| 154 |
RECORD_WRITE_LOCK = threading.Lock()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
LANCE_HOMEPAGE_URL = "https://lance-project.github.io/"
|
| 157 |
LANCE_PAPER_URL = "http://arxiv.org/abs/2605.18678"
|
|
|
|
| 622 |
line-height: 1.35 !important;
|
| 623 |
}
|
| 624 |
|
| 625 |
+
.prompt-dataset button,
|
| 626 |
+
.example-panel table td:first-child button {
|
| 627 |
+
max-height: 180px !important;
|
| 628 |
+
overflow-y: auto !important;
|
| 629 |
+
overscroll-behavior: contain !important;
|
| 630 |
+
}
|
| 631 |
+
|
| 632 |
+
.prompt-dataset button,
|
| 633 |
+
.example-panel table td:first-child button,
|
| 634 |
+
.prompt-dataset button span,
|
| 635 |
+
.prompt-dataset button p,
|
| 636 |
+
.example-panel table td:first-child span,
|
| 637 |
+
.example-panel table td:first-child p {
|
| 638 |
+
white-space: pre-wrap !important;
|
| 639 |
+
overflow-wrap: anywhere !important;
|
| 640 |
+
word-break: break-word !important;
|
| 641 |
+
text-overflow: clip !important;
|
| 642 |
+
-webkit-line-clamp: unset !important;
|
| 643 |
+
line-clamp: unset !important;
|
| 644 |
+
}
|
| 645 |
+
|
| 646 |
+
.prompt-dataset button span,
|
| 647 |
+
.prompt-dataset button p,
|
| 648 |
+
.example-panel table td:first-child span,
|
| 649 |
+
.example-panel table td:first-child p {
|
| 650 |
+
overflow: visible !important;
|
| 651 |
+
display: block !important;
|
| 652 |
+
}
|
| 653 |
+
|
| 654 |
+
.lance-recommended-section .example-panel td,
|
| 655 |
+
.lance-recommended-section .example-panel td *,
|
| 656 |
+
.lance-recommended-section .example-panel button,
|
| 657 |
+
.lance-recommended-section .example-panel button *,
|
| 658 |
+
.lance-recommended-section .example-panel label,
|
| 659 |
+
.lance-recommended-section .example-panel label *,
|
| 660 |
+
.lance-recommended-section .example-panel span,
|
| 661 |
+
.lance-recommended-section .example-panel p {
|
| 662 |
+
white-space: pre-wrap !important;
|
| 663 |
+
overflow-wrap: anywhere !important;
|
| 664 |
+
word-break: break-word !important;
|
| 665 |
+
text-overflow: clip !important;
|
| 666 |
+
-webkit-line-clamp: unset !important;
|
| 667 |
+
line-clamp: unset !important;
|
| 668 |
+
}
|
| 669 |
+
|
| 670 |
+
.lance-recommended-section .example-panel button,
|
| 671 |
+
.lance-recommended-section .example-panel td {
|
| 672 |
+
height: auto !important;
|
| 673 |
+
max-height: none !important;
|
| 674 |
+
overflow: visible !important;
|
| 675 |
+
}
|
| 676 |
+
|
| 677 |
+
.lance-recommended-section .example-panel [style*="ellipsis"],
|
| 678 |
+
.lance-recommended-section .example-panel [style*="nowrap"],
|
| 679 |
+
.lance-recommended-section .example-panel [style*="hidden"] {
|
| 680 |
+
white-space: pre-wrap !important;
|
| 681 |
+
overflow: visible !important;
|
| 682 |
+
text-overflow: clip !important;
|
| 683 |
+
}
|
| 684 |
+
|
| 685 |
+
.lance-recommended-section .example-panel {
|
| 686 |
+
overflow: visible !important;
|
| 687 |
+
}
|
| 688 |
+
|
| 689 |
+
.lance-recommended-section .example-panel table {
|
| 690 |
+
width: 100% !important;
|
| 691 |
+
table-layout: fixed !important;
|
| 692 |
+
border-collapse: collapse !important;
|
| 693 |
+
}
|
| 694 |
+
|
| 695 |
+
.lance-recommended-section .example-panel tr,
|
| 696 |
+
.lance-recommended-section .example-panel th,
|
| 697 |
+
.lance-recommended-section .example-panel td {
|
| 698 |
+
height: auto !important;
|
| 699 |
+
min-height: 0 !important;
|
| 700 |
+
max-height: none !important;
|
| 701 |
+
}
|
| 702 |
+
|
| 703 |
+
.lance-recommended-section .example-panel td:first-child,
|
| 704 |
+
.lance-recommended-section .example-panel td:first-child *,
|
| 705 |
+
.prompt-dataset td,
|
| 706 |
+
.prompt-dataset td *,
|
| 707 |
+
.prompt-dataset button,
|
| 708 |
+
.prompt-dataset button * {
|
| 709 |
+
white-space: pre-wrap !important;
|
| 710 |
+
overflow: visible !important;
|
| 711 |
+
overflow-wrap: anywhere !important;
|
| 712 |
+
word-break: break-word !important;
|
| 713 |
+
text-overflow: clip !important;
|
| 714 |
+
-webkit-line-clamp: unset !important;
|
| 715 |
+
line-clamp: unset !important;
|
| 716 |
+
}
|
| 717 |
+
|
| 718 |
+
.lance-recommended-section .example-panel td:first-child button,
|
| 719 |
+
.prompt-dataset button {
|
| 720 |
+
width: 100% !important;
|
| 721 |
+
height: auto !important;
|
| 722 |
+
min-height: 0 !important;
|
| 723 |
+
max-height: none !important;
|
| 724 |
+
padding: 12px 14px !important;
|
| 725 |
+
text-align: center !important;
|
| 726 |
+
justify-content: center !important;
|
| 727 |
+
align-items: center !important;
|
| 728 |
+
line-height: 1.35 !important;
|
| 729 |
+
}
|
| 730 |
+
|
| 731 |
.prompt-dataset .paginate {
|
| 732 |
display: none !important;
|
| 733 |
}
|
|
|
|
| 866 |
font-weight: 800 !important;
|
| 867 |
}
|
| 868 |
|
| 869 |
+
|
| 870 |
+
|
| 871 |
+
/* Prompt example tables: Gradio Dataset renders Textbox cells with an inline
|
| 872 |
+
max-width: 35ch and a single-line preview, which causes long prompts to be
|
| 873 |
+
clipped with an ellipsis. These rules expand the Prompt column, wrap text,
|
| 874 |
+
and keep very long rows usable through scrolling. */
|
| 875 |
+
.prompt-dataset,
|
| 876 |
+
.prompt-dataset .table-wrap {
|
| 877 |
+
width: 100% !important;
|
| 878 |
+
max-width: 100% !important;
|
| 879 |
+
overflow-x: auto !important;
|
| 880 |
+
overflow-y: auto !important;
|
| 881 |
+
}
|
| 882 |
+
|
| 883 |
+
.prompt-dataset .table-wrap {
|
| 884 |
+
max-height: 420px !important;
|
| 885 |
+
overscroll-behavior: contain !important;
|
| 886 |
+
scrollbar-gutter: stable !important;
|
| 887 |
+
}
|
| 888 |
+
|
| 889 |
+
.prompt-dataset table {
|
| 890 |
+
width: 100% !important;
|
| 891 |
+
min-width: 720px !important;
|
| 892 |
+
max-width: none !important;
|
| 893 |
+
table-layout: fixed !important;
|
| 894 |
+
border-collapse: collapse !important;
|
| 895 |
+
}
|
| 896 |
+
|
| 897 |
+
.prompt-dataset thead,
|
| 898 |
+
.prompt-dataset tbody,
|
| 899 |
+
.prompt-dataset tr,
|
| 900 |
+
.prompt-dataset th,
|
| 901 |
+
.prompt-dataset td,
|
| 902 |
+
.prompt-dataset td.textbox,
|
| 903 |
+
.prompt-dataset td[style*="35ch"] {
|
| 904 |
+
height: auto !important;
|
| 905 |
+
min-height: 0 !important;
|
| 906 |
+
max-height: none !important;
|
| 907 |
+
max-width: none !important;
|
| 908 |
+
width: 100% !important;
|
| 909 |
+
min-width: 0 !important;
|
| 910 |
+
white-space: normal !important;
|
| 911 |
+
overflow: visible !important;
|
| 912 |
+
text-overflow: clip !important;
|
| 913 |
+
vertical-align: top !important;
|
| 914 |
+
}
|
| 915 |
+
|
| 916 |
+
.prompt-dataset th,
|
| 917 |
+
.prompt-dataset td {
|
| 918 |
+
padding: 12px 14px !important;
|
| 919 |
+
}
|
| 920 |
+
|
| 921 |
+
.prompt-dataset td > * {
|
| 922 |
+
width: 100% !important;
|
| 923 |
+
max-width: none !important;
|
| 924 |
+
min-width: 0 !important;
|
| 925 |
+
height: auto !important;
|
| 926 |
+
min-height: 0 !important;
|
| 927 |
+
max-height: 260px !important;
|
| 928 |
+
overflow-y: auto !important;
|
| 929 |
+
overflow-x: hidden !important;
|
| 930 |
+
overscroll-behavior: contain !important;
|
| 931 |
+
white-space: pre-wrap !important;
|
| 932 |
+
text-align: left !important;
|
| 933 |
+
}
|
| 934 |
+
|
| 935 |
+
.prompt-dataset td *,
|
| 936 |
+
.prompt-dataset td [class*="truncate"],
|
| 937 |
+
.prompt-dataset td [class*="ellipsis"],
|
| 938 |
+
.prompt-dataset td [class*="line-clamp"],
|
| 939 |
+
.prompt-dataset td [style*="nowrap"],
|
| 940 |
+
.prompt-dataset td [style*="ellipsis"],
|
| 941 |
+
.prompt-dataset td [style*="line-clamp"],
|
| 942 |
+
.prompt-dataset td span,
|
| 943 |
+
.prompt-dataset td p,
|
| 944 |
+
.prompt-dataset td div,
|
| 945 |
+
.prompt-dataset td button {
|
| 946 |
+
max-width: none !important;
|
| 947 |
+
white-space: pre-wrap !important;
|
| 948 |
+
overflow-wrap: anywhere !important;
|
| 949 |
+
word-break: break-word !important;
|
| 950 |
+
text-overflow: clip !important;
|
| 951 |
+
-webkit-line-clamp: unset !important;
|
| 952 |
+
line-clamp: unset !important;
|
| 953 |
+
}
|
| 954 |
+
|
| 955 |
+
.prompt-dataset td span,
|
| 956 |
+
.prompt-dataset td p {
|
| 957 |
+
display: block !important;
|
| 958 |
+
}
|
| 959 |
+
|
| 960 |
+
|
| 961 |
+
|
| 962 |
+
/* Full prompt example rows. Do not use gr.Dataset for these two generation
|
| 963 |
+
sections: Dataset table cells are rendered as compact previews and the
|
| 964 |
+
actual DOM text may already contain "...". These button rows keep and render
|
| 965 |
+
the original prompt string, wrap it fully, and make very long rows scrollable. */
|
| 966 |
+
.prompt-example-full-table,
|
| 967 |
+
.prompt-example-full-table > .form,
|
| 968 |
+
.prompt-example-full-table > div {
|
| 969 |
+
width: 100% !important;
|
| 970 |
+
max-width: 100% !important;
|
| 971 |
+
min-width: 0 !important;
|
| 972 |
+
}
|
| 973 |
+
|
| 974 |
+
.prompt-example-full-table {
|
| 975 |
+
max-height: 460px !important;
|
| 976 |
+
overflow-x: auto !important;
|
| 977 |
+
overflow-y: auto !important;
|
| 978 |
+
overscroll-behavior: contain !important;
|
| 979 |
+
scrollbar-gutter: stable !important;
|
| 980 |
+
border: 1px solid var(--border-color-primary) !important;
|
| 981 |
+
border-radius: 8px !important;
|
| 982 |
+
}
|
| 983 |
+
|
| 984 |
+
.prompt-example-table-header,
|
| 985 |
+
.prompt-example-table-header > div,
|
| 986 |
+
.prompt-example-table-header .wrap {
|
| 987 |
+
position: sticky !important;
|
| 988 |
+
top: 0 !important;
|
| 989 |
+
z-index: 3 !important;
|
| 990 |
+
width: 100% !important;
|
| 991 |
+
margin: 0 !important;
|
| 992 |
+
padding: 12px 14px !important;
|
| 993 |
+
border: 0 !important;
|
| 994 |
+
border-bottom: 1px solid var(--border-color-primary) !important;
|
| 995 |
+
background: var(--block-title-background-fill, var(--block-background-fill)) !important;
|
| 996 |
+
color: var(--body-text-color) !important;
|
| 997 |
+
font-size: 18px !important;
|
| 998 |
+
font-weight: 800 !important;
|
| 999 |
+
line-height: 1.25 !important;
|
| 1000 |
+
text-align: center !important;
|
| 1001 |
+
box-shadow: none !important;
|
| 1002 |
+
}
|
| 1003 |
+
|
| 1004 |
+
.prompt-example-table-body,
|
| 1005 |
+
.prompt-example-table-body > .form {
|
| 1006 |
+
gap: 0 !important;
|
| 1007 |
+
width: 100% !important;
|
| 1008 |
+
min-width: 720px !important;
|
| 1009 |
+
}
|
| 1010 |
+
|
| 1011 |
+
.prompt-examples .prompt-example-row-button,
|
| 1012 |
+
.prompt-examples .prompt-example-row-button > button,
|
| 1013 |
+
.prompt-examples .prompt-example-row-button button {
|
| 1014 |
+
width: 100% !important;
|
| 1015 |
+
max-width: none !important;
|
| 1016 |
+
min-width: 0 !important;
|
| 1017 |
+
height: auto !important;
|
| 1018 |
+
min-height: 54px !important;
|
| 1019 |
+
max-height: 220px !important;
|
| 1020 |
+
margin: 0 !important;
|
| 1021 |
+
padding: 12px 14px !important;
|
| 1022 |
+
border-radius: 0 !important;
|
| 1023 |
+
border: 0 !important;
|
| 1024 |
+
border-bottom: 1px solid var(--border-color-primary) !important;
|
| 1025 |
+
background: var(--block-background-fill) !important;
|
| 1026 |
+
color: var(--body-text-color) !important;
|
| 1027 |
+
display: flex !important;
|
| 1028 |
+
justify-content: flex-start !important;
|
| 1029 |
+
align-items: flex-start !important;
|
| 1030 |
+
text-align: left !important;
|
| 1031 |
+
overflow-x: hidden !important;
|
| 1032 |
+
overflow-y: auto !important;
|
| 1033 |
+
white-space: normal !important;
|
| 1034 |
+
cursor: pointer !important;
|
| 1035 |
+
}
|
| 1036 |
+
|
| 1037 |
+
.prompt-examples .prompt-example-row-button span,
|
| 1038 |
+
.prompt-examples .prompt-example-row-button p,
|
| 1039 |
+
.prompt-examples .prompt-example-row-button div {
|
| 1040 |
+
width: 100% !important;
|
| 1041 |
+
max-width: none !important;
|
| 1042 |
+
display: block !important;
|
| 1043 |
+
overflow: visible !important;
|
| 1044 |
+
white-space: pre-wrap !important;
|
| 1045 |
+
overflow-wrap: anywhere !important;
|
| 1046 |
+
word-break: break-word !important;
|
| 1047 |
+
text-overflow: clip !important;
|
| 1048 |
+
-webkit-line-clamp: unset !important;
|
| 1049 |
+
line-clamp: unset !important;
|
| 1050 |
+
font-size: 16px !important;
|
| 1051 |
+
line-height: 1.38 !important;
|
| 1052 |
+
text-align: left !important;
|
| 1053 |
+
}
|
| 1054 |
+
|
| 1055 |
+
.prompt-examples .prompt-example-row-button:last-child,
|
| 1056 |
+
.prompt-examples .prompt-example-row-button:last-child > button,
|
| 1057 |
+
.prompt-examples .prompt-example-row-button:last-child button {
|
| 1058 |
+
border-bottom: 0 !important;
|
| 1059 |
+
}
|
| 1060 |
+
|
| 1061 |
+
|
| 1062 |
+
.prompt-example-table-header-with-media,
|
| 1063 |
+
.prompt-example-table-header-with-media > div,
|
| 1064 |
+
.prompt-example-table-header-with-media .wrap {
|
| 1065 |
+
display: grid !important;
|
| 1066 |
+
grid-template-columns: minmax(0, 1fr) minmax(180px, 260px) !important;
|
| 1067 |
+
gap: 0 !important;
|
| 1068 |
+
text-align: center !important;
|
| 1069 |
+
}
|
| 1070 |
+
|
| 1071 |
+
.prompt-example-multimodal-row,
|
| 1072 |
+
.prompt-example-multimodal-row > .form {
|
| 1073 |
+
width: 100% !important;
|
| 1074 |
+
min-width: 720px !important;
|
| 1075 |
+
margin: 0 !important;
|
| 1076 |
+
gap: 0 !important;
|
| 1077 |
+
align-items: stretch !important;
|
| 1078 |
+
border-bottom: 1px solid var(--border-color-primary) !important;
|
| 1079 |
+
}
|
| 1080 |
+
|
| 1081 |
+
.prompt-example-multimodal-row > .form {
|
| 1082 |
+
display: grid !important;
|
| 1083 |
+
grid-template-columns: minmax(0, 1fr) minmax(180px, 260px) !important;
|
| 1084 |
+
}
|
| 1085 |
+
|
| 1086 |
+
.prompt-example-prompt-cell,
|
| 1087 |
+
.prompt-example-prompt-cell > .form,
|
| 1088 |
+
.prompt-example-media-cell,
|
| 1089 |
+
.prompt-example-media-cell > .form {
|
| 1090 |
+
width: 100% !important;
|
| 1091 |
+
min-width: 0 !important;
|
| 1092 |
+
margin: 0 !important;
|
| 1093 |
+
padding: 0 !important;
|
| 1094 |
+
border: 0 !important;
|
| 1095 |
+
background: transparent !important;
|
| 1096 |
+
box-shadow: none !important;
|
| 1097 |
+
}
|
| 1098 |
+
|
| 1099 |
+
.prompt-example-multimodal-row .prompt-example-row-button,
|
| 1100 |
+
.prompt-example-multimodal-row .prompt-example-row-button > button,
|
| 1101 |
+
.prompt-example-multimodal-row .prompt-example-row-button button {
|
| 1102 |
+
height: 100% !important;
|
| 1103 |
+
min-height: 150px !important;
|
| 1104 |
+
max-height: 260px !important;
|
| 1105 |
+
border-bottom: 0 !important;
|
| 1106 |
+
}
|
| 1107 |
+
|
| 1108 |
+
.prompt-example-media-cell {
|
| 1109 |
+
border-left: 1px solid var(--border-color-primary) !important;
|
| 1110 |
+
}
|
| 1111 |
+
|
| 1112 |
+
.prompt-example-media-preview,
|
| 1113 |
+
.prompt-example-media-preview > div,
|
| 1114 |
+
.prompt-example-media-preview .wrap {
|
| 1115 |
+
width: 100% !important;
|
| 1116 |
+
height: 150px !important;
|
| 1117 |
+
min-height: 150px !important;
|
| 1118 |
+
max-height: 150px !important;
|
| 1119 |
+
margin: 0 !important;
|
| 1120 |
+
border: 0 !important;
|
| 1121 |
+
border-radius: 0 !important;
|
| 1122 |
+
background: transparent !important;
|
| 1123 |
+
box-shadow: none !important;
|
| 1124 |
+
overflow: hidden !important;
|
| 1125 |
+
}
|
| 1126 |
+
|
| 1127 |
+
.prompt-example-media-preview video,
|
| 1128 |
+
.prompt-example-media-preview img {
|
| 1129 |
+
width: 100% !important;
|
| 1130 |
+
height: 150px !important;
|
| 1131 |
+
object-fit: cover !important;
|
| 1132 |
+
border-radius: 0 !important;
|
| 1133 |
+
}
|
| 1134 |
+
|
| 1135 |
+
/* Keep the prompt column unchanged. Video examples fill the current row height,
|
| 1136 |
+
keep their original aspect ratio, and adapt their width inside the media column. */
|
| 1137 |
+
.prompt-example-video-cell,
|
| 1138 |
+
.prompt-example-video-cell > .form {
|
| 1139 |
+
display: flex !important;
|
| 1140 |
+
align-items: stretch !important;
|
| 1141 |
+
justify-content: center !important;
|
| 1142 |
+
padding: 0 !important;
|
| 1143 |
+
height: 100% !important;
|
| 1144 |
+
min-height: 150px !important;
|
| 1145 |
+
max-height: 260px !important;
|
| 1146 |
+
overflow: hidden !important;
|
| 1147 |
+
}
|
| 1148 |
+
|
| 1149 |
+
.prompt-example-video-preview,
|
| 1150 |
+
.prompt-example-video-preview > div,
|
| 1151 |
+
.prompt-example-video-preview .wrap {
|
| 1152 |
+
display: flex !important;
|
| 1153 |
+
align-items: center !important;
|
| 1154 |
+
justify-content: center !important;
|
| 1155 |
+
width: 100% !important;
|
| 1156 |
+
min-width: 0 !important;
|
| 1157 |
+
max-width: 100% !important;
|
| 1158 |
+
height: 100% !important;
|
| 1159 |
+
min-height: 150px !important;
|
| 1160 |
+
max-height: 260px !important;
|
| 1161 |
+
margin: 0 auto !important;
|
| 1162 |
+
border-radius: 0 !important;
|
| 1163 |
+
overflow: hidden !important;
|
| 1164 |
+
}
|
| 1165 |
+
|
| 1166 |
+
.prompt-example-video-preview video {
|
| 1167 |
+
width: auto !important;
|
| 1168 |
+
max-width: 100% !important;
|
| 1169 |
+
height: 100% !important;
|
| 1170 |
+
min-height: 150px !important;
|
| 1171 |
+
max-height: 260px !important;
|
| 1172 |
+
object-fit: contain !important;
|
| 1173 |
+
border-radius: 0 !important;
|
| 1174 |
+
}
|
| 1175 |
+
|
| 1176 |
+
.prompt-example-multimodal-row:last-child,
|
| 1177 |
+
.prompt-example-multimodal-row:last-child > .form {
|
| 1178 |
+
border-bottom: 0 !important;
|
| 1179 |
+
}
|
| 1180 |
+
|
| 1181 |
+
@media (max-width: 900px) {
|
| 1182 |
+
.prompt-example-table-header-with-media,
|
| 1183 |
+
.prompt-example-table-header-with-media > div,
|
| 1184 |
+
.prompt-example-table-header-with-media .wrap,
|
| 1185 |
+
.prompt-example-multimodal-row > .form {
|
| 1186 |
+
grid-template-columns: minmax(0, 1fr) minmax(140px, 180px) !important;
|
| 1187 |
+
}
|
| 1188 |
+
}
|
| 1189 |
+
|
| 1190 |
@media (max-width: 900px) {
|
| 1191 |
.lance-main-row {
|
| 1192 |
grid-template-columns: minmax(0, 1fr) !important;
|
|
|
|
| 1200 |
if (!element) {
|
| 1201 |
return;
|
| 1202 |
}
|
| 1203 |
+
if (element.style.getPropertyValue(property) !== value || element.style.getPropertyPriority(property) !== "important") {
|
| 1204 |
+
element.style.setProperty(property, value, "important");
|
| 1205 |
+
}
|
| 1206 |
};
|
| 1207 |
|
| 1208 |
const enforceLanceLabelTypography = () => {
|
|
|
|
| 1226 |
});
|
| 1227 |
};
|
| 1228 |
|
| 1229 |
+
const enforceRecommendedCaseText = () => {
|
| 1230 |
+
document.querySelectorAll(".lance-recommended-section .example-panel").forEach((panel) => {
|
| 1231 |
+
applyImportantStyle(panel, "overflow", "visible");
|
| 1232 |
+
panel.querySelectorAll("table, tbody, tr, th, td, button, label, span, p, div").forEach((element) => {
|
| 1233 |
+
applyImportantStyle(element, "white-space", "pre-wrap");
|
| 1234 |
+
applyImportantStyle(element, "overflow-wrap", "anywhere");
|
| 1235 |
+
applyImportantStyle(element, "word-break", "break-word");
|
| 1236 |
+
applyImportantStyle(element, "text-overflow", "clip");
|
| 1237 |
+
applyImportantStyle(element, "-webkit-line-clamp", "unset");
|
| 1238 |
+
applyImportantStyle(element, "line-clamp", "unset");
|
| 1239 |
+
});
|
| 1240 |
+
panel.querySelectorAll("td, button").forEach((element) => {
|
| 1241 |
+
applyImportantStyle(element, "height", "auto");
|
| 1242 |
+
applyImportantStyle(element, "max-height", "none");
|
| 1243 |
+
applyImportantStyle(element, "overflow", "visible");
|
| 1244 |
+
});
|
| 1245 |
+
panel.querySelectorAll("button").forEach((element) => {
|
| 1246 |
+
applyImportantStyle(element, "width", "100%");
|
| 1247 |
+
applyImportantStyle(element, "text-align", "center");
|
| 1248 |
+
applyImportantStyle(element, "justify-content", "center");
|
| 1249 |
+
applyImportantStyle(element, "align-items", "center");
|
| 1250 |
+
});
|
| 1251 |
+
});
|
| 1252 |
+
};
|
| 1253 |
+
|
| 1254 |
+
|
| 1255 |
+
|
| 1256 |
+
const enforcePromptDatasetText = () => {
|
| 1257 |
+
document.querySelectorAll(".prompt-dataset").forEach((dataset) => {
|
| 1258 |
+
applyImportantStyle(dataset, "width", "100%");
|
| 1259 |
+
applyImportantStyle(dataset, "max-width", "100%");
|
| 1260 |
+
applyImportantStyle(dataset, "overflow-x", "auto");
|
| 1261 |
+
applyImportantStyle(dataset, "overflow-y", "auto");
|
| 1262 |
+
|
| 1263 |
+
dataset.querySelectorAll(".table-wrap").forEach((element) => {
|
| 1264 |
+
applyImportantStyle(element, "width", "100%");
|
| 1265 |
+
applyImportantStyle(element, "max-width", "100%");
|
| 1266 |
+
applyImportantStyle(element, "max-height", "420px");
|
| 1267 |
+
applyImportantStyle(element, "overflow-x", "auto");
|
| 1268 |
+
applyImportantStyle(element, "overflow-y", "auto");
|
| 1269 |
+
applyImportantStyle(element, "overscroll-behavior", "contain");
|
| 1270 |
+
});
|
| 1271 |
+
|
| 1272 |
+
dataset.querySelectorAll("table").forEach((element) => {
|
| 1273 |
+
applyImportantStyle(element, "width", "100%");
|
| 1274 |
+
applyImportantStyle(element, "min-width", "720px");
|
| 1275 |
+
applyImportantStyle(element, "max-width", "none");
|
| 1276 |
+
applyImportantStyle(element, "table-layout", "fixed");
|
| 1277 |
+
applyImportantStyle(element, "border-collapse", "collapse");
|
| 1278 |
+
});
|
| 1279 |
+
|
| 1280 |
+
dataset.querySelectorAll("thead, tbody, tr, th, td, td.textbox, td[style*='35ch']").forEach((element) => {
|
| 1281 |
+
applyImportantStyle(element, "height", "auto");
|
| 1282 |
+
applyImportantStyle(element, "min-height", "0");
|
| 1283 |
+
applyImportantStyle(element, "max-height", "none");
|
| 1284 |
+
applyImportantStyle(element, "max-width", "none");
|
| 1285 |
+
applyImportantStyle(element, "width", "100%");
|
| 1286 |
+
applyImportantStyle(element, "min-width", "0");
|
| 1287 |
+
applyImportantStyle(element, "white-space", "normal");
|
| 1288 |
+
applyImportantStyle(element, "overflow", "visible");
|
| 1289 |
+
applyImportantStyle(element, "text-overflow", "clip");
|
| 1290 |
+
applyImportantStyle(element, "vertical-align", "top");
|
| 1291 |
+
});
|
| 1292 |
+
|
| 1293 |
+
dataset.querySelectorAll("td *").forEach((element) => {
|
| 1294 |
+
applyImportantStyle(element, "max-width", "none");
|
| 1295 |
+
applyImportantStyle(element, "white-space", "pre-wrap");
|
| 1296 |
+
applyImportantStyle(element, "overflow-wrap", "anywhere");
|
| 1297 |
+
applyImportantStyle(element, "word-break", "break-word");
|
| 1298 |
+
applyImportantStyle(element, "text-overflow", "clip");
|
| 1299 |
+
applyImportantStyle(element, "-webkit-line-clamp", "unset");
|
| 1300 |
+
applyImportantStyle(element, "line-clamp", "unset");
|
| 1301 |
+
});
|
| 1302 |
+
|
| 1303 |
+
dataset.querySelectorAll("td > *").forEach((element) => {
|
| 1304 |
+
applyImportantStyle(element, "width", "100%");
|
| 1305 |
+
applyImportantStyle(element, "max-width", "none");
|
| 1306 |
+
applyImportantStyle(element, "min-width", "0");
|
| 1307 |
+
applyImportantStyle(element, "height", "auto");
|
| 1308 |
+
applyImportantStyle(element, "min-height", "0");
|
| 1309 |
+
applyImportantStyle(element, "max-height", "260px");
|
| 1310 |
+
applyImportantStyle(element, "overflow-y", "auto");
|
| 1311 |
+
applyImportantStyle(element, "overflow-x", "hidden");
|
| 1312 |
+
applyImportantStyle(element, "overscroll-behavior", "contain");
|
| 1313 |
+
applyImportantStyle(element, "white-space", "pre-wrap");
|
| 1314 |
+
applyImportantStyle(element, "text-align", "left");
|
| 1315 |
+
});
|
| 1316 |
+
|
| 1317 |
+
dataset.querySelectorAll("td span, td p").forEach((element) => {
|
| 1318 |
+
applyImportantStyle(element, "display", "block");
|
| 1319 |
+
});
|
| 1320 |
+
});
|
| 1321 |
+
};
|
| 1322 |
+
|
| 1323 |
+
const enforcePromptExampleRows = () => {
|
| 1324 |
+
document.querySelectorAll(".prompt-example-full-table").forEach((table) => {
|
| 1325 |
+
applyImportantStyle(table, "width", "100%");
|
| 1326 |
+
applyImportantStyle(table, "max-width", "100%");
|
| 1327 |
+
applyImportantStyle(table, "max-height", "460px");
|
| 1328 |
+
applyImportantStyle(table, "overflow-x", "auto");
|
| 1329 |
+
applyImportantStyle(table, "overflow-y", "auto");
|
| 1330 |
+
});
|
| 1331 |
+
|
| 1332 |
+
document.querySelectorAll(".prompt-example-table-body, .prompt-example-table-body > .form").forEach((element) => {
|
| 1333 |
+
applyImportantStyle(element, "width", "100%");
|
| 1334 |
+
applyImportantStyle(element, "min-width", "720px");
|
| 1335 |
+
applyImportantStyle(element, "gap", "0");
|
| 1336 |
+
});
|
| 1337 |
+
|
| 1338 |
+
document.querySelectorAll(".prompt-example-row-button, .prompt-example-row-button button").forEach((element) => {
|
| 1339 |
+
applyImportantStyle(element, "width", "100%");
|
| 1340 |
+
applyImportantStyle(element, "max-width", "none");
|
| 1341 |
+
applyImportantStyle(element, "height", "auto");
|
| 1342 |
+
applyImportantStyle(element, "min-height", "54px");
|
| 1343 |
+
applyImportantStyle(element, "max-height", "220px");
|
| 1344 |
+
applyImportantStyle(element, "margin", "0");
|
| 1345 |
+
applyImportantStyle(element, "padding", "12px 14px");
|
| 1346 |
+
applyImportantStyle(element, "border-radius", "0");
|
| 1347 |
+
applyImportantStyle(element, "border", "0");
|
| 1348 |
+
applyImportantStyle(element, "border-bottom", "1px solid var(--border-color-primary)");
|
| 1349 |
+
applyImportantStyle(element, "display", "flex");
|
| 1350 |
+
applyImportantStyle(element, "justify-content", "flex-start");
|
| 1351 |
+
applyImportantStyle(element, "align-items", "flex-start");
|
| 1352 |
+
applyImportantStyle(element, "text-align", "left");
|
| 1353 |
+
applyImportantStyle(element, "overflow-x", "hidden");
|
| 1354 |
+
applyImportantStyle(element, "overflow-y", "auto");
|
| 1355 |
+
applyImportantStyle(element, "white-space", "normal");
|
| 1356 |
+
});
|
| 1357 |
+
|
| 1358 |
+
document.querySelectorAll(".prompt-example-row-button span, .prompt-example-row-button p, .prompt-example-row-button div").forEach((element) => {
|
| 1359 |
+
applyImportantStyle(element, "width", "100%");
|
| 1360 |
+
applyImportantStyle(element, "max-width", "none");
|
| 1361 |
+
applyImportantStyle(element, "display", "block");
|
| 1362 |
+
applyImportantStyle(element, "overflow", "visible");
|
| 1363 |
+
applyImportantStyle(element, "white-space", "pre-wrap");
|
| 1364 |
+
applyImportantStyle(element, "overflow-wrap", "anywhere");
|
| 1365 |
+
applyImportantStyle(element, "word-break", "break-word");
|
| 1366 |
+
applyImportantStyle(element, "text-overflow", "clip");
|
| 1367 |
+
applyImportantStyle(element, "-webkit-line-clamp", "unset");
|
| 1368 |
+
applyImportantStyle(element, "line-clamp", "unset");
|
| 1369 |
+
applyImportantStyle(element, "font-size", "16px");
|
| 1370 |
+
applyImportantStyle(element, "line-height", "1.38");
|
| 1371 |
+
applyImportantStyle(element, "text-align", "left");
|
| 1372 |
+
});
|
| 1373 |
+
|
| 1374 |
+
document.querySelectorAll(".prompt-example-table-header-with-media, .prompt-example-table-header-with-media > div, .prompt-example-table-header-with-media .wrap, .prompt-example-multimodal-row > .form").forEach((element) => {
|
| 1375 |
+
applyImportantStyle(element, "display", "grid");
|
| 1376 |
+
applyImportantStyle(element, "grid-template-columns", "minmax(0, 1fr) minmax(180px, 260px)");
|
| 1377 |
+
applyImportantStyle(element, "gap", "0");
|
| 1378 |
+
});
|
| 1379 |
+
|
| 1380 |
+
document.querySelectorAll(".prompt-example-multimodal-row, .prompt-example-multimodal-row > .form").forEach((element) => {
|
| 1381 |
+
applyImportantStyle(element, "width", "100%");
|
| 1382 |
+
applyImportantStyle(element, "min-width", "720px");
|
| 1383 |
+
applyImportantStyle(element, "margin", "0");
|
| 1384 |
+
applyImportantStyle(element, "border-bottom", "1px solid var(--border-color-primary)");
|
| 1385 |
+
});
|
| 1386 |
+
|
| 1387 |
+
document.querySelectorAll(".prompt-example-multimodal-row .prompt-example-row-button, .prompt-example-multimodal-row .prompt-example-row-button button").forEach((element) => {
|
| 1388 |
+
applyImportantStyle(element, "height", "100%");
|
| 1389 |
+
applyImportantStyle(element, "min-height", "150px");
|
| 1390 |
+
applyImportantStyle(element, "max-height", "260px");
|
| 1391 |
+
applyImportantStyle(element, "border-bottom", "0");
|
| 1392 |
+
});
|
| 1393 |
+
|
| 1394 |
+
document.querySelectorAll(".prompt-example-media-preview, .prompt-example-media-preview > div, .prompt-example-media-preview .wrap, .prompt-example-media-preview video, .prompt-example-media-preview img").forEach((element) => {
|
| 1395 |
+
applyImportantStyle(element, "width", "100%");
|
| 1396 |
+
applyImportantStyle(element, "height", "150px");
|
| 1397 |
+
applyImportantStyle(element, "max-height", "150px");
|
| 1398 |
+
applyImportantStyle(element, "border-radius", "0");
|
| 1399 |
+
applyImportantStyle(element, "overflow", "hidden");
|
| 1400 |
+
});
|
| 1401 |
+
|
| 1402 |
+
document.querySelectorAll(".prompt-example-video-cell, .prompt-example-video-cell > .form").forEach((element) => {
|
| 1403 |
+
applyImportantStyle(element, "display", "flex");
|
| 1404 |
+
applyImportantStyle(element, "align-items", "stretch");
|
| 1405 |
+
applyImportantStyle(element, "justify-content", "center");
|
| 1406 |
+
applyImportantStyle(element, "padding", "0");
|
| 1407 |
+
applyImportantStyle(element, "height", "100%");
|
| 1408 |
+
applyImportantStyle(element, "min-height", "150px");
|
| 1409 |
+
applyImportantStyle(element, "max-height", "260px");
|
| 1410 |
+
applyImportantStyle(element, "overflow", "hidden");
|
| 1411 |
+
});
|
| 1412 |
+
|
| 1413 |
+
document.querySelectorAll(".prompt-example-video-preview, .prompt-example-video-preview > div, .prompt-example-video-preview .wrap").forEach((element) => {
|
| 1414 |
+
applyImportantStyle(element, "display", "flex");
|
| 1415 |
+
applyImportantStyle(element, "align-items", "center");
|
| 1416 |
+
applyImportantStyle(element, "justify-content", "center");
|
| 1417 |
+
applyImportantStyle(element, "width", "100%");
|
| 1418 |
+
applyImportantStyle(element, "min-width", "0");
|
| 1419 |
+
applyImportantStyle(element, "max-width", "100%");
|
| 1420 |
+
applyImportantStyle(element, "height", "100%");
|
| 1421 |
+
applyImportantStyle(element, "min-height", "150px");
|
| 1422 |
+
applyImportantStyle(element, "max-height", "260px");
|
| 1423 |
+
applyImportantStyle(element, "margin", "0 auto");
|
| 1424 |
+
applyImportantStyle(element, "border-radius", "0");
|
| 1425 |
+
applyImportantStyle(element, "overflow", "hidden");
|
| 1426 |
+
});
|
| 1427 |
+
|
| 1428 |
+
document.querySelectorAll(".prompt-example-video-preview video").forEach((element) => {
|
| 1429 |
+
applyImportantStyle(element, "width", "auto");
|
| 1430 |
+
applyImportantStyle(element, "max-width", "100%");
|
| 1431 |
+
applyImportantStyle(element, "height", "100%");
|
| 1432 |
+
applyImportantStyle(element, "min-height", "150px");
|
| 1433 |
+
applyImportantStyle(element, "max-height", "260px");
|
| 1434 |
+
applyImportantStyle(element, "object-fit", "contain");
|
| 1435 |
+
applyImportantStyle(element, "border-radius", "0");
|
| 1436 |
+
});
|
| 1437 |
+
};
|
| 1438 |
+
|
| 1439 |
const syncOutputColumnHeight = () => {
|
| 1440 |
const row = document.querySelector(".lance-main-row");
|
| 1441 |
const inputColumn = document.querySelector(".lance-input-column");
|
|
|
|
| 1465 |
|
| 1466 |
const scheduleSync = () => requestAnimationFrame(() => {
|
| 1467 |
enforceLanceLabelTypography();
|
| 1468 |
+
enforceRecommendedCaseText();
|
| 1469 |
+
enforcePromptDatasetText();
|
| 1470 |
+
enforcePromptExampleRows();
|
| 1471 |
syncOutputColumnHeight();
|
| 1472 |
});
|
| 1473 |
const attachObservers = () => {
|
|
|
|
| 1490 |
};
|
| 1491 |
|
| 1492 |
enforceLanceLabelTypography();
|
| 1493 |
+
enforceRecommendedCaseText();
|
| 1494 |
+
enforcePromptDatasetText();
|
| 1495 |
+
enforcePromptExampleRows();
|
| 1496 |
attachObservers();
|
| 1497 |
new MutationObserver(() => {
|
| 1498 |
enforceLanceLabelTypography();
|
| 1499 |
+
enforceRecommendedCaseText();
|
| 1500 |
+
enforcePromptDatasetText();
|
| 1501 |
+
enforcePromptExampleRows();
|
| 1502 |
attachObservers();
|
| 1503 |
}).observe(document.body, {
|
| 1504 |
childList: true,
|
|
|
|
| 1550 |
IMAGE_TASKS = {TASK_T2I, TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
|
| 1551 |
VIDEO_TASKS = {TASK_T2V, TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
|
| 1552 |
EDIT_TASKS = {TASK_IMAGE_EDIT, TASK_VIDEO_EDIT}
|
| 1553 |
+
VIDEO_RESOLUTION_CHOICES = ["video_360p", "video_480p"]
|
| 1554 |
+
VIDEO_RESOLUTION_DISPLAY_CHOICES = [
|
| 1555 |
+
("video_360p", "video_360p"),
|
| 1556 |
+
("video_480p(Higher quota usage. Use sparingly.)", "video_480p"),
|
| 1557 |
+
]
|
| 1558 |
+
VIDEO_EDIT_RESOLUTION_CHOICES = [DEFAULT_VIDEO_EDIT_RESOLUTION]
|
| 1559 |
IMAGE_RESOLUTION_CHOICES = [DEFAULT_IMAGE_RESOLUTION]
|
| 1560 |
RESOLUTION_CHOICES = VIDEO_RESOLUTION_CHOICES + IMAGE_RESOLUTION_CHOICES
|
| 1561 |
CAPTION_SYSTEM_PROMPT_TEMPLATE = (
|
|
|
|
| 1578 |
|
| 1579 |
|
| 1580 |
def get_video_duration_choices() -> list[tuple[str, int]]:
    """Return the ``(label, value)`` pairs offered by the video-duration selector.

    Labels are the duration followed by ``s`` (``"1s"`` through ``"10s"``);
    values are the matching integer number of seconds.
    """
    durations = list(range(1, 11))
    labels = [f"{duration}s" for duration in durations]
    return list(zip(labels, durations))
|
| 1582 |
|
| 1583 |
def env_flag(name: str, default: bool) -> bool:
|
| 1584 |
value = os.getenv(name)
|
|
|
|
| 1803 |
|
| 1804 |
|
| 1805 |
def video_seconds_to_num_frames(seconds: int) -> int:
    """Convert a duration in seconds to a frame count of ``12 * seconds + 1``.

    The duration is converted with ``int()`` and clamped to the inclusive
    range 1..10 before the frame count is computed.
    """
    clamped = int(seconds)
    if clamped > 10:
        clamped = 10
    if clamped < 1:
        clamped = 1
    return 1 + clamped * 12
|
| 1808 |
|
| 1809 |
|
|
|
|
| 1815 |
return task
|
| 1816 |
|
| 1817 |
|
| 1818 |
+
def normalize_resolution_choice_value(resolution: str, task: str) -> str:
    """Map a resolution label or value to the value string used for *task*.

    The task's choices may be plain strings or ``(label, value)`` tuples. A
    match on either the label or the value of a tuple yields the value; a
    match on a plain choice yields that choice. When nothing matches, the
    stripped input text is returned unchanged.
    """
    wanted = str(resolution or "").strip()
    for entry in get_resolution_choices_for_task(task):
        if not isinstance(entry, tuple):
            if str(entry) == wanted:
                return str(entry)
            continue
        display_label, backend_value = entry
        if wanted == str(display_label) or wanted == str(backend_value):
            return str(backend_value)
    return wanted
|
| 1828 |
+
|
| 1829 |
+
|
| 1830 |
+
def get_resolution_choice_values_for_task(task: str) -> list[str]:
    """Return only the value part of every resolution choice for *task*.

    ``(label, value)`` tuples contribute their second element; plain choices
    are passed through as they are.
    """
    return [
        entry[1] if isinstance(entry, tuple) else entry
        for entry in get_resolution_choices_for_task(task)
    ]
|
| 1836 |
+
|
| 1837 |
+
|
| 1838 |
+
def get_resolution_choices_for_task(task: str) -> list[str | tuple[str, str]]:
    """Return the resolution choices that apply to *task*.

    Image tasks get ``IMAGE_RESOLUTION_CHOICES``; text-to-video gets the
    ``(label, value)`` display choices; every other member of ``VIDEO_TASKS``
    (video edit included) gets ``VIDEO_EDIT_RESOLUTION_CHOICES``. Any
    remaining task falls back to ``VIDEO_RESOLUTION_CHOICES``.
    """
    internal_task = normalize_task(task)
    if internal_task in IMAGE_TASKS:
        return IMAGE_RESOLUTION_CHOICES
    if internal_task == TASK_T2V:
        return VIDEO_RESOLUTION_DISPLAY_CHOICES
    # TASK_VIDEO_EDIT is a member of VIDEO_TASKS, so one membership check
    # covers both the edit task and the remaining video tasks.
    if internal_task in VIDEO_TASKS:
        return VIDEO_EDIT_RESOLUTION_CHOICES
    return VIDEO_RESOLUTION_CHOICES
|
| 1849 |
+
|
| 1850 |
+
|
| 1851 |
+
def get_default_resolution_for_task(task: str) -> str:
    """Return the default resolution profile for *task*.

    Image tasks use ``DEFAULT_IMAGE_RESOLUTION``. Text-to-video uses
    ``DEFAULT_RESOLUTION``. Every other member of ``VIDEO_TASKS`` (video edit
    included) uses ``DEFAULT_VIDEO_EDIT_RESOLUTION``. Anything else falls back
    to ``DEFAULT_RESOLUTION``.

    ``reset_generation_defaults_for_task()`` relies on this value, so it also
    decides which profile is selected after an example row is clicked.
    """
    internal_task = normalize_task(task)
    if internal_task in IMAGE_TASKS:
        return DEFAULT_IMAGE_RESOLUTION
    if internal_task == TASK_T2V:
        return DEFAULT_RESOLUTION
    # TASK_VIDEO_EDIT is a member of VIDEO_TASKS, so this single check covers
    # the edit task as well as the remaining video tasks.
    if internal_task in VIDEO_TASKS:
        return DEFAULT_VIDEO_EDIT_RESOLUTION
    return DEFAULT_RESOLUTION
|
| 1866 |
+
|
| 1867 |
+
|
| 1868 |
+
def normalize_resolution_for_backend(resolution: str, task: str) -> str:
    """Return a resolution value that is valid for *task*.

    The input may be a display label or a value. If, after normalization, it
    is not one of the task's choice values, the task's default resolution is
    returned instead.
    """
    internal_task = normalize_task(task)
    candidate = normalize_resolution_choice_value(resolution, internal_task)
    if candidate not in get_resolution_choice_values_for_task(internal_task):
        return get_default_resolution_for_task(internal_task)
    return candidate
|
| 1875 |
|
| 1876 |
|
| 1877 |
def get_default_aspect_ratio(task: str) -> str:
|
|
|
|
| 1879 |
return DEFAULT_IMAGE_ASPECT_RATIO if internal_task in IMAGE_TASKS else DEFAULT_VIDEO_ASPECT_RATIO
|
| 1880 |
|
| 1881 |
|
| 1882 |
+
def normalize_video_resolution(resolution: Optional[str], task: Optional[str] = None) -> str:
    """Return a usable video resolution profile name.

    Without a task, the input is accepted only if it is listed in
    ``VIDEO_RESOLUTION_CHOICES``; otherwise ``DEFAULT_RESOLUTION`` is used.
    With a task, the input is normalized against that task's choices and
    replaced by the task's default when it is not one of them.
    """
    if task is not None:
        candidate = normalize_resolution_choice_value(resolution, task)
        if candidate in get_resolution_choice_values_for_task(task):
            return candidate
        return get_default_resolution_for_task(task)

    if resolution in VIDEO_RESOLUTION_CHOICES:
        return resolution
    return DEFAULT_RESOLUTION
|
| 1888 |
+
|
| 1889 |
+
|
| 1890 |
+
def get_size_for_aspect_ratio(task: str, aspect_ratio: str, video_resolution: Optional[str] = None) -> tuple[int, int]:
    """Look up the size tuple stored for *aspect_ratio* under *task*.

    An aspect ratio outside ``ASPECT_RATIO_CHOICES`` is replaced by the task's
    default ratio. Image tasks read ``IMAGE_ASPECT_RATIO_TO_SIZE``; all other
    tasks read the size map of the normalized video resolution profile.
    Callers in this file unpack the result as ``width, height``.
    """
    internal_task = normalize_task(task)
    if aspect_ratio not in ASPECT_RATIO_CHOICES:
        aspect_ratio = get_default_aspect_ratio(internal_task)

    if internal_task in IMAGE_TASKS:
        return IMAGE_ASPECT_RATIO_TO_SIZE[aspect_ratio]

    profile = normalize_video_resolution(video_resolution, internal_task)
    return VIDEO_RESOLUTION_TO_SIZE_MAP[profile][aspect_ratio]
|
| 1898 |
|
| 1899 |
|
|
|
|
| 1905 |
return f"{width} x {height}"
|
| 1906 |
|
| 1907 |
|
| 1908 |
+
def get_size_map_for_task(task: str, video_resolution: Optional[str] = None) -> dict[str, tuple[int, int]]:
    """Return the aspect-ratio-to-size mapping that applies to *task*.

    Image tasks always use ``IMAGE_ASPECT_RATIO_TO_SIZE``. Other tasks use the
    map registered in ``VIDEO_RESOLUTION_TO_SIZE_MAP`` for the normalized
    video resolution profile.
    """
    internal_task = normalize_task(task)
    if internal_task not in IMAGE_TASKS:
        profile = normalize_video_resolution(video_resolution, internal_task)
        return VIDEO_RESOLUTION_TO_SIZE_MAP[profile]
    return IMAGE_ASPECT_RATIO_TO_SIZE
|
| 1913 |
|
| 1914 |
|
| 1915 |
+
def get_output_resolution_choices_for_task(task: str, video_resolution: Optional[str] = None) -> list[tuple[str, str]]:
|
| 1916 |
"""Get Output Resolution choices with a one-to-one mapping to aspect ratios."""
|
| 1917 |
internal_task = normalize_task(task)
|
| 1918 |
default_ratio = get_default_aspect_ratio(internal_task)
|
| 1919 |
+
size_map = get_size_map_for_task(internal_task, video_resolution)
|
| 1920 |
choices = []
|
| 1921 |
for ratio in ASPECT_RATIO_CHOICES:
|
| 1922 |
width, height = size_map[ratio]
|
|
|
|
| 1926 |
return choices
|
| 1927 |
|
| 1928 |
|
| 1929 |
+
def get_aspect_ratio_for_output_resolution(task: str, output_resolution: str, video_resolution: Optional[str] = None) -> str:
|
| 1930 |
internal_task = normalize_task(task)
|
| 1931 |
resolution_text = str(output_resolution or "").strip()
|
| 1932 |
+
size_map = get_size_map_for_task(internal_task, video_resolution)
|
| 1933 |
for ratio in ASPECT_RATIO_CHOICES:
|
| 1934 |
width, height = size_map[ratio]
|
| 1935 |
if resolution_text == format_size_markdown(internal_task, width, height):
|
|
|
|
| 1986 |
return f'<div class="{class_names}">{icon_html}<span>{html.escape(text)}</span></div>'
|
| 1987 |
|
| 1988 |
|
| 1989 |
+
def update_size_from_aspect_ratio(task: str, aspect_ratio: str, video_resolution: Optional[str] = None):
    """Recompute the output size after the aspect ratio changes.

    Returns ``(height, width, update)`` where ``update`` is a ``gr.update``
    that refreshes the output-resolution choices and selects the entry
    matching the new size.
    """
    new_width, new_height = get_size_for_aspect_ratio(task, aspect_ratio, video_resolution)
    # Choices are computed before the selected value, as in the keyword order
    # of the gr.update call.
    resolution_choices = get_output_resolution_choices_for_task(task, video_resolution)
    selected = format_size_markdown(task, new_width, new_height)
    size_update = gr.update(choices=resolution_choices, value=selected)
    return new_height, new_width, size_update
|
| 1995 |
|
| 1996 |
|
| 1997 |
+
def update_aspect_ratio_from_output_resolution(task: str, output_resolution: str, video_resolution: Optional[str] = None):
    """Derive the aspect ratio and size from a selected output resolution.

    Returns ``(aspect_ratio, height, width)``.
    """
    ratio = get_aspect_ratio_for_output_resolution(task, output_resolution, video_resolution)
    size = get_size_for_aspect_ratio(task, ratio, video_resolution)
    out_width, out_height = size
    return ratio, out_height, out_width
|
| 2001 |
|
| 2002 |
|
| 2003 |
+
def update_output_resolution_from_video_profile(task: str, aspect_ratio: str, video_resolution: str):
    """Refresh the output-resolution control after the video profile changes.

    Returns ``(update, height, width)`` where ``update`` is a ``gr.update``
    carrying the new output-resolution choices and the entry matching the
    size of the current aspect ratio under the new profile.
    """
    new_width, new_height = get_size_for_aspect_ratio(task, aspect_ratio, video_resolution)
    resolution_choices = get_output_resolution_choices_for_task(task, video_resolution)
    selected = format_size_markdown(task, new_width, new_height)
    size_update = gr.update(choices=resolution_choices, value=selected)
    return size_update, new_height, new_width
|
| 2013 |
+
|
| 2014 |
+
|
| 2015 |
def reset_generation_defaults_for_task(task: str):
    """Return the default generation settings for *task*.

    The result is the tuple
    ``(aspect_ratio, height, width, num_frames, resolution, size_update)``.
    ``num_frames`` carries ``DEFAULT_VIDEO_DURATION_SECONDS`` (a duration in
    seconds, despite its name), and ``size_update`` is a ``gr.update`` with
    the output-resolution choices and the selected default size.
    """
    internal_task = normalize_task(task)
    default_ratio = get_default_aspect_ratio(internal_task)
    default_resolution = get_default_resolution_for_task(internal_task)
    default_width, default_height = get_size_for_aspect_ratio(
        internal_task, default_ratio, default_resolution
    )
    default_duration = DEFAULT_VIDEO_DURATION_SECONDS

    resolution_choices = get_output_resolution_choices_for_task(internal_task, default_resolution)
    selected = format_size_markdown(internal_task, default_width, default_height)
    size_update = gr.update(choices=resolution_choices, value=selected)

    return (
        default_ratio,
        default_height,
        default_width,
        default_duration,
        default_resolution,
        size_update,
    )
|
| 2025 |
|
| 2026 |
|
| 2027 |
def apply_prompt_example(task: str, evt: gr.SelectData):
|
|
|
|
| 2036 |
return (prompt_text, *defaults)
|
| 2037 |
|
| 2038 |
|
| 2039 |
+
def make_prompt_example_click_handler(prompt_text: str):
    """Build the click callback for a text-only prompt-example row.

    The custom example rows are plain buttons, so the full prompt string is
    captured in the returned closure. When invoked with the current task, the
    callback returns the prompt followed by that task's generation defaults
    from ``reset_generation_defaults_for_task``.
    """

    def _handler(task: str):
        return (prompt_text, *reset_generation_defaults_for_task(task))

    return _handler
|
| 2053 |
+
|
| 2054 |
+
|
| 2055 |
+
def make_media_prompt_example_click_handler(
    prompt_text: str,
    input_video_path: Optional[str] = None,
    input_image_path: Optional[str] = None,
):
    """Build the click callback for an edit/understanding example row.

    The closure captures the full prompt text and the matching media paths.
    When invoked with the current task, the callback returns the prompt, the
    video path, the image path, and then that task's generation defaults from
    ``reset_generation_defaults_for_task``.
    """

    def _handler(task: str):
        task_defaults = reset_generation_defaults_for_task(task)
        media_values = (prompt_text, input_video_path, input_image_path)
        return (*media_values, *task_defaults)

    return _handler
|
| 2072 |
+
|
| 2073 |
+
|
| 2074 |
def get_understanding_system_prompt_choices(task: str) -> list[str]:
|
| 2075 |
internal_task = normalize_task(task)
|
| 2076 |
if internal_task == TASK_X2T_IMAGE:
|
|
|
|
| 2598 |
)
|
| 2599 |
|
| 2600 |
stage_start = time.perf_counter()
|
| 2601 |
+
print(f"[startup][gpu:{self.device}] Casting Lance model to bf16 on CPU", flush=True)
|
| 2602 |
+
model = model.to(dtype=torch.bfloat16)
|
| 2603 |
+
self._log_stage("Lance model bf16 cast", stage_start)
|
| 2604 |
|
| 2605 |
stage_start = time.perf_counter()
|
| 2606 |
print(f"[startup][gpu:{self.device}] Loading tokenizer: {model_args.model_path}", flush=True)
|
|
|
|
| 2638 |
!= model.language_model.get_output_embeddings().weight.data.data_ptr()
|
| 2639 |
), "tie_word_embeddings conflict"
|
| 2640 |
|
| 2641 |
+
stage_start = time.perf_counter()
|
| 2642 |
+
print(f"[startup][gpu:{self.device}] Moving Lance model to GPU {self.device}", flush=True)
|
| 2643 |
+
model = model.to(device=self.device)
|
| 2644 |
+
self._log_stage("Lance model move to GPU", stage_start)
|
| 2645 |
model.eval()
|
| 2646 |
if vae_model is not None and hasattr(vae_model, "eval"):
|
| 2647 |
vae_model.eval()
|
|
|
|
| 3188 |
print(f"[startup] flash-attn {DEFAULT_FLASH_ATTN_VERSION} installed successfully.", flush=True)
|
| 3189 |
|
| 3190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3191 |
def get_env_int(name: str, default: int) -> int:
|
| 3192 |
"""Read an integer environment variable, falling back safely on invalid values."""
|
| 3193 |
try:
|
|
|
|
| 3196 |
return default
|
| 3197 |
|
| 3198 |
|
| 3199 |
+
def get_env_float(name: str, default: float) -> float:
    """Read a float environment variable, falling back safely on invalid values.

    Args:
        name: Name of the environment variable.
        default: Value used when the variable is unset, cannot be parsed as a
            float, or parses to a non-finite number.

    Returns:
        The parsed finite float, or ``default``.
    """
    import math  # local import keeps this helper independent of the file header

    try:
        parsed = float(os.getenv(name, str(default)))
    except (TypeError, ValueError):
        return default
    # float() accepts "inf" and "nan". Letting those through would poison
    # later arithmetic (for example int(x * inf) raises OverflowError), so
    # they are treated like any other invalid setting.
    if not math.isfinite(parsed):
        return default
    return parsed
|
| 3205 |
+
|
| 3206 |
+
|
| 3207 |
def get_zerogpu_duration_cap() -> int:
    """Return the upper bound, in seconds, for ZeroGPU duration requests.

    The duration is a ZeroGPU reservation/timeout hint. Shorter values can
    improve queue priority and reduce wasted quota, but the value still has to
    cover model warm-up plus inference. The cap is read from the environment
    and defaults to 240; it is never lower than 1. Override per deployment
    when needed:
        LANCE_ZEROGPU_MAX_DURATION_SECONDS=300
    """
    configured_cap = get_env_int("LANCE_ZEROGPU_MAX_DURATION_SECONDS", 240)
    return configured_cap if configured_cap > 1 else 1
|
| 3216 |
|
| 3217 |
|
| 3218 |
def clamp_zerogpu_duration(seconds: int) -> int:
    """Clamp *seconds* to the range 1..``get_zerogpu_duration_cap()``."""
    requested = int(seconds)
    upper_bound = get_zerogpu_duration_cap()
    bounded = requested if requested < upper_bound else upper_bound
    return bounded if bounded > 1 else 1
|
| 3220 |
|
| 3221 |
|
| 3222 |
+
def is_pipeline_pool_ready_for_task(task: str) -> bool:
    """Report whether the active pipeline pool can serve *task* without loading.

    True only when ``ACTIVE_PIPELINE_POOL`` exists, its ``model_variant``
    matches the variant required by the task, and every pipeline in the pool
    has a truthy ``initialized`` attribute.

    ZeroGPU evaluates the dynamic duration before calling the decorated
    function, so this check lets a warm run request a shorter duration while
    the first request after startup or a model switch reserves extra time.
    Any error during the check is treated as "not ready".
    """
    try:
        active_pool = ACTIVE_PIPELINE_POOL
        if active_pool is None:
            return False
        if active_pool.model_variant != get_task_model_variant(task):
            return False
        for pipeline in active_pool.pipelines:
            if not getattr(pipeline, "initialized", False):
                return False
        return True
    except Exception:
        # Deliberately best-effort: a failed readiness probe must only result
        # in the longer cold-start reservation, never in a failed request.
        return False
|
| 3237 |
+
|
| 3238 |
+
|
| 3239 |
+
def finalize_zerogpu_duration(estimated_seconds: float, task: str) -> int:
    """Turn a raw time estimate into the ZeroGPU duration to request.

    A cold-start buffer (default 120 s, never negative) is added when the
    pipeline pool is not ready for *task*. The total is then scaled by a
    safety margin (default 1.10, never below 1.0), rounded via
    ``int(value + 0.999)``, and clamped by ``clamp_zerogpu_duration``.
    """
    safety_margin = max(1.0, get_env_float("LANCE_ZEROGPU_DURATION_MARGIN", 1.10))
    pool_ready = is_pipeline_pool_ready_for_task(task)
    if not pool_ready:
        cold_start_buffer = max(0, get_env_int("LANCE_ZEROGPU_COLD_START_BUFFER_SECONDS", 120))
        estimated_seconds += cold_start_buffer
    padded_seconds = estimated_seconds * safety_margin + 0.999
    return clamp_zerogpu_duration(int(padded_seconds))
|
| 3245 |
+
|
| 3246 |
+
|
| 3247 |
def get_run_task_gpu_duration(
|
| 3248 |
task: str,
|
| 3249 |
prompt: str,
|
|
|
|
| 3260 |
cfg_text_scale: float,
|
| 3261 |
enable_frame_interpolation: bool,
|
| 3262 |
) -> int:
|
| 3263 |
+
"""Return a dynamic ZeroGPU reservation duration.
|
| 3264 |
|
| 3265 |
+
The previous implementation used one conservative estimate for both cold and
|
| 3266 |
+
warm runs. This version keeps the first request safe, then asks for shorter
|
| 3267 |
+
durations once the matching Lance model is already loaded, which reduces
|
| 3268 |
+
wasted ZeroGPU quota and improves queue priority without changing the UI.
|
| 3269 |
"""
|
| 3270 |
internal_task = normalize_task(task)
|
| 3271 |
+
timesteps = max(1, int(validation_num_timesteps or DEFAULT_TIMESTEPS))
|
| 3272 |
+
backend_resolution = normalize_resolution_for_backend(str(resolution), internal_task)
|
| 3273 |
+
resolution_multiplier = 1.28 if backend_resolution == "video_480p" else 1.0
|
| 3274 |
+
timestep_extra = max(0, timesteps - 20)
|
| 3275 |
+
|
| 3276 |
+
if internal_task == TASK_T2V:
|
| 3277 |
+
requested_seconds = max(1, int(num_frames or DEFAULT_VIDEO_DURATION_SECONDS))
|
| 3278 |
+
estimate = 35 + requested_seconds * 10 + timestep_extra * 1.5
|
| 3279 |
+
if normalize_frame_interpolation(enable_frame_interpolation):
|
| 3280 |
+
estimate += min(32, 8 + requested_seconds * 3)
|
| 3281 |
+
return finalize_zerogpu_duration(estimate * resolution_multiplier, internal_task)
|
| 3282 |
+
|
| 3283 |
+
if internal_task == TASK_VIDEO_EDIT:
|
| 3284 |
+
estimate = 85 + timestep_extra * 1.5
|
| 3285 |
+
if normalize_frame_interpolation(enable_frame_interpolation):
|
| 3286 |
+
estimate += 22
|
| 3287 |
+
return finalize_zerogpu_duration(estimate * resolution_multiplier, internal_task)
|
| 3288 |
+
|
| 3289 |
if internal_task == TASK_X2T_VIDEO:
|
| 3290 |
+
return finalize_zerogpu_duration(32, internal_task)
|
| 3291 |
+
if internal_task == TASK_T2I:
|
| 3292 |
+
return finalize_zerogpu_duration(58, internal_task)
|
| 3293 |
+
if internal_task == TASK_IMAGE_EDIT:
|
| 3294 |
+
return finalize_zerogpu_duration(70, internal_task)
|
| 3295 |
+
return finalize_zerogpu_duration(28, internal_task)
|
| 3296 |
|
| 3297 |
|
| 3298 |
def get_pipeline_pool(task: str) -> PipelinePool:
|
|
|
|
| 3365 |
gpu_text = "unknown"
|
| 3366 |
concurrency = 1
|
| 3367 |
active_variant = "none"
|
|
|
|
| 3368 |
if ACTIVE_PIPELINE_POOL is not None:
|
| 3369 |
active_variant = ACTIVE_PIPELINE_POOL.model_variant
|
| 3370 |
gpu_text = ACTIVE_PIPELINE_POOL.gpu_summary
|
| 3371 |
concurrency = ACTIVE_PIPELINE_POOL.size
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3372 |
return (
|
| 3373 |
f"**Status** GPU: `{gpu_text}` | Max concurrency: `{concurrency}` | "
|
| 3374 |
f"Queue limit: `{QUEUE_MAX_SIZE}` | Active model: `{active_variant}` | "
|
| 3375 |
+
f"Switch mode: `unload then load`"
|
| 3376 |
)
|
| 3377 |
|
| 3378 |
|
|
|
|
| 3435 |
is_edit_task = internal_task in EDIT_TASKS
|
| 3436 |
is_understanding_task = internal_task in UNDERSTANDING_TASKS
|
| 3437 |
is_generation_task = internal_task in GENERATION_TASKS
|
| 3438 |
+
is_text_to_visual_task = internal_task in {TASK_T2V, TASK_T2I}
|
| 3439 |
show_media_input = is_edit_task or is_understanding_task
|
| 3440 |
+
resolution_choices = get_resolution_choice_values_for_task(internal_task)
|
| 3441 |
+
resolution_value = get_default_resolution_for_task(internal_task)
|
| 3442 |
aspect_ratio_value = DEFAULT_IMAGE_ASPECT_RATIO if is_image_task else DEFAULT_VIDEO_ASPECT_RATIO
|
| 3443 |
+
width_value, height_value = get_size_for_aspect_ratio(internal_task, aspect_ratio_value, resolution_value)
|
| 3444 |
size_markdown = format_size_markdown(internal_task, width_value, height_value)
|
| 3445 |
system_prompt_choices = get_understanding_system_prompt_choices(internal_task)
|
| 3446 |
|
| 3447 |
+
if is_text_to_visual_task:
|
| 3448 |
text_label = "Prompt"
|
| 3449 |
text_placeholder = "Describe what you want to generate..."
|
| 3450 |
elif is_edit_task:
|
|
|
|
| 3463 |
|
| 3464 |
output_icon = "video" if output_label == "Output Video" else "image" if output_label == "Output Image" else "text"
|
| 3465 |
show_generation_settings = is_generation_task or is_edit_task
|
| 3466 |
+
show_aspect_ratio = is_text_to_visual_task
|
| 3467 |
+
show_output_resolution = is_text_to_visual_task
|
| 3468 |
show_input_video = internal_task in {TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
|
| 3469 |
show_input_image = internal_task in {TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
|
| 3470 |
+
show_frame_interpolation_settings = internal_task in {TASK_T2V, TASK_VIDEO_EDIT}
|
| 3471 |
+
show_video_resolution_settings = internal_task == TASK_T2V
|
| 3472 |
|
| 3473 |
return (
|
| 3474 |
gr.update(value=build_lance_label_html(text_label, "lance-prompt-label")),
|
|
|
|
| 3476 |
label=text_label,
|
| 3477 |
placeholder=text_placeholder,
|
| 3478 |
visible=True,
|
| 3479 |
+
value="",
|
| 3480 |
),
|
| 3481 |
gr.update(
|
| 3482 |
choices=system_prompt_choices,
|
| 3483 |
value=system_prompt_choices[0],
|
| 3484 |
visible=False,
|
| 3485 |
),
|
| 3486 |
+
# Switching task pages should always start from a clean input state.
|
| 3487 |
+
# Clear both visual input boxes even if one of them stays visible across tasks.
|
| 3488 |
gr.update(label="Input Video", visible=show_input_video, value=None),
|
| 3489 |
gr.update(label="Input Image", visible=show_input_image, value=None),
|
| 3490 |
+
gr.update(visible=show_frame_interpolation_settings),
|
| 3491 |
gr.update(visible=show_aspect_ratio),
|
| 3492 |
+
gr.update(visible=show_output_resolution),
|
| 3493 |
gr.update(visible=internal_task == TASK_T2V),
|
| 3494 |
+
gr.update(visible=show_video_resolution_settings),
|
| 3495 |
gr.update(choices=get_aspect_ratio_choices_for_task(internal_task), value=aspect_ratio_value, visible=show_aspect_ratio),
|
| 3496 |
gr.update(value=height_value),
|
| 3497 |
gr.update(value=width_value),
|
| 3498 |
+
gr.update(visible=show_frame_interpolation_settings, value=DEFAULT_FRAME_INTERPOLATION),
|
| 3499 |
+
gr.update(choices=get_output_resolution_choices_for_task(internal_task, resolution_value), value=size_markdown, visible=show_output_resolution),
|
| 3500 |
gr.update(visible=internal_task == TASK_T2V, value=DEFAULT_VIDEO_DURATION_SECONDS),
|
| 3501 |
+
gr.update(choices=resolution_choices, value=resolution_value, visible=show_video_resolution_settings),
|
| 3502 |
gr.update(value=build_lance_icon_label_html(output_label, output_icon, "lance-output-label")),
|
| 3503 |
gr.update(visible=internal_task in {TASK_T2V, TASK_VIDEO_EDIT}),
|
| 3504 |
gr.update(visible=internal_task in {TASK_T2I, TASK_IMAGE_EDIT}),
|
|
|
|
| 3576 |
value=DEFAULT_VIDEO_ASPECT_RATIO,
|
| 3577 |
elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
|
| 3578 |
)
|
| 3579 |
+
with gr.Row(elem_classes=["generation-controls-row", "output-resolution-row"]) as output_resolution_row:
|
| 3580 |
with gr.Column(elem_classes=["lance-control-field"]):
|
| 3581 |
gr.HTML('<div class="lance-generation-label">Output Resolution</div>', elem_classes=["lance-label-html"])
|
| 3582 |
real_size = gr.Radio(
|
|
|
|
| 3587 |
interactive=True,
|
| 3588 |
elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
|
| 3589 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3590 |
with gr.Row(elem_classes=["generation-controls-row", "video-duration-row"]) as video_duration_row:
|
| 3591 |
with gr.Column(elem_classes=["lance-control-field"]):
|
| 3592 |
gr.HTML(build_lance_label_html("Video Duration (seconds)", "lance-generation-label"), elem_classes=["lance-label-html"])
|
| 3593 |
+
num_frames = gr.Radio(
|
| 3594 |
label="Video Duration (seconds)",
|
| 3595 |
show_label=False,
|
| 3596 |
+
choices=get_video_duration_choices(),
|
|
|
|
|
|
|
| 3597 |
value=DEFAULT_VIDEO_DURATION_SECONDS,
|
| 3598 |
elem_classes=["generation-control", "generation-choice-grid", "generation-two-line-label"],
|
| 3599 |
)
|
| 3600 |
+
with gr.Row(elem_classes=["generation-controls-row", "video-resolution-row"]) as video_resolution_row:
|
| 3601 |
+
with gr.Column(elem_classes=["lance-control-field"]):
|
| 3602 |
+
gr.HTML(build_lance_label_html("Video Resolution", "lance-generation-label"), elem_classes=["lance-label-html"])
|
| 3603 |
+
resolution = gr.Dropdown(
|
| 3604 |
+
label="Video Resolution",
|
| 3605 |
+
show_label=False,
|
| 3606 |
+
choices=VIDEO_RESOLUTION_DISPLAY_CHOICES,
|
| 3607 |
+
value=DEFAULT_RESOLUTION,
|
| 3608 |
+
elem_classes=["generation-control"],
|
| 3609 |
+
)
|
| 3610 |
+
height = gr.Number(value=DEFAULT_HEIGHT, precision=0, visible=False)
|
| 3611 |
+
width = gr.Number(value=DEFAULT_WIDTH, precision=0, visible=False)
|
| 3612 |
|
| 3613 |
with gr.Accordion("Advanced Parameters", open=False, elem_classes=["lance-advanced-accordion"]):
|
| 3614 |
with gr.Column(elem_classes=["lance-control-field"]):
|
|
|
|
| 3673 |
|
| 3674 |
run_button = gr.Button("🚀 Generate", variant="primary", elem_classes=["lance-run-button"])
|
| 3675 |
|
| 3676 |
+
def build_prompt_example_table(examples: list[list], media_type: Optional[str] = None):
    """Render example rows that show the full prompt text.

    Each example becomes a button showing the complete prompt. When
    ``media_type`` is ``"video"`` or ``"image"`` and the row carries a
    matching media path (index 1 for video, index 2 for image), the row is
    laid out with the button beside a non-interactive media preview;
    otherwise the button is rendered alone.

    Returns a list of ``(button, prompt, video_path, image_path)`` tuples, one
    per example, for the caller to wire click handlers to.
    """

    def _prompt_button(text: str):
        # All three row layouts use the same button styling.
        return gr.Button(
            text,
            variant="secondary",
            elem_classes=["prompt-example-row-button"],
        )

    collected = []
    with gr.Column(elem_classes=["prompt-example-full-table"]):
        if media_type == "video":
            header_html = "<div>Prompt / Instruction / Question</div><div>Input Video</div>"
            header_classes = ["prompt-example-table-header", "prompt-example-table-header-with-media"]
        elif media_type == "image":
            header_html = "<div>Prompt / Instruction / Question</div><div>Input Image</div>"
            header_classes = ["prompt-example-table-header", "prompt-example-table-header-with-media"]
        else:
            header_html = "<div>Prompt</div>"
            header_classes = ["prompt-example-table-header"]
        gr.HTML(header_html, elem_classes=header_classes)

        with gr.Column(elem_classes=["prompt-example-table-body"]):
            for row in examples:
                prompt_text = str(row[0]) if row else ""
                video_path = str(row[1]) if len(row) > 1 and row[1] else None
                image_path = str(row[2]) if len(row) > 2 and row[2] else None

                if media_type == "video" and video_path:
                    with gr.Row(elem_classes=["prompt-example-multimodal-row", "prompt-example-video-row"]):
                        with gr.Column(elem_classes=["prompt-example-prompt-cell"]):
                            row_button = _prompt_button(prompt_text)
                        with gr.Column(elem_classes=["prompt-example-media-cell", "prompt-example-video-cell"]):
                            gr.Video(
                                value=video_path,
                                label="Input Video",
                                show_label=False,
                                interactive=False,
                                elem_classes=["prompt-example-media-preview", "prompt-example-video-preview"],
                            )
                    collected.append((row_button, prompt_text, video_path, None))
                    continue

                if media_type == "image" and image_path:
                    with gr.Row(elem_classes=["prompt-example-multimodal-row"]):
                        with gr.Column(elem_classes=["prompt-example-prompt-cell"]):
                            row_button = _prompt_button(prompt_text)
                        with gr.Column(elem_classes=["prompt-example-media-cell"]):
                            gr.Image(
                                value=image_path,
                                label="Input Image",
                                show_label=False,
                                interactive=False,
                                type="filepath",
                                elem_classes=["prompt-example-media-preview"],
                            )
                    collected.append((row_button, prompt_text, None, image_path))
                    continue

                # Text-only row, or a media table whose row has no usable path.
                row_button = _prompt_button(prompt_text)
                collected.append((row_button, prompt_text, None, None))
    return collected
|
| 3736 |
+
|
| 3737 |
with gr.Column(visible=True, elem_classes=["lance-recommended-section"]) as video_generation_examples_group:
|
| 3738 |
gr.HTML(build_lance_label_html("Video generation recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
|
| 3739 |
with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
|
| 3740 |
+
video_generation_example_buttons = build_prompt_example_table(VIDEO_GENERATION_EXAMPLES)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3741 |
|
| 3742 |
with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as video_edit_examples_group:
|
| 3743 |
gr.HTML(build_lance_label_html("Video edit recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
|
| 3744 |
+
with gr.Group(elem_classes=["example-panel", "prompt-examples", "video-edit-examples"]):
|
| 3745 |
+
video_edit_example_buttons = build_prompt_example_table(VIDEO_EDIT_EXAMPLES, media_type="video")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3746 |
|
| 3747 |
with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as video_understanding_examples_group:
|
| 3748 |
gr.HTML(build_lance_label_html("Video understanding recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
|
| 3749 |
+
with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
|
| 3750 |
+
video_understanding_example_buttons = build_prompt_example_table(VIDEO_UNDERSTANDING_EXAMPLES, media_type="video")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3751 |
|
| 3752 |
with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_generation_examples_group:
|
| 3753 |
gr.HTML(build_lance_label_html("Image generation recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
|
| 3754 |
with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
|
| 3755 |
+
image_generation_example_buttons = build_prompt_example_table(IMAGE_GENERATION_EXAMPLES)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3756 |
|
| 3757 |
with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_edit_examples_group:
|
| 3758 |
gr.HTML(build_lance_label_html("Image edit recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
|
| 3759 |
+
with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
|
| 3760 |
+
image_edit_example_buttons = build_prompt_example_table(IMAGE_EDIT_EXAMPLES, media_type="image")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3761 |
|
| 3762 |
with gr.Column(visible=False, elem_classes=["lance-recommended-section"]) as image_understanding_examples_group:
|
| 3763 |
gr.HTML(build_lance_label_html("Image understanding recommended cases", "lance-section-label"), elem_classes=["lance-label-html"])
|
| 3764 |
+
with gr.Group(elem_classes=["example-panel", "prompt-examples"]):
|
| 3765 |
+
image_understanding_example_buttons = build_prompt_example_table(IMAGE_UNDERSTANDING_EXAMPLES, media_type="image")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3766 |
|
| 3767 |
task.change(
|
| 3768 |
fn=update_task_ui,
|
|
|
|
| 3777 |
aspect_ratio_row,
|
| 3778 |
output_resolution_row,
|
| 3779 |
video_duration_row,
|
| 3780 |
+
video_resolution_row,
|
| 3781 |
aspect_ratio,
|
| 3782 |
height,
|
| 3783 |
width,
|
|
|
|
| 3800 |
|
| 3801 |
aspect_ratio.change(
|
| 3802 |
fn=update_size_from_aspect_ratio,
|
| 3803 |
+
inputs=[task, aspect_ratio, resolution],
|
| 3804 |
outputs=[height, width, real_size],
|
| 3805 |
queue=False,
|
| 3806 |
show_api=False,
|
|
|
|
| 3808 |
|
| 3809 |
real_size.change(
|
| 3810 |
fn=update_aspect_ratio_from_output_resolution,
|
| 3811 |
+
inputs=[task, real_size, resolution],
|
| 3812 |
outputs=[aspect_ratio, height, width],
|
| 3813 |
queue=False,
|
| 3814 |
show_api=False,
|
| 3815 |
)
|
| 3816 |
|
| 3817 |
+
resolution.change(
|
| 3818 |
+
fn=update_output_resolution_from_video_profile,
|
| 3819 |
+
inputs=[task, aspect_ratio, resolution],
|
| 3820 |
+
outputs=[real_size, height, width],
|
| 3821 |
+
queue=False,
|
| 3822 |
+
show_api=False,
|
| 3823 |
+
)
|
| 3824 |
+
|
| 3825 |
+
for example_button, example_prompt, _, _ in video_generation_example_buttons + image_generation_example_buttons:
|
| 3826 |
+
example_button.click(
|
| 3827 |
+
fn=make_prompt_example_click_handler(example_prompt),
|
| 3828 |
inputs=[task],
|
| 3829 |
+
outputs=[prompt, aspect_ratio, height, width, num_frames, resolution, real_size],
|
| 3830 |
queue=False,
|
| 3831 |
show_api=False,
|
| 3832 |
)
|
| 3833 |
|
| 3834 |
+
for example_button, example_prompt, example_video, example_image in (
|
| 3835 |
+
video_edit_example_buttons
|
| 3836 |
+
+ video_understanding_example_buttons
|
| 3837 |
+
+ image_edit_example_buttons
|
| 3838 |
+
+ image_understanding_example_buttons
|
| 3839 |
+
):
|
| 3840 |
+
example_button.click(
|
| 3841 |
+
fn=make_media_prompt_example_click_handler(example_prompt, example_video, example_image),
|
| 3842 |
+
inputs=[task],
|
| 3843 |
+
outputs=[prompt, input_video, input_image, aspect_ratio, height, width, num_frames, resolution, real_size],
|
| 3844 |
+
queue=False,
|
| 3845 |
+
show_api=False,
|
| 3846 |
+
)
|
|
|
|
| 3847 |
|
| 3848 |
run_button.click(
|
| 3849 |
fn=build_running_status_markdown,
|
|
|
|
| 3870 |
enable_frame_interpolation,
|
| 3871 |
],
|
| 3872 |
outputs=[output_video, output_image, output_text, status, logs],
|
| 3873 |
+
show_progress="minimal",
|
| 3874 |
)
|
| 3875 |
|
| 3876 |
return demo
|
|
|
|
| 3907 |
return gpu_ids
|
| 3908 |
|
| 3909 |
|
| 3910 |
+
def prefetch_model_assets_before_launch() -> None:
    """Fetch model assets up front so the first ZeroGPU call does not pay for it.

    Downloading snapshots inside an ``@spaces.GPU`` call eats into the first
    user's GPU reservation, so the assets are prepared here, using CPU and disk
    only, before the UI is launched.

    Environment variables:
        LANCE_PREFETCH_MODEL_ASSETS: set to ``0`` to skip prefetching at Space
            startup (the default is taken from ``running_on_space()``).
        LANCE_PREFETCH_MODEL_VARIANTS: comma-separated variants to prefetch,
            e.g. ``video`` to prefetch less. Defaults to the video and image
            variants.

    A failure for one variant is logged and does not stop the remaining ones;
    the log message states that the download is retried lazily at inference.
    """
    prefetch_enabled = env_flag("LANCE_PREFETCH_MODEL_ASSETS", running_on_space())
    if not prefetch_enabled:
        print("[startup] Model asset prefetch disabled.", flush=True)
        return

    default_variants = f"{MODEL_VARIANT_VIDEO},{MODEL_VARIANT_IMAGE}"
    requested = os.getenv("LANCE_PREFETCH_MODEL_VARIANTS", default_variants)
    tokens = (token.strip() for token in requested.split(","))
    # dict.fromkeys drops duplicates while keeping first-seen order; blank
    # entries (e.g. from a trailing comma) are ignored before normalization.
    variants: list[str] = list(
        dict.fromkeys(normalize_model_variant(token) for token in tokens if token)
    )

    for variant in variants:
        try:
            started_at = time.perf_counter()
            model_path = ensure_model_assets(variant)
            took = time.perf_counter() - started_at
            ready_message = (
                f"[startup][{variant}] Model assets are ready at {display_path(model_path)} "
                f"before ZeroGPU inference. elapsed={took:.2f}s"
            )
            print(ready_message, flush=True)
        except Exception as exc:
            # Best effort only: startup must continue even if a download fails.
            failure_message = (
                f"[startup][{variant}] Model asset prefetch failed and will be retried lazily during inference: {exc}"
            )
            print(failure_message, flush=True)
|
| 3947 |
+
|
| 3948 |
+
|
| 3949 |
if __name__ == "__main__":
|
| 3950 |
args = parse_args()
|
| 3951 |
os.environ["LANCE_GPUS"] = args.gpus
|
| 3952 |
QUEUE_MAX_SIZE = args.queue_size
|
| 3953 |
+
prefetch_model_assets_before_launch()
|
| 3954 |
+
print(
|
| 3955 |
+
"[startup] Skipping GPU model preload. UI will launch first, and Lance weights will be loaded lazily inside ZeroGPU inference calls.",
|
| 3956 |
+
flush=True,
|
| 3957 |
+
)
|
|
|
|
|
|
|
| 3958 |
concurrency_limit = 1
|
| 3959 |
demo = build_demo()
|
| 3960 |
demo.queue(
|
config/config_factory.py
CHANGED
|
@@ -234,7 +234,7 @@ class InferenceArguments(TrainingArguments):
|
|
| 234 |
video_width: int = 480
|
| 235 |
num_frames: int = 50
|
| 236 |
task: str = "t2v" # t2v / t2i / edit / idip ...
|
| 237 |
-
resolution: str = "
|
| 238 |
text_template: bool = False # 是否使用 system_prompt 文本模板
|
| 239 |
max_duration: float = 6.0 # 最大视频时长(秒)
|
| 240 |
|
|
|
|
| 234 |
video_width: int = 480
|
| 235 |
num_frames: int = 50
|
| 236 |
task: str = "t2v" # t2v / t2i / edit / idip ...
|
| 237 |
+
resolution: str = "video_360p" # image_768x768 or video_360p / video_480p
|
| 238 |
text_template: bool = False # 是否使用 system_prompt 文本模板
|
| 239 |
max_duration: float = 6.0 # 最大视频时长(秒)
|
| 240 |
|
data/datasets_custom/validation_dataset.py
CHANGED
|
@@ -116,7 +116,10 @@ class ValidationDataset(Dataset):
|
|
| 116 |
if self.data_config.resolution == "image_768x768":
|
| 117 |
resolution_vae = 768
|
| 118 |
resolution_vit = 672
|
| 119 |
-
elif self.data_config.resolution == "
|
|
|
|
|
|
|
|
|
|
| 120 |
resolution_vae = 640
|
| 121 |
resolution_vit = 616
|
| 122 |
else:
|
|
|
|
| 116 |
if self.data_config.resolution == "image_768x768":
|
| 117 |
resolution_vae = 768
|
| 118 |
resolution_vit = 672
|
| 119 |
+
elif self.data_config.resolution == "video_360p":
|
| 120 |
+
resolution_vae = 480
|
| 121 |
+
resolution_vit = 448
|
| 122 |
+
elif self.data_config.resolution == "video_480p":
|
| 123 |
resolution_vae = 640
|
| 124 |
resolution_vit = 616
|
| 125 |
else:
|
inference_lance.py
CHANGED
|
@@ -495,9 +495,9 @@ def main():
|
|
| 495 |
training_args=training_args,
|
| 496 |
)
|
| 497 |
stage_start = time.perf_counter()
|
| 498 |
-
log_rank0(
|
| 499 |
-
model = model.to(
|
| 500 |
-
log_stage("Lance model
|
| 501 |
|
| 502 |
# Setup tokenizer for model:
|
| 503 |
stage_start = time.perf_counter()
|
|
@@ -538,7 +538,10 @@ def main():
|
|
| 538 |
else: # HACK!!!
|
| 539 |
assert model.language_model.get_input_embeddings().weight.data.data_ptr() != model.language_model.get_output_embeddings().weight.data.data_ptr(), 'tie_word_embeddings conflict'
|
| 540 |
|
| 541 |
-
|
|
|
|
|
|
|
|
|
|
| 542 |
model.eval()
|
| 543 |
if vae_model is not None and hasattr(vae_model, "eval"):
|
| 544 |
vae_model.eval()
|
|
|
|
| 495 |
training_args=training_args,
|
| 496 |
)
|
| 497 |
stage_start = time.perf_counter()
|
| 498 |
+
log_rank0("[startup] Casting Lance model to bf16 on CPU")
|
| 499 |
+
model = model.to(dtype=torch.bfloat16)
|
| 500 |
+
log_stage("Lance model bf16 cast", stage_start)
|
| 501 |
|
| 502 |
# Setup tokenizer for model:
|
| 503 |
stage_start = time.perf_counter()
|
|
|
|
| 538 |
else: # HACK!!!
|
| 539 |
assert model.language_model.get_input_embeddings().weight.data.data_ptr() != model.language_model.get_output_embeddings().weight.data.data_ptr(), 'tie_word_embeddings conflict'
|
| 540 |
|
| 541 |
+
stage_start = time.perf_counter()
|
| 542 |
+
log_rank0(f"[startup] Moving Lance model to GPU {DEVICE}")
|
| 543 |
+
model = model.to(device=DEVICE)
|
| 544 |
+
log_stage("Lance model move to GPU", stage_start)
|
| 545 |
model.eval()
|
| 546 |
if vae_model is not None and hasattr(vae_model, "eval"):
|
| 547 |
vae_model.eval()
|
modeling/lance/lance.py
CHANGED
|
@@ -301,7 +301,7 @@ class Lance(PreTrainedModel):
|
|
| 301 |
|
| 302 |
packed_latent = (1 - packed_timesteps[:, None]) * packed_latent_clean + packed_timesteps[:, None] * noise
|
| 303 |
packed_timestep_embeds = self.time_embedder(packed_timesteps) # [L, C]
|
| 304 |
-
latent_token_pos_emb = self.latent_pos_embed(packed_latent_position_ids)
|
| 305 |
packed_latent = self.vae2llm(packed_latent) + packed_timestep_embeds + latent_token_pos_emb
|
| 306 |
|
| 307 |
packed_sequence[packed_vae_token_indexes] = packed_latent.to(packed_sequence.dtype) # NOTE: 这里替换真实的vae token embed!
|
|
@@ -655,7 +655,7 @@ class Lance(PreTrainedModel):
|
|
| 655 |
|
| 656 |
# --- 视觉特征 编码 ---
|
| 657 |
timestep_embed = self.time_embedder(timestep)
|
| 658 |
-
latent_pos_embed = self.latent_pos_embed(vae_position_ids)
|
| 659 |
vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
|
| 660 |
vae_embed = vae_embed.to(current_sequence.dtype)
|
| 661 |
|
|
@@ -1641,7 +1641,7 @@ class Lance(PreTrainedModel):
|
|
| 1641 |
|
| 1642 |
# --- 存入 视觉特征 编码 (vae condition)---
|
| 1643 |
timestep_embed = self.time_embedder(timestep)
|
| 1644 |
-
latent_pos_embed = self.latent_pos_embed(vae_position_ids)
|
| 1645 |
vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
|
| 1646 |
vae_embed = vae_embed.to(current_sequence.dtype)
|
| 1647 |
current_sequence[current_vae_token_indexes_local] = vae_embed
|
|
@@ -1698,7 +1698,7 @@ class Lance(PreTrainedModel):
|
|
| 1698 |
|
| 1699 |
# --- 视觉特征 编码 ---
|
| 1700 |
timestep_embed = self.time_embedder(timestep)
|
| 1701 |
-
latent_pos_embed = self.latent_pos_embed(vae_position_ids)
|
| 1702 |
vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
|
| 1703 |
vae_embed = vae_embed.to(current_sequence.dtype)
|
| 1704 |
|
|
|
|
| 301 |
|
| 302 |
packed_latent = (1 - packed_timesteps[:, None]) * packed_latent_clean + packed_timesteps[:, None] * noise
|
| 303 |
packed_timestep_embeds = self.time_embedder(packed_timesteps) # [L, C]
|
| 304 |
+
latent_token_pos_emb = self.latent_pos_embed(packed_latent_position_ids.to(device=packed_latent.device))
|
| 305 |
packed_latent = self.vae2llm(packed_latent) + packed_timestep_embeds + latent_token_pos_emb
|
| 306 |
|
| 307 |
packed_sequence[packed_vae_token_indexes] = packed_latent.to(packed_sequence.dtype) # NOTE: 这里替换真实的vae token embed!
|
|
|
|
| 655 |
|
| 656 |
# --- 视觉特征 编码 ---
|
| 657 |
timestep_embed = self.time_embedder(timestep)
|
| 658 |
+
latent_pos_embed = self.latent_pos_embed(vae_position_ids.to(device=x_t.device))
|
| 659 |
vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
|
| 660 |
vae_embed = vae_embed.to(current_sequence.dtype)
|
| 661 |
|
|
|
|
| 1641 |
|
| 1642 |
# --- 存入 视觉特征 编码 (vae condition)---
|
| 1643 |
timestep_embed = self.time_embedder(timestep)
|
| 1644 |
+
latent_pos_embed = self.latent_pos_embed(vae_position_ids.to(device=x_t.device))
|
| 1645 |
vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
|
| 1646 |
vae_embed = vae_embed.to(current_sequence.dtype)
|
| 1647 |
current_sequence[current_vae_token_indexes_local] = vae_embed
|
|
|
|
| 1698 |
|
| 1699 |
# --- 视觉特征 编码 ---
|
| 1700 |
timestep_embed = self.time_embedder(timestep)
|
| 1701 |
+
latent_pos_embed = self.latent_pos_embed(vae_position_ids.to(device=x_t.device))
|
| 1702 |
vae_embed = self.vae2llm(x_t) + timestep_embed + latent_pos_embed
|
| 1703 |
vae_embed = vae_embed.to(current_sequence.dtype)
|
| 1704 |
|
modeling/lance/modeling_utils.py
CHANGED
|
@@ -186,13 +186,38 @@ class PositionEmbedding3D(nn.Module):
|
|
| 186 |
self.max_num_latent_frames = max_latent_num_frames # t
|
| 187 |
self.max_latent_size = max_latent_size # h, w
|
| 188 |
self.hidden_size = hidden_size
|
| 189 |
-
self.
|
| 190 |
-
self._init_weights()
|
| 191 |
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
def forward(self, position_ids):
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
self.max_num_latent_frames = max_latent_num_frames # t
|
| 187 |
self.max_latent_size = max_latent_size # h, w
|
| 188 |
self.hidden_size = hidden_size
|
| 189 |
+
self.temporal_dim, self.height_dim, self.width_dim = self._split_hidden_dims(hidden_size)
|
|
|
|
| 190 |
|
| 191 |
+
@staticmethod
def _split_hidden_dims(embed_dim: int) -> tuple[int, int, int]:
    """Divide ``embed_dim`` into even-sized (temporal, height, width) widths.

    The temporal and height widths are both one third of ``embed_dim``,
    rounded down to an even number; the width axis takes whatever is left, so
    the three values always sum to ``embed_dim``.

    Args:
        embed_dim: Total embedding width. Must be even.

    Returns:
        ``(dim_t, dim_h, dim_w)``.

    Raises:
        AssertionError: If ``embed_dim`` is odd.
    """
    assert embed_dim % 2 == 0, "Embedding dimension must be even for 3D embeddings"
    third = embed_dim // 3
    # Each axis needs an even width because sin and cos halves are concatenated.
    axis_dim = third - (third % 2)
    remainder_dim = embed_dim - 2 * axis_dim
    assert remainder_dim % 2 == 0
    return axis_dim, axis_dim, remainder_dim
|
| 201 |
+
|
| 202 |
+
@staticmethod
def _build_1d_sincos(coords: torch.Tensor, embed_dim: int) -> torch.Tensor:
    """Build sine/cosine embeddings for a 1-D tensor of coordinates.

    Args:
        coords: 1-D tensor of positions; it is cast to float32 internally.
        embed_dim: Output width per position. Must be even.

    Returns:
        A float32 tensor of shape ``(len(coords), embed_dim)`` whose first half
        holds the sines and whose second half holds the cosines.

    Raises:
        AssertionError: If ``embed_dim`` is odd.
    """
    assert embed_dim % 2 == 0, "Embedding dimension must be even for 1D embeddings"
    num_freqs = embed_dim // 2
    # Exponents run over [0, 1) so frequencies decay from 1 towards 1/10000.
    exponents = torch.arange(num_freqs, device=coords.device, dtype=torch.float32) / (embed_dim / 2.0)
    inv_freq = 1.0 / (10000.0 ** exponents)
    angles = coords.to(dtype=torch.float32)[:, None] * inv_freq[None, :]
    sin_part = torch.sin(angles)
    cos_part = torch.cos(angles)
    return torch.cat([sin_part, cos_part], dim=-1)
|
| 211 |
|
| 212 |
def forward(self, position_ids):
    """Compute 3-D sin/cos position embeddings from flat position ids.

    Each id is decoded as ``t * S * S + h * S + w`` with
    ``S = self.max_latent_size``. The three coordinates are embedded
    separately and concatenated in (t, h, w) order.

    Args:
        position_ids: Integer tensor of flat position ids; any shape, it is
            flattened and cast to long.

    Returns:
        Tensor of shape ``(position_ids.numel(), temporal_dim + height_dim +
        width_dim)``.
    """
    side = self.max_latent_size
    flat_ids = position_ids.reshape(-1).to(dtype=torch.long)
    in_plane = flat_ids % (side * side)

    # (coordinate tensor, embedding width) for each axis, in output order.
    axes = (
        (flat_ids // (side * side), self.temporal_dim),
        (in_plane // side, self.height_dim),
        (in_plane % side, self.width_dim),
    )
    parts = [self._build_1d_sincos(axis_coords, axis_dim) for axis_coords, axis_dim in axes]
    return torch.cat(parts, dim=-1)
|
requirements.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
absl-py==0.15.0
|
| 2 |
accelerate==1.13.0
|
| 3 |
addict==2.4.0
|
| 4 |
-
albumentations==1.4.3
|
| 5 |
annotated-types==0.7.0
|
| 6 |
bitsandbytes==0.49.2
|
| 7 |
certifi==2024.8.30
|
|
@@ -23,7 +23,7 @@ joblib==1.4.2
|
|
| 23 |
kornia==0.8.2
|
| 24 |
librosa==0.10.2.post1
|
| 25 |
markupsafe==2.1.5
|
| 26 |
-
numpy==1.
|
| 27 |
omegaconf==2.3.0
|
| 28 |
opencv-python==4.7.0.72
|
| 29 |
opt_einsum==3.4.0
|
|
|
|
| 1 |
absl-py==0.15.0
|
| 2 |
accelerate==1.13.0
|
| 3 |
addict==2.4.0
|
| 4 |
+
# albumentations==1.4.3
|
| 5 |
annotated-types==0.7.0
|
| 6 |
bitsandbytes==0.49.2
|
| 7 |
certifi==2024.8.30
|
|
|
|
| 23 |
kornia==0.8.2
|
| 24 |
librosa==0.10.2.post1
|
| 25 |
markupsafe==2.1.5
|
| 26 |
+
numpy==1.23.5
|
| 27 |
omegaconf==2.3.0
|
| 28 |
opencv-python==4.7.0.72
|
| 29 |
opt_einsum==3.4.0
|