Spaces:
Running on Zero
Running on Zero
Vendor RIFE into repo
Browse files
- README.md +2 -0
- RIFE/inference_video.py +16 -1
- SPACE_DEPLOYMENT.md +1 -0
- app.py +38 -19
README.md
CHANGED
|
@@ -250,6 +250,8 @@ pip install --no-cache-dir --no-deps --force-reinstall \
|
|
| 250 |
|
| 251 |
At runtime, `app.py` still keeps a startup fallback for flash-attn and model prefetch, but the Space should already have the right packages installed before the UI appears.
|
| 252 |
|
|
|
|
|
|
|
| 253 |
### Download Model Weights
|
| 254 |
|
| 255 |
Please download all necessary model checkpoints from [Lance-3B on Hugging Face](https://huggingface.co/bytedance-research/Lance) and place them in the `downloads/` directory.
|
|
|
|
| 250 |
|
| 251 |
At runtime, `app.py` still keeps a startup fallback for flash-attn and model prefetch, but the Space should already have the right packages installed before the UI appears.
|
| 252 |
|
| 253 |
+
RIFE frame interpolation is treated as an optional post-processing step. If the RIFE code and `RIFE/train_log/flownet.pkl` are not a matching pair (that is, they do not come from the same RIFE release), the app will keep the generated video and skip interpolation instead of failing the whole request.
|
| 254 |
+
|
| 255 |
### Download Model Weights
|
| 256 |
|
| 257 |
Please download all necessary model checkpoints from [Lance-3B on Hugging Face](https://huggingface.co/bytedance-research/Lance) and place them in the `downloads/` directory.
|
RIFE/inference_video.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
import cv2
|
| 3 |
import torch
|
| 4 |
import argparse
|
|
@@ -13,12 +14,15 @@ from model.pytorch_msssim import ssim_matlab
|
|
| 13 |
|
| 14 |
warnings.filterwarnings("ignore")
|
| 15 |
|
|
|
|
|
|
|
|
|
|
| 16 |
parser = argparse.ArgumentParser(description='Interpolation for a pair of images')
|
| 17 |
parser.add_argument('--video', dest='video', type=str, default=None)
|
| 18 |
parser.add_argument('--output', dest='output', type=str, default=None)
|
| 19 |
parser.add_argument('--img', dest='img', type=str, default=None)
|
| 20 |
parser.add_argument('--montage', dest='montage', action='store_true', help='montage origin video')
|
| 21 |
-
parser.add_argument('--model', dest='modelDir', type=str, default=
|
| 22 |
parser.add_argument('--fp16', dest='fp16', action='store_true', help='fp16 mode for faster and more lightweight inference on cards with Tensor Cores')
|
| 23 |
parser.add_argument('--UHD', dest='UHD', action='store_true', help='support 4k video')
|
| 24 |
parser.add_argument('--scale', dest='scale', type=float, default=1.0, help='Try scale=0.5 for 4k video')
|
|
@@ -28,6 +32,17 @@ parser.add_argument('--png', dest='png', action='store_true', help='whether to v
|
|
| 28 |
parser.add_argument('--ext', dest='ext', type=str, default='mp4', help='vid_out video extension')
|
| 29 |
parser.add_argument('--exp', dest='exp', type=int, default=1)
|
| 30 |
args = parser.parse_args()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
assert (not args.video is None or not args.img is None)
|
| 32 |
if args.skip:
|
| 33 |
print("skip flag is abandoned, please refer to issue #207.")
|
|
|
|
| 1 |
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
import cv2
|
| 4 |
import torch
|
| 5 |
import argparse
|
|
|
|
| 14 |
|
| 15 |
warnings.filterwarnings("ignore")
|
| 16 |
|
| 17 |
+
SCRIPT_DIR = Path(__file__).resolve().parent
|
| 18 |
+
DEFAULT_MODEL_DIR = SCRIPT_DIR / 'train_log'
|
| 19 |
+
|
| 20 |
parser = argparse.ArgumentParser(description='Interpolation for a pair of images')
|
| 21 |
parser.add_argument('--video', dest='video', type=str, default=None)
|
| 22 |
parser.add_argument('--output', dest='output', type=str, default=None)
|
| 23 |
parser.add_argument('--img', dest='img', type=str, default=None)
|
| 24 |
parser.add_argument('--montage', dest='montage', action='store_true', help='montage origin video')
|
| 25 |
+
parser.add_argument('--model', dest='modelDir', type=str, default=str(DEFAULT_MODEL_DIR), help='directory with trained model files')
|
| 26 |
parser.add_argument('--fp16', dest='fp16', action='store_true', help='fp16 mode for faster and more lightweight inference on cards with Tensor Cores')
|
| 27 |
parser.add_argument('--UHD', dest='UHD', action='store_true', help='support 4k video')
|
| 28 |
parser.add_argument('--scale', dest='scale', type=float, default=1.0, help='Try scale=0.5 for 4k video')
|
|
|
|
| 32 |
parser.add_argument('--ext', dest='ext', type=str, default='mp4', help='vid_out video extension')
|
| 33 |
parser.add_argument('--exp', dest='exp', type=int, default=1)
|
| 34 |
args = parser.parse_args()
|
| 35 |
+
model_dir = Path(args.modelDir)
|
| 36 |
+
if not model_dir.is_absolute():
|
| 37 |
+
script_relative = (SCRIPT_DIR / model_dir).resolve()
|
| 38 |
+
cwd_relative = (Path.cwd() / model_dir).resolve()
|
| 39 |
+
if script_relative.exists():
|
| 40 |
+
model_dir = script_relative
|
| 41 |
+
elif cwd_relative.exists():
|
| 42 |
+
model_dir = cwd_relative
|
| 43 |
+
else:
|
| 44 |
+
model_dir = script_relative
|
| 45 |
+
args.modelDir = str(model_dir)
|
| 46 |
assert (not args.video is None or not args.img is None)
|
| 47 |
if args.skip:
|
| 48 |
print("skip flag is abandoned, please refer to issue #207.")
|
SPACE_DEPLOYMENT.md
CHANGED
|
@@ -22,6 +22,7 @@ Default behavior:
|
|
| 22 |
- Hugging Face Space without local assets: download from `bytedance-research/Lance` into `/data/lance_models`
|
| 23 |
- Video tasks use the pre-fetched `Lance_3B_Video` assets when available.
|
| 24 |
- Startup prefetch downloads the model snapshots on CPU so the first GPU request does not pay that cold-start cost.
|
|
|
|
| 25 |
- Image tasks unload the active video model first, then load `Lance_3B`.
|
| 26 |
- Switching back to a video task unloads `Lance_3B`, then reloads `Lance_3B_Video`.
|
| 27 |
|
|
|
|
| 22 |
- Hugging Face Space without local assets: download from `bytedance-research/Lance` into `/data/lance_models`
|
| 23 |
- Video tasks use the pre-fetched `Lance_3B_Video` assets when available.
|
| 24 |
- Startup prefetch downloads the model snapshots on CPU so the first GPU request does not pay that cold-start cost.
|
| 25 |
+
- RIFE interpolation is optional. The app now falls back to the original video if the RIFE script or checkpoint is missing or incompatible. To restore interpolation, keep the RIFE code and `RIFE/train_log/flownet.pkl` from the same release.
|
| 26 |
- Image tasks unload the active video model first, then load `Lance_3B`.
|
| 27 |
- Switching back to a video task unloads `Lance_3B`, then reloads `Lance_3B_Video`.
|
| 28 |
|
app.py
CHANGED
|
@@ -66,6 +66,10 @@ from modeling.vit.qwen2_5_vl_vit import Qwen2_5_VisionTransformerPretrainedModel
|
|
| 66 |
|
| 67 |
|
| 68 |
REPO_ROOT = Path(__file__).resolve().parent
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
GRADIO_TMP_ROOT = Path(os.getenv("LANCE_GRADIO_TMP_ROOT", "/tmp/lance_gradio")).expanduser()
|
| 70 |
TMP_INPUT_DIR = GRADIO_TMP_ROOT / "inputs"
|
| 71 |
RESULTS_ROOT = GRADIO_TMP_ROOT / "results"
|
|
@@ -2321,10 +2325,16 @@ def find_generated_image(save_dir: Path) -> Optional[Path]:
|
|
| 2321 |
|
| 2322 |
|
| 2323 |
def run_rife_interpolation(video_path: Path, device_id: int, exp: int = 1) -> tuple[Path, str]:
|
| 2324 |
-
|
| 2325 |
-
rife_script = rife_dir / "inference_video.py"
|
| 2326 |
if not rife_script.exists():
|
| 2327 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2328 |
|
| 2329 |
output_path = video_path.with_name(f"{video_path.stem}_rife_{2 ** exp}x{video_path.suffix}")
|
| 2330 |
env = os.environ.copy()
|
|
@@ -2339,7 +2349,7 @@ def run_rife_interpolation(video_path: Path, device_id: int, exp: int = 1) -> tu
|
|
| 2339 |
"--output",
|
| 2340 |
str(output_path),
|
| 2341 |
"--model",
|
| 2342 |
-
str(
|
| 2343 |
]
|
| 2344 |
rife_start = time.perf_counter()
|
| 2345 |
try:
|
|
@@ -2352,18 +2362,27 @@ def run_rife_interpolation(video_path: Path, device_id: int, exp: int = 1) -> tu
|
|
| 2352 |
text=True,
|
| 2353 |
)
|
| 2354 |
except subprocess.CalledProcessError as exc:
|
| 2355 |
-
|
| 2356 |
-
|
| 2357 |
-
[
|
| 2358 |
-
|
| 2359 |
-
|
| 2360 |
-
|
| 2361 |
-
|
| 2362 |
-
|
| 2363 |
-
|
| 2364 |
-
|
| 2365 |
if not output_path.exists():
|
| 2366 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2367 |
elapsed = time.perf_counter() - rife_start
|
| 2368 |
log = "\n".join(
|
| 2369 |
[
|
|
@@ -2855,7 +2874,7 @@ class LanceT2VV2TPipeline:
|
|
| 2855 |
original_video_path = video_path
|
| 2856 |
rife_log = ""
|
| 2857 |
rife_error = ""
|
| 2858 |
-
frame_interpolation_enabled = normalize_frame_interpolation(enable_frame_interpolation) and internal_task in {TASK_T2V, TASK_VIDEO_EDIT}
|
| 2859 |
if frame_interpolation_enabled and video_path is not None:
|
| 2860 |
try:
|
| 2861 |
clean_memory()
|
|
@@ -3425,7 +3444,7 @@ def update_task_ui(task: str):
|
|
| 3425 |
show_output_resolution = is_text_to_visual_task
|
| 3426 |
show_input_video = internal_task in {TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
|
| 3427 |
show_input_image = internal_task in {TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
|
| 3428 |
-
show_frame_interpolation_settings = internal_task in {TASK_T2V, TASK_VIDEO_EDIT}
|
| 3429 |
show_video_resolution_settings = internal_task == TASK_T2V
|
| 3430 |
|
| 3431 |
return (
|
|
@@ -3453,7 +3472,7 @@ def update_task_ui(task: str):
|
|
| 3453 |
gr.update(choices=get_aspect_ratio_choices_for_task(internal_task), value=aspect_ratio_value, visible=show_aspect_ratio),
|
| 3454 |
gr.update(value=height_value),
|
| 3455 |
gr.update(value=width_value),
|
| 3456 |
-
gr.update(visible=show_frame_interpolation_settings, value=DEFAULT_FRAME_INTERPOLATION),
|
| 3457 |
gr.update(choices=get_output_resolution_choices_for_task(internal_task, resolution_value), value=size_markdown, visible=show_output_resolution),
|
| 3458 |
gr.update(visible=internal_task == TASK_T2V, value=DEFAULT_VIDEO_DURATION_SECONDS),
|
| 3459 |
gr.update(choices=resolution_choices, value=resolution_value, visible=show_video_resolution_settings),
|
|
@@ -3520,7 +3539,7 @@ def build_demo() -> gr.Blocks:
|
|
| 3520 |
label="Frame Interpolation",
|
| 3521 |
show_label=False,
|
| 3522 |
choices=[FRAME_INTERPOLATION_YES, FRAME_INTERPOLATION_NO],
|
| 3523 |
-
value=DEFAULT_FRAME_INTERPOLATION,
|
| 3524 |
elem_classes=["generation-control", "generation-two-line-label"],
|
| 3525 |
)
|
| 3526 |
with gr.Row(elem_classes=["generation-controls-row", "aspect-ratio-row"]) as aspect_ratio_row:
|
|
|
|
| 66 |
|
| 67 |
|
| 68 |
REPO_ROOT = Path(__file__).resolve().parent
|
| 69 |
+
RIFE_DIR = REPO_ROOT / "RIFE"
|
| 70 |
+
RIFE_SCRIPT_PATH = RIFE_DIR / "inference_video.py"
|
| 71 |
+
RIFE_MODEL_DIR = RIFE_DIR / "train_log"
|
| 72 |
+
RIFE_AVAILABLE = RIFE_SCRIPT_PATH.exists()
|
| 73 |
GRADIO_TMP_ROOT = Path(os.getenv("LANCE_GRADIO_TMP_ROOT", "/tmp/lance_gradio")).expanduser()
|
| 74 |
TMP_INPUT_DIR = GRADIO_TMP_ROOT / "inputs"
|
| 75 |
RESULTS_ROOT = GRADIO_TMP_ROOT / "results"
|
|
|
|
| 2325 |
|
| 2326 |
|
| 2327 |
def run_rife_interpolation(video_path: Path, device_id: int, exp: int = 1) -> tuple[Path, str]:
|
| 2328 |
+
rife_script = RIFE_SCRIPT_PATH
|
|
|
|
| 2329 |
if not rife_script.exists():
|
| 2330 |
+
log = "\n".join(
|
| 2331 |
+
[
|
| 2332 |
+
"[rife] Frame interpolation skipped because the RIFE inference script is not available.",
|
| 2333 |
+
f"expected_script={rife_script}",
|
| 2334 |
+
f"input={video_path}",
|
| 2335 |
+
]
|
| 2336 |
+
).strip()
|
| 2337 |
+
return video_path, log
|
| 2338 |
|
| 2339 |
output_path = video_path.with_name(f"{video_path.stem}_rife_{2 ** exp}x{video_path.suffix}")
|
| 2340 |
env = os.environ.copy()
|
|
|
|
| 2349 |
"--output",
|
| 2350 |
str(output_path),
|
| 2351 |
"--model",
|
| 2352 |
+
str(RIFE_MODEL_DIR),
|
| 2353 |
]
|
| 2354 |
rife_start = time.perf_counter()
|
| 2355 |
try:
|
|
|
|
| 2362 |
text=True,
|
| 2363 |
)
|
| 2364 |
except subprocess.CalledProcessError as exc:
|
| 2365 |
+
log = "\n".join(
|
| 2366 |
+
[
|
| 2367 |
+
"[rife] Frame interpolation failed; returning the original generated video.",
|
| 2368 |
+
f"command=CUDA_VISIBLE_DEVICES={device_id} " + " ".join(command),
|
| 2369 |
+
f"returncode={exc.returncode}",
|
| 2370 |
+
exc.stdout.strip() if exc.stdout else "",
|
| 2371 |
+
exc.stderr.strip() if exc.stderr else "",
|
| 2372 |
+
]
|
| 2373 |
+
).strip()
|
| 2374 |
+
return video_path, log
|
| 2375 |
if not output_path.exists():
|
| 2376 |
+
log = "\n".join(
|
| 2377 |
+
[
|
| 2378 |
+
"[rife] Frame interpolation finished without producing an output file; returning the original generated video.",
|
| 2379 |
+
f"command=CUDA_VISIBLE_DEVICES={device_id} " + " ".join(command),
|
| 2380 |
+
f"expected_output={output_path}",
|
| 2381 |
+
completed.stdout.strip(),
|
| 2382 |
+
completed.stderr.strip(),
|
| 2383 |
+
]
|
| 2384 |
+
).strip()
|
| 2385 |
+
return video_path, log
|
| 2386 |
elapsed = time.perf_counter() - rife_start
|
| 2387 |
log = "\n".join(
|
| 2388 |
[
|
|
|
|
| 2874 |
original_video_path = video_path
|
| 2875 |
rife_log = ""
|
| 2876 |
rife_error = ""
|
| 2877 |
+
frame_interpolation_enabled = normalize_frame_interpolation(enable_frame_interpolation) and internal_task in {TASK_T2V, TASK_VIDEO_EDIT} and RIFE_AVAILABLE
|
| 2878 |
if frame_interpolation_enabled and video_path is not None:
|
| 2879 |
try:
|
| 2880 |
clean_memory()
|
|
|
|
| 3444 |
show_output_resolution = is_text_to_visual_task
|
| 3445 |
show_input_video = internal_task in {TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
|
| 3446 |
show_input_image = internal_task in {TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
|
| 3447 |
+
show_frame_interpolation_settings = internal_task in {TASK_T2V, TASK_VIDEO_EDIT} and RIFE_AVAILABLE
|
| 3448 |
show_video_resolution_settings = internal_task == TASK_T2V
|
| 3449 |
|
| 3450 |
return (
|
|
|
|
| 3472 |
gr.update(choices=get_aspect_ratio_choices_for_task(internal_task), value=aspect_ratio_value, visible=show_aspect_ratio),
|
| 3473 |
gr.update(value=height_value),
|
| 3474 |
gr.update(value=width_value),
|
| 3475 |
+
gr.update(visible=show_frame_interpolation_settings, value=DEFAULT_FRAME_INTERPOLATION if RIFE_AVAILABLE else FRAME_INTERPOLATION_NO),
|
| 3476 |
gr.update(choices=get_output_resolution_choices_for_task(internal_task, resolution_value), value=size_markdown, visible=show_output_resolution),
|
| 3477 |
gr.update(visible=internal_task == TASK_T2V, value=DEFAULT_VIDEO_DURATION_SECONDS),
|
| 3478 |
gr.update(choices=resolution_choices, value=resolution_value, visible=show_video_resolution_settings),
|
|
|
|
| 3539 |
label="Frame Interpolation",
|
| 3540 |
show_label=False,
|
| 3541 |
choices=[FRAME_INTERPOLATION_YES, FRAME_INTERPOLATION_NO],
|
| 3542 |
+
value=DEFAULT_FRAME_INTERPOLATION if RIFE_AVAILABLE else FRAME_INTERPOLATION_NO,
|
| 3543 |
elem_classes=["generation-control", "generation-two-line-label"],
|
| 3544 |
)
|
| 3545 |
with gr.Row(elem_classes=["generation-controls-row", "aspect-ratio-row"]) as aspect_ratio_row:
|