Spaces:

bytedance-research
/

Lance

Running on Zero

App Files Files Community

ffy2000 committed 4 days ago

Commit

b3b72df

1 Parent(s): 14972c1

Vendor RIFE into repo

Browse files

Files changed (4) hide show

README.md +2 -0
RIFE/inference_video.py +16 -1
SPACE_DEPLOYMENT.md +1 -0
app.py +38 -19

README.md CHANGED Viewed

@@ -250,6 +250,8 @@ pip install --no-cache-dir --no-deps --force-reinstall \
 At runtime, `app.py` still keeps a startup fallback for flash-attn and model prefetch, but the Space should already have the right packages installed before the UI appears.
 ### Download Model Weights
 Please download all necessary model checkpoints from [Lance-3B on Hugging Face](https://huggingface.co/bytedance-research/Lance) and place them in the `downloads/` directory.

 At runtime, `app.py` still keeps a startup fallback for flash-attn and model prefetch, but the Space should already have the right packages installed before the UI appears.
+RIFE frame interpolation is an optional post-processing step. If the RIFE inference script is missing, or the RIFE code and `RIFE/train_log/flownet.pkl` do not come from the same release, the app keeps the generated video and skips interpolation instead of failing the whole request.
 ### Download Model Weights
 Please download all necessary model checkpoints from [Lance-3B on Hugging Face](https://huggingface.co/bytedance-research/Lance) and place them in the `downloads/` directory.

RIFE/inference_video.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 import cv2
 import torch
 import argparse
@@ -13,12 +14,15 @@ from model.pytorch_msssim import ssim_matlab
 warnings.filterwarnings("ignore")
 parser = argparse.ArgumentParser(description='Interpolation for a pair of images')
 parser.add_argument('--video', dest='video', type=str, default=None)
 parser.add_argument('--output', dest='output', type=str, default=None)
 parser.add_argument('--img', dest='img', type=str, default=None)
 parser.add_argument('--montage', dest='montage', action='store_true', help='montage origin video')
-parser.add_argument('--model', dest='modelDir', type=str, default='train_log', help='directory with trained model files')
 parser.add_argument('--fp16', dest='fp16', action='store_true', help='fp16 mode for faster and more lightweight inference on cards with Tensor Cores')
 parser.add_argument('--UHD', dest='UHD', action='store_true', help='support 4k video')
 parser.add_argument('--scale', dest='scale', type=float, default=1.0, help='Try scale=0.5 for 4k video')
@@ -28,6 +32,17 @@ parser.add_argument('--png', dest='png', action='store_true', help='whether to v
 parser.add_argument('--ext', dest='ext', type=str, default='mp4', help='vid_out video extension')
 parser.add_argument('--exp', dest='exp', type=int, default=1)
 args = parser.parse_args()
 assert (not args.video is None or not args.img is None)
 if args.skip:
     print("skip flag is abandoned, please refer to issue #207.")

 import os
+from pathlib import Path
 import cv2
 import torch
 import argparse
 warnings.filterwarnings("ignore")
+SCRIPT_DIR = Path(__file__).resolve().parent
+DEFAULT_MODEL_DIR = SCRIPT_DIR / 'train_log'
 parser = argparse.ArgumentParser(description='Interpolation for a pair of images')
 parser.add_argument('--video', dest='video', type=str, default=None)
 parser.add_argument('--output', dest='output', type=str, default=None)
 parser.add_argument('--img', dest='img', type=str, default=None)
 parser.add_argument('--montage', dest='montage', action='store_true', help='montage origin video')
+parser.add_argument('--model', dest='modelDir', type=str, default=str(DEFAULT_MODEL_DIR), help='directory with trained model files')
 parser.add_argument('--fp16', dest='fp16', action='store_true', help='fp16 mode for faster and more lightweight inference on cards with Tensor Cores')
 parser.add_argument('--UHD', dest='UHD', action='store_true', help='support 4k video')
 parser.add_argument('--scale', dest='scale', type=float, default=1.0, help='Try scale=0.5 for 4k video')
 parser.add_argument('--ext', dest='ext', type=str, default='mp4', help='vid_out video extension')
 parser.add_argument('--exp', dest='exp', type=int, default=1)
 args = parser.parse_args()
+model_dir = Path(args.modelDir)
+if not model_dir.is_absolute():
+    script_relative = (SCRIPT_DIR / model_dir).resolve()
+    cwd_relative = (Path.cwd() / model_dir).resolve()
+    if script_relative.exists():
+        model_dir = script_relative
+    elif cwd_relative.exists():
+        model_dir = cwd_relative
+    else:
+        model_dir = script_relative
+args.modelDir = str(model_dir)
 assert (not args.video is None or not args.img is None)
 if args.skip:
     print("skip flag is abandoned, please refer to issue #207.")

SPACE_DEPLOYMENT.md CHANGED Viewed

@@ -22,6 +22,7 @@ Default behavior:
 - Hugging Face Space without local assets: download from `bytedance-research/Lance` into `/data/lance_models`
 - Video tasks use the pre-fetched `Lance_3B_Video` assets when available.
 - Startup prefetch downloads the model snapshots on CPU so the first GPU request does not pay that cold-start cost.
 - Image tasks unload the active video model first, then load `Lance_3B`.
 - Switching back to a video task unloads `Lance_3B`, then reloads `Lance_3B_Video`.

 - Hugging Face Space without local assets: download from `bytedance-research/Lance` into `/data/lance_models`
 - Video tasks use the pre-fetched `Lance_3B_Video` assets when available.
 - Startup prefetch downloads the model snapshots on CPU so the first GPU request does not pay that cold-start cost.
+- RIFE interpolation is optional. The app now falls back to the original video if the RIFE script or checkpoint is missing or incompatible. To restore interpolation, keep the RIFE code and `RIFE/train_log/flownet.pkl` from the same release.
 - Image tasks unload the active video model first, then load `Lance_3B`.
 - Switching back to a video task unloads `Lance_3B`, then reloads `Lance_3B_Video`.

app.py CHANGED Viewed

@@ -66,6 +66,10 @@ from modeling.vit.qwen2_5_vl_vit import Qwen2_5_VisionTransformerPretrainedModel
 REPO_ROOT = Path(__file__).resolve().parent
 GRADIO_TMP_ROOT = Path(os.getenv("LANCE_GRADIO_TMP_ROOT", "/tmp/lance_gradio")).expanduser()
 TMP_INPUT_DIR = GRADIO_TMP_ROOT / "inputs"
 RESULTS_ROOT = GRADIO_TMP_ROOT / "results"
@@ -2321,10 +2325,16 @@ def find_generated_image(save_dir: Path) -> Optional[Path]:
 def run_rife_interpolation(video_path: Path, device_id: int, exp: int = 1) -> tuple[Path, str]:
-    rife_dir = REPO_ROOT / "RIFE"
-    rife_script = rife_dir / "inference_video.py"
     if not rife_script.exists():
-        raise FileNotFoundError(f"RIFE inference script not found: {rife_script}")
     output_path = video_path.with_name(f"{video_path.stem}_rife_{2 ** exp}x{video_path.suffix}")
     env = os.environ.copy()
@@ -2339,7 +2349,7 @@ def run_rife_interpolation(video_path: Path, device_id: int, exp: int = 1) -> tu
         "--output",
         str(output_path),
         "--model",
-        str(rife_dir / "train_log"),
     ]
     rife_start = time.perf_counter()
     try:
@@ -2352,18 +2362,27 @@ def run_rife_interpolation(video_path: Path, device_id: int, exp: int = 1) -> tu
             text=True,
         )
     except subprocess.CalledProcessError as exc:
-        raise RuntimeError(
-            "\n".join(
-                [
-                    f"RIFE failed with exit code {exc.returncode}.",
-                    f"command=CUDA_VISIBLE_DEVICES={device_id} " + " ".join(command),
-                    exc.stdout.strip() if exc.stdout else "",
-                    exc.stderr.strip() if exc.stderr else "",
-                ]
-            ).strip()
-        ) from exc
     if not output_path.exists():
-        raise FileNotFoundError(f"RIFE completed but output video was not found: {output_path}")
     elapsed = time.perf_counter() - rife_start
     log = "\n".join(
         [
@@ -2855,7 +2874,7 @@ class LanceT2VV2TPipeline:
                 original_video_path = video_path
                 rife_log = ""
                 rife_error = ""
-                frame_interpolation_enabled = normalize_frame_interpolation(enable_frame_interpolation) and internal_task in {TASK_T2V, TASK_VIDEO_EDIT}
                 if frame_interpolation_enabled and video_path is not None:
                     try:
                         clean_memory()
@@ -3425,7 +3444,7 @@ def update_task_ui(task: str):
     show_output_resolution = is_text_to_visual_task
     show_input_video = internal_task in {TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
     show_input_image = internal_task in {TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
-    show_frame_interpolation_settings = internal_task in {TASK_T2V, TASK_VIDEO_EDIT}
     show_video_resolution_settings = internal_task == TASK_T2V
     return (
@@ -3453,7 +3472,7 @@ def update_task_ui(task: str):
         gr.update(choices=get_aspect_ratio_choices_for_task(internal_task), value=aspect_ratio_value, visible=show_aspect_ratio),
         gr.update(value=height_value),
         gr.update(value=width_value),
-        gr.update(visible=show_frame_interpolation_settings, value=DEFAULT_FRAME_INTERPOLATION),
         gr.update(choices=get_output_resolution_choices_for_task(internal_task, resolution_value), value=size_markdown, visible=show_output_resolution),
         gr.update(visible=internal_task == TASK_T2V, value=DEFAULT_VIDEO_DURATION_SECONDS),
         gr.update(choices=resolution_choices, value=resolution_value, visible=show_video_resolution_settings),
@@ -3520,7 +3539,7 @@ def build_demo() -> gr.Blocks:
                                 label="Frame Interpolation",
                                 show_label=False,
                                 choices=[FRAME_INTERPOLATION_YES, FRAME_INTERPOLATION_NO],
-                                value=DEFAULT_FRAME_INTERPOLATION,
                                 elem_classes=["generation-control", "generation-two-line-label"],
                             )
                     with gr.Row(elem_classes=["generation-controls-row", "aspect-ratio-row"]) as aspect_ratio_row:

 REPO_ROOT = Path(__file__).resolve().parent
+RIFE_DIR = REPO_ROOT / "RIFE"
+RIFE_SCRIPT_PATH = RIFE_DIR / "inference_video.py"
+RIFE_MODEL_DIR = RIFE_DIR / "train_log"
+RIFE_AVAILABLE = RIFE_SCRIPT_PATH.exists()
 GRADIO_TMP_ROOT = Path(os.getenv("LANCE_GRADIO_TMP_ROOT", "/tmp/lance_gradio")).expanduser()
 TMP_INPUT_DIR = GRADIO_TMP_ROOT / "inputs"
 RESULTS_ROOT = GRADIO_TMP_ROOT / "results"
 def run_rife_interpolation(video_path: Path, device_id: int, exp: int = 1) -> tuple[Path, str]:
+    rife_script = RIFE_SCRIPT_PATH
     if not rife_script.exists():
+        log = "\n".join(
+            [
+                "[rife] Frame interpolation skipped because the RIFE inference script is not available.",
+                f"expected_script={rife_script}",
+                f"input={video_path}",
+            ]
+        ).strip()
+        return video_path, log
     output_path = video_path.with_name(f"{video_path.stem}_rife_{2 ** exp}x{video_path.suffix}")
     env = os.environ.copy()
         "--output",
         str(output_path),
         "--model",
+        str(RIFE_MODEL_DIR),
     ]
     rife_start = time.perf_counter()
     try:
             text=True,
         )
     except subprocess.CalledProcessError as exc:
+        log = "\n".join(
+            [
+                "[rife] Frame interpolation failed; returning the original generated video.",
+                f"command=CUDA_VISIBLE_DEVICES={device_id} " + " ".join(command),
+                f"returncode={exc.returncode}",
+                exc.stdout.strip() if exc.stdout else "",
+                exc.stderr.strip() if exc.stderr else "",
+            ]
+        ).strip()
+        return video_path, log
     if not output_path.exists():
+        log = "\n".join(
+            [
+                "[rife] Frame interpolation finished without producing an output file; returning the original generated video.",
+                f"command=CUDA_VISIBLE_DEVICES={device_id} " + " ".join(command),
+                f"expected_output={output_path}",
+                completed.stdout.strip(),
+                completed.stderr.strip(),
+            ]
+        ).strip()
+        return video_path, log
     elapsed = time.perf_counter() - rife_start
     log = "\n".join(
         [
                 original_video_path = video_path
                 rife_log = ""
                 rife_error = ""
+                frame_interpolation_enabled = normalize_frame_interpolation(enable_frame_interpolation) and internal_task in {TASK_T2V, TASK_VIDEO_EDIT} and RIFE_AVAILABLE
                 if frame_interpolation_enabled and video_path is not None:
                     try:
                         clean_memory()
     show_output_resolution = is_text_to_visual_task
     show_input_video = internal_task in {TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
     show_input_image = internal_task in {TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
+    show_frame_interpolation_settings = internal_task in {TASK_T2V, TASK_VIDEO_EDIT} and RIFE_AVAILABLE
     show_video_resolution_settings = internal_task == TASK_T2V
     return (
         gr.update(choices=get_aspect_ratio_choices_for_task(internal_task), value=aspect_ratio_value, visible=show_aspect_ratio),
         gr.update(value=height_value),
         gr.update(value=width_value),
+        gr.update(visible=show_frame_interpolation_settings, value=DEFAULT_FRAME_INTERPOLATION if RIFE_AVAILABLE else FRAME_INTERPOLATION_NO),
         gr.update(choices=get_output_resolution_choices_for_task(internal_task, resolution_value), value=size_markdown, visible=show_output_resolution),
         gr.update(visible=internal_task == TASK_T2V, value=DEFAULT_VIDEO_DURATION_SECONDS),
         gr.update(choices=resolution_choices, value=resolution_value, visible=show_video_resolution_settings),
                                 label="Frame Interpolation",
                                 show_label=False,
                                 choices=[FRAME_INTERPOLATION_YES, FRAME_INTERPOLATION_NO],
+                                value=DEFAULT_FRAME_INTERPOLATION if RIFE_AVAILABLE else FRAME_INTERPOLATION_NO,
                                 elem_classes=["generation-control", "generation-two-line-label"],
                             )
                     with gr.Row(elem_classes=["generation-controls-row", "aspect-ratio-row"]) as aspect_ratio_row: