ffy2000 committed on
Commit
b3b72df
·
1 Parent(s): 14972c1

Vendor RIFE into repo

Browse files
Files changed (4) hide show
  1. README.md +2 -0
  2. RIFE/inference_video.py +16 -1
  3. SPACE_DEPLOYMENT.md +1 -0
  4. app.py +38 -19
README.md CHANGED
@@ -250,6 +250,8 @@ pip install --no-cache-dir --no-deps --force-reinstall \
250
 
251
  At runtime, `app.py` still keeps a startup fallback for flash-attn and model prefetch, but the Space should already have the right packages installed before the UI appears.
252
 
 
 
253
  ### Download Model Weights
254
 
255
  Please download all necessary model checkpoints from [Lance-3B on Hugging Face](https://huggingface.co/bytedance-research/Lance) and place them in the `downloads/` directory.
 
250
 
251
  At runtime, `app.py` still keeps a startup fallback for flash-attn and model prefetch, but the Space should already have the right packages installed before the UI appears.
252
 
253
+ RIFE frame interpolation is treated as an optional post-processing step. If the RIFE code and `RIFE/train_log/flownet.pkl` are not a matching pair, the app will keep the generated video and skip interpolation instead of failing the whole request.
254
+
255
  ### Download Model Weights
256
 
257
  Please download all necessary model checkpoints from [Lance-3B on Hugging Face](https://huggingface.co/bytedance-research/Lance) and place them in the `downloads/` directory.
RIFE/inference_video.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import cv2
3
  import torch
4
  import argparse
@@ -13,12 +14,15 @@ from model.pytorch_msssim import ssim_matlab
13
 
14
  warnings.filterwarnings("ignore")
15
 
 
 
 
16
  parser = argparse.ArgumentParser(description='Interpolation for a pair of images')
17
  parser.add_argument('--video', dest='video', type=str, default=None)
18
  parser.add_argument('--output', dest='output', type=str, default=None)
19
  parser.add_argument('--img', dest='img', type=str, default=None)
20
  parser.add_argument('--montage', dest='montage', action='store_true', help='montage origin video')
21
- parser.add_argument('--model', dest='modelDir', type=str, default='train_log', help='directory with trained model files')
22
  parser.add_argument('--fp16', dest='fp16', action='store_true', help='fp16 mode for faster and more lightweight inference on cards with Tensor Cores')
23
  parser.add_argument('--UHD', dest='UHD', action='store_true', help='support 4k video')
24
  parser.add_argument('--scale', dest='scale', type=float, default=1.0, help='Try scale=0.5 for 4k video')
@@ -28,6 +32,17 @@ parser.add_argument('--png', dest='png', action='store_true', help='whether to v
28
  parser.add_argument('--ext', dest='ext', type=str, default='mp4', help='vid_out video extension')
29
  parser.add_argument('--exp', dest='exp', type=int, default=1)
30
  args = parser.parse_args()
 
 
 
 
 
 
 
 
 
 
 
31
  assert (not args.video is None or not args.img is None)
32
  if args.skip:
33
  print("skip flag is abandoned, please refer to issue #207.")
 
1
  import os
2
+ from pathlib import Path
3
  import cv2
4
  import torch
5
  import argparse
 
14
 
15
  warnings.filterwarnings("ignore")
16
 
17
+ SCRIPT_DIR = Path(__file__).resolve().parent
18
+ DEFAULT_MODEL_DIR = SCRIPT_DIR / 'train_log'
19
+
20
  parser = argparse.ArgumentParser(description='Interpolation for a pair of images')
21
  parser.add_argument('--video', dest='video', type=str, default=None)
22
  parser.add_argument('--output', dest='output', type=str, default=None)
23
  parser.add_argument('--img', dest='img', type=str, default=None)
24
  parser.add_argument('--montage', dest='montage', action='store_true', help='montage origin video')
25
+ parser.add_argument('--model', dest='modelDir', type=str, default=str(DEFAULT_MODEL_DIR), help='directory with trained model files')
26
  parser.add_argument('--fp16', dest='fp16', action='store_true', help='fp16 mode for faster and more lightweight inference on cards with Tensor Cores')
27
  parser.add_argument('--UHD', dest='UHD', action='store_true', help='support 4k video')
28
  parser.add_argument('--scale', dest='scale', type=float, default=1.0, help='Try scale=0.5 for 4k video')
 
32
  parser.add_argument('--ext', dest='ext', type=str, default='mp4', help='vid_out video extension')
33
  parser.add_argument('--exp', dest='exp', type=int, default=1)
34
  args = parser.parse_args()
35
+ model_dir = Path(args.modelDir)
36
+ if not model_dir.is_absolute():
37
+ script_relative = (SCRIPT_DIR / model_dir).resolve()
38
+ cwd_relative = (Path.cwd() / model_dir).resolve()
39
+ if script_relative.exists():
40
+ model_dir = script_relative
41
+ elif cwd_relative.exists():
42
+ model_dir = cwd_relative
43
+ else:
44
+ model_dir = script_relative
45
+ args.modelDir = str(model_dir)
46
  assert (not args.video is None or not args.img is None)
47
  if args.skip:
48
  print("skip flag is abandoned, please refer to issue #207.")
SPACE_DEPLOYMENT.md CHANGED
@@ -22,6 +22,7 @@ Default behavior:
22
  - Hugging Face Space without local assets: download from `bytedance-research/Lance` into `/data/lance_models`
23
  - Video tasks use the pre-fetched `Lance_3B_Video` assets when available.
24
  - Startup prefetch downloads the model snapshots on CPU so the first GPU request does not pay that cold-start cost.
 
25
  - Image tasks unload the active video model first, then load `Lance_3B`.
26
  - Switching back to a video task unloads `Lance_3B`, then reloads `Lance_3B_Video`.
27
 
 
22
  - Hugging Face Space without local assets: download from `bytedance-research/Lance` into `/data/lance_models`
23
  - Video tasks use the pre-fetched `Lance_3B_Video` assets when available.
24
  - Startup prefetch downloads the model snapshots on CPU so the first GPU request does not pay that cold-start cost.
25
+ - RIFE interpolation is optional. The app now falls back to the original video if the RIFE script or checkpoint is missing or incompatible. To restore interpolation, keep the RIFE code and `RIFE/train_log/flownet.pkl` from the same release.
26
  - Image tasks unload the active video model first, then load `Lance_3B`.
27
  - Switching back to a video task unloads `Lance_3B`, then reloads `Lance_3B_Video`.
28
 
app.py CHANGED
@@ -66,6 +66,10 @@ from modeling.vit.qwen2_5_vl_vit import Qwen2_5_VisionTransformerPretrainedModel
66
 
67
 
68
  REPO_ROOT = Path(__file__).resolve().parent
 
 
 
 
69
  GRADIO_TMP_ROOT = Path(os.getenv("LANCE_GRADIO_TMP_ROOT", "/tmp/lance_gradio")).expanduser()
70
  TMP_INPUT_DIR = GRADIO_TMP_ROOT / "inputs"
71
  RESULTS_ROOT = GRADIO_TMP_ROOT / "results"
@@ -2321,10 +2325,16 @@ def find_generated_image(save_dir: Path) -> Optional[Path]:
2321
 
2322
 
2323
  def run_rife_interpolation(video_path: Path, device_id: int, exp: int = 1) -> tuple[Path, str]:
2324
- rife_dir = REPO_ROOT / "RIFE"
2325
- rife_script = rife_dir / "inference_video.py"
2326
  if not rife_script.exists():
2327
- raise FileNotFoundError(f"RIFE inference script not found: {rife_script}")
 
 
 
 
 
 
 
2328
 
2329
  output_path = video_path.with_name(f"{video_path.stem}_rife_{2 ** exp}x{video_path.suffix}")
2330
  env = os.environ.copy()
@@ -2339,7 +2349,7 @@ def run_rife_interpolation(video_path: Path, device_id: int, exp: int = 1) -> tu
2339
  "--output",
2340
  str(output_path),
2341
  "--model",
2342
- str(rife_dir / "train_log"),
2343
  ]
2344
  rife_start = time.perf_counter()
2345
  try:
@@ -2352,18 +2362,27 @@ def run_rife_interpolation(video_path: Path, device_id: int, exp: int = 1) -> tu
2352
  text=True,
2353
  )
2354
  except subprocess.CalledProcessError as exc:
2355
- raise RuntimeError(
2356
- "\n".join(
2357
- [
2358
- f"RIFE failed with exit code {exc.returncode}.",
2359
- f"command=CUDA_VISIBLE_DEVICES={device_id} " + " ".join(command),
2360
- exc.stdout.strip() if exc.stdout else "",
2361
- exc.stderr.strip() if exc.stderr else "",
2362
- ]
2363
- ).strip()
2364
- ) from exc
2365
  if not output_path.exists():
2366
- raise FileNotFoundError(f"RIFE completed but output video was not found: {output_path}")
 
 
 
 
 
 
 
 
 
2367
  elapsed = time.perf_counter() - rife_start
2368
  log = "\n".join(
2369
  [
@@ -2855,7 +2874,7 @@ class LanceT2VV2TPipeline:
2855
  original_video_path = video_path
2856
  rife_log = ""
2857
  rife_error = ""
2858
- frame_interpolation_enabled = normalize_frame_interpolation(enable_frame_interpolation) and internal_task in {TASK_T2V, TASK_VIDEO_EDIT}
2859
  if frame_interpolation_enabled and video_path is not None:
2860
  try:
2861
  clean_memory()
@@ -3425,7 +3444,7 @@ def update_task_ui(task: str):
3425
  show_output_resolution = is_text_to_visual_task
3426
  show_input_video = internal_task in {TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
3427
  show_input_image = internal_task in {TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
3428
- show_frame_interpolation_settings = internal_task in {TASK_T2V, TASK_VIDEO_EDIT}
3429
  show_video_resolution_settings = internal_task == TASK_T2V
3430
 
3431
  return (
@@ -3453,7 +3472,7 @@ def update_task_ui(task: str):
3453
  gr.update(choices=get_aspect_ratio_choices_for_task(internal_task), value=aspect_ratio_value, visible=show_aspect_ratio),
3454
  gr.update(value=height_value),
3455
  gr.update(value=width_value),
3456
- gr.update(visible=show_frame_interpolation_settings, value=DEFAULT_FRAME_INTERPOLATION),
3457
  gr.update(choices=get_output_resolution_choices_for_task(internal_task, resolution_value), value=size_markdown, visible=show_output_resolution),
3458
  gr.update(visible=internal_task == TASK_T2V, value=DEFAULT_VIDEO_DURATION_SECONDS),
3459
  gr.update(choices=resolution_choices, value=resolution_value, visible=show_video_resolution_settings),
@@ -3520,7 +3539,7 @@ def build_demo() -> gr.Blocks:
3520
  label="Frame Interpolation",
3521
  show_label=False,
3522
  choices=[FRAME_INTERPOLATION_YES, FRAME_INTERPOLATION_NO],
3523
- value=DEFAULT_FRAME_INTERPOLATION,
3524
  elem_classes=["generation-control", "generation-two-line-label"],
3525
  )
3526
  with gr.Row(elem_classes=["generation-controls-row", "aspect-ratio-row"]) as aspect_ratio_row:
 
66
 
67
 
68
  REPO_ROOT = Path(__file__).resolve().parent
69
+ RIFE_DIR = REPO_ROOT / "RIFE"
70
+ RIFE_SCRIPT_PATH = RIFE_DIR / "inference_video.py"
71
+ RIFE_MODEL_DIR = RIFE_DIR / "train_log"
72
+ RIFE_AVAILABLE = RIFE_SCRIPT_PATH.exists()
73
  GRADIO_TMP_ROOT = Path(os.getenv("LANCE_GRADIO_TMP_ROOT", "/tmp/lance_gradio")).expanduser()
74
  TMP_INPUT_DIR = GRADIO_TMP_ROOT / "inputs"
75
  RESULTS_ROOT = GRADIO_TMP_ROOT / "results"
 
2325
 
2326
 
2327
  def run_rife_interpolation(video_path: Path, device_id: int, exp: int = 1) -> tuple[Path, str]:
2328
+ rife_script = RIFE_SCRIPT_PATH
 
2329
  if not rife_script.exists():
2330
+ log = "\n".join(
2331
+ [
2332
+ "[rife] Frame interpolation skipped because the RIFE inference script is not available.",
2333
+ f"expected_script={rife_script}",
2334
+ f"input={video_path}",
2335
+ ]
2336
+ ).strip()
2337
+ return video_path, log
2338
 
2339
  output_path = video_path.with_name(f"{video_path.stem}_rife_{2 ** exp}x{video_path.suffix}")
2340
  env = os.environ.copy()
 
2349
  "--output",
2350
  str(output_path),
2351
  "--model",
2352
+ str(RIFE_MODEL_DIR),
2353
  ]
2354
  rife_start = time.perf_counter()
2355
  try:
 
2362
  text=True,
2363
  )
2364
  except subprocess.CalledProcessError as exc:
2365
+ log = "\n".join(
2366
+ [
2367
+ "[rife] Frame interpolation failed; returning the original generated video.",
2368
+ f"command=CUDA_VISIBLE_DEVICES={device_id} " + " ".join(command),
2369
+ f"returncode={exc.returncode}",
2370
+ exc.stdout.strip() if exc.stdout else "",
2371
+ exc.stderr.strip() if exc.stderr else "",
2372
+ ]
2373
+ ).strip()
2374
+ return video_path, log
2375
  if not output_path.exists():
2376
+ log = "\n".join(
2377
+ [
2378
+ "[rife] Frame interpolation finished without producing an output file; returning the original generated video.",
2379
+ f"command=CUDA_VISIBLE_DEVICES={device_id} " + " ".join(command),
2380
+ f"expected_output={output_path}",
2381
+ completed.stdout.strip(),
2382
+ completed.stderr.strip(),
2383
+ ]
2384
+ ).strip()
2385
+ return video_path, log
2386
  elapsed = time.perf_counter() - rife_start
2387
  log = "\n".join(
2388
  [
 
2874
  original_video_path = video_path
2875
  rife_log = ""
2876
  rife_error = ""
2877
+ frame_interpolation_enabled = normalize_frame_interpolation(enable_frame_interpolation) and internal_task in {TASK_T2V, TASK_VIDEO_EDIT} and RIFE_AVAILABLE
2878
  if frame_interpolation_enabled and video_path is not None:
2879
  try:
2880
  clean_memory()
 
3444
  show_output_resolution = is_text_to_visual_task
3445
  show_input_video = internal_task in {TASK_VIDEO_EDIT, TASK_X2T_VIDEO}
3446
  show_input_image = internal_task in {TASK_IMAGE_EDIT, TASK_X2T_IMAGE}
3447
+ show_frame_interpolation_settings = internal_task in {TASK_T2V, TASK_VIDEO_EDIT} and RIFE_AVAILABLE
3448
  show_video_resolution_settings = internal_task == TASK_T2V
3449
 
3450
  return (
 
3472
  gr.update(choices=get_aspect_ratio_choices_for_task(internal_task), value=aspect_ratio_value, visible=show_aspect_ratio),
3473
  gr.update(value=height_value),
3474
  gr.update(value=width_value),
3475
+ gr.update(visible=show_frame_interpolation_settings, value=DEFAULT_FRAME_INTERPOLATION if RIFE_AVAILABLE else FRAME_INTERPOLATION_NO),
3476
  gr.update(choices=get_output_resolution_choices_for_task(internal_task, resolution_value), value=size_markdown, visible=show_output_resolution),
3477
  gr.update(visible=internal_task == TASK_T2V, value=DEFAULT_VIDEO_DURATION_SECONDS),
3478
  gr.update(choices=resolution_choices, value=resolution_value, visible=show_video_resolution_settings),
 
3539
  label="Frame Interpolation",
3540
  show_label=False,
3541
  choices=[FRAME_INTERPOLATION_YES, FRAME_INTERPOLATION_NO],
3542
+ value=DEFAULT_FRAME_INTERPOLATION if RIFE_AVAILABLE else FRAME_INTERPOLATION_NO,
3543
  elem_classes=["generation-control", "generation-two-line-label"],
3544
  )
3545
  with gr.Row(elem_classes=["generation-controls-row", "aspect-ratio-row"]) as aspect_ratio_row: