| import gradio as gr |
|
|
| """ |
| ===================================================== |
| Optical Flow: Predicting movement with the RAFT model |
| ===================================================== |
| |
| Optical flow is the task of predicting movement between two images, usually two |
| consecutive frames of a video. Optical flow models take two images as input, and |
| predict a flow: the flow indicates the displacement of every single pixel in the |
| first image, and maps it to its corresponding pixel in the second image. Flows |
| are (2, H, W)-dimensional tensors, where the first axis corresponds to the |
| predicted horizontal and vertical displacements. |
| |
| The following example illustrates how torchvision can be used to predict flows |
| using our implementation of the RAFT model. We will also see how to convert the |
| predicted flows to RGB images for visualization. |
| """ |
|
|
| import cv2 |
| import numpy as np |
| import os |
| import sys |
| import torch |
| from PIL import Image |
| import matplotlib.pyplot as plt |
| import torchvision.transforms.functional as F |
| from torchvision.io import read_video, read_image, ImageReadMode |
| from torchvision.models.optical_flow import Raft_Large_Weights |
| from torchvision.models.optical_flow import raft_large |
| from torchvision.io import write_jpeg |
| import torchvision.transforms as T |
|
|
| import tempfile |
| from pathlib import Path |
| from urllib.request import urlretrieve |
|
|
| from scipy.interpolate import LinearNDInterpolator |
| from imageio import imread, imwrite |
|
|
# Pick the compute device: use CUDA when PyTorch reports it as available,
# otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


# Instantiate RAFT-large with its default pretrained weights (download progress
# bar disabled), move it to the selected device and put it in evaluation mode.
model = (
    raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False)
    .to(device)
    .eval()
)
|
|
def write_flo(flow, filename):
    """
    Write optical flow in Middlebury .flo format.

    File layout (all values little-endian): a float32 tag ``202021.25`` (the
    ASCII bytes ``PIEH``), the width and the height as int32, followed by
    ``height * width`` interleaved ``(u, v)`` float32 pairs in row-major order.

    :param flow: optical flow map, as a ``torch.Tensor`` (any device, with or
        without gradients) or a NumPy array. Both the channels-last layout
        ``(H, W, 2)`` and the channels-first layout ``(2, H, W)`` are accepted;
        a channels-first input is transposed before writing.
    :param filename: optical flow file path to be saved
    :return: None
    :raises ValueError: if ``flow`` is not 3-dimensional or has no 2-element
        channel axis

    adapted from https://github.com/liruoteng/OpticalFlowToolkit/

    """
    if isinstance(flow, torch.Tensor):
        # detach() drops any autograd history so that .numpy() is allowed.
        flow = flow.detach().cpu().numpy()
    flow = np.asarray(flow)

    if flow.ndim != 3:
        raise ValueError(
            f"flow must be 3-dimensional, got shape {flow.shape}"
        )

    # Channels-first (2, H, W) -> channels-last (H, W, 2). An input whose last
    # axis already has size 2 is treated as channels-last and left untouched.
    if flow.shape[0] == 2 and flow.shape[2] != 2:
        flow = flow.transpose(1, 2, 0)

    if flow.shape[2] != 2:
        raise ValueError(
            f"flow must have shape (H, W, 2) or (2, H, W), got {flow.shape}"
        )

    height, width = flow.shape[:2]
    # Explicit little-endian dtypes: the format is defined as little-endian
    # regardless of the byte order of the machine writing it.
    payload = np.ascontiguousarray(flow, dtype="<f4")

    # The context manager closes the file even if one of the writes fails.
    with open(filename, "wb") as f:
        np.array([202021.25], dtype="<f4").tofile(f)
        np.array([width, height], dtype="<i4").tofile(f)
        payload.tofile(f)
|
|
|
|
| |
def infer(frameA, frameB):
    """
    Predict the optical flow from ``frameA`` to ``frameB`` with RAFT.

    Both frames are read from disk as RGB, resized to 520x960, passed through
    the preprocessing transforms bundled with the default RAFT-large weights
    and fed to the module-level ``model``. The flow of the model's last
    refinement iteration is rendered to a JPEG and also saved as a Middlebury
    ``.flo`` file. The saved flow has the resized 520x960 resolution, not the
    resolution of the input frames.

    :param frameA: path of the first frame (image file)
    :param frameB: path of the second frame (image file)
    :return: tuple ``(flow_jpeg_path, flo_file_path)`` of the two written files
    :raises ValueError: if either frame is missing
    """
    # Kept as a function-scope import, as in the original body.
    from torchvision.utils import flow_to_image

    if not frameA or not frameB:
        # Fail with a clear message instead of letting read_image("None") raise.
        raise ValueError("Both frame 1 and frame 2 must be provided.")

    # Force 3-channel RGB: the model expects 3-channel input, whereas
    # ImageReadMode.UNCHANGED would pass alpha or greyscale images through.
    input_frame_1 = read_image(str(frameA), ImageReadMode.RGB)
    input_frame_2 = read_image(str(frameB), ImageReadMode.RGB)
    # Log shapes rather than dumping full pixel tensors to stdout.
    print(f"FRAME 1: shape = {tuple(input_frame_1.shape)}, dtype = {input_frame_1.dtype}")
    print(f"FRAME 2: shape = {tuple(input_frame_2.shape)}, dtype = {input_frame_2.dtype}")

    # Add a leading batch dimension of size 1 to each frame.
    img1_batch = torch.stack([input_frame_1])
    img2_batch = torch.stack([input_frame_2])
    print(f"FRAME AFTER stack: shape = {tuple(img1_batch.shape)}")

    weights = Raft_Large_Weights.DEFAULT
    transforms = weights.transforms()

    def preprocess(img1_batch, img2_batch):
        # Resize both frames to the same fixed size (both sides divisible by
        # 8), then apply the transforms that ship with the weights.
        img1_batch = F.resize(img1_batch, size=[520, 960])
        img2_batch = F.resize(img2_batch, size=[520, 960])
        return transforms(img1_batch, img2_batch)

    img1_batch, img2_batch = preprocess(img1_batch, img2_batch)
    print(f"shape = {img1_batch.shape}, dtype = {img1_batch.dtype}")

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
    print(f"list_of_flows type = {type(list_of_flows)}")
    print(f"list_of_flows length = {len(list_of_flows)} = number of iterations of the model")

    # The last element is the final refinement and is the one that gets saved.
    predicted_flows = list_of_flows[-1]
    print(f"predicted_flows dtype = {predicted_flows.dtype}")
    print(f"predicted_flows shape = {predicted_flows.shape} = (N, 2, H, W)")
    print(f"predicted_flows min = {predicted_flows.min()}, predicted_flows max = {predicted_flows.max()}")

    # Single-pair batch: take the only flow, shape (2, H, W).
    predicted_flow = predicted_flows[0]
    print(f"predicted flow dtype = {predicted_flow.dtype}")
    print(f"predicted flow shape = {predicted_flow.shape}")

    # Write into a fresh directory per call so concurrent requests cannot
    # overwrite each other's results. The directory is not removed here.
    out_dir = Path(tempfile.mkdtemp(prefix="raft_flow_"))
    flow_jpg_path = str(out_dir / "predicted_flow.jpg")
    flo_path = str(out_dir / "flofile.flo")

    flow_img = flow_to_image(predicted_flow).to("cpu")
    write_jpeg(flow_img, flow_jpg_path)

    # The .flo format stores interleaved (u, v) pairs per pixel, so hand the
    # writer a channels-last (H, W, 2) view of the flow.
    write_flo(predicted_flow.permute(1, 2, 0), flo_path)

    return flow_jpg_path, flo_path
|
|
# HTML snippet rendered at the top of the page: a centred heading with the
# demo's name.
title="""
<div style="text-align: center; max-width: 500px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
margin-bottom: 10px;
"
>
<h1 style="font-weight: 600; margin-bottom: 7px;">
RAFT Optical Flow
</h1>
</div>

</div>
"""
# One-line HTML description rendered under the title.
description="<p style='text-align:center'>PyTorch way to Generate optical flow image & .flo file from 2 consecutive frames with RAFT model</p>"
# Custom CSS passed to gr.Blocks: limits the width of the element with id
# "col-container", centres it, and styles links.
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
"""
# Build the demo page: header, two image uploads, a submit button, the two
# outputs and a set of clickable examples, all wired to `infer`.
with gr.Blocks(css=css) as block:
    with gr.Column(elem_id="col-container"):
        # Page header.
        gr.HTML(title)
        gr.HTML(description)

        # Inputs: each upload is handed to `infer` as a file path.
        frame1_inp = gr.Image(source="upload", type="filepath", label="frame 1")
        frame2_inp = gr.Image(source="upload", type="filepath", label="frame 2")

        submit_btn = gr.Button("Submit")

        # Outputs: the rendered flow image and the .flo file.
        flow_img_out = gr.Image(label="flow image")
        flow_file_out = gr.File(label="flow file")

        # Example frame pairs, given as image file names.
        examples = [
            ["basket1.jpg", "basket2.jpg"],
            ["frame1.jpg", "frame2.jpg"],
        ]
        ex = gr.Examples(
            examples=examples,
            fn=infer,
            inputs=[frame1_inp, frame2_inp],
            outputs=[flow_img_out, flow_file_out],
            cache_examples=True,
            run_on_click=True,
        )

    # Run `infer` on the two uploaded frames when the button is pressed.
    submit_btn.click(
        fn=infer,
        inputs=[frame1_inp, frame2_inp],
        outputs=[flow_img_out, flow_file_out],
    )


block.launch()