| """ |
| MIT License |
| |
| Copyright (c) 2021 Wilson Yan |
| |
| Permission is hereby granted, free of charge, to any person obtaining a copy |
| of this software and associated documentation files (the "Software"), to deal |
| in the Software without restriction, including without limitation the rights |
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| copies of the Software, and to permit persons to whom the Software is |
| furnished to do so, subject to the following conditions: |
| |
| The above copyright notice and this permission notice shall be included in all |
| copies or substantial portions of the Software. |
| |
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| SOFTWARE. |
| |
| |
| This file is copied from https://github.com/wilson1yan/VideoGPT/blob/master/videogpt/utils.py |
| We adapted it to Hugging Face AutoModel for easier model loading. |
| """ |
|
|
|
|
| |
| |
def shift_dim(x, src_dim=-1, dest_dim=-1, make_contiguous=True):
    """Move one dimension of ``x`` to a new position, keeping the others in order.

    Args:
        x: Object exposing ``.shape``, ``.permute()`` and ``.contiguous()``
            (e.g. a ``torch.Tensor``).
        src_dim: Index of the dimension to move. Negative values count from
            the end.
        dest_dim: Index the moved dimension occupies in the result. Negative
            values count from the end.
        make_contiguous: If true, call ``.contiguous()`` on the permuted
            result before returning it.

    Returns:
        ``x`` permuted so that dimension ``src_dim`` sits at ``dest_dim``;
        the remaining dimensions keep their relative order.

    Raises:
        AssertionError: If either index is out of range for ``x``.
    """
    n_dims = len(x.shape)
    if src_dim < 0:
        src_dim += n_dims
    if dest_dim < 0:
        dest_dim += n_dims

    assert 0 <= src_dim < n_dims and 0 <= dest_dim < n_dims

    # All dimensions except the source, in their original order; the source
    # is then slotted in at its destination index.
    permutation = [d for d in range(n_dims) if d != src_dim]
    permutation.insert(dest_dim, src_dim)

    x = x.permute(permutation)
    if make_contiguous:
        x = x.contiguous()
    return x
|
|
| |
| |
| |
| |
| |
def view_range(x, i, j, shape):
    """Reshape dimensions ``[i, j)`` of ``x`` into ``shape`` via ``x.view``.

    Args:
        x: Object exposing ``.shape`` and ``.view()`` (e.g. a
            ``torch.Tensor``).
        i: First dimension of the range to replace. Negative values count
            from the end.
        j: One past the last dimension of the range. ``None`` means "through
            the last dimension"; negative values count from the end.
        shape: Iterable of sizes that replaces dimensions ``i`` to ``j - 1``.

    Returns:
        ``x.view(...)`` with shape ``x.shape[:i] + shape + x.shape[j:]``.

    Raises:
        AssertionError: If the normalised indices do not satisfy
            ``0 <= i < j <= len(x.shape)``.
    """
    shape = tuple(shape)

    n_dims = len(x.shape)
    if i < 0:
        i += n_dims

    if j is None:
        j = n_dims
    elif j < 0:
        j += n_dims

    assert 0 <= i < j <= n_dims

    # Work on a plain tuple so the concatenation below does not depend on
    # how the shape object implements ``+``.
    x_shape = tuple(x.shape)
    target_shape = x_shape[:i] + shape + x_shape[j:]
    return x.view(target_shape)
|
|
| |
def tensor_slice(x, begin, size):
    """Slice ``x`` along its leading dimensions given start offsets and sizes.

    Args:
        x: Indexable object exposing ``.shape`` (e.g. a tensor or array).
        begin: Per-dimension start offsets; each must be non-negative.
        size: Per-dimension slice lengths. ``-1`` means "from ``begin`` to
            the end of that dimension".

    Returns:
        ``x[b0:b0 + s0, b1:b1 + s1, ...]`` for the dimensions covered by
        ``begin`` and ``size``.

    Raises:
        AssertionError: If any offset is negative or any resolved size is
            negative.
    """
    assert all(b >= 0 for b in begin)
    size = [dim - b if s == -1 else s
            for s, b, dim in zip(size, begin, x.shape)]
    assert all(s >= 0 for s in size)

    # Index with a tuple: indexing with a *list* of slices is deprecated in
    # PyTorch and rejected by recent NumPy releases.
    slices = tuple(slice(b, b + s) for b, s in zip(begin, size))
    return x[slices]
|
|
|
|
import math

import numpy as np
import skvideo.io


def save_video_grid(video, fname, nrow=None):
    """Tile a batch of videos into one grid video and write it to ``fname``.

    Args:
        video: 5-D tensor unpacked as ``(b, c, t, h, w)``. Values are scaled
            by 255 and cast to ``uint8``, so they are presumably expected in
            ``[0, 1]`` -- TODO confirm against callers.
        fname: Output path handed to ``skvideo.io.vwrite``.
        nrow: Number of grid rows. Defaults to ``ceil(sqrt(b))``.

    Side effects:
        Writes the grid video to ``fname`` and prints a confirmation line.
    """
    b, c, t, h, w = video.shape
    # (b, c, t, h, w) -> (b, t, h, w, c): channels last for the video writer.
    video = video.permute(0, 2, 3, 4, 1)
    # detach() so tensors that require grad can be converted; clip so values
    # outside the representable range saturate instead of wrapping on the cast.
    frames = video.detach().cpu().numpy() * 255
    video = np.clip(frames, 0, 255).astype('uint8')

    if nrow is None:
        nrow = math.ceil(math.sqrt(b))
    ncol = math.ceil(b / nrow)
    padding = 1
    video_grid = np.zeros((t, (padding + h) * nrow + padding,
                           (padding + w) * ncol + padding, c), dtype='uint8')
    for i in range(b):
        # Distinct names: the original reused ``c`` here, shadowing the
        # channel count unpacked above.
        row, col = divmod(i, ncol)

        # NOTE(review): tiles start at offset 0, so the blank border ends up
        # only on the bottom/right edges -- confirm whether a leading
        # ``padding`` offset was intended.
        start_r = (padding + h) * row
        start_c = (padding + w) * col
        video_grid[:, start_r:start_r + h, start_c:start_c + w] = video[i]

    # '-r': '5' is passed through as an input option (frame rate of 5).
    skvideo.io.vwrite(fname, video_grid, inputdict={'-r': '5'})
    print('saved videos to', fname)
|
|
|
|