| from PIL import Image |
| import torch |
| import torchvision.transforms as TF |
|
|
|
|
def preprocess_images(images_in):
    """
    Load and preprocess images for model input.

    This assumes the images should have the same shape for easier batching,
    but our model can also work well with different shapes.

    Args:
        images_in (list): List of images as numpy arrays (H, W, C) uint8
            — each is passed to ``PIL.Image.fromarray``.

    Returns:
        torch.Tensor: Batched tensor of preprocessed images with shape (N, 3, H, W)

    Raises:
        ValueError: If the input list is empty
        NotImplementedError: If a resized image would be taller than 518px
            (portrait orientation is not supported yet)

    Notes:
        - Images with different dimensions will be padded with white (value=1.0)
        - A warning is printed when images have different shapes
        - The function ensures width=518px while maintaining aspect ratio
        - Height is adjusted to be divisible by 14 for compatibility with model requirements
    """
    if len(images_in) == 0:
        raise ValueError("At least 1 image is required")

    images = []
    shapes = set()
    to_tensor = TF.ToTensor()

    for arr in images_in:
        img = Image.fromarray(arr)

        # Flatten any alpha channel onto a white background before RGB conversion,
        # so transparent regions become white rather than black.
        if img.mode == "RGBA":
            background = Image.new("RGBA", img.size, (255, 255, 255, 255))
            img = Image.alpha_composite(background, img)

        img = img.convert("RGB")

        width, height = img.size
        new_width = 518
        # Keep aspect ratio, snapping height to the nearest multiple of 14
        # (the model's patch size).
        new_height = round(height * (new_width / width) / 14) * 14

        img = img.resize((new_width, new_height), Image.Resampling.BICUBIC)
        img = to_tensor(img)  # (3, H, W), float values in [0, 1]

        if new_height > 518:
            raise NotImplementedError("Don't support portrait mode for now")

        shapes.add((img.shape[1], img.shape[2]))
        images.append(img)

    # Different aspect ratios yield different heights at the fixed 518px width;
    # pad with white (1.0) to the largest shape so torch.stack can batch them.
    if len(shapes) > 1:
        print(f"Warning: images have different shapes {shapes}, padding with white")
        max_h = max(s[0] for s in shapes)
        max_w = max(s[1] for s in shapes)
        images = [
            torch.nn.functional.pad(
                im,
                (0, max_w - im.shape[2], 0, max_h - im.shape[1]),
                mode="constant",
                value=1.0,
            )
            for im in images
        ]

    # torch.stack over (3, H, W) tensors always yields a 4-D (N, 3, H, W)
    # tensor, including for a single image — no extra unsqueeze is needed.
    images = torch.stack(images)

    return images
|
|