| # Last modified: 2024-02-08 | |
| # | |
| # Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # -------------------------------------------------------------------------- | |
| # If you find this code useful, we kindly ask you to cite our paper in your work. | |
| # Please find bibtex at: https://github.com/prs-eth/Marigold#-citation | |
| # If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold. | |
| # More information about the method can be found at https://marigoldmonodepth.github.io | |
| # -------------------------------------------------------------------------- | |
| from .base_depth_dataset import BaseDepthDataset, DepthFileNameMode | |
| import torch | |
| from torchvision.transforms import InterpolationMode, Resize, CenterCrop | |
| import torchvision.transforms as transforms | |
class DepthAnythingDataset(BaseDepthDataset):
    """Depth dataset whose depth maps are used as stored, without decoding.

    Compared with what the base class is given here:

    * depth files are returned exactly as ``_read_image`` yields them
      (no unit conversion or rescaling is applied on read);
    * training normalization is a fixed affine map ``d / 255 * 2 - 1``
      (presumably the stored depth lies in ``[0, 255]`` -- confirm against
      the data on disk);
    * resizing is done with ``Resize`` followed by ``CenterCrop`` instead of
      a single direct resize.
    """

    def __init__(self, **kwargs) -> None:
        """Forward ``kwargs`` to the base class with fixed dataset parameters.

        The depth bounds ``(-1, 256)`` bracket the ``[0, 255]`` range assumed
        by the normalization in ``_training_preprocess``; how the base class
        uses them is not visible here.
        """
        super().__init__(
            min_depth=-1,
            max_depth=256,
            has_filled_depth=False,
            name_mode=DepthFileNameMode.id,
            **kwargs,
        )

    def _read_depth_file(self, rel_path):
        """Return the depth image at ``rel_path`` as read, with no decoding."""
        return self._read_image(rel_path)

    def _training_preprocess(self, rasters):
        """Augment, normalize and optionally resize a dict of training rasters.

        Args:
            rasters: mapping of raster name to tensor. The keys
                ``depth_raw_linear``, ``depth_filled_linear`` are read, and
                ``valid_mask_filled`` is read when
                ``self.move_invalid_to_far_plane`` is set.

        Returns:
            The raster mapping with ``depth_raw_norm`` and
            ``depth_filled_norm`` added. When ``self.resize_to_hw`` is set, a
            new dict is returned in which every entry has been resized and
            center-cropped.
        """

        def _normalize(linear_depth):
            # Fixed affine map: 0 -> -1 and 255 -> +1.
            return linear_depth / 255.0 * 2.0 - 1.0

        # Optional augmentation, delegated to the base-class helper.
        if self.augm_args is not None:
            rasters = self._augment_data(rasters)

        rasters["depth_raw_norm"] = _normalize(rasters["depth_raw_linear"])
        rasters["depth_filled_norm"] = _normalize(rasters["depth_filled_linear"])

        # Overwrite invalid pixels of the filled depth with the far-plane
        # value; which end of the normalized range is "far" is decided by the
        # depth transform.
        if self.move_invalid_to_far_plane:
            depth_tf = self.depth_transform
            far_value = (
                depth_tf.norm_max if depth_tf.far_plane_at_max else depth_tf.norm_min
            )
            invalid_pixels = ~rasters["valid_mask_filled"]
            rasters["depth_filled_norm"][invalid_pixels] = far_value

        target_hw = self.resize_to_hw
        if target_hw is None:
            return rasters

        # Resize is given a single int (the first entry of resize_to_hw) and
        # uses its default interpolation; CenterCrop then cuts the result to
        # the full resize_to_hw. The same pipeline is applied to every raster.
        resize_and_crop = transforms.Compose(
            [
                Resize(target_hw[0]),
                CenterCrop(target_hw),
            ]
        )
        return {name: resize_and_crop(raster) for name, raster in rasters.items()}
| # def _load_depth_data(self, depth_rel_path, filled_rel_path): | |
| # # Read depth data | |
| # outputs = {} | |
| # depth_raw = self._read_depth_file(depth_rel_path).squeeze() | |
| # depth_raw_linear = torch.from_numpy(depth_raw).float().unsqueeze(0) # [1, H, W] [0, 255] | |
| # outputs["depth_raw_linear"] = depth_raw_linear.clone() | |
| # | |
| # if self.has_filled_depth: | |
| # depth_filled = self._read_depth_file(filled_rel_path).squeeze() | |
| # depth_filled_linear = torch.from_numpy(depth_filled).float().unsqueeze(0) | |
| # outputs["depth_filled_linear"] = depth_filled_linear | |
| # else: | |
| # outputs["depth_filled_linear"] = depth_raw_linear.clone() | |
| # | |
| # return outputs |