Instructions to use zeyuren2002/EvalMDE with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use zeyuren2002/EvalMDE with Diffusers:
```bash
pip install -U diffusers transformers accelerate
```

```python
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("zeyuren2002/EvalMDE", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
```

- Notebooks
- Google Colab
- Kaggle
| # Last modified: 2025-01-14 | |
| # | |
| # Copyright 2025 Ziyang Song, USTC. All rights reserved. | |
| # | |
| # This file has been modified from the original version. | |
| # Original copyright (c) 2023 Bingxin Ke, ETH Zurich. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # -------------------------------------------------------------------------- | |
| # If you find this code useful, we kindly ask you to cite our paper in your work. | |
| # Please find bibtex at: https://github.com/indu1ge/DepthMaster#-citation | |
| # More information about the method can be found at https://indu1ge.github.io/DepthMaster_page | |
| # -------------------------------------------------------------------------- | |
| import torch | |
| from .base_depth_dataset import BaseDepthDataset, DepthFileNameMode | |
class KITTIDataset(BaseDepthDataset):
    """KITTI depth dataset with optional benchmark cropping and evaluation masks.

    Extends ``BaseDepthDataset`` with KITTI-specific depth decoding, an
    optional crop of RGB/depth tensors to the 352 x 1216 KITTI benchmark
    size, and an optional Garg / Eigen evaluation mask that is AND-ed with
    the base class' validity mask.
    """

    def __init__(
        self,
        kitti_bm_crop,  # Crop to KITTI benchmark size
        valid_mask_crop,  # Evaluation mask. [None, garg or eigen]
        **kwargs,
    ) -> None:
        """Configure the dataset.

        Args:
            kitti_bm_crop: When truthy, every loaded RGB and depth tensor is
                cropped with :meth:`kitti_benchmark_crop`.
            valid_mask_crop: One of ``None``, ``"garg"`` or ``"eigen"``;
                selects the evaluation region applied in
                :meth:`_get_valid_mask`.
            **kwargs: Forwarded unchanged to ``BaseDepthDataset.__init__``.

        Raises:
            AssertionError: If ``valid_mask_crop`` is not a supported value.
        """
        super().__init__(
            # KITTI data parameter
            min_depth=1e-5,
            max_depth=80,
            has_filled_depth=False,
            # NOTE: the keyword spelling ("egde") must match the base class.
            has_egde_mask=False,
            name_mode=DepthFileNameMode.id,
            **kwargs,
        )
        self.kitti_bm_crop = kitti_bm_crop
        self.valid_mask_crop = valid_mask_crop
        assert self.valid_mask_crop in [
            None,
            "garg",  # set evaluation mask according to Garg ECCV16
            "eigen",  # set evaluation mask according to Eigen NIPS14
        ], f"Unknown crop type: {self.valid_mask_crop}"

        # Filter out empty depth: drop entries whose second field is the
        # literal string "None".
        # (self.filenames is presumably populated by the base class -- confirm.)
        self.filenames = [f for f in self.filenames if "None" != f[1]]

    def _read_depth_file(self, rel_path):
        """Read a depth file and decode it by dividing the raw values by 256."""
        depth_in = self._read_image(rel_path)
        # Decode KITTI depth
        depth_decoded = depth_in / 256.0
        return depth_decoded

    def _load_rgb_data(self, rgb_rel_path):
        """Load RGB data via the base class, cropping every entry if enabled."""
        rgb_data = super()._load_rgb_data(rgb_rel_path)
        if self.kitti_bm_crop:
            rgb_data = {k: self.kitti_benchmark_crop(v) for k, v in rgb_data.items()}
        return rgb_data

    def _load_depth_data(self, depth_rel_path, filled_rel_path):
        """Load depth data via the base class, cropping every entry if enabled."""
        depth_data = super()._load_depth_data(depth_rel_path, filled_rel_path)
        if self.kitti_bm_crop:
            depth_data = {
                k: self.kitti_benchmark_crop(v) for k, v in depth_data.items()
            }
        return depth_data

    # Declared static: the function takes no ``self`` but is invoked as
    # ``self.kitti_benchmark_crop(v)``; without the decorator that call would
    # pass the instance as ``input_img`` and fail with a TypeError.
    @staticmethod
    def kitti_benchmark_crop(input_img):
        """
        Crop images to KITTI benchmark size

        The crop window is 352 x 1216, aligned to the bottom edge and centred
        horizontally. Only the last two dimensions are cropped, so any leading
        dimensions (channels, batch, ...) are preserved.

        Args:
            `input_img` (torch.Tensor): Input image to be cropped, with the
                last two dimensions being height and width.

        Returns:
            torch.Tensor:Cropped image.
        """
        KB_CROP_HEIGHT = 352
        KB_CROP_WIDTH = 1216

        height, width = input_img.shape[-2:]
        top_margin = int(height - KB_CROP_HEIGHT)
        left_margin = int((width - KB_CROP_WIDTH) / 2)

        # Ellipsis indexing is equivalent to the explicit 2-D / 3-D cases and
        # avoids leaving the result undefined for inputs of higher rank.
        return input_img[
            ...,
            top_margin : top_margin + KB_CROP_HEIGHT,
            left_margin : left_margin + KB_CROP_WIDTH,
        ]

    def _get_valid_mask(self, depth: torch.Tensor):
        """Return the base validity mask restricted to the evaluation region.

        When ``valid_mask_crop`` is ``None`` the base class mask is returned
        unchanged; otherwise it is AND-ed with a rectangular Garg or Eigen
        region whose bounds are fractions of the mask height and width.
        """
        # reference: https://github.com/cleinc/bts/blob/master/pytorch/bts_eval.py
        valid_mask = super()._get_valid_mask(depth)  # [1, H, W]

        if self.valid_mask_crop is not None:
            eval_mask = torch.zeros_like(valid_mask.squeeze()).bool()
            gt_height, gt_width = eval_mask.shape

            if "garg" == self.valid_mask_crop:
                eval_mask[
                    int(0.40810811 * gt_height) : int(0.99189189 * gt_height),
                    int(0.03594771 * gt_width) : int(0.96405229 * gt_width),
                ] = 1
            elif "eigen" == self.valid_mask_crop:
                eval_mask[
                    int(0.3324324 * gt_height) : int(0.91351351 * gt_height),
                    int(0.0359477 * gt_width) : int(0.96405229 * gt_width),
                ] = 1

            # Tensor.reshape returns a new tensor; the result must be assigned
            # for the mask to actually take the shape of valid_mask.
            eval_mask = eval_mask.reshape(valid_mask.shape)
            valid_mask = torch.logical_and(valid_mask, eval_mask)
        return valid_mask