# Last modified: 2025-01-14
#
# Copyright 2025 Ziyang Song, USTC. All rights reserved.
#
# This file has been modified from the original version.
# Original copyright (c) 2023 Bingxin Ke, ETH Zurich. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# --------------------------------------------------------------------------
# If you find this code useful, we kindly ask you to cite our paper in your work.
# Please find bibtex at: https://github.com/indu1ge/DepthMaster#-citation
# More information about the method can be found at https://indu1ge.github.io/DepthMaster_page
# --------------------------------------------------------------------------
import pandas as pd
import torch
from typing import Tuple
import torch.nn.functional as F
# from src.util.loss import SSIM
from skimage.metrics import structural_similarity
import numpy as np
from skimage import feature
from scipy import ndimage


# Adapted from: https://github.com/victoresque/pytorch-template/blob/master/utils/util.py
class MetricTracker:
    """Keeps running totals and averages for a fixed set of metric names."""

    def __init__(self, *keys, writer=None):
        # `writer` is optional; if given it must expose add_scalar(key, value).
        self.writer = writer
        self._data = pd.DataFrame(index=keys, columns=["total", "counts", "average"])
        self.reset()

    def reset(self):
        """Zero every accumulated statistic."""
        for col in self._data.columns:
            self._data[col].values[:] = 0

    def update(self, key, value, n=1):
        """Fold `value` (a mean over `n` samples) into `key`'s running average."""
        if self.writer is not None:
            self.writer.add_scalar(key, value)
        self._data.loc[key, "total"] += value * n
        self._data.loc[key, "counts"] += n
        self._data.loc[key, "average"] = self._data.total[key] / self._data.counts[key]

    def avg(self, key):
        """Current running average for `key`."""
        return self._data.average[key]

    def result(self):
        """All running averages as a plain dict keyed by metric name."""
        return dict(self._data.average)


def abs_relative_difference(output, target, valid_mask=None):
    """AbsRel: mean of |pred - gt| / gt over (valid) pixels, then over the batch."""
    rel_err = torch.abs(output - target) / target
    if valid_mask is None:
        num_px = output.shape[-1] * output.shape[-2]
    else:
        rel_err[~valid_mask] = 0  # invalid pixels contribute nothing to the sum
        num_px = valid_mask.sum((-1, -2))
    per_image = torch.sum(rel_err, (-1, -2)) / num_px
    return per_image.mean()


def squared_relative_difference(output, target, valid_mask=None):
    """SqRel: mean of (pred - gt)^2 / gt over (valid) pixels, then over the batch."""
    rel_err = torch.pow(torch.abs(output - target), 2) / target
    if valid_mask is None:
        num_px = output.shape[-1] * output.shape[-2]
    else:
        rel_err[~valid_mask] = 0
        num_px = valid_mask.sum((-1, -2))
    per_image = torch.sum(rel_err, (-1, -2)) / num_px
    return per_image.mean()


def rmse_linear(output, target, valid_mask=None):
    """Root-mean-square error in linear depth space, averaged over the batch."""
    err = output - target
    if valid_mask is None:
        num_px = output.shape[-1] * output.shape[-2]
    else:
        err[~valid_mask] = 0
        num_px = valid_mask.sum((-1, -2))
    per_image_mse = torch.sum(err * err, (-1, -2)) / num_px
    return torch.sqrt(per_image_mse).mean()


def rmse_log(output, target, valid_mask=None):
    """Root-mean-square error in log-depth space, averaged over the batch."""
    err = torch.log(output) - torch.log(target)
    if valid_mask is None:
        num_px = output.shape[-1] * output.shape[-2]
    else:
        err[~valid_mask] = 0
        num_px = valid_mask.sum((-1, -2))
    per_image_mse = torch.sum(err * err, (-1, -2)) / num_px  # [B]
    return torch.sqrt(per_image_mse).mean()


def log10(output, target, valid_mask=None):
    """Mean absolute difference of base-10 log depths over valid pixels."""
    if valid_mask is None:
        err = torch.abs(torch.log10(output) - torch.log10(target))
    else:
        err = torch.abs(torch.log10(output[valid_mask]) - torch.log10(target[valid_mask]))
    return err.mean()
https://github.com/imran3180/depth-map-prediction/blob/master/main.py def threshold_percentage(output, target, threshold_val, valid_mask=None): d1 = output / target d2 = target / output max_d1_d2 = torch.max(d1, d2) zero = torch.zeros(*output.shape) one = torch.ones(*output.shape) bit_mat = torch.where(max_d1_d2.cpu() < threshold_val, one, zero) if valid_mask is not None: bit_mat[~valid_mask] = 0 n = valid_mask.sum((-1, -2)) else: n = output.shape[-1] * output.shape[-2] count_mat = torch.sum(bit_mat, (-1, -2)) threshold_mat = count_mat / n.cpu() return threshold_mat.mean() def delta1_acc(pred, gt, valid_mask): return threshold_percentage(pred, gt, 1.25, valid_mask) def delta2_acc(pred, gt, valid_mask): return threshold_percentage(pred, gt, 1.25**2, valid_mask) def delta3_acc(pred, gt, valid_mask): return threshold_percentage(pred, gt, 1.25**3, valid_mask) def i_rmse(output, target, valid_mask=None): output_inv = 1.0 / output target_inv = 1.0 / target diff = output_inv - target_inv if valid_mask is not None: diff[~valid_mask] = 0 n = valid_mask.sum((-1, -2)) else: n = output.shape[-1] * output.shape[-2] diff2 = torch.pow(diff, 2) mse = torch.sum(diff2, (-1, -2)) / n # [B] rmse = torch.sqrt(mse) return rmse.mean() def silog_rmse(depth_pred, depth_gt, valid_mask=None): diff = torch.log(depth_pred) - torch.log(depth_gt) if valid_mask is not None: diff[~valid_mask] = 0 n = valid_mask.sum((-1, -2)) else: n = depth_gt.shape[-2] * depth_gt.shape[-1] diff2 = torch.pow(diff, 2) first_term = torch.sum(diff2, (-1, -2)) / n second_term = torch.pow(torch.sum(diff, (-1, -2)), 2) / (n**2) loss = torch.sqrt(torch.mean(first_term - second_term)) * 100 return loss def si_boundary_F1( predicted_depth: torch.Tensor, target_depth: torch.Tensor, valid_mask=None, t_min: float = 1.05, t_max: float = 1.25, N: int = 10, ) -> float: predicted_depth = predicted_depth.squeeze() # predicted_depth = (predicted_depth + 1) target_depth = target_depth.squeeze() assert predicted_depth.ndim == 
target_depth.ndim == 2 thresholds, weights = get_thresholds_and_weights(t_min, t_max, N) # print(target_depth.min()) f1_scores = torch.Tensor( [ boundary_f1(invert_depth(predicted_depth), invert_depth(target_depth), t, valid_mask) # boundary_f1(predicted_depth, target_depth, t) for t in thresholds ] ) return torch.sum(f1_scores * weights) def get_thresholds_and_weights( t_min: float, t_max: float, N: int ) -> Tuple[torch.Tensor, torch.Tensor]: """Generate thresholds and weights for the given range. Args: ---- t_min (float): Minimum threshold. t_max (float): Maximum threshold. N (int): Number of thresholds. Returns: ------- Tuple[np.ndarray, np.ndarray]: Array of thresholds and corresponding weights. """ thresholds = torch.linspace(t_min, t_max, N) weights = thresholds / thresholds.sum() return thresholds, weights def invert_depth(depth: torch.Tensor, eps: float = 1e-6) -> torch.Tensor: """Inverts a depth map with numerical stability. Args: ---- depth (np.ndarray): Depth map to be inverted. eps (float): Minimum value to avoid division by zero (default is 1e-6). Returns: ------- np.ndarray: Inverted depth map. """ inverse_depth = 1.0 / depth.clip(min=eps) return inverse_depth def boundary_f1( pr: torch.Tensor, gt: torch.Tensor, t: float, valid_mask: torch.Tensor, return_p: bool = False, return_r: bool = False, ) -> float: """Calculate Boundary F1 score. Args: ---- pr (np.ndarray): Predicted depth matrix. gt (np.ndarray): Ground truth depth matrix. t (float): Threshold for comparison. return_p (bool, optional): If True, return precision. Defaults to False. return_r (bool, optional): If True, return recall. Defaults to False. Returns: ------- float: Boundary F1 score, or precision, or recall depending on the flags. 
""" ap, bp, cp, dp = fgbg_depth(pr, t, valid_mask) ag, bg, cg, dg = fgbg_depth(gt, t, valid_mask) r = 0.25 * ( torch.count_nonzero(ap & ag) / max(torch.count_nonzero(ag), 1) + torch.count_nonzero(bp & bg) / max(torch.count_nonzero(bg), 1) + torch.count_nonzero(cp & cg) / max(torch.count_nonzero(cg), 1) + torch.count_nonzero(dp & dg) / max(torch.count_nonzero(dg), 1) ) p = 0.25 * ( torch.count_nonzero(ap & ag) / max(torch.count_nonzero(ap), 1) + torch.count_nonzero(bp & bg) / max(torch.count_nonzero(bp), 1) + torch.count_nonzero(cp & cg) / max(torch.count_nonzero(cp), 1) + torch.count_nonzero(dp & dg) / max(torch.count_nonzero(dp), 1) ) if r + p == 0: return 0.0 if return_p: return p if return_r: return r return 2 * (r * p) / (r + p) def fgbg_depth( d: torch.Tensor, t: float, valid_mask: torch.Tensor ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: """Find foreground-background relations between neighboring pixels. Args: ---- d (np.ndarray): Depth matrix. t (float): Threshold for comparison. Returns: ------- Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Four matrices indicating left, top, right, and bottom foreground-background relations. 
""" h, w = d.shape invalid_h = torch.zeros(h, 1).bool().to(valid_mask.device) invalid_w = torch.zeros(1, w).bool().to(valid_mask.device) right_is_big_enough = (d[..., :, 1:] / d[..., :, :-1]) > t left_is_big_enough = (d[..., :, :-1] / d[..., :, 1:]) > t bottom_is_big_enough = (d[..., 1:, :] / d[..., :-1, :]) > t top_is_big_enough = (d[..., :-1, :] / d[..., 1:, :]) > t right_is_big_enough = torch.cat([right_is_big_enough, invalid_h], dim=1) & valid_mask left_is_big_enough = torch.cat([invalid_h, left_is_big_enough], dim=1) & valid_mask bottom_is_big_enough = torch.cat([bottom_is_big_enough, invalid_w], dim=0) & valid_mask top_is_big_enough = torch.cat([invalid_w, top_is_big_enough], dim=0) & valid_mask return ( left_is_big_enough, top_is_big_enough, right_is_big_enough, bottom_is_big_enough, ) def gauss(x, sigma): y = torch.exp(-(x**2) / (2 * sigma**2)) / (sigma * torch.sqrt(2 * torch.tensor(torch.pi))) return y def dgauss(x, sigma): y = -x * gauss(x, sigma) / (sigma**2) return y def gaussgradient(im, sigma): epsilon = torch.tensor(1e-2) halfsize = int(torch.ceil(sigma * torch.sqrt(-2 * torch.log(torch.sqrt(2 * torch.tensor(torch.pi)) * sigma * epsilon)))) size = 2 * halfsize + 1 hx = torch.zeros((size, size)) for i in range(0, size): for j in range(0, size): u = [i - halfsize, j - halfsize] hx[i, j] = gauss(u[0], sigma) * dgauss(u[1], sigma) hx = hx / torch.sqrt(torch.sum(torch.abs(hx) * torch.abs(hx))) hx = hx.to(im.device) hy = hx.t().to(im.device) # gx = scipy.ndimage.convolve(im, hx, mode="nearest") # gy = scipy.ndimage.convolve(im, hy, mode="nearest") gx = F.conv2d(im.unsqueeze(0).unsqueeze(0), hx.unsqueeze(0).unsqueeze(0), padding=halfsize) gy = F.conv2d(im.unsqueeze(0).unsqueeze(0), hy.unsqueeze(0).unsqueeze(0), padding=halfsize) return gx.squeeze(0).squeeze(0), gy.squeeze(0).squeeze(0) def gradient_loss(pred, target, valid_mask=None): # min_d = target[valid_mask].min() # max_d = target[valid_mask].max() # pred = (pred - pred.min())/ (pred.max() - 
def grad_sim(pred: torch.Tensor, target: torch.Tensor, valid_mask=None):
    """Squared difference of finite-difference gradient magnitudes (2-D inputs).

    Target is normalized by its robust 2%/98% quantile range over valid pixels,
    the prediction by its own min/max. grad() shrinks maps to (h-1, w-1), so
    the validity mask is cropped from both corners before combining.

    Bug fix: the original dereferenced `valid_mask` before its None check; a
    None mask now means "all pixels valid".
    """
    if valid_mask is None:
        valid_mask = torch.ones_like(target, dtype=torch.bool)
    _min, _max = torch.quantile(
        target[valid_mask],
        torch.tensor([0.02, 0.98]).to(pred.device),
    )
    target_norm = (target - _min) / (_max - _min)
    target_norm = torch.clip(target_norm, 0, 1)
    target_norm[~valid_mask] = 0.
    pred_norm = (pred - pred.min()) / (pred.max() - pred.min())
    grad_pred = torch.sqrt(grad(pred_norm))
    grad_gt = torch.sqrt(grad(target_norm))
    error_map = (grad_pred - grad_gt) ** 2
    mask = grad_gt > 0.05  # only score where the target has real edges
    # Crop the mask to the (h-1, w-1) gradient grid: a gradient sample is valid
    # only when both pixels it touches are valid.
    combined = valid_mask[1:, 1:] & valid_mask[:-1, :-1] & mask
    return torch.mean(error_map[combined])


def grad(x):
    """Squared finite-difference gradient magnitude of a 2-D map; output is (h-1, w-1).

    (The previous comment claimed an (n, c, h, w) input, but the slicing is 2-D.)
    """
    diff_x = x[1:, 1:] - x[1:, :-1]
    diff_y = x[1:, 1:] - x[:-1, 1:]
    mag = diff_x**2 + diff_y**2
    return mag


def psnr(pred: torch.Tensor, target: torch.Tensor, valid_mask=None):
    """PSNR (dB) between robust-normalized target and min-max-normalized prediction.

    Bug fix: the original dereferenced `valid_mask` before any None check; a
    None mask now means "all pixels valid".
    """
    if valid_mask is None:
        valid_mask = torch.ones_like(target, dtype=torch.bool)
    _min, _max = torch.quantile(
        target[valid_mask],
        torch.tensor([0.02, 0.98]).to(pred.device),
    )
    target_norm = (target - _min) / (_max - _min)
    target_norm = torch.clip(target_norm, 0, 1)
    target_norm[~valid_mask] = 0.
    pred_norm = (pred - pred.min()) / (pred.max() - pred.min())
    mse = (((pred_norm - target_norm) ** 2)[valid_mask]).mean()
    return 20 * torch.log10(1.0 / torch.sqrt(mse))


def ssim(pred: torch.Tensor, target: torch.Tensor, valid_mask=None):
    """Mean SSIM over valid pixels, on CPU via skimage.structural_similarity.

    Bug fixes: the None-mask path crashed (mask dereferenced before check) —
    a None mask now means "all pixels valid"; the local result no longer
    shadows the function name.
    """
    if valid_mask is None:
        valid_mask = torch.ones_like(target, dtype=torch.bool)
    _min, _max = torch.quantile(
        target[valid_mask],
        torch.tensor([0.02, 0.98]).to(pred.device),
    )
    target_norm = (target - _min) / (_max - _min)
    target_norm = torch.clip(target_norm, 0, 1)
    target_norm[~valid_mask] = 0.
    pred_norm = (pred - pred.min()) / (pred.max() - pred.min())
    mssim, S = structural_similarity(
        pred_norm.cpu().numpy(),
        target_norm.cpu().numpy(),
        win_size=3,
        gradient=False,
        data_range=1.0,
        multichannel=False,
        channel_axis=None,
        gaussian_weights=False,
        full=True,
    )
    return S[valid_mask.cpu().numpy()].mean()


def compute_depth_boundary_error(edges, pred):
    # NOTE(review): this definition is truncated at the end of the visible
    # chunk; the visible portion is reproduced as-is.
    # skip dbe for this image if there is no ground truth distinc edge
    if np.sum(edges) == 0:
        dbe_acc = 0
        dbe_com = 0
    else:
        # normalize est depth map from 0 to 1
        pred_normalized = pred.copy().astype('f')
        pred_normalized[pred_normalized == 0] = np.nan
        pred_normalized = pred_normalized - np.nanmin(pred_normalized)
        pred_normalized = pred_normalized / np.nanmax(pred_normalized)
        # apply canny filter
        edges_est = feature.canny(pred_normalized, sigma=np.sqrt(2), low_threshold=0.1, high_threshold=0.2)
        # compute distance transform for chamfer metric
        D_gt = ndimage.distance_transform_edt(1 - edges)
        D_est = ndimage.distance_transform_edt(1 - edges_est)
        max_dist_thr = 10.  # Threshold for local neighborhood
        mask_D_gt = D_gt