File size: 11,858 Bytes

7f921f4

# Copyright (C) 2019 Jin Han Lee
#
# This file is a part of BTS.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>

from __future__ import absolute_import, division, print_function
from utils.metric import compute_normal_metrics
import os
os.environ['OPENCV_IO_ENABLE_OPENEXR'] = '1'
import argparse
import fnmatch
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

def hwc2chw(array):
    return array.transpose(2, 0, 1)
def chw2hwc(array):
    return array.transpose(1, 2, 0)

def convert_arg_line_to_args(arg_line):
    for arg in arg_line.split():
        if not arg.strip():
            continue
        yield arg



def resize_tensor(input_tensor, target_height, target_width):
    """
    使用双线性插值调整深度图像大小
    """
    # 多通道resize
    input_tensor = torch.from_numpy(hwc2chw(input_tensor)).unsqueeze(0).float()
    # 使用双线性插值调整大小
    resized_tensor = F.interpolate(
        input_tensor, 
        size=(target_height, target_width), 
        mode='bilinear', 
        align_corners=True
    )
    # 转换回numpy
    resized = resized_tensor.squeeze().numpy()
    resized = chw2hwc(resized)
    # input_tensor = np.ascontiguousarray(input_tensor)
    # resized = cv2.resize(input_tensor, (target_width, target_height), interpolation=cv2.INTER_LINEAR)
    return resized

def load_image_rgb_or_grayscale(image_path):
    """
    加载图像，支持RGB和灰度图像，统一转换为numpy数组
    """
    if image_path.endswith('.npy'):
        # 如果是npy文件，直接读取
        img_array = np.load(image_path)
    else:
    # 首先尝试用PIL加载，可以更好地处理不同格式
        img = Image.open(image_path)
        img_array = np.array(img)
        
        # 如果是RGBA图像，取前3通道，否则RGB或者Gray则不处理
        if len(img_array.shape) == 3 and img_array.shape[2] == 4:  # RGBA
            img_array = img_array[:, :, :3]  # 去掉Alpha通道
        # 对3通道取均值返回
    return img_array


def test(args):
    global gt_depths, missing_ids, pred_filenames,gt_depths_mask
    gt_depths = []
    gt_depths_mask = []
    missing_ids = set()
    pred_filenames = []
    if getattr(args, 'txt_file_list', None) is not None:
        with open(args.txt_file_list, 'r') as f:
            lines = f.readlines()
        for i,line in enumerate(lines):
            line = line.strip().split()[0]
            if line == '':
                continue
            pred_filenames.append(line.replace(".png",".npy"))
    else:
        for root, dirnames, filenames in os.walk(args.pred_path):
            for pred_filename in fnmatch.filter(filenames, '*.png') + fnmatch.filter(filenames, '*.jpg') + fnmatch.filter(filenames, '*.npy'):
                if 'cmap' in pred_filename or 'gt' in pred_filename:
                    continue
                dirname = root.replace(args.pred_path, '')
                if dirname.startswith('/'):
                    dirname = dirname[1:]
                pred_filenames.append(os.path.join(dirname, pred_filename))

    num_test_samples = len(pred_filenames)
    print(f'Found {num_test_samples} prediction files.')
    pred_depths = []

    for i in tqdm(range(num_test_samples)):
        pred_depth_path = os.path.join(args.pred_path,pred_filenames[i])
        pred_depth = load_image_rgb_or_grayscale(pred_depth_path)

        
        if pred_depth is None:
            print('Missing: %s ' % pred_depth_path)
            missing_ids.add(i)
            continue

        # 预测图像是0-255的relative depth，先转换为float
        pred_depth = pred_depth.astype(np.float32)
        
        pred_depths.append(pred_depth)


    # 加载GT深度图
    if args.dataset == 'nyu' or args.dataset == 'scannet' or args.dataset == 'ibims' or args.dataset == 'oasis':
        for t_id in range(num_test_samples):
            if t_id in missing_ids:
                continue
                
            # 构建GT路径，保持与pred相同的目录结构
            pred_relative_path = pred_filenames[t_id]
            gt_depth_path = os.path.join(args.gt_path, pred_relative_path)
            gt_depth_path = gt_depth_path.replace("_img.npy","_normal.npy")
            depth = load_image_rgb_or_grayscale(gt_depth_path)
            if depth is None:
                print('Missing: %s ' % gt_depth_path)
                missing_ids.add(t_id)
                continue
            gt_depths.append(depth)
    elif args.dataset == 'diode':
        for t_id in range(num_test_samples):
            if t_id in missing_ids:
                continue
                
            # 构建GT路径，保持与pred相同的目录结构
            pred_relative_path = pred_filenames[t_id]
            gt_depth_path = os.path.join(args.gt_path, pred_relative_path)
            gt_depth_path = gt_depth_path.replace(".npy","_normal.npy")
            gt_depth_mask_path = gt_depth_path.replace("_depth.npy","_depth_mask.npy")
            depth = load_image_rgb_or_grayscale(gt_depth_path)
            depth_mask = load_image_rgb_or_grayscale(gt_depth_mask_path)
            if depth is None:
                print('Missing: %s ' % gt_depth_path)
                missing_ids.add(t_id)
                continue
            gt_depths.append(depth)
            gt_depths_mask.append(depth_mask)
    else:
        raise ValueError(f"Unsupported dataset: {args.dataset}")
    print(f'### Computing errors for {len(gt_depths)} files with {len(missing_ids)} missing' if not gt_depths_mask else 'Computing errors with masks')

    results = eval(pred_depths,args)

    print('Done.')
    return results

def eval(pred_depths,args):
    num_samples = len(pred_depths)
    pred_depths_valid = []
    gt_depths_valid = []

    # 收集有效的预测和GT深度
    gt_idx = 0
    for t_id in range(num_samples):
        if t_id in missing_ids:
            continue

        pred_depths_valid.append(pred_depths[t_id])
        gt_depths_valid.append(gt_depths[gt_idx])
        gt_idx += 1

    num_samples = len(pred_depths_valid)

    mean_angular_error = np.zeros(num_samples, dtype=np.float32)
    median_angular_error = np.zeros(num_samples, dtype=np.float32)
    rmse_angular_error = np.zeros(num_samples, dtype=np.float32)
    sub5_error = np.zeros(num_samples, dtype=np.float32)
    sub7_5_error = np.zeros(num_samples, dtype=np.float32)
    sub11_25_error = np.zeros(num_samples, dtype=np.float32)
    sub22_5_error = np.zeros(num_samples, dtype=np.float32)
    sub30_error = np.zeros(num_samples, dtype=np.float32)
    
    for i in range(num_samples):
        gt_depth = gt_depths_valid[i]
        gt_depth[:,:,0] *= -1
        gt_depth[np.isinf(gt_depth)] = 0
        gt_depth[np.isnan(gt_depth)] = 0
        pred_depth = pred_depths_valid[i]
        pred_depth[np.isinf(pred_depth)] = 0
        pred_depth[np.isnan(pred_depth)] = 0

        # 1. 首先调整预测深度的大小以匹配GT
        if pred_depth.shape != gt_depth.shape:
            pred_depth = resize_tensor(pred_depth, gt_depth.shape[0], gt_depth.shape[1])
        # if i < 5:
        #     H, W, _ = gt_depth.shape
        #     # num_points = 200
        #     # ys = np.random.randint(0, H, size=num_points)
        #     # xs = np.random.randint(0, W, size=num_points)
        #     # make grid to sample
        #     sep = 20
        #     grid_y, grid_x = np.mgrid[0:H:sep, 0:W:sep]
        #     ys, xs = grid_y.ravel(), grid_x.ravel()

        #     # 取出法向量 (x,y,z)
        #     gt_normals = gt_depth[ys, xs, :]
        #     pred_normals = pred_depth[ys, xs, :]

        #     # 归一化
        #     gt_normals = gt_normals / (np.linalg.norm(gt_normals, axis=1, keepdims=True) + 1e-8)
        #     pred_normals = pred_normals / (np.linalg.norm(pred_normals, axis=1, keepdims=True) + 1e-8)

        #     plt.figure(figsize=(18, 6))

        #     # -------- 左：GT 法线 --------
        #     plt.subplot(1, 3, 1)
        #     plt.imshow((gt_depth * 127.5 + 127.5).astype(np.uint8))  # normal map可视化到[0,255]
        #     plt.quiver(xs, ys, gt_normals[:, 0], -gt_normals[:, 1], color='r', scale=20, width=0.005)
        #     plt.title(f'GT Normals {i}')
        #     plt.axis('off')

        #     # -------- 中：Pred 法线 --------
        #     plt.subplot(1, 3, 2)
        #     plt.imshow((pred_depth * 127.5 + 127.5).astype(np.uint8))
        #     plt.quiver(xs, ys, pred_normals[:, 0], -pred_normals[:, 1], color='b', scale=20, width=0.005)
        #     plt.title(f'Pred Normals {i}')
        #     plt.axis('off')

        #     # -------- 右：GT depth + 两种箭头 --------
        #     plt.subplot(1, 3, 3)
        #     plt.imshow(gt_depth.astype(np.uint8))
        #     plt.quiver(xs, ys, gt_normals[:, 0], -gt_normals[:, 1], color='r', scale=20, width=0.005, label="GT")
        #     plt.quiver(xs, ys, pred_normals[:, 0], -pred_normals[:, 1], color='b', scale=20, width=0.005, label="Pred")
        #     plt.title(f'GT+Pred Normals {i}')
        #     plt.axis('off')
        #     plt.legend(loc="lower right")

        #     plt.tight_layout()
        #     plt.savefig(f'normals_compare_{i}.png', dpi=300)
        #     plt.close()


        try:
            mean_angular_error[i], median_angular_error[i], rmse_angular_error[i], sub5_error[i], sub7_5_error[i], sub11_25_error[i], sub22_5_error[i], sub30_error[i] = compute_normal_metrics(
                pred_depth, gt_depth)
        except Exception as e:
            print(f'Error computing metrics for sample {i}: {e}')
            continue

    # 过滤掉无效值
    valid_results = ~np.isnan(mean_angular_error) & ~np.isinf(mean_angular_error) 
    results = "{:7.3f}, {:7.3f}, {:7.3f}, {:7.3f}, {:7.3f}, {:7.3f}, {:7.3f}".format(
        mean_angular_error[valid_results].mean(), median_angular_error[valid_results].mean(), sub5_error[valid_results].mean(),
        sub7_5_error[valid_results].mean(), sub11_25_error[valid_results].mean(), sub22_5_error[valid_results].mean(), 
        sub30_error[valid_results].mean())
    print("{:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}".format(
        "mean", "median", "sub5", "sub7.5", "sub11.25", "sub22.5", "sub30")
    )
    print(results)
    
    print(f'Valid results: {valid_results.sum()}/{len(valid_results)}')
    return results
    # return silog, log10, abs_rel, sq_rel, rms, log_rms, d1, d2, d3



if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='BTS TensorFlow implementation.', fromfile_prefix_chars='@')
    parser.convert_arg_line_to_args = convert_arg_line_to_args

    parser.add_argument('--pred_path',           type=str,   help='path to the prediction results in png', required=True)
    parser.add_argument('--gt_path',             type=str,   help='root path to the groundtruth data', required=False)
    parser.add_argument('--dataset',             type=str,   help='dataset to test on, nyu or kitti', default='nyu')
    parser.add_argument('--txt_file_list',      type=str,   help='text file containing list of files to evaluate', default=None)
    args = parser.parse_args()
    test(args)