EvalMDE / Edit2Perceive /utils /eval_depth.py

zeyuren2002

Add files using upload-large-folder tool

7f921f4 verified 1 day ago

22.9 kB

	# Copyright (C) 2019 Jin Han Lee
	#
	# This file is a part of BTS.
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation, either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <http://www.gnu.org/licenses/>

	import os
	import argparse
	import fnmatch
	import cv2
	import numpy as np
	import torch
	import torch.nn.functional as F
	from PIL import Image
	import struct
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'


	def convert_arg_line_to_args(arg_line):
	    """Split one line of an argument file into individual arguments.

	    Assigned to ``parser.convert_arg_line_to_args`` in the script entry point
	    so that a file passed with the ``@`` prefix may hold several
	    whitespace-separated arguments on a single line.

	    Args:
	        arg_line: One line of text read from the argument file.

	    Yields:
	        Each non-empty whitespace-delimited token, in order of appearance.
	    """
	    for arg in arg_line.split():
	        if not arg.strip():
	            continue
	        yield arg




	def scale_and_shift_align(pred, gt, valid_mask):
	    """Align a prediction to the ground truth with a least-squares scale and shift.

	    Solves ``min ||scale * pred + shift - gt||^2`` over the pixels selected by
	    ``valid_mask`` and applies the fitted affine map to the whole prediction.

	    Args:
	        pred: Predicted depth array (relative depth).
	        gt: Ground-truth array indexable with the same mask as ``pred``.
	        valid_mask: Boolean mask selecting the pixels used for the fit.

	    Returns:
	        ``pred * scale + shift`` for every pixel of ``pred``, including the
	        pixels outside ``valid_mask``.
	    """
	    pred_valid = pred[valid_mask].flatten()
	    gt_valid = gt[valid_mask].flatten()

	    # Least-squares system A x = b with A = [pred, 1], x = [scale, shift],
	    # b = gt.
	    A = np.vstack([pred_valid, np.ones(len(pred_valid))]).T
	    scale, shift = np.linalg.lstsq(A, gt_valid, rcond=None)[0]

	    return pred * scale + shift

	def resize_depth_tensor(depth_img, target_height, target_width):
	    """Resize a 2-D depth map with bilinear interpolation.

	    Args:
	        depth_img: 2-D array of depth values (H, W).
	        target_height: Output height in pixels.
	        target_width: Output width in pixels.

	    Returns:
	        A float32 numpy array of shape ``(target_height, target_width)``.
	    """
	    # torch.from_numpy rejects some dtypes and negative strides, so normalise
	    # to a contiguous float32 array first.
	    source = np.ascontiguousarray(depth_img, dtype=np.float32)

	    # F.interpolate expects a (N, C, H, W) tensor.
	    depth_tensor = torch.from_numpy(source).unsqueeze(0).unsqueeze(0)

	    resized_tensor = F.interpolate(
	        depth_tensor,
	        size=(target_height, target_width),
	        mode='bilinear',
	        align_corners=True,
	    )

	    # Index the batch/channel axes explicitly: a bare squeeze() would also drop
	    # a spatial axis whenever the target height or width is 1.
	    return resized_tensor[0, 0].numpy()

	def read_depth(filename):
	    """Read a depth map from a binary file that starts with a ``PIEH`` tag.

	    Layout read by this function: 4-byte tag ``b'PIEH'``, little-endian uint32
	    width, little-endian uint32 height, then ``width * height`` little-endian
	    float32 values in row-major order.

	    Args:
	        filename: Path of the file to read.

	    Returns:
	        A writable float32 numpy array of shape ``(height, width)``.

	    Raises:
	        ValueError: If the tag is wrong or the file is shorter than its header
	            announces.
	    """
	    with open(filename, 'rb') as f:
	        tag = f.read(4)
	        if tag != b'PIEH':
	            raise ValueError("Invalid file format: expected 'PIEH' tag")

	        header = f.read(8)
	        if len(header) != 8:
	            raise ValueError("Incomplete depth data")
	        width, height = struct.unpack('<II', header)

	        expected_size = width * height * 4
	        depth_data = f.read(expected_size)

	    if len(depth_data) != expected_size:
	        raise ValueError("Incomplete depth data")

	    # Decode the buffer in one vectorised step instead of unpacking into a
	    # Python list; astype() copies, so the result is writable.
	    depth_map = np.frombuffer(depth_data, dtype='<f4').astype(np.float32)
	    return depth_map.reshape((height, width))

	def read_depth_binary_file(file_path, image_width, image_height):
	    """Read a headerless binary depth map made of raw float32 values.

	    The file is interpreted as ``image_width * image_height`` float32 values
	    (4 bytes each) stored in row-major order; the file extension is irrelevant.

	    Args:
	        file_path: Path of the depth file.
	        image_width: Image width in pixels.
	        image_height: Image height in pixels.

	    Returns:
	        A float32 numpy array of shape ``(image_height, image_width)``. The
	        array is a read-only view over the bytes read from disk; copy it
	        before modifying it in place.

	    Raises:
	        ValueError: If the file size does not equal
	            ``image_width * image_height * 4`` bytes.
	    """
	    bytes_per_pixel = 4  # one float32 per pixel
	    expected_file_size = image_width * image_height * bytes_per_pixel

	    with open(file_path, 'rb') as f:
	        data = f.read()

	    # A size mismatch means the supplied dimensions do not describe this file.
	    if len(data) != expected_file_size:
	        raise ValueError(f"文件大小不符合预期。期望 {expected_file_size} 字节，实际 {len(data)} 字节。"
	                         f"请检查图像尺寸({image_width}x{image_height})是否正确。")

	    depth_values = np.frombuffer(data, dtype=np.float32)
	    return depth_values.reshape((image_height, image_width))

	def load_image_rgb_or_grayscale(image_path):
	    """Load an image or depth file as a 2-D numpy array.

	    The loader is chosen from the path:

	    * paths containing ``eth3d/depth`` (case-insensitive) are read as raw
	      float32 data of size 6048x4032;
	    * ``.dpt`` files are read with ``read_depth``;
	    * ``.npy`` files are read with ``np.load``;
	    * anything else is opened with PIL.

	    Arrays with four channels lose the fourth (alpha) channel, and any
	    3-dimensional array is reduced to 2-D by averaging over the last axis.

	    Args:
	        image_path: Path of the file to load.

	    Returns:
	        The loaded array; 3-D inputs are returned as their per-pixel channel
	        mean.

	    Raises:
	        ValueError: If the file cannot be loaded for any reason; the original
	            exception is attached as ``__cause__``.
	    """
	    try:
	        if 'eth3d/depth' in image_path.lower():  # depth stored as raw 4-byte float32
	            img_array = read_depth_binary_file(image_path, 6048, 4032)
	        elif image_path.endswith('.dpt'):
	            img_array = read_depth(image_path)
	        elif image_path.endswith('.npy'):
	            img_array = np.load(image_path)
	        else:
	            # PIL copes with a wide range of image formats; the context manager
	            # closes the underlying file once the pixels are copied out.
	            with Image.open(image_path) as img:
	                img_array = np.array(img)

	        # RGBA: keep the first three channels only.
	        if len(img_array.shape) == 3 and img_array.shape[2] == 4:
	            img_array = img_array[:, :, :3]
	        # Collapse colour channels to a single plane by averaging.
	        return np.mean(img_array, axis=2) if len(img_array.shape) == 3 else img_array
	    except Exception as e:
	        # Catch Exception rather than everything so Ctrl-C still interrupts,
	        # and keep the root cause for debugging.
	        raise ValueError(f"Failed to load image from {image_path}. Ensure it is a valid image file or depth map.") from e
	def compute_errors(gt, pred):
	    """Compute depth-estimation error metrics between two arrays.

	    Both inputs must be strictly positive: the ratios and logarithms below are
	    undefined or infinite otherwise.

	    Args:
	        gt: Ground-truth depth values.
	        pred: Predicted depth values, same shape as ``gt``.

	    Returns:
	        The tuple ``(silog, log10, abs_rel, sq_rel, rmse, rmse_log, d1, d2,
	        d3)`` where ``d1``/``d2``/``d3`` are the fractions of elements whose
	        ``max(gt / pred, pred / gt)`` is below ``1.25``, ``1.25 ** 2`` and
	        ``1.25 ** 3``.
	    """
	    thresh = np.maximum((gt / pred), (pred / gt))
	    d1 = (thresh < 1.25).mean()
	    d2 = (thresh < 1.25 ** 2).mean()
	    d3 = (thresh < 1.25 ** 3).mean()

	    rmse = (gt - pred) ** 2
	    rmse = np.sqrt(rmse.mean())

	    rmse_log = (np.log(gt) - np.log(pred)) ** 2
	    rmse_log = np.sqrt(rmse_log.mean())

	    abs_rel = np.mean(np.abs(gt - pred) / gt)
	    sq_rel = np.mean(((gt - pred) ** 2) / gt)

	    # Scale-invariant log error: the spread of the log-error around its mean,
	    # scaled by 100.
	    err = np.log(pred) - np.log(gt)
	    silog = np.sqrt(np.mean(err ** 2) - np.mean(err) ** 2) * 100

	    err = np.abs(np.log10(pred) - np.log10(gt))
	    log10 = np.mean(err)

	    return silog, log10, abs_rel, sq_rel, rmse, rmse_log, d1, d2, d3


	def test(args):
	    """Load predictions and their ground truth, then evaluate them.

	    Walks ``args.pred_path`` for ``*.png`` and ``*.npy`` prediction files
	    (names containing ``cmap`` or ``gt`` are skipped), loads each one, derives
	    the matching ground-truth path according to ``args.dataset`` and finally
	    delegates to ``eval``.

	    The module-level globals ``gt_depths``, ``gt_depths_mask``, ``missing_ids``
	    and ``pred_filenames`` are reset and filled here; ``eval`` reads them.

	    Args:
	        args: Parsed command-line namespace. This function reads ``pred_path``,
	            ``gt_path`` and ``dataset``; the namespace is passed on to ``eval``.
	            ``args.gt_path`` is rewritten without a trailing ``/``.

	    Returns:
	        Whatever ``eval`` returns (the formatted metric string).

	    Raises:
	        ValueError: If ``args.gt_path`` is empty or ``args.dataset`` is not one
	            of the supported names.
	    """
	    global gt_depths, missing_ids, pred_filenames, gt_depths_mask
	    gt_depths = []
	    gt_depths_mask = []
	    missing_ids = set()
	    pred_filenames = []
	    if "inverse" in args.pred_path:
	        print('!!! Important: Inverse depth detected, will convert to depth during evaluation.')
	    for root, dirnames, filenames in os.walk(args.pred_path):
	        # fnmatch needs the '*' wildcard; a bare '.png' only matches a file
	        # literally named '.png'.
	        for pred_filename in fnmatch.filter(filenames, '*.png') + fnmatch.filter(filenames, '*.npy'):
	            if 'cmap' in pred_filename or 'gt' in pred_filename:
	                continue
	            # Store the path relative to pred_path so it can be re-rooted under
	            # gt_path below.
	            dirname = root.replace(args.pred_path, '')
	            if dirname.startswith('/'):
	                dirname = dirname[1:]
	            pred_filenames.append(os.path.join(dirname, pred_filename))

	    num_test_samples = len(pred_filenames)

	    pred_depths = []
	    if not args.gt_path:
	        # --gt_path is optional for argparse but every dataset branch needs it.
	        raise ValueError('gt_path is required to locate the ground-truth depth files')
	    if args.gt_path[-1] == '/':
	        args.gt_path = args.gt_path[:-1]
	    for i in range(num_test_samples):
	        pred_depth_path = os.path.join(args.pred_path, pred_filenames[i])
	        pred_depth = load_image_rgb_or_grayscale(pred_depth_path)

	        if pred_depth is None:
	            print('Missing: %s ' % pred_depth_path)
	            missing_ids.add(i)
	            continue

	        # Predictions are relative depth; work in float32 from here on.
	        pred_depth = pred_depth.astype(np.float32)

	        pred_depths.append(pred_depth)

	    if args.dataset == 'kitti':
	        for t_id in range(num_test_samples):
	            if t_id in missing_ids:
	                continue

	            # Mirror the prediction's directory layout under gt_path, dropping
	            # the leading 11-character date directory ("20xx_xx_xx/").
	            pred_relative_path = pred_filenames[t_id]
	            gt_depth_path = os.path.join(args.gt_path, pred_relative_path[11:])
	            gt_depth_path = gt_depth_path.replace("image_02/data", "proj_depth/groundtruth/image_02").replace(".npy", ".png")
	            depth = cv2.imread(gt_depth_path, -1)
	            if depth is None:
	                print(f'Missing: {gt_depth_path} for pred file {pred_relative_path}')
	                missing_ids.add(t_id)
	                continue
	            depth = depth.astype(np.float32) / 256.0
	            gt_depths.append(depth)
	    elif args.dataset == 'nyu' or args.dataset == 'nyuv2':
	        for t_id in range(num_test_samples):
	            if t_id in missing_ids:
	                continue

	            # Same relative layout as the prediction, "rgb" -> "depth".
	            pred_relative_path = pred_filenames[t_id]
	            gt_depth_path = os.path.join(args.gt_path, pred_relative_path)
	            gt_depth_path = gt_depth_path.replace("rgb", "depth").replace(".npy", ".png")
	            depth = cv2.imread(gt_depth_path, -1)
	            if depth is None:
	                print('Missing: %s ' % gt_depth_path)
	                missing_ids.add(t_id)
	                continue

	            depth = depth.astype(np.float32) / 1000.0
	            gt_depths.append(depth)
	    elif args.dataset == 'Sintel':
	        for t_id in range(num_test_samples):
	            if t_id in missing_ids:
	                continue

	            # Same relative layout as the prediction, "final" -> "depth",
	            # ".png" -> ".dpt".
	            pred_relative_path = pred_filenames[t_id]
	            gt_depth_path = os.path.join(args.gt_path, pred_relative_path)
	            gt_depth_path = gt_depth_path.replace("final", "depth").replace(".png", ".dpt")
	            depth = load_image_rgb_or_grayscale(gt_depth_path)
	            if depth is None:
	                print('Missing: %s ' % gt_depth_path)
	                missing_ids.add(t_id)
	                continue

	            depth = depth.astype(np.float32) / 1000.0
	            gt_depths.append(depth)
	    elif args.dataset == 'diode':
	        for t_id in range(num_test_samples):
	            if t_id in missing_ids:
	                continue

	            # Depth and validity mask sit next to each other as
	            # "<name>_depth.npy" and "<name>_depth_mask.npy".
	            pred_relative_path = pred_filenames[t_id]
	            gt_depth_path = os.path.join(args.gt_path, pred_relative_path)
	            gt_depth_path = gt_depth_path.replace(".npy", "_depth.npy")
	            gt_depth_mask_path = gt_depth_path.replace("_depth.npy", "_depth_mask.npy")
	            depth = load_image_rgb_or_grayscale(gt_depth_path)
	            depth_mask = load_image_rgb_or_grayscale(gt_depth_mask_path)
	            if depth is None:
	                print('Missing: %s ' % gt_depth_path)
	                missing_ids.add(t_id)
	                continue
	            gt_depths.append(depth)
	            gt_depths_mask.append(depth_mask)
	    elif args.dataset == 'eth3d':
	        for t_id in range(num_test_samples):
	            if t_id in missing_ids:
	                continue
	            pred_relative_path = pred_filenames[t_id]
	            # Predictions are expected under "rgb/<scene>/..."; the scene name
	            # selects the ground-truth sub-tree.
	            parts = pred_relative_path.split('/')
	            assert parts[0] == 'rgb'
	            scene = parts[1]

	            gt_depth_path = f"{args.gt_path}/depth/{scene}_dslr_depth/{scene}/ground_truth_depth/dslr_images/{parts[-1].replace('.npy','.JPG')}"
	            depth = load_image_rgb_or_grayscale(gt_depth_path)

	            if depth is None:
	                print('Missing: %s ' % gt_depth_path)
	                missing_ids.add(t_id)
	                continue
	            gt_depths.append(depth)
	    elif args.dataset == 'scannet':
	        for t_id in range(num_test_samples):
	            if t_id in missing_ids:
	                continue

	            # Same relative layout as the prediction, "color" -> "depth".
	            pred_relative_path = pred_filenames[t_id]
	            gt_depth_path = os.path.join(args.gt_path, pred_relative_path)
	            gt_depth_path = gt_depth_path.replace("color", "depth").replace(".npy", ".png")
	            depth = cv2.imread(gt_depth_path, -1)
	            if depth is None:
	                print('Missing: %s ' % gt_depth_path)
	                missing_ids.add(t_id)
	                continue

	            depth = depth.astype(np.float32) / 1000.0
	            gt_depths.append(depth)
	    else:
	        raise ValueError('Unknown dataset: %s' % args.dataset)
	    print(f'### Computing errors for {len(gt_depths)} files and {len(missing_ids)} missing' if not gt_depths_mask else f'Computing errors with masks for {len(gt_depths)} files and {len(missing_ids)} missing')

	    result = eval(pred_depths, args)

	    print('Done.')
	    return result


	def eval(pred_depths, args):
	    """Align every prediction to its ground truth and report averaged metrics.

	    Reads the module-level globals ``missing_ids``, ``gt_depths`` and
	    ``gt_depths_mask`` filled by ``test``. For each sample the prediction is
	    resized to the ground truth if needed, a validity mask is built (depth
	    range, optional dataset mask, optional crops), the prediction is
	    scale-and-shift aligned in the space selected by the ``using_*`` flags,
	    mapped back to depth, clipped to the evaluation range and scored with
	    ``compute_errors``.

	    NOTE: this function shadows the builtin ``eval`` inside this module.

	    Args:
	        pred_depths: List of float32 prediction arrays, indexed like
	            ``pred_filenames``.
	        args: Namespace providing ``dataset``, ``min_depth_eval``,
	            ``max_depth_eval``, ``do_kb_crop``, ``garg_crop``, ``eigen_crop``
	            and ``no_verbose``; the ``using_log``, ``using_sqrt_disp``,
	            ``using_disp``, ``using_sqrt`` and ``using_pdf`` flags are optional.

	    Returns:
	        A comma-separated string with the means of d1, d2, d3, AbsRel, SqRel,
	        RMSE, RMSElog, SILog and log10 over the samples whose SILog is finite
	        and non-zero.
	    """
	    num_samples = len(pred_depths)
	    pred_depths_valid = []
	    gt_depths_valid = []
	    # Read the flag with getattr like every other using_* flag below, so a
	    # namespace without the attribute does not raise.
	    if getattr(args, 'using_pdf', None):
	        pdf = np.load('depth_mapping_lookup_table.npz')
	    # Pair each non-missing prediction with its ground truth; gt_depths only
	    # holds entries for non-missing samples, hence the separate counter.
	    gt_idx = 0
	    for t_id in range(num_samples):
	        if t_id in missing_ids:
	            continue

	        pred_depths_valid.append(pred_depths[t_id])
	        gt_depths_valid.append(gt_depths[gt_idx])
	        gt_idx += 1

	    num_samples = len(pred_depths_valid)

	    silog = np.zeros(num_samples, np.float32)
	    log10 = np.zeros(num_samples, np.float32)
	    rms = np.zeros(num_samples, np.float32)
	    log_rms = np.zeros(num_samples, np.float32)
	    abs_rel = np.zeros(num_samples, np.float32)
	    sq_rel = np.zeros(num_samples, np.float32)
	    d1 = np.zeros(num_samples, np.float32)
	    d2 = np.zeros(num_samples, np.float32)
	    d3 = np.zeros(num_samples, np.float32)

	    for i in range(num_samples):
	        gt_depth = gt_depths_valid[i]
	        pred_depth = pred_depths_valid[i]

	        # 1. Resize the prediction to the ground truth (or to the 352x1216
	        #    KITTI benchmark crop when do_kb_crop is set).
	        if pred_depth.shape != gt_depth.shape:
	            if args.do_kb_crop:
	                target_h, target_w = 352, 1216
	            else:
	                target_h, target_w = gt_depth.shape[0], gt_depth.shape[1]
	            pred_depth = resize_depth_tensor(pred_depth, target_h, target_w)
	        # Copy before the in-place edits below so the stored ground truth is
	        # left untouched (and because it may be a read-only buffer view).
	        gt_depth = gt_depth.copy()
	        # Zero out non-finite values; zeros fall outside the valid depth range.
	        gt_depth[np.isinf(gt_depth)] = 0
	        gt_depth[np.isnan(gt_depth)] = 0

	        pred_depth[np.isinf(pred_depth)] = 0
	        pred_depth[np.isnan(pred_depth)] = 0

	        # Only evaluate finite pixels strictly inside the depth range.
	        valid_mask = np.logical_and(gt_depth > args.min_depth_eval, gt_depth < args.max_depth_eval)
	        valid_mask = np.logical_and(valid_mask, ~np.isnan(gt_depth))
	        valid_mask = np.logical_and(valid_mask, ~np.isinf(gt_depth))

	        if gt_depths_mask:
	            valid_mask = np.logical_and(valid_mask, gt_depths_mask[i] > 0)
	        # NOTE(review): only 'nyu' gets this fixed crop, while test() also
	        # accepts 'nyuv2' - confirm whether that difference is intended.
	        if args.dataset == 'nyu':
	            _valid_mask = np.zeros_like(valid_mask)
	            _valid_mask[45:471, 41:601] = 1
	            valid_mask = np.logical_and(valid_mask, _valid_mask)
	            del _valid_mask
	        # KITTI benchmark crop: paste the prediction into a full-size canvas at
	        # the crop location and restrict the mask to that window.
	        if args.do_kb_crop:
	            height, width = gt_depth.shape
	            top_margin = int(height - 352)
	            left_margin = int((width - 1216) / 2)
	            pred_depth_uncropped = np.zeros((height, width), dtype=np.float32)
	            try:
	                if abs(pred_depth.shape[0] - 375) < 10:
	                    # Prediction is already (roughly) full size: copy the
	                    # window out of it.
	                    pred_depth_uncropped[top_margin:top_margin + 352, left_margin:left_margin + 1216] = pred_depth[top_margin:top_margin + 352, left_margin:left_margin + 1216]
	                    pred_depth = pred_depth_uncropped
	                else:
	                    # Prediction is the 352x1216 crop itself.
	                    pred_depth_uncropped[top_margin:top_margin + 352, left_margin:left_margin + 1216] = pred_depth
	                    pred_depth = pred_depth_uncropped
	            except Exception as e:
	                print(f"Error in do_kb_crop for sample {i}: {e}")
	                print(f"pred shape:{pred_depth.shape}, uncropped shape:{pred_depth_uncropped.shape}")
	            _valid_mask = np.zeros_like(valid_mask)
	            _valid_mask[top_margin:top_margin + 352, left_margin:left_margin + 1216] = valid_mask[top_margin:top_margin + 352, left_margin:left_margin + 1216]
	            valid_mask = _valid_mask
	        if args.garg_crop or args.eigen_crop:
	            gt_height, gt_width = gt_depth.shape
	            eval_mask = np.zeros(valid_mask.shape)

	            if args.garg_crop:
	                eval_mask[int(0.40810811 * gt_height):int(0.99189189 * gt_height), int(0.03594771 * gt_width):int(0.96405229 * gt_width)] = 1

	            elif args.eigen_crop:
	                if args.dataset == 'kitti':
	                    eval_mask[int(0.3324324 * gt_height):int(0.91351351 * gt_height), int(0.0359477 * gt_width):int(0.96405229 * gt_width)] = 1
	                else:
	                    eval_mask[45:471, 41:601] = 1

	            valid_mask = np.logical_and(valid_mask, eval_mask)

	        # Skip samples with too few valid pixels; their metric slots stay 0
	        # and are filtered out after the loop.
	        if valid_mask.sum() < 100:
	            print(f'Warning: Sample {i} has very few valid pixels ({valid_mask.sum()})')
	            continue

	        # 2. Scale-and-shift alignment, performed in the space selected by the
	        #    using_* flags (first matching flag wins).
	        if getattr(args, 'using_log', None):
	            gt_depth_cp = np.log(gt_depth + 1e-6)
	        elif getattr(args, 'using_sqrt_disp', None):
	            gt_depth_cp = 1 / np.sqrt(gt_depth + 1e-8)
	        elif getattr(args, 'using_disp', None):
	            gt_depth_cp = 1 / (gt_depth + 1e-8)
	        elif getattr(args, 'using_sqrt', None):
	            gt_depth_cp = np.sqrt(gt_depth + 1e-6)
	        elif getattr(args, 'using_pdf', None):
	            gt_depth_cp = np.interp(gt_depth, pdf['bins'], pdf['y_map'])
	        else:
	            gt_depth_cp = gt_depth.copy()
	        pred_depth_aligned = scale_and_shift_align(pred_depth, gt_depth_cp, valid_mask)
	        # Map the aligned prediction back to depth with the inverse transform.
	        if getattr(args, 'using_log', None):
	            pred_depth_aligned = np.exp(pred_depth_aligned)
	        elif getattr(args, 'using_sqrt_disp', None):
	            pred_depth_aligned = 1 / (pred_depth_aligned ** 2)
	        elif getattr(args, 'using_disp', None):
	            pred_depth_aligned = 1 / pred_depth_aligned
	        elif getattr(args, 'using_sqrt', None):
	            pred_depth_aligned = np.power(pred_depth_aligned, 2)
	        elif getattr(args, 'using_pdf', None):
	            pred_depth_aligned = np.interp(pred_depth_aligned, pdf['y_map'], pdf['bins'])
	        pred_depth_aligned = np.clip(pred_depth_aligned, args.min_depth_eval, args.max_depth_eval)

	        # 3. Metrics on the valid pixels only.
	        try:
	            silog[i], log10[i], abs_rel[i], sq_rel[i], rms[i], log_rms[i], d1[i], d2[i], d3[i] = compute_errors(
	                gt_depth[valid_mask], pred_depth_aligned[valid_mask])
	        except Exception as e:
	            print(f'Error computing metrics for sample {i}: {e}')
	            continue

	    # Drop samples that were skipped (slots still 0) or produced NaN/inf.
	    valid_results = ~np.isnan(silog) & ~np.isinf(silog) & (silog != 0)
	    results = "{:7.5f}, {:7.5f}, {:7.5f}, {:7.5f}, {:7.5f}, {:7.5f}, {:7.5f}, {:7.5f}, {:7.5f}".format(
	        d1[valid_results].mean(), d2[valid_results].mean(), d3[valid_results].mean(),
	        abs_rel[valid_results].mean(), sq_rel[valid_results].mean(), rms[valid_results].mean(),
	        log_rms[valid_results].mean(), silog[valid_results].mean(), log10[valid_results].mean())
	    if not args.no_verbose:
	        print("{:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}".format(
	            'd1', 'd2', 'd3', 'AbsRel', 'SqRel', 'RMSE', 'RMSElog', 'SILog', 'log10'))
	        print(results)
	        print(f'Valid results: {valid_results.sum()}/{len(valid_results)}')

	    return results
	    # return silog, log10, abs_rel, sq_rel, rms, log_rms, d1, d2, d3
	# return silog, log10, abs_rel, sq_rel, rms, log_rms, d1, d2, d3


	if __name__ == '__main__':
	    # Command-line entry point: parse the evaluation options and run test().
	    # Arguments may also be supplied from a file via the '@file' syntax; the
	    # custom line splitter allows several arguments per line in that file.
	    parser = argparse.ArgumentParser(description='BTS TensorFlow implementation.', fromfile_prefix_chars='@')
	    parser.convert_arg_line_to_args = convert_arg_line_to_args

	    parser.add_argument('--pred_path', type=str, help='path to the prediction results in png', required=True)
	    parser.add_argument('--gt_path', type=str, help='root path to the groundtruth data', required=False)
	    parser.add_argument('--dataset', type=str, help='dataset to test on, nyu or kitti', default='nyu')
	    parser.add_argument('--eigen_crop', help='if set, crops according to Eigen NIPS14', action='store_true')
	    parser.add_argument('--garg_crop', help='if set, crops according to Garg ECCV16', action='store_true')
	    parser.add_argument('--min_depth_eval', type=float, help='minimum depth for evaluation', default=1e-3)
	    parser.add_argument('--max_depth_eval', type=float, help='maximum depth for evaluation', default=80)
	    parser.add_argument('--do_kb_crop', help='if set, crop input images as kitti benchmark images', action='store_true')
	    parser.add_argument('--no_verbose', default=False, action='store_true', help='if set, do not print out per image results')
	    parser.add_argument('--using_log', default=False, action='store_true', help='if set, use log depth for eval')
	    parser.add_argument('--using_disp', default=False, action='store_true', help='if set, use disparity (1/depth) for eval')
	    # eval() already honours using_sqrt_disp; expose it on the command line too.
	    parser.add_argument('--using_sqrt_disp', default=False, action='store_true', help='if set, use 1/sqrt(depth) for eval')
	    parser.add_argument('--using_sqrt', default=False, action='store_true', help='if set, use sqrt depth for eval')
	    parser.add_argument('--using_pdf', default=False, action='store_true', help='if set, use pdf for eval')
	    args = parser.parse_args()
	    test(args)
	# load_image_rgb_or_grayscale("/opt/liblibai-models/user-workspace2/users/syq/Depth_Post_Train/dataset/Eval/depth/ETH3D/depth/kicker_dslr_depth/kicker/ground_truth_depth/dslr_images/DSC_6493.JPG")