# Copyright 2023-2025 Marigold Team, ETH Zürich. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # -------------------------------------------------------------------------- # More information about Marigold: # https://marigoldmonodepth.github.io # https://marigoldcomputervision.github.io # Efficient inference pipelines are now part of diffusers: # https://huggingface.co/docs/diffusers/using-diffusers/marigold_usage # https://huggingface.co/docs/diffusers/api/pipelines/marigold # Examples of trained models and live demos: # https://huggingface.co/prs-eth # Related projects: # https://rollingdepth.github.io/ # https://marigolddepthcompletion.github.io/ # Citation (BibTeX): # https://github.com/prs-eth/Marigold#-citation # If you find Marigold useful, we kindly ask you to cite our papers. # -------------------------------------------------------------------------- import argparse import cv2 import h5py import numpy as np import os import pandas as pd from tqdm import tqdm from hypersim_util import dist_2_depth, tone_map IMG_WIDTH = 1024 IMG_HEIGHT = 768 FOCAL_LENGTH = 886.81 if "__main__" == __name__: parser = argparse.ArgumentParser() parser.add_argument( "--split_csv", type=str, default="data/Hypersim/metadata_images_split_scene_v1.csv", ) parser.add_argument("--dataset_dir", type=str, default="data/Hypersim/raw_data") parser.add_argument("--output_dir", type=str, default="data/Hypersim/processed") args = parser.parse_args() split_csv = args.split_csv dataset_dir = args.dataset_dir output_dir = args.output_dir # %% raw_meta_df = pd.read_csv(split_csv) meta_df = raw_meta_df[raw_meta_df.included_in_public_release].copy() # %% for split in ["train", "val", "test"]: split_output_dir = os.path.join(output_dir, split) os.makedirs(split_output_dir) split_meta_df = meta_df[meta_df.split_partition_name == split].copy() split_meta_df["rgb_path"] = None split_meta_df["rgb_mean"] = np.nan split_meta_df["rgb_std"] = np.nan split_meta_df["rgb_min"] = np.nan split_meta_df["rgb_max"] = np.nan split_meta_df["depth_path"] = None split_meta_df["depth_mean"] = np.nan split_meta_df["depth_std"] = np.nan split_meta_df["depth_min"] = np.nan split_meta_df["depth_max"] = np.nan split_meta_df["invalid_ratio"] = np.nan for i, row in tqdm(split_meta_df.iterrows(), total=len(split_meta_df)): # Load data rgb_path = os.path.join( row.scene_name, "images", f"scene_{row.camera_name}_final_hdf5", f"frame.{row.frame_id:04d}.color.hdf5", ) dist_path = os.path.join( row.scene_name, "images", f"scene_{row.camera_name}_geometry_hdf5", f"frame.{row.frame_id:04d}.depth_meters.hdf5", ) render_entity_id_path = os.path.join( row.scene_name, "images", f"scene_{row.camera_name}_geometry_hdf5", f"frame.{row.frame_id:04d}.render_entity_id.hdf5", ) assert os.path.exists(os.path.join(dataset_dir, rgb_path)) assert os.path.exists(os.path.join(dataset_dir, dist_path)) with h5py.File(os.path.join(dataset_dir, rgb_path), "r") as f: rgb = np.array(f["dataset"]).astype(float) with h5py.File(os.path.join(dataset_dir, dist_path), "r") as f: dist_from_center = np.array(f["dataset"]).astype(float) with h5py.File(os.path.join(dataset_dir, render_entity_id_path), "r") as f: render_entity_id = np.array(f["dataset"]).astype(int) # Tone map rgb_color_tm = tone_map(rgb, render_entity_id) rgb_int = (rgb_color_tm * 255).astype(np.uint8) # [H, W, RGB] # Distance -> depth plane_depth = dist_2_depth( IMG_WIDTH, IMG_HEIGHT, FOCAL_LENGTH, dist_from_center ) valid_mask = render_entity_id != -1 # Record invalid ratio invalid_ratio = (np.prod(valid_mask.shape) - valid_mask.sum()) / np.prod( valid_mask.shape ) plane_depth[~valid_mask] = 0 # Save as png scene_path = row.scene_name if not os.path.exists(os.path.join(split_output_dir, row.scene_name)): os.makedirs(os.path.join(split_output_dir, row.scene_name)) rgb_name = f"rgb_{row.camera_name}_fr{row.frame_id:04d}.png" rgb_path = os.path.join(scene_path, rgb_name) cv2.imwrite( os.path.join(split_output_dir, rgb_path), cv2.cvtColor(rgb_int, cv2.COLOR_RGB2BGR), ) plane_depth *= 1000.0 plane_depth = plane_depth.astype(np.uint16) depth_name = f"depth_plane_{row.camera_name}_fr{row.frame_id:04d}.png" depth_path = os.path.join(scene_path, depth_name) cv2.imwrite(os.path.join(split_output_dir, depth_path), plane_depth) # Meta data split_meta_df.at[i, "rgb_path"] = rgb_path split_meta_df.at[i, "rgb_mean"] = np.mean(rgb_int) split_meta_df.at[i, "rgb_std"] = np.std(rgb_int) split_meta_df.at[i, "rgb_min"] = np.min(rgb_int) split_meta_df.at[i, "rgb_max"] = np.max(rgb_int) split_meta_df.at[i, "depth_path"] = depth_path restored_depth = plane_depth / 1000.0 split_meta_df.at[i, "depth_mean"] = np.mean(restored_depth) split_meta_df.at[i, "depth_std"] = np.std(restored_depth) split_meta_df.at[i, "depth_min"] = np.min(restored_depth) split_meta_df.at[i, "depth_max"] = np.max(restored_depth) split_meta_df.at[i, "invalid_ratio"] = invalid_ratio with open( os.path.join(split_output_dir, f"filename_list_{split}.txt"), "w+" ) as f: lines = split_meta_df.apply( lambda r: f"{r['rgb_path']} {r['depth_path']}", axis=1 ).tolist() f.writelines("\n".join(lines)) with open( os.path.join(split_output_dir, f"filename_meta_{split}.csv"), "w+" ) as f: split_meta_df.to_csv(f, header=True) print("Preprocess finished")