# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. DEFAULT_MODEL = "depth-anything/DA3NESTED-GIANT-LARGE-1.1" DEFAULT_EXPORT_DIR = "workspace/gallery/scene" DEFAULT_GALLERY_DIR = "workspace/gallery" DEFAULT_GRADIO_DIR = "workspace/gradio" THRESH_FOR_REF_SELECTION = 3 # ============================================================================= # Benchmark Evaluation Constants # ============================================================================= # Default evaluation workspace directory DEFAULT_EVAL_WORKSPACE = "workspace/evaluation" # Default reference view selection strategy for evaluation # Use "first" for consistent and reproducible evaluation results # Other options: "saddle_balanced", "auto", "mid" EVAL_REF_VIEW_STRATEGY = "first" # ----------------------------------------------------------------------------- # DTU Dataset Configuration # Reference: https://roboimagedata.compute.dtu.dk/ # Note: DepthAnything3 was never trained on any images from DTU. # ----------------------------------------------------------------------------- # Root directory for DTU evaluation data (MVSNet format) # Download from: https://drive.google.com/file/d/1rX0EXlUL4prRxrRu2DgLJv2j7-tpUD4D/view DTU_EVAL_DATA_ROOT = "workspace/benchmark_dataset/dtu" # List of DTU evaluation scenes DTU_SCENES = [ "scan1", "scan4", "scan9", "scan10", "scan11", "scan12", "scan13", "scan15", "scan23", "scan24", "scan29", "scan32", "scan33", "scan34", "scan48", "scan49", "scan62", "scan75", "scan77", "scan110", "scan114", "scan118", ] # Point cloud fusion hyperparameters DTU_DIST_THRESH = 0.2 # Distance threshold for geometric consistency (mm) DTU_NUM_CONSIST = 4 # Minimum number of consistent views for a point DTU_MAX_POINTS = 4_000_000 # Maximum points in fused point cloud # 3D reconstruction evaluation hyperparameters DTU_DOWN_DENSE = 0.2 # Downsample density for evaluation (mm) DTU_PATCH_SIZE = 60 # Patch size for boundary handling DTU_MAX_DIST = 20 # Outlier threshold for accuracy/completeness (mm) # ----------------------------------------------------------------------------- # DTU-64 Dataset Configuration (Pose Evaluation Only) # This is a subset of DTU with 64 images per scene for pose evaluation. # Note: This dataset is ONLY for pose evaluation, not 3D reconstruction. # ----------------------------------------------------------------------------- # Root directory for DTU-64 evaluation data DTU64_EVAL_DATA_ROOT = "workspace/benchmark_dataset/dtu64" DTU64_CAMERA_ROOT = "workspace/benchmark_dataset/dtu64/Cameras" # List of DTU-64 evaluation scenes (13 scenes) DTU64_SCENES = [ "scan105", "scan114", "scan118", "scan122", "scan24", "scan37", "scan40", "scan55", "scan63", "scan65", "scan69", "scan83", "scan97", ] # ----------------------------------------------------------------------------- # ETH3D Dataset Configuration # Reference: https://www.eth3d.net/ # High-resolution multi-view stereo benchmark with laser-scanned ground truth. # Note: DepthAnything3 was never trained on any images from ETH3D. # ----------------------------------------------------------------------------- # Root directory for ETH3D evaluation data ETH3D_EVAL_DATA_ROOT = "workspace/benchmark_dataset/eth3d" # List of ETH3D evaluation scenes (indoor and outdoor) ETH3D_SCENES = [ "courtyard", "electro", "kicker", "pipes", "relief", # "terrace", # Excluded: known issues "delivery_area", "facade", # "meadow", # Excluded: known issues "office", "playground", "relief_2", "terrains", ] # Images to filter out (known problematic views per scene) ETH3D_FILTER_KEYS = { "delivery_area": ["711.JPG", "712.JPG", "713.JPG", "714.JPG"], "electro": ["9289.JPG", "9290.JPG", "9291.JPG", "9292.JPG", "9293.JPG", "9298.JPG"], "playground": ["587.JPG", "588.JPG", "589.JPG", "590.JPG", "591.JPG", "592.JPG"], "relief": [ "427.JPG", "428.JPG", "429.JPG", "430.JPG", "431.JPG", "432.JPG", "433.JPG", "434.JPG", "435.JPG", "436.JPG", "437.JPG", "438.JPG", ], "relief_2": [ "458.JPG", "459.JPG", "460.JPG", "461.JPG", "462.JPG", "463.JPG", "464.JPG", "465.JPG", "466.JPG", "467.JPG", "468.JPG", ], } # TSDF fusion hyperparameters (scaled for outdoor scenes) ETH3D_VOXEL_LENGTH = 4.0 / 512.0 * 5 # Voxel size for TSDF (meters) ETH3D_SDF_TRUNC = 0.04 * 5 # SDF truncation distance (meters) ETH3D_MAX_DEPTH = 100000.0 # Maximum depth for integration (effectively no truncation) # Point cloud sampling ETH3D_SAMPLING_NUMBER = 1_000_000 # Number of points to sample from mesh # 3D reconstruction evaluation hyperparameters ETH3D_EVAL_THRESHOLD = 0.05 * 5 # Distance threshold for precision/recall (meters) ETH3D_DOWN_SAMPLE = 4.0 / 512.0 * 5 # Voxel size for evaluation downsampling (meters) # ============================================================================== # 7Scenes Dataset Configuration # ============================================================================== # Reference: https://www.microsoft.com/en-us/research/project/rgb-d-dataset-7-scenes/ # Note: Indoor RGB-D dataset with ground truth poses and meshes. # Root directory for 7Scenes evaluation data SEVENSCENES_EVAL_DATA_ROOT = "workspace/benchmark_dataset/7scenes" # List of 7Scenes evaluation scenes SEVENSCENES_SCENES = [ "chess", "fire", "heads", "office", "pumpkin", "redkitchen", "stairs", ] # Fixed camera intrinsics for 7Scenes (all images share same intrinsics) SEVENSCENES_FX = 585.0 SEVENSCENES_FY = 585.0 SEVENSCENES_CX = 320.0 SEVENSCENES_CY = 240.0 # TSDF fusion hyperparameters (indoor scenes, smaller voxels) SEVENSCENES_VOXEL_LENGTH = 4.0 / 512.0 # Voxel size for TSDF (meters) SEVENSCENES_SDF_TRUNC = 0.04 # SDF truncation distance (meters) SEVENSCENES_MAX_DEPTH = 1000000.0 # Maximum depth for integration (no truncation) # Point cloud sampling SEVENSCENES_SAMPLING_NUMBER = 1_000_000 # Number of points to sample from mesh # 3D reconstruction evaluation hyperparameters SEVENSCENES_EVAL_THRESHOLD = 0.05 # Distance threshold for precision/recall (meters) SEVENSCENES_DOWN_SAMPLE = 4.0 / 512.0 # Voxel size for evaluation downsampling (meters) # ============================================================================== # ScanNet++ Dataset Configuration # ============================================================================== # Reference: https://kaldir.vc.in.tum.de/scannetpp/ # Note: High-quality indoor RGB-D dataset with iPhone and DSLR images. # Root directory for ScanNet++ evaluation data SCANNETPP_EVAL_DATA_ROOT = "workspace/benchmark_dataset/scannetpp" # List of ScanNet++ evaluation scenes SCANNETPP_SCENES = [ "09c1414f1b", "1ada7a0617", "40aec5fffa", "3e8bba0176", "acd95847c5", "578511c8a9", "5f99900f09", "c4c04e6d6c", "f3d64c30f8", "7bc286c1b6", "c5439f4607", "286b55a2bf", "fb5a96b1a2", "7831862f02", "38d58a7a31", "bde1e479ad", "9071e139d9", "21d970d8de", "bcd2436daf", "cc5237fd77", ] # Input resolution for ScanNet++ (after undistortion and resize) SCANNETPP_INPUT_H = 768 SCANNETPP_INPUT_W = 1024 # TSDF fusion hyperparameters (indoor scenes) SCANNETPP_VOXEL_LENGTH = 0.02 # Voxel size for TSDF (meters) SCANNETPP_SDF_TRUNC = 0.15 # SDF truncation distance (meters) SCANNETPP_MAX_DEPTH = 5.0 # Maximum depth for integration (meters) # Point cloud sampling SCANNETPP_SAMPLING_NUMBER = 1_000_000 # Number of points to sample from mesh # 3D reconstruction evaluation hyperparameters SCANNETPP_EVAL_THRESHOLD = 0.05 # Distance threshold for precision/recall (meters) SCANNETPP_DOWN_SAMPLE = 0.02 # Voxel size for evaluation downsampling (meters) # ============================================================================== # HiRoom Dataset Configuration # ============================================================================== # Note: Indoor RGB-D dataset. # Root directory for HiRoom evaluation data HIROOM_EVAL_DATA_ROOT = "workspace/benchmark_dataset/hiroom/data" HIROOM_GT_ROOT_PATH = "workspace/benchmark_dataset/hiroom/fused_pcd" HIROOM_SCENE_LIST_PATH = "workspace/benchmark_dataset/hiroom/selected_scene_list_val.txt" # TSDF fusion hyperparameters (indoor scenes) HIROOM_VOXEL_LENGTH = 4.0 / 512.0 # Voxel size for TSDF (meters) HIROOM_SDF_TRUNC = 0.04 # SDF truncation distance (meters) HIROOM_MAX_DEPTH = 10000.0 # Maximum depth for integration (no truncation) # Point cloud sampling HIROOM_SAMPLING_NUMBER = 1_000_000 # Number of points to sample from mesh # 3D reconstruction evaluation hyperparameters HIROOM_EVAL_THRESHOLD = 0.05 # Distance threshold for precision/recall (meters) HIROOM_DOWN_SAMPLE = 4.0 / 512.0 # Voxel size for evaluation downsampling (meters)