Addax-Data-Science committed on
Commit c8ae7e8 · verified · 1 Parent(s): 269a3ed

Upload 174 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. dinov2/__init__.py +6 -0
  2. dinov2/configs/__init__.py +22 -0
  3. dinov2/configs/eval/cell_dino/vitl16_channel_adaptive_pretrain.yaml +35 -0
  4. dinov2/configs/eval/cell_dino/vitl16_pretrain.yaml +14 -0
  5. dinov2/configs/eval/vitb14_pretrain.yaml +6 -0
  6. dinov2/configs/eval/vitb14_reg4_pretrain.yaml +9 -0
  7. dinov2/configs/eval/vitg14_pretrain.yaml +7 -0
  8. dinov2/configs/eval/vitg14_reg4_pretrain.yaml +10 -0
  9. dinov2/configs/eval/vitl14_pretrain.yaml +6 -0
  10. dinov2/configs/eval/vitl14_reg4_pretrain.yaml +9 -0
  11. dinov2/configs/eval/vits14_pretrain.yaml +6 -0
  12. dinov2/configs/eval/vits14_reg4_pretrain.yaml +9 -0
  13. dinov2/configs/ssl_default_config.yaml +123 -0
  14. dinov2/configs/train/cell_dino/vitl16_boc_hpafov.yaml +31 -0
  15. dinov2/configs/train/cell_dino/vitl16_hpafov.yaml +32 -0
  16. dinov2/configs/train/cell_dino/vitl16_hpaone.yaml +30 -0
  17. dinov2/configs/train/vitg14.yaml +26 -0
  18. dinov2/configs/train/vitl14.yaml +26 -0
  19. dinov2/configs/train/vitl16_short.yaml +6 -0
  20. dinov2/data/__init__.py +12 -0
  21. dinov2/data/accumulators.py +133 -0
  22. dinov2/data/adapters.py +51 -0
  23. dinov2/data/augmentations.py +118 -0
  24. dinov2/data/cell_dino/augmentations.py +91 -0
  25. dinov2/data/cell_dino/transforms.py +169 -0
  26. dinov2/data/collate.py +49 -0
  27. dinov2/data/datasets/__init__.py +12 -0
  28. dinov2/data/datasets/cell_dino/chammi_cp.py +112 -0
  29. dinov2/data/datasets/cell_dino/chammi_hpa.py +111 -0
  30. dinov2/data/datasets/cell_dino/chammi_wtc.py +108 -0
  31. dinov2/data/datasets/cell_dino/hpafov.py +283 -0
  32. dinov2/data/datasets/cell_dino/hpaone.py +223 -0
  33. dinov2/data/datasets/decoders.py +94 -0
  34. dinov2/data/datasets/extended.py +44 -0
  35. dinov2/data/datasets/image_net.py +290 -0
  36. dinov2/data/datasets/image_net_22k.py +302 -0
  37. dinov2/data/loaders.py +232 -0
  38. dinov2/data/masking.py +86 -0
  39. dinov2/data/samplers.py +229 -0
  40. dinov2/data/transforms.py +91 -0
  41. dinov2/distributed/__init__.py +270 -0
  42. dinov2/eval/__init__.py +4 -0
  43. dinov2/eval/cell_dino/knn.py +479 -0
  44. dinov2/eval/cell_dino/linear.py +1048 -0
  45. dinov2/eval/cell_dino/utils.py +542 -0
  46. dinov2/eval/depth/__init__.py +4 -0
  47. dinov2/eval/depth/models/__init__.py +10 -0
  48. dinov2/eval/depth/models/backbones/__init__.py +6 -0
  49. dinov2/eval/depth/models/backbones/vision_transformer.py +16 -0
  50. dinov2/eval/depth/models/builder.py +49 -0
dinov2/__init__.py ADDED
@@ -0,0 +1,6 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ __version__ = "0.0.1"
dinov2/configs/__init__.py ADDED
@@ -0,0 +1,22 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ import pathlib
+
+ from omegaconf import OmegaConf
+
+
+ def load_config(config_name: str):
+     config_filename = config_name + ".yaml"
+     return OmegaConf.load(pathlib.Path(__file__).parent.resolve() / config_filename)
+
+
+ dinov2_default_config = load_config("ssl_default_config")
+
+
+ def load_and_merge_config(config_name: str):
+     default_config = OmegaConf.create(dinov2_default_config)
+     loaded_config = load_config(config_name)
+     return OmegaConf.merge(default_config, loaded_config)
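
Review note: `load_and_merge_config` layers a named config on top of `ssl_default_config.yaml`, with the named config's keys taking precedence via `OmegaConf.merge`. A minimal usage sketch, assuming the package and its bundled YAML files are importable:

from dinov2.configs import load_and_merge_config

# Overlay eval/cell_dino/vitl16_pretrain.yaml onto the defaults;
# OmegaConf.merge lets the overlay override matching keys.
cfg = load_and_merge_config("eval/cell_dino/vitl16_pretrain")
print(cfg.student.arch)   # "vit_large" (set by the overlay)
print(cfg.optim.base_lr)  # 0.004 (inherited from ssl_default_config.yaml)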
dinov2/configs/eval/cell_dino/vitl16_channel_adaptive_pretrain.yaml ADDED
@@ -0,0 +1,35 @@
+ train:
+   batch_size_per_gpu: 32
+   OFFICIAL_EPOCH_LENGTH: 450
+   cell_augmentation: true
+   channel_adaptive: true
+ student:
+   arch: vit_large
+   patch_size: 16
+   num_register_tokens: 0
+   interpolate_antialias: false
+   interpolate_offset: 0.1
+   drop_path_rate: 0.1
+   in_chans: 1
+   block_chunks: 4
+   channel_adaptive: true
+ teacher:
+   momentum_teacher: 0.996
+   warmup_teacher_temp_epochs: 20
+   in_chans: 1
+   channel_adaptive: true
+ crops:
+   global_crops_scale:
+   - 0.4
+   - 1.0
+   local_crops_number: 8
+   local_crops_scale:
+   - 0.005
+   - 0.4
+   global_crops_size: 224
+   local_crops_size: 96
+ optim:
+   weight_decay_end: 0.2
+   base_lr: 5.0e-4
+   warmup_epochs: 20
+   epochs: 400
dinov2/configs/eval/cell_dino/vitl16_pretrain.yaml ADDED
@@ -0,0 +1,14 @@
+ student:
+   arch: vit_large
+   patch_size: 16
+   num_register_tokens: 0
+   interpolate_antialias: false
+   interpolate_offset: 0.1
+   drop_path_rate: 0.1
+   in_chans: 4
+   block_chunks: 4
+ teacher:
+   in_chans: 4
+ crops:
+   global_crops_size: 224
+   local_crops_size: 96
dinov2/configs/eval/vitb14_pretrain.yaml ADDED
@@ -0,0 +1,6 @@
+ student:
+   arch: vit_base
+   patch_size: 14
+ crops:
+   global_crops_size: 518 # this is to set up the position embeddings properly
+   local_crops_size: 98
dinov2/configs/eval/vitb14_reg4_pretrain.yaml ADDED
@@ -0,0 +1,9 @@
+ student:
+   arch: vit_base
+   patch_size: 14
+   num_register_tokens: 4
+   interpolate_antialias: true
+   interpolate_offset: 0.0
+ crops:
+   global_crops_size: 518 # this is to set up the position embeddings properly
+   local_crops_size: 98
dinov2/configs/eval/vitg14_pretrain.yaml ADDED
@@ -0,0 +1,7 @@
+ student:
+   arch: vit_giant2
+   patch_size: 14
+   ffn_layer: swiglufused
+ crops:
+   global_crops_size: 518 # this is to set up the position embeddings properly
+   local_crops_size: 98
dinov2/configs/eval/vitg14_reg4_pretrain.yaml ADDED
@@ -0,0 +1,10 @@
+ student:
+   arch: vit_giant2
+   patch_size: 14
+   ffn_layer: swiglufused
+   num_register_tokens: 4
+   interpolate_antialias: true
+   interpolate_offset: 0.0
+ crops:
+   global_crops_size: 518 # this is to set up the position embeddings properly
+   local_crops_size: 98
dinov2/configs/eval/vitl14_pretrain.yaml ADDED
@@ -0,0 +1,6 @@
+ student:
+   arch: vit_large
+   patch_size: 14
+ crops:
+   global_crops_size: 518 # this is to set up the position embeddings properly
+   local_crops_size: 98
dinov2/configs/eval/vitl14_reg4_pretrain.yaml ADDED
@@ -0,0 +1,9 @@
+ student:
+   arch: vit_large
+   patch_size: 14
+   num_register_tokens: 4
+   interpolate_antialias: true
+   interpolate_offset: 0.0
+ crops:
+   global_crops_size: 518 # this is to set up the position embeddings properly
+   local_crops_size: 98
dinov2/configs/eval/vits14_pretrain.yaml ADDED
@@ -0,0 +1,6 @@
+ student:
+   arch: vit_small
+   patch_size: 14
+ crops:
+   global_crops_size: 518 # this is to set up the position embeddings properly
+   local_crops_size: 98
dinov2/configs/eval/vits14_reg4_pretrain.yaml ADDED
@@ -0,0 +1,9 @@
+ student:
+   arch: vit_small
+   patch_size: 14
+   num_register_tokens: 4
+   interpolate_antialias: true
+   interpolate_offset: 0.0
+ crops:
+   global_crops_size: 518 # this is to set up the position embeddings properly
+   local_crops_size: 98
dinov2/configs/ssl_default_config.yaml ADDED
@@ -0,0 +1,123 @@
+ MODEL:
+   WEIGHTS: ''
+ compute_precision:
+   grad_scaler: true
+   teacher:
+     backbone:
+       sharding_strategy: SHARD_GRAD_OP
+       mixed_precision:
+         param_dtype: fp16
+         reduce_dtype: fp16
+         buffer_dtype: fp32
+     dino_head:
+       sharding_strategy: SHARD_GRAD_OP
+       mixed_precision:
+         param_dtype: fp16
+         reduce_dtype: fp16
+         buffer_dtype: fp32
+     ibot_head:
+       sharding_strategy: SHARD_GRAD_OP
+       mixed_precision:
+         param_dtype: fp16
+         reduce_dtype: fp16
+         buffer_dtype: fp32
+   student:
+     backbone:
+       sharding_strategy: SHARD_GRAD_OP
+       mixed_precision:
+         param_dtype: fp16
+         reduce_dtype: fp16
+         buffer_dtype: fp32
+     dino_head:
+       sharding_strategy: SHARD_GRAD_OP
+       mixed_precision:
+         param_dtype: fp16
+         reduce_dtype: fp32
+         buffer_dtype: fp32
+     ibot_head:
+       sharding_strategy: SHARD_GRAD_OP
+       mixed_precision:
+         param_dtype: fp16
+         reduce_dtype: fp32
+         buffer_dtype: fp32
+ dino:
+   loss_weight: 1.0
+   head_n_prototypes: 65536
+   head_bottleneck_dim: 256
+   head_nlayers: 3
+   head_hidden_dim: 2048
+   koleo_loss_weight: 0.1
+ ibot:
+   loss_weight: 1.0
+   mask_sample_probability: 0.5
+   mask_ratio_min_max:
+   - 0.1
+   - 0.5
+   separate_head: false
+   head_n_prototypes: 65536
+   head_bottleneck_dim: 256
+   head_nlayers: 3
+   head_hidden_dim: 2048
+ train:
+   batch_size_per_gpu: 64
+   dataset_path: ImageNet:split=TRAIN
+   output_dir: .
+   saveckp_freq: 20
+   seed: 0
+   num_workers: 10
+   OFFICIAL_EPOCH_LENGTH: 1250
+   cache_dataset: true
+   centering: "centering" # or "sinkhorn_knopp"
+   cell_augmentation: false
+ student:
+   arch: vit_large
+   patch_size: 16
+   drop_path_rate: 0.3
+   layerscale: 1.0e-05
+   drop_path_uniform: true
+   pretrained_weights: ''
+   ffn_layer: "mlp"
+   block_chunks: 0
+   qkv_bias: true
+   proj_bias: true
+   ffn_bias: true
+   num_register_tokens: 0
+   interpolate_antialias: false
+   interpolate_offset: 0.1
+   in_chans: 3
+   channel_adaptive: false
+ teacher:
+   momentum_teacher: 0.992
+   final_momentum_teacher: 1
+   warmup_teacher_temp: 0.04
+   teacher_temp: 0.07
+   warmup_teacher_temp_epochs: 30
+   in_chans: 3
+   channel_adaptive: false
+ optim:
+   epochs: 100
+   weight_decay: 0.04
+   weight_decay_end: 0.4
+   base_lr: 0.004 # learning rate for a batch size of 1024
+   lr: 0. # will be set after applying scaling rule
+   warmup_epochs: 10
+   min_lr: 1.0e-06
+   clip_grad: 3.0
+   freeze_last_layer_epochs: 1
+   scaling_rule: sqrt_wrt_1024
+   patch_embed_lr_mult: 0.2
+   layerwise_decay: 0.9
+   adamw_beta1: 0.9
+   adamw_beta2: 0.999
+ crops:
+   global_crops_scale:
+   - 0.32
+   - 1.0
+   local_crops_number: 8
+   local_crops_scale:
+   - 0.05
+   - 0.32
+   global_crops_size: 224
+   local_crops_size: 96
+ evaluation:
+   eval_period_iterations: 12500
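
Review note: `lr` is left at 0 and derived from `base_lr` at startup via `scaling_rule: sqrt_wrt_1024`. The helper that applies it lives in the training entry point, which is outside this 50-file view, so the sketch below is an assumption based on the upstream DINOv2 rule (square-root scaling relative to the reference global batch size of 1024 that the `base_lr` comment refers to):

import math

def scaled_lr(base_lr: float, batch_size_per_gpu: int, num_gpus: int) -> float:
    # sqrt scaling with respect to a reference global batch size of 1024
    global_batch_size = batch_size_per_gpu * num_gpus
    return base_lr * math.sqrt(global_batch_size / 1024.0)

# e.g. the defaults above on 16 GPUs: 64 * 16 = 1024, so lr == base_lr
print(scaled_lr(0.004, batch_size_per_gpu=64, num_gpus=16))  # 0.004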
dinov2/configs/train/cell_dino/vitl16_boc_hpafov.yaml ADDED
@@ -0,0 +1,31 @@
+ train:
+   batch_size_per_gpu: 16
+   OFFICIAL_EPOCH_LENGTH: 450
+   cell_augmentation: true
+   channel_adaptive: true
+ student:
+   arch: vit_large
+   patch_size: 16
+   in_chans: 1
+   drop_path_rate: 0.1
+   block_chunks: 4
+ teacher:
+   momentum_teacher: 0.996
+   warmup_teacher_temp_epochs: 20
+   in_chans: 1
+ crops:
+   global_crops_scale:
+   - 0.4
+   - 1.0
+   local_crops_number: 8
+   local_crops_scale:
+   - 0.005
+   - 0.4
+   global_crops_size: 224
+   local_crops_size: 96
+ optim:
+   weight_decay_end: 0.2
+   base_lr: 5.0e-4
+   warmup_epochs: 20
+   epochs: 400
+
dinov2/configs/train/cell_dino/vitl16_hpafov.yaml ADDED
@@ -0,0 +1,32 @@
+ train:
+   batch_size_per_gpu: 16
+   OFFICIAL_EPOCH_LENGTH: 450
+   cell_augmentation: true
+ student:
+   arch: vit_large
+   patch_size: 16
+   in_chans: 4
+   drop_path_rate: 0.1
+   block_chunks: 4
+ teacher:
+   momentum_teacher: 0.996
+   warmup_teacher_temp_epochs: 20
+   in_chans: 4
+ optim:
+   weight_decay_end: 0.2
+   base_lr: 5.0e-4
+   warmup_epochs: 20
+ crops:
+   global_crops_scale:
+   - 0.4
+   - 1.0
+   local_crops_number: 8
+   local_crops_scale:
+   - 0.005
+   - 0.4
+   global_crops_size: 224
+   local_crops_size: 96
+ evaluation:
+   eval_period_iterations: 9000
+
+
dinov2/configs/train/cell_dino/vitl16_hpaone.yaml ADDED
@@ -0,0 +1,30 @@
+ train:
+   batch_size_per_gpu: 16
+   OFFICIAL_EPOCH_LENGTH: 1756
+   cell_augmentation: true
+ student:
+   arch: vit_large
+   patch_size: 16
+   in_chans: 4
+   drop_path_rate: 0.1
+   block_chunks: 4
+ teacher:
+   momentum_teacher: 0.996
+   warmup_teacher_temp_epochs: 20
+   in_chans: 4
+ optim:
+   weight_decay_end: 0.2
+   base_lr: 5.0e-4
+   warmup_epochs: 20
+ crops:
+   global_crops_scale:
+   - 0.4
+   - 1.0
+   local_crops_number: 8
+   local_crops_scale:
+   - 0.005
+   - 0.4
+   global_crops_size: 224
+   local_crops_size: 96
+ evaluation:
+   eval_period_iterations: 9000
dinov2/configs/train/vitg14.yaml ADDED
@@ -0,0 +1,26 @@
+ dino:
+   head_n_prototypes: 131072
+   head_bottleneck_dim: 384
+ ibot:
+   separate_head: true
+   head_n_prototypes: 131072
+ train:
+   batch_size_per_gpu: 12
+   dataset_path: ImageNet22k
+   centering: sinkhorn_knopp
+ student:
+   arch: vit_giant2
+   patch_size: 14
+   drop_path_rate: 0.4
+   ffn_layer: swiglufused
+   block_chunks: 4
+ teacher:
+   momentum_teacher: 0.994
+ optim:
+   epochs: 500
+   weight_decay_end: 0.2
+   base_lr: 2.0e-04 # learning rate for a batch size of 1024
+   warmup_epochs: 80
+   layerwise_decay: 1.0
+ crops:
+   local_crops_size: 98
dinov2/configs/train/vitl14.yaml ADDED
@@ -0,0 +1,26 @@
+ dino:
+   head_n_prototypes: 131072
+   head_bottleneck_dim: 384
+ ibot:
+   separate_head: true
+   head_n_prototypes: 131072
+ train:
+   batch_size_per_gpu: 32
+   dataset_path: ImageNet22k
+   centering: sinkhorn_knopp
+ student:
+   arch: vit_large
+   patch_size: 14
+   drop_path_rate: 0.4
+   ffn_layer: swiglufused
+   block_chunks: 4
+ teacher:
+   momentum_teacher: 0.994
+ optim:
+   epochs: 500
+   weight_decay_end: 0.2
+   base_lr: 2.0e-04 # learning rate for a batch size of 1024
+   warmup_epochs: 80
+   layerwise_decay: 1.0
+ crops:
+   local_crops_size: 98
dinov2/configs/train/vitl16_short.yaml ADDED
@@ -0,0 +1,6 @@
+ # this corresponds to the default config
+ train:
+   dataset_path: ImageNet:split=TRAIN
+   batch_size_per_gpu: 64
+ student:
+   block_chunks: 4
dinov2/data/__init__.py ADDED
@@ -0,0 +1,12 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ from .adapters import DatasetWithEnumeratedTargets
+ from .loaders import make_data_loader, make_dataset, SamplerType
+ from .collate import collate_data_and_cast
+ from .masking import MaskingGenerator
+ from .augmentations import DataAugmentationDINO
+ from .cell_dino.augmentations import CellAugmentationDINO
+ from .accumulators import NoOpAccumulator, ResultsAccumulator
dinov2/data/accumulators.py ADDED
@@ -0,0 +1,133 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ from collections import defaultdict
+ from typing import Dict, List, Optional, Any
+
+ import torch
+ from torch import Tensor
+ from torch.nn import functional as F
+
+ import torch.distributed as dist
+ from dinov2.distributed import get_global_size
+
+
+ def _simple_gather_all_tensors(result: torch.Tensor, group: Any, world_size: int) -> List[torch.Tensor]:
+     gathered_result = [torch.zeros_like(result) for _ in range(world_size)]
+     dist.all_gather(gathered_result, result, group)
+     return gathered_result
+
+
+ def gather_all_tensors(result: torch.Tensor, group: Optional[Any] = None) -> List[torch.Tensor]:
+     """
+     Copied from https://github.com/Lightning-AI/torchmetrics/blob/master/src/torchmetrics/utilities/distributed.py
+     Gather all tensors from several ddp processes onto a list that is broadcasted to all processes.
+
+     Works on tensors that have the same number of dimensions, but where each dimension may differ. In this case
+     tensors are padded, gathered and then trimmed to secure equal workload for all processes.
+
+     Args:
+         result: the value to sync
+         group: the process group to gather results from. Defaults to all processes (world)
+
+     Return:
+         list with size equal to the process group where element i corresponds to result tensor from process i
+     """
+     # convert tensors to contiguous format
+     result = result.contiguous()
+
+     world_size = get_global_size()
+     dist.barrier(group=group)
+
+     # if the tensor is scalar, things are easy
+     if result.ndim == 0:
+         return _simple_gather_all_tensors(result, group, world_size)
+
+     # 1. Gather sizes of all tensors
+     local_size = torch.tensor(result.shape, device=result.device)
+     local_sizes = [torch.zeros_like(local_size) for _ in range(world_size)]
+     dist.all_gather(local_sizes, local_size, group=group)
+     max_size = torch.stack(local_sizes).max(dim=0).values
+     all_sizes_equal = all(all(ls == max_size) for ls in local_sizes)
+
+     # 2. If shapes are all the same, then do a simple gather:
+     if all_sizes_equal:
+         return _simple_gather_all_tensors(result, group, world_size)
+
+     # 3. If not, we need to pad each local tensor to maximum size, gather and then truncate
+     pad_dims = []
+     pad_by = (max_size - local_size).detach().cpu()
+     for val in reversed(pad_by):
+         pad_dims.append(0)
+         pad_dims.append(val.item())
+     result_padded = F.pad(result, pad_dims)
+     gathered_result = [torch.zeros_like(result_padded) for _ in range(world_size)]
+     dist.all_gather(gathered_result, result_padded, group)
+     for idx, item_size in enumerate(local_sizes):
+         slice_param = [slice(dim_size) for dim_size in item_size]
+         gathered_result[idx] = gathered_result[idx][slice_param]
+     return gathered_result
+
+
+ def _cat_and_gather_tensor_list(tensor_list: List[Tensor]) -> Tensor:
+     local_cat = torch.cat(tensor_list)
+     return torch.cat(gather_all_tensors(local_cat))
+
+
+ class Accumulator:
+     def __init__(self) -> None:
+         pass
+
+     def update(self, preds: Tensor, target: Tensor, index: Tensor) -> None:
+         raise NotImplementedError
+
+     def accumulate(self) -> Optional[Dict[str, Tensor]]:
+         raise NotImplementedError
+
+
+ class NoOpAccumulator(Accumulator):
+     def __init__(self) -> None:
+         pass
+
+     def update(self, preds: Tensor, target: Tensor, index: Tensor) -> None:
+         pass
+
+     def accumulate(self) -> None:
+         return None
+
+
+ class ResultsAccumulator(Accumulator):
+     """
+     Accumulate predictions and targets across processes
+     """
+
+     def __init__(self) -> None:
+         self._local_values: Dict[str, List[Tensor]] = defaultdict(list)
+         self._gathered_values: Dict[str, Tensor] = {}
+         self._gathered = False
+
+     def update(self, preds: Tensor, target: Tensor, index: Tensor) -> None:
+         assert len(preds) == len(target) == len(index)
+         assert not self._gathered, "Tensors have already been gathered in this helper"
+         self._local_values["preds"].append(preds)
+         self._local_values["target"].append(target)
+         self._local_values["index"].append(index)
+         self._gathered = False
+
+     def _gather_tensors(self):
+         for k, tensor_list in self._local_values.items():
+             self._gathered_values[k] = _cat_and_gather_tensor_list(tensor_list)
+         self._gathered = True
+
+     def accumulate(self) -> Dict[str, Tensor]:
+         if not self._gathered:
+             self._gather_tensors()
+         preds, target, index = [self._gathered_values[k] for k in ["preds", "target", "index"]]
+         assert len(preds) == len(target) == len(index) and index.min() == 0
+         preds_ordered = torch.zeros((index.max() + 1, *preds.shape[1:]), dtype=preds.dtype, device=preds.device)
+         preds_ordered[index] = preds
+         target_ordered = torch.zeros((index.max() + 1, *target.shape[1:]), dtype=target.dtype, device=target.device)
+         target_ordered[index] = target
+         return {"preds": preds_ordered, "target": target_ordered}
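
Review note: the subtle step in `ResultsAccumulator.accumulate` is the index-based reordering — batches gathered from different ranks arrive interleaved, and the enumerated sample index is used to scatter rows back into dataset order. A single-process sketch of just that step:

import torch

preds = torch.tensor([[0.9], [0.1], [0.5]])  # gathered predictions, rank-interleaved
index = torch.tensor([2, 0, 1])              # enumerated dataset index per row
preds_ordered = torch.zeros((int(index.max()) + 1, *preds.shape[1:]), dtype=preds.dtype)
preds_ordered[index] = preds                 # scatter rows back into dataset order
print(preds_ordered)                         # [[0.1], [0.5], [0.9]]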
dinov2/data/adapters.py ADDED
@@ -0,0 +1,51 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ from typing import Any, Tuple, Optional
+
+ from torch.utils.data import Dataset
+
+
+ class DatasetWithEnumeratedTargets(Dataset):
+     """
+     If pad_dataset is set, pads based on torch's DistributedSampler implementation, which
+     with drop_last=False pads the last batch to be a multiple of the world size.
+     https://github.com/pytorch/pytorch/blob/main/torch/utils/data/distributed.py#L91
+     """
+
+     def __init__(self, dataset: Dataset, pad_dataset: bool = False, num_replicas: Optional[int] = None):
+         self._dataset = dataset
+         self._size = len(self._dataset)
+         self._padded_size = self._size
+         self._pad_dataset = pad_dataset
+         if self._pad_dataset:
+             assert num_replicas is not None, "num_replicas should be set if pad_dataset is True"
+             self._padded_size = num_replicas * ((len(dataset) + num_replicas - 1) // num_replicas)
+
+     def get_image_relpath(self, index: int) -> str:
+         assert self._pad_dataset or index < self._size
+         return self._dataset.get_image_relpath(index % self._size)
+
+     def get_image_data(self, index: int) -> bytes:
+         assert self._pad_dataset or index < self._size
+         return self._dataset.get_image_data(index % self._size)
+
+     def get_target(self, index: int) -> Tuple[Any, int]:
+         target = self._dataset.get_target(index % self._size)
+         if index >= self._size:
+             assert self._pad_dataset
+             return (-1, target)
+         return (index, target)
+
+     def __getitem__(self, index: int) -> Tuple[Any, Tuple[Any, int]]:
+         image, target = self._dataset[index % self._size]
+         if index >= self._size:
+             assert self._pad_dataset
+             return image, (-1, target)
+         target = index if target is None else target
+         return image, (index, target)
+
+     def __len__(self) -> int:
+         return self._padded_size
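
Review note: the padded size is a ceiling division to the next multiple of the world size, matching DistributedSampler with drop_last=False. A quick worked example:

num_replicas = 4   # world size
dataset_len = 10
padded_size = num_replicas * ((dataset_len + num_replicas - 1) // num_replicas)
print(padded_size)  # 12; indices 10 and 11 wrap around and are tagged with index -1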
dinov2/data/augmentations.py ADDED
@@ -0,0 +1,118 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ import logging
+
+ from torchvision import transforms
+
+ from .transforms import (
+     GaussianBlur,
+     make_normalize_transform,
+ )
+
+
+ logger = logging.getLogger("dinov2")
+
+
+ class DataAugmentationDINO(object):
+     def __init__(
+         self,
+         global_crops_scale,
+         local_crops_scale,
+         local_crops_number,
+         global_crops_size=224,
+         local_crops_size=96,
+     ):
+         self.global_crops_scale = global_crops_scale
+         self.local_crops_scale = local_crops_scale
+         self.local_crops_number = local_crops_number
+         self.global_crops_size = global_crops_size
+         self.local_crops_size = local_crops_size
+
+         logger.info("###################################")
+         logger.info("Using data augmentation parameters:")
+         logger.info(f"global_crops_scale: {global_crops_scale}")
+         logger.info(f"local_crops_scale: {local_crops_scale}")
+         logger.info(f"local_crops_number: {local_crops_number}")
+         logger.info(f"global_crops_size: {global_crops_size}")
+         logger.info(f"local_crops_size: {local_crops_size}")
+         logger.info("###################################")
+
+         # random resized crop and flip
+         self.geometric_augmentation_global = transforms.Compose(
+             [
+                 transforms.RandomResizedCrop(
+                     global_crops_size, scale=global_crops_scale, interpolation=transforms.InterpolationMode.BICUBIC
+                 ),
+                 transforms.RandomHorizontalFlip(p=0.5),
+             ]
+         )
+
+         self.geometric_augmentation_local = transforms.Compose(
+             [
+                 transforms.RandomResizedCrop(
+                     local_crops_size, scale=local_crops_scale, interpolation=transforms.InterpolationMode.BICUBIC
+                 ),
+                 transforms.RandomHorizontalFlip(p=0.5),
+             ]
+         )
+
+         # color distortions / blurring
+         color_jittering = transforms.Compose(
+             [
+                 transforms.RandomApply(
+                     [transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.2, hue=0.1)],
+                     p=0.8,
+                 ),
+                 transforms.RandomGrayscale(p=0.2),
+             ]
+         )
+
+         global_transfo1_extra = GaussianBlur(p=1.0)
+
+         global_transfo2_extra = transforms.Compose(
+             [
+                 GaussianBlur(p=0.1),
+                 transforms.RandomSolarize(threshold=128, p=0.2),
+             ]
+         )
+
+         local_transfo_extra = GaussianBlur(p=0.5)
+
+         # normalization
+         self.normalize = transforms.Compose(
+             [
+                 transforms.ToTensor(),
+                 make_normalize_transform(),
+             ]
+         )
+
+         self.global_transfo1 = transforms.Compose([color_jittering, global_transfo1_extra, self.normalize])
+         self.global_transfo2 = transforms.Compose([color_jittering, global_transfo2_extra, self.normalize])
+         self.local_transfo = transforms.Compose([color_jittering, local_transfo_extra, self.normalize])
+
+     def __call__(self, image):
+         output = {}
+
+         # global crops:
+         im1_base = self.geometric_augmentation_global(image)
+         global_crop_1 = self.global_transfo1(im1_base)
+
+         im2_base = self.geometric_augmentation_global(image)
+         global_crop_2 = self.global_transfo2(im2_base)
+
+         output["global_crops"] = [global_crop_1, global_crop_2]
+
+         # global crops for teacher:
+         output["global_crops_teacher"] = [global_crop_1, global_crop_2]
+
+         # local crops:
+         local_crops = [
+             self.local_transfo(self.geometric_augmentation_local(image)) for _ in range(self.local_crops_number)
+         ]
+         output["local_crops"] = local_crops
+         output["offsets"] = ()
+
+         return output
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the CC-by-NC licence,
4
+ # found in the LICENSE_CELL_DINO_CODE file in the root directory of this source tree.
5
+
6
+ import logging
7
+ import torchvision
8
+ from torchvision import transforms
9
+
10
+ from .transforms import (
11
+ RandomContrastProteinChannel,
12
+ RandomRemoveChannelExceptProtein,
13
+ RandomBrightness,
14
+ RandomContrast,
15
+ Div255,
16
+ SelfNormalizeNoDiv,
17
+ )
18
+
19
+ logger = logging.getLogger("dinov2")
20
+
21
+
22
+ class CellAugmentationDINO(object):
23
+ def __init__(
24
+ self,
25
+ global_crops_scale,
26
+ local_crops_scale,
27
+ local_crops_number,
28
+ global_crops_size=224,
29
+ local_crops_size=96,
30
+ ):
31
+ self.global_crops_scale = global_crops_scale
32
+ self.local_crops_scale = local_crops_scale
33
+ self.local_crops_number = local_crops_number
34
+ self.global_crops_size = global_crops_size
35
+ self.local_crops_size = local_crops_size
36
+
37
+ logger.info("###################################")
38
+ logger.info("Using data augmentation parameters:")
39
+ logger.info(f"global_crops_scale: {global_crops_scale}")
40
+ logger.info(f"local_crops_scale: {local_crops_scale}")
41
+ logger.info(f"local_crops_number: {local_crops_number}")
42
+ logger.info(f"global_crops_size: {global_crops_size}")
43
+ logger.info(f"local_crops_size: {local_crops_size}")
44
+ logger.info("###################################")
45
+
46
+ additional_transforms_list = [
47
+ torchvision.transforms.RandomHorizontalFlip(),
48
+ torchvision.transforms.RandomVerticalFlip(),
49
+ RandomBrightness(),
50
+ RandomContrast(),
51
+ SelfNormalizeNoDiv(),
52
+ ]
53
+
54
+ first_transforms_list = [
55
+ Div255(),
56
+ RandomRemoveChannelExceptProtein(),
57
+ RandomContrastProteinChannel(),
58
+ ]
59
+
60
+ global_transforms_list = first_transforms_list.copy()
61
+ global_transforms_list.append(
62
+ torchvision.transforms.RandomResizedCrop(size=global_crops_size, scale=global_crops_scale)
63
+ )
64
+ global_transforms_list = global_transforms_list + additional_transforms_list
65
+
66
+ local_transforms_list = first_transforms_list
67
+ local_transforms_list.append(
68
+ torchvision.transforms.RandomResizedCrop(size=local_crops_size, scale=local_crops_scale)
69
+ )
70
+ local_transforms_list = local_transforms_list + additional_transforms_list
71
+
72
+ self.global_transform = transforms.Compose(global_transforms_list)
73
+ self.local_transform = transforms.Compose(local_transforms_list)
74
+
75
+ def __call__(self, image):
76
+ output = {}
77
+
78
+ global_crop1 = self.global_transform(image)
79
+ global_crop2 = self.global_transform(image)
80
+
81
+ output["global_crops"] = [global_crop1, global_crop2]
82
+
83
+ local_crops = []
84
+ for _ in range(self.local_crops_number):
85
+ local_crops.append(self.local_transform(image))
86
+
87
+ output["local_crops"] = local_crops
88
+ output["global_crops_teacher"] = [global_crop1, global_crop2]
89
+ output["offsets"] = ()
90
+
91
+ return output
dinov2/data/cell_dino/transforms.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the CC-by-NC licence,
4
+ # found in the LICENSE_CELL_DINO_CODE file in the root directory of this source tree.
5
+
6
+ import torch
7
+ from torchvision import transforms
8
+ import numpy as np
9
+ from enum import Enum
10
+
11
+
12
+ class NormalizationType(Enum):
13
+ SELF_NORM_AUG_DECODER = "self_norm_aug_decoder"
14
+ SELF_NORM_CENTER_CROP = "self_norm_center_crop"
15
+
16
+
17
+ class Div255(torch.nn.Module):
18
+ def forward(self, x):
19
+ x = x / 255
20
+ return x
21
+
22
+
23
+ class SelfNormalizeNoDiv(torch.nn.Module):
24
+ def forward(self, x):
25
+ m = x.mean((-2, -1), keepdim=True)
26
+ s = x.std((-2, -1), unbiased=False, keepdim=True)
27
+ x -= m
28
+ x /= s + 1e-7
29
+ return x
30
+
31
+
32
+ class SelfNormalize(torch.nn.Module):
33
+ def forward(self, x):
34
+ x = x / 255
35
+ m = x.mean((-2, -1), keepdim=True)
36
+ s = x.std((-2, -1), unbiased=False, keepdim=True)
37
+ x -= m
38
+ x /= s + 1e-7
39
+ return x
40
+
41
+
42
+ class RandomContrastProteinChannel(torch.nn.Module):
43
+ """
44
+ Random constrast rescaling of the protein channel only.
45
+ RescaleProtein function in Dino4cell codebase.
46
+ """
47
+
48
+ def __init__(self, p=0.2):
49
+ super().__init__()
50
+ self.p = p
51
+
52
+ def forward(self, img):
53
+ if img.max() == 0:
54
+ return img
55
+ if len(img) == 1:
56
+ return img
57
+ if np.random.rand() <= self.p:
58
+ random_factor = (np.random.rand() * 2) / img.max() # scaling
59
+ img[1] = img[1] * random_factor
60
+ return img
61
+ else:
62
+ return img
63
+
64
+
65
+ class RandomRemoveChannelExceptProtein(torch.nn.Module):
66
+ """
67
+ dropping a channel at random except the channel 1, corresponding to proteins in HPA datasets.
68
+ """
69
+
70
+ def __init__(self, p=0.2):
71
+ super().__init__()
72
+ self.p = p
73
+
74
+ def forward(self, img):
75
+ img_size = np.array(img).shape
76
+ if img_size[0] < 4:
77
+ return img
78
+ if np.random.rand() <= self.p:
79
+ channel_to_blacken = np.random.choice(np.array([0, 2, 3]))
80
+ img[channel_to_blacken] = torch.zeros(1, *img.shape[1:])
81
+ return img
82
+ else:
83
+ return img
84
+
85
+
86
+ class RandomRemoveChannel(torch.nn.Module):
87
+ """
88
+ dropping a channel at random
89
+ """
90
+
91
+ def __init__(self, p=0.2):
92
+ super().__init__()
93
+ self.p = p
94
+
95
+ def forward(self, img):
96
+ img_size = np.array(img).shape
97
+ num_channels = img_size[0]
98
+ if num_channels < 4:
99
+ return img
100
+ if np.random.rand() <= self.p:
101
+ channel_to_blacken = np.random.choice(np.array(list(range(num_channels))))
102
+ img[channel_to_blacken] = torch.zeros(1, *img.shape[1:])
103
+ return img
104
+ else:
105
+ return img
106
+
107
+
108
+ class RandomContrast(torch.nn.Module):
109
+ def __init__(self, p=0.2):
110
+ super().__init__()
111
+ self.p = p
112
+
113
+ def forward(self, img):
114
+ if img.max() == 0:
115
+ return img
116
+ n_channels = img.shape[0]
117
+ for ind in range(n_channels):
118
+ factor = max(np.random.normal(1, self.p), 0.5)
119
+ img[ind] = transforms.functional.adjust_contrast(img[ind][None, ...], factor)
120
+ return img
121
+
122
+
123
+ class RandomBrightness(torch.nn.Module):
124
+ def __init__(self, p=0.2):
125
+ super().__init__()
126
+ self.p = p
127
+
128
+ def forward(self, img):
129
+ if img.max() == 0:
130
+ return img
131
+ n_channels = img.shape[0]
132
+ for ind in range(n_channels):
133
+ factor = max(np.random.normal(1, self.p), 0.5)
134
+ img[ind] = transforms.functional.adjust_brightness(img[ind], factor)
135
+ return img
136
+
137
+
138
+ def make_classification_eval_cell_transform(
139
+ *,
140
+ resize_size: int = 0,
141
+ interpolation=transforms.InterpolationMode.BICUBIC,
142
+ crop_size: int = 384,
143
+ normalization_type: Enum = NormalizationType.SELF_NORM_CENTER_CROP,
144
+ ) -> transforms.Compose:
145
+
146
+ from .transforms import (
147
+ Div255,
148
+ SelfNormalizeNoDiv,
149
+ )
150
+
151
+ transforms_list = [Div255()]
152
+ if resize_size > 0:
153
+ transforms_list.append(transforms.Resize(resize_size, interpolation=interpolation))
154
+
155
+ if normalization_type == NormalizationType.SELF_NORM_AUG_DECODER:
156
+ transforms_list.extend(
157
+ [
158
+ transforms.RandomCrop(size=crop_size, pad_if_needed=True),
159
+ transforms.RandomHorizontalFlip(),
160
+ transforms.RandomVerticalFlip(),
161
+ ]
162
+ )
163
+ elif normalization_type == NormalizationType.SELF_NORM_CENTER_CROP:
164
+ transforms_list.append(transforms.CenterCrop(size=crop_size))
165
+ else:
166
+ raise ValueError("f{normalization_type}: unknown NormalizationType")
167
+ transforms_list.append(SelfNormalizeNoDiv())
168
+
169
+ return transforms.Compose(transforms_list)
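
Review note: `SelfNormalizeNoDiv` standardizes each channel independently over its spatial dimensions, and it modifies its input in place (`x -= m`). A quick check of both properties, assuming the class is importable:

import torch

x = torch.rand(4, 32, 32) * 255
y = SelfNormalizeNoDiv()(x.clone())  # clone because the transform is in-place
print(y.mean(dim=(-2, -1)))                 # ~0 for every channel
print(y.std(dim=(-2, -1), unbiased=False))  # ~1 for every channel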
dinov2/data/collate.py ADDED
@@ -0,0 +1,49 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ import torch
+ import random
+
+
+ def collate_data_and_cast(samples_list, mask_ratio_tuple, mask_probability, dtype, n_tokens=None, mask_generator=None):
+     # dtype = torch.half  # TODO: Remove
+
+     n_global_crops = len(samples_list[0][0]["global_crops"])
+     n_local_crops = len(samples_list[0][0]["local_crops"])
+
+     collated_global_crops = torch.stack([s[0]["global_crops"][i] for i in range(n_global_crops) for s in samples_list])
+
+     collated_local_crops = torch.stack([s[0]["local_crops"][i] for i in range(n_local_crops) for s in samples_list])
+
+     B = len(collated_global_crops)
+     N = n_tokens
+     n_samples_masked = int(B * mask_probability)
+     probs = torch.linspace(*mask_ratio_tuple, n_samples_masked + 1)
+     upperbound = 0
+     masks_list = []
+     for i in range(0, n_samples_masked):
+         prob_min = probs[i]
+         prob_max = probs[i + 1]
+         masks_list.append(torch.BoolTensor(mask_generator(int(N * random.uniform(prob_min, prob_max)))))
+         upperbound += int(N * prob_max)
+     for i in range(n_samples_masked, B):
+         masks_list.append(torch.BoolTensor(mask_generator(0)))
+
+     random.shuffle(masks_list)
+
+     collated_masks = torch.stack(masks_list).flatten(1)
+     mask_indices_list = collated_masks.flatten().nonzero().flatten()
+
+     masks_weight = (1 / collated_masks.sum(-1).clamp(min=1.0)).unsqueeze(-1).expand_as(collated_masks)[collated_masks]
+
+     return {
+         "collated_global_crops": collated_global_crops.to(dtype),
+         "collated_local_crops": collated_local_crops.to(dtype),
+         "collated_masks": collated_masks,
+         "mask_indices_list": mask_indices_list,
+         "masks_weight": masks_weight,
+         "upperbound": upperbound,
+         "n_masked_patches": torch.full((1,), fill_value=mask_indices_list.shape[0], dtype=torch.long),
+     }
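
Review note: with a `mask_probability` fraction of the crops masked, the per-sample mask ratio is drawn from consecutive bins of a linspace over `mask_ratio_tuple`, so the masked fraction is spread evenly across the batch rather than drawn i.i.d. A sketch of that schedule in isolation:

import random
import torch

B, N = 8, 196                    # crops in the batch, tokens per crop
n_samples_masked = int(B * 0.5)  # mask_probability = 0.5 -> 4 masked samples
probs = torch.linspace(0.1, 0.5, n_samples_masked + 1)  # mask_ratio_min_max bins
for i in range(n_samples_masked):
    lo, hi = float(probs[i]), float(probs[i + 1])
    n_masked = int(N * random.uniform(lo, hi))
    print(f"sample {i}: ~{n_masked}/{N} tokens masked")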
dinov2/data/datasets/__init__.py ADDED
@@ -0,0 +1,12 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ from .image_net import ImageNet
+ from .image_net_22k import ImageNet22k
+ from .cell_dino.hpaone import HPAone
+ from .cell_dino.hpafov import HPAFoV
+ from .cell_dino.chammi_cp import CHAMMI_CP
+ from .cell_dino.chammi_hpa import CHAMMI_HPA
+ from .cell_dino.chammi_wtc import CHAMMI_WTC
dinov2/data/datasets/cell_dino/chammi_cp.py ADDED
@@ -0,0 +1,112 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the CC-by-NC licence,
+ # found in the LICENSE_CELL_DINO_CODE file in the root directory of this source tree.
+
+ import csv
+ from enum import Enum
+ import logging
+ import os
+ from typing import Any, Callable, Optional, Union
+
+ import numpy as np
+
+ from ..extended import ExtendedVisionDataset
+ from ..decoders import DecoderType
+
+ logger = logging.getLogger("dinov2")
+
+
+ METADATA_FILE = "morphem70k_v2.csv"
+
+ CLASS_LABELS = {
+     "BRD-A29260609": 0,
+     "BRD-K04185004": 1,
+     "BRD-K21680192": 2,
+     "DMSO": 3,
+     "BRD-K11129031": 4,  # labels only seen in TASK_FOUR
+     "BRD-K62310379": 5,
+     "BRD-K77947974": 6,
+ }
+
+
+ class _Split(Enum):
+     TRAIN = "Train"
+     TASK_ONE = "Task_one"
+     TASK_TWO = "Task_two"
+     TASK_THREE = "Task_three"
+     TASK_FOUR = "Task_four"
+
+
+ def _load_file_names_and_targets(
+     root: str,
+     split: _Split,
+ ):
+     image_paths = []
+     labels = []
+     with open(os.path.join(root, METADATA_FILE)) as metadata:
+         metadata_reader = csv.DictReader(metadata)
+         for row in metadata_reader:
+             row_dataset = row["file_path"].split("/")[0]
+
+             if row["train_test_split"].upper() == split and row_dataset == "CP":
+                 image_paths.append(row["file_path"])
+                 labels.append(CLASS_LABELS[row["label"]])
+
+     return image_paths, labels
+
+
+ class CHAMMI_CP(ExtendedVisionDataset):
+     """
+     Implementation of the CP (Cell Painting) subset of the CHAMMI benchmark dataset,
+     following the CHAMMI paper: https://arxiv.org/pdf/2310.19224
+     Github code: https://github.com/chaudatascience/channel_adaptive_models
+     """
+
+     Split = Union[_Split]
+
+     def __init__(
+         self,
+         *,
+         split: "CHAMMI_CP.Split",
+         root: str,
+         transforms: Optional[Callable] = None,
+         transform: Optional[Callable] = None,
+         target_transform: Optional[Callable] = None,
+         image_decoder_type: DecoderType = DecoderType.XChannelsDecoder,
+         **kwargs: Any,
+     ) -> None:
+         super().__init__(
+             root,
+             transforms,
+             transform,
+             target_transform,
+             image_decoder_type=image_decoder_type,
+             **kwargs,
+         )
+         self.split = split
+         self.root = root
+         self.num_additional_labels_loo_eval = 3
+         self._image_paths, self._targets = _load_file_names_and_targets(
+             root,
+             split,
+         )
+
+     def get_image_relpath(self, index: int) -> str:
+         return self._image_paths[index]
+
+     def get_image_data(self, index: int) -> bytes:
+         image_relpath = self.get_image_relpath(index)
+         image_full_path = os.path.join(self.root, image_relpath)
+         with open(image_full_path, mode="rb") as f:
+             image_data = f.read()
+         return image_data
+
+     def get_target(self, index: int) -> Any:
+         return self._targets[index]
+
+     def get_targets(self) -> np.ndarray:
+         return np.array(self._targets)
+
+     def __len__(self) -> int:
+         return len(self._image_paths)
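
Review note: construction is driven entirely by `morphem70k_v2.csv` under `root`; only rows whose `file_path` starts with "CP" and whose split column matches are kept. A hypothetical instantiation — the root path is a placeholder, and the split is passed as the uppercase string the CSV comparison expects:

from dinov2.data.datasets import CHAMMI_CP

dataset = CHAMMI_CP(split="TRAIN", root="/data/chammi")  # placeholder path
print(len(dataset), dataset.get_target(0))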
dinov2/data/datasets/cell_dino/chammi_hpa.py ADDED
@@ -0,0 +1,111 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the CC-by-NC licence,
+ # found in the LICENSE_CELL_DINO_CODE file in the root directory of this source tree.
+
+ import csv
+ from enum import Enum
+ import logging
+ import os
+ from typing import Any, Callable, Optional, Union
+
+ import numpy as np
+
+ from ..extended import ExtendedVisionDataset
+ from ..decoders import DecoderType
+
+ logger = logging.getLogger("dinov2")
+
+
+ METADATA_FILE = "morphem70k_v2.csv"
+
+ CLASS_LABELS = {
+     "golgi apparatus": 0,
+     "microtubules": 1,
+     "mitochondria": 2,
+     "nuclear speckles": 3,
+     "cytosol": 4,  # labels only seen in TASK_THREE
+     "endoplasmic reticulum": 5,
+     "nucleoplasm": 6,
+ }
+
+
+ class _Split(Enum):
+     TRAIN = "Train"
+     TASK_ONE = "Task_one"
+     TASK_TWO = "Task_two"
+     TASK_THREE = "Task_three"
+
+
+ def _load_file_names_and_targets(
+     root: str,
+     split: _Split,
+ ):
+     image_paths = []
+     labels = []
+     with open(os.path.join(root, METADATA_FILE)) as metadata:
+         metadata_reader = csv.DictReader(metadata)
+         for row in metadata_reader:
+             row_dataset = row["file_path"].split("/")[0]
+             if row["train_test_split"].upper() == split and row_dataset == "HPA":
+                 image_paths.append(row["file_path"])
+                 labels.append(CLASS_LABELS[row["label"]])
+
+     return image_paths, labels
+
+
+ class CHAMMI_HPA(ExtendedVisionDataset):
+     """
+     Implementation of the HPA (Human Protein Atlas) subset of the CHAMMI benchmark dataset,
+     following the CHAMMI paper: https://arxiv.org/pdf/2310.19224
+     Github code: https://github.com/chaudatascience/channel_adaptive_models
+     """
+
+     Split = Union[_Split]
+
+     def __init__(
+         self,
+         *,
+         split: "CHAMMI_HPA.Split",
+         root: str,
+         transforms: Optional[Callable] = None,
+         transform: Optional[Callable] = None,
+         target_transform: Optional[Callable] = None,
+         image_decoder_type: DecoderType = DecoderType.XChannelsDecoder,
+         **kwargs: Any,
+     ) -> None:
+         super().__init__(
+             root,
+             transforms,
+             transform,
+             target_transform,
+             image_decoder_type=image_decoder_type,
+             **kwargs,
+         )
+         self.split = split
+         self.root = root
+         self.num_additional_labels_loo_eval = 3
+
+         self._image_paths, self._targets = _load_file_names_and_targets(
+             root,
+             split,
+         )
+
+     def get_image_relpath(self, index: int) -> str:
+         return self._image_paths[index]
+
+     def get_image_data(self, index: int) -> bytes:
+         image_relpath = self.get_image_relpath(index)
+         image_full_path = os.path.join(self.root, image_relpath)
+         with open(image_full_path, mode="rb") as f:
+             image_data = f.read()
+         return image_data
+
+     def get_target(self, index: int) -> Any:
+         return self._targets[index]
+
+     def get_targets(self) -> np.ndarray:
+         return np.array(self._targets)
+
+     def __len__(self) -> int:
+         return len(self._image_paths)
dinov2/data/datasets/cell_dino/chammi_wtc.py ADDED
@@ -0,0 +1,108 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the CC-by-NC licence,
+ # found in the LICENSE_CELL_DINO_CODE file in the root directory of this source tree.
+
+ import csv
+ from enum import Enum
+ import logging
+ import os
+ from typing import Any, Callable, Optional, Union
+
+ import numpy as np
+
+ from ..extended import ExtendedVisionDataset
+ from ..decoders import DecoderType
+
+ logger = logging.getLogger("dinov2")
+
+
+ METADATA_FILE = "morphem70k_v2.csv"
+
+ CLASS_LABELS = {
+     "M0": 0,
+     "M1M2": 1,
+     "M3": 2,
+     "M4M5": 3,
+     "M6M7_complete": 4,
+     "M6M7_single": 5,
+ }
+
+
+ class _Split(Enum):
+     TRAIN = "Train"
+     TASK_ONE = "Task_one"
+     TASK_TWO = "Task_two"
+
+
+ def _load_file_names_and_targets(
+     root: str,
+     split: _Split,
+ ):
+     image_paths = []
+     labels = []
+     with open(os.path.join(root, METADATA_FILE)) as metadata:
+         metadata_reader = csv.DictReader(metadata)
+         for row in metadata_reader:
+             row_dataset = row["file_path"].split("/")[0]
+             if row["train_test_split"].upper() == split and row_dataset == "Allen":
+                 image_paths.append(row["file_path"])
+                 labels.append(CLASS_LABELS[row["label"]])
+
+     return image_paths, labels
+
+
+ class CHAMMI_WTC(ExtendedVisionDataset):
+     """
+     Implementation of the WTC (Allen Institute WTC-11) subset of the CHAMMI benchmark dataset,
+     following the CHAMMI paper: https://arxiv.org/pdf/2310.19224
+     Github code: https://github.com/chaudatascience/channel_adaptive_models
+     """
+
+     Split = Union[_Split]
+
+     def __init__(
+         self,
+         *,
+         split: "CHAMMI_WTC.Split",
+         root: str,
+         transforms: Optional[Callable] = None,
+         transform: Optional[Callable] = None,
+         target_transform: Optional[Callable] = None,
+         image_decoder_type: DecoderType = DecoderType.XChannelsTIFFDecoder,
+         **kwargs: Any,
+     ) -> None:
+         super().__init__(
+             root,
+             transforms,
+             transform,
+             target_transform,
+             image_decoder_type=image_decoder_type,
+             **kwargs,
+         )
+         self.split = split
+         self.root = root
+
+         self._image_paths, self._targets = _load_file_names_and_targets(
+             root,
+             split,
+         )
+
+     def get_image_relpath(self, index: int) -> str:
+         return self._image_paths[index]
+
+     def get_image_data(self, index: int) -> bytes:
+         image_relpath = self.get_image_relpath(index)
+         image_full_path = os.path.join(self.root, image_relpath)
+         with open(image_full_path, mode="rb") as f:
+             image_data = f.read()
+         return image_data
+
+     def get_target(self, index: int) -> Any:
+         return self._targets[index]
+
+     def get_targets(self) -> np.ndarray:
+         return np.array(self._targets)
+
+     def __len__(self) -> int:
+         return len(self._image_paths)
dinov2/data/datasets/cell_dino/hpafov.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the CC-by-NC licence,
4
+ # found in the LICENSE_CELL_DINO_CODE file in the root directory of this source tree.
5
+
6
+ import csv
7
+ from enum import Enum
8
+ import logging
9
+ import os
10
+ from typing import Any, Callable, List, Optional, Tuple, Union, Dict
11
+
12
+ import numpy as np
13
+
14
+ from ..extended import ExtendedVisionDataset
15
+ from ..decoders import DecoderType
16
+
17
+ logger = logging.getLogger("dinov2")
18
+
19
+ CELL_TYPE = [
20
+ "BJ", # 1
21
+ "LHCN-M2",
22
+ "RH-30",
23
+ "SH-SY5Y",
24
+ "U-2 OS", # 5
25
+ "ASC TERT1",
26
+ "HaCaT",
27
+ "A-431",
28
+ "U-251 MG",
29
+ "HEK 293", # 10
30
+ "A549",
31
+ "RT4",
32
+ "HeLa",
33
+ "MCF7",
34
+ "PC-3", # 15
35
+ "hTERT-RPE1",
36
+ "SK-MEL-30",
37
+ "EFO-21",
38
+ "AF22",
39
+ "HEL", # 20
40
+ "Hep G2",
41
+ "HUVEC TERT2",
42
+ "THP-1",
43
+ "CACO-2",
44
+ "JURKAT", # 25
45
+ "RPTEC TERT1",
46
+ "SuSa",
47
+ "REH",
48
+ "HDLM-2",
49
+ "K-562", # 30
50
+ "hTCEpi",
51
+ "NB-4",
52
+ "HAP1",
53
+ "OE19",
54
+ "SiHa", # 35
55
+ ]
56
+
57
+ PROTEIN_LOCALIZATION = [ # matches https://www.kaggle.com/c/human-protein-atlas-image-classification/data
58
+ "nucleoplasm",
59
+ "nuclear membrane",
60
+ "nucleoli",
61
+ "nucleoli fibrillar center",
62
+ "nuclear speckles", # 5
63
+ "nuclear bodies",
64
+ "endoplasmic reticulum",
65
+ "golgi apparatus",
66
+ "peroxisomes",
67
+ "endosomes", # 10
68
+ "lysosomes",
69
+ "intermediate filaments",
70
+ "actin filaments",
71
+ "focal adhesion sites",
72
+ "microtubules", # 15
73
+ "microtubule ends",
74
+ "cytokinetic bridge",
75
+ "mitotic spindle",
76
+ "microtubule organizing center",
77
+ "centrosome", # 20
78
+ "lipid droplets",
79
+ "plasma membrane",
80
+ "cell junctions",
81
+ "mitochondria",
82
+ "aggresome", # 25
83
+ "cytosol",
84
+ "cytoplasmic bodies",
85
+ "rods & rings",
86
+ ]
87
+
88
+
89
+ class _Split(Enum):
90
+ TRAIN = "train"
91
+ VAL = "val"
92
+ SSL = "ssl"
93
+
94
+
95
+ def get_csv_fpath(split):
96
+ """
97
+ Path to data relative to root
98
+ """
99
+ if split == _Split.TRAIN.value.upper() or split == _Split.TRAIN or split == "TRAIN":
100
+ return "whole_images_512_train.csv"
101
+ elif split == _Split.VAL.value.upper() or split == _Split.VAL or split == "VAL":
102
+ return "whole_images_512_test.csv"
103
+
104
+
105
+ class _WildCard(Enum):
106
+ NONE = "none"
107
+ SEPARATECHANNELS = "separate_channels" # each channel from each image is treated as an independent sample, overrides chosen channel configuration
108
+
109
+
110
+ class _Mode(Enum):
111
+ """
112
+ Targets:
113
+ - ALL: tuple, (one hot encoding of multilabel protein localization, categorical encoding of cell type)
114
+ - PROTEIN_LOCALIZATION: one hot encoding of multilabel protein localization
115
+ - CELL_TYPE: categorical encoding of cell type
116
+ """
117
+
118
+ ALL = "all"
119
+ PROTEIN_LOCALIZATION = "protein_localization"
120
+ CELL_TYPE = "cell_type"
121
+
122
+ @property
123
+ def nb_labels(self):
124
+ if self == _Mode.CELL_TYPE:
125
+ return len(CELL_TYPE)
126
+ elif self == _Mode.PROTEIN_LOCALIZATION:
127
+ return len(PROTEIN_LOCALIZATION)
128
+ else:
129
+ return None
130
+
131
+
132
+ def _list_images_from_csv(img_path, csv_path):
133
+ L = []
134
+ with open(csv_path) as filename:
135
+ reader = csv.DictReader(filename)
136
+ for row in reader:
137
+ L.append(os.path.join(img_path, row["ID"] + ".png"))
138
+ return L
139
+
140
+
141
+ def _load_file_names_and_labels_ssl(
142
+ root: str,
143
+ ) -> Tuple[List[str], List[Any]]:
144
+
145
+ curr_img_path = os.path.join(root, "normalized_data")
146
+ csv_train_ssl = os.path.join(root, "whole_images_names.csv")
147
+ image_paths = _list_images_from_csv(curr_img_path, csv_train_ssl)
148
+ labels = [i for i in range(len(image_paths))]
149
+ return image_paths, labels
150
+
151
+
152
+ def _load_file_names_and_labels(
153
+ root: str,
154
+ split: _Split,
155
+ mode: _Mode,
156
+ ) -> Tuple[List[str], List[Any], np.ndarray]:
157
+
158
+ data_path = os.path.join(root, "512_whole_images")
159
+ csv_fpath = os.path.join(root, get_csv_fpath(split))
160
+
161
+ image_paths = []
162
+ labels = []
163
+
164
+ with open(csv_fpath) as filename:
165
+ reader = csv.DictReader(filename)
166
+ for row in reader:
167
+
168
+ add_sample = True
169
+ if mode != _Mode.PROTEIN_LOCALIZATION.value.upper():
170
+ # categorical
171
+ if row["cell_type"] in CELL_TYPE:
172
+ cell_type = CELL_TYPE.index(row["cell_type"])
173
+ else:
174
+ cell_type = np.nan
175
+
176
+ if mode != _Mode.CELL_TYPE.value.upper():
177
+ # one hot encoding
178
+ prot_loc = np.zeros(len(PROTEIN_LOCALIZATION), dtype=np.int_)
179
+ for k in range(len(PROTEIN_LOCALIZATION)):
180
+ if row[PROTEIN_LOCALIZATION[k]] == "True":
181
+ prot_loc[k] = 1
182
+ if prot_loc.max() < 0.5:
183
+ add_sample = False
184
+
185
+ if add_sample:
186
+ if mode == _Mode.PROTEIN_LOCALIZATION.value.upper():
187
+ labels.append(prot_loc)
188
+ elif mode == _Mode.CELL_TYPE.value.upper():
189
+ labels.append(cell_type)
190
+ else:
191
+ labels.append({"prot_loc": prot_loc, "cell_type": cell_type})
192
+
193
+ candidate_path = os.path.join(data_path, row["file"].split("/")[-1])
194
+ if os.path.exists(candidate_path):
195
+ image_paths.append(candidate_path)
196
+ else:
197
+ candidate_path = os.path.join(
198
+ data_path, row["file"].split("/")[-1].split(".")[0] + ".tiff"
199
+ ) # _blue.png") # some images on the normalized_data folder have a _blue suffix on their names
200
+ if os.path.exists(candidate_path):
201
+ image_paths.append(candidate_path)
202
+ else:
203
+ raise FileNotFoundError(f"File {candidate_path} not found.")
204
+
205
+ return image_paths, labels
206
+
207
+
208
+ class HPAFoV(ExtendedVisionDataset):
209
+ Split = Union[_Split]
210
+ Mode = Union[_Mode]
211
+ WildCard = Union[_WildCard]
212
+
213
+ def __init__(
214
+ self,
215
+ *,
216
+ split: "HPAFoV.Split" = _Split.TRAIN,
217
+ mode: "HPAFoV.Mode" = _Mode.ALL,
218
+ wildcard: "HPAFoV.WildCard" = _WildCard.NONE,
219
+ root: str,
220
+ transforms: Optional[Callable] = None,
221
+ transform: Optional[Callable] = None,
222
+ target_transform: Optional[Callable] = None,
223
+ image_decoder_type: DecoderType = DecoderType.ChannelSelectDecoder,
224
+ image_decoder_params: Dict[str, Any] = {},
225
+ **kwargs: Any,
226
+ ) -> None:
227
+ super().__init__(
228
+ root,
229
+ transforms,
230
+ transform,
231
+ target_transform,
232
+ image_decoder_type=image_decoder_type,
233
+ image_decoder_params={
234
+ "select_channel": True
235
+ if wildcard == _WildCard.SEPARATECHANNELS or wildcard == "SEPARATE_CHANNELS"
236
+ else False
237
+ },
238
+ **kwargs,
239
+ )
240
+ self.mode = mode
241
+ self.split = split
242
+ self.root = root
243
+ self.wildcard = wildcard
244
+ self.channel_adaptive = True
245
+ if split == _Split.SSL.value.upper() or split == _Split.SSL or split == "SSL":
246
+ self._image_paths, self._labels = _load_file_names_and_labels_ssl(root)
247
+ else:
248
+ self._image_paths, self._labels = _load_file_names_and_labels(root, self.split, self.mode)
249
+
250
+ self._channels = np.repeat(np.array([[0, 1, 2, 3]]), len(self._image_paths), axis=0).tolist()
251
+
252
+ if self.wildcard == _WildCard.SEPARATECHANNELS.value.upper():
253
+ image_paths, labels, channels = self._image_paths, self._labels, self._channels
254
+ channels = np.array(channels)
255
+ # separate and stack the columns of the channels array
256
+ C = channels.shape[1]
257
+ channels = np.concatenate([channels[:, i] for i in range(C)])
258
+ self._channels = np.expand_dims(channels, 1).tolist()
259
+ self._image_paths = image_paths * C # keep paths/labels aligned with the expanded per-channel list
260
+ self._labels = labels * C
261
+
262
+ def get_image_relpath(self, index: int) -> str:
263
+ return self._image_paths[index]
264
+
265
+ def get_image_data(self, index: int) -> bytes:
266
+ image_relpath = self.get_image_relpath(index)
267
+ image_full_path = os.path.join(self.root, image_relpath)
268
+ with open(image_full_path, mode="rb") as f:
269
+ image_data = f.read()
270
+ if self.channel_adaptive:
271
+ channels = self._channels[index]
272
+ return image_data + bytes(channels) + (len(channels)).to_bytes(1, byteorder="big")
273
+ else:
274
+ return image_data
275
+
276
+ def get_target(self, index: int) -> Any:
277
+ return self._labels[index]
278
+
279
+ def get_targets(self) -> np.ndarray:
280
+ return np.array(self._labels)
281
+
282
+ def __len__(self) -> int:
283
+ return len(self._image_paths)
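A round-trip sketch of the byte "trailer" that HPAFoV.get_image_data appends above: the channel indices, then one big-endian byte holding the channel count. The unpack_channels helper is hypothetical, written only to illustrate the layout; it is not the decoder the repo uses (ChannelSelectDecoder in decoders.py reads just the final byte):

from typing import List, Tuple

def pack_channels(image_data: bytes, channels: List[int]) -> bytes:
    # Mirrors HPAFoV.get_image_data: raw image bytes + channel indices + count byte.
    return image_data + bytes(channels) + len(channels).to_bytes(1, byteorder="big")

def unpack_channels(packed: bytes) -> Tuple[bytes, List[int]]:
    # Hypothetical inverse: the last byte is the count, the bytes before it
    # are the channel indices.
    count = packed[-1]
    return packed[: -1 - count], list(packed[-1 - count : -1])

raw = b"\x89PNG..."  # stand-in for real image bytes
packed = pack_channels(raw, [0, 1, 2, 3])
assert unpack_channels(packed) == (raw, [0, 1, 2, 3])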
dinov2/data/datasets/cell_dino/hpaone.py ADDED
@@ -0,0 +1,223 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the CC-by-NC licence,
4
+ # found in the LICENSE_CELL_DINO_CODE file in the root directory of this source tree.
5
+
6
+ import csv
7
+ from enum import Enum
8
+ import logging
9
+ import os
10
+ from typing import Any, Callable, List, Optional, Tuple, Union
11
+
12
+ import numpy as np
13
+
14
+ from ..extended import ExtendedVisionDataset
15
+ from ..decoders import DecoderType
16
+
17
+ logger = logging.getLogger("dinov2")
18
+
19
+ PROTEIN_LOCALIZATION = [
20
+ "actin filaments,focal adhesion sites",
21
+ "aggresome",
22
+ "centrosome,centriolar satellite",
23
+ "cytosol",
24
+ "endoplasmic reticulum",
25
+ "golgi apparatus",
26
+ "intermediate filaments",
27
+ "microtubules",
28
+ "mitochondria",
29
+ "mitotic spindle",
30
+ "no staining",
31
+ "nuclear bodies",
32
+ "nuclear membrane",
33
+ "nuclear speckles",
34
+ "nucleoli",
35
+ "nucleoli fibrillar center",
36
+ "nucleoplasm",
37
+ "plasma membrane,cell junctions",
38
+ "vesicles,peroxisomes,endosomes,lysosomes,lipid droplets,cytoplasmic bodies",
39
+ ] # 19
40
+
41
+
42
+ CELL_TYPE = [
43
+ "A-431", # 0
44
+ "A549",
45
+ "AF22",
46
+ "ASC TERT1",
47
+ "BJ",
48
+ "CACO-2",
49
+ "EFO-21",
50
+ "HAP1",
51
+ "HDLM-2",
52
+ "HEK 293", # 9
53
+ "HEL",
54
+ "HUVEC TERT2",
55
+ "HaCaT",
56
+ "HeLa",
57
+ "Hep G2",
58
+ "JURKAT",
59
+ "K-562",
60
+ "MCF7",
61
+ "PC-3",
62
+ "REH",
63
+ "RH-30", # 20
64
+ "RPTEC TERT1",
65
+ "RT4",
66
+ "SH-SY5Y",
67
+ "SK-MEL-30",
68
+ "SiHa",
69
+ "U-2 OS",
70
+ "U-251 MG",
71
+ "hTCEpi", # 28
72
+ ] # 29 cell types
73
+
74
+
75
+ class _Split(Enum):
76
+ VAL = "val"
77
+ TRAIN = "train"
78
+ ALL = "all" # images without labels, for encoder training
79
+
80
+
81
+ class _Mode(Enum):
82
+ PROTEIN_LOCALIZATION = "protein_localization"
83
+ CELL_TYPE = "cell_type"
84
+
85
+ @property
86
+ def num_labels(self):
87
+ if self == _Mode.CELL_TYPE:
88
+ return len(CELL_TYPE)
89
+ return len(PROTEIN_LOCALIZATION)
90
+
91
+
92
+ def _simple_parse_csv(img_rootdir, csv_filepath: str):
93
+ samples = []
94
+ with open(csv_filepath) as filename:
95
+ reader = csv.DictReader(filename)
96
+ samples = [(os.path.join(img_rootdir, row["img_path"]), 0) for row in reader]
97
+ return samples
98
+
99
+
100
+ def _parse_csv(img_rootdir, csv_labels_path: str):
101
+ nb_protein_location = len(PROTEIN_LOCALIZATION)
102
+ nb_cell_type = len(CELL_TYPE)
103
+ samples = []
104
+ with open(csv_labels_path) as filename:
105
+ reader = csv.DictReader(filename)
106
+ for row in reader:
107
+ protein_location = np.zeros(nb_protein_location, dtype=np.int_)
108
+ for k in range(nb_protein_location):
109
+ if row[PROTEIN_LOCALIZATION[k]] == "True":
110
+ protein_location[k] = 1
111
+
112
+ cell_type = 0
113
+ for k in range(nb_cell_type):
114
+ if row[CELL_TYPE[k]] == "True":
115
+ cell_type = k
116
+
117
+ samples.append(
118
+ (
119
+ img_rootdir + "/" + row["file"].rsplit("/", 1)[1],
120
+ protein_location,
121
+ cell_type,
122
+ )
123
+ )
124
+ return samples
125
+
126
+
127
+ def _load_file_names_and_labels_ssl(
128
+ root: str,
129
+ ) -> Tuple[List[str], List[Any]]:
130
+ curr_dir_train = os.path.join(root, "varied_size_masked_single_cells_HPA")
131
+ csv_all_path = os.path.join(root, "varied_size_masked_single_cells_pretrain_20240507.csv")
132
+ samples = _simple_parse_csv(curr_dir_train, csv_all_path)
133
+ image_paths, fake_labels = zip(*samples)
134
+ lab = list(fake_labels)
135
+ return list(image_paths), lab
136
+
137
+
138
+ def _load_file_names_and_labels_train_or_test(
139
+ root: str,
140
+ split: _Split,
141
+ mode: _Mode,
142
+ ) -> Tuple[List[str], List[Any]]:
143
+
144
+ if split == _Split.TRAIN.value.upper() or split == _Split.TRAIN:
145
+ csv_labels_path = os.path.join(root, "fixed_size_masked_single_cells_pretrain_20240507.csv")
146
+ elif split == _Split.VAL.value.upper() or split == _Split.VAL:
147
+ csv_labels_path = os.path.join(root, "fixed_size_masked_single_cells_evaluation_20240507.csv")
148
+ else:
149
+ print("wrong split name")
150
+ curr_dir_val = os.path.join(root, "fixed_size_masked_single_cells_HPA")
151
+
152
+ samples = _parse_csv(curr_dir_val, csv_labels_path)
153
+ image_paths, protein_location, cell_type = zip(*samples)
154
+ if mode == _Mode.PROTEIN_LOCALIZATION.value.upper():
155
+ lab = protein_location
156
+ elif mode == _Mode.CELL_TYPE.value.upper():
157
+ lab = cell_type
158
+ else:
159
+ lab = protein_location, cell_type
160
+ image_paths = list(image_paths)
161
+ return image_paths, lab
162
+
163
+
164
+ class HPAone(ExtendedVisionDataset):
165
+ Split = Union[_Split]
166
+ Mode = Union[_Mode]
167
+
168
+ def __init__(
169
+ self,
170
+ *,
171
+ split: "HPAone.Split" = _Split.ALL,
172
+ mode: "HPAone.Mode" = None,
173
+ root: str,
174
+ transforms: Optional[Callable] = None,
175
+ transform: Optional[Callable] = None,
176
+ target_transform: Optional[Callable] = None,
177
+ image_decoder_type: DecoderType = DecoderType.XChannelsDecoder,
178
+ **kwargs: Any,
179
+ ) -> None:
180
+ super().__init__(
181
+ root,
182
+ transforms,
183
+ transform,
184
+ target_transform,
185
+ image_decoder_type=image_decoder_type,
186
+ **kwargs,
187
+ )
188
+ self.mode = mode
189
+ self.split = split
190
+ self.root = root
191
+
192
+ if (
193
+ split in {_Split.TRAIN.value.upper(), _Split.VAL.value.upper()}
194
+ or split == _Split.TRAIN
195
+ or split == _Split.VAL
196
+ ):
197
+ (
198
+ self._image_paths,
199
+ self._labels,
200
+ ) = _load_file_names_and_labels_train_or_test(root, split, mode)
201
+ elif split == _Split.ALL.value.upper() or split == _Split.ALL:
202
+ self._image_paths, self._labels = _load_file_names_and_labels_ssl(root)
203
+ else:
204
+ logger.info(f"unknown split: {split}, {_Split.ALL.value.upper()}")
205
+
206
+ def get_image_relpath(self, index: int) -> str:
207
+ return self._image_paths[index]
208
+
209
+ def get_image_data(self, index: int) -> bytes:
210
+ image_relpath = self.get_image_relpath(index)
211
+ image_full_path = os.path.join(self.root, image_relpath)
212
+ with open(image_full_path, mode="rb") as f:
213
+ image_data = f.read()
214
+ return image_data
215
+
216
+ def get_target(self, index: int) -> Any:
217
+ return self._labels[index]
218
+
219
+ def get_targets(self) -> np.ndarray:
220
+ return np.array(self._labels)
221
+
222
+ def __len__(self) -> int:
223
+ return len(self._image_paths)
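A condensed illustration of how _parse_csv above turns the boolean CSV columns into a multi-hot protein-localization vector and a single cell-type index, using the PROTEIN_LOCALIZATION and CELL_TYPE lists defined in this file. The row values are hypothetical, and the one-liners are an equivalent of the loops in _parse_csv, not code from the repo:

import numpy as np

row = {name: "False" for name in PROTEIN_LOCALIZATION + CELL_TYPE}
row["cytosol"] = "True"
row["mitochondria"] = "True"
row["HeLa"] = "True"

prot_loc = np.array([row[name] == "True" for name in PROTEIN_LOCALIZATION], dtype=np.int_)
cell_type = next(k for k, name in enumerate(CELL_TYPE) if row[name] == "True")
# prot_loc is 1 at the "cytosol" and "mitochondria" positions; cell_type == 13 (HeLa)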
dinov2/data/datasets/decoders.py ADDED
@@ -0,0 +1,94 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ from io import BytesIO
7
+ from typing import Any, Type
8
+
9
+ from PIL import Image
10
+ import numpy as np
11
+ import torch
12
+ from enum import Enum
13
+
14
+ try:
15
+ import tifffile
16
+ except ImportError:
17
+ print("Could not import `tifffile`, TIFFImageDataDecoder will be disabled")
18
+
19
+
20
+ class Decoder:
21
+ def decode(self) -> Any:
22
+ raise NotImplementedError
23
+
24
+
25
+ class DecoderType(Enum):
26
+ ImageDataDecoder = "ImageDataDecoder"
27
+ XChannelsDecoder = "XChannelsDecoder"
28
+ XChannelsTIFFDecoder = "XChannelsTIFFDecoder"
29
+ ChannelSelectDecoder = "ChannelSelectDecoder"
30
+
31
+ def get_class(self) -> Type[Decoder]: # noqa: C901
32
+ if self == DecoderType.ImageDataDecoder:
33
+ return ImageDataDecoder
34
+ if self == DecoderType.XChannelsDecoder:
35
+ return XChannelsDecoder
36
+ if self == DecoderType.XChannelsTIFFDecoder:
37
+ return XChannelsTIFFDecoder
38
+ if self == DecoderType.ChannelSelectDecoder:
39
+ return ChannelSelectDecoder
40
+
41
+
42
+ class ImageDataDecoder(Decoder):
43
+ def __init__(self, image_data: bytes) -> None:
44
+ self._image_data = image_data
45
+
46
+ def decode(self) -> Image:
47
+ f = BytesIO(self._image_data)
48
+ return Image.open(f).convert(mode="RGB")
49
+
50
+
51
+ class TargetDecoder(Decoder):
52
+ def __init__(self, target: Any):
53
+ self._target = target
54
+
55
+ def decode(self) -> Any:
56
+ return self._target
57
+
58
+
59
+ class XChannelsDecoder(Decoder):
60
+ def __init__(self, image_data: bytes) -> None:
61
+ self._image_data = image_data
62
+
63
+ def decode(self):
64
+ im = np.asarray(Image.open(BytesIO(self._image_data)))
65
+ if len(im.shape) == 2:
66
+ im = np.reshape(im, (im.shape[0], im.shape[0], -1), order="F") # split square channel planes concatenated along the width
67
+ return torch.Tensor(im).permute(2, 0, 1)
68
+
69
+
70
+ class XChannelsTIFFDecoder(Decoder):
71
+ def __init__(self, image_data: bytes, num_channels: int = 3) -> None:
72
+ self._image_data = image_data
73
+ self._num_channels = num_channels
74
+
75
+ def decode(self):
76
+ numpy_array = tifffile.imread(BytesIO(self._image_data))
77
+ numpy_array = np.reshape(numpy_array, (numpy_array.shape[0], -1, self._num_channels), order="F")
78
+ return torch.Tensor(numpy_array).permute(2, 0, 1)
79
+
80
+
81
+ class ChannelSelectDecoder(Decoder):
82
+ def __init__(self, image_data: bytes, select_channel: bool = False) -> None:
83
+ self.select_channel = select_channel
84
+ if select_channel:
85
+ self._image_data = image_data[:-1]
86
+ self._channel = image_data[-1]
87
+ else:
88
+ self._image_data = image_data
89
+
90
+ def decode(self):
91
+ im = np.asarray(Image.open(BytesIO(self._image_data)))
92
+ if self.select_channel:
93
+ return torch.Tensor(im).permute(2, 0, 1)[[self._channel]]
94
+ return torch.Tensor(im).permute(2, 0, 1)
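A minimal usage sketch of the dispatch above: DecoderType maps a config value to a Decoder class, which is then instantiated per sample with the raw bytes. The file path is hypothetical:

decoder_cls = DecoderType.XChannelsDecoder.get_class()
with open("sample.png", "rb") as f:  # hypothetical image file
    tensor = decoder_cls(f.read()).decode()  # CxHxW float tensor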
dinov2/data/datasets/extended.py ADDED
@@ -0,0 +1,44 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ from typing import Any, Tuple
7
+
8
+ from torchvision.datasets import VisionDataset
9
+
10
+ from .decoders import DecoderType, TargetDecoder
11
+
12
+
13
+ class ExtendedVisionDataset(VisionDataset):
14
+ def __init__(self, *args, **kwargs) -> None:
15
+ image_decoder_type = kwargs.pop("image_decoder_type", DecoderType.ImageDataDecoder)
16
+ self._decoder_params = {}
17
+ self._image_decoder_class = image_decoder_type.get_class()
18
+ if "image_decoder_params" in kwargs:
19
+ self._decoder_params = kwargs.pop("image_decoder_params")
20
+
21
+ super().__init__(*args, **kwargs) # type: ignore
22
+
23
+ def get_image_data(self, index: int) -> bytes:
24
+ raise NotImplementedError
25
+
26
+ def get_target(self, index: int) -> Any:
27
+ raise NotImplementedError
28
+
29
+ def __getitem__(self, index: int) -> Tuple[Any, Any]:
30
+ try:
31
+ image_data = self.get_image_data(index)
32
+ image = self._image_decoder_class(image_data, **self._decoder_params).decode()
33
+ except Exception as e:
34
+ raise RuntimeError(f"can not read image for sample {index}") from e
35
+ target = self.get_target(index)
36
+ target = TargetDecoder(target).decode()
37
+
38
+ if self.transforms is not None:
39
+ image, target = self.transforms(image, target)
40
+
41
+ return image, target
42
+
43
+ def __len__(self) -> int:
44
+ raise NotImplementedError
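A toy subclass sketch, assuming in-memory path and label lists: ExtendedVisionDataset only requires get_image_data, get_target and __len__; byte decoding and transforms are handled by the base __getitem__:

class ToyDataset(ExtendedVisionDataset):
    def __init__(self, paths, labels, **kwargs):
        super().__init__("/", **kwargs)  # root is unused in this toy example
        self._paths, self._labels = paths, labels

    def get_image_data(self, index):
        # Return raw bytes; the configured decoder turns them into an image.
        with open(self._paths[index], "rb") as f:
            return f.read()

    def get_target(self, index):
        return self._labels[index]

    def __len__(self):
        return len(self._paths)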
dinov2/data/datasets/image_net.py ADDED
@@ -0,0 +1,290 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ import csv
7
+ from enum import Enum
8
+ import logging
9
+ import os
10
+ from typing import Callable, List, Optional, Tuple, Union
11
+
12
+ import numpy as np
13
+
14
+ from .extended import ExtendedVisionDataset
15
+
16
+
17
+ logger = logging.getLogger("dinov2")
18
+ _Target = int
19
+
20
+
21
+ class _Split(Enum):
22
+ TRAIN = "train"
23
+ VAL = "val"
24
+ TEST = "test" # NOTE: torchvision does not support the test split
25
+
26
+ @property
27
+ def length(self) -> int:
28
+ split_lengths = {
29
+ _Split.TRAIN: 1_281_167,
30
+ _Split.VAL: 50_000,
31
+ _Split.TEST: 100_000,
32
+ }
33
+ return split_lengths[self]
34
+
35
+ def get_dirname(self, class_id: Optional[str] = None) -> str:
36
+ return self.value if class_id is None else os.path.join(self.value, class_id)
37
+
38
+ def get_image_relpath(self, actual_index: int, class_id: Optional[str] = None) -> str:
39
+ dirname = self.get_dirname(class_id)
40
+ if self == _Split.TRAIN:
41
+ basename = f"{class_id}_{actual_index}"
42
+ else: # self in (_Split.VAL, _Split.TEST):
43
+ basename = f"ILSVRC2012_{self.value}_{actual_index:08d}"
44
+ return os.path.join(dirname, basename + ".JPEG")
45
+
46
+ def parse_image_relpath(self, image_relpath: str) -> Tuple[str, int]:
47
+ assert self != _Split.TEST
48
+ dirname, filename = os.path.split(image_relpath)
49
+ class_id = os.path.split(dirname)[-1]
50
+ basename, _ = os.path.splitext(filename)
51
+ actual_index = int(basename.split("_")[-1])
52
+ return class_id, actual_index
53
+
54
+
55
+ class ImageNet(ExtendedVisionDataset):
56
+ Target = Union[_Target]
57
+ Split = Union[_Split]
58
+
59
+ def __init__(
60
+ self,
61
+ *,
62
+ split: "ImageNet.Split",
63
+ root: str,
64
+ extra: str,
65
+ transforms: Optional[Callable] = None,
66
+ transform: Optional[Callable] = None,
67
+ target_transform: Optional[Callable] = None,
68
+ ) -> None:
69
+ super().__init__(root, transforms, transform, target_transform)
70
+ self._extra_root = extra
71
+ self._split = split
72
+
73
+ self._entries = None
74
+ self._class_ids = None
75
+ self._class_names = None
76
+
77
+ @property
78
+ def split(self) -> "ImageNet.Split":
79
+ return self._split
80
+
81
+ def _get_extra_full_path(self, extra_path: str) -> str:
82
+ return os.path.join(self._extra_root, extra_path)
83
+
84
+ def _load_extra(self, extra_path: str) -> np.ndarray:
85
+ extra_full_path = self._get_extra_full_path(extra_path)
86
+ return np.load(extra_full_path, mmap_mode="r")
87
+
88
+ def _save_extra(self, extra_array: np.ndarray, extra_path: str) -> None:
89
+ extra_full_path = self._get_extra_full_path(extra_path)
90
+ os.makedirs(self._extra_root, exist_ok=True)
91
+ np.save(extra_full_path, extra_array)
92
+
93
+ @property
94
+ def _entries_path(self) -> str:
95
+ return f"entries-{self._split.value.upper()}.npy"
96
+
97
+ @property
98
+ def _class_ids_path(self) -> str:
99
+ return f"class-ids-{self._split.value.upper()}.npy"
100
+
101
+ @property
102
+ def _class_names_path(self) -> str:
103
+ return f"class-names-{self._split.value.upper()}.npy"
104
+
105
+ def _get_entries(self) -> np.ndarray:
106
+ if self._entries is None:
107
+ self._entries = self._load_extra(self._entries_path)
108
+ assert self._entries is not None
109
+ return self._entries
110
+
111
+ def _get_class_ids(self) -> np.ndarray:
112
+ if self._split == _Split.TEST:
113
+ assert False, "Class IDs are not available in TEST split"
114
+ if self._class_ids is None:
115
+ self._class_ids = self._load_extra(self._class_ids_path)
116
+ assert self._class_ids is not None
117
+ return self._class_ids
118
+
119
+ def _get_class_names(self) -> np.ndarray:
120
+ if self._split == _Split.TEST:
121
+ assert False, "Class names are not available in TEST split"
122
+ if self._class_names is None:
123
+ self._class_names = self._load_extra(self._class_names_path)
124
+ assert self._class_names is not None
125
+ return self._class_names
126
+
127
+ def find_class_id(self, class_index: int) -> str:
128
+ class_ids = self._get_class_ids()
129
+ return str(class_ids[class_index])
130
+
131
+ def find_class_name(self, class_index: int) -> str:
132
+ class_names = self._get_class_names()
133
+ return str(class_names[class_index])
134
+
135
+ def get_image_data(self, index: int) -> bytes:
136
+ entries = self._get_entries()
137
+ actual_index = entries[index]["actual_index"]
138
+
139
+ class_id = self.get_class_id(index)
140
+
141
+ image_relpath = self.split.get_image_relpath(actual_index, class_id)
142
+ image_full_path = os.path.join(self.root, image_relpath)
143
+ with open(image_full_path, mode="rb") as f:
144
+ image_data = f.read()
145
+ return image_data
146
+
147
+ def get_target(self, index: int) -> Optional[Target]:
148
+ entries = self._get_entries()
149
+ class_index = entries[index]["class_index"]
150
+ return None if self.split == _Split.TEST else int(class_index)
151
+
152
+ def get_targets(self) -> Optional[np.ndarray]:
153
+ entries = self._get_entries()
154
+ return None if self.split == _Split.TEST else entries["class_index"]
155
+
156
+ def get_class_id(self, index: int) -> Optional[str]:
157
+ entries = self._get_entries()
158
+ class_id = entries[index]["class_id"]
159
+ return None if self.split == _Split.TEST else str(class_id)
160
+
161
+ def get_class_name(self, index: int) -> Optional[str]:
162
+ entries = self._get_entries()
163
+ class_name = entries[index]["class_name"]
164
+ return None if self.split == _Split.TEST else str(class_name)
165
+
166
+ def __len__(self) -> int:
167
+ entries = self._get_entries()
168
+ assert len(entries) == self.split.length
169
+ return len(entries)
170
+
171
+ def _load_labels(self, labels_path: str) -> List[Tuple[str, str]]:
172
+ labels_full_path = os.path.join(self.root, labels_path)
173
+ labels = []
174
+
175
+ try:
176
+ with open(labels_full_path, "r") as f:
177
+ reader = csv.reader(f)
178
+ for row in reader:
179
+ class_id, class_name = row
180
+ labels.append((class_id, class_name))
181
+ except OSError as e:
182
+ raise RuntimeError(f'can not read labels file "{labels_full_path}"') from e
183
+
184
+ return labels
185
+
186
+ def _dump_entries(self) -> None:
187
+ split = self.split
188
+ if split == ImageNet.Split.TEST:
189
+ dataset = None
190
+ sample_count = split.length
191
+ max_class_id_length, max_class_name_length = 0, 0
192
+ else:
193
+ labels_path = "labels.txt"
194
+ logger.info(f'loading labels from "{labels_path}"')
195
+ labels = self._load_labels(labels_path)
196
+
197
+ # NOTE: Using torchvision ImageFolder for consistency
198
+ from torchvision.datasets import ImageFolder
199
+
200
+ dataset_root = os.path.join(self.root, split.get_dirname())
201
+ dataset = ImageFolder(dataset_root)
202
+ sample_count = len(dataset)
203
+ max_class_id_length, max_class_name_length = -1, -1
204
+ for sample in dataset.samples:
205
+ _, class_index = sample
206
+ class_id, class_name = labels[class_index]
207
+ max_class_id_length = max(len(class_id), max_class_id_length)
208
+ max_class_name_length = max(len(class_name), max_class_name_length)
209
+
210
+ dtype = np.dtype(
211
+ [
212
+ ("actual_index", "<u4"),
213
+ ("class_index", "<u4"),
214
+ ("class_id", f"U{max_class_id_length}"),
215
+ ("class_name", f"U{max_class_name_length}"),
216
+ ]
217
+ )
218
+ entries_array = np.empty(sample_count, dtype=dtype)
219
+
220
+ if split == ImageNet.Split.TEST:
221
+ old_percent = -1
222
+ for index in range(sample_count):
223
+ percent = 100 * (index + 1) // sample_count
224
+ if percent > old_percent:
225
+ logger.info(f"creating entries: {percent}%")
226
+ old_percent = percent
227
+
228
+ actual_index = index + 1
229
+ class_index = np.uint32(-1)
230
+ class_id, class_name = "", ""
231
+ entries_array[index] = (actual_index, class_index, class_id, class_name)
232
+ else:
233
+ class_names = {class_id: class_name for class_id, class_name in labels}
234
+
235
+ assert dataset
236
+ old_percent = -1
237
+ for index in range(sample_count):
238
+ percent = 100 * (index + 1) // sample_count
239
+ if percent > old_percent:
240
+ logger.info(f"creating entries: {percent}%")
241
+ old_percent = percent
242
+
243
+ image_full_path, class_index = dataset.samples[index]
244
+ image_relpath = os.path.relpath(image_full_path, self.root)
245
+ class_id, actual_index = split.parse_image_relpath(image_relpath)
246
+ class_name = class_names[class_id]
247
+ entries_array[index] = (actual_index, class_index, class_id, class_name)
248
+
249
+ logger.info(f'saving entries to "{self._entries_path}"')
250
+ self._save_extra(entries_array, self._entries_path)
251
+
252
+ def _dump_class_ids_and_names(self) -> None:
253
+ split = self.split
254
+ if split == ImageNet.Split.TEST:
255
+ return
256
+
257
+ entries_array = self._load_extra(self._entries_path)
258
+
259
+ max_class_id_length, max_class_name_length, max_class_index = -1, -1, -1
260
+ for entry in entries_array:
261
+ class_index, class_id, class_name = (
262
+ entry["class_index"],
263
+ entry["class_id"],
264
+ entry["class_name"],
265
+ )
266
+ max_class_index = max(int(class_index), max_class_index)
267
+ max_class_id_length = max(len(str(class_id)), max_class_id_length)
268
+ max_class_name_length = max(len(str(class_name)), max_class_name_length)
269
+
270
+ class_count = max_class_index + 1
271
+ class_ids_array = np.empty(class_count, dtype=f"U{max_class_id_length}")
272
+ class_names_array = np.empty(class_count, dtype=f"U{max_class_name_length}")
273
+ for entry in entries_array:
274
+ class_index, class_id, class_name = (
275
+ entry["class_index"],
276
+ entry["class_id"],
277
+ entry["class_name"],
278
+ )
279
+ class_ids_array[class_index] = class_id
280
+ class_names_array[class_index] = class_name
281
+
282
+ logger.info(f'saving class IDs to "{self._class_ids_path}"')
283
+ self._save_extra(class_ids_array, self._class_ids_path)
284
+
285
+ logger.info(f'saving class names to "{self._class_names_path}"')
286
+ self._save_extra(class_names_array, self._class_names_path)
287
+
288
+ def dump_extra(self) -> None:
289
+ self._dump_entries()
290
+ self._dump_class_ids_and_names()
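A sketch of the two-step workflow this class assumes, with hypothetical paths: the standard ImageNet directory layout plus a labels.txt file under root, with the numpy "extra" metadata dumped once and then lazily mmap-ed on later runs:

dataset = ImageNet(
    split=ImageNet.Split.TRAIN,
    root="/data/imagenet",
    extra="/data/imagenet-extra",
)
dataset.dump_extra()        # writes entries-TRAIN.npy, class-ids-TRAIN.npy, ...
image, target = dataset[0]  # later runs only mmap the .npy files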
dinov2/data/datasets/image_net_22k.py ADDED
@@ -0,0 +1,302 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ from dataclasses import dataclass
7
+ from enum import Enum
8
+ from functools import lru_cache
9
+ from gzip import GzipFile
10
+ from io import BytesIO
11
+ from mmap import ACCESS_READ, mmap
12
+ import os
13
+ from typing import Any, Callable, List, Optional, Set, Tuple
14
+ import warnings
15
+
16
+ import numpy as np
17
+
18
+ from .extended import ExtendedVisionDataset
19
+
20
+
21
+ _Labels = int
22
+
23
+ _DEFAULT_MMAP_CACHE_SIZE = 16 # Warning: This can exhaust file descriptors
24
+
25
+
26
+ @dataclass
27
+ class _ClassEntry:
28
+ block_offset: int
29
+ maybe_filename: Optional[str] = None
30
+
31
+
32
+ @dataclass
33
+ class _Entry:
34
+ class_index: int # noqa: E701
35
+ start_offset: int
36
+ end_offset: int
37
+ filename: str
38
+
39
+
40
+ class _Split(Enum):
41
+ TRAIN = "train"
42
+ VAL = "val"
43
+
44
+ @property
45
+ def length(self) -> int:
46
+ return {
47
+ _Split.TRAIN: 11_797_647,
48
+ _Split.VAL: 561_050,
49
+ }[self]
50
+
51
+ def entries_path(self):
52
+ return f"imagenet21kp_{self.value}.txt"
53
+
54
+
55
+ def _get_tarball_path(class_id: str) -> str:
56
+ return f"{class_id}.tar"
57
+
58
+
59
+ def _make_mmap_tarball(tarballs_root: str, mmap_cache_size: int):
60
+ @lru_cache(maxsize=mmap_cache_size)
61
+ def _mmap_tarball(class_id: str) -> mmap:
62
+ tarball_path = _get_tarball_path(class_id)
63
+ tarball_full_path = os.path.join(tarballs_root, tarball_path)
64
+ with open(tarball_full_path) as f:
65
+ return mmap(fileno=f.fileno(), length=0, access=ACCESS_READ)
66
+
67
+ return _mmap_tarball
68
+
69
+
70
+ class ImageNet22k(ExtendedVisionDataset):
71
+ _GZIPPED_INDICES: Set[int] = {
72
+ 841_545,
73
+ 1_304_131,
74
+ 2_437_921,
75
+ 2_672_079,
76
+ 2_795_676,
77
+ 2_969_786,
78
+ 6_902_965,
79
+ 6_903_550,
80
+ 6_903_628,
81
+ 7_432_557,
82
+ 7_432_589,
83
+ 7_813_809,
84
+ 8_329_633,
85
+ 10_296_990,
86
+ 10_417_652,
87
+ 10_492_265,
88
+ 10_598_078,
89
+ 10_782_398,
90
+ 10_902_612,
91
+ 11_203_736,
92
+ 11_342_890,
93
+ 11_397_596,
94
+ 11_589_762,
95
+ 11_705_103,
96
+ 12_936_875,
97
+ 13_289_782,
98
+ }
99
+ Labels = _Labels
100
+
101
+ def __init__(
102
+ self,
103
+ *,
104
+ root: str,
105
+ extra: str,
106
+ transforms: Optional[Callable] = None,
107
+ transform: Optional[Callable] = None,
108
+ target_transform: Optional[Callable] = None,
109
+ mmap_cache_size: int = _DEFAULT_MMAP_CACHE_SIZE,
110
+ ) -> None:
111
+ super().__init__(root, transforms, transform, target_transform)
112
+ self._extra_root = extra
113
+
114
+ entries_path = self._get_entries_path(root)
115
+ self._entries = self._load_extra(entries_path)
116
+
117
+ class_ids_path = self._get_class_ids_path(root)
118
+ self._class_ids = self._load_extra(class_ids_path)
119
+
120
+ self._gzipped_indices = ImageNet22k._GZIPPED_INDICES
121
+ self._mmap_tarball = _make_mmap_tarball(self._tarballs_root, mmap_cache_size)
122
+
123
+ def _get_entries_path(self, root: Optional[str] = None) -> str:
124
+ return "entries.npy"
125
+
126
+ def _get_class_ids_path(self, root: Optional[str] = None) -> str:
127
+ return "class-ids.npy"
128
+
129
+ def _find_class_ids(self, path: str) -> List[str]:
130
+ class_ids = []
131
+
132
+ with os.scandir(path) as entries:
133
+ for entry in entries:
134
+ root, ext = os.path.splitext(entry.name)
135
+ if ext != ".tar":
136
+ continue
137
+ class_ids.append(root)
138
+
139
+ return sorted(class_ids)
140
+
141
+ def _load_entries_class_ids(self, root: Optional[str] = None) -> Tuple[List[_Entry], List[str]]:
142
+ root = self.get_root(root)
143
+ entries: List[_Entry] = []
144
+ class_ids = self._find_class_ids(root)
145
+
146
+ for class_index, class_id in enumerate(class_ids):
147
+ path = os.path.join(root, "blocks", f"{class_id}.log")
148
+ class_entries = []
149
+
150
+ try:
151
+ with open(path) as f:
152
+ for line in f:
153
+ line = line.rstrip()
154
+ block, filename = line.split(":")
155
+ block_offset = int(block[6:])
156
+ filename = filename[1:]
157
+
158
+ maybe_filename = None
159
+ if filename != "** Block of NULs **":
160
+ maybe_filename = filename
161
+ _, ext = os.path.splitext(filename)
162
+ # assert ext == ".JPEG"
163
+
164
+ class_entry = _ClassEntry(block_offset, maybe_filename)
165
+ class_entries.append(class_entry)
166
+ except OSError as e:
167
+ raise RuntimeError(f'can not read blocks file "{path}"') from e
168
+
169
+ assert class_entries[-1].maybe_filename is None
170
+
171
+ for class_entry1, class_entry2 in zip(class_entries, class_entries[1:]):
172
+ assert class_entry1.block_offset <= class_entry2.block_offset
173
+ start_offset = 512 * class_entry1.block_offset
174
+ end_offset = 512 * class_entry2.block_offset
175
+ assert class_entry1.maybe_filename is not None
176
+ filename = class_entry1.maybe_filename
177
+ entry = _Entry(class_index, start_offset, end_offset, filename)
178
+ # Skip invalid image files (PIL throws UnidentifiedImageError)
179
+ if filename == "n06470073_47249.JPEG":
180
+ continue
181
+ entries.append(entry)
182
+
183
+ return entries, class_ids
184
+
185
+ def _load_extra(self, extra_path: str) -> np.ndarray:
186
+ extra_root = self._extra_root
187
+ extra_full_path = os.path.join(extra_root, extra_path)
188
+ return np.load(extra_full_path, mmap_mode="r")
189
+
190
+ def _save_extra(self, extra_array: np.ndarray, extra_path: str) -> None:
191
+ extra_root = self._extra_root
192
+ extra_full_path = os.path.join(extra_root, extra_path)
193
+ os.makedirs(extra_root, exist_ok=True)
194
+ np.save(extra_full_path, extra_array)
195
+
196
+ @property
197
+ def _tarballs_root(self) -> str:
198
+ return self.root
199
+
200
+ def find_class_id(self, class_index: int) -> str:
201
+ return str(self._class_ids[class_index])
202
+
203
+ def get_image_data(self, index: int) -> bytes:
204
+ entry = self._entries[index]
205
+ class_id = entry["class_id"]
206
+ class_mmap = self._mmap_tarball(class_id)
207
+
208
+ start_offset, end_offset = entry["start_offset"], entry["end_offset"]
209
+ try:
210
+ mapped_data = class_mmap[start_offset:end_offset]
211
+ data = mapped_data[512:] # Skip entry header block
212
+
213
+ if len(data) >= 2 and tuple(data[:2]) == (0x1F, 0x8B):
214
+ assert index in self._gzipped_indices, f"unexpected gzip header for sample {index}"
215
+ with GzipFile(fileobj=BytesIO(data)) as g:
216
+ data = g.read()
217
+ except Exception as e:
218
+ raise RuntimeError(f"can not retrieve image data for sample {index} " f'from "{class_id}" tarball') from e
219
+
220
+ return data
221
+
222
+ def get_target(self, index: int) -> Any:
223
+ return int(self._entries[index]["class_index"])
224
+
225
+ def get_targets(self) -> np.ndarray:
226
+ return self._entries["class_index"]
227
+
228
+ def get_class_id(self, index: int) -> str:
229
+ return str(self._entries[index]["class_id"])
230
+
231
+ def get_class_ids(self) -> np.ndarray:
232
+ return self._entries["class_id"]
233
+
234
+ def __getitem__(self, index: int) -> Tuple[Any, Any]:
235
+ with warnings.catch_warnings():
236
+ warnings.simplefilter("ignore")
237
+ return super().__getitem__(index)
238
+
239
+ def __len__(self) -> int:
240
+ return len(self._entries)
241
+
242
+ def _dump_entries(self, *args, **kwargs) -> None:
243
+ entries, class_ids = self._load_entries_class_ids(*args, **kwargs)
244
+
245
+ max_class_id_length, max_filename_length, max_class_index = -1, -1, -1
246
+ for entry in entries:
247
+ class_id = class_ids[entry.class_index]
248
+ max_class_index = max(entry.class_index, max_class_index)
249
+ max_class_id_length = max(len(class_id), max_class_id_length)
250
+ max_filename_length = max(len(entry.filename), max_filename_length)
251
+
252
+ dtype = np.dtype(
253
+ [
254
+ ("class_index", "<u4"),
255
+ ("class_id", f"U{max_class_id_length}"),
256
+ ("start_offset", "<u4"),
257
+ ("end_offset", "<u4"),
258
+ ("filename", f"U{max_filename_length}"),
259
+ ]
260
+ )
261
+ sample_count = len(entries)
262
+ entries_array = np.empty(sample_count, dtype=dtype)
263
+ for i, entry in enumerate(entries):
264
+ class_index = entry.class_index
265
+ class_id = class_ids[class_index]
266
+ start_offset = entry.start_offset
267
+ end_offset = entry.end_offset
268
+ filename = entry.filename
269
+ entries_array[i] = (
270
+ class_index,
271
+ class_id,
272
+ start_offset,
273
+ end_offset,
274
+ filename,
275
+ )
276
+
277
+ entries_path = self._get_entries_path(*args, **kwargs)
278
+ self._save_extra(entries_array, entries_path)
279
+
280
+ def _dump_class_ids(self, *args, **kwargs) -> None:
281
+ entries_path = self._get_entries_path(*args, **kwargs)
282
+ entries_array = self._load_extra(entries_path)
283
+
284
+ max_class_id_length, max_class_index = -1, -1
285
+ for entry in entries_array:
286
+ class_index, class_id = entry["class_index"], entry["class_id"]
287
+ max_class_index = max(int(class_index), max_class_index)
288
+ max_class_id_length = max(len(str(class_id)), max_class_id_length)
289
+
290
+ class_ids_array = np.empty(max_class_index + 1, dtype=f"U{max_class_id_length}")
291
+ for entry in entries_array:
292
+ class_index, class_id = entry["class_index"], entry["class_id"]
293
+ class_ids_array[class_index] = class_id
294
+ class_ids_path = self._get_class_ids_path(*args, **kwargs)
295
+ self._save_extra(class_ids_array, class_ids_path)
296
+
297
+ def _dump_extra(self, *args, **kwargs) -> None:
298
+ self._dump_entries(*args, *kwargs)
299
+ self._dump_class_ids(*args, *kwargs)
300
+
301
+ def dump_extra(self, root: Optional[str] = None) -> None:
302
+ return self._dump_extra(root)
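A minimal sketch of the tar-offset trick used by get_image_data above, with hypothetical tarball name and offsets: each entry records byte offsets into its class tarball, and the 512-byte tar header block is skipped before the image payload:

from mmap import ACCESS_READ, mmap

with open("n01440764.tar", "rb") as f:  # hypothetical class tarball
    m = mmap(fileno=f.fileno(), length=0, access=ACCESS_READ)
start_offset, end_offset = 0, 512 + 12_345  # hypothetical entry offsets
data = m[start_offset:end_offset][512:]     # skip the tar header block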
dinov2/data/loaders.py ADDED
@@ -0,0 +1,232 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ import logging
7
+ from enum import Enum
8
+ from typing import Any, Callable, List, Optional, TypeVar
9
+
10
+ import torch
11
+ from torch.utils.data import Sampler
12
+
13
+ from .datasets import ImageNet, ImageNet22k, HPAone, HPAFoV, CHAMMI_CP, CHAMMI_HPA, CHAMMI_WTC
14
+ from .samplers import EpochSampler, InfiniteSampler, ShardedInfiniteSampler
15
+
16
+
17
+ logger = logging.getLogger("dinov2")
18
+
19
+
20
+ class SamplerType(Enum):
21
+ DISTRIBUTED = 0
22
+ EPOCH = 1
23
+ INFINITE = 2
24
+ SHARDED_INFINITE = 3
25
+ SHARDED_INFINITE_NEW = 4
26
+
27
+
28
+ def _make_bool_str(b: bool) -> str:
29
+ return "yes" if b else "no"
30
+
31
+
32
+ def _make_sample_transform(image_transform: Optional[Callable] = None, target_transform: Optional[Callable] = None):
33
+ def transform(sample):
34
+ image, target = sample
35
+ if image_transform is not None:
36
+ image = image_transform(image)
37
+ if target_transform is not None:
38
+ target = target_transform(target)
39
+ return image, target
40
+
41
+ return transform
42
+
43
+
44
+ def _parse_dataset_str(dataset_str: str):
45
+ tokens = dataset_str.split(":")
46
+
47
+ name = tokens[0]
48
+ kwargs = {}
49
+
50
+ for token in tokens[1:]:
51
+ key, value = token.split("=")
52
+ assert key in ("root", "extra", "split", "mode", "wildcard")
53
+ kwargs[key] = value
54
+
55
+ if name == "ImageNet":
56
+ class_ = ImageNet
57
+ if "split" in kwargs:
58
+ kwargs["split"] = ImageNet.Split[kwargs["split"]]
59
+ elif name == "ImageNet22k":
60
+ class_ = ImageNet22k
61
+ elif name == "HPAone":
62
+ class_ = HPAone
63
+ elif name == "HPAFoV":
64
+ class_ = HPAFoV
65
+ elif name == "CHAMMI_CP":
66
+ class_ = CHAMMI_CP
67
+ elif name == "CHAMMI_WTC":
68
+ class_ = CHAMMI_WTC
69
+ elif name == "CHAMMI_HPA":
70
+ class_ = CHAMMI_HPA
71
+ else:
72
+ raise ValueError(f'Unsupported dataset "{name}"')
73
+
74
+ return class_, kwargs
75
+
76
+
77
+ def make_dataset(
78
+ *,
79
+ dataset_str: str,
80
+ transform: Optional[Callable] = None,
81
+ target_transform: Optional[Callable] = None,
82
+ ):
83
+ """
84
+ Creates a dataset with the specified parameters.
85
+
86
+ Args:
87
+ dataset_str: A dataset string description (e.g. ImageNet:split=TRAIN).
88
+ transform: A transform to apply to images.
89
+ target_transform: A transform to apply to targets.
90
+
91
+ Returns:
92
+ The created dataset.
93
+ """
94
+ logger.info(f'using dataset: "{dataset_str}"')
95
+
96
+ class_, kwargs = _parse_dataset_str(dataset_str)
97
+ dataset = class_(transform=transform, target_transform=target_transform, **kwargs)
98
+
99
+ logger.info(f"# of dataset samples: {len(dataset):,d}")
100
+
101
+ # Aggregated datasets do not expose (yet) these attributes, so add them.
102
+ if not hasattr(dataset, "transform"):
103
+ setattr(dataset, "transform", transform)
104
+ if not hasattr(dataset, "target_transform"):
105
+ setattr(dataset, "target_transform", target_transform)
106
+
107
+ return dataset
108
+
109
+
110
+ def _make_sampler(
111
+ *,
112
+ dataset,
113
+ type: Optional[SamplerType] = None,
114
+ shuffle: bool = False,
115
+ seed: int = 0,
116
+ size: int = -1,
117
+ advance: int = 0,
118
+ ) -> Optional[Sampler]:
119
+ sample_count = len(dataset)
120
+
121
+ if type == SamplerType.INFINITE:
122
+ logger.info("sampler: infinite")
123
+ if size > 0:
124
+ raise ValueError("sampler size > 0 is invalid")
125
+ return InfiniteSampler(
126
+ sample_count=sample_count,
127
+ shuffle=shuffle,
128
+ seed=seed,
129
+ advance=advance,
130
+ )
131
+ elif type in (SamplerType.SHARDED_INFINITE, SamplerType.SHARDED_INFINITE_NEW):
132
+ logger.info("sampler: sharded infinite")
133
+ if size > 0:
134
+ raise ValueError("sampler size > 0 is invalid")
135
+ # TODO: Remove support for old shuffling
136
+ use_new_shuffle_tensor_slice = type == SamplerType.SHARDED_INFINITE_NEW
137
+ return ShardedInfiniteSampler(
138
+ sample_count=sample_count,
139
+ shuffle=shuffle,
140
+ seed=seed,
141
+ advance=advance,
142
+ use_new_shuffle_tensor_slice=use_new_shuffle_tensor_slice,
143
+ )
144
+ elif type == SamplerType.EPOCH:
145
+ logger.info("sampler: epoch")
146
+ if advance > 0:
147
+ raise NotImplementedError("sampler advance > 0 is not supported")
148
+ size = size if size > 0 else sample_count
149
+ logger.info(f"# of samples / epoch: {size:,d}")
150
+ return EpochSampler(
151
+ size=size,
152
+ sample_count=sample_count,
153
+ shuffle=shuffle,
154
+ seed=seed,
155
+ )
156
+ elif type == SamplerType.DISTRIBUTED:
157
+ logger.info("sampler: distributed")
158
+ if size > 0:
159
+ raise ValueError("sampler size > 0 is invalid")
160
+ if advance > 0:
161
+ raise ValueError("sampler advance > 0 is invalid")
162
+ return torch.utils.data.DistributedSampler(
163
+ dataset=dataset,
164
+ shuffle=shuffle,
165
+ seed=seed,
166
+ drop_last=False,
167
+ )
168
+
169
+ logger.info("sampler: none")
170
+ return None
171
+
172
+
173
+ T = TypeVar("T")
174
+
175
+
176
+ def make_data_loader(
177
+ *,
178
+ dataset,
179
+ batch_size: int,
180
+ num_workers: int,
181
+ shuffle: bool = True,
182
+ seed: int = 0,
183
+ sampler_type: Optional[SamplerType] = SamplerType.INFINITE,
184
+ sampler_size: int = -1,
185
+ sampler_advance: int = 0,
186
+ drop_last: bool = True,
187
+ persistent_workers: bool = False,
188
+ collate_fn: Optional[Callable[[List[T]], Any]] = None,
189
+ ):
190
+ """
191
+ Creates a data loader with the specified parameters.
192
+
193
+ Args:
194
+ dataset: A dataset (third party, LaViDa or WebDataset).
195
+ batch_size: The size of batches to generate.
196
+ num_workers: The number of workers to use.
197
+ shuffle: Whether to shuffle samples.
198
+ seed: The random seed to use.
199
+ sampler_type: Which sampler to use: EPOCH, INFINITE, SHARDED_INFINITE, SHARDED_INFINITE_NEW, DISTRIBUTED or None.
200
+ sampler_size: The number of images per epoch (when applicable) or -1 for the entire dataset.
201
+ sampler_advance: How many samples to skip (when applicable).
202
+ drop_last: Whether the last non-full batch of data should be dropped.
203
+ persistent_workers: keep the worker Dataset instances alive after the dataset has been consumed once.
204
+ collate_fn: Function that performs batch collation.
205
+ """
206
+
207
+ sampler = _make_sampler(
208
+ dataset=dataset,
209
+ type=sampler_type,
210
+ shuffle=shuffle,
211
+ seed=seed,
212
+ size=sampler_size,
213
+ advance=sampler_advance,
214
+ )
215
+
216
+ logger.info("using PyTorch data loader")
217
+ data_loader = torch.utils.data.DataLoader(
218
+ dataset,
219
+ sampler=sampler,
220
+ batch_size=batch_size,
221
+ num_workers=num_workers,
222
+ pin_memory=True,
223
+ drop_last=drop_last,
224
+ persistent_workers=persistent_workers,
225
+ collate_fn=collate_fn,
226
+ )
227
+
228
+ try:
229
+ logger.info(f"# of batches: {len(data_loader):,d}")
230
+ except TypeError: # data loader has no length
231
+ logger.info("infinite data loader")
232
+ return data_loader
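A usage sketch of the dataset-string convention parsed by _parse_dataset_str above: the class name, then colon-separated key=value pairs drawn from (root, extra, split, mode, wildcard). Paths are hypothetical, and the ImageNet case assumes dump_extra has already produced the entries files:

dataset = make_dataset(dataset_str="ImageNet:split=TRAIN:root=/data/imagenet:extra=/data/extra")
data_loader = make_data_loader(
    dataset=dataset,
    batch_size=64,
    num_workers=8,
    sampler_type=SamplerType.SHARDED_INFINITE,
)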
dinov2/data/masking.py ADDED
@@ -0,0 +1,86 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ import random
7
+ import math
8
+ import numpy as np
9
+
10
+
11
+ class MaskingGenerator:
12
+ def __init__(
13
+ self,
14
+ input_size,
15
+ num_masking_patches=None,
16
+ min_num_patches=4,
17
+ max_num_patches=None,
18
+ min_aspect=0.3,
19
+ max_aspect=None,
20
+ ):
21
+ if not isinstance(input_size, tuple):
22
+ input_size = (input_size,) * 2
23
+ self.height, self.width = input_size
24
+
25
+ self.num_patches = self.height * self.width
26
+ self.num_masking_patches = num_masking_patches
27
+
28
+ self.min_num_patches = min_num_patches
29
+ self.max_num_patches = num_masking_patches if max_num_patches is None else max_num_patches
30
+
31
+ max_aspect = max_aspect or 1 / min_aspect
32
+ self.log_aspect_ratio = (math.log(min_aspect), math.log(max_aspect))
33
+
34
+ def __repr__(self):
35
+ repr_str = "Generator(%d, %d -> [%d ~ %d], max = %d, %.3f ~ %.3f)" % (
36
+ self.height,
37
+ self.width,
38
+ self.min_num_patches,
39
+ self.max_num_patches,
40
+ self.num_masking_patches,
41
+ self.log_aspect_ratio[0],
42
+ self.log_aspect_ratio[1],
43
+ )
44
+ return repr_str
45
+
46
+ def get_shape(self):
47
+ return self.height, self.width
48
+
49
+ def _mask(self, mask, max_mask_patches):
50
+ delta = 0
51
+ for _ in range(10):
52
+ target_area = random.uniform(self.min_num_patches, max_mask_patches)
53
+ aspect_ratio = math.exp(random.uniform(*self.log_aspect_ratio))
54
+ h = int(round(math.sqrt(target_area * aspect_ratio)))
55
+ w = int(round(math.sqrt(target_area / aspect_ratio)))
56
+ if w < self.width and h < self.height:
57
+ top = random.randint(0, self.height - h)
58
+ left = random.randint(0, self.width - w)
59
+
60
+ num_masked = mask[top : top + h, left : left + w].sum()
61
+ # Overlap
62
+ if 0 < h * w - num_masked <= max_mask_patches:
63
+ for i in range(top, top + h):
64
+ for j in range(left, left + w):
65
+ if mask[i, j] == 0:
66
+ mask[i, j] = 1
67
+ delta += 1
68
+
69
+ if delta > 0:
70
+ break
71
+ return delta
72
+
73
+ def __call__(self, num_masking_patches=0):
74
+ mask = np.zeros(shape=self.get_shape(), dtype=bool)
75
+ mask_count = 0
76
+ while mask_count < num_masking_patches:
77
+ max_mask_patches = num_masking_patches - mask_count
78
+ max_mask_patches = min(max_mask_patches, self.max_num_patches)
79
+
80
+ delta = self._mask(mask, max_mask_patches)
81
+ if delta == 0:
82
+ break
83
+ else:
84
+ mask_count += delta
85
+
86
+ return mask
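A quick usage sketch: for a 14x14 patch grid (e.g. a ViT at 224px with patch size 16), ask for roughly half the patches; the generator keeps placing random block-shaped regions until the budget is met or no further block fits:

gen = MaskingGenerator(input_size=14, num_masking_patches=98)
mask = gen(num_masking_patches=98)  # (14, 14) boolean numpy array
assert mask.sum() <= 98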
dinov2/data/samplers.py ADDED
@@ -0,0 +1,229 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ import itertools
7
+ from typing import Any, Optional
8
+ import warnings
9
+
10
+ import numpy as np
11
+ import torch
12
+ from torch.utils.data.sampler import Sampler
13
+
14
+ import dinov2.distributed as distributed
15
+
16
+
17
+ class EpochSampler(Sampler):
18
+ def __init__(
19
+ self,
20
+ *,
21
+ size: int,
22
+ sample_count: int,
23
+ shuffle: bool = False,
24
+ seed: int = 0,
25
+ start: Optional[int] = None,
26
+ step: Optional[int] = None,
27
+ ):
28
+ self._size = size
29
+ self._sample_count = sample_count
30
+ self._shuffle = shuffle
31
+ self._seed = seed
32
+ self._start = distributed.get_global_rank() if start is None else start
33
+ self._step = distributed.get_global_size() if step is None else step
34
+ self._epoch = 0
35
+
36
+ def __iter__(self):
37
+ count = (self._size + self._sample_count - 1) // self._sample_count
38
+ tiled_indices = np.tile(np.arange(self._sample_count), count)
39
+ if self._shuffle:
40
+ seed = self._seed * self._epoch if self._seed != 0 else self._epoch
41
+ rng = np.random.default_rng(seed)
42
+ iterable = rng.choice(tiled_indices, self._size, replace=False)
43
+ else:
44
+ iterable = tiled_indices[: self._size]
45
+
46
+ yield from itertools.islice(iterable, self._start, None, self._step)
47
+
48
+ def __len__(self):
49
+ return (self._size - self._start + self._step - 1) // self._step
50
+
51
+ def set_epoch(self, epoch):
52
+ self._epoch = epoch
53
+
54
+
55
+ def _get_numpy_dtype(size: int) -> Any:
56
+ return np.int32 if size <= 2**31 else np.int64
57
+
58
+
59
+ def _get_torch_dtype(size: int) -> Any:
60
+ return torch.int32 if size <= 2**31 else torch.int64
61
+
62
+
63
+ def _generate_randperm_indices(*, size: int, generator: torch.Generator):
64
+ """Generate the indices of a random permutation."""
65
+ dtype = _get_torch_dtype(size)
66
+ # This is actually matching PyTorch's CPU implementation, see: https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/TensorFactories.cpp#L900-L921
67
+ perm = torch.arange(size, dtype=dtype)
68
+ for i in range(size):
69
+ j = torch.randint(i, size, size=(1,), generator=generator).item()
70
+
71
+ # Always swap even if no-op
72
+ value = perm[j].item()
73
+ perm[j] = perm[i].item()
74
+ perm[i] = value
75
+ yield value
76
+
77
+
78
+ class InfiniteSampler(Sampler):
79
+ def __init__(
80
+ self,
81
+ *,
82
+ sample_count: int,
83
+ shuffle: bool = False,
84
+ seed: int = 0,
85
+ start: Optional[int] = None,
86
+ step: Optional[int] = None,
87
+ advance: int = 0,
88
+ ):
89
+ self._sample_count = sample_count
90
+ self._seed = seed
91
+ self._shuffle = shuffle
92
+ self._start = distributed.get_global_rank() if start is None else start
93
+ self._step = distributed.get_global_size() if step is None else step
94
+ self._advance = advance
95
+
96
+ def __iter__(self):
97
+ if self._shuffle:
98
+ iterator = self._shuffled_iterator()
99
+ else:
100
+ iterator = self._iterator()
101
+
102
+ yield from itertools.islice(iterator, self._advance, None)
103
+
104
+ def _iterator(self):
105
+ assert not self._shuffle
106
+
107
+ while True:
108
+ iterable = range(self._sample_count)
109
+ yield from itertools.islice(iterable, self._start, None, self._step)
110
+
111
+ def _shuffled_iterator(self):
112
+ assert self._shuffle
113
+
114
+ # Instantiate a generator here (rather than in the ctor) to keep the class
115
+ # picklable (requirement of mp.spawn)
116
+ generator = torch.Generator().manual_seed(self._seed)
117
+
118
+ while True:
119
+ iterable = _generate_randperm_indices(size=self._sample_count, generator=generator)
120
+ yield from itertools.islice(iterable, self._start, None, self._step)
121
+
122
+
123
+ # The following function is somewhat equivalent to _new_shuffle_tensor_slice below,
124
+ # but avoids a full in-place random permutation generation.
125
+ def _shuffle_tensor_slice(
126
+ *, tensor: torch.Tensor, start: int = 0, step: int = 1, generator: torch.Generator
127
+ ) -> np.ndarray:
128
+ stop = len(tensor)
129
+ count = stop // step
130
+ drop_count = stop - step * count
131
+ if drop_count:
132
+ warnings.warn(f"# of dropped samples: {drop_count}")
133
+
134
+ dtype = _get_numpy_dtype(stop)
135
+ result = np.empty(count, dtype=dtype)
136
+
137
+ for i in range(count):
138
+ j = torch.randint(0, i + 1, size=(1,), generator=generator).item() if i > 0 else 0
139
+
140
+ result[i] = result[j]
141
+ result[j] = tensor[start + i * step].item()
142
+
143
+ return result
144
+
145
+
146
+ def _new_shuffle_tensor_slice(
147
+ *, tensor: torch.Tensor, start: int = 0, step: int = 1, generator: torch.Generator
148
+ ) -> np.ndarray:
149
+ stop = len(tensor)
150
+ count = stop // step
151
+ dtype = torch.int64 # Needed for using randperm result as indices
153
+ drop_count = stop - step * count
154
+ if drop_count:
155
+ warnings.warn(f"# of dropped samples: {drop_count}")
156
+ indices = torch.randperm(count, dtype=dtype, generator=generator)
157
+ return tensor[start::step][indices].numpy()
158
+
159
+
160
+ def _make_seed(seed: int, start: int, iter_count: int) -> int:
161
+ # NOTE: Tried a few variants (including iter_count << 32), this one worked best.
162
+ return seed + start + (iter_count << 24)
163
+
164
+
165
+ class ShardedInfiniteSampler(Sampler):
166
+ def __init__(
167
+ self,
168
+ *,
169
+ sample_count: int,
170
+ shuffle: bool = False,
171
+ seed: int = 0,
172
+ start: Optional[int] = None,
173
+ step: Optional[int] = None,
174
+ advance: int = 0,
175
+ use_new_shuffle_tensor_slice: bool = False,
176
+ ):
177
+ self._sample_count = sample_count
178
+ self._seed = seed
179
+ self._shuffle = shuffle
180
+ self._start = distributed.get_global_rank() if start is None else start
181
+ self._step = distributed.get_global_size() if step is None else step
182
+ self._advance = advance
183
+ self._iter_count = 0
184
+ self._shuffle_tensor_slice_fn = (
185
+ _new_shuffle_tensor_slice if use_new_shuffle_tensor_slice else _shuffle_tensor_slice
186
+ )
187
+
188
+ def __iter__(self):
189
+ iter_count = self._advance // self._sample_count
190
+ if iter_count > 0:
191
+ self._advance -= iter_count * self._sample_count
192
+ self._iter_count += iter_count
193
+
194
+ if self._shuffle:
195
+ iterator = self._shuffled_iterator()
196
+ else:
197
+ iterator = self._iterator()
198
+
199
+ yield from itertools.islice(iterator, self._advance, None)
200
+
201
+ def _iterator(self):
202
+ assert not self._shuffle
203
+
204
+ while True:
205
+ iterable = range(self._sample_count)
206
+ yield from itertools.islice(iterable, self._start, None, self._step)
207
+
208
+ def _shuffled_iterator(self):
209
+ assert self._shuffle
210
+
211
+ # Instantiate a generator here (rather than in the ctor) to keep the class
212
+ # picklable (requirement of mp.spawn)
213
+ generator = torch.Generator()
214
+
215
+ # Always shuffle everything first
216
+ generator.manual_seed(self._seed)
217
+ dtype = _get_torch_dtype(self._sample_count)
218
+ perm = torch.randperm(self._sample_count, dtype=dtype, generator=generator)
219
+
220
+ while True:
221
+ # Re-seed on each iteration to allow skipping whole permutations
222
+ seed = _make_seed(self._seed, self._start, self._iter_count)
223
+ generator.manual_seed(seed)
224
+
225
+ iterable = self._shuffle_tensor_slice_fn(
226
+ tensor=perm, start=self._start, step=self._step, generator=generator
227
+ )
228
+ yield from iterable
229
+ self._iter_count += 1
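A sketch of how the sharded sampler resumes mid-training: whole permutations are skipped by bumping _iter_count, and _make_seed re-derives a per-iteration seed so earlier permutations never have to be replayed element by element. The counts here are made up:

sampler = ShardedInfiniteSampler(
    sample_count=1_000,
    shuffle=True,
    seed=0,
    advance=2_500,  # resume as if 2,500 samples were already consumed
)
first_index = next(iter(sampler))  # skips 2 full permutations, then 500 samples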
dinov2/data/transforms.py ADDED
@@ -0,0 +1,91 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ from typing import Sequence
7
+
8
+ import torch
9
+ from torchvision import transforms
10
+
11
+
12
+ class GaussianBlur(transforms.RandomApply):
13
+ """
14
+ Apply Gaussian Blur to the PIL image.
15
+ """
16
+
17
+ def __init__(self, *, p: float = 0.5, radius_min: float = 0.1, radius_max: float = 2.0):
18
+ # NOTE: torchvision is applying 1 - probability to return the original image
19
+ keep_p = 1 - p
20
+ transform = transforms.GaussianBlur(kernel_size=9, sigma=(radius_min, radius_max))
21
+ super().__init__(transforms=[transform], p=keep_p)
22
+
23
+
24
+ class MaybeToTensor(transforms.ToTensor):
25
+ """
26
+ Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor, or keep as is if already a tensor.
27
+ """
28
+
29
+ def __call__(self, pic):
30
+ """
31
+ Args:
32
+ pic (PIL Image, numpy.ndarray or torch.tensor): Image to be converted to tensor.
33
+ Returns:
34
+ Tensor: Converted image.
35
+ """
36
+ if isinstance(pic, torch.Tensor):
37
+ return pic
38
+ return super().__call__(pic)
39
+
40
+
41
+ # Use timm's names
42
+ IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
43
+ IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
44
+
45
+
46
+ def make_normalize_transform(
47
+ mean: Sequence[float] = IMAGENET_DEFAULT_MEAN,
48
+ std: Sequence[float] = IMAGENET_DEFAULT_STD,
49
+ ) -> transforms.Normalize:
50
+ return transforms.Normalize(mean=mean, std=std)
51
+
52
+
53
+ # This roughly matches torchvision's preset for classification training:
54
+ # https://github.com/pytorch/vision/blob/main/references/classification/presets.py#L6-L44
55
+ def make_classification_train_transform(
56
+ *,
57
+ crop_size: int = 224,
58
+ interpolation=transforms.InterpolationMode.BICUBIC,
59
+ hflip_prob: float = 0.5,
60
+ mean: Sequence[float] = IMAGENET_DEFAULT_MEAN,
61
+ std: Sequence[float] = IMAGENET_DEFAULT_STD,
62
+ ):
63
+ transforms_list = [transforms.RandomResizedCrop(crop_size, interpolation=interpolation)]
64
+ if hflip_prob > 0.0:
65
+ transforms_list.append(transforms.RandomHorizontalFlip(hflip_prob))
66
+ transforms_list.extend(
67
+ [
68
+ MaybeToTensor(),
69
+ make_normalize_transform(mean=mean, std=std),
70
+ ]
71
+ )
72
+ return transforms.Compose(transforms_list)
73
+
74
+
75
+ # This matches (roughly) torchvision's preset for classification evaluation:
76
+ # https://github.com/pytorch/vision/blob/main/references/classification/presets.py#L47-L69
77
+ def make_classification_eval_transform(
78
+ *,
79
+ resize_size: int = 256,
80
+ interpolation=transforms.InterpolationMode.BICUBIC,
81
+ crop_size: int = 224,
82
+ mean: Sequence[float] = IMAGENET_DEFAULT_MEAN,
83
+ std: Sequence[float] = IMAGENET_DEFAULT_STD,
84
+ ) -> transforms.Compose:
85
+ transforms_list = [
86
+ transforms.Resize(resize_size, interpolation=interpolation),
87
+ transforms.CenterCrop(crop_size),
88
+ MaybeToTensor(),
89
+ make_normalize_transform(mean=mean, std=std),
90
+ ]
91
+ return transforms.Compose(transforms_list)
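A usage sketch of the evaluation preset above: resize, center-crop, convert to tensor and normalize with the ImageNet statistics defined in this file. The input image is a synthetic placeholder:

from PIL import Image

transform = make_classification_eval_transform()
img = Image.new("RGB", (500, 400))  # hypothetical input image
x = transform(img)                  # tensor of shape [3, 224, 224]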
dinov2/distributed/__init__.py ADDED
@@ -0,0 +1,270 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
+
+ import os
+ import random
+ import re
+ import socket
+ from typing import Dict, List
+
+ import torch
+ import torch.distributed as dist
+
+ _LOCAL_RANK = -1
+ _LOCAL_WORLD_SIZE = -1
+
+
+ def is_enabled() -> bool:
+     """
+     Returns:
+         True if distributed training is enabled
+     """
+     return dist.is_available() and dist.is_initialized()
+
+
+ def get_global_size() -> int:
+     """
+     Returns:
+         The number of processes in the process group
+     """
+     return dist.get_world_size() if is_enabled() else 1
+
+
+ def get_global_rank() -> int:
+     """
+     Returns:
+         The rank of the current process within the global process group.
+     """
+     return dist.get_rank() if is_enabled() else 0
+
+
+ def get_local_rank() -> int:
+     """
+     Returns:
+         The rank of the current process within the local (per-machine) process group.
+     """
+     if not is_enabled():
+         return 0
+     assert 0 <= _LOCAL_RANK < _LOCAL_WORLD_SIZE
+     return _LOCAL_RANK
+
+
+ def get_local_size() -> int:
+     """
+     Returns:
+         The size of the per-machine process group,
+         i.e. the number of processes per machine.
+     """
+     if not is_enabled():
+         return 1
+     assert 0 <= _LOCAL_RANK < _LOCAL_WORLD_SIZE
+     return _LOCAL_WORLD_SIZE
+
+
+ def is_main_process() -> bool:
+     """
+     Returns:
+         True if the current process is the main one.
+     """
+     return get_global_rank() == 0
+
+
+ def _restrict_print_to_main_process() -> None:
+     """
+     This function disables printing when not in the main process
+     """
+     import builtins as __builtin__
+
+     builtin_print = __builtin__.print
+
+     def print(*args, **kwargs):
+         force = kwargs.pop("force", False)
+         if is_main_process() or force:
+             builtin_print(*args, **kwargs)
+
+     __builtin__.print = print
+
+
+ def _get_master_port(seed: int = 0) -> int:
+     MIN_MASTER_PORT, MAX_MASTER_PORT = (20_000, 60_000)
+
+     master_port_str = os.environ.get("MASTER_PORT")
+     if master_port_str is None:
+         rng = random.Random(seed)
+         return rng.randint(MIN_MASTER_PORT, MAX_MASTER_PORT)
+
+     return int(master_port_str)
+
+
+ def _get_available_port() -> int:
+     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+         # A "" host address means INADDR_ANY i.e. binding to all interfaces.
+         # Note this is not compatible with IPv6.
+         s.bind(("", 0))
+         port = s.getsockname()[1]
+         return port
+
+
+ _TORCH_DISTRIBUTED_ENV_VARS = (
+     "MASTER_ADDR",
+     "MASTER_PORT",
+     "RANK",
+     "WORLD_SIZE",
+     "LOCAL_RANK",
+     "LOCAL_WORLD_SIZE",
+ )
+
+
+ def _collect_env_vars() -> Dict[str, str]:
+     return {env_var: os.environ[env_var] for env_var in _TORCH_DISTRIBUTED_ENV_VARS if env_var in os.environ}
+
+
+ def _is_slurm_job_process() -> bool:
+     return "SLURM_JOB_ID" in os.environ
+
+
+ def _parse_slurm_node_list(s: str) -> List[str]:
+     nodes = []
+     # Extract "hostname", "hostname[1-2,3,4-5]," substrings
+     p = re.compile(r"(([^\[]+)(?:\[([^\]]+)\])?),?")
+     for m in p.finditer(s):
+         prefix, suffixes = s[m.start(2) : m.end(2)], s[m.start(3) : m.end(3)]
+         for suffix in suffixes.split(","):
+             span = suffix.split("-")
+             if len(span) == 1:
+                 nodes.append(prefix + suffix)
+             else:
+                 width = len(span[0])
+                 start, end = int(span[0]), int(span[1]) + 1
+                 nodes.extend([prefix + f"{i:0{width}}" for i in range(start, end)])
+     return nodes
+
+
+ def _check_env_variable(key: str, new_value: str):
+     # Only check for difference with preset environment variables
+     if key in os.environ and os.environ[key] != new_value:
+         raise RuntimeError(f"Cannot export environment variables as {key} is already set")
+
+
+ class _TorchDistributedEnvironment:
+     def __init__(self):
+         self.master_addr = "127.0.0.1"
+         self.master_port = 0
+         self.rank = -1
+         self.world_size = -1
+         self.local_rank = -1
+         self.local_world_size = -1
+
+         if _is_slurm_job_process():
+             return self._set_from_slurm_env()
+
+         env_vars = _collect_env_vars()
+         if not env_vars:
+             # Environment is not set
+             pass
+         elif len(env_vars) == len(_TORCH_DISTRIBUTED_ENV_VARS):
+             # Environment is fully set
+             return self._set_from_preset_env()
+         else:
+             # Environment is partially set
+             collected_env_vars = ", ".join(env_vars.keys())
+             raise RuntimeError(f"Partially set environment: {collected_env_vars}")
+
+         if torch.cuda.device_count() > 0:
+             return self._set_from_local()
+
+         raise RuntimeError("Can't initialize PyTorch distributed environment")
+
+     # Slurm job created with sbatch, submitit, etc...
+     def _set_from_slurm_env(self):
+         # logger.info("Initialization from Slurm environment")
+         job_id = int(os.environ["SLURM_JOB_ID"])
+         node_count = int(os.environ["SLURM_JOB_NUM_NODES"])
+         nodes = _parse_slurm_node_list(os.environ["SLURM_JOB_NODELIST"])
+         assert len(nodes) == node_count
+
+         self.master_addr = nodes[0]
+         self.master_port = _get_master_port(seed=job_id)
+         self.rank = int(os.environ["SLURM_PROCID"])
+         self.world_size = int(os.environ["SLURM_NTASKS"])
+         assert self.rank < self.world_size
+         self.local_rank = int(os.environ["SLURM_LOCALID"])
+         self.local_world_size = self.world_size // node_count
+         assert self.local_rank < self.local_world_size
+
+     # Single node job with preset environment (i.e. torchrun)
+     def _set_from_preset_env(self):
+         # logger.info("Initialization from preset environment")
+         self.master_addr = os.environ["MASTER_ADDR"]
+         self.master_port = os.environ["MASTER_PORT"]
+         self.rank = int(os.environ["RANK"])
+         self.world_size = int(os.environ["WORLD_SIZE"])
+         assert self.rank < self.world_size
+         self.local_rank = int(os.environ["LOCAL_RANK"])
+         self.local_world_size = int(os.environ["LOCAL_WORLD_SIZE"])
+         assert self.local_rank < self.local_world_size
+
+     # Single node and GPU job (i.e. local script run)
+     def _set_from_local(self):
+         # logger.info("Initialization from local")
+         self.master_addr = "127.0.0.1"
+         self.master_port = _get_available_port()
+         self.rank = 0
+         self.world_size = 1
+         self.local_rank = 0
+         self.local_world_size = 1
+
+     def export(self, *, overwrite: bool) -> "_TorchDistributedEnvironment":
+         # See the "Environment variable initialization" section from
+         # https://pytorch.org/docs/stable/distributed.html for the complete list of
+         # environment variables required for the env:// initialization method.
+         env_vars = {
+             "MASTER_ADDR": self.master_addr,
+             "MASTER_PORT": str(self.master_port),
+             "RANK": str(self.rank),
+             "WORLD_SIZE": str(self.world_size),
+             "LOCAL_RANK": str(self.local_rank),
+             "LOCAL_WORLD_SIZE": str(self.local_world_size),
+         }
+         if not overwrite:
+             for k, v in env_vars.items():
+                 _check_env_variable(k, v)
+
+         os.environ.update(env_vars)
+         return self
+
+
+ def enable(*, set_cuda_current_device: bool = True, overwrite: bool = False, allow_nccl_timeout: bool = False):
240
+ """Enable distributed mode
241
+
242
+ Args:
243
+ set_cuda_current_device: If True, call torch.cuda.set_device() to set the
244
+ current PyTorch CUDA device to the one matching the local rank.
245
+ overwrite: If True, overwrites already set variables. Else fails.
246
+ """
247
+
248
+ global _LOCAL_RANK, _LOCAL_WORLD_SIZE
249
+ if _LOCAL_RANK >= 0 or _LOCAL_WORLD_SIZE >= 0:
250
+ raise RuntimeError("Distributed mode has already been enabled")
251
+ torch_env = _TorchDistributedEnvironment()
252
+ torch_env.export(overwrite=overwrite)
253
+
254
+ if set_cuda_current_device:
255
+ torch.cuda.set_device(torch_env.local_rank)
256
+
257
+ if allow_nccl_timeout:
258
+ # This allows to use torch distributed timeout in a NCCL backend
259
+ key, value = "NCCL_ASYNC_ERROR_HANDLING", "1"
260
+ if not overwrite:
261
+ _check_env_variable(key, value)
262
+ os.environ[key] = value
263
+
264
+ dist.init_process_group(backend="nccl")
265
+ dist.barrier()
266
+
267
+ # Finalize setup
268
+ _LOCAL_RANK = torch_env.local_rank
269
+ _LOCAL_WORLD_SIZE = torch_env.local_world_size
270
+ _restrict_print_to_main_process()
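
A minimal usage sketch for this module (not part of the upload; the backend is hard-coded to NCCL in enable(), so at least one CUDA device is assumed):

    # Minimal sketch: single entry point at the top of a training script.
    # enable() resolves the environment (Slurm, torchrun preset, or local
    # single-GPU run), exports the env:// variables, and initializes NCCL.
    import torch

    import dinov2.distributed as distributed

    distributed.enable(set_cuda_current_device=True)

    device = torch.device("cuda", distributed.get_local_rank())
    # After enable(), print() is silently restricted to the main process:
    print(f"rank {distributed.get_global_rank()} / world size {distributed.get_global_size()}")

    # For reference, the Slurm helper expands node lists like:
    #   _parse_slurm_node_list("node[1-2,5]") -> ["node1", "node2", "node5"]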
dinov2/eval/__init__.py ADDED
@@ -0,0 +1,4 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the Apache License, Version 2.0
+ # found in the LICENSE file in the root directory of this source tree.
dinov2/eval/cell_dino/knn.py ADDED
@@ -0,0 +1,479 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the CC-by-NC licence,
+ # found in the LICENSE_CELL_DINO_CODE file in the root directory of this source tree.
+
+ import argparse
+ from functools import partial
+ import json
+ import logging
+ import os
+ import sys
+ from typing import List, Optional, Any
+ import numpy as np
+
+ import torch
+ import torch.backends.cudnn as cudnn
+ import pandas as pd
+ from sklearn.metrics import f1_score
+
+ import dinov2.distributed as distributed
+ from dinov2.data import make_dataset, DatasetWithEnumeratedTargets, SamplerType, make_data_loader
+ from dinov2.data.cell_dino.transforms import NormalizationType, make_classification_eval_cell_transform
+ from dinov2.eval.metrics import build_metric, MetricType
+ from dinov2.eval.setup import get_args_parser as get_setup_args_parser
+ from dinov2.eval.setup import setup_and_build_model
+
+ from dinov2.data import ResultsAccumulator
+ from dinov2.eval.utils import ModelWithNormalize
+ from dinov2.eval.cell_dino.utils import (
+     BagOfChannelsModelWithNormalize,
+     extract_features_cell_dino,
+     average_metrics,
+     create_train_dataset_dict,
+     get_num_classes,
+     extract_features_for_dataset_dict,
+     evaluate_with_accumulate,
+     KnnModule,
+ )
+ from dinov2.eval.knn import DictKeysModule
+ from torch.utils.data import Subset as SubsetEx
+ from torch.utils.data import ConcatDataset as ConcatDatasetEx
+
+
+ logger = logging.getLogger("dinov2")
+
+
+ def get_args_parser(
+     description: Optional[str] = None,
+     parents: Optional[List[argparse.ArgumentParser]] = None,
+     add_help: bool = True,
+ ):
+     parents = parents or []
+     setup_args_parser = get_setup_args_parser(parents=parents, add_help=False)
+     parents = [setup_args_parser]
+     parser = argparse.ArgumentParser(
+         description=description,
+         parents=parents,
+         add_help=add_help,
+     )
+     parser.add_argument(
+         "--train-dataset",
+         dest="train_dataset_str",
+         type=str,
+         help="Training dataset",
+     )
+     parser.add_argument(
+         "--val-dataset",
+         dest="val_dataset_str",
+         type=str,
+         help="Validation dataset",
+     )
+     parser.add_argument(
+         "--nb_knn",
+         nargs="+",
+         type=int,
+         help="Number of nearest neighbors to use; 20 usually works best.",
+     )
+     parser.add_argument(
+         "--temperature",
+         type=float,
+         help="Temperature used in the voting coefficient",
+     )
+     parser.add_argument(
+         "--gather-on-cpu",
+         action="store_true",
+         help="Whether to gather the train features on cpu, slower"
+         " but useful to avoid OOM for large datasets (e.g. ImageNet22k).",
+     )
+     parser.add_argument(
+         "--batch-size",
+         type=int,
+         help="Batch size.",
+     )
+     parser.add_argument(
+         "--n-per-class-list",
+         nargs="+",
+         type=int,
+         help="Number of samples to take per class",
+     )
+     parser.add_argument(
+         "--n-tries",
+         type=int,
+         help="Number of tries",
+     )
+     parser.add_argument(
+         "--leave-one-out-dataset",
+         type=str,
+         help="Path with indices to use the leave-one-out strategy for CHAMMI_CP task 3 and CHAMMI_HPA task 4",
+     )
+     parser.add_argument(
+         "--bag-of-channels",
+         action="store_true",
+         help='Whether to use the "bag of channels" channel-adaptive strategy',
+     )
+     parser.add_argument(
+         "--crop-size",
+         type=int,
+         help="Crop size for train and eval",
+     )
+     parser.add_argument(
+         "--resize-size",
+         type=int,
+         help="Resize size applied just before the crop; 0 means no resize",
+     )
+     parser.add_argument(
+         "--metric-type",
+         type=MetricType,
+         choices=list(MetricType),
+         help="Validation metric",
+     )
+     parser.add_argument(
+         "--avgpool",
+         action="store_true",
+         help="Whether to use average pooling of patch tokens in addition to CLS tokens",
+     )
+
+     parser.set_defaults(
+         train_dataset_str="ImageNet:split=TRAIN",
+         val_dataset_str="ImageNet:split=VAL",
+         nb_knn=[1],
+         temperature=0.07,
+         batch_size=256,
+         resize_size=0,
+     )
+     return parser
+
+
+ class SequentialWithKwargs(torch.nn.Sequential):
+     def __init__(self, *args):
+         super().__init__(*args)
+
+     def forward(self, input, **kwargs):
+         input = self[0](input, **kwargs)
+         for module in self[1:]:
+             input = module(input)
+         return input
+
+
+ def create_train_test_dataset_dict_leave_one_out(
+     train_dataset,
+     test_dataset,
+ ) -> dict[int, Any]:
+     """
+     This function builds a train dataset dictionary with the leave-one-out (LOO) method.
+     Specifically, given a train dataset and a test dataset, it creates a train dataset for each
+     test data point: the concatenation of the train and test datasets minus that specific point.
+     The result therefore contains len(test_dataset) key/value pairs.
+
+     Format is {"nth-test-sample": dataset_without_test_sample}
+     """
+     train_dataset_dict: dict[int, Any] = {}
+     test_size = len(test_dataset)
+
+     for test_sample_index in range(test_size):
+         test_indices_bool = torch.ones(test_size, dtype=bool)
+         test_indices_bool[test_sample_index] = False
+         train_dataset_dict[test_sample_index] = ConcatDatasetEx(
+             [train_dataset, SubsetEx(test_dataset, test_indices_bool.nonzero().flatten())]
+         )
+
+     return train_dataset_dict
+
+
+ def eval_knn_with_leave_one_out(
+     model, leave_one_out_dataset, train_dataset, test_dataset, metric_type, nb_knn, temperature, batch_size, num_workers
+ ):
+     num_classes = get_num_classes(test_dataset)
+     train_dataset_dict = create_train_dataset_dict(train_dataset)
+     test_dataset_dict = create_train_dataset_dict(test_dataset)
+
+     logger.info("Extracting features for train set...")
+     train_data_dict = extract_features_for_dataset_dict(
+         model, train_dataset_dict, batch_size, num_workers, gather_on_cpu=True
+     )
+     test_data_dict = extract_features_for_dataset_dict(
+         model, test_dataset_dict, batch_size, num_workers, gather_on_cpu=True
+     )
+
+     train_features = train_data_dict[0]["train_features"]
+     train_labels = train_data_dict[0]["train_labels"]
+     test_features = test_data_dict[0]["train_features"]
+     test_labels = test_data_dict[0]["train_labels"]
+
+     metric_collection = build_metric(metric_type, num_classes=3)
+
+     device = torch.cuda.current_device()
+     partial_knn_module = partial(KnnModule, T=temperature, device=device, num_classes=num_classes)
+
+     logger.info("Reading the leave-one-out label metadata.")
+
+     leave_one_out_indices = {}
+     metadata = pd.read_csv(leave_one_out_dataset)
+     if "HPA" in leave_one_out_dataset:
+         metadata = metadata[metadata["Task_three"]].reset_index()
+         leave_one_out_label_type = "cell_type"
+     else:
+         metadata = metadata[metadata["Task_four"]].reset_index()
+         leave_one_out_label_type = "Plate"
+     leave_one_out_labels = metadata[leave_one_out_label_type].unique()
+
+     for leave_one_out_label in leave_one_out_labels:
+         leave_one_out_indices[leave_one_out_label] = torch.tensor(
+             metadata[metadata[leave_one_out_label_type] == leave_one_out_label].index.values
+         )
+
+     # ============ evaluation ... ============
+     logger.info("Start the k-NN classification.")
+
+     eval_metrics_dict = {}
+     postprocessors, metrics = {k: DictKeysModule([k]) for k in nb_knn}, {
+         k: metric_collection.clone().to(device) for k in nb_knn
+     }
+     for metric_key in metrics.keys():
+         metrics[metric_key] = metrics[metric_key].to(device)
+
+     accumulator_class = ResultsAccumulator
+     accumulators = {k: accumulator_class() for k in postprocessors.keys()}
+     all_preds = []
+     all_target = []
+
+     for loo_label, loo_indices in leave_one_out_indices.items():
+         logger.info(f"Evaluating on test sample {loo_label}")
+         loo_for_training_indices = torch.ones(test_features.shape[0], dtype=bool)
+         loo_for_training_indices[loo_indices] = False
+         train_features_sample = torch.cat([train_features, test_features[loo_for_training_indices]])
+         train_labels_sample = torch.cat([train_labels, test_labels[loo_for_training_indices]])
+         logger.info(f"Train shape {train_features_sample.shape}, Test shape {test_features[loo_indices].shape}")
+         logger.info(
+             f"Train values {train_labels_sample.unique(return_counts=True)}, Test values {test_labels[loo_indices].unique(return_counts=True)}"
+         )
+         knn_module = partial_knn_module(
+             train_features=train_features_sample, train_labels=train_labels_sample, nb_knn=nb_knn
+         )
+
+         output = knn_module(test_features[loo_indices].to(device))
+         all_preds.append(output[1])
+         all_target.append(test_labels[loo_indices])
+         output[1] = output[1][:, 4:]
+         transformed_test_labels = test_labels[loo_indices] - 4
+         for k, metric in metrics.items():
+             metric_inputs = postprocessors[k](output, transformed_test_labels.to(device))
+             metric.update(**metric_inputs)
+             accumulators[k].update(
+                 preds=metric_inputs["preds"], target=metric_inputs["target"], index=loo_indices.to(device)
+             )
+
+     all_preds = torch.cat(all_preds).cpu().detach().numpy()
+
+     all_preds = np.argmax(all_preds, axis=1)
+     all_target = torch.cat(all_target).cpu().detach().numpy()
+
+     f1 = f1_score(all_target, all_preds, average="macro", labels=[4, 5, 6])
+     logger.info(f"Real f1 score: {f1}")
+     eval_metrics = {
+         k: metric.compute() for k, metric in metrics.items()
+     }  # overwritten below by the real f1 score computed above
+
+     for k in nb_knn:
+         if k not in eval_metrics_dict:
+             eval_metrics_dict[k] = {}
+         eval_metrics_dict[k] = {metric: f1 * 100.0 for metric, v in eval_metrics[k].items()}
+
+     if len(train_data_dict) > 1:
+         return {k: average_metrics(eval_metrics_dict[k]) for k in eval_metrics_dict.keys()}
+
+     return {k: eval_metrics_dict[k] for k in eval_metrics_dict.keys()}
+
+
+ def eval_knn_with_model(
+     model,
+     output_dir,
+     train_dataset_str,
+     val_dataset_str,
+     nb_knn=(10, 20, 100, 200),
+     temperature=0.07,
+     autocast_dtype=torch.float,
+     metric_type=MetricType.MEAN_ACCURACY,
+     transform=None,
+     resize_size=256,
+     crop_size=224,
+     batch_size=256,
+     num_workers=5,
+     leave_one_out_dataset="",
+     bag_of_channels=False,
+     avgpool=False,
+ ):
+     autocast_ctx = partial(torch.cuda.amp.autocast, enabled=True, dtype=autocast_dtype)
+     if bag_of_channels:
+         model = BagOfChannelsModelWithNormalize(model, autocast_ctx, avgpool)
+     else:
+         model = ModelWithNormalize(model)
+     if leave_one_out_dataset == "" or leave_one_out_dataset is None:
+         leave_one_out = False
+     else:
+         leave_one_out = True
+
+     cudnn.benchmark = True
+     # NOTE: the `transform` argument is ignored; a cell-specific eval transform is always built here.
+     transform = make_classification_eval_cell_transform(
+         normalization_type=NormalizationType.SELF_NORM_CENTER_CROP, resize_size=resize_size, crop_size=crop_size
+     )
+
+     train_dataset = make_dataset(dataset_str=train_dataset_str, transform=transform)
+     results_dict = {}
+     test_dataset = make_dataset(dataset_str=val_dataset_str, transform=transform)
+
+     with torch.cuda.amp.autocast(dtype=autocast_dtype):
+         if leave_one_out:
+             results_dict_knn = eval_knn_with_leave_one_out(
+                 model=model,
+                 leave_one_out_dataset=leave_one_out_dataset,
+                 train_dataset=train_dataset,
+                 test_dataset=test_dataset,
+                 metric_type=metric_type,
+                 nb_knn=nb_knn,
+                 temperature=temperature,
+                 batch_size=batch_size,
+                 num_workers=num_workers,
+             )
+         else:
+             results_dict_knn = eval_knn(
+                 model=model,
+                 train_dataset=train_dataset,
+                 test_dataset=test_dataset,
+                 metric_type=metric_type,
+                 nb_knn=nb_knn,
+                 temperature=temperature,
+                 batch_size=batch_size,
+                 num_workers=num_workers,
+             )
+
+     for knn_ in results_dict_knn.keys():
+         top1 = results_dict_knn[knn_]["top-1"]
+         results_dict[f"{val_dataset_str}_{knn_} Top 1"] = top1
+         results_string = f"{val_dataset_str} {knn_} NN classifier result: Top1: {top1:.2f}"
+         if "top-5" in results_dict_knn[knn_]:
+             top5 = results_dict_knn[knn_]["top-5"]
+             results_dict[f"{val_dataset_str}_{knn_} Top 5"] = top5
+             results_string += f", Top5: {top5:.2f}"
+         logger.info(results_string)
+
+     metrics_file_path = os.path.join(output_dir, "results_eval_knn.json")
+     with open(metrics_file_path, "a") as f:
+         for k, v in results_dict.items():
+             f.write(json.dumps({k: v}) + "\n")
+
+     if distributed.is_enabled():
+         torch.distributed.barrier()
+     return results_dict
+
+
+ def eval_knn(
+     model,
+     train_dataset,
+     test_dataset,
+     metric_type,
+     nb_knn,
+     temperature,
+     batch_size,
+     num_workers,
+     few_shot_eval=False,
+     few_shot_k_or_percent=None,
+     few_shot_n_tries=1,
+ ):
+     num_classes = get_num_classes(train_dataset)
+     train_dataset_dict = create_train_dataset_dict(
+         train_dataset,
+         few_shot_eval=few_shot_eval,
+         few_shot_k_or_percent=few_shot_k_or_percent,
+         few_shot_n_tries=few_shot_n_tries,
+     )
+
+     logger.info("Extracting features for train set...")
+
+     train_data_dict: dict[int, dict[str, torch.Tensor]] = {}
+     for try_n, dataset in train_dataset_dict.items():
+         features, labels = extract_features_cell_dino(model, dataset, batch_size, num_workers, gather_on_cpu=True)
+         train_data_dict[try_n] = {"train_features": features, "train_labels": labels}
+
+     test_data_loader = make_data_loader(
+         dataset=DatasetWithEnumeratedTargets(
+             test_dataset, pad_dataset=True, num_replicas=distributed.get_global_size()
+         ),
+         batch_size=batch_size,
+         num_workers=num_workers,
+         sampler_type=SamplerType.DISTRIBUTED,
+         drop_last=False,
+         shuffle=False,
+         persistent_workers=True,
+         collate_fn=None,
+     )
+     metric_collection = build_metric(metric_type, num_classes=num_classes)
+
+     device = torch.cuda.current_device()
+     partial_knn_module = partial(
+         KnnModule,
+         T=temperature,
+         device=device,
+         num_classes=num_classes,
+     )
+
+     # ============ evaluation ... ============
+     logger.info("Start the k-NN classification.")
+     eval_metrics_dict = {}
+
+     for try_ in train_data_dict.keys():
+         train_features, train_labels = train_data_dict[try_]["train_features"], train_data_dict[try_]["train_labels"]
+         k_list = sorted(set([el if el < len(train_features) else len(train_features) for el in nb_knn]))
+         knn_module = partial_knn_module(train_features=train_features, train_labels=train_labels, nb_knn=k_list)
+         postprocessors, metrics = {k: DictKeysModule([k]) for k in k_list}, {
+             k: metric_collection.clone() for k in k_list
+         }
+         _, eval_metrics, _ = evaluate_with_accumulate(
+             SequentialWithKwargs(model, knn_module),
+             test_data_loader,
+             postprocessors,
+             metrics,
+             device,
+             accumulate_results=False,
+         )
+         for k in k_list:
+             if k not in eval_metrics_dict:
+                 eval_metrics_dict[k] = {}
+             eval_metrics_dict[k][try_] = {metric: v.item() * 100.0 for metric, v in eval_metrics[k].items()}
+
+     if len(train_data_dict) > 1:
+         return {k: average_metrics(eval_metrics_dict[k]) for k in eval_metrics_dict.keys()}
+
+     return {k: eval_metrics_dict[k][0] for k in eval_metrics_dict.keys()}
+
+
+ def main(args):
+     model, autocast_dtype = setup_and_build_model(args)
+     eval_knn_with_model(
+         model=model,
+         output_dir=args.output_dir,
+         train_dataset_str=args.train_dataset_str,
+         val_dataset_str=args.val_dataset_str,
+         nb_knn=args.nb_knn,
+         temperature=args.temperature,
+         autocast_dtype=autocast_dtype,
+         transform=None,
+         metric_type=args.metric_type,
+         batch_size=args.batch_size,
+         num_workers=5,
+         leave_one_out_dataset=args.leave_one_out_dataset,
+         resize_size=args.resize_size,
+         crop_size=args.crop_size,
+         avgpool=args.avgpool,
+         bag_of_channels=args.bag_of_channels,
+     )
+     return 0
+
+
+ if __name__ == "__main__":
+     description = "k-NN evaluation on models trained with the bag-of-channels strategy or cell DINO"
+     args_parser = get_args_parser(description=description)
+     args = args_parser.parse_args()
+     sys.exit(main(args))
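
To make the leave-one-out bookkeeping in eval_knn_with_leave_one_out concrete, here is a toy sketch of the split with random stand-in features (all shapes, indices, and values are illustrative only):

    # Toy sketch of the leave-one-out split above: for each held-out group,
    # its rows are removed from the test pool and merged into the train pool.
    import torch

    train_features = torch.randn(100, 8)          # stand-ins for extracted features
    train_labels = torch.randint(0, 3, (100,))
    test_features = torch.randn(10, 8)
    test_labels = torch.randint(0, 3, (10,))

    loo_indices = torch.tensor([2, 5])            # indices of one held-out group
    keep = torch.ones(test_features.shape[0], dtype=torch.bool)
    keep[loo_indices] = False

    fold_train_x = torch.cat([train_features, test_features[keep]])  # 100 + 8 rows
    fold_train_y = torch.cat([train_labels, test_labels[keep]])
    fold_eval_x = test_features[loo_indices]      # 2 rows, classified by KnnModule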
dinov2/eval/cell_dino/linear.py ADDED
@@ -0,0 +1,1048 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the CC-by-NC licence,
+ # found in the LICENSE_CELL_DINO_CODE file in the root directory of this source tree.
+
+ import argparse
+ from functools import partial
+ import json
+ import logging
+ import os
+ import sys
+ from typing import Any, Callable, Dict, Optional, Tuple, List
+ from enum import Enum
+ from dataclasses import dataclass
+
+ from sklearn.metrics import f1_score
+ import numpy as np
+ import pandas as pd
+ import torch
+ import torch.nn as nn
+ from torch.utils.data import TensorDataset
+ from torch.nn.parallel import DistributedDataParallel
+
+
+ from dinov2.data import SamplerType, make_data_loader, make_dataset, DatasetWithEnumeratedTargets
+ from dinov2.data.cell_dino.transforms import NormalizationType, make_classification_eval_cell_transform
+ import dinov2.distributed as distributed
+ from dinov2.eval.metrics import MetricType, build_metric
+ from dinov2.eval.setup import get_args_parser as get_setup_args_parser
+ from dinov2.eval.setup import setup_and_build_model
+ from dinov2.eval.cell_dino.utils import (
+     evaluate_with_accumulate,
+     LossType,
+     average_metrics,
+     create_train_dataset_dict,
+     get_num_classes,
+     extract_features_for_dataset_dict,
+ )
+ from dinov2.eval.utils import ModelWithIntermediateLayers
+ from dinov2.logging import MetricLogger
+ from dinov2.utils.checkpoint import build_periodic_checkpointer, resume_or_load
+
+ logger = logging.getLogger("dinov2")
+
+ """
+ List of changes with respect to the standard linear evaluation script:
+
+ - "bag of channels" option (channel-adaptive strategy)
+ - AdamW optimizer instead of SGD
+ - scheduler: two options, OneCycleLR or CosineAnnealingLR
+ - different transforms/normalization, now calling make_classification_eval_cell_transform
+ - binary cross-entropy loss option for protein localization
+ - num_classes now defined via get_num_classes
+ - some default parameters changed (batch_size, epoch_length, epochs, learning rates)
+ - n_last_blocks option
+ - avgpool option
+ - leave-one-out strategy for the CHAMMI evaluation
+ - grid search for the optimal weight decay
+ """
+
+
+ def get_args_parser(
+     description: Optional[str] = None,
+     parents: Optional[List[argparse.ArgumentParser]] = None,
+     add_help: bool = True,
+ ):
+     parents = parents or []
+     setup_args_parser = get_setup_args_parser(parents=parents, add_help=False)
+     parents = [setup_args_parser]
+     parser = argparse.ArgumentParser(
+         description=description,
+         parents=parents,
+         add_help=add_help,
+     )
+     parser.add_argument(
+         "--train-dataset",
+         dest="train_dataset_str",
+         type=str,
+         help="Training dataset",
+     )
+     parser.add_argument(
+         "--val-dataset",
+         dest="val_dataset_str",
+         type=str,
+         help="Validation dataset",
+     )
+     parser.add_argument(
+         "--test-datasets",
+         dest="test_dataset_strs",
+         type=str,
+         nargs="+",
+         help="Test datasets, none to reuse the validation dataset",
+     )
+     parser.add_argument(
+         "--epochs",
+         type=int,
+         help="Number of training epochs",
+     )
+     parser.add_argument(
+         "--batch-size",
+         type=int,
+         help="Batch size (per GPU)",
+     )
+     parser.add_argument(
+         "--num-workers",
+         type=int,
+         help="Number of workers",
+     )
+     parser.add_argument(
+         "--epoch-length",
+         type=int,
+         help="Length of an epoch in number of iterations",
+     )
+     parser.add_argument(
+         "--save-checkpoint-frequency",
+         type=int,
+         help="Number of epochs between two named checkpoint saves.",
+     )
+     parser.add_argument(
+         "--eval-period-iterations",
+         type=int,
+         help="Number of iterations between two evaluations.",
+     )
+     parser.add_argument(
+         "--learning-rates",
+         nargs="+",
+         type=float,
+         help="Learning rates to grid search.",
+     )
+     parser.add_argument(
+         "--weight_decays",
+         nargs="+",
+         type=float,
+         help="Weight decays to grid search.",
+     )
+     parser.add_argument(
+         "--n-last-blocks",
+         type=int,
+         help="Number of last backbone blocks used for the linear classifier",
+     )
+     parser.add_argument(
+         "--no-resume",
+         action="store_true",
+         help="Whether to not resume from existing checkpoints",
+     )
+     parser.add_argument(
+         "--val-metric-type",
+         type=MetricType,
+         choices=list(MetricType),
+         help="Validation metric",
+     )
+     parser.add_argument(
+         "--test-metric-types",
+         type=MetricType,
+         choices=list(MetricType),
+         nargs="+",
+         help="Evaluation metric",
+     )
+     parser.add_argument(
+         "--classifier-fpath",
+         type=str,
+         help="Path to a file containing pretrained linear classifiers",
+     )
+     parser.add_argument(
+         "--val-class-mapping-fpath",
+         type=str,
+         help="Path to a file containing a mapping to adjust classifier outputs",
+     )
+     parser.add_argument(
+         "--test-class-mapping-fpaths",
+         nargs="+",
+         type=str,
+         help="Path to a file containing a mapping to adjust classifier outputs",
+     )
+     parser.add_argument(
+         "--loss-type",
+         type=LossType,
+         help="Cross entropy or binary cross entropy; defaults to cross-entropy loss",
+     )
+     parser.add_argument(
+         "--bag-of-channels",
+         action="store_true",
+         help='Whether to use the "bag of channels" channel-adaptive strategy',
+     )
+     parser.add_argument(
+         "--leave-one-out-dataset",
+         type=str,
+         help="Path with indices to use the leave-one-out strategy for CHAMMI_CP task 3 and CHAMMI_HPA task 4",
+     )
+     parser.add_argument(
+         "--crop-size",
+         type=int,
+         help="Crop size for train and eval",
+     )
+     parser.add_argument(
+         "--resize-size",
+         type=int,
+         help="Resize size applied just before the crop; 0 means no resize",
+     )
+     parser.add_argument(
+         "--avgpool",
+         action="store_true",
+         help="Whether to use average pooling of patch tokens in addition to CLS tokens",
+     )
+     parser.add_argument(
+         "--scheduler",
+         type=SchedulerType,
+         help="Scheduler type",
+     )
+
+     parser.set_defaults(
+         train_dataset_str="ImageNet:split=TRAIN",
+         val_dataset_str="ImageNet:split=VAL",
+         test_dataset_strs=None,
+         epochs=30,
+         batch_size=64,
+         num_workers=8,
+         epoch_length=145,
+         save_checkpoint_frequency=1250,
+         eval_period_iterations=1250,
+         learning_rates=[1e-5, 2e-5, 5e-5, 1e-4, 2e-4, 5e-4, 1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2, 1e-1, 2e-1, 5e-1, 1.0],
+         weight_decays=[0.0, 0.0001, 1.0e-05],
+         val_metric_type=MetricType.MEAN_ACCURACY,
+         test_metric_types=None,
+         classifier_fpath=None,
+         val_class_mapping_fpath=None,
+         test_class_mapping_fpaths=[None],
+         loss_type=LossType.CROSS_ENTROPY,
+         crop_size=384,
+         resize_size=0,
+         n_last_blocks=4,
+         avgpool=False,
+         scheduler=SchedulerType.COSINE_ANNEALING,
+     )
+     return parser
+
+
+ def has_ddp_wrapper(m: nn.Module) -> bool:
+     return isinstance(m, DistributedDataParallel)
+
+
+ def remove_ddp_wrapper(m: nn.Module) -> nn.Module:
+     return m.module if has_ddp_wrapper(m) else m
+
+
+ def create_linear_input(x_tokens_list, use_n_blocks, use_avgpool, bag_of_channels):
+     intermediate_output = x_tokens_list[-use_n_blocks:]
+     output = torch.cat([class_token for _, class_token in intermediate_output], dim=-1)
+     if bag_of_channels:
+         if use_avgpool:
+             output = torch.cat(
+                 (
+                     output,
+                     torch.mean(intermediate_output[-1][0], dim=-2).reshape(intermediate_output[-1][0].shape[0], -1),
+                     # average pooling of patch tokens: average over N, then concatenate channels if single-channel patch model
+                 ),
+                 dim=-1,
+             )  # concatenate the average-pooled patch tokens to the concatenated class tokens
+     else:
+         if use_avgpool:
+             output = torch.cat(
+                 (
+                     output,
+                     torch.mean(intermediate_output[-1][0], dim=1),  # patch tokens
+                 ),
+                 dim=-1,
+             )
+             output = output.reshape(output.shape[0], -1)
+     return output.float()
+
+
+ class LinearClassifier(nn.Module):
+     """Linear layer to train on top of frozen features"""
+
+     def __init__(
+         self, out_dim, use_n_blocks, use_avgpool, num_classes=1000, bag_of_channels=False, leave_one_out=False
+     ):
+         super().__init__()
+         self.out_dim = out_dim
+         self.use_n_blocks = use_n_blocks
+         self.use_avgpool = use_avgpool
+         self.num_classes = num_classes
+         self.bag_of_channels = bag_of_channels
+         self.leave_one_out = leave_one_out
+         self.linear = nn.Linear(out_dim, num_classes)
+         self.linear.weight.data.normal_(mean=0.0, std=0.01)
+         self.linear.bias.data.zero_()
+
+     def forward(self, x_tokens_list):
+         if self.leave_one_out:
+             return self.linear(x_tokens_list)
+         output = create_linear_input(x_tokens_list, self.use_n_blocks, self.use_avgpool, self.bag_of_channels)
+         return self.linear(output)
+
+
+ class AllClassifiers(nn.Module):
+     def __init__(self, classifiers_dict):
+         super().__init__()
+         self.classifiers_dict = nn.ModuleDict()
+         self.classifiers_dict.update(classifiers_dict)
+
+     def forward(self, inputs):
+         return {k: v.forward(inputs) for k, v in self.classifiers_dict.items()}
+
+     def __len__(self):
+         return len(self.classifiers_dict)
+
+
+ class LinearPostprocessor(nn.Module):
+     def __init__(self, linear_classifier, class_mapping=None):
+         super().__init__()
+         self.linear_classifier = linear_classifier
+         self.register_buffer("class_mapping", None if class_mapping is None else torch.LongTensor(class_mapping))
+
+     def forward(self, samples, targets):
+         preds = self.linear_classifier(samples)
+         return {
+             "preds": preds[:, self.class_mapping] if self.class_mapping is not None else preds,
+             "target": targets,
+         }
+
+
+ def scale_lr(learning_rates, batch_size):
+     return learning_rates * (batch_size * distributed.get_global_size()) / 256.0
+
+
+ def setup_linear_classifiers(
+     sample_output,
+     n_last_blocks_list,
+     learning_rates,
+     weight_decays,
+     batch_size,
+     num_classes=1000,
+     bag_of_channels=False,
+     leave_one_out=False,
+     avgpool=False,
+ ):
+     linear_classifiers_dict = nn.ModuleDict()
+     avgpool_value = avgpool
+     optim_param_groups = []
+     for n in n_last_blocks_list:
+         for avgpool in [avgpool_value]:
+             for _lr in learning_rates:
+                 for wd in weight_decays:
+                     lr = scale_lr(_lr, batch_size)
+                     out_dim = create_linear_input(
+                         sample_output, use_n_blocks=n, use_avgpool=avgpool, bag_of_channels=bag_of_channels
+                     ).shape[1]
+                     linear_classifier = LinearClassifier(
+                         out_dim,
+                         use_n_blocks=n,
+                         use_avgpool=avgpool,
+                         num_classes=num_classes,
+                         bag_of_channels=bag_of_channels,
+                         leave_one_out=leave_one_out,
+                     )
+                     linear_classifier = linear_classifier.cuda()
+                     linear_classifiers_dict[
+                         f"classifier_{n}_blocks_avgpool_{avgpool}_lr_{lr:.5f}_wd_{wd:.2E}".replace(".", "_")
+                     ] = linear_classifier
+                     optim_param_groups.append({"params": linear_classifier.parameters(), "lr": lr, "weight_decay": wd})
+
+     linear_classifiers = AllClassifiers(linear_classifiers_dict)
+     if distributed.is_enabled():
+         linear_classifiers = nn.parallel.DistributedDataParallel(linear_classifiers)
+
+     return linear_classifiers, optim_param_groups
+
+
+ def make_eval_data_loader(
+     *,
+     test_dataset_str_or_path_or_loo_dataset,
+     config,
+     batch_size,
+     num_workers,
+ ):
+     if isinstance(test_dataset_str_or_path_or_loo_dataset, str):
+         logger.info(f"Loading dataset {test_dataset_str_or_path_or_loo_dataset}")
+         transform = make_classification_eval_cell_transform(
+             normalization_type=NormalizationType.SELF_NORM_CENTER_CROP,
+             resize_size=config["resize_size"],
+             crop_size=config["crop_size"],
+         )
+         test_dataset = make_dataset(dataset_str=test_dataset_str_or_path_or_loo_dataset, transform=transform)
+         collate_fn = None
+     else:
+         logger.info("Making data loader for a feature dataset (typically used in leave-one-out evaluation)")
+         test_dataset = test_dataset_str_or_path_or_loo_dataset
+         collate_fn = None
+     class_mapping = None
+     if hasattr(test_dataset, "get_imagenet_class_mapping"):
+         class_mapping = test_dataset.get_imagenet_class_mapping()
+
+     test_data_loader = make_data_loader(
+         dataset=DatasetWithEnumeratedTargets(
+             test_dataset, pad_dataset=True, num_replicas=distributed.get_global_size()
+         ),
+         batch_size=batch_size,
+         num_workers=num_workers,
+         sampler_type=SamplerType.DISTRIBUTED,
+         drop_last=False,
+         shuffle=False,
+         persistent_workers=False,
+         collate_fn=collate_fn,
+     )
+     return test_data_loader, class_mapping
+
+
+ @dataclass
+ class Evaluator:
+     batch_size: int
+     num_workers: int
+     dataset_str_or_path: str
+     config: Dict
+     metric_type: MetricType
+     metrics_file_path: str
+     training_num_classes: int
+     save_results_func: Optional[Callable]
+     val_dataset_loo: Optional[TensorDataset] = None
+
+     def __post_init__(self):
+         self.main_metric_name = f"{self.dataset_str_or_path}_accuracy"
+
+         if self.val_dataset_loo is not None:
+             self.dataset_str_or_path = self.val_dataset_loo
+
+         self.data_loader, self.class_mapping = make_eval_data_loader(
+             test_dataset_str_or_path_or_loo_dataset=self.dataset_str_or_path,
+             batch_size=self.batch_size,
+             num_workers=self.num_workers,
+             config=self.config,
+         )
+
+     @torch.no_grad()
+     def _evaluate_linear_classifiers(
+         self,
+         *,
+         feature_model,
+         linear_classifiers,
+         iteration,
+         prefixstring="",
+         best_classifier_on_val=None,
+         accumulate_results=False,
+         test_mode=False,
+     ) -> Tuple[Dict[str, Any], Optional[Dict[str, torch.Tensor]]]:
+         logger.info("Running validation!")
+
+         num_classes = len(self.class_mapping) if self.class_mapping is not None else self.training_num_classes
+         metric = build_metric(self.metric_type, num_classes=num_classes)
+         postprocessors = {
+             k: LinearPostprocessor(v, self.class_mapping) for k, v in linear_classifiers.classifiers_dict.items()
+         }
+         metrics = {k: metric.clone() for k in linear_classifiers.classifiers_dict}
+
+         _, results_dict_temp, accumulated_results = evaluate_with_accumulate(
+             feature_model,
+             self.data_loader,
+             postprocessors,
+             metrics,
+             torch.cuda.current_device(),
+             accumulate_results=accumulate_results,
+             leave_one_out=self.config["leave_one_out"],
+             test_mode=test_mode,
+         )
+
+         logger.info("")
+         results_dict = {}
+         max_accuracy = 0
+         best_classifier = ""
+         for _, (classifier_string, metric) in enumerate(results_dict_temp.items()):
+             logger.info(f"{prefixstring} -- Classifier: {classifier_string} * {metric}")
+             if (
+                 best_classifier_on_val is None and metric["top-1"].item() > max_accuracy
+             ) or classifier_string == best_classifier_on_val:
+                 max_accuracy = metric["top-1"].item()
+                 best_classifier = classifier_string
+
+         results_dict["best_classifier"] = {"name": best_classifier, "accuracy": max_accuracy}
+
+         logger.info(f"best classifier: {results_dict['best_classifier']}")
+
+         accumulated_best_results = None
+         if test_mode:
+             accumulated_best_results = accumulated_results
+         elif accumulated_results is not None:
+             accumulated_best_results = accumulated_results[best_classifier]
+
+         if distributed.is_main_process():
+             with open(self.metrics_file_path, "a") as f:
+                 f.write(f"iter: {iteration}\n")
+                 for k, v in results_dict.items():
+                     f.write(json.dumps({k: v}) + "\n")
+                 f.write("\n")
+
+         return results_dict, accumulated_best_results
+
+     def evaluate_and_maybe_save(
+         self,
+         feature_model,
+         linear_classifiers,
+         iteration: int,
+         best_classifier_on_val: Optional[Any] = None,
+         save_filename_suffix: str = "",
+         prefixstring: str = "",
+         test_mode: bool = False,
+     ):
+         logger.info(f"Testing on {self.dataset_str_or_path}")
+         save_results = self.save_results_func is not None
+         full_results_dict, accumulated_best_results = self._evaluate_linear_classifiers(
+             feature_model=feature_model,
+             linear_classifiers=remove_ddp_wrapper(linear_classifiers),
+             iteration=iteration,
+             prefixstring=prefixstring,
+             best_classifier_on_val=best_classifier_on_val,
+             accumulate_results=save_results,
+             test_mode=test_mode,
+         )
+         if self.save_results_func is not None:
+             self.save_results_func(
+                 filename_suffix=f"{self.dataset_str_or_path}{save_filename_suffix}", **accumulated_best_results
+             )
+
+         results_dict = {
+             self.main_metric_name: 100.0 * full_results_dict["best_classifier"]["accuracy"],
+             "best_classifier": full_results_dict["best_classifier"]["name"],
+         }
+         return results_dict, accumulated_best_results
+
+
+ def make_evaluators(
+     config: Dict,
+     val_metric_type: MetricType,
+     val_dataset: str,
+     metric_type: MetricType,
+     metrics_file_path: str,
+     training_num_classes: int,
+     save_results_func: Optional[Callable],
+     val_dataset_loo: Optional[TensorDataset] = None,
+ ):
+     test_metric_types = config["test_metric_types"]
+     test_dataset_strs = config["test_datasets"]
+     if test_dataset_strs is None:
+         test_dataset_strs = (config["val_dataset"],)
+     if test_metric_types is None:
+         test_metric_types = (val_metric_type,)
+     else:
+         assert len(test_metric_types) == len(config["test_datasets"])
+
+     val_evaluator, *test_evaluators = [
+         Evaluator(
+             dataset_str_or_path=dataset_str_or_path,
+             batch_size=config["batch_size"],
+             num_workers=config["num_workers"],
+             config=config,
+             metric_type=metric_type,
+             metrics_file_path=metrics_file_path,
+             training_num_classes=training_num_classes,
+             save_results_func=save_results_func,
+             val_dataset_loo=val_dataset_loo,
+         )
+         for dataset_str_or_path, metric_type in zip(
+             (val_dataset,) + tuple(test_dataset_strs),
+             (val_metric_type,) + tuple(test_metric_types),
+         )
+     ]
+     return val_evaluator, test_evaluators
+
+
+ class SchedulerType(Enum):
+     COSINE_ANNEALING = "cosine_annealing"
+     ONE_CYCLE = "one_cycle"
+
+     def get_scheduler(self, optimizer, optim_param_groups, epoch_length, epochs, max_iter):
+         if self == SchedulerType.ONE_CYCLE:
+             lr_list = [optim_param_groups[i]["lr"] for i in range(len(optim_param_groups))]
+             scheduler = torch.optim.lr_scheduler.OneCycleLR(
+                 optimizer, max_lr=lr_list, steps_per_epoch=epoch_length, epochs=epochs
+             )
+         else:
+             scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, max_iter, eta_min=0)
+             print("CosineAnnealingLR scheduler")
+         return scheduler
+
+
+ def setup_linear_training(
+     *,
+     config: Dict,
+     sample_output: torch.Tensor,
+     training_num_classes: int,
+     checkpoint_output_dir: str,
+ ):
+     linear_classifiers, optim_param_groups = setup_linear_classifiers(
+         sample_output,
+         config["n_last_blocks_list"],
+         config["learning_rates"],
+         config["weight_decays"],
+         config["batch_size"],
+         training_num_classes,
+         config["bag_of_channels"],
+         config["leave_one_out"],
+         config["avgpool"],
+     )
+     max_iter = config["epochs"] * config["epoch_length"]
+     optimizer = torch.optim.AdamW(optim_param_groups, weight_decay=0)
+
+     scheduler = config["scheduler"].get_scheduler(
+         optimizer=optimizer,
+         optim_param_groups=optim_param_groups,
+         epoch_length=config["epoch_length"],
+         epochs=config["epochs"],
+         max_iter=max_iter,
+     )
+     checkpoint_period = config["save_checkpoint_iterations"] or config["epoch_length"]
+     periodic_checkpointer = build_periodic_checkpointer(
+         linear_classifiers,
+         checkpoint_output_dir,
+         optimizer=optimizer,
+         scheduler=scheduler,
+         period=checkpoint_period,
+         max_iter=max_iter,
+         max_to_keep=None,
+     )
+     checkpoint = resume_or_load(periodic_checkpointer, config["classifier_fpath"] or "", resume=config["resume"])
+
+     start_iter = checkpoint.get("iteration", -1) + 1
+     best_accuracy = checkpoint.get("best_accuracy", -1)
+
+     if config["loss_type"] == LossType.BINARY_CROSS_ENTROPY:
+         criterion = nn.BCEWithLogitsLoss()
+     else:
+         criterion = nn.CrossEntropyLoss()
+
+     return (
+         linear_classifiers,
+         start_iter,
+         max_iter,
+         criterion,
+         optimizer,
+         scheduler,
+         periodic_checkpointer,
+         best_accuracy,
+     )
+
+
+ def train_linear_classifiers(
+     *,
+     feature_model,
+     train_dataset,
+     train_config: Dict,
+     training_num_classes: int,
+     val_evaluator: Evaluator,
+     checkpoint_output_dir: str,
+     sample_output: Optional[torch.Tensor] = None,
+ ):
+     if train_config["leave_one_out"]:
+         assert sample_output is not None, "sample_output should be passed as argument when using leave_one_out."
+     else:
+         sample_output = feature_model(train_dataset[0][0].unsqueeze(0).cuda())
+
+     (
+         linear_classifiers,
+         start_iter,
+         max_iter,
+         criterion,
+         optimizer,
+         scheduler,
+         periodic_checkpointer,
+         best_accuracy,
+     ) = setup_linear_training(
+         config=train_config,
+         sample_output=sample_output,
+         training_num_classes=training_num_classes,
+         checkpoint_output_dir=checkpoint_output_dir,
+     )
+
+     sampler_type = SamplerType.INFINITE
+     train_data_loader = make_data_loader(
+         dataset=train_dataset,
+         batch_size=train_config["batch_size"],
+         num_workers=train_config["num_workers"],
+         shuffle=True,
+         seed=0,
+         sampler_type=sampler_type,
+         sampler_advance=start_iter,
+         drop_last=True,
+         persistent_workers=True,
+     )
+     eval_period = train_config["eval_period_iterations"] or train_config["epoch_length"]
+     iteration = start_iter
+     logger.info("Starting training from iteration {}".format(start_iter))
+     metric_logger = MetricLogger(delimiter="  ")
+     header = "Training"
+
+     for data, labels in metric_logger.log_every(
+         train_data_loader,
+         10,
+         header,
+         max_iter,
+         start_iter,
+     ):
+         data = data.cuda(non_blocking=True)
+         labels = labels.cuda(non_blocking=True)
+
+         if not train_config["leave_one_out"]:
+             in_classifier = feature_model(data)
+         else:
+             in_classifier = data
+
+         outputs = linear_classifiers(in_classifier)
+
+         if len(labels.shape) > 1:
+             labels = labels.float()
+         losses = {f"loss_{k}": criterion(v, labels) for k, v in outputs.items()}
+         loss = sum(losses.values())
+
+         optimizer.zero_grad()
+         loss.backward()
+
+         optimizer.step()
+         scheduler.step()
+
+         if iteration % 10 == 0:
+             torch.cuda.synchronize()
+             metric_logger.update(loss=loss.item())
+             metric_logger.update(lr=optimizer.param_groups[0]["lr"])
+
+         periodic_checkpointer.step(iteration=iteration, best_accuracy=best_accuracy)
+
+         if eval_period > 0 and (iteration + 1) % eval_period == 0 and iteration != max_iter - 1:
+             val_results_dict, _ = val_evaluator.evaluate_and_maybe_save(
+                 feature_model=feature_model,
+                 linear_classifiers=linear_classifiers,
+                 prefixstring=f"ITER: {iteration}",
+                 iteration=iteration,
+             )
+             val_accuracy = val_results_dict[val_evaluator.main_metric_name]
+             if val_accuracy >= best_accuracy:
+                 best_accuracy = val_accuracy
+                 periodic_checkpointer.save_best(iteration=iteration, best_accuracy=best_accuracy)
+             torch.distributed.barrier()
+
+         iteration = iteration + 1
+
+     return feature_model, linear_classifiers, iteration, periodic_checkpointer
+
+
+ def eval_linear_with_model(
+     model,
+     output_dir,
+     train_dataset_str,
+     val_dataset_str,
+     batch_size,
+     epochs,
+     epoch_length,
+     num_workers,
+     save_checkpoint_frequency,
+     eval_period_iterations,
+     learning_rates,
+     weight_decays,
+     autocast_dtype,
+     test_dataset_strs=None,
+     resume=True,
+     classifier_fpath=None,
+     val_metric_type=MetricType.MEAN_ACCURACY,
+     test_metric_types=None,
+     loss_type=LossType.CROSS_ENTROPY,
+     bag_of_channels=False,
+     leave_one_out_dataset="",
+     resize_size=0,
+     crop_size=384,
+     n_last_blocks=4,
+     avgpool=False,
+     scheduler=SchedulerType.COSINE_ANNEALING,
+ ):
+     if leave_one_out_dataset == "" or leave_one_out_dataset is None:
+         leave_one_out = False
+     else:
+         logger.info("Reading the leave-one-out label metadata.")
+
+         leave_one_out_indices = {}
+         metadata = pd.read_csv(leave_one_out_dataset)
+         if "HPA" in leave_one_out_dataset:
+             metadata = metadata[metadata["Task_three"]].reset_index()
+             leave_one_out_label_type = "cell_type"
+         else:
+             metadata = metadata[metadata["Task_four"]].reset_index()
+             leave_one_out_label_type = "Plate"
+         leave_one_out_labels = metadata[leave_one_out_label_type].unique()
+
+         for leave_one_out_label in leave_one_out_labels:
+             leave_one_out_indices[leave_one_out_label] = np.array(
+                 metadata[metadata[leave_one_out_label_type] == leave_one_out_label].index.values
+             )
+
+         leave_one_out = True
+
+     train_transform = make_classification_eval_cell_transform(
+         normalization_type=NormalizationType.SELF_NORM_AUG_DECODER, crop_size=crop_size, resize_size=resize_size
+     )
+     print("train_transform", train_transform)
+     train_dataset = make_dataset(
+         dataset_str=train_dataset_str,
+         transform=train_transform,
+     )
+
+     training_num_classes = get_num_classes(train_dataset)
+     if leave_one_out:
+         training_num_classes += train_dataset.num_additional_labels_loo_eval
+     train_dataset_dict = create_train_dataset_dict(train_dataset)
+     n_last_blocks_list = [n_last_blocks]
+     n_last_blocks = max(n_last_blocks_list)
+     dataset_use_cache = True
+     autocast_ctx = partial(torch.cuda.amp.autocast, enabled=True, dtype=autocast_dtype)
+     feature_model = ModelWithIntermediateLayers(model, n_last_blocks, autocast_ctx)
+
+     if bag_of_channels:
+         sample = train_dataset[0][0].unsqueeze(0)
+         sample_output = feature_model(sample.cuda())
+
+     if leave_one_out:
+         loo_dict = {}
+         train_data_dict = extract_features_for_dataset_dict(
+             feature_model,
+             train_dataset_dict,
+             batch_size,
+             num_workers,
+             gather_on_cpu=True,
+             avgpool=avgpool,
+         )
+         val_dataset = make_dataset(
+             dataset_str=val_dataset_str,
+             transform=make_classification_eval_cell_transform(
+                 normalization_type=NormalizationType.SELF_NORM_CENTER_CROP, crop_size=crop_size, resize_size=resize_size
+             ),
+         )
+         val_dataset_dict = create_train_dataset_dict(val_dataset)
+         val_data_dict = extract_features_for_dataset_dict(
+             feature_model,
+             val_dataset_dict,
+             batch_size,
+             num_workers,
+             gather_on_cpu=True,
+             avgpool=avgpool,
+         )
+
+         train_features = train_data_dict[0]["train_features"]
+         train_labels = train_data_dict[0]["train_labels"]
+         val_features = val_data_dict[0]["train_features"]
+         val_labels = val_data_dict[0]["train_labels"]
+
+         for loo_label, loo_indices in leave_one_out_indices.items():
+             loo_for_training_indices = torch.ones(val_features.shape[0], dtype=bool)
+             loo_for_training_indices[loo_indices] = False
+             loo_for_val_indices = torch.zeros(val_features.shape[0], dtype=bool)
+             loo_for_val_indices[loo_indices] = True
+
+             loo_dict[loo_label] = {
+                 "train_features": torch.cat([train_features, val_features[loo_for_training_indices]]),
+                 "train_labels": torch.cat([train_labels, val_labels[loo_for_training_indices]]),
+                 "val_features": val_features[loo_indices],
+                 "val_labels": val_labels[loo_indices],
+             }
+     save_results_func = None
+     # if config.save_results:
+     #     save_results_func = partial(default_save_results_func, output_dir=output_dir)
+
+     metrics_file_path = os.path.join(output_dir, "results_eval_linear.json")
+     periodic_checkpointers: list = []
+
+     train_config = {
+         "learning_rates": learning_rates,
+         "weight_decays": weight_decays,
+         "batch_size": batch_size,
+         "num_workers": num_workers,
+         "dataset_use_cache": dataset_use_cache,
+         "eval_period_iterations": eval_period_iterations,
+         "epoch_length": epoch_length,
+         "leave_one_out": leave_one_out,
+         "bag_of_channels": bag_of_channels,
+         "n_last_blocks_list": n_last_blocks_list,
+         "epochs": epochs,
+         "loss_type": loss_type,
+         "resume": resume,
+         "save_checkpoint_iterations": save_checkpoint_frequency,
+         "classifier_fpath": classifier_fpath,
+         "avgpool": avgpool,
+         "scheduler": scheduler,
+     }
+     config = {
+         "test_metric_types": test_metric_types,
+         "test_datasets": test_dataset_strs,
+         "val_metric_types": val_metric_type,
+         "val_dataset": val_dataset_str,
+         "batch_size": batch_size,
+         "num_workers": num_workers,
+         "leave_one_out": leave_one_out,
+         "crop_size": crop_size,
+         "resize_size": resize_size,
+     }
+     if not leave_one_out:
+         val_evaluator, test_evaluators = make_evaluators(
+             config=config,
+             val_metric_type=val_metric_type,
+             val_dataset=val_dataset_str,
+             metric_type=test_metric_types,
+             metrics_file_path=metrics_file_path,
+             training_num_classes=training_num_classes,
+             save_results_func=save_results_func,
+         )
+     results_dict = {}
+
+     for _try in train_dataset_dict.keys():
+         if len(train_dataset_dict) > 1:
+             checkpoint_output_dir = os.path.join(output_dir, f"checkpoints_{_try}")
+             save_filename_suffix = f"_{_try}"
+         else:
+             checkpoint_output_dir, save_filename_suffix = output_dir, ""
+         os.makedirs(checkpoint_output_dir, exist_ok=True)
+
+         feature_model, linear_classifiers, iteration, periodic_checkpointer = train_linear_classifiers(
+             train_config=train_config,
+             feature_model=feature_model,
+             train_dataset=train_dataset_dict[_try],
+             training_num_classes=training_num_classes,
+             val_evaluator=val_evaluator,
+             checkpoint_output_dir=checkpoint_output_dir,
930
+ )
931
+ periodic_checkpointers.append(periodic_checkpointer)
932
+ results_dict[_try], _ = val_evaluator.evaluate_and_maybe_save(
933
+ feature_model=feature_model,
934
+ linear_classifiers=linear_classifiers,
935
+ iteration=iteration,
936
+ save_filename_suffix=save_filename_suffix,
937
+ )
938
+ for test_evaluator in test_evaluators:
939
+ eval_results_dict, _ = test_evaluator.evaluate_and_maybe_save(
940
+ feature_model=feature_model,
941
+ linear_classifiers=linear_classifiers,
942
+ iteration=iteration,
943
+ best_classifier_on_val=results_dict[_try]["best_classifier"],
944
+ save_filename_suffix=save_filename_suffix,
945
+ )
946
+ results_dict[_try] = {**eval_results_dict, **results_dict[_try]}
947
+ if len(train_dataset_dict) > 1:
948
+ results_dict = average_metrics(results_dict, ignore_keys=["best_classifier"])
949
+ else:
950
+ results_dict = {**results_dict[_try]}
951
+ else: # if leave one out is True
952
+ test_results_dict = {}
953
+ for loo_label in loo_dict.keys():
954
+
955
+ checkpoint_output_dir, save_filename_suffix = os.path.join(output_dir, f"checkpoints_{loo_label}"), ""
956
+ os.makedirs(checkpoint_output_dir, exist_ok=True)
957
+
958
+ train_dataset_loo = TensorDataset(
959
+ loo_dict[loo_label]["train_features"], loo_dict[loo_label]["train_labels"]
960
+ )
961
+
962
+ logger.info(f"Creating leave_one_out evaluators. loo_label: {loo_label}")
963
+ val_dataset_loo = TensorDataset(loo_dict[loo_label]["val_features"], loo_dict[loo_label]["val_labels"])
964
+ val_evaluators_loo, _ = make_evaluators(
965
+ config=config,
966
+ val_metric_type=val_metric_type,
967
+ val_dataset="loo",
968
+ metric_type=test_metric_types,
969
+ metrics_file_path=metrics_file_path,
970
+ training_num_classes=training_num_classes,
971
+ save_results_func=save_results_func,
972
+ val_dataset_loo=val_dataset_loo,
973
+ )
974
+ feature_model, linear_classifiers, iteration, periodic_checkpointer = train_linear_classifiers(
975
+ feature_model=feature_model,
976
+ train_dataset=train_dataset_loo,
977
+ train_config=train_config,
978
+ training_num_classes=training_num_classes,
979
+ val_evaluator=val_evaluators_loo,
980
+ checkpoint_output_dir=checkpoint_output_dir,
981
+ sample_output=sample_output,
982
+ )
983
+ periodic_checkpointers.append(periodic_checkpointer)
984
+ _, test_results_dict[loo_label] = val_evaluators_loo.evaluate_and_maybe_save(
985
+ feature_model=feature_model,
986
+ linear_classifiers=linear_classifiers,
987
+ iteration=iteration,
988
+ save_filename_suffix=save_filename_suffix,
989
+ test_mode=True,
990
+ )
991
+ classifier_names = test_results_dict[loo_label].keys()
992
+ results_dict = {k: [[], []] for k in classifier_names}
993
+ for ll in test_results_dict.keys():
994
+ for k in classifier_names:
995
+ results_dict[k][0].append(test_results_dict[ll][k][0])
996
+ results_dict[k][1].append(test_results_dict[ll][k][1])
997
+ for k in classifier_names:
998
+ results_dict[k] = [
999
+ np.argmax(torch.cat(results_dict[k][0]).cpu().detach().numpy(), axis=1),
1000
+ torch.cat(results_dict[k][1]).cpu().detach().numpy(),
1001
+ ]
1002
+ results_dict[k] = f1_score(results_dict[k][1], results_dict[k][0], average="macro", labels=[4, 5, 6])
1003
+ logger.info(
1004
+ f"Best performance is for {max(results_dict, key=results_dict.get)}, with F1-Score of {results_dict[max(results_dict, key=results_dict.get)]}"
1005
+ )
1006
+
1007
+ logger.info("Test Results Dict " + str(results_dict))
1008
+ return results_dict
1009
+
1010
+
1011
+ def main(args):
1012
+ model, autocast_dtype = setup_and_build_model(args)
1013
+ eval_linear_with_model(
1014
+ model=model,
1015
+ output_dir=args.output_dir,
1016
+ train_dataset_str=args.train_dataset_str,
1017
+ val_dataset_str=args.val_dataset_str,
1018
+ test_dataset_strs=args.test_dataset_strs,
1019
+ batch_size=args.batch_size,
1020
+ epochs=args.epochs,
1021
+ epoch_length=args.epoch_length,
1022
+ num_workers=args.num_workers,
1023
+ save_checkpoint_frequency=args.save_checkpoint_frequency,
1024
+ eval_period_iterations=args.eval_period_iterations,
1025
+ learning_rates=args.learning_rates,
1026
+ weight_decays=args.weight_decays,
1027
+ autocast_dtype=autocast_dtype,
1028
+ resume=not args.no_resume,
1029
+ classifier_fpath=args.classifier_fpath,
1030
+ val_metric_type=args.val_metric_type,
1031
+ test_metric_types=args.test_metric_types,
1032
+ loss_type=args.loss_type,
1033
+ bag_of_channels=args.bag_of_channels,
1034
+ leave_one_out_dataset=args.leave_one_out_dataset,
1035
+ crop_size=args.crop_size,
1036
+ resize_size=args.resize_size,
1037
+ n_last_blocks=args.n_last_blocks,
1038
+ avgpool=args.avgpool,
1039
+ scheduler=args.scheduler,
1040
+ )
1041
+ return 0
1042
+
1043
+
1044
+ if __name__ == "__main__":
1045
+ description = "DINOv2 linear_cell_dino evaluation"
1046
+ args_parser = get_args_parser(description=description)
1047
+ args = args_parser.parse_args()
1048
+ sys.exit(main(args))
dinov2/eval/cell_dino/utils.py ADDED
@@ -0,0 +1,542 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the CC-by-NC licence,
4
+ # found in the LICENSE_CELL_DINO_CODE file in the root directory of this source tree.
5
+
6
+ import logging
7
+ from typing import Callable, Dict, Optional, Any, List
8
+
9
+ import torch
10
+ from torch import nn
11
+ from torchmetrics import MetricCollection
12
+
13
+ from dinov2.data import DatasetWithEnumeratedTargets, SamplerType, make_data_loader
14
+ from dinov2.data import NoOpAccumulator, ResultsAccumulator
15
+ import dinov2.distributed as distributed
16
+ from dinov2.logging import MetricLogger
17
+ from enum import Enum
18
+ from torch.utils.data import Subset
19
+ from torchvision.datasets.vision import StandardTransform
20
+ import numpy as np
21
+ from torch.nn.functional import one_hot, softmax
22
+
23
+ logger = logging.getLogger("dinov2")
24
+
25
+
26
+ class LossType(Enum):
27
+ CROSS_ENTROPY = "cross_entropy"
28
+ BINARY_CROSS_ENTROPY = "binary_cross_entropy"
29
+
30
+
31
+ class BagOfChannelsModelWithNormalize(nn.Module):
32
+ def __init__(self, model, autocast_ctx, avgpool, n_last_blocks=1):
33
+ super().__init__()
34
+ self.model = model
35
+ self.autocast_ctx = autocast_ctx
36
+ self.n_last_blocks = n_last_blocks
37
+ self.avgpool = avgpool
38
+
39
+ def forward(self, samples):
40
+ with self.autocast_ctx():
41
+ features = self.model.get_intermediate_layers(samples, self.n_last_blocks, return_class_token=True)
42
+ output = create_linear_input(features, self.avgpool, use_n_blocks=self.n_last_blocks)
43
+ return nn.functional.normalize(output, dim=1, p=2)
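+ # The L2 normalization puts every embedding on the unit sphere, so features extracted
+ # per channel "bag" stay directly comparable (e.g. via dot products in knn evaluation).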
44
+
45
+
46
+ @torch.inference_mode()
47
+ def evaluate_with_accumulate(
48
+ model: nn.Module,
49
+ data_loader,
50
+ postprocessors: Dict[str, nn.Module],
51
+ metrics: Dict[str, MetricCollection],
52
+ device: torch.device,
53
+ criterion: Optional[nn.Module] = None,
54
+ test_mode: bool = False,
55
+ accumulate_results: bool = False,
56
+ leave_one_out: bool = False,
57
+ ):
58
+ model.eval()
59
+
60
+ if test_mode:
61
+ output_tensor = {k: [] for k in postprocessors.keys()}
62
+ target_tensor = {k: [] for k in postprocessors.keys()}
63
+
64
+ if criterion is not None:
65
+ criterion.eval()
66
+
67
+ accumulator_class = ResultsAccumulator if accumulate_results else NoOpAccumulator
68
+ accumulators = {k: accumulator_class() for k in postprocessors.keys()}
69
+
70
+ for metric in metrics.values():
71
+ metric = metric.to(device)
72
+
73
+ metric_logger = MetricLogger(delimiter=" ")
74
+ header = "Test:"
75
+
76
+ for samples, targets, *_ in metric_logger.log_every(data_loader, 10, header):
77
+ if isinstance(targets, list):
78
+ index = targets[0]
79
+ targets = targets[1]
80
+ samples, targets, index = samples[index >= 0], targets[index >= 0], index[index >= 0]
81
+ if len(index) == 0:
82
+ continue
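+ # A list-typed target is assumed to be an (index, target) pair coming from
+ # DatasetWithEnumeratedTargets; entries with a negative index are treated as
+ # padding added for distributed evaluation and are filtered out above.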
83
+
84
+ outputs = samples.to(device) if leave_one_out else model(samples.to(device))
85
+ targets = targets.to(device)
86
+
87
+ if criterion is not None:
88
+ loss = criterion(outputs, targets)
89
+ metric_logger.update(loss=loss.item())
90
+
91
+ for k, metric in metrics.items():
92
+ metric_inputs = postprocessors[k](outputs, targets)
93
+ metric.update(**metric_inputs)
94
+ if test_mode:
95
+ output_tensor[k].append(metric_inputs["preds"])
96
+ target_tensor[k].append(metric_inputs["target"])
97
+ accumulators[k].update(preds=metric_inputs["preds"], target=metric_inputs["target"], index=index)
98
+
99
+ metric_logger.synchronize_between_processes()
100
+ logger.info(f"Averaged stats: {metric_logger}")
101
+
102
+ stats = {k: metric.compute() for k, metric in metrics.items()}
103
+ metric_logger_stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
104
+
105
+ # accumulator.accumulate() returns None for the NoOpAccumulator
106
+ accumulated_results = {k: accumulator.accumulate() for k, accumulator in accumulators.items()}
107
+ if test_mode:
108
+ for k in postprocessors.keys():
109
+ output_tensor[k] = torch.cat(output_tensor[k])
110
+ target_tensor[k] = torch.cat(target_tensor[k])
111
+ accumulated_results = {k: [output_tensor[k], target_tensor[k]] for k in postprocessors.keys()}
112
+
113
+ if accumulate_results:
114
+ return metric_logger_stats, stats
115
+ return metric_logger_stats, stats, accumulated_results
116
+
117
+
118
+ def all_gather_and_flatten(tensor_rank):
119
+ tensor_all_ranks = torch.empty(
120
+ distributed.get_global_size(),
121
+ *tensor_rank.shape,
122
+ dtype=tensor_rank.dtype,
123
+ device=tensor_rank.device,
124
+ )
125
+ tensor_list = list(tensor_all_ranks.unbind(0))
126
+ torch.distributed.all_gather(tensor_list, tensor_rank.contiguous())
127
+ return tensor_all_ranks.flatten(end_dim=1)
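+ # With world size W and a per-rank tensor of shape [B, ...], every rank receives the
+ # same gathered tensor of shape [W * B, ...], ordered by rank.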
128
+
129
+
130
+ def extract_features_cell_dino(
131
+ model, dataset, batch_size, num_workers, gather_on_cpu=False, shuffle=False, avgpool=False
132
+ ):
133
+ dataset_with_enumerated_targets = DatasetWithEnumeratedTargets(dataset)
134
+ sample_count = len(dataset_with_enumerated_targets)
135
+ data_loader = make_data_loader(
136
+ dataset=dataset_with_enumerated_targets,
137
+ batch_size=batch_size,
138
+ num_workers=num_workers,
139
+ sampler_type=SamplerType.DISTRIBUTED,
140
+ drop_last=False,
141
+ shuffle=shuffle,
142
+ )
143
+ return extract_features_with_dataloader_cell_dino(model, data_loader, sample_count, gather_on_cpu, avgpool=avgpool)
144
+
145
+
146
+ @torch.inference_mode()
147
+ def extract_features_with_dataloader_cell_dino(model, data_loader, sample_count, gather_on_cpu=False, avgpool=False):
148
+ gather_device = torch.device("cpu") if gather_on_cpu else torch.device("cuda")
149
+ metric_logger = MetricLogger(delimiter=" ")
150
+ features, all_labels = None, None
151
+ for samples, (index, labels_rank) in metric_logger.log_every(data_loader, 10):
152
+ samples = samples.cuda(non_blocking=True)
153
+ labels_rank = labels_rank.cuda(non_blocking=True)
154
+ index = index.cuda(non_blocking=True)
155
+ feat = model(samples)
156
+ if isinstance(samples, list) or isinstance(feat, tuple):
157
+ features_rank = create_linear_input(feat, avgpool=avgpool)
158
+ else:
159
+ features_rank = feat
160
+
161
+ # init storage feature matrix
162
+ if features is None:
163
+ features = torch.zeros(sample_count, features_rank.shape[-1], device=gather_device)
164
+ labels_shape = list(labels_rank.shape)
165
+ labels_shape[0] = sample_count
166
+ all_labels = torch.full(labels_shape, fill_value=-1, device=gather_device)
167
+ logger.info(f"Storing features into tensor of shape {features.shape}")
168
+
169
+ # share indexes, features and labels between processes
170
+ index_all = all_gather_and_flatten(index).to(gather_device)
171
+ features_all_ranks = all_gather_and_flatten(features_rank).to(gather_device)
172
+ labels_all_ranks = all_gather_and_flatten(labels_rank).to(gather_device)
173
+
174
+ # update storage feature matrix
175
+ if len(index_all) > 0:
176
+ features.index_copy_(0, index_all, features_all_ranks)
177
+ all_labels.index_copy_(0, index_all, labels_all_ranks)
178
+
179
+ logger.info(f"Features shape: {tuple(features.shape)}")
180
+ logger.info(f"Labels shape: {tuple(all_labels.shape)}")
181
+
182
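+ # all_labels was initialised to the -1 sentinel; the assert below checks that the
+ # gathered (index, label) pairs filled every position of the storage tensors.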
+ assert torch.all(all_labels > -1)
183
+
184
+ return features, all_labels
185
+
186
+
187
+ def create_linear_input(x_tokens_list, avgpool=False, use_n_blocks=1):
188
+ intermediate_output = x_tokens_list[-use_n_blocks:]
189
+ output = torch.cat(
190
+ [class_token for _, class_token in intermediate_output], dim=-1
191
+ ) # concatenate class tokens of the last n blocks
192
+ if avgpool:
193
+ output = torch.cat(
194
+ (
195
+ output,
196
+ torch.mean(intermediate_output[-1][0], dim=-2).reshape(
197
+ intermediate_output[-1][0].shape[0], -1
198
+ ), # average pooling of patch tokens: average over N, then concatenate channels if single-channel patch model
199
+ ),
200
+ dim=-1,
201
+ ) # concatenate average pooling of patch tokens to concatenated patch tokens
202
+ output = output.reshape(output.shape[0], -1)
203
+
204
+ return output.float()
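+ # Shape sketch (assuming a ViT-L backbone, embed_dim=1024, use_n_blocks=4): each entry of
+ # x_tokens_list is (patch_tokens [B, N, 1024], class_token [B, 1024]); concatenating the
+ # four class tokens gives [B, 4096], and avgpool=True appends the mean patch token of the
+ # last block for [B, 5120].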
205
+
206
+
207
+ def get_target_transform(dataset) -> Optional[Callable]:
208
+ if hasattr(dataset, "transforms"):
209
+ if isinstance(dataset.transforms, StandardTransform):
210
+ return dataset.transforms.target_transform
211
+ raise ValueError("Dataset has a non-standard .transforms property")
212
+ if hasattr(dataset, "target_transform"):
213
+ return dataset.target_transform
214
+ return None
215
+
216
+
217
+ def get_labels(dataset) -> torch.Tensor:
218
+ """
219
+ Get the labels of a classification dataset, as a Tensor, using the `get_targets` method
220
+ if it is present or loading the labels one by one with `get_target`, if it exists.
221
+ If the dataset has a target transform, iterate over the whole dataset to get the
222
+ transformed labels for each element, then stack them as a torch tensor.
223
+ """
224
+ logger.info("Getting dataset labels ...")
225
+ if hasattr(dataset, "get_targets") or hasattr(dataset, "get_target"):
226
+ if hasattr(dataset, "get_targets"): # Returns a np.array
227
+ labels = dataset.get_targets()
228
+ elif hasattr(dataset, "get_target"):
229
+ labels = [dataset.get_target(i) for i in range(len(dataset))]
230
+ target_transform = get_target_transform(dataset)
231
+ if target_transform is not None:
232
+ labels = [target_transform(label) for label in labels]
233
+ else:
234
+ # Target transform is applied in this case
235
+ labels = [dataset[i][1] for i in range(len(dataset))]
236
+ return torch.stack([torch.tensor(label, dtype=int) for label in labels])
237
+
238
+
239
+ def get_num_classes(dataset) -> int:
240
+ """
241
+ Get the labels of a dataset and compute the number of classes
242
+ """
243
+ labels = get_labels(dataset)
244
+ if len(labels.shape) > 1:
245
+ return int(labels.shape[1])
246
+ return int(labels.max() + 1)
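+ # Example: labels tensor([0, 2, 1]) -> 3 classes; a multi-label one-hot matrix of
+ # shape [N, C] -> C classes.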
247
+
248
+
249
+ def average_metrics(eval_metrics_dict: dict[Any, dict[str, torch.Tensor]], ignore_keys: List[str] = []):
250
+ """
251
+ Function that computes the average and the std on a metrics dict.
252
+ A linear evaluation dictionary contains "best_classifier",
253
+ so this specific key is removed for computing aggregated metrics.
254
+ """
255
+ output_metrics_dict = {}
256
+ metrics = [metric for metric in eval_metrics_dict[0].keys() if metric not in ignore_keys]
257
+ for metric in metrics:
258
+ stats_tensor = torch.tensor([stat[metric] for stat in eval_metrics_dict.values()])
259
+ output_metrics_dict[metric + "_mean"] = stats_tensor.mean().item()
260
+ output_metrics_dict[metric + "_std"] = torch.std(stats_tensor).item()
261
+
262
+ return output_metrics_dict
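+ # Example (hypothetical values): {0: {"top-1": 0.80}, 1: {"top-1": 0.90}} ->
+ # {"top-1_mean": 0.85, "top-1_std": ~0.071} (torch.std uses the unbiased estimator).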
263
+
264
+
265
+ def create_class_indices_mapping(labels: torch.Tensor) -> dict[int, torch.Tensor]:
266
+ """
267
+ Efficiently creates a mapping between the labels and tensors containing
268
+ the indices of all the dataset elements that share this label.
269
+ In the case of multiple labels, it is not guaranteed that there
270
+ will be exactly the specified percentage of labels.
271
+ """
272
+ if len(labels.shape) > 1: # labels are a one-hot encoding
273
+ assert len(labels.shape) == 2
274
+ sorted_labels, indices = torch.nonzero(labels.T, as_tuple=True)
275
+ else:
276
+ sorted_labels, indices = torch.sort(labels, stable=True)
277
+ unique_labels, counts = torch.unique_consecutive(sorted_labels, return_counts=True)
278
+ mapping = dict(zip(unique_labels.tolist(), torch.split(indices, counts.tolist())))
279
+ return mapping
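+ # Example: labels tensor([1, 0, 1]) -> {0: tensor([1]), 1: tensor([0, 2])}; with one-hot
+ # labels, a dataset element appears under every class it carries.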
280
+
281
+
282
+ def _shuffle_dataset(dataset: torch.Tensor, seed: int = 0):
283
+ """
284
+ Shuffling a dataset by subsetting it with a random permutation of its indices
285
+ """
286
+ random_generator = torch.Generator()
287
+ random_generator.manual_seed(seed)
288
+ random_indices = torch.randperm(len(dataset), generator=random_generator)
289
+ return Subset(dataset, random_indices)
290
+
291
+
292
+ def _subset_dataset_per_class(
293
+ class_indices_mapping: dict[int, torch.Tensor],
294
+ n_or_percent_per_class: float,
295
+ dataset_size: int,
296
+ seed: int = 0,
297
+ is_percent: bool = False,
298
+ ) -> torch.Tensor:
299
+ """
300
+ Helper function to select a percentage of a dataset, equally distributed across classes,
301
+ or to take the same number of elements from each class of the dataset.
302
+ Returns a boolean mask tensor being True at indices of selected elements
303
+ """
304
+
305
+ random_generator = torch.Generator()
306
+ random_generator.manual_seed(seed)
307
+
308
+ final_indices_bool = torch.zeros(dataset_size, dtype=bool)
309
+ for class_indices in class_indices_mapping.values():
310
+ # Select at least one element
311
+ n_for_class = max(int(len(class_indices) * n_or_percent_per_class), 1) if is_percent else n_or_percent_per_class
312
+ assert isinstance(n_for_class, int)
313
+ filtered_index = torch.randperm(len(class_indices), generator=random_generator)[:n_for_class]
314
+ final_indices_bool[class_indices[filtered_index]] = True
315
+ return final_indices_bool
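+ # Example (hypothetical sizes): with classes of 10 and 50 elements, is_percent=True and
+ # n_or_percent_per_class=0.2, the mask selects 2 + 10 = 12 elements; with is_percent=False
+ # and n_or_percent_per_class=5, it selects 5 elements from each class.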
316
+
317
+
318
+ def _multilabel_rebalance_subset(
319
+ class_indices_mapping: dict[int, torch.Tensor],
320
+ n_or_percent_per_class: float,
321
+ labels: torch.Tensor,
322
+ indices_bool: torch.Tensor,
323
+ dataset_size: int,
324
+ seed: int = 0,
325
+ ) -> torch.Tensor:
326
+ """
327
+ Helper function to refine a subset of a multi-label dataset (indices_bool)
328
+ to better match a target percentage of labels.
329
+ Returns a boolean mask tensor being True at indices of selected elements.
330
+ """
331
+
332
+ # Compute the number of selected labels in indices_bool
333
+ num_total_labels = labels.sum()
334
+ num_wanted_labels = int(num_total_labels * n_or_percent_per_class)
335
+ num_selected_labels = (labels[indices_bool] > 0).sum()
336
+ logger.info(f" {num_selected_labels} labels instead of {num_wanted_labels}")
337
+
338
+ # Compute a new percentage and a new subset that selects fewer images (and therefore fewer labels) to approximately match the target percentage of selected labels
339
+ n_or_percent_per_class = n_or_percent_per_class / (num_selected_labels / num_wanted_labels)
340
+ final_indices_bool = _subset_dataset_per_class(
341
+ class_indices_mapping, n_or_percent_per_class, dataset_size, seed, True
342
+ )
343
+
344
+ # Compute the number of labels finally used
345
+ num_selected_labels = (labels[final_indices_bool] > 0).sum()
346
+ logger.info(f" {num_selected_labels} labels instead of {num_wanted_labels}")
347
+
348
+ return final_indices_bool
349
+
350
+
351
+ def split_train_val_datasets(train_dataset, split_percentage: float = 0.1, shuffle_train: bool = True):
352
+ """
353
+ Splitting a percent of the train dataset to choose hyperparameters, taking the same percentage for each class.
354
+ If `shuffle` is False, taking the first elements of each class as the validaton set.
355
+ """
356
+ assert 0 < split_percentage < 1
357
+ logger.info(f"Selecting {int(split_percentage * 100)}% of the train dataset as the validation set")
358
+ if shuffle_train:
359
+ logger.info("Shuffling train dataset before splitting in train and validation sets")
360
+ train_dataset = _shuffle_dataset(train_dataset)
361
+ train_labels = get_labels(train_dataset)
362
+ class_indices_mapping = create_class_indices_mapping(train_labels)
363
+ val_mask = torch.zeros(len(train_labels), dtype=bool)
364
+ for class_indices in class_indices_mapping.values():
365
+ # If there is only one element, it goes in the train set
366
+ n_for_val = max(1, int(split_percentage * len(class_indices))) if len(class_indices) > 1 else 0
367
+ val_mask[class_indices[:n_for_val]] = True
368
+
369
+ val_dataset = Subset(train_dataset, val_mask.nonzero().flatten())
370
+ train_dataset = Subset(train_dataset, (~val_mask).nonzero().flatten())
371
+ return train_dataset, val_dataset
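+ # Usage sketch (hypothetical dataset): train, val = split_train_val_datasets(ds, 0.1)
+ # keeps ~90% of every class for training; classes with a single element stay in train.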
372
+
373
+
374
+ def create_train_dataset_dict(
375
+ train_dataset,
376
+ few_shot_eval: bool = False,
377
+ few_shot_k_or_percent=None,
378
+ few_shot_n_tries: int = 1,
379
+ ) -> dict[int, Any]:
380
+ """
381
+ Randomly split a dataset for few-shot evaluation, with `few_shot_k_or_percent` being
382
+ n elements or x% of a class. Produces a dict, which keys are number of random "tries"
383
+ and values are the dataset subset for this "try".
384
+
385
+ Format is {"nth-try": dataset}
386
+ """
387
+ if few_shot_eval is False:
388
+ assert few_shot_k_or_percent is None
389
+ assert few_shot_n_tries == 1
390
+ return {0: train_dataset}
391
+
392
+ assert few_shot_k_or_percent is not None
393
+ train_labels = get_labels(train_dataset)
394
+ class_indices_mapping = create_class_indices_mapping(train_labels)
395
+ train_dataset_dict: dict[int, Any] = {}
396
+ is_percent = few_shot_k_or_percent < 1
397
+ if not is_percent:
398
+ few_shot_k_or_percent = int(few_shot_k_or_percent)
399
+
400
+ for t in range(few_shot_n_tries):
401
+ t_subset_bool = _subset_dataset_per_class(
402
+ class_indices_mapping=class_indices_mapping,
403
+ n_or_percent_per_class=few_shot_k_or_percent,
404
+ dataset_size=len(train_labels),
405
+ is_percent=is_percent,
406
+ seed=t,
407
+ )
408
+ if len(train_labels.shape) > 1 and is_percent:
409
+ t_subset_bool = _multilabel_rebalance_subset(
410
+ class_indices_mapping=class_indices_mapping,
411
+ n_or_percent_per_class=few_shot_k_or_percent,
412
+ dataset_size=len(train_labels),
413
+ labels=train_labels,
414
+ indices_bool=t_subset_bool,
415
+ seed=t,
416
+ )
417
+ train_dataset_dict[t] = Subset(train_dataset, t_subset_bool.nonzero().flatten())
418
+ return train_dataset_dict
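+ # Usage sketch (hypothetical values): create_train_dataset_dict(ds, few_shot_eval=True,
+ # few_shot_k_or_percent=5, few_shot_n_tries=3) -> {0: Subset, 1: Subset, 2: Subset},
+ # each subset holding 5 elements per class drawn with a different seed.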
419
+
420
+
421
+ def extract_features_for_dataset_dict(
422
+ model,
423
+ dataset_dict: dict[int, Any],
424
+ batch_size: int,
425
+ num_workers: int,
426
+ gather_on_cpu=False,
427
+ avgpool=False,
428
+ ) -> dict[int, dict[str, torch.Tensor]]:
429
+ """
430
+ Extract features for each subset of dataset in the context of few-shot evaluations
431
+ """
432
+ few_shot_data_dict: dict[int, dict[str, torch.Tensor]] = {}
433
+ for try_n, dataset in dataset_dict.items():
434
+ features, labels = extract_features_cell_dino(
435
+ model, dataset, batch_size, num_workers, gather_on_cpu=gather_on_cpu, avgpool=avgpool
436
+ )
437
+ few_shot_data_dict[try_n] = {"train_features": features, "train_labels": labels}
438
+ return few_shot_data_dict
439
+
440
+
441
+ def pad_multilabel_and_collate(batch, pad_value=-1):
442
+ """
443
+ This method pads and collates a batch of (image, (index, target)) tuples, coming from
444
+ DatasetWithEnumeratedTargets, with targets that are list of potentially varying sizes.
445
+ The targets are padded to the length of the longest target list in the batch.
446
+ """
447
+ maxlen = max(len(targets) for _, (_, targets) in batch)
448
+ padded_batch = [
449
+ (image, (index, np.pad(targets, (0, maxlen - len(targets)), constant_values=pad_value)))
450
+ for image, (index, targets) in batch
451
+ ]
452
+ return torch.utils.data.default_collate(padded_batch)
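+ # Example (hypothetical targets): [(img_a, (0, [3, 7])), (img_b, (1, [5]))] is padded to
+ # [(img_a, (0, [3, 7])), (img_b, (1, [5, -1]))] before default_collate, so consumers can
+ # mask out entries equal to pad_value.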
453
+
454
+
455
+ class KnnModule(torch.nn.Module):
456
+ """
457
+ Gets knn of test features from all processes on a chunk of the train features
458
+
459
+ Each rank gets a chunk of the train features as well as a chunk of the test features.
460
+ In `compute_neighbors`, for each rank one after the other, its chunk of test features
461
+ is sent to all devices, partial knns are computed with each chunk of train features
462
+ then collated back on the original device.
463
+ """
464
+
465
+ def __init__(self, train_features, train_labels, nb_knn, T, device, num_classes=1000):
466
+ super().__init__()
467
+
468
+ self.global_rank = distributed.get_global_rank()
469
+ self.global_size = distributed.get_global_size()
470
+
471
+ self.device = device
472
+ self.train_features_rank_T = train_features.chunk(self.global_size)[self.global_rank].T.to(self.device)
473
+ # Labels can either be integers, or in a one-hot format
474
+ self.candidates = train_labels.chunk(self.global_size)[self.global_rank].unsqueeze(0).to(self.device)
475
+ self.nb_knn = nb_knn
476
+ self.max_k = max(self.nb_knn)
477
+ self.T = T
478
+ self.num_classes = num_classes
479
+
480
+ def _get_knn_sims_and_labels(self, similarity, train_labels):
481
+ topk_sims, indices = similarity.topk(self.max_k, largest=True, sorted=True)
482
+ if len(train_labels.shape) == 3: # If the labels are in one_hot format
483
+ indices = indices.unsqueeze(2).expand(-1, -1, self.num_classes) # Originally [bs, max_k]
484
+ neighbors_labels = torch.gather(train_labels, 1, indices)
485
+ return topk_sims, neighbors_labels
486
+
487
+ def _similarity_for_rank(self, features_rank, source_rank):
488
+ # Send the features from `source_rank` to all ranks
489
+ broadcast_shape = torch.tensor(features_rank.shape).to(self.device)
490
+ torch.distributed.broadcast(broadcast_shape, source_rank)
491
+
492
+ broadcasted = features_rank
493
+ if self.global_rank != source_rank:
494
+ broadcasted = torch.zeros(*broadcast_shape, dtype=features_rank.dtype, device=self.device)
495
+ torch.distributed.broadcast(broadcasted, source_rank)
496
+
497
+ # Compute the neighbors for `source_rank` among `train_features_rank_T`
498
+ similarity_rank = torch.mm(broadcasted, self.train_features_rank_T)
499
+ candidate_labels = self.candidates.expand(len(similarity_rank), *self.candidates.shape[1:])
500
+ return self._get_knn_sims_and_labels(similarity_rank, candidate_labels)
501
+
502
+ def _gather_all_knn_for_rank(self, topk_sims, neighbors_labels, target_rank):
503
+ # Gather all neighbors for `target_rank`
504
+ topk_sims_rank = retrieved_rank = None
505
+ if self.global_rank == target_rank:
506
+ topk_sims_rank = [torch.zeros_like(topk_sims) for _ in range(self.global_size)]
507
+ retrieved_rank = [torch.zeros_like(neighbors_labels) for _ in range(self.global_size)]
508
+
509
+ torch.distributed.gather(topk_sims, topk_sims_rank, dst=target_rank)
510
+ torch.distributed.gather(neighbors_labels, retrieved_rank, dst=target_rank)
511
+
512
+ if self.global_rank == target_rank:
513
+ # Perform a second top-k on the k * global_size retrieved neighbors
514
+ topk_sims_rank = torch.cat(topk_sims_rank, dim=1)
515
+ retrieved_rank = torch.cat(retrieved_rank, dim=1)
516
+ results = self._get_knn_sims_and_labels(topk_sims_rank, retrieved_rank)
517
+ return results
518
+ return None
519
+
520
+ def compute_neighbors(self, features_rank):
521
+ for rank in range(self.global_size):
522
+ topk_sims, neighbors_labels = self._similarity_for_rank(features_rank, rank)
523
+ results = self._gather_all_knn_for_rank(topk_sims, neighbors_labels, rank)
524
+ if results is not None:
525
+ topk_sims_rank, neighbors_labels_rank = results
526
+ return topk_sims_rank, neighbors_labels_rank
527
+
528
+ def forward(self, features_rank):
529
+ """
530
+ Compute the results on all values of `self.nb_knn` neighbors from the full `self.max_k`
531
+ """
532
+ assert all(k <= self.max_k for k in self.nb_knn)
533
+
534
+ topk_sims, neighbors_labels = self.compute_neighbors(features_rank)
535
+ batch_size = neighbors_labels.shape[0]
536
+ topk_sims_transform = softmax(topk_sims / self.T, 1)
537
+ voting_coefficient = topk_sims_transform.view(batch_size, -1, 1)
538
+ if len(neighbors_labels.shape) == 2: # If the labels are not yet one hot
539
+ neighbors_labels = one_hot(neighbors_labels, num_classes=self.num_classes)
540
+ matmul = torch.mul(neighbors_labels, voting_coefficient)
541
+ probas_for_k = {k: torch.sum(matmul[:, :k, :], 1) for k in self.nb_knn}
542
+ return probas_for_k
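+ # Usage sketch (hypothetical shapes/values): with L2-normalized train_features [N, D],
+ # knn = KnnModule(train_features, train_labels, nb_knn=(10, 20), T=0.07,
+ # device=torch.device("cuda"), num_classes=28)
+ # probas = knn(test_features_chunk) # -> {10: [B, 28], 20: [B, 28]} soft class votes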
dinov2/eval/depth/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
dinov2/eval/depth/models/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ from .backbones import * # noqa: F403
7
+ from .builder import BACKBONES, DEPTHER, HEADS, LOSSES, build_backbone, build_depther, build_head, build_loss
8
+ from .decode_heads import * # noqa: F403
9
+ from .depther import * # noqa: F403
10
+ from .losses import * # noqa: F403
dinov2/eval/depth/models/backbones/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ from .vision_transformer import DinoVisionTransformer
dinov2/eval/depth/models/backbones/vision_transformer.py ADDED
@@ -0,0 +1,16 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ from mmcv.runner import BaseModule
7
+
8
+ from ..builder import BACKBONES
9
+
10
+
11
+ @BACKBONES.register_module()
12
+ class DinoVisionTransformer(BaseModule):
13
+ """Vision Transformer."""
14
+
15
+ def __init__(self, *args, **kwargs):
16
+ super().__init__()
dinov2/eval/depth/models/builder.py ADDED
@@ -0,0 +1,49 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ #
3
+ # This source code is licensed under the Apache License, Version 2.0
4
+ # found in the LICENSE file in the root directory of this source tree.
5
+
6
+ import warnings
7
+
8
+ from mmcv.cnn import MODELS as MMCV_MODELS
9
+ from mmcv.cnn.bricks.registry import ATTENTION as MMCV_ATTENTION
10
+ from mmcv.utils import Registry
11
+
12
+ MODELS = Registry("models", parent=MMCV_MODELS)
13
+ ATTENTION = Registry("attention", parent=MMCV_ATTENTION)
14
+
15
+
16
+ BACKBONES = MODELS
17
+ NECKS = MODELS
18
+ HEADS = MODELS
19
+ LOSSES = MODELS
20
+ DEPTHER = MODELS
21
+
22
+
23
+ def build_backbone(cfg):
24
+ """Build backbone."""
25
+ return BACKBONES.build(cfg)
26
+
27
+
28
+ def build_neck(cfg):
29
+ """Build neck."""
30
+ return NECKS.build(cfg)
31
+
32
+
33
+ def build_head(cfg):
34
+ """Build head."""
35
+ return HEADS.build(cfg)
36
+
37
+
38
+ def build_loss(cfg):
39
+ """Build loss."""
40
+ return LOSSES.build(cfg)
41
+
42
+
43
+ def build_depther(cfg, train_cfg=None, test_cfg=None):
44
+ """Build depther."""
45
+ if train_cfg is not None or test_cfg is not None:
46
+ warnings.warn("train_cfg and test_cfg is deprecated, " "please specify them in model", UserWarning)
47
+ assert cfg.get("train_cfg") is None or train_cfg is None, "train_cfg specified in both outer field and model field "
48
+ assert cfg.get("test_cfg") is None or test_cfg is None, "test_cfg specified in both outer field and model field "
49
+ return DEPTHER.build(cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg))
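+ # Usage sketch (hypothetical config): the registry builds models from config dicts, e.g.
+ # depther = build_depther(dict(type="DepthEncoderDecoder",
+ # backbone=dict(type="DinoVisionTransformer"),
+ # decode_head=dict(type="..."))) # every "type" must name a registered module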