| cond_image_size: 512 |
| isosurface_resolution: 160 |
| radius: 0.87 |
|
|
| camera_embedder_cls: sf3d.models.camera.LinearCameraEmbedder |
| camera_embedder: |
| in_channels: 25 |
| out_channels: 768 |
| conditions: |
| - c2w_cond |
| - intrinsic_normed_cond |
|
|
| image_tokenizer_cls: sf3d.models.tokenizers.image.DINOV2SingleImageTokenizer |
| image_tokenizer: |
| pretrained_model_name_or_path: "facebook/dinov2-large" |
| width: 512 |
| height: 512 |
| modulation_cond_dim: 768 |
|
|
| tokenizer_cls: sf3d.models.tokenizers.triplane.TriplaneLearnablePositionalEmbedding |
| tokenizer: |
| plane_size: 96 |
| num_channels: 1024 |
|
|
| backbone_cls: sf3d.models.transformers.backbone.TwoStreamInterleaveTransformer |
| backbone: |
| num_attention_heads: 16 |
| attention_head_dim: 64 |
| raw_triplane_channels: 1024 |
| triplane_channels: 1024 |
| raw_image_channels: 1024 |
| num_latents: 1792 |
| num_blocks: 4 |
| num_basic_blocks: 3 |
|
|
| post_processor_cls: sf3d.models.network.PixelShuffleUpsampleNetwork |
| post_processor: |
| in_channels: 1024 |
| out_channels: 40 |
| scale_factor: 4 |
| conv_layers: 4 |
|
|
|
|
| decoder_cls: sf3d.models.network.MaterialMLP |
| decoder: |
| in_channels: 120 |
| n_neurons: 64 |
| activation: silu |
| heads: |
| - name: density |
| out_channels: 1 |
| out_bias: -1.0 |
| n_hidden_layers: 2 |
| output_activation: trunc_exp |
| - name: features |
| out_channels: 3 |
| n_hidden_layers: 3 |
| output_activation: sigmoid |
| - name: perturb_normal |
| out_channels: 3 |
| n_hidden_layers: 3 |
| output_activation: normalize_channel_last |
| - name: vertex_offset |
| out_channels: 3 |
| n_hidden_layers: 2 |
|
|
| image_estimator_cls: sf3d.models.image_estimator.clip_based_estimator.ClipBasedHeadEstimator |
| image_estimator: |
| distribution: beta |
| distribution_eval: mode |
| heads: |
| - name: roughness |
| out_channels: 1 |
| n_hidden_layers: 3 |
| output_activation: linear |
| add_to_decoder_features: true |
| output_bias: 1.0 |
| shape: [-1, 1, 1] |
| - name: metallic |
| out_channels: 1 |
| n_hidden_layers: 3 |
| output_activation: linear |
| add_to_decoder_features: true |
| output_bias: 1.0 |
| shape: [-1, 1, 1] |
|
|
| global_estimator_cls: sf3d.models.global_estimator.multi_head_estimator.MultiHeadEstimator |
| global_estimator: |
| triplane_features: 1024 |
| heads: |
| - name: sg_amplitudes |
| out_channels: 24 |
| n_hidden_layers: 3 |
| output_activation: softplus |
| output_bias: 1.0 |
| shape: [-1, 24, 1] |
|
|