action_dim: 20
diffusion_step_conditioning: add
freeze: false
input_noise_std: 0.05
noise_scheduler:
  beta_end: 0.02
  beta_start: 0.0001
  clamp_range:
  - -1.5
  - 1.5
  freeze: false
  num_timesteps: 1000
  resume_from_checkpoint: null
  resume_weights_only: false
  type: noise_scheduler
num_action_head_repeats: 8
proprioception_dim: 0
resume_from_checkpoint: null
resume_weights_only: false
transformer:
  attn_name: torch_attn
  cast_output_to_float32: false
  ffn_type: swiglu
  freeze: false
  hidden_dim: 1024
  is_causal: true
  max_seq_len: 2048
  n_heads: 16
  n_layers: 24
  norm_eps: 1.0e-05
  norm_type: default_layer_norm
  positional_embedding_type: rotary
  post_embed_norm: false
  qk_norm: false
  resume_from_checkpoint: null
  resume_weights_only: false
  type: transformer
  vocab_size: 0
  weight_tying: false
type: diffusion_policy
use_diffusers_scheduler: false
use_flow_matching_scheduler: true
vision_language_backbone:
  freeze: false
  hf_pretrained: null
  num_vlm_layers_to_use: 4
  resume_from_checkpoint: null
  resume_weights_only: true
  type: vlm_foundry_backbone