---
# Model configuration; the top-level `type` key identifies it as
# `diffusion_policy`. Keys are kept in alphabetical order at every level.
#
# NOTE(review): the nesting below was reconstructed from the key ordering and
# from the repeated per-section keys (`freeze`, `resume_from_checkpoint`,
# `resume_weights_only`, `type`). Confirm against the consumer's schema.

action_dim: 20
diffusion_step_conditioning: add
freeze: false
input_noise_std: 0.05

# Noise scheduler sub-config (`type: noise_scheduler`).
# NOTE(review): `use_flow_matching_scheduler: true` is also set at the top
# level; confirm whether the beta schedule here is still consumed.
noise_scheduler:
  beta_end: 0.02
  beta_start: 0.0001
  # Two-element range, written as [lower, upper].
  clamp_range:
    - -1.5
    - 1.5
  freeze: false
  num_timesteps: 1000
  resume_from_checkpoint: null
  resume_weights_only: false
  type: noise_scheduler

num_action_head_repeats: 8
# NOTE(review): 0 presumably means no proprioception input; TODO confirm.
proprioception_dim: 0
resume_from_checkpoint: null
resume_weights_only: false

# Transformer sub-config (`type: transformer`).
transformer:
  attn_name: torch_attn
  cast_output_to_float32: false
  ffn_type: swiglu
  freeze: false
  hidden_dim: 1024
  is_causal: true
  max_seq_len: 2048
  n_heads: 16
  n_layers: 24
  # Written with a dot and a signed exponent so YAML 1.1 loaders also read it
  # as a float rather than a string.
  norm_eps: 1.0e-05
  norm_type: default_layer_norm
  positional_embedding_type: rotary
  post_embed_norm: false
  qk_norm: false
  resume_from_checkpoint: null
  resume_weights_only: false
  type: transformer
  # NOTE(review): 0 presumably means no token-embedding table; TODO confirm.
  vocab_size: 0
  weight_tying: false

type: diffusion_policy
use_diffusers_scheduler: false
use_flow_matching_scheduler: true

# Vision-language backbone sub-config (`type: vlm_foundry_backbone`).
vision_language_backbone:
  freeze: false
  hf_pretrained: null
  num_vlm_layers_to_use: 4
  resume_from_checkpoint: null
  # NOTE(review): this is the only section with `resume_weights_only: true`,
  # while its `resume_from_checkpoint` is null; confirm this is intentional.
  resume_weights_only: true
  type: vlm_foundry_backbone