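# LIBERO_ALL_FDM_DURATION / config_resolved.yaml
# Hugging Face page residue (not part of the config): uploader avatar "joon-stack's picture";
# commit message "Upload folder using huggingface_hub"; revision 6e775a3 (verified).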
# `data` section.
# NOTE(review): this copy of the file had all indentation stripped; the nesting
# below is reconstructed. Confirm against the original config_resolved.yaml.
data:
  name: libero_cosmos_policy
  type: mg
  backend: robomimic
  paths: []
  task_suite_name: null
  observations_keys:
  - image
  # Maps the observation key listed above to the key it is read from.
  observation_source_keys:
    image: agentview_rgb_jpeg
  action_dim: 7
  frame_stack: 1
  horizon: 1
  temporal_index_mode: clip
  use_proprio: false
  proprio_source: robot_states
  image_chw:
  - 3
  - 224
  - 224
  image_value_range: zero_to_one
  image_transport_dtype: uint8
  duration_focus: null
  hf:
    repo_id: nvidia/LIBERO-Cosmos-Policy
    repo_type: dataset
    allow_patterns:
    - success_only/*_regen/*.hdf5
    # NOTE(review): presumably belongs to `hf`; could also be a direct child of `data`.
    local_files_only: false
  # NOTE(review): assumed to be a child of `data` (not of `hf`) - confirm.
  action_sequence_targets:
    enabled: true
    horizons:
    - 20
    target_key: gt_action_seq_max
# `train` section: shared defaults, per-stage settings, and checkpointing.
# NOTE(review): this copy of the file had all indentation stripped; the nesting
# below is reconstructed. Keys that repeat (batch_size, optimizer, stage1, ...)
# cannot all live at one level, which fixes most of the structure; the remaining
# guesses are marked individually.
train:
  optimizer:
    type: adamw
    lr: 0.0001
    weight_decay: 1.0e-06
  batch_size: 256
  num_workers: 4
  prefetch_factor: null
  log_interval: 100
  total_steps: 5000
  eval_interval: 500
  use_aug: true
  aug_mode: default
  use_amp: true
  device_transfer_non_blocking: true
  seed: 42
  scheduler:
    type: cosine
    warmup_steps: 500
    num_cycles: 0.5
    min_lr_scale: 0.0
  decoder_dataloader:
    enabled: false
    batch_size: 64
    num_workers: 4
    prefetch_factor: null
    shuffle: true
  # Per-stage on/off flags; only stage1 is true in this config.
  stages:
    stage1: true
    stage2: false
    stage3: false
  # The stageN mappings below repeat the stageN flag names, so they must be
  # siblings of `stages`, not children of it.
  stage1:
    batch_size: 256
    num_workers: 16
    prefetch_factor: 2
    log_interval: 100
    total_steps: 20000
    eval_interval: 500
    optimizer:
      lr: 0.0001
      weight_decay: 1.0e-06
  stage2:
    batch_size: 256
    num_workers: 4
    prefetch_factor: null
    log_interval: 100
    total_steps: 5000
    eval_interval: 500
    latent_target: auto
    optimizer:
      lr: 0.0001
      weight_decay: 1.0e-06
    action_probe:
      enabled: true
  stage3:
    batch_size: 256
    num_workers: 4
    prefetch_factor: null
    log_interval: 100
    total_steps: 5000
    eval_interval: 500
    final_eval_num_episodes: 100
    log_eval_video: false
    optimizer:
      lr: 0.0001
      weight_decay: 1.0e-06
    # NOTE(review): the next two keys are placed on `stage3`; they may instead
    # belong to `stage3.optimizer` - confirm.
    trainable_scope: all
    non_decoder_lr_scale: 1.0
  # NOTE(review): `checkpoint` is assumed to be a child of `train`; it could be
  # a top-level section in the original - confirm.
  checkpoint:
    enabled: true
    base_dir: null
    save_interval: 1000
    save_last: true
    latest_only: true
    # NOTE(review): `load` is assumed to be a child of `checkpoint` - confirm.
    load:
      stage1:
        # Plain scalar `none` is the string "none", not YAML null.
        mode: none
        path: null
      stage2:
        mode: handoff
        path: null
        teacher_path: null
      stage3:
        mode: handoff
        path: null
# `model` section: idm / fdm sub-models, hyperbolic-latent options, encoder.
# NOTE(review): this copy of the file had all indentation stripped; the nesting
# below is reconstructed - confirm against the original config_resolved.yaml.
model:
  idm:
    type: token_idm
    action_dim: 128
    token_dim: 768
    model_dim: 512
    latent_dim: 32
    num_action_tokens: 4
    num_blocks: 4
    num_heads: 8
    dropout: 0.0
  fdm:
    type: token_fdm_duration
    action_dim: 128
    token_dim: 768
    model_dim: 512
    num_action_tokens: 4
    num_blocks: 4
    num_heads: 8
    dropout: 0.0
  # NOTE(review): assumed to be a sibling of `idm`/`fdm`; it could be nested
  # under `fdm` in the original - confirm.
  hyperbolic_latent:
    enabled: false
    backend: geoopt
    manifold: poincare
    curvature: 1.0
    learnable_curvature: false
    fdm_input_mode: logmap0
    # Plain scalar `none` is the string "none", not YAML null.
    prelift_mode: none
    prelift_scale: 1.0
    lift_max_norm: 5.0
    tangent_max_norm: 5.0
    eps: 1.0e-06
  encoders:
    type: dino
    image_key: image
    model_id: facebook/dinov2-base
    input_value_range: zero_to_one
    freeze_backbone: true
    drop_cls_token: true
    output_dim: 768
    mean:
    - 0.485
    - 0.456
    - 0.406
    std:
    - 0.229
    - 0.224
    - 0.225
  # `encoders` already has `type: dino`, so this second `type` (and the keys
  # that follow it) must sit one level up, on `model`.
  type: dino_lam
  idm_input: future
  fdm_target: future
  pixel_decoders: null
  latent_action_decoders: null
# `objective` section.
# NOTE(review): this copy of the file had all indentation stripped. Four
# separate `enabled` keys prove that `multiscale`, `consistency`,
# `branch_order` and `latent_plan` are distinct mappings, but the exact parent
# of several keys below is a best guess - confirm against the original file.
objective:
  fdm_target: future
  idm_input: future
  stage_overrides: {}
  multiscale:
    enabled: true
    # NOTE(review): `consistency` is assumed to be nested in `multiscale`
    # (it directly follows `multiscale.enabled`) - confirm.
    consistency:
      enabled: false
      weight: 10.0
      num_pairs: 4
      sample_source: all_horizons
      # NOTE(review): may belong to `multiscale` rather than `consistency`.
      teacher_mode: direct_teacher
    # NOTE(review): the next four keys are assumed to belong to `multiscale`.
    prediction_mode: direct_duration
    allow_plain_fdm_oneshot: true
    weight_mode: uniform
    weights: {}
  # NOTE(review): `temporal` is placed on `objective`; it may instead be a
  # child of `multiscale` - confirm.
  temporal:
    max_offset: 20
    anchor_mode: fixed
    extra_random_count: 4
  branch_order:
    enabled: false
    radial_weight: 0.0
    local_radial_margin_weight: 0.0
    local_radial_margin_alpha: 0.05
    branch_weight: 0.0
    z0_origin_weight: 0.0
    prefix_weight: 0.0
    radius_progress_weight: 1.0
    radius_progress_mode: offset_margin
    radius_progress_alpha: 0.02
    branch_margin_deg: 10.0
    eps: 1.0e-06
  latent_plan:
    enabled: false
    total_horizon: 20
# `eval` section.
# NOTE(review): this copy of the file had all indentation stripped; every key
# up to the next section header is assumed to be a direct child of `eval`.
eval:
  type: robomimic
  data_path: []
  reset_mode: env_reset
  num_eval_episodes: 20
  max_steps: 500
  record_video: true
  checkpoint_path: null
  checkpoint_strict: true
  use_checkpoint_cfg: true
  video_output_path: null
  obs_keys: null
  use_proprio: null
  use_object: false
  resize_hw: null
  image_value_range: null
  seed: 0
# `agent` section: encoder group and policy settings.
# NOTE(review): this copy of the file had all indentation stripped; the nesting
# below is reconstructed - confirm against the original config_resolved.yaml.
agent:
  encoders:
    image:
      in_channels: 3
      output_dim: 512
      output_mode: global
      type: resnet18
      pretrained: false
    proprio:
      input_dim: 9
      hidden_dim: 128
      output_dim: 64
    # NOTE(review): `type: group`, `modalities` and `proj_dim` are placed on
    # `encoders` (the modalities listed are image names); they could instead be
    # children of `proprio` - confirm.
    type: group
    modalities:
    - agentview_image
    - robot0_eye_in_hand_image
    proj_dim: 128
  policies:
    decoder:
      type: mlp
      hidden_dims:
      - 256
      - 256
    # `decoder` already has `hidden_dims`, so this second list must sit one
    # level up.
    hidden_dims:
    - 512
    - 512
    - 256
    - 64
    # NOTE(review): the remaining keys are placed on `policies`; some may
    # belong directly to `agent` - confirm.
    action_dim: 256
    emb_dim: 384
    gt_action_dim: 7
    type: latent_action
# `probes` section: global probe settings plus a named list of probes.
# NOTE(review): this copy of the file had all indentation stripped; the nesting
# below is reconstructed. Each probe repeats the same keys, so each must be its
# own mapping under `list`.
probes:
  enabled: true
  every: 10
  steps_per_call: 1
  sequence:
    enabled: true
    horizons:
    - 20
    target_key: gt_action_seq_max
  list:
    z_to_s_t:
      name: z_to_s_t
      enabled: false
      shuffle: true
      type: regression
      input: z_t
      target: s_t
      loss: mse
      lr: 0.001
      every: 10
      mlp:
        hidden_dims:
        - 128
        - 64
    z_to_s_tp:
      name: z_to_s_tp
      enabled: false
      shuffle: true
      type: regression
      input: z_t
      target: s_tp
      loss: mse
      lr: 0.001
      every: 10
      mlp:
        hidden_dims:
        - 128
        - 64
    z_to_action_t:
      name: z_to_action_t
      probe_type: z_to_action
      enabled: true
      shuffle: true
      type: regression
      input: z_t
      target: gt_action
      loss: mse
      lr: 0.001
      every: 1
      mlp:
        hidden_dims:
        - 128
        - 64
    z_to_action_seq_h20:
      name: z_to_action_seq_h20
      probe_type: z_to_action_sequence
      enabled: true
      shuffle: true
      type: regression
      input: z_t
      target: gt_action_seq_max
      sequence_horizon: 20
      loss: mse
      lr: 0.001
      every: 10
      mlp:
        hidden_dims:
        - 128
        - 64
      # NOTE(review): placed on the probe; it may instead be `mlp.output_dim`.
      # 140 equals sequence_horizon (20) x 7 - presumably the flattened action
      # sequence size; confirm.
      output_dim: 140
# `logger` section.
# NOTE(review): this copy of the file had all indentation stripped; the nesting
# below is reconstructed - confirm against the original config_resolved.yaml.
logger:
  project: latent_action
  run_name: latent_action_training
  tags:
  - debug
  # NOTE(review): assumed to be a child of `logger`; it could be a top-level
  # key in the original - confirm.
  output_root: outputs/token_fdm_duration/dino_suite_all_k20_extra4