LIBERO_ALL_FDM_DURATION / config_resolved.yaml

Upload folder using huggingface_hub

6e775a3 verified 18 days ago

6.56 kB

	# Dataset and observation settings.
	# NOTE(review): every line of this file carries the same single-tab indent, so the
	# original nesting cannot be read off the text; parent/child remarks below are
	# assumptions to confirm against the upstream file.
	data:
	name: libero_cosmos_policy
	type: mg
	backend: robomimic
	paths: []
	task_suite_name: null
	# One observation key is listed: image.
	observations_keys:
	- image
	# Presumably maps the `image` key above to its source field — confirm.
	observation_source_keys:
	image: agentview_rgb_jpeg
	action_dim: 7
	frame_stack: 1
	horizon: 1
	temporal_index_mode: clip
	use_proprio: false
	proprio_source: robot_states
	# 3 x 224 x 224; the key name suggests channel, height, width order.
	image_chw:
	- 3
	- 224
	- 224
	# NOTE(review): the value range is zero_to_one while the transport dtype is uint8;
	# presumably a conversion happens after transport — confirm in the loader.
	image_value_range: zero_to_one
	image_transport_dtype: uint8
	duration_focus: null
	# Hugging Face source: a dataset repo, with local_files_only set to false.
	hf:
	repo_id: nvidia/LIBERO-Cosmos-Policy
	repo_type: dataset
	# NOTE(review): this pattern contains no glob wildcard. If it is meant to match
	# many .hdf5 files, "*" characters may have been lost in extraction — confirm
	# against the upstream file before relying on it.
	allow_patterns:
	- success_only/_regen/.hdf5
	local_files_only: false
	# Action-sequence targets: enabled, a single horizon of 20, target_key
	# gt_action_seq_max.
	action_sequence_targets:
	enabled: true
	horizons:
	- 20
	target_key: gt_action_seq_max
	# Training settings. The first group below uses AdamW (lr 1e-4, weight decay
	# 1e-6), batch size 256, 5000 total steps, and a cosine scheduler with 500
	# warmup steps.
	# NOTE(review): all lines share one tab of indent, so which keys nest under
	# which parent is an assumption — confirm against the upstream file.
	train:
	optimizer:
	type: adamw
	lr: 0.0001
	weight_decay: 1.0e-06
	batch_size: 256
	num_workers: 4
	prefetch_factor: null
	log_interval: 100
	total_steps: 5000
	eval_interval: 500
	use_aug: true
	aug_mode: default
	use_amp: true
	device_transfer_non_blocking: true
	seed: 42
	scheduler:
	type: cosine
	warmup_steps: 500
	num_cycles: 0.5
	min_lr_scale: 0.0
	# Separate dataloader settings for the decoder; currently disabled.
	decoder_dataloader:
	enabled: false
	batch_size: 64
	num_workers: 4
	prefetch_factor: null
	shuffle: true
	# Stage switches: only stage1 is true.
	stages:
	stage1: true
	stage2: false
	stage3: false
	# NOTE(review): `stage1`/`stage2`/`stage3` appear again here as section headers
	# (above they are booleans); presumably these are per-stage settings under a
	# different parent — confirm.
	# stage1 differs from the first group in num_workers (16 vs 4),
	# prefetch_factor (2 vs null) and total_steps (20000 vs 5000).
	stage1:
	batch_size: 256
	num_workers: 16
	prefetch_factor: 2
	log_interval: 100
	total_steps: 20000
	eval_interval: 500
	optimizer:
	lr: 0.0001
	weight_decay: 1.0e-06
	# stage2 repeats the first group's loader/step values and adds latent_target
	# and an action_probe switch.
	stage2:
	batch_size: 256
	num_workers: 4
	prefetch_factor: null
	log_interval: 100
	total_steps: 5000
	eval_interval: 500
	latent_target: auto
	optimizer:
	lr: 0.0001
	weight_decay: 1.0e-06
	action_probe:
	enabled: true
	# stage3 adds final-eval and trainable-scope settings.
	stage3:
	batch_size: 256
	num_workers: 4
	prefetch_factor: null
	log_interval: 100
	total_steps: 5000
	eval_interval: 500
	final_eval_num_episodes: 100
	log_eval_video: false
	optimizer:
	lr: 0.0001
	weight_decay: 1.0e-06
	trainable_scope: all
	non_decoder_lr_scale: 1.0
	# Checkpointing: enabled, save_interval 1000 (presumably in steps — confirm),
	# save_last and latest_only both true, no base_dir set.
	checkpoint:
	enabled: true
	base_dir: null
	save_interval: 1000
	save_last: true
	latest_only: true
	# Per-stage load modes: stage1 is `none`; stage2 and stage3 are `handoff`.
	# All paths are null.
	load:
	stage1:
	mode: none
	path: null
	stage2:
	mode: handoff
	path: null
	teacher_path: null
	stage3:
	mode: handoff
	path: null
	# Model settings: an `idm` and an `fdm` component, a hyperbolic-latent option
	# (disabled) and an image encoder.
	# NOTE(review): nesting is not recoverable from the flat indent; grouping
	# remarks below are assumptions to confirm.
	model:
	# idm: type token_idm. token_dim 768 equals the encoder output_dim listed
	# further down.
	idm:
	type: token_idm
	action_dim: 128
	token_dim: 768
	model_dim: 512
	latent_dim: 32
	num_action_tokens: 4
	num_blocks: 4
	num_heads: 8
	dropout: 0.0
	# fdm: type token_fdm_duration. Same dimensions as idm, except that no
	# latent_dim entry is listed here.
	fdm:
	type: token_fdm_duration
	action_dim: 128
	token_dim: 768
	model_dim: 512
	num_action_tokens: 4
	num_blocks: 4
	num_heads: 8
	dropout: 0.0
	# Hyperbolic latent option; enabled is false, so the remaining keys are
	# presumably unused in this run — confirm.
	hyperbolic_latent:
	enabled: false
	backend: geoopt
	manifold: poincare
	curvature: 1.0
	learnable_curvature: false
	fdm_input_mode: logmap0
	prelift_mode: none
	prelift_scale: 1.0
	lift_max_norm: 5.0
	tangent_max_norm: 5.0
	eps: 1.0e-06
	# Image encoder: facebook/dinov2-base with freeze_backbone true and
	# drop_cls_token true, output_dim 768.
	encoders:
	type: dino
	image_key: image
	model_id: facebook/dinov2-base
	input_value_range: zero_to_one
	freeze_backbone: true
	drop_cls_token: true
	output_dim: 768
	# These three-value lists match the standard ImageNet per-channel mean and std.
	mean:
	- 0.485
	- 0.456
	- 0.406
	std:
	- 0.229
	- 0.224
	- 0.225
	# NOTE(review): a second `type` key follows the encoder's `type: dino`;
	# presumably this one and the keys after it sit at the model level rather than
	# inside `encoders` — confirm.
	type: dino_lam
	idm_input: future
	fdm_target: future
	pixel_decoders: null
	latent_action_decoders: null
	# Objective settings. fdm_target and idm_input are both `future`.
	# NOTE(review): nesting is not recoverable from the flat indent; in particular
	# it is unclear where the `consistency` keys end and which parent owns
	# `weight_mode` / `weights` — confirm against the upstream file.
	objective:
	fdm_target: future
	idm_input: future
	stage_overrides: {}
	multiscale:
	enabled: true
	# Consistency term: enabled is false, so the weight of 10.0 is presumably
	# inactive — confirm.
	consistency:
	enabled: false
	weight: 10.0
	num_pairs: 4
	sample_source: all_horizons
	teacher_mode: direct_teacher
	prediction_mode: direct_duration
	allow_plain_fdm_oneshot: true
	weight_mode: uniform
	weights: {}
	# Temporal sampling: max_offset 20, fixed anchor mode, extra_random_count 4.
	temporal:
	max_offset: 20
	anchor_mode: fixed
	extra_random_count: 4
	# Branch-order term: enabled is false. Every weight listed is 0.0 except
	# radius_progress_weight (1.0).
	branch_order:
	enabled: false
	radial_weight: 0.0
	local_radial_margin_weight: 0.0
	local_radial_margin_alpha: 0.05
	branch_weight: 0.0
	z0_origin_weight: 0.0
	prefix_weight: 0.0
	radius_progress_weight: 1.0
	radius_progress_mode: offset_margin
	radius_progress_alpha: 0.02
	branch_margin_deg: 10.0
	eps: 1.0e-06
	# Latent plan: enabled is false; total_horizon is 20.
	latent_plan:
	enabled: false
	total_horizon: 20
	# Evaluation settings: type robomimic, 20 episodes, max_steps 500, video
	# recording on, seed 0.
	# Several keys are null (checkpoint_path, video_output_path, obs_keys,
	# use_proprio, resize_hw, image_value_range). With use_checkpoint_cfg true,
	# these are presumably filled from the checkpoint's config — confirm.
	eval:
	type: robomimic
	data_path: []
	reset_mode: env_reset
	num_eval_episodes: 20
	max_steps: 500
	record_video: true
	checkpoint_path: null
	checkpoint_strict: true
	use_checkpoint_cfg: true
	video_output_path: null
	obs_keys: null
	use_proprio: null
	use_object: false
	resize_hw: null
	image_value_range: null
	seed: 0
	# Agent settings: image and proprio encoders, followed by policy settings.
	# NOTE(review): nesting is not recoverable from the flat indent. `type` and
	# `hidden_dims` each appear more than once below and presumably belong to
	# different parents — confirm against the upstream file.
	agent:
	encoders:
	# Image encoder: resnet18, not pretrained, 3 input channels, output_dim 512.
	image:
	in_channels: 3
	output_dim: 512
	output_mode: global
	type: resnet18
	pretrained: false
	# Proprio encoder: input_dim 9, hidden_dim 128, output_dim 64.
	proprio:
	input_dim: 9
	hidden_dim: 128
	output_dim: 64
	type: group
	# Two image modalities are listed.
	modalities:
	- agentview_image
	- robot0_eye_in_hand_image
	proj_dim: 128
	policies:
	decoder:
	type: mlp
	# First hidden_dims list (two entries); presumably the decoder's.
	hidden_dims:
	- 256
	- 256
	# Second hidden_dims list (four entries); presumably the enclosing policy's.
	hidden_dims:
	- 512
	- 512
	- 256
	- 64
	action_dim: 256
	emb_dim: 384
	gt_action_dim: 7
	type: latent_action
	# Probe settings: global switches, a sequence-probe group, and four named
	# probes. Every named probe is type regression with mse loss, lr 0.001,
	# input z_t and MLP hidden dims 128 and 64.
	# NOTE(review): nesting is not recoverable from the flat indent; the grouping
	# of keys under each probe name is an assumption to confirm.
	probes:
	enabled: true
	every: 10
	steps_per_call: 1
	# Sequence probing: enabled, one horizon (20), target_key gt_action_seq_max.
	sequence:
	enabled: true
	horizons:
	- 20
	target_key: gt_action_seq_max
	list:
	# z_t -> s_t; disabled.
	z_to_s_t:
	name: z_to_s_t
	enabled: false
	shuffle: true
	type: regression
	input: z_t
	target: s_t
	loss: mse
	lr: 0.001
	every: 10
	mlp:
	hidden_dims:
	- 128
	- 64
	# z_t -> s_tp; disabled.
	z_to_s_tp:
	name: z_to_s_tp
	enabled: false
	shuffle: true
	type: regression
	input: z_t
	target: s_tp
	loss: mse
	lr: 0.001
	every: 10
	mlp:
	hidden_dims:
	- 128
	- 64
	# z_t -> gt_action; enabled, with `every: 1` (the other probes use 10).
	z_to_action_t:
	name: z_to_action_t
	probe_type: z_to_action
	enabled: true
	shuffle: true
	type: regression
	input: z_t
	target: gt_action
	loss: mse
	lr: 0.001
	every: 1
	mlp:
	hidden_dims:
	- 128
	- 64
	# z_t -> gt_action_seq_max with sequence_horizon 20; enabled.
	z_to_action_seq_h20:
	name: z_to_action_seq_h20
	probe_type: z_to_action_sequence
	enabled: true
	shuffle: true
	type: regression
	input: z_t
	target: gt_action_seq_max
	sequence_horizon: 20
	loss: mse
	lr: 0.001
	every: 10
	mlp:
	hidden_dims:
	- 128
	- 64
	# 140 = 20 x 7, i.e. sequence_horizon times a 7-dim action; presumably a
	# flattened action sequence — confirm.
	output_dim: 140
	# Logging settings: project and run name, one tag (debug), and an output_root
	# given as a relative path.
	# NOTE(review): with the flat indent it is unclear whether output_root belongs
	# to `logger` or is a top-level key — confirm.
	logger:
	project: latent_action
	run_name: latent_action_training
	tags:
	- debug
	output_root: outputs/token_fdm_duration/dino_suite_all_k20_extra4