junjin0
/

Multi-view-VLA

Model card Files Files and versions

Multi-view-VLA / pretrained_model / config.yaml

junjin0's picture

Upload folder using huggingface_hub

12f4088 verified 7 days ago

history blame contribute delete

1.78 kB

	datasets:
	vla_data:
	data_mix: delta_state
	data_root_dir: /mnt/xlab-nas-2/vla_dataset
	dataset_py: lerobot_datasets
	image_size:
	- 224
	- 224
	num_workers: 6
	per_device_batch_size: 16
	sequential_step_sampling: false
	framework:
	action_model:
	action_dim: 14
	action_horizon: 16
	action_model_type: DiT-B
	add_pos_embed: true
	diffusion_model_cfg:
	cross_attention_dim: 2560
	dropout: 0.2
	final_dropout: true
	interleave_self_attention: true
	norm_type: ada_norm
	num_layers: 16
	output_dim: 1024
	positional_embeddings: null
	future_action_window_size: 15
	hidden_size: 1024
	max_seq_len: 1024
	noise_beta_alpha: 1.5
	noise_beta_beta: 1.0
	noise_s: 0.999
	num_inference_timesteps: 4
	num_target_vision_tokens: 32
	num_timestep_buckets: 1000
	past_action_window_size: 0
	state_dim: 14
	name: QwenJAT
	qwenvl:
	base_vlm: ./checkpoints/Qwen3-VL-4B-Instruct-Action
	output_dir: /mnt/workspace/lintong.lt/output/vla_pretrain/0323_pretrain_Qwen3VL4BJAT_bs2048
	run_id: 0323_pretrain_Qwen3VL4BJAT_bs2048
	run_root_dir: /mnt/workspace/lintong.lt/output/vla_pretrain
	seed: 42
	trainer:
	eval_interval: 2000
	freeze_modules: null
	gradient_accumulation_steps: 2
	gradient_clipping: 1.0
	is_resume: false
	learning_rate:
	action_model: 0.0001
	base: 3.0e-05
	qwen_vl_interface: 1.0e-05
	logging_frequency: 100
	lr_scheduler_type: cosine_with_min_lr
	max_train_steps: 200000
	num_warmup_steps: 2000
	optimizer:
	betas:
	- 0.9
	- 0.95
	eps: 1.0e-08
	weight_decay: 1.0e-08
	repeated_diffusion_steps: 4
	save_interval: 2000
	scheduler_specific_kwargs:
	min_lr: 5.0e-07
	wandb_entity: your_wandb_entity
	wandb_project: llavavla