# FrameSkip-RoboCasaGR1 / config.yaml
# Hugging Face page header captured with the file (not part of the config):
#   VLyb's picture
#   Upload folder using huggingface_hub
#   92f6a68 verified
# Dataset settings.
# NOTE(review): the source had every line at column 0. The nesting below is
# reconstructed from the alphabetical key order inside each mapping; confirm
# it against the code that loads this file.
datasets:
  vla_data:
    # Single quotes keep the leading "{" from being read as a flow mapping.
    CoT_prompt: '{instruction}'
    data_mix: fourier_gr1_unified_1000
    data_root_dir: playground/Datasets/PhysicalAI-Robotics-GR00T-X-Embodiment-Sim
    delete_pause_frame: false
    # Two-element list; presumably [height, width] in pixels - TODO confirm.
    image_size:
      - 224
      - 224
    per_device_batch_size: 16
    video_backend: pyav
# Frame-skip settings.
# NOTE(review): the source had every line at column 0. The nesting below is
# reconstructed from the alphabetical key order inside each mapping; confirm
# it against the code that loads this file.
frameskip:
  cache_dir: playground/frameskip/frameskip_cache_vac_cr20-100
  default_compression_ratio: 1.0
  enabled: true
  importance:
    allow_backend_fallback: false
    # alpha + beta + gamma = 1.0; presumably the weights of the importance
    # score terms - TODO confirm.
    alpha: 0.6
    beta: 0.2
    device: cuda
    enable_vac: true
    gamma: 0.2
    max_vac_frames: 16
    type: gripper_aware
    vac_beta: 0.2
    # This backend is ffmpeg, while the dataset section of this file uses
    # pyav. Nested here, the two keys no longer collide as duplicates.
    video_backend: ffmpeg
    # Absolute path; NOTE(review): looks machine-specific - confirm it exists
    # wherever this config is used.
    visual_encoder_checkpoint: /mnt/project_ai4edu/share/models/timm/vit_large_patch14_dinov2.lvd142m/pytorch_model.bin
    visual_encoder_name: vit_large_patch14_dinov2
  pruning:
    # 0.2 through 1.0 in steps of 0.1.
    compression_ratios:
      - 0.2
      - 0.3
      - 0.4
      - 0.5
      - 0.6
      - 0.7
      - 0.8
      - 0.9
      - 1.0
    # Five entries of 0.2 followed by a single 1.0; the repetition is kept
    # exactly as in the source.
    used_compression_ratios:
      - 0.2
      - 0.2
      - 0.2
      - 0.2
      - 0.2
      - 1.0
  training:
    dynamic_ratio: true
    ratio_schedule: uniform
    warmup_steps: 5000
# Model framework settings.
# NOTE(review): the source had every line at column 0. The nesting below is
# reconstructed from the alphabetical key order inside each mapping; confirm
# it against the code that loads this file.
framework:
  action_model:
    action_dim: 29
    # Equals future_action_window_size + 1 (15 + 1).
    action_horizon: 16
    action_model_type: DiT-B
    add_pos_embed: true
    diffusion_model_cfg:
      # cross_attention_dim, output_dim and the enclosing hidden_size are
      # all 2560.
      cross_attention_dim: 2560
      dropout: 0.2
      final_dropout: true
      interleave_self_attention: true
      norm_type: ada_norm
      num_layers: 16
      output_dim: 2560
      positional_embeddings: null
    future_action_window_size: 15
    hidden_size: 2560
    max_seq_len: 1024
    noise_beta_alpha: 1.5
    noise_beta_beta: 1.0
    noise_s: 0.999
    num_inference_timesteps: 4
    num_target_vision_tokens: 32
    num_timestep_buckets: 1000
    past_action_window_size: 0
    state_dim: 58
  name: QwenGR00T
  qwenvl:
    base_vlm: Qwen/Qwen3-VL-4B-Instruct
    # NOTE(review): key order alone cannot tell whether `template` belongs
    # under `qwenvl` or directly under `framework` - confirm with the loader.
    template: qwen3_vl
# Top-level run settings.
# Relative path; presumably the root directory for run outputs - TODO confirm.
run_root_dir: './results/Checkpoints/FrameSkip/RoboCasa'
seed: 42
# Trainer settings.
# NOTE(review): the source had every line at column 0. The nesting below is
# reconstructed from the alphabetical key order inside each mapping; confirm
# it against the code that loads this file.
trainer:
  eval_interval: 1000
  # NOTE(review): a boolean here; confirm the trainer does not expect a
  # module-name string instead.
  freeze_modules: true
  gradient_accumulation_steps: 1
  gradient_clipping: 1.0
  is_resume: true
  # Learning rates keyed by name. The `action_model` key here is distinct
  # from the framework-level `action_model` mapping elsewhere in this file.
  learning_rate:
    action_model: 0.0001
    base: 1.0e-05
    qwen_vl_interface: 1.0e-05
  logging_frequency: 100
  lr_scheduler_type: cosine_with_min_lr
  max_train_steps: 100000
  num_warmup_steps: 5000
  optimizer:
    betas:
      - 0.9
      - 0.95
    eps: 1.0e-08
    weight_decay: 1.0e-08
  save_interval: 10000
  scheduler_specific_kwargs:
    # Presumably the floor used by the cosine_with_min_lr scheduler selected
    # above - TODO confirm.
    min_lr: 5.0e-07
# Presumably Weights & Biases entity and project names - TODO confirm.
# NOTE(review): kept at the top level; key order alone cannot rule out that
# these belong inside the preceding mapping - confirm with the loader.
wandb_entity: 'jinhuiye'
wandb_project: 'starVLA'