{ "architectures": [ "RLDX" ], "backbone_model_type": "vtc_qwen3_vl", "backbone_trainable_params_fp32": true, "color_jitter_params": { "brightness": 0.3, "contrast": 0.4, "hue": 0.08, "saturation": 0.5 }, "conversation_image_first": false, "diffusion_model_cfg": { "action_model_max_seq_len": 512, "attention_head_dim": 64, "depth_multi_stream": 4, "depth_single_stream": 8, "dropout": 0.2, "final_dropout": true, "num_attention_heads": 24, "output_dim": 1024, "positional_embeddings": "rope_sa_only", "pre_norm": "layer_norm", "qk_norm": "rms_norm", "rope_theta": 10000.0, "sa_dim": 1536, "set_triple_stream_for_mq": false, "set_triple_stream_for_state": false, "temb_type": "input_token", "use_swiglu": true, "vl_dim": 4096 }, "dtype": "bfloat16", "load_bf16": true, "memory_cfg": { "hidden_size": 4096, "intermediate_size": 16384, "max_position_embeddings": 32, "num_attention_heads": 16, "num_hidden_layers": 2, "num_key_value_heads": 16, "rms_norm_eps": 1e-05, "use_causal_attn": true, "use_rope": true }, "memory_video_delta_indices": [ -48, -32, -16, 0 ], "model_name": "RLWRLD/RLDX-1-VLM", "model_type": "RLDX-1", "n_cog_tokens": 64, "general_embodiment_train_ratio": 0.03125, "qwen3_collator": true, "random_rotation_angle": null, "reproject_vision": false, "state_dropout_prob": 0.0, "transformers_version": "4.57.0", "tune_diffusion_model": true, "tune_llm": false, "tune_projector": true, "tune_top_llm_layers": 4, "tune_visual": false, "use_relative_action": true, "use_video": true, "video_length": 4 }