ramu0e committed on
Commit
e11c038
·
verified ·
1 Parent(s): 3f274a1

Upload folder using huggingface_hub

Browse files
lam/config.json CHANGED
@@ -12,7 +12,7 @@
12
  ],
13
  "decoder_attention_head_dim": 64,
14
  "decoder_attn_implementation": "flash_attention_2",
15
- "decoder_encoder_hidden_dim": 3,
16
  "decoder_eps": 1e-06,
17
  "decoder_ffn_dim": 768,
18
  "decoder_freq_dim": 64,
@@ -32,12 +32,14 @@
32
  "fsq_levels": [
33
  8,
34
  5,
 
35
  5
36
  ],
37
  "initializer_range": 0.02,
 
38
  "is_diffusion": true,
39
- "latent_channels": 3,
40
- "max_tokens": 128,
41
  "min_tokens": 1,
42
  "model_type": "lam",
43
  "null_latent": 0,
@@ -71,5 +73,5 @@
71
  "with_cp": false
72
  },
73
  "videomae_from_pretrained": null,
74
- "vocab_size": 200
75
  }
 
12
  ],
13
  "decoder_attention_head_dim": 64,
14
  "decoder_attn_implementation": "flash_attention_2",
15
+ "decoder_encoder_hidden_dim": 4,
16
  "decoder_eps": 1e-06,
17
  "decoder_ffn_dim": 768,
18
  "decoder_freq_dim": 64,
 
32
  "fsq_levels": [
33
  8,
34
  5,
35
+ 5,
36
  5
37
  ],
38
  "initializer_range": 0.02,
39
+ "is_action_discrete": false,
40
  "is_diffusion": true,
41
+ "latent_channels": 4,
42
+ "max_tokens": 64,
43
  "min_tokens": 1,
44
  "model_type": "lam",
45
  "null_latent": 0,
 
73
  "with_cp": false
74
  },
75
  "videomae_from_pretrained": null,
76
+ "vocab_size": 1000
77
  }
lam/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9cd4d00c3766e9b28f89158ade6ffe30d64eef6ba8929f96398475679416c40
3
- size 23418680
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8bc80515fb9cf54863015c22ac0e8e6a433cbe8be5e4e9e8a8d320c397dca20
3
+ size 23288380
policy/config.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "action_latent_dim": null,
3
- "action_seq_len": 128,
4
- "action_start_token_id": 203,
5
- "action_vocab_size": 200,
6
  "architectures": [
7
  "PolicyQwen3ForConditionalGeneration"
8
  ],
9
  "attention_bias": false,
10
  "attention_dropout": 0.0,
11
  "dtype": "bfloat16",
12
- "eos_token_id": 204,
13
- "frame_stride": 3,
14
  "head_dim": 64,
15
  "hidden_act": "silu",
16
  "hidden_size": 256,
17
- "image_token_id": 202,
18
  "initializer_range": 0.02,
 
19
  "intermediate_size": 1024,
20
  "layer_types": [
21
  "full_attention",
@@ -31,17 +31,16 @@
31
  "full_attention",
32
  "full_attention"
33
  ],
34
- "max_position_embeddings": 2048,
35
  "max_window_layers": 28,
36
  "model_type": "policy_qwen3",
37
  "num_attention_heads": 4,
38
- "num_frames": 2,
39
  "num_hidden_layers": 12,
40
  "num_key_value_heads": 4,
41
  "pad_token_id": 0,
42
  "policy_image_height": 64,
43
  "policy_image_width": 64,
44
- "predict_tokens": true,
45
  "rms_norm_eps": 1e-06,
46
  "rope_scaling": null,
47
  "rope_theta": 10000.0,
@@ -50,10 +49,10 @@
50
  "transformers_version": "4.57.1",
51
  "use_cache": false,
52
  "use_sliding_window": false,
53
- "vision_end_token_id": 201,
54
  "vision_in_channels": 3,
55
  "vision_merge_size": 1,
56
- "vision_patch_size": 4,
57
- "vision_start_token_id": 200,
58
- "vocab_size": 205
59
  }
 
1
  {
2
+ "action_seq_len": 64,
3
+ "action_start_token_id": 1003,
4
+ "action_vocab_size": 1000,
 
5
  "architectures": [
6
  "PolicyQwen3ForConditionalGeneration"
7
  ],
8
  "attention_bias": false,
9
  "attention_dropout": 0.0,
10
  "dtype": "bfloat16",
11
+ "eos_token_id": 1004,
12
+ "frame_stride": 1,
13
  "head_dim": 64,
14
  "hidden_act": "silu",
15
  "hidden_size": 256,
16
+ "image_token_id": 1002,
17
  "initializer_range": 0.02,
18
+ "input_frame_stride": 8,
19
  "intermediate_size": 1024,
20
  "layer_types": [
21
  "full_attention",
 
31
  "full_attention",
32
  "full_attention"
33
  ],
34
+ "max_position_embeddings": 8192,
35
  "max_window_layers": 28,
36
  "model_type": "policy_qwen3",
37
  "num_attention_heads": 4,
38
+ "num_frames": 10,
39
  "num_hidden_layers": 12,
40
  "num_key_value_heads": 4,
41
  "pad_token_id": 0,
42
  "policy_image_height": 64,
43
  "policy_image_width": 64,
 
44
  "rms_norm_eps": 1e-06,
45
  "rope_scaling": null,
46
  "rope_theta": 10000.0,
 
49
  "transformers_version": "4.57.1",
50
  "use_cache": false,
51
  "use_sliding_window": false,
52
+ "vision_end_token_id": 1001,
53
  "vision_in_channels": 3,
54
  "vision_merge_size": 1,
55
+ "vision_patch_size": 8,
56
+ "vision_start_token_id": 1000,
57
+ "vocab_size": 1005
58
  }
policy/generation_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "_from_model_config": true,
3
- "eos_token_id": 204,
4
  "pad_token_id": 0,
5
  "transformers_version": "4.57.1",
6
  "use_cache": false
 
1
  {
2
  "_from_model_config": true,
3
+ "eos_token_id": 1004,
4
  "pad_token_id": 0,
5
  "transformers_version": "4.57.1",
6
  "use_cache": false
policy/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cf1309a302321033f67c7e02769544951c7c3da13d96c08c38513a9fc857dae
3
- size 25326488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56c0bb93a2d7e4012eeb5f57a7b6a9512fcf68630f835ee05b1be3083a0a54e8
3
+ size 25809824
policy_processor/preprocessor_config.json CHANGED
@@ -17,7 +17,7 @@
17
  "max_pixels": 1003520,
18
  "merge_size": 1,
19
  "min_pixels": 3136,
20
- "patch_size": 4,
21
  "resample": 3,
22
  "rescale_factor": 0.00392156862745098,
23
  "size": {
 
17
  "max_pixels": 1003520,
18
  "merge_size": 1,
19
  "min_pixels": 3136,
20
+ "patch_size": 8,
21
  "resample": 3,
22
  "rescale_factor": 0.00392156862745098,
23
  "size": {