jmercat committed on
Commit
5357bd2
·
0 Parent(s):

release: initial squashed history

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. checkpoints/checkpoint_71.pt +3 -0
  3. config.yaml +168 -0
  4. config_model.yaml +43 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
checkpoints/checkpoint_71.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22f2d0ee8a7f661c8cb83a48e376fff6cce44e3656d26997a7d2e5e49a0b581f
3
+ size 6109656743
config.yaml ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data:
2
+ allow_multiple_epochs: false
3
+ augmentation:
4
+ enabled: true
5
+ image:
6
+ color_jitter:
7
+ brightness: 0.2
8
+ contrast: 0.4
9
+ enabled: true
10
+ hue:
11
+ - -0.05
12
+ - 0.05
13
+ saturation: 0.2
14
+ crop:
15
+ enabled: true
16
+ mode: random
17
+ shape:
18
+ - 224
19
+ - 224
20
+ point_cloud:
21
+ color_jitter:
22
+ brightness: 0.2
23
+ contrast: 0.4
24
+ enabled: false
25
+ hue:
26
+ - -0.05
27
+ - 0.05
28
+ saturation: 0.2
29
+ dataloader_in_order: false
30
+ dataset_cache:
31
+ cache_dir: null
32
+ cache_size_gb: null
33
+ cache_verbose: null
34
+ enabled: false
35
+ dataset_manifest:
36
+ - null
37
+ dataset_modality:
38
+ - image_caption
39
+ dataset_weighting:
40
+ - 1.0
41
+ hf_fast_tokenizer_rayon_threads: null
42
+ hf_fast_tokenizers_parallelism: true
43
+ image_size: 342
44
+ img_num_tokens: 64
45
+ num_workers: 8
46
+ prefetch_factor: 4
47
+ processor: HuggingFaceTB/SmolVLM2-256M-Video-Instruct
48
+ processor_kwargs:
49
+ max_image_size:
50
+ longest_edge: 224
51
+ size:
52
+ longest_edge: 224
53
+ seed: 42
54
+ seq_len: 256
55
+ shuffle: true
56
+ shuffle_buffer_size: 2000
57
+ shuffle_initial: 500
58
+ tokenizer: HuggingFaceTB/SmolVLM2-256M-Video-Instruct
59
+ type: image_caption
60
+ use_hf_fast_tokenizer: true
61
+ val_dataset_manifest: []
62
+ val_dataset_weighting: []
63
+ db_logging: true
64
+ distributed:
65
+ ddp_static_graph: false
66
+ device: cuda:0
67
+ dist_backend: nccl
68
+ dist_url: env://
69
+ fsdp: true
70
+ fsdp_cpu_offload: false
71
+ fsdp_reshard_after_forward: false
72
+ local_rank: 0
73
+ rank: 0
74
+ use_distributed: true
75
+ world_size: 16
76
+ ema:
77
+ alpha: 0.999
78
+ enabled: false
79
+ inv_gamma: 1.0
80
+ max_value: 0.9999
81
+ min_value: 0.0
82
+ power: 0.75
83
+ type: ema
84
+ update_after_step: 0
85
+ hparams:
86
+ beta1: 0.9
87
+ beta2: 0.95
88
+ decay: '0.2'
89
+ eps: 1.0e-08
90
+ force_min_lr: 0.0
91
+ global_batch_size: 1024
92
+ grad_checkpointing: false
93
+ grad_clip_norm: 1.0
94
+ loss_function: cross_entropy
95
+ lr: 0.0001
96
+ lr_cooldown_end: 0.0
97
+ lr_scheduler: warmup_constant_decay
98
+ optimizer: adamw
99
+ per_gpu_batch_size: 64
100
+ precision: pure_bf16
101
+ seed: 42
102
+ torchcompile: true
103
+ warmup: '1000'
104
+ wd: 0.01
105
+ world_size: 16
106
+ z_loss_coefficient: 0.0001
107
+ log_every_n_steps: 20
108
+ log_level: INFO
109
+ max_checkpoint_limit: null
110
+ model:
111
+ freeze: false
112
+ image_token_id: 49190
113
+ processor: HuggingFaceTB/SmolVLM2-256M-Video-Instruct
114
+ resume_from_checkpoint: null
115
+ resume_weights_only: false
116
+ transformer:
117
+ attn_name: torch_attn
118
+ cast_output_to_float32: false
119
+ ffn_type: swiglu
120
+ freeze: false
121
+ hidden_dim: 2048
122
+ is_causal: true
123
+ max_seq_len: 2048
124
+ n_heads: 16
125
+ n_layers: 24
126
+ norm_eps: 1.0e-05
127
+ norm_type: lp_layer_norm
128
+ positional_embedding_type: rotary
129
+ post_embed_norm: false
130
+ qk_norm: true
131
+ resume_from_checkpoint: null
132
+ resume_weights_only: true
133
+ type: transformer
134
+ vocab_size: 49280
135
+ weight_tying: false
136
+ type: vlm
137
+ vit:
138
+ cls_flag: false
139
+ dropout: 0.0
140
+ freeze: false
141
+ hidden_dim: 768
142
+ img_size: 224
143
+ inter_dim: 3072
144
+ interpolation_mode: bicubic
145
+ ln_eps: 1.0e-06
146
+ n_heads: 12
147
+ n_layers: 12
148
+ patch_size: 14
149
+ pretrained: null
150
+ projector_pixel_shuffle_factor: 2
151
+ resume_from_checkpoint: null
152
+ resume_weights_only: false
153
+ type: vit
154
+ name: 2026_04_11-08_24_20-model_vlm-lr_0.0001-bsz_1024
155
+ num_checkpoints: 20
156
+ num_epochs: null
157
+ remote_sync: null
158
+ remote_sync_fixed_path: null
159
+ resolve_configs: false
160
+ resolve_configs_path: null
161
+ save_path: /tmp
162
+ total_train_samples: 200000000
163
+ total_val_samples: null
164
+ val_every_n_checkpoints: 1
165
+ wandb: true
166
+ wandb_entity: tri
167
+ wandb_project_name: vla_foundry
168
+ wandb_tags: []
config_model.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ freeze: false
2
+ image_token_id: 49190
3
+ processor: HuggingFaceTB/SmolVLM2-256M-Video-Instruct
4
+ resume_from_checkpoint: null
5
+ resume_weights_only: false
6
+ transformer:
7
+ attn_name: torch_attn
8
+ cast_output_to_float32: false
9
+ ffn_type: swiglu
10
+ freeze: false
11
+ hidden_dim: 2048
12
+ is_causal: true
13
+ max_seq_len: 2048
14
+ n_heads: 16
15
+ n_layers: 24
16
+ norm_eps: 1.0e-05
17
+ norm_type: lp_layer_norm
18
+ positional_embedding_type: rotary
19
+ post_embed_norm: false
20
+ qk_norm: true
21
+ resume_from_checkpoint: null
22
+ resume_weights_only: true
23
+ type: transformer
24
+ vocab_size: 49280
25
+ weight_tying: false
26
+ type: vlm
27
+ vit:
28
+ cls_flag: false
29
+ dropout: 0.0
30
+ freeze: false
31
+ hidden_dim: 768
32
+ img_size: 224
33
+ inter_dim: 3072
34
+ interpolation_mode: bicubic
35
+ ln_eps: 1.0e-06
36
+ n_heads: 12
37
+ n_layers: 12
38
+ patch_size: 14
39
+ pretrained: null
40
+ projector_pixel_shuffle_factor: 2
41
+ resume_from_checkpoint: null
42
+ resume_weights_only: false
43
+ type: vit