jmercat committed on
Commit
b03066e
·
0 Parent(s):

release: initial squashed history

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. checkpoints/checkpoint_50.pt +3 -0
  3. config.yaml +110 -0
  4. config_model.yaml +19 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
checkpoints/checkpoint_50.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ae75f2ca515e5776ac7f74af03e7806c3e7611c2aa0976ce4945f08a6db2833
3
+ size 5742376211
config.yaml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data:
2
+ allow_multiple_epochs: true
3
+ dataloader_in_order: false
4
+ dataset_cache:
5
+ cache_dir: null
6
+ cache_size_gb: null
7
+ cache_verbose: null
8
+ enabled: false
9
+ dataset_manifest:
10
+ - null
11
+ dataset_modality:
12
+ - text_untokenized
13
+ dataset_weighting:
14
+ - 1.0
15
+ hf_fast_tokenizer_rayon_threads: null
16
+ hf_fast_tokenizers_parallelism: true
17
+ num_workers: 8
18
+ prefetch_factor: 4
19
+ seed: 42
20
+ seq_len: 2048
21
+ shuffle: true
22
+ shuffle_buffer_size: 2000
23
+ shuffle_initial: 500
24
+ tokenizer: HuggingFaceTB/SmolVLM2-256M-Video-Instruct
25
+ type: text_untokenized
26
+ use_hf_fast_tokenizer: true
27
+ val_dataset_manifest: []
28
+ val_dataset_weighting: []
29
+ db_logging: true
30
+ distributed:
31
+ ddp_static_graph: false
32
+ device: cuda:0
33
+ dist_backend: nccl
34
+ dist_url: env://
35
+ fsdp: true
36
+ fsdp_cpu_offload: false
37
+ fsdp_reshard_after_forward: false
38
+ local_rank: 0
39
+ rank: 0
40
+ use_distributed: true
41
+ world_size: 128
42
+ ema:
43
+ alpha: 0.999
44
+ enabled: false
45
+ inv_gamma: 1.0
46
+ max_value: 0.9999
47
+ min_value: 0.0
48
+ power: 0.75
49
+ type: ema
50
+ update_after_step: 0
51
+ hparams:
52
+ beta1: 0.9
53
+ beta2: 0.95
54
+ decay: '0.2'
55
+ eps: 1.0e-08
56
+ force_min_lr: 0.0
57
+ global_batch_size: 1280
58
+ grad_checkpointing: false
59
+ grad_clip_norm: 1.0
60
+ loss_function: cross_entropy
61
+ lr: 0.0003
62
+ lr_cooldown_end: 0.0
63
+ lr_scheduler: warmup_constant_decay
64
+ optimizer: adamw
65
+ per_gpu_batch_size: 10
66
+ precision: pure_bf16
67
+ seed: 42
68
+ torchcompile: true
69
+ warmup: '1000'
70
+ wd: 0.01
71
+ world_size: 128
72
+ z_loss_coefficient: 0.0001
73
+ log_every_n_steps: 20
74
+ log_level: INFO
75
+ max_checkpoint_limit: null
76
+ model:
77
+ attn_name: torch_attn
78
+ cast_output_to_float32: false
79
+ ffn_type: swiglu
80
+ freeze: false
81
+ hidden_dim: 2048
82
+ is_causal: true
83
+ max_seq_len: 2048
84
+ n_heads: 16
85
+ n_layers: 24
86
+ norm_eps: 1.0e-05
87
+ norm_type: lp_layer_norm
88
+ positional_embedding_type: rotary
89
+ post_embed_norm: false
90
+ qk_norm: true
91
+ resume_from_checkpoint: null
92
+ resume_weights_only: false
93
+ type: transformer
94
+ vocab_size: 49280
95
+ weight_tying: false
96
+ name: 2026_04_04-21_43_15-model_transformer-lr_0.0003-bsz_1280
97
+ num_checkpoints: 20
98
+ num_epochs: null
99
+ remote_sync: null
100
+ remote_sync_fixed_path: null
101
+ resolve_configs: false
102
+ resolve_configs_path: null
103
+ save_path: /tmp
104
+ total_train_samples: 488294400
105
+ total_val_samples: null
106
+ val_every_n_checkpoints: 1
107
+ wandb: true
108
+ wandb_entity: tri
109
+ wandb_project_name: vla_foundry
110
+ wandb_tags: []
config_model.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attn_name: torch_attn
2
+ cast_output_to_float32: false
3
+ ffn_type: swiglu
4
+ freeze: false
5
+ hidden_dim: 2048
6
+ is_causal: true
7
+ max_seq_len: 2048
8
+ n_heads: 16
9
+ n_layers: 24
10
+ norm_eps: 1.0e-05
11
+ norm_type: lp_layer_norm
12
+ positional_embedding_type: rotary
13
+ post_embed_norm: false
14
+ qk_norm: true
15
+ resume_from_checkpoint: null
16
+ resume_weights_only: false
17
+ type: transformer
18
+ vocab_size: 49280
19
+ weight_tying: false