craffel HF Staff commited on
Commit
4c2659c
·
verified ·
1 Parent(s): 40a622d

Upload folder using huggingface_hub

Browse files
Files changed (21) hide show
  1. .gitattributes +9 -0
  2. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/.metadata +3 -0
  3. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__0_0.distcp +3 -0
  4. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__1_0.distcp +3 -0
  5. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__2_0.distcp +3 -0
  6. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__3_0.distcp +3 -0
  7. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__4_0.distcp +3 -0
  8. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__5_0.distcp +3 -0
  9. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__6_0.distcp +3 -0
  10. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__7_0.distcp +3 -0
  11. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/consolidated/consolidated.pth +3 -0
  12. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/consolidated/params.json +1 -0
  13. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/params.json +1 -0
  14. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00000.json +1 -0
  15. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00001.json +1 -0
  16. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00002.json +1 -0
  17. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00003.json +1 -0
  18. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00004.json +1 -0
  19. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00005.json +1 -0
  20. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00006.json +1 -0
  21. meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00007.json +1 -0
.gitattributes CHANGED
@@ -153,3 +153,12 @@ meta-llama-Llama-3.2-1B-seed_777_model_seed_111/0000100000/__5_0.distcp filter=l
153
  meta-llama-Llama-3.2-1B-seed_777_model_seed_111/0000100000/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
154
  meta-llama-Llama-3.2-1B-seed_777_model_seed_111/0000100000/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
155
  meta-llama-Llama-3.2-1B-seed_777_model_seed_222/metrics.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
153
  meta-llama-Llama-3.2-1B-seed_777_model_seed_111/0000100000/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
154
  meta-llama-Llama-3.2-1B-seed_777_model_seed_111/0000100000/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
155
  meta-llama-Llama-3.2-1B-seed_777_model_seed_222/metrics.jsonl filter=lfs diff=lfs merge=lfs -text
156
+ meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/.metadata filter=lfs diff=lfs merge=lfs -text
157
+ meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
158
+ meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
159
+ meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
160
+ meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
161
+ meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
162
+ meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
163
+ meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
164
+ meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a984d2477b9441595e294b726eaeda7c8e6219d81ffaf62f6f5f4808080a7ff
3
+ size 1148596
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17eeb85326e54cc81081a66a348fc64e08d1ca872b00a5ae69f12c8a79a932e0
3
+ size 2715727056
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5e73aa8c06f4a1f6fc51a8c031d77f4c09a36944678cc6b4845eaa4b8a66d76
3
+ size 2715765996
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aca9bde3ab23a444bf1c34e19d1221e0240587b6f9bea514a44ebb2f2087d887
3
+ size 2715765996
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbcc20b0f83a11eb24da0f6e7859752c3c322ec07643dac985b6c1515ebab893
3
+ size 2715765996
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f25554d423228b1f9af9d892d98f1034a637f2bb9bad10b38f0b8ce232cf3de
3
+ size 2715765996
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff69b37e904cae9b580e9c4e2849c13bdf10dde35b7c77d278ce9a3ddebe40aa
3
+ size 2715768272
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9aa70d8b5aa90c7ca0ec1c9184a7960d1526393a1bbe1a7aba724282eedac58
3
+ size 2715768272
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3430fd5fb918e4c6a6137b87b12307ffdfc157841f0f5df97d35072ccaadfc9c
3
+ size 2715776528
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/consolidated/consolidated.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6946ba6865c9b2dd0d8e32cb2a6dcd99fd5eedaf1f4fb19ec59e2d9a14cbecc6
3
+ size 21719647126
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/consolidated/params.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"name": "toksuite_llama_1b_seed_777_model_seed_222", "dump_dir": "/fsx/craffel/toksuite/lingua_logs/meta-llama-Llama-3.2-1B-seed_777_model_seed_222/", "seed": 777, "grad_acc_steps": 8, "gc_collect_freq": 1000, "probe_freq": null, "steps": 100000, "data": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.4, "cmn_Hani": 0.15, "tur_Latn": 0.15, "ita_Latn": 0.15, "fas_Arab": 0.15}, "batch_size": 4, "seq_len": 4096, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 1024, "tokenizer": {"name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "load_supermapping": false, "dropout": 0.0, "seed": 42, "superset_code_name": "super_vocab", "n_words": null}, "routing": {"source_to_tokenizer": {}, "task_to_tokenizer": {}, "suitable_tokenizer_probability": 1.0}}, "optim": {"lr": 0.001, "weight_decay": 0.1, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 2000, "lr_min_ratio": 1e-06, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5}, "model": {"dim": 2048, "n_layers": 25, "head_dim": null, "n_heads": 16, "n_kv_heads": null, "ffn_dim_multiplier": null, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 10000.0, "init_base_std": null, "init_std_factor": "disabled", "max_seqlen": 4096, "seed": 222, "vocab_size": 128256, "weight_tying": false, "sliding_window": null, "use_factorized_embeddings": false, "factorized_embedding_dim": 0}, "distributed": {"dp_shard": 1, "dp_replicate": 8, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": false, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver"}, "env": {"MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"}, "checkpoint": {"dump": {"every": 6000, "keep": -1}, "eval": {"every": 2000, "keep": -1}, "path": "/fsx/craffel/toksuite/lingua_logs/meta-llama-Llama-3.2-1B-seed_777_model_seed_222/checkpoints", "init_ckpt_path": null, "load_init_optimizer_state": false, "save_init_ckpt": false}, "profiling": {"run": true, "trace_folder": "profiling", "mem_warmup": 0, "mem_steps": 4, "profile_warmup": 100, "profile_steps": 4}, "logging": {"freq": 1, "acc_freq": null, "wandb": null}, "async_eval_gpus": 8, "eval": {"harness": {"tasks": ["hellaswag", "piqa", "arc_easy", "arc_challenge", "include_base_44_turkish", "include_base_44_italian", "include_base_44_chinese", "belebele_pes_Arab", "belebele_eng_Latn", "belebele_ita_Latn", "belebele_tur_Latn", "belebele_zho_Hans", "xnli_en", "xnli_tr", "xnli_zh"]}, "generator": {"max_tokens": 8192, "dtype": "bf16", "add_bos": false}}}
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/params.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"name": "toksuite_llama_1b_seed_777_model_seed_222", "dump_dir": "/fsx/craffel/toksuite/lingua_logs/meta-llama-Llama-3.2-1B-seed_777_model_seed_222/", "seed": 777, "grad_acc_steps": 8, "gc_collect_freq": 1000, "probe_freq": null, "steps": 100000, "data": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.4, "cmn_Hani": 0.15, "tur_Latn": 0.15, "ita_Latn": 0.15, "fas_Arab": 0.15}, "batch_size": 4, "seq_len": 4096, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 1024, "tokenizer": {"name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "load_supermapping": false, "dropout": 0.0, "seed": 42, "superset_code_name": "super_vocab", "n_words": null}, "routing": {"source_to_tokenizer": {}, "task_to_tokenizer": {}, "suitable_tokenizer_probability": 1.0}}, "optim": {"lr": 0.001, "weight_decay": 0.1, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 2000, "lr_min_ratio": 1e-06, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5}, "model": {"dim": 2048, "n_layers": 25, "head_dim": null, "n_heads": 16, "n_kv_heads": null, "ffn_dim_multiplier": null, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 10000.0, "init_base_std": null, "init_std_factor": "disabled", "max_seqlen": 4096, "seed": 222, "vocab_size": 128256, "weight_tying": false, "sliding_window": null, "use_factorized_embeddings": false, "factorized_embedding_dim": 0}, "distributed": {"dp_shard": 1, "dp_replicate": 8, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": false, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver"}, "env": {"MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"}, "checkpoint": {"dump": {"every": 6000, "keep": -1}, "eval": {"every": 2000, "keep": -1}, "path": "/fsx/craffel/toksuite/lingua_logs/meta-llama-Llama-3.2-1B-seed_777_model_seed_222/checkpoints", "init_ckpt_path": null, "load_init_optimizer_state": false, "save_init_ckpt": false}, "profiling": {"run": true, "trace_folder": "profiling", "mem_warmup": 0, "mem_steps": 4, "profile_warmup": 100, "profile_steps": 4}, "logging": {"freq": 1, "acc_freq": null, "wandb": null}, "async_eval_gpus": 8, "eval": {"harness": {"tasks": ["hellaswag", "piqa", "arc_easy", "arc_challenge", "include_base_44_turkish", "include_base_44_italian", "include_base_44_chinese", "belebele_pes_Arab", "belebele_eng_Latn", "belebele_ita_Latn", "belebele_tur_Latn", "belebele_zho_Hans", "xnli_en", "xnli_tr", "xnli_zh"]}, "generator": {"max_tokens": 8192, "dtype": "bf16", "add_bos": false}}}
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 3809, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.4, "cmn_Hani": 0.15, "tur_Latn": 0.15, "ita_Latn": 0.15, "fas_Arab": 0.15}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.00.jsonl", "position": 28923659462, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.00.jsonl", "position": 8878702149, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.00.jsonl", "position": 3514187030, "block_size": 1, "offset": 0, "current_iter": 1}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.00.jsonl", "position": 7725981215, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.00.jsonl", "position": 1039867211, "block_size": 1, "offset": 0, "current_iter": 3}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 252006778087284745682378847932169343221, "inc": 252101603063402394885084957393789173453}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42, "superset_code_name": "super_vocab", "n_words": null, "routing": {"source_to_tokenizer": {}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 300569569296678341640414112158566886944, "inc": 257317082376085721142933171929815648017}, "has_uint32": 1, "uinteger": 630457105}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00001.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 22372, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.4, "cmn_Hani": 0.15, "tur_Latn": 0.15, "ita_Latn": 0.15, "fas_Arab": 0.15}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.01.jsonl", "position": 28911907797, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.01.jsonl", "position": 8866600855, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.01.jsonl", "position": 3523243125, "block_size": 1, "offset": 0, "current_iter": 1}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.01.jsonl", "position": 7748009227, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.01.jsonl", "position": 1034977015, "block_size": 1, "offset": 0, "current_iter": 3}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 150660440047832350023741814646711888543, "inc": 246509925186285949978196491240064802315}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42, "superset_code_name": "super_vocab", "n_words": null, "routing": {"source_to_tokenizer": {}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 298609680158430271867266436931522339460, "inc": 173555323965545256606922338259303677603}, "has_uint32": 1, "uinteger": 1182378492}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00002.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 25, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.4, "cmn_Hani": 0.15, "tur_Latn": 0.15, "ita_Latn": 0.15, "fas_Arab": 0.15}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.02.jsonl", "position": 28915311047, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.02.jsonl", "position": 8865983864, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.02.jsonl", "position": 3510401929, "block_size": 1, "offset": 0, "current_iter": 1}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.02.jsonl", "position": 7739133096, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.02.jsonl", "position": 1040424836, "block_size": 1, "offset": 0, "current_iter": 3}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 84443802827960551292957336347060699986, "inc": 234358335530849485425064040311006256713}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42, "superset_code_name": "super_vocab", "n_words": null, "routing": {"source_to_tokenizer": {}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 172206642884315098528897268843500314535, "inc": 319170006889470250209362588441616495209}, "has_uint32": 1, "uinteger": 2183176397}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00003.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 323, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.4, "cmn_Hani": 0.15, "tur_Latn": 0.15, "ita_Latn": 0.15, "fas_Arab": 0.15}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.03.jsonl", "position": 28929581476, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.03.jsonl", "position": 8855899388, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.03.jsonl", "position": 3532541572, "block_size": 1, "offset": 0, "current_iter": 1}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.03.jsonl", "position": 7744244902, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.03.jsonl", "position": 1039231639, "block_size": 1, "offset": 0, "current_iter": 3}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 247719738647945436892247052549358623267, "inc": 148211758571781046255077612135386035203}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42, "superset_code_name": "super_vocab", "n_words": null, "routing": {"source_to_tokenizer": {}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 54691439170624420224489263842165716902, "inc": 115810872492597857501795428972873905393}, "has_uint32": 1, "uinteger": 1237320779}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00004.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 535, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.4, "cmn_Hani": 0.15, "tur_Latn": 0.15, "ita_Latn": 0.15, "fas_Arab": 0.15}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.04.jsonl", "position": 28940176490, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.04.jsonl", "position": 8870400674, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.04.jsonl", "position": 3541056680, "block_size": 1, "offset": 0, "current_iter": 1}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.04.jsonl", "position": 7731913299, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.04.jsonl", "position": 1004032538, "block_size": 1, "offset": 0, "current_iter": 3}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 89393135109349170892217878116501711473, "inc": 186633262021180533256729114674950595327}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42, "superset_code_name": "super_vocab", "n_words": null, "routing": {"source_to_tokenizer": {}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 330736315247521707224292038935126153445, "inc": 303111205818808944921858206842105131807}, "has_uint32": 1, "uinteger": 211256137}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00005.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 307, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.4, "cmn_Hani": 0.15, "tur_Latn": 0.15, "ita_Latn": 0.15, "fas_Arab": 0.15}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.05.jsonl", "position": 28945621310, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.05.jsonl", "position": 8861977844, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.05.jsonl", "position": 3526810869, "block_size": 1, "offset": 0, "current_iter": 1}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.05.jsonl", "position": 7739356867, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.05.jsonl", "position": 1000810212, "block_size": 1, "offset": 0, "current_iter": 3}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 196809890920812555086752416725913898418, "inc": 329233669073478483697346584247981015037}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42, "superset_code_name": "super_vocab", "n_words": null, "routing": {"source_to_tokenizer": {}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 152527936818944850177382785176910810558, "inc": 47382953940698287647753879262736142901}, "has_uint32": 1, "uinteger": 3675784275}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00006.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 6621, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.4, "cmn_Hani": 0.15, "tur_Latn": 0.15, "ita_Latn": 0.15, "fas_Arab": 0.15}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.06.jsonl", "position": 28953365878, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.06.jsonl", "position": 8871564781, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.06.jsonl", "position": 3541407922, "block_size": 1, "offset": 0, "current_iter": 1}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.06.jsonl", "position": 7723234587, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.06.jsonl", "position": 989393492, "block_size": 1, "offset": 0, "current_iter": 3}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 65085220618383211160166919022876062037, "inc": 95963489890761403814531195999220475639}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42, "superset_code_name": "super_vocab", "n_words": null, "routing": {"source_to_tokenizer": {}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 131915213571014219328634582687476804074, "inc": 72545526324180839152750112646078969085}, "has_uint32": 0, "uinteger": 2783644522}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/train_state_00007.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 95, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.4, "cmn_Hani": 0.15, "tur_Latn": 0.15, "ita_Latn": 0.15, "fas_Arab": 0.15}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.07.jsonl", "position": 28904321208, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.07.jsonl", "position": 8859495407, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.07.jsonl", "position": 3539713423, "block_size": 1, "offset": 0, "current_iter": 1}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.07.jsonl", "position": 7741636969, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.07.jsonl", "position": 1037894696, "block_size": 1, "offset": 0, "current_iter": 3}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 45700460280623489407278938463076623728, "inc": 53245743019587277358203950863334653629}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42, "superset_code_name": "super_vocab", "n_words": null, "routing": {"source_to_tokenizer": {}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 239803483826676955776584746976189400951, "inc": 19761753544780285878460645500694854795}, "has_uint32": 1, "uinteger": 513301027}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}