Upload folder using huggingface_hub
Browse files- .gitattributes +4 -0
- checkpoint_e00_20260217_163202/config.json +15 -0
- checkpoint_e00_20260217_163202/metadata.json +5 -0
- checkpoint_e00_20260217_163202/model_state/_CHECKPOINT_METADATA +1 -0
- checkpoint_e00_20260217_163202/model_state/_METADATA +1 -0
- checkpoint_e00_20260217_163202/model_state/_sharding +1 -0
- checkpoint_e00_20260217_163202/model_state/array_metadatas/process_0 +1 -0
- checkpoint_e00_20260217_163202/model_state/d/2cde0cbc9f20d63487e4bf92a00bc276 +0 -0
- checkpoint_e00_20260217_163202/model_state/manifest.ocdbt +0 -0
- checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/3b2d6eb5d6276e880df86873678f2b58 +3 -0
- checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/4a454a6b7ed822c8dfb458d0437a11ac +3 -0
- checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/63d04dbf90d603fdd36849c7ccc87d7f +0 -0
- checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/7e07ec9ce8b36dc1257e05bff3d6d8c7 +3 -0
- checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/b933d2e6575980532c0bdafcce4e1fff +3 -0
- checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/cfc4bcc5b41e48c72587216b64c8a00c +0 -0
- checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/e88bf8efd4ede58beee3b5633af9e10c +0 -0
- checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/manifest.ocdbt +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/3b2d6eb5d6276e880df86873678f2b58 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/4a454a6b7ed822c8dfb458d0437a11ac filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/7e07ec9ce8b36dc1257e05bff3d6d8c7 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/b933d2e6575980532c0bdafcce4e1fff filter=lfs diff=lfs merge=lfs -text
|
checkpoint_e00_20260217_163202/config.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"img_size": 64,
|
| 3 |
+
"patch_size": 4,
|
| 4 |
+
"dim_raw": 48,
|
| 5 |
+
"channels": 3,
|
| 6 |
+
"dim_bottleneck": 128,
|
| 7 |
+
"dim_model": 256,
|
| 8 |
+
"depth": 6,
|
| 9 |
+
"heads": 8,
|
| 10 |
+
"mlp_dim": 1024,
|
| 11 |
+
"batch_size": 64,
|
| 12 |
+
"lr": 0.0001,
|
| 13 |
+
"epochs": 1,
|
| 14 |
+
"seed": 42
|
| 15 |
+
}
|
checkpoint_e00_20260217_163202/metadata.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 0,
|
| 3 |
+
"timestamp": "20260217_163202",
|
| 4 |
+
"global_step": 0
|
| 5 |
+
}
|
checkpoint_e00_20260217_163202/model_state/_CHECKPOINT_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"item_handlers": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1771345922308901417, "commit_timestamp_nsecs": 1771345922407744730, "custom_metadata": {}}
|
checkpoint_e00_20260217_163202/model_state/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('blocks', '0', 'attn', 'c_attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "c_attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 768]}}, "('blocks', '0', 'attn', 'c_proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "c_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 256]}}, "('blocks', '0', 'attn', 'omega', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "omega", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 32]}}, "('blocks', '0', 'ln1', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '0', 'ln1', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '0', 'ln2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '0', 'ln2', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '0', 'mlp', 'c_fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "c_fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '0', 'mlp', 'c_proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "c_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 256]}}, "('blocks', '0', 'mlp', 'gate', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '1', 'attn', 'c_attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "c_attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 768]}}, "('blocks', '1', 'attn', 'c_proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "c_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 256]}}, "('blocks', '1', 'ln1', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '1', 'ln1', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '1', 'ln2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '1', 'ln2', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '1', 'mlp', 'c_fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "c_fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '1', 'mlp', 'c_proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "c_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 256]}}, "('blocks', '1', 'mlp', 'gate', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '2', 'attn', 'c_attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "c_attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 768]}}, "('blocks', '2', 'attn', 'c_proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "c_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 256]}}, "('blocks', '2', 'ln1', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '2', 'ln1', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '2', 'ln2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '2', 'ln2', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '2', 'mlp', 'c_fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "c_fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '2', 'mlp', 'c_proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "c_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 256]}}, "('blocks', '2', 'mlp', 'gate', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '3', 'attn', 'c_attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "c_attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 768]}}, "('blocks', '3', 'attn', 'c_proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "c_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 256]}}, "('blocks', '3', 'ln1', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '3', 'ln1', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '3', 'ln2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '3', 'ln2', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '3', 'mlp', 'c_fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "c_fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '3', 'mlp', 'c_proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "c_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 256]}}, "('blocks', '3', 'mlp', 'gate', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '4', 'attn', 'c_attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "c_attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 768]}}, "('blocks', '4', 'attn', 'c_proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "c_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 256]}}, "('blocks', '4', 'ln1', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '4', 'ln1', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '4', 'ln2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '4', 'ln2', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '4', 'mlp', 'c_fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "c_fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '4', 'mlp', 'c_proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "c_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 256]}}, "('blocks', '4', 'mlp', 'gate', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '5', 'attn', 'c_attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "c_attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 768]}}, "('blocks', '5', 'attn', 'c_proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "c_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 256]}}, "('blocks', '5', 'ln1', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '5', 'ln1', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '5', 'ln2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '5', 'ln2', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '5', 'mlp', 'c_fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "c_fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '5', 'mlp', 'c_proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "c_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 256]}}, "('blocks', '5', 'mlp', 'gate', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('head', 'kernel', 'value')": {"key_metadata": [{"key": "head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 48]}}, "('ln_f', 'bias', 'value')": {"key_metadata": [{"key": "ln_f", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('ln_f', 'scale', 'value')": {"key_metadata": [{"key": "ln_f", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('patch_embed', 'compress', 'kernel', 'value')": {"key_metadata": [{"key": "patch_embed", "key_type": 2}, {"key": "compress", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [48, 128]}}, "('patch_embed', 'expand', 'kernel', 'value')": {"key_metadata": [{"key": "patch_embed", "key_type": 2}, {"key": "expand", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 256]}}, "('time_embed', 'layers', '0', 'bias', 'value')": {"key_metadata": [{"key": "time_embed", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('time_embed', 'layers', '0', 'kernel', 'value')": {"key_metadata": [{"key": "time_embed", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 256]}}, "('time_embed', 'layers', '2', 'bias', 'value')": {"key_metadata": [{"key": "time_embed", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('time_embed', 'layers', '2', 'kernel', 'value')": {"key_metadata": [{"key": "time_embed", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 256]}}}, "use_ocdbt": true, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
checkpoint_e00_20260217_163202/model_state/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"YmxvY2tzLjAuYXR0bi5jX2F0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjAuYXR0bi5jX3Byb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjAuYXR0bi5vbWVnYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjAubG4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjAubG4xLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjAubG4yLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjAubG4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjAubWxwLmNfZmMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjAubWxwLmNfcHJvai5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjAubWxwLmdhdGUua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjEuYXR0bi5jX2F0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjEuYXR0bi5jX3Byb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjEubG4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjEubG4xLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjEubG4yLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjEubG4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjEubWxwLmNfZmMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjEubWxwLmNfcHJvai5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjEubWxwLmdhdGUua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjIuYXR0bi5jX2F0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjIuYXR0bi5jX3Byb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjIubG4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjIubG4xLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjIubG4yLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjIubG4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjIubWxwLmNfZmMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjIubWxwLmNfcHJvai5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjIubWxwLmdhdGUua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjMuYXR0bi5jX2F0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjMuYXR0bi5jX3Byb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjMubG4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjMubG4xLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjMubG4yLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjMubG4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjMubWxwLmNfZmMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjMubWxwLmNfcHJvai5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjMubWxwLmdhdGUua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjQuYXR0bi5jX2F0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjQuYXR0bi5jX3Byb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjQubG4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjQubG4xLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjQubG4yLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjQubG4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjQubWxwLmNfZmMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjQubWxwLmNfcHJvai5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjQubWxwLmdhdGUua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjUuYXR0bi5jX2F0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjUuYXR0bi5jX3Byb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjUubG4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjUubG4xLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjUubG4yLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjUubG4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjUubWxwLmNfZmMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjUubWxwLmNfcHJvai5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLjUubWxwLmdhdGUua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","aGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bG5fZi5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bG5fZi5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGF0Y2hfZW1iZWQuY29tcHJlc3Mua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGF0Y2hfZW1iZWQuZXhwYW5kLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","dGltZV9lbWJlZC5sYXllcnMuMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","dGltZV9lbWJlZC5sYXllcnMuMC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","dGltZV9lbWJlZC5sYXllcnMuMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","dGltZV9lbWJlZC5sYXllcnMuMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}"}
|
checkpoint_e00_20260217_163202/model_state/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "blocks.0.attn.c_attn.kernel.value", "write_shape": [256, 768], "chunk_shape": [256, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.attn.c_proj.kernel.value", "write_shape": [256, 256], "chunk_shape": [256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.attn.omega.value", "write_shape": [256, 32], "chunk_shape": [256, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.ln1.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.ln1.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.ln2.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.ln2.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.mlp.c_fc.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.mlp.c_proj.kernel.value", "write_shape": [1024, 256], "chunk_shape": [1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.mlp.gate.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.attn.c_attn.kernel.value", "write_shape": [256, 768], "chunk_shape": [256, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.attn.c_proj.kernel.value", "write_shape": [256, 256], "chunk_shape": [256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.ln1.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.ln1.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.ln2.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.ln2.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.mlp.c_fc.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.mlp.c_proj.kernel.value", "write_shape": [1024, 256], "chunk_shape": [1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.mlp.gate.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.attn.c_attn.kernel.value", "write_shape": [256, 768], "chunk_shape": [256, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.attn.c_proj.kernel.value", "write_shape": [256, 256], "chunk_shape": [256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.ln1.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.ln1.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.ln2.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.ln2.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.mlp.c_fc.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.mlp.c_proj.kernel.value", "write_shape": [1024, 256], "chunk_shape": [1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.mlp.gate.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.attn.c_attn.kernel.value", "write_shape": [256, 768], "chunk_shape": [256, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.attn.c_proj.kernel.value", "write_shape": [256, 256], "chunk_shape": [256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.ln1.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.ln1.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.ln2.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.ln2.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.mlp.c_fc.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.mlp.c_proj.kernel.value", "write_shape": [1024, 256], "chunk_shape": [1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.mlp.gate.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.attn.c_attn.kernel.value", "write_shape": [256, 768], "chunk_shape": [256, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.attn.c_proj.kernel.value", "write_shape": [256, 256], "chunk_shape": [256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.ln1.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.ln1.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.ln2.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.ln2.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.mlp.c_fc.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.mlp.c_proj.kernel.value", "write_shape": [1024, 256], "chunk_shape": [1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.mlp.gate.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.attn.c_attn.kernel.value", "write_shape": [256, 768], "chunk_shape": [256, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.attn.c_proj.kernel.value", "write_shape": [256, 256], "chunk_shape": [256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.ln1.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.ln1.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.ln2.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.ln2.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.mlp.c_fc.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.mlp.c_proj.kernel.value", "write_shape": [1024, 256], "chunk_shape": [1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.mlp.gate.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "head.kernel.value", "write_shape": [256, 48], "chunk_shape": [256, 48], "ext_metadata": null}}, {"array_metadata": {"param_name": "ln_f.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "ln_f.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "patch_embed.compress.kernel.value", "write_shape": [48, 128], "chunk_shape": [48, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "patch_embed.expand.kernel.value", "write_shape": [128, 256], "chunk_shape": [128, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "time_embed.layers.0.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "time_embed.layers.0.kernel.value", "write_shape": [1, 256], "chunk_shape": [1, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "time_embed.layers.2.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "time_embed.layers.2.kernel.value", "write_shape": [256, 256], "chunk_shape": [256, 256], "ext_metadata": null}}]}
|
checkpoint_e00_20260217_163202/model_state/d/2cde0cbc9f20d63487e4bf92a00bc276
ADDED
|
Binary file (2.21 kB). View file
|
|
|
checkpoint_e00_20260217_163202/model_state/manifest.ocdbt
ADDED
|
Binary file (117 Bytes). View file
|
|
|
checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/3b2d6eb5d6276e880df86873678f2b58
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2bcee25b161661df4d403f5cac17a9ef85e221179103a6723c76c3a66f9f7602
|
| 3 |
+
size 972769
|
checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/4a454a6b7ed822c8dfb458d0437a11ac
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:847da29fac2ae63a357f90e7d6e3bcab48fb70673e192ea14f4fa58e1dbd547c
|
| 3 |
+
size 11567104
|
checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/63d04dbf90d603fdd36849c7ccc87d7f
ADDED
|
Binary file (410 Bytes). View file
|
|
|
checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/7e07ec9ce8b36dc1257e05bff3d6d8c7
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44299fc3f8fdbd0c04f65d0879d72d48ba2229bff880e303aef45d9d4bbaaee6
|
| 3 |
+
size 9211904
|
checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/b933d2e6575980532c0bdafcce4e1fff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1b5d36ae11081cc42a0f85a8fa44f0c90421df4cd81648b7406ff07da2f8565
|
| 3 |
+
size 1942130
|
checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/cfc4bcc5b41e48c72587216b64c8a00c
ADDED
|
Binary file (401 Bytes). View file
|
|
|
checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/d/e88bf8efd4ede58beee3b5633af9e10c
ADDED
|
Binary file (259 Bytes). View file
|
|
|
checkpoint_e00_20260217_163202/model_state/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (380 Bytes). View file
|
|
|