andrebarrosilva1123 committed on
Commit
20478e2
·
verified ·
1 Parent(s): c34c1be

epoch 22534 | block 8112294 | steps 175 | loss 4.0805 | sim_abs 0.103 sq_acc 0.00

Browse files
Files changed (3) hide show
  1. config.json +3 -3
  2. generation_config.json +3 -6
  3. model.safetensors +1 -1
config.json CHANGED
@@ -3,7 +3,7 @@
3
  "Mamba2ForCausalLM"
4
  ],
5
  "bos_token_id": null,
6
- "chunk_size": 512,
7
  "conv_kernel": 4,
8
  "dtype": "bfloat16",
9
  "eos_token_id": 151645,
@@ -11,14 +11,14 @@
11
  "head_dim": 64,
12
  "hidden_act": "silu",
13
  "hidden_size": 1024,
14
- "initializer_range": 0.02,
15
  "layer_norm_epsilon": 1e-05,
16
  "model_type": "mamba2",
17
  "n_groups": 1,
18
  "num_heads": 32,
19
  "num_hidden_layers": 48,
20
  "pad_token_id": 151643,
21
- "rescale_prenorm_residual": true,
22
  "residual_in_fp32": true,
23
  "rms_norm": true,
24
  "state_size": 128,
 
3
  "Mamba2ForCausalLM"
4
  ],
5
  "bos_token_id": null,
6
+ "chunk_size": 256,
7
  "conv_kernel": 4,
8
  "dtype": "bfloat16",
9
  "eos_token_id": 151645,
 
11
  "head_dim": 64,
12
  "hidden_act": "silu",
13
  "hidden_size": 1024,
14
+ "initializer_range": 0.1,
15
  "layer_norm_epsilon": 1e-05,
16
  "model_type": "mamba2",
17
  "n_groups": 1,
18
  "num_heads": 32,
19
  "num_hidden_layers": 48,
20
  "pad_token_id": 151643,
21
+ "rescale_prenorm_residual": false,
22
  "residual_in_fp32": true,
23
  "rms_norm": true,
24
  "state_size": 128,
generation_config.json CHANGED
@@ -1,11 +1,8 @@
1
  {
2
  "_from_model_config": true,
3
- "eos_token_id": [
4
- 151645
5
- ],
6
- "output_attentions": false,
7
- "output_hidden_states": false,
8
  "pad_token_id": 151643,
9
  "transformers_version": "5.7.0",
10
- "use_cache": false
11
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 151645,
 
 
 
5
  "pad_token_id": 151643,
6
  "transformers_version": "5.7.0",
7
+ "use_cache": true
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1714930c07abd3051b1ede301f09cdf97ae7efb25225e9597c08336fe7d9dc6a
3
  size 944367904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e4c35ee9ef90c9a4785261dc3e934e01f2fb493b951452f3c6459c0c8abb233
3
  size 944367904