simonko912 committed (verified)
Commit 38a5de0 · Parent(s): 8b5c229

Upload 5 files

Files changed (4):
  1. config.json +6 -5
  2. model.safetensors +2 -2
  3. tokenizer.json +0 -0
  4. tokenizer_config.json +9 -4
config.json CHANGED
@@ -12,11 +12,12 @@
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
-  "n_embd": 384,
-  "n_head": 6,
+  "n_ctx": 128,
+  "n_embd": 128,
+  "n_head": 4,
   "n_inner": null,
-  "n_layer": 6,
-  "n_positions": 64,
+  "n_layer": 4,
+  "n_positions": 128,
   "pad_token_id": null,
   "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
@@ -30,5 +31,5 @@
   "tie_word_embeddings": true,
   "transformers_version": "5.1.0",
   "use_cache": false,
-  "vocab_size": 5132
+  "vocab_size": 50257
 }
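
In short, the commit swaps a 6-layer, 384-wide model over a 5,132-token vocabulary for a 4-layer, 128-wide one over the full 50,257-token GPT-2 vocabulary, and doubles the context from 64 to 128 positions. A minimal sketch of instantiating the new shape, assuming the usual GPT2Config keyword arguments; the snippet is illustrative, not part of the commit:

```python
from transformers import GPT2Config, GPT2LMHeadModel

# Mirror the updated config.json from this commit.
config = GPT2Config(
    n_ctx=128,        # mirrored from the JSON (newer transformers mainly uses n_positions)
    n_embd=128,
    n_head=4,
    n_inner=None,     # MLP width defaults to 4 * n_embd
    n_layer=4,
    n_positions=128,
    vocab_size=50257,
    tie_word_embeddings=True,
    use_cache=False,
)

model = GPT2LMHeadModel(config)
print(f"{model.num_parameters():,} parameters")
```

With tied embeddings this works out to roughly 7.24M parameters; at 4 bytes each (assuming fp32 weights) that is about 28.97 MB, which lines up with the 28,975,536-byte model.safetensors below, the few extra kilobytes being the safetensors header.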
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46a2e93734366f4b79c10662412855473c915aae796424242799c13b90882867
-size 50578800
+oid sha256:cacb396227a35c0063ae2fea8df1523eed1be96bb23142e3ecda01aec8e18340
+size 28975536
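
A quick way to confirm that a downloaded copy of the weights matches the new LFS pointer; the local path is an assumption:

```python
import hashlib

# Values from the Git LFS pointer in this commit.
EXPECTED_OID = "cacb396227a35c0063ae2fea8df1523eed1be96bb23142e3ecda01aec8e18340"
EXPECTED_SIZE = 28_975_536

with open("model.safetensors", "rb") as f:  # assumed local path
    data = f.read()

assert len(data) == EXPECTED_SIZE, f"size mismatch: {len(data)} bytes"
assert hashlib.sha256(data).hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("model.safetensors matches the LFS pointer")
```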
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,7 +1,12 @@
 {
+  "add_prefix_space": false,
   "backend": "tokenizers",
-  "eos_token": "</s>",
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "[PAD]",
-  "tokenizer_class": "TokenizersBackend"
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "is_local": false,
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
 }
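
The old file carried Hugging Face's "effectively unlimited" sentinel (about 1e30) for model_max_length, plus </s> and [PAD] specials that do not exist in a GPT-2 vocabulary; the new one pins all four special tokens to <|endoftext|> and caps inputs at 1024 tokens, per GPT-2 convention. A sketch of loading and inspecting the result; the repo id is a placeholder, since the commit page does not show it:

```python
from transformers import AutoTokenizer

# Placeholder repo id -- substitute the actual repository name.
tok = AutoTokenizer.from_pretrained("simonko912/your-model")

# All four special tokens should now be <|endoftext|>.
print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)
print(tok.model_max_length)  # 1024
print(tok("hello world").input_ids)
```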