ronnengmail committed
Commit 4b368cd · verified · 1 Parent(s): f276b17

Upload config.json with huggingface_hub

Files changed (1):
  1. config.json +21 -28
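
For reference, a minimal sketch of the kind of call that produces a commit like this one (per the commit message, the file was uploaded with huggingface_hub). The repo_id and local path below are placeholders, not taken from this page:

from huggingface_hub import HfApi

api = HfApi()  # authenticates via the token stored by `huggingface-cli login`
api.upload_file(
    path_or_fileobj="config.json",      # local file to upload
    path_in_repo="config.json",         # destination path inside the repo
    repo_id="ronnengmail/semitic-gpt",  # placeholder repo id (assumption)
    commit_message="Upload config.json with huggingface_hub",
)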
config.json CHANGED
@@ -1,41 +1,34 @@
 {
-  "model_type": "gpt",
+  "model_type": "semitic-gpt",
   "architectures": [
-    "MultilingualGPT"
+    "SemiticGPT"
   ],
   "vocab_size": 32000,
   "hidden_size": 3072,
   "num_hidden_layers": 26,
   "num_attention_heads": 24,
-  "head_dim": 128,
-  "max_position_embeddings": 2048,
   "intermediate_size": 8192,
-  "activation_function": "swiglu",
-  "normalization": "rmsnorm",
-  "position_encoding": "rope",
+  "max_position_embeddings": 2048,
   "rope_theta": 10000.0,
-  "total_params": "3.14B",
-  "tokenizer_type": "sentencepiece",
-  "tokenizer_vocab_size": 32000,
-  "bos_token": "<s>",
-  "eos_token": "</s>",
-  "pad_token": "<pad>",
-  "special_tokens": [
-    "<|user|>",
-    "<|assistant|>",
-    "<s>",
-    "</s>",
-    "<pad>"
+  "rope_dim": 64,
+  "rms_norm_eps": 1e-06,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "num_parameters": 3140000000,
+  "tokenizer": "sentencepiece",
+  "tokenizer_model": "multilingual_32k.model",
+  "languages": [
+    "Hebrew (he)",
+    "Arabic (ar)",
+    "Farsi/Persian (fa)",
+    "English (en)"
   ],
   "training": {
-    "optimizer": "AdamW",
-    "learning_rate": 0.0003,
-    "schedule": "cosine_decay",
-    "warmup_steps": 2000,
-    "batch_size_tokens": 524288,
-    "weight_decay": 0.1,
-    "gradient_clip": 1.0,
-    "precision": "bf16",
-    "total_tokens": "~20B"
+    "pretraining_tokens": "4.48B",
+    "pretraining_languages": "Hebrew (40%), Arabic (20%), Farsi (20%), English (20%)",
+    "sft_v4_samples": 36980,
+    "sft_optimizer": "AdamW",
+    "sft_lr": "2e-5",
+    "sft_steps": 8000
   }
 }
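
The dropped fields remain derivable from what the new config keeps, and the updated "num_parameters" can be cross-checked from the dimensions. A hedged sketch of that check (it assumes a SwiGLU MLP with three weight matrices and no biases, consistent with the old config's "activation_function": "swiglu", and untied embeddings per "tie_word_embeddings": false; norm scales are ignored as negligible):

import json

with open("config.json") as f:
    cfg = json.load(f)

# head_dim was removed from the config but follows from the remaining fields.
head_dim = cfg["hidden_size"] // cfg["num_attention_heads"]  # 3072 // 24 = 128
assert cfg["rope_dim"] <= head_dim  # rotary covers 64 of the 128 dims per head

h, ffn = cfg["hidden_size"], cfg["intermediate_size"]
layers, vocab = cfg["num_hidden_layers"], cfg["vocab_size"]

attn = 4 * h * h            # Q, K, V, O projections
mlp = 3 * h * ffn           # gate/up/down projections (SwiGLU assumption)
embeddings = 2 * vocab * h  # input embeddings + LM head (untied)

total = layers * (attn + mlp) + embeddings
print(f"~{total / 1e9:.2f}B parameters")  # ~3.14B, matching "num_parameters"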