{
  "model_type": "semitic-gpt",
  "architectures": [
    "SemiticGPT"
  ],
  "vocab_size": 32000,
  "hidden_size": 3072,
  "num_hidden_layers": 26,
  "num_attention_heads": 24,
  "intermediate_size": 8192,
  "max_position_embeddings": 2048,
  "rope_theta": 10000.0,
  "rope_dim": 64,
  "rms_norm_eps": 1e-06,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "num_parameters": 3140000000,
  "tokenizer": "sentencepiece",
  "tokenizer_model": "multilingual_32k.model",
  "languages": [
    "Hebrew (he)",
    "Arabic (ar)",
    "Farsi/Persian (fa)",
    "English (en)"
  ],
  "training": {
    "pretraining_tokens": "4.48B",
    "pretraining_languages": "Hebrew (40%), Arabic (20%), Farsi (20%), English (20%)",
    "sft_v4_samples": 36980,
    "sft_optimizer": "AdamW",
    "sft_lr": "2e-5",
    "sft_steps": 8000
  }
}