LequeuISIR
/

RoPE-NeoBERT-768

Model card Files Files and versions

RoPE-NeoBERT-768 / config.json

LequeuISIR's picture

Upload NeoBERT

2892e87 verified 30 days ago

history blame contribute delete

1.32 kB

	{
	"AP_embeddings": false,
	"architectures": [
	"NeoBERT"
	],
	"attention_activation": "softmax",
	"attention_ativation": "softmax",
	"attention_probs_dropout_prob": 0.1,
	"base_scale": 0.03227486121839514,
	"classifier_init_range": 0.02,
	"decoder_init_range": 0.02,
	"dim_head": 128,
	"dropout_prob": 0,
	"embedding_init_range": 0.02,
	"entropy_regularization_lambda": 0.01,
	"flash_attention": false,
	"hidden_act": "swiglu",
	"hidden_size": 768,
	"intermediate_size": 3072,
	"kwargs": {
	"attention_ativation": "softmax",
	"classifier_init_range": 0.02,
	"entropy_regularization_lambda": 0.01
	},
	"max_length": 512,
	"mix_attentions": "sum",
	"mixed_feed_forward": true,
	"model_type": "neobert",
	"ngpt": false,
	"norm_eps": 1e-05,
	"num_attention_heads": 6,
	"num_hidden_layers": 6,
	"pad_token_id": 0,
	"pos_dropout_prob": 0.1,
	"pos_intermediate_size": 1536,
	"pos_size": 384,
	"positional_embed_init": "random",
	"posneobert": false,
	"random_offset": false,
	"relative_pos_bias": false,
	"rms_norm": true,
	"rope": true,
	"scale_QK_dim": true,
	"share_pos_embeds_in_heads": false,
	"shared_pos_keys": false,
	"torch_dtype": "float32",
	"transformers_version": "4.46.3",
	"untie_cls": false,
	"use_only_sem_for_decoding": false,
	"vocab_size": 30522
	}