{
  "architectures": [
    "Apriel2ForConditionalGeneration"
  ],
  "auto_map": {
    "AutoConfig": "configuration_apriel2.Apriel2Config",
    "AutoModel": "modeling_apriel2.Apriel2Model",
    "AutoModelForCausalLM": "modeling_apriel2.Apriel2ForCausalLM",
    "AutoModelForImageTextToText": "modeling_apriel2.Apriel2ForConditionalGeneration"
  },
  "bos_token_id": 1,
  "decoder": {
    "block": {
      "mixer": {
        "main_mixer_name": "attention",
        "mixers": {
          "attention": {
            "add_linear_biases": false,
            "head_groups": 8,
            "head_size": 128,
            "heads": 32,
            "rotary": {
              "theta": 1000000000.0,
              "type": "mistral_1d"
            },
            "type": "attention",
            "window_size": null
          },
          "gdn": {
            "convolution_layer": {
              "kernel_size": 4
            },
            "key_head_dim": 128,
            "key_heads": 8,
            "type": "gdn",
            "value_head_dim": 128,
            "value_heads": 32
          },
          "kda": {
            "convolution_layer": {
              "kernel_size": 4
            },
            "head_dim": 128,
            "heads": 32,
            "normalization": {
              "epsilon": 1e-05
            },
            "type": "kda"
          },
          "sliding_window": {
            "add_linear_biases": false,
            "head_groups": 8,
            "head_size": 128,
            "heads": 32,
            "rotary": {
              "theta": 1000000000.0,
              "type": "mistral_1d"
            },
            "type": "attention",
            "window_size": 4096
          }
        },
        "sampling_strategy": "uniform",
        "type": "stochastic"
      },
      "mlp": {
        "activation": "silu",
        "add_linear_biases": false,
        "gated": true,
        "intermediate_size": 14336,
        "type": "mlp"
      },
      "normalization": {
        "epsilon": 1e-05,
        "type": "rms_norm"
      }
    },
    "num_blocks": 48,
    "type": "fixed"
  },
  "embeddings": {
    "max_position_embeddings": 120000
  },
  "eos_token_id": 2,
  "head": {
    "normalization": {
      "epsilon": 1e-05,
      "type": "rms_norm"
    }
  },
  "hidden_size": 5120,
  "image_token_index": 10,
  "model_type": "apriel2",
  "tie_word_embeddings": false,
  "transformers_version": "4.57.3",
  "use_cache": true,
  "vision_encoder": {
    "adapter": {
      "activation": "gelu_pytorch_tanh",
      "add_linear_biases": true,
      "gated": false,
      "intermediate_size": 5120,
      "type": "mlp"
    },
    "embeddings": {
      "input_channels": 3,
      "normalization": {
        "epsilon": 1e-05,
        "type": "rms_norm"
      },
      "patch_height": 16,
      "patch_width": 16
    },
    "encoder": {
      "block": {
        "mixer": {
          "add_linear_biases": false,
          "causal": false,
          "cross_document_attention": false,
          "head_groups": 16,
          "head_size": 64,
          "heads": 16,
          "rotary": {
            "max_image_size": 1024,
            "patch_size": 16,
            "theta": 10000.0,
            "type": "pixtral_2d"
          },
          "type": "attention"
        },
        "mlp": {
          "activation": "silu",
          "add_linear_biases": false,
          "gated": true,
          "intermediate_size": 4096,
          "type": "mlp"
        },
        "normalization": {
          "epsilon": 1e-05,
          "type": "rms_norm"
        }
      },
      "num_blocks": 24,
      "type": "fixed"
    },
    "hidden_size": 1024,
    "num_channels": 3,
    "num_hidden_layers": 24,
    "patch_size": 16
  },
  "vocab_size": 131072
}
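
Because auto_map routes the Auto* classes to custom code (configuration_apriel2.py and modeling_apriel2.py, shipped alongside this config), loading requires trust_remote_code=True. A minimal loading sketch; the local path "./apriel2" is a placeholder for whatever directory holds config.json and the two Python files:

# Load the custom Apriel2 config and model through the auto_map entries above.
# "./apriel2" is a hypothetical local directory containing config.json plus
# configuration_apriel2.py and modeling_apriel2.py.
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("./apriel2", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("./apriel2", trust_remote_code=True)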