jacobfa1
/

attractor-370m

Text Generation

Model card Files Files and versions

xet

Community

jacobfa1 committed 13 days ago

Commit

84feedc

verified ·

1 Parent(s): bed9a3c

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

config.json +65 -0
model.pt +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "name": "eqlm-medium-370m",
+  "hf_config": {
+    "org": "SandyResearch",
+    "name": "eqlm-medium-370m"
+  },
+  "block_size": 2048,
+  "n_embd": 1280,
+  "intermediate_size": 5120,
+  "num_attention_heads": 10,
+  "num_key_value_heads": 10,
+  "vocab_size": 32768,
+  "padding_multiple": 64,
+  "padded_vocab_size": 32768,
+  "rope_settings": {
+    "use_rope": true,
+    "rope_condense_ratio": 1,
+    "rope_base": 50000
+  },
+  "use_abacus": false,
+  "randomize_positions_from": null,
+  "block_class_name": "TransformerPreNormBlock",
+  "norm_class_name": "RMSNorm",
+  "attn_impl": "flash",
+  "norm_eps": 1e-05,
+  "mlp_class_name": "BaseMLP",
+  "nonlin_name": "ReLU2",
+  "bias": false,
+  "qk_bias": false,
+  "init_strategy": "scaled-zero",
+  "init_orthogonal": true,
+  "skip_initialization": false,
+  "mup_model_scaling_factor": 1,
+  "use_fused_head": "pytorch",
+  "debias_attention": false,
+  "center_attention": false,
+  "clip_qkv": null,
+  "qk_norm": true,
+  "logit_softcap": null,
+  "causal": true,
+  "activation_checkpoint_impl": "per-block",
+  "simple_ops": false,
+  "strategy": "ddp",
+  "n_backbone_layers": 15,
+  "n_fp_blocks": 2,
+  "tie_embeddings": true,
+  "solver": "anderson",
+  "max_iter": 64,
+  "min_iter": 8,
+  "tol": 0.0002,
+  "anderson_m": 5,
+  "anderson_beta": 1.0,
+  "backward_type": "onestep",
+  "backward_max_iter": 64,
+  "backward_min_iter": 6,
+  "backward_tol": 0.0002,
+  "adjoint_grad_clip": null,
+  "layer_scale_init": 0.75,
+  "gamma_max": 1.0,
+  "fp_lr_scale": 0.4,
+  "fp_wd": 0.1,
+  "recurrent_embedding_dimension": 1280,
+  "model_class_name": "EQLM",
+  "_class_name": "EQLMConfig"
+}

model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:baf74b7473fa869c770230cddccc2e6b3ae493851971465f731a8d6496e87bef
+size 1506002359