aduncan94 commited on
Commit
469e88f
·
verified ·
1 Parent(s): 9779421

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ---
2
+ {}
3
+ ---
4
+
5
+ # EnhancAR
6
+
__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from .configuration_enhancar import EnhancARConfig
2
+ from .modeling_enhancar import EnhancARModel
3
+
4
+ __all__ = ["EnhancARConfig", "EnhancARModel"]
config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "hidden_size": 256,
4
+ "intermediate_size": 1024,
5
+ "num_hidden_layers": 24,
6
+ "num_attention_heads": 16,
7
+ "num_key_value_heads": 8,
8
+ "use_mamba_kernels": true,
9
+ "mamba_d_state": 16,
10
+ "mamba_d_conv": 4,
11
+ "mamba_expand": 2,
12
+ "mamba_dt_rank": "auto",
13
+ "mamba_conv_bias": true,
14
+ "mamba_proj_bias": false,
15
+ "output_router_logits": true,
16
+ "use_cache": false,
17
+ "_attn_implementation": "flash_attention_2",
18
+ "vocab_size": 16,
19
+ "pad_token_id": 6,
20
+ "bos_token_id": 9,
21
+ "eos_token_id": 7
22
+ },
23
+ "alphabet": [
24
+ "G",
25
+ "A",
26
+ "T",
27
+ "C",
28
+ "N",
29
+ "-",
30
+ "!",
31
+ "*",
32
+ "/",
33
+ "@",
34
+ "[",
35
+ "]",
36
+ "{",
37
+ "}"
38
+ ],
39
+ "model_type": "enhancar",
40
+ "architectures": [
41
+ "EnhancARModel"
42
+ ],
43
+ "auto_map": {
44
+ "AutoConfig": "configuration_enhancar.EnhancARConfig",
45
+ "AutoModel": "modeling_enhancar.EnhancARModel"
46
+ }
47
+ }
configuration_enhancar.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from transformers import PretrainedConfig
2
+ class EnhancARConfig(PretrainedConfig):
3
+ model_type = "enhancar"
4
+
5
+ def __init__(self, **kwargs):
6
+ super().__init__(**kwargs)
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7095d07d96c0ff232a2b65755274ecddf36269752e696937b3ac3911c7e1db4a
3
+ size 681263120
modeling_enhancar.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from transformers import AutoConfig, AutoModelForCausalLM, PreTrainedModel, AutoModel
5
+ from configuration_enhancar import EnhancARConfig
6
+
7
+ class EnhancARDNAModel(nn.Module):
8
+ def __init__(self, jamba_base, d_model, vocab_size):
9
+ super().__init__()
10
+ self.embedder = jamba_base.model
11
+ self.lm_head = nn.Linear(d_model, vocab_size)
12
+ self.seq_embedding = nn.Embedding(vocab_size, d_model)
13
+
14
+ def forward(self, input_ids, labels=None):
15
+ inputs_embeds = self.seq_embedding(input_ids)
16
+ outputs = self.embedder(inputs_embeds=inputs_embeds)
17
+ hidden_states = outputs["last_hidden_state"]
18
+ logits = self.lm_head(hidden_states)
19
+
20
+ loss = None
21
+ if labels is not None:
22
+ shift_logits = logits[..., :-1, :].contiguous()
23
+ shift_labels = labels[..., 1:].contiguous()
24
+
25
+ loss = F.cross_entropy(
26
+ shift_logits.view(-1, shift_logits.size(-1)),
27
+ shift_labels.view(-1)
28
+ )
29
+
30
+ return {
31
+ "loss": loss,
32
+ "logits": logits,
33
+ "representation": hidden_states
34
+ }
35
+
36
+ # ── The Hugging Face PreTrainedModel Wrapper ──────────────────────────────
37
+
38
+ class EnhancARModel(PreTrainedModel):
39
+ config_class = EnhancARConfig
40
+ base_model_prefix = "model"
41
+ _supports_flash_attn_2 = True
42
+
43
+ def __init__(self, config: EnhancARConfig):
44
+ super().__init__(config)
45
+
46
+ hf_cfg = AutoConfig.from_pretrained("ai21labs/Jamba-v0.1", trust_remote_code=True)
47
+ merged = {**hf_cfg.to_dict(), **config.to_dict()}
48
+ hf_cfg = type(hf_cfg).from_dict(merged)
49
+
50
+ base_jamba = AutoModelForCausalLM.from_config(hf_cfg, trust_remote_code=True)
51
+
52
+ self.model = EnhancARDNAModel(
53
+ base_jamba,
54
+ d_model=config.hidden_size,
55
+ vocab_size=config.vocab_size
56
+ )
57
+ self.post_init()
58
+
59
+ def forward(self, input_ids, labels=None, **kwargs):
60
+ return self.model(input_ids=input_ids, labels=labels)
tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alphabet": [
3
+ "G",
4
+ "A",
5
+ "T",
6
+ "C",
7
+ "N",
8
+ "-",
9
+ "!",
10
+ "*",
11
+ "/",
12
+ "@",
13
+ "[",
14
+ "]",
15
+ "{",
16
+ "}"
17
+ ]
18
+ }