Zandy-Wandy committed on
Commit
0f8cbc9
·
verified ·
1 Parent(s): 5c43f61

Upload Vortex model

Browse files
Files changed (1) hide show
  1. vortex_config.py +16 -19
vortex_config.py CHANGED
@@ -1,38 +1,36 @@
1
  """
2
- Vortex-7B model configuration.
3
- Optimized for 8GB VRAM (4060 laptop) and MacBook Pro M2/M3.
4
  """
5
 
6
- VORTEX_7B_CONFIG = {
7
  # Model dimensions
8
- "d_model": 4096,
9
- "num_layers": 32,
10
- "num_heads": 32,
11
  "head_dim": 128, # d_model // num_heads
12
 
13
  # State-space layer parameters
14
- "d_state": 16, # SSM state dimension
15
  "d_conv": 4, # SSM convolution width
16
 
17
  # Attention parameters
18
  "window_size": 512, # Local attention window
19
- "use_flash_attention": True, # CUDA only
20
 
21
  # Feed-forward parameters
22
- "ffn_expansion": 4, # Hidden dim = d_model * expansion
23
- "num_domains": 7, # Physics, Math, Chemistry, Biology, Earth, Space, Zoology
24
-
25
- # Tokenizer parameters
26
  "vocab_size": 50000,
27
  "max_seq_len": 16384,
28
 
29
- # Layer ratio: 60% SSM, 40% attention
30
- "ssm_ratio": 0.6,
31
 
32
  # Data types
33
  "dtype": "bfloat16",
34
 
35
- # Special tokens
36
  "special_tokens": {
37
  "[PAD]": 0,
38
  "[UNK]": 1,
@@ -55,10 +53,9 @@ VORTEX_7B_CONFIG = {
55
  "[ZOO]": 18,
56
  },
57
 
58
- # Domain tags
59
  "domain_tags": ["[MATH]", "[CHEM]", "[BIO]", "[PHYS]", "[EARTH]", "[SPACE]", "[ZOO]"],
60
 
61
- # Science module flags (enable/disable for ablation)
62
  "enable_equation_module": True,
63
  "enable_numerical_module": True,
64
  "enable_citation_module": True,
@@ -67,5 +64,5 @@ VORTEX_7B_CONFIG = {
67
 
68
 
69
def get_config():
    """Fetch the Vortex-7B configuration.

    Returns:
        dict: the module-level ``VORTEX_7B_CONFIG`` mapping of
        hyperparameter names to their values.
    """
    config = VORTEX_7B_CONFIG
    return config
 
1
  """
2
+ Vortex-13B model configuration.
3
+ Optimized for 16GB VRAM (4060 Ti laptop) and MacBook Pro M3 Max.
4
  """
5
 
6
+ VORTEX_13B_CONFIG = {
7
  # Model dimensions
8
+ "d_model": 5120,
9
+ "num_layers": 40,
10
+ "num_heads": 40,
11
  "head_dim": 128, # d_model // num_heads
12
 
13
  # State-space layer parameters
14
+ "d_state": 32, # SSM state dimension (larger for bigger model)
15
  "d_conv": 4, # SSM convolution width
16
 
17
  # Attention parameters
18
  "window_size": 512, # Local attention window
19
+ "use_flash_attention": True,
20
 
21
  # Feed-forward parameters
22
+ "ffn_expansion": 4,
23
+ "num_domains": 7,
 
 
24
  "vocab_size": 50000,
25
  "max_seq_len": 16384,
26
 
27
+ # Layer ratio: 50% SSM, 50% attention (more memory for attention)
28
+ "ssm_ratio": 0.5,
29
 
30
  # Data types
31
  "dtype": "bfloat16",
32
 
33
+ # Special tokens (same as 7B)
34
  "special_tokens": {
35
  "[PAD]": 0,
36
  "[UNK]": 1,
 
53
  "[ZOO]": 18,
54
  },
55
 
 
56
  "domain_tags": ["[MATH]", "[CHEM]", "[BIO]", "[PHYS]", "[EARTH]", "[SPACE]", "[ZOO]"],
57
 
58
+ # Science module flags
59
  "enable_equation_module": True,
60
  "enable_numerical_module": True,
61
  "enable_citation_module": True,
 
64
 
65
 
66
def get_config():
    """Fetch the Vortex-13B configuration.

    Returns:
        dict: the module-level ``VORTEX_13B_CONFIG`` mapping of
        hyperparameter names to their values.
    """
    config = VORTEX_13B_CONFIG
    return config