PyraCode-1.5B / configuration_qwen2_hybrid.py
abcsk123's picture
Upload folder using huggingface_hub
f98f57f verified
# configuration_qwen2_hybrid.py
from transformers.models.qwen2.configuration_qwen2 import Qwen2Config
class Qwen2HybridConfig(Qwen2Config):
    """Qwen2 configuration extended with hybrid-attention layout settings.

    Each argument listed below is stored unchanged as an instance attribute
    of the same name. Any remaining keyword arguments are forwarded to
    ``Qwen2Config.__init__``.

    Args:
        gqa_layers: With the default of 7, layers 0-6 are GQA layers.
        shared_layer_idx: Index of the Shared MLA layer (layer 7 by default).
        soft_mid_layers_end: With the default of 23, layers 8-22 are the
            "Soft Mid" layers.
        soft_deep_layers_end: With the default of 28, layers 23-27 are the
            "Soft Deep" layers.
        gqa_sliding_window: Presumably the sliding-window size used by the
            GQA layers -- TODO confirm against the modeling code.
        soft_sliding_window: Presumably the sliding-window size used by the
            soft layers -- TODO confirm against the modeling code.
        shared_rank: Presumably the rank used by the shared layer -- TODO
            confirm against the modeling code.
        soft_rank_mid: Presumably the rank used by the Soft Mid layers --
            TODO confirm against the modeling code.
        soft_rank_deep: Presumably the rank used by the Soft Deep layers --
            TODO confirm against the modeling code.
        sink_size: Stored as-is; NOTE(review): looks like an attention-sink
            size -- confirm against the modeling code.
        **kwargs: Passed through to ``Qwen2Config.__init__``.
    """

    model_type = "qwen2_hybrid"

    def __init__(
        self,
        gqa_layers=7,  # layers 0-6 are GQA
        shared_layer_idx=7,  # layer 7 is the Shared MLA layer
        soft_mid_layers_end=23,  # layers 8-22 are Soft Mid
        soft_deep_layers_end=28,  # layers 23-27 are Soft Deep
        gqa_sliding_window=32768,
        soft_sliding_window=8192,
        shared_rank=320,
        soft_rank_mid=192,
        soft_rank_deep=128,
        sink_size=64,
        **kwargs,
    ):
        # Layer layout: where each kind of layer begins and ends.
        (
            self.gqa_layers,
            self.shared_layer_idx,
            self.soft_mid_layers_end,
            self.soft_deep_layers_end,
        ) = (
            gqa_layers,
            shared_layer_idx,
            soft_mid_layers_end,
            soft_deep_layers_end,
        )

        # Sliding-window sizes.
        self.gqa_sliding_window, self.soft_sliding_window = (
            gqa_sliding_window,
            soft_sliding_window,
        )

        # Rank settings.
        self.shared_rank, self.soft_rank_mid, self.soft_rank_deep = (
            shared_rank,
            soft_rank_mid,
            soft_rank_deep,
        )

        self.sink_size = sink_size

        # The base Qwen2 configuration consumes everything else; it is
        # initialised after the hybrid fields have been set.
        super().__init__(**kwargs)