| # configuration_qwen2_hybrid.py | |
| from transformers.models.qwen2.configuration_qwen2 import Qwen2Config | |
class Qwen2HybridConfig(Qwen2Config):
    """Configuration for the ``qwen2_hybrid`` model type.

    Extends ``Qwen2Config`` with extra fields describing a hybrid attention
    layout. With the default values the layers (0-based) are split as:

    * layers 0-6: GQA
    * layer 7: Shared MLA
    * layers 8-22: Soft Mid
    * layers 23-27: Soft Deep

    Args:
        gqa_layers: Number of leading GQA layers (default 7, i.e. layers 0-6).
        shared_layer_idx: Index of the Shared MLA layer (default 7).
        soft_mid_layers_end: End of the Soft Mid range; with the default 23
            the range is 8-22, so the bound appears to be exclusive.
        soft_deep_layers_end: End of the Soft Deep range; with the default 28
            the range is 23-27, so the bound appears to be exclusive.
        gqa_sliding_window: Sliding-window value for the GQA layers
            (presumably a token count; confirm against the modeling code).
        soft_sliding_window: Sliding-window value for the Soft layers
            (presumably a token count; confirm against the modeling code).
        shared_rank: Rank value for the Shared MLA layer (presumably a
            low-rank projection size; confirm against the modeling code).
        soft_rank_mid: Rank value for the Soft Mid layers (same caveat).
        soft_rank_deep: Rank value for the Soft Deep layers (same caveat).
        sink_size: Sink size (presumably the number of attention-sink
            tokens; confirm against the modeling code).
        **kwargs: Forwarded unchanged to ``Qwen2Config.__init__``.
    """

    model_type = "qwen2_hybrid"

    def __init__(
        self,
        gqa_layers=7,  # layers 0-6 are GQA
        shared_layer_idx=7,  # layer 7 is Shared MLA
        soft_mid_layers_end=23,  # layers 8-22 are Soft Mid
        soft_deep_layers_end=28,  # layers 23-27 are Soft Deep
        gqa_sliding_window=32768,
        soft_sliding_window=8192,
        shared_rank=320,
        soft_rank_mid=192,
        soft_rank_deep=128,
        sink_size=64,
        **kwargs,
    ):
        # Collect the hybrid-specific fields in declaration order so they are
        # stored on the instance in the same sequence as the signature.
        hybrid_fields = {
            "gqa_layers": gqa_layers,
            "shared_layer_idx": shared_layer_idx,
            "soft_mid_layers_end": soft_mid_layers_end,
            "soft_deep_layers_end": soft_deep_layers_end,
            "gqa_sliding_window": gqa_sliding_window,
            "soft_sliding_window": soft_sliding_window,
            "shared_rank": shared_rank,
            "soft_rank_mid": soft_rank_mid,
            "soft_rank_deep": soft_rank_deep,
            "sink_size": sink_size,
        }
        for field_name, field_value in hybrid_fields.items():
            setattr(self, field_name, field_value)

        # The hybrid fields are set before the parent constructor runs; all
        # remaining keyword arguments are handled by Qwen2Config.
        super().__init__(**kwargs)