from typing import Any

from transformers import PretrainedConfig, Qwen3Config

try:
    from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig
except ImportError:
    print('Please upgrade transformers to version 4.46.3 or higher')


class POINTSGUIConfig(PretrainedConfig):
    """Configuration class for `POINTSGUI`."""

    model_type = "points_gui"
    is_composition = True

    def __init__(self, **kwargs) -> None:
        # Pop the sub-configs before calling the parent constructor so the raw
        # dicts are not also stored as plain attributes.
        vision_config = kwargs.pop("vision_config", None)
        llm_config = kwargs.pop("llm_config", None)
        super().__init__(**kwargs)
        if isinstance(vision_config, dict):
            self.vision_config = Qwen2VLVisionConfig(**vision_config)
        else:
            self.vision_config = vision_config
        if isinstance(llm_config, dict):
            self.llm_config = Qwen3Config(**llm_config)
        else:
            self.llm_config = llm_config
        if self.llm_config is None:
            # Nothing to mirror when no language-model config is provided.
            return
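
        # Mirror the language-model hyper-parameters on the top-level config,
        # presumably so that code expecting a flat Qwen3-style config can read
        # them directly.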
        self.vocab_size = self.llm_config.vocab_size
        self.max_position_embeddings = self.llm_config.max_position_embeddings
        self.hidden_size = self.llm_config.hidden_size
        self.intermediate_size = self.llm_config.intermediate_size
        self.num_hidden_layers = self.llm_config.num_hidden_layers
        self.num_attention_heads = self.llm_config.num_attention_heads
        self.use_sliding_window = self.llm_config.use_sliding_window
        self.sliding_window = self.llm_config.sliding_window
        self.max_window_layers = self.llm_config.max_window_layers

        # Fall back to standard multi-head attention when the checkpoint does
        # not specify a key/value head count.
        if self.llm_config.num_key_value_heads is None:
            self.llm_config.num_key_value_heads = self.llm_config.num_attention_heads
        self.num_key_value_heads = self.llm_config.num_key_value_heads
        self.head_dim = self.llm_config.head_dim
        self.hidden_act = self.llm_config.hidden_act
        self.initializer_range = self.llm_config.initializer_range
        self.rms_norm_eps = self.llm_config.rms_norm_eps
        self.use_cache = self.llm_config.use_cache
        self.rope_theta = self.llm_config.rope_theta
        self.rope_scaling = self.llm_config.rope_scaling
        self.attention_bias = self.llm_config.attention_bias
        self.attention_dropout = self.llm_config.attention_dropout

        # Normalize legacy rope_scaling entries: expose the old "type" key as
        # "rope_type" and map "mrope" to "default", since mrope falls back to
        # the default RoPE computation.
        if self.rope_scaling is not None and "type" in self.rope_scaling:
            if self.rope_scaling["type"] == "mrope":
                self.rope_scaling["type"] = "default"
            self.rope_scaling["rope_type"] = self.rope_scaling["type"]

        # ``tie_word_embeddings`` is handled by the base ``PretrainedConfig``;
        # override it with the value from the language-model config.
        self.tie_word_embeddings = self.llm_config.tie_word_embeddings
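

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only, not tied to any released
    # checkpoint): build the composite config from default Qwen2-VL vision and
    # Qwen3 LLM sub-configs and print a few of the mirrored flat fields.
    demo_config = POINTSGUIConfig(
        vision_config=Qwen2VLVisionConfig().to_dict(),
        llm_config=Qwen3Config().to_dict(),
    )
    print(demo_config.model_type, demo_config.hidden_size,
          demo_config.num_key_value_heads)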