fix: add default factor=1.0 for linear rope compat with newer transformers
Browse files- modeling_llada2_moe.py +5 -0
modeling_llada2_moe.py
CHANGED
|
@@ -104,6 +104,11 @@ class LLaDA2MoeRotaryEmbedding(nn.Module):
|
|
| 104 |
# BC: "default" was removed from ROPE_INIT_FUNCTIONS in newer transformers
|
| 105 |
if self.rope_type == "default":
|
| 106 |
self.rope_type = "linear"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
self.max_seq_len_cached = config.max_position_embeddings
|
| 108 |
self.original_max_seq_len = config.max_position_embeddings
|
| 109 |
|
|
|
|
| 104 |
# BC: "default" was removed from ROPE_INIT_FUNCTIONS in newer transformers
|
| 105 |
if self.rope_type == "default":
|
| 106 |
self.rope_type = "linear"
|
| 107 |
+
# Ensure rope_scaling / rope_parameters carry a "factor" for linear rope (defaults to 1.0, i.e. no scaling)
|
| 108 |
+
if hasattr(config, "rope_scaling") and config.rope_scaling is not None:
|
| 109 |
+
config.rope_scaling.setdefault("factor", 1.0)
|
| 110 |
+
if hasattr(config, "rope_parameters") and config.rope_parameters is not None:
|
| 111 |
+
config.rope_parameters.setdefault("factor", 1.0)
|
| 112 |
self.max_seq_len_cached = config.max_position_embeddings
|
| 113 |
self.original_max_seq_len = config.max_position_embeddings
|
| 114 |
|