Hoist original_max_position_embeddings to the top level of rope_parameters as a workaround for an upstream transformers rope-utils bug that raises a KeyError on nested yarn configs. The per-layer-type rope sub-dicts are unchanged, and runtime behavior is unaffected.
config.json (+2 -1)

@@ -49,7 +49,8 @@
       "rope_type": "default",
       "rope_theta": 10000.0,
       "partial_rotary_factor": 1.0
-    }
+    },
+    "original_max_position_embeddings": 4096
   },
   "layer_types": [
     "full_attention",
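For context, a minimal sketch of the failure mode this works around, assuming the upstream rope-utils helper reads the key with a flat lookup on rope_parameters. The "full_attention" sub-dict key (inferred from layer_types) and the lookup itself are illustrative assumptions, not the actual transformers internals:

# Hypothetical reproduction of the KeyError; the dict shape mirrors this
# repo's config.json, but the flat lookup below only stands in for the
# upstream rope-utils code, which may differ.
rope_parameters = {
    "full_attention": {  # per-layer-type sub-dict, left unchanged by this commit
        "rope_type": "default",
        "rope_theta": 10000.0,
        "partial_rotary_factor": 1.0,
    },
    # Hoisted to the top level by this commit so a flat read succeeds:
    "original_max_position_embeddings": 4096,
}

# Before the hoist, the key presumably lived only inside nested per-layer
# sub-dicts, so a top-level access like this one raised KeyError:
print(rope_parameters["original_max_position_embeddings"])  # 4096

Since loaders only read the extra top-level key, the duplicate placement keeps older and patched transformers versions happy without touching the per-layer rope settings.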