step 30000
Browse files
- config.json +3 -0
- model.safetensors +2 -2
- tokenizer_config.json +1 -1
config.json
CHANGED
|
@@ -79,6 +79,7 @@
|
|
| 79 |
"vocab_size": 151936
|
| 80 |
},
|
| 81 |
"base_model_name_or_path": "Qwen/Qwen3-1.7B",
|
|
|
|
| 82 |
"decoder_layer_indices": [
|
| 83 |
27
|
| 84 |
],
|
|
@@ -86,8 +87,10 @@
|
|
| 86 |
0,
|
| 87 |
1
|
| 88 |
],
|
|
|
|
| 89 |
"hidden_size": 2048,
|
| 90 |
"model_type": "lds",
|
|
|
|
| 91 |
"q_threshold": 0.9,
|
| 92 |
"reasoning_layer_indices": [
|
| 93 |
2,
|
|
|
|
| 79 |
"vocab_size": 151936
|
| 80 |
},
|
| 81 |
"base_model_name_or_path": "Qwen/Qwen3-1.7B",
|
| 82 |
+
"convergence_epsilon": 0.01,
|
| 83 |
"decoder_layer_indices": [
|
| 84 |
27
|
| 85 |
],
|
|
|
|
| 87 |
0,
|
| 88 |
1
|
| 89 |
],
|
| 90 |
+
"halting_strategy": "threshold",
|
| 91 |
"hidden_size": 2048,
|
| 92 |
"model_type": "lds",
|
| 93 |
+
"q_eval_interval": 1,
|
| 94 |
"q_threshold": 0.9,
|
| 95 |
"reasoning_layer_indices": [
|
| 96 |
2,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9941e40dc7bc4b53b026ac31948d48b3ba5bcce90b32f895423a15d09cd010d
|
| 3 |
+
size 4065655516
|
tokenizer_config.json
CHANGED
|
@@ -20,7 +20,7 @@
|
|
| 20 |
"<|image_pad|>",
|
| 21 |
"<|video_pad|>"
|
| 22 |
],
|
| 23 |
-
"is_local":
|
| 24 |
"model_max_length": 131072,
|
| 25 |
"pad_token": "<|endoftext|>",
|
| 26 |
"split_special_tokens": false,
|
|
|
|
| 20 |
"<|image_pad|>",
|
| 21 |
"<|video_pad|>"
|
| 22 |
],
|
| 23 |
+
"is_local": false,
|
| 24 |
"model_max_length": 131072,
|
| 25 |
"pad_token": "<|endoftext|>",
|
| 26 |
"split_special_tokens": false,
|