Add files using upload-large-folder tool
Browse files- adapter_config.json +22 -0
- adapter_model.bin +3 -0
- description.txt +28 -0
- pytorch_model.bin +3 -0
adapter_config.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 3 |
+
"bias": "none",
|
| 4 |
+
"fan_in_fan_out": false,
|
| 5 |
+
"inference_mode": true,
|
| 6 |
+
"init_lora_weights": true,
|
| 7 |
+
"lora_alpha": 16,
|
| 8 |
+
"lora_dropout": 0.05,
|
| 9 |
+
"modules_to_save": null,
|
| 10 |
+
"peft_type": "LORA",
|
| 11 |
+
"r": 8,
|
| 12 |
+
"target_modules": [
|
| 13 |
+
"q_proj",
|
| 14 |
+
"k_proj",
|
| 15 |
+
"v_proj",
|
| 16 |
+
"o_proj",
|
| 17 |
+
"gate_proj",
|
| 18 |
+
"down_proj",
|
| 19 |
+
"up_proj"
|
| 20 |
+
],
|
| 21 |
+
"task_type": "CAUSAL_LM"
|
| 22 |
+
}
|
adapter_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0af8457efb5c85d278f6af6fbf53da01d5837c53ff2fc08276d4d5f7ef2bd471
|
| 3 |
+
size 81108490
|
description.txt
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- Training Parameters:
|
| 2 |
+
- base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
|
| 3 |
+
- save_ckpt_log_name: DeepSeek-R1-Distill-Llama-8B_s0.10_channel
|
| 4 |
+
- pruning_ratio: 0.1
|
| 5 |
+
- pruner_type: taylor
|
| 6 |
+
- temperature: 1.0
|
| 7 |
+
- top_p: 0.95
|
| 8 |
+
- max_seq_len: 2048
|
| 9 |
+
- channel_wise: True
|
| 10 |
+
- block_wise: False
|
| 11 |
+
- layer_wise: False
|
| 12 |
+
- layer: 12
|
| 13 |
+
- block_attention_layer_start: 3
|
| 14 |
+
- block_attention_layer_end: 31
|
| 15 |
+
- block_mlp_layer_start: 3
|
| 16 |
+
- block_mlp_layer_end: 31
|
| 17 |
+
- iterative_steps: 1
|
| 18 |
+
- grouping_strategy: sum
|
| 19 |
+
- global_pruning: False
|
| 20 |
+
- taylor: param_first
|
| 21 |
+
- num_examples: 10
|
| 22 |
+
- device: cpu
|
| 23 |
+
- test_before_train: False
|
| 24 |
+
- eval_device: cuda
|
| 25 |
+
- test_after_train: False
|
| 26 |
+
- seed: 42
|
| 27 |
+
- save_model: True
|
| 28 |
+
- torch_version: 2.3
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:014615562499b0975d174619771dc35f493adafa5af411145d234372df135c85
|
| 3 |
+
size 14462119278
|