moon4sake committed on
Commit
c89243a
·
verified ·
1 Parent(s): 9d08c4c

Add files using upload-large-folder tool

Browse files
Files changed (4) hide show
  1. adapter_config.json +22 -0
  2. adapter_model.bin +3 -0
  3. description.txt +28 -0
  4. pytorch_model.bin +3 -0
adapter_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
3
+ "bias": "none",
4
+ "fan_in_fan_out": false,
5
+ "inference_mode": true,
6
+ "init_lora_weights": true,
7
+ "lora_alpha": 16,
8
+ "lora_dropout": 0.05,
9
+ "modules_to_save": null,
10
+ "peft_type": "LORA",
11
+ "r": 8,
12
+ "target_modules": [
13
+ "q_proj",
14
+ "k_proj",
15
+ "v_proj",
16
+ "o_proj",
17
+ "gate_proj",
18
+ "down_proj",
19
+ "up_proj"
20
+ ],
21
+ "task_type": "CAUSAL_LM"
22
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0af8457efb5c85d278f6af6fbf53da01d5837c53ff2fc08276d4d5f7ef2bd471
3
+ size 81108490
description.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ - Training Parameters:
2
+ - base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
3
+ - save_ckpt_log_name: DeepSeek-R1-Distill-Llama-8B_s0.10_channel
4
+ - pruning_ratio: 0.1
5
+ - pruner_type: taylor
6
+ - temperature: 1.0
7
+ - top_p: 0.95
8
+ - max_seq_len: 2048
9
+ - channel_wise: True
10
+ - block_wise: False
11
+ - layer_wise: False
12
+ - layer: 12
13
+ - block_attention_layer_start: 3
14
+ - block_attention_layer_end: 31
15
+ - block_mlp_layer_start: 3
16
+ - block_mlp_layer_end: 31
17
+ - iterative_steps: 1
18
+ - grouping_strategy: sum
19
+ - global_pruning: False
20
+ - taylor: param_first
21
+ - num_examples: 10
22
+ - device: cpu
23
+ - test_before_train: False
24
+ - eval_device: cuda
25
+ - test_after_train: False
26
+ - seed: 42
27
+ - save_model: True
28
+ - torch_version: 2.3
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:014615562499b0975d174619771dc35f493adafa5af411145d234372df135c85
3
+ size 14462119278