s1pruning / description.txt
moon4sake's picture
Add files using upload-large-folder tool
c89243a verified
raw
history blame contribute delete
734 Bytes
- Training Parameters:
- base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
- save_ckpt_log_name: DeepSeek-R1-Distill-Llama-8B_s0.10_channel
- pruning_ratio: 0.1
- pruner_type: taylor
- temperature: 1.0
- top_p: 0.95
- max_seq_len: 2048
- channel_wise: True
- block_wise: False
- layer_wise: False
- layer: 12
- block_attention_layer_start: 3
- block_attention_layer_end: 31
- block_mlp_layer_start: 3
- block_mlp_layer_end: 31
- iterative_steps: 1
- grouping_strategy: sum
- global_pruning: False
- taylor: param_first
- num_examples: 10
- device: cpu
- test_before_train: False
- eval_device: cuda
- test_after_train: False
- seed: 42
- save_model: True
- torch_version: 2.3