Add files using upload-large-folder tool

Files changed (4) hide show

adapter_config.json ADDED Viewed

+{
+  "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "gate_proj",
+    "down_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

adapter_model.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:0af8457efb5c85d278f6af6fbf53da01d5837c53ff2fc08276d4d5f7ef2bd471
+size 81108490

description.txt ADDED Viewed

+- Training Parameters:
+  - base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+  - save_ckpt_log_name: DeepSeek-R1-Distill-Llama-8B_s0.10_channel
+  - pruning_ratio: 0.1
+  - pruner_type: taylor
+  - temperature: 1.0
+  - top_p: 0.95
+  - max_seq_len: 2048
+  - channel_wise: True
+  - block_wise: False
+  - layer_wise: False
+  - layer: 12
+  - block_attention_layer_start: 3
+  - block_attention_layer_end: 31
+  - block_mlp_layer_start: 3
+  - block_mlp_layer_end: 31
+  - iterative_steps: 1
+  - grouping_strategy: sum
+  - global_pruning: False
+  - taylor: param_first
+  - num_examples: 10
+  - device: cpu
+  - test_before_train: False
+  - eval_device: cuda
+  - test_after_train: False
+  - seed: 42
+  - save_model: True
+  - torch_version: 2.3

pytorch_model.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:014615562499b0975d174619771dc35f493adafa5af411145d234372df135c85
+size 14462119278