# mergekit configuration: merges two fine-tuned models with the `nuslerp`
# method, using `base_model` as the common reference.
#
# NOTE(review): this file was reconstructed from a copy in which every line was
# wrapped in table-cell pipes and all indentation was lost. Keys and values are
# unchanged; nesting follows the mergekit config schema — confirm against the
# original recipe before running.

models:
  # Each model carries a list of `weight` rules. A rule with `filter` applies
  # to tensors whose name matches the filter; the final rule without a filter
  # is the fallback for every other tensor. List values are gradients that
  # mergekit interpolates across layer depth (first -> last layer).
  - model: CrucibleLab/L3.3-70B-Loki-V2.0
    parameters:
      weight:
        - filter: q_proj
          value: [0.80, 0.30, 0.30, 0.30, 0.8]
        - filter: k_proj
          value: [0.70, 0.20, 0.20, 0.20, 0.7]
        - filter: v_proj
          value: [0.80, 0.40, 0.40, 0.40, 0.8]
        - filter: o_proj
          value: [0.90, 0.80, 0.80, 0.80, 0.9]
        - filter: gate_proj
          value: [0.80, 0.20, 0.20, 0.20, 0.8]
        - filter: up_proj
          value: [0.80, 0.30, 0.30, 0.30, 0.8]
        - filter: down_proj
          value: [0.90, 0.80, 0.80, 0.80, 0.9]
        - filter: lm_head
          value: 0.95
        # Fallback: tensors not matched above come entirely from this model
        # (the other model's fallback weight is 0).
        - value: 1
  - model: schonsense/Tropoplectic
    parameters:
      weight:
        - filter: q_proj
          value: [0.20, 0.70, 0.70, 0.70, 0.2]
        - filter: k_proj
          value: [0.30, 0.80, 0.80, 0.80, 0.3]
        - filter: v_proj
          value: [0.20, 0.60, 0.60, 0.60, 0.2]
        # NOTE(review): for o_proj and down_proj the middle values of the two
        # models (0.80 and 0.25) sum to 1.05, whereas every other filter row
        # sums to 1.0. nuslerp weights are relative, so this presumably still
        # works — confirm whether 0.20 was intended.
        - filter: o_proj
          value: [0.10, 0.25, 0.25, 0.25, 0.1]
        - filter: gate_proj
          value: [0.20, 0.80, 0.80, 0.80, 0.2]
        - filter: up_proj
          value: [0.20, 0.70, 0.70, 0.70, 0.2]
        - filter: down_proj
          value: [0.10, 0.25, 0.25, 0.25, 0.1]
        - filter: lm_head
          value: 0.05
        - value: 0

base_model: meta-llama/Llama-3.1-70B
merge_method: nuslerp

# NOTE(review): `normalize`, `int8_mask` and `rescale` are not among the
# parameters documented for nuslerp (weight, nuslerp_flatten,
# nuslerp_row_wise); presumably they are ignored by this merge method —
# confirm before relying on them.
parameters:
  normalize: false
  int8_mask: false
  rescale: false

# Merge arithmetic is done in float32; the result is written out as bfloat16.
dtype: float32
out_dtype: bfloat16

chat_template: llama3

tokenizer:
  # Output vocabulary is the union of the input models' tokenizers.
  source: union
  # NOTE(review): placed under `tokenizer` per the mergekit schema; the
  # flattened original did not show the nesting — confirm.
  pad_to_multiple_of: 8