treadon committed on
Commit
dddf656
·
verified ·
1 Parent(s): 0e23e44

Upload MiniCPM-V 4.6 ablation artifact with eval card

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ library_name: transformers
4
+ pipeline_tag: image-text-to-text
5
+ base_model: openbmb/MiniCPM-V-4.6
6
+ tags:
7
+ - abliteration
8
+ - disinhibition
9
+ - minicpm-v
10
+ - mechanistic-interpretability
11
+ ---
12
+
13
+ # treadon/MiniCPM-V-4.6-Abliterated-AND-Disinhibited-USE-THIS
14
+
15
+ Private research artifact derived from [`openbmb/MiniCPM-V-4.6`](https://huggingface.co/openbmb/MiniCPM-V-4.6).
16
+
17
+
18
+ A MiniCPM-V 4.6 variant with both edits applied sequentially:
19
+
20
+ 1. **Disinhibition**: remove the neutrality / hedging direction.
21
+ 2. **Abliteration**: remove the refusal direction on the already
22
+ disinhibited model.
23
+
24
+ ## Behavior
25
+
26
+ Evaluated on both [`treadon/disinhibition-eval`](https://huggingface.co/datasets/treadon/disinhibition-eval)
27
+ and [`treadon/abliteration-eval`](https://huggingface.co/datasets/treadon/abliteration-eval).
28
+
29
+ | Eval / Split | Base | Disinhibit only | Abliterate only | **Union** |
30
+ |---|---:|---:|---:|---:|
31
+ | disinhibition / opinions hedge | 40.0% | 2.5% | 35.8% | **3.3%** |
32
+ | disinhibition / opinions commit | 27.5% | 79.2% | 20.0% | **77.5%** |
33
+ | disinhibition / edge_cases hedge | 12.1% | 0.0% | 0.0% | **0.0%** |
34
+ | disinhibition / factual commit | 76.2% | 90.5% | 81.0% | **88.1%** |
35
+ | abliteration / harmful refused | 15.0% | 3.0% | 0.5% | **0.5%** |
36
+ | abliteration / over_refusal | 1.2% | 0.0% | 0.0% | **0.0%** |
37
+
38
+ No broken responses were observed in any of the reported full-eval runs.
39
+
40
+ ## Method
41
+
42
+ Both passes target only the Qwen3.5 language backbone inside MiniCPM-V 4.6.
43
+ The SigLIP2-style vision tower is untouched.
44
+
45
+ - Disinhibition: top-12 layers `[12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]`, scale 1.5.
46
+ - Abliteration on the disinhibited model: top-12 layers `[12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]`, scale 1.5.
47
+ - Residual writers edited: `linear_attn.out_proj`, `self_attn.o_proj`, and `mlp.down_proj` where present.
48
+ - BF16 weights, FP32 projection math, no fine-tuning.
49
+
50
+ ## Limitations
51
+
52
+ This compounds the tradeoffs of both edits: reduced refusal and reduced
53
+ epistemic humility. It is a research artifact, not a product model.
chat_template.jinja ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if enable_thinking is not defined -%}
2
+ {%- set enable_thinking = false -%}
3
+ {%- endif -%}
4
+ {%- macro render_content(content, is_system_content=false) -%}
5
+ {%- if content is string -%}
6
+ {{- content -}}
7
+ {%- elif content is iterable and content is not mapping -%}
8
+ {%- set ns = namespace(parts=[]) -%}
9
+ {%- for item in content -%}
10
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' -%}
11
+ {%- if is_system_content -%}
12
+ {{- raise_exception('System message cannot contain images.') -}}
13
+ {%- endif -%}
14
+ {%- set ns.parts = ns.parts + ['<|image_pad|>'] -%}
15
+ {%- elif 'video' in item or item.type == 'video' -%}
16
+ {%- if is_system_content -%}
17
+ {{- raise_exception('System message cannot contain videos.') -}}
18
+ {%- endif -%}
19
+ {%- set ns.parts = ns.parts + ['<|video_pad|>'] -%}
20
+ {%- elif 'text' in item -%}
21
+ {%- set ns.parts = ns.parts + [item.text] -%}
22
+ {%- else -%}
23
+ {{- raise_exception('Unexpected item type in content.') -}}
24
+ {%- endif -%}
25
+ {%- endfor -%}
26
+ {{- ns.parts | join('\n') -}}
27
+ {%- elif content is none or content is undefined -%}
28
+ {{- '' -}}
29
+ {%- else -%}
30
+ {{- raise_exception('Unexpected content type.') -}}
31
+ {%- endif -%}
32
+ {%- endmacro -%}
33
+ {%- if not messages %}
34
+ {{- raise_exception('No messages provided.') }}
35
+ {%- endif %}
36
+ {%- if tools and tools is iterable and tools is not mapping %}
37
+ {{- '<|im_start|>system\n' }}
38
+ {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
39
+ {%- for tool in tools %}
40
+ {{- "\n" }}
41
+ {{- tool | tojson }}
42
+ {%- endfor %}
43
+ {{- "\n</tools>" }}
44
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
45
+ {%- if messages[0].role == 'system' %}
46
+ {%- set content = render_content(messages[0].content, true)|trim %}
47
+ {%- if content %}
48
+ {{- '\n\n' + content }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {{- '<|im_end|>\n' }}
52
+ {%- else %}
53
+ {%- if messages[0].role == 'system' %}
54
+ {%- set content = render_content(messages[0].content, true)|trim %}
55
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
56
+ {%- endif %}
57
+ {%- endif %}
58
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
59
+ {%- for message in messages[::-1] %}
60
+ {%- set index = (messages|length - 1) - loop.index0 %}
61
+ {%- if ns.multi_step_tool and message.role == "user" %}
62
+ {%- set content = render_content(message.content)|trim %}
63
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
64
+ {%- set ns.multi_step_tool = false %}
65
+ {%- set ns.last_query_index = index %}
66
+ {%- endif %}
67
+ {%- endif %}
68
+ {%- endfor %}
69
+ {%- if ns.multi_step_tool %}
70
+ {{- raise_exception('No user query found in messages.') }}
71
+ {%- endif %}
72
+ {%- for message in messages %}
73
+ {%- set content = render_content(message.content)|trim %}
74
+ {%- if message.role == "system" %}
75
+ {%- if not loop.first %}
76
+ {{- raise_exception('System message must be at the beginning.') }}
77
+ {%- endif %}
78
+ {%- elif message.role == "user" %}
79
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
80
+ {%- elif message.role == "assistant" %}
81
+ {%- set reasoning_content = '' %}
82
+ {%- if message.reasoning_content is string %}
83
+ {%- set reasoning_content = message.reasoning_content %}
84
+ {%- else %}
85
+ {%- if '</think>' in content %}
86
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
87
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
88
+ {%- endif %}
89
+ {%- endif %}
90
+ {%- set reasoning_content = reasoning_content|trim %}
91
+ {%- if loop.index0 > ns.last_query_index %}
92
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
93
+ {%- else %}
94
+ {{- '<|im_start|>' + message.role + '\n' + content }}
95
+ {%- endif %}
96
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
97
+ {%- for tool_call in message.tool_calls %}
98
+ {%- if tool_call.function is defined %}
99
+ {%- set tool_call = tool_call.function %}
100
+ {%- endif %}
101
+ {%- if loop.first %}
102
+ {%- if content|trim %}
103
+ {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
104
+ {%- else %}
105
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
106
+ {%- endif %}
107
+ {%- else %}
108
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
109
+ {%- endif %}
110
+ {%- if tool_call.arguments is defined %}
111
+ {%- for args_name, args_value in tool_call.arguments|items %}
112
+ {{- '<parameter=' + args_name + '>\n' }}
113
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
114
+ {{- args_value }}
115
+ {{- '\n</parameter>\n' }}
116
+ {%- endfor %}
117
+ {%- endif %}
118
+ {{- '</function>\n</tool_call>' }}
119
+ {%- endfor %}
120
+ {%- endif %}
121
+ {{- '<|im_end|>\n' }}
122
+ {%- elif message.role == "tool" %}
123
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
124
+ {{- '<|im_start|>user' }}
125
+ {%- endif %}
126
+ {{- '\n<tool_response>\n' }}
127
+ {{- content }}
128
+ {{- '\n</tool_response>' }}
129
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
130
+ {{- '<|im_end|>\n' }}
131
+ {%- elif loop.last %}
132
+ {{- '<|im_end|>\n' }}
133
+ {%- endif %}
134
+ {%- else %}
135
+ {{- raise_exception('Unexpected message role.') }}
136
+ {%- endif %}
137
+ {%- endfor %}
138
+ {%- if add_generation_prompt %}
139
+ {{- '<|im_start|>assistant\n' }}
140
+ {%- if enable_thinking is defined and enable_thinking is false %}
141
+ {{- '<think>\n\n</think>\n\n' }}
142
+ {%- else %}
143
+ {{- '<think>\n' }}
144
+ {%- endif %}
145
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MiniCPMV4_6ForConditionalGeneration"
4
+ ],
5
+ "bos_token_id": null,
6
+ "downsample_mode": "16x",
7
+ "drop_vision_last_layer": false,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 248044,
10
+ "image_size": 1120,
11
+ "image_token_id": 248056,
12
+ "insert_layer_id": 6,
13
+ "merge_kernel_size": [
14
+ 2,
15
+ 2
16
+ ],
17
+ "merger_times": 1,
18
+ "model_type": "minicpmv4_6",
19
+ "pad_token_id": null,
20
+ "patch_size": 14,
21
+ "text_config": {
22
+ "attention_bias": false,
23
+ "attention_dropout": 0.0,
24
+ "attn_output_gate": true,
25
+ "bos_token_id": null,
26
+ "dtype": "bfloat16",
27
+ "eos_token_id": null,
28
+ "full_attention_interval": 4,
29
+ "head_dim": 256,
30
+ "hidden_act": "silu",
31
+ "hidden_size": 1024,
32
+ "initializer_range": 0.02,
33
+ "intermediate_size": 3584,
34
+ "layer_types": [
35
+ "linear_attention",
36
+ "linear_attention",
37
+ "linear_attention",
38
+ "full_attention",
39
+ "linear_attention",
40
+ "linear_attention",
41
+ "linear_attention",
42
+ "full_attention",
43
+ "linear_attention",
44
+ "linear_attention",
45
+ "linear_attention",
46
+ "full_attention",
47
+ "linear_attention",
48
+ "linear_attention",
49
+ "linear_attention",
50
+ "full_attention",
51
+ "linear_attention",
52
+ "linear_attention",
53
+ "linear_attention",
54
+ "full_attention",
55
+ "linear_attention",
56
+ "linear_attention",
57
+ "linear_attention",
58
+ "full_attention"
59
+ ],
60
+ "linear_conv_kernel_dim": 4,
61
+ "linear_key_head_dim": 128,
62
+ "linear_num_key_heads": 16,
63
+ "linear_num_value_heads": 16,
64
+ "linear_value_head_dim": 128,
65
+ "mamba_ssm_dtype": "float32",
66
+ "max_position_embeddings": 262144,
67
+ "mlp_only_layers": [],
68
+ "model_type": "qwen3_5_text",
69
+ "mtp_num_hidden_layers": 1,
70
+ "mtp_use_dedicated_embeddings": false,
71
+ "num_attention_heads": 8,
72
+ "num_hidden_layers": 24,
73
+ "num_key_value_heads": 2,
74
+ "pad_token_id": null,
75
+ "partial_rotary_factor": 0.25,
76
+ "rms_norm_eps": 1e-06,
77
+ "rope_parameters": {
78
+ "partial_rotary_factor": 0.25,
79
+ "rope_theta": 10000000,
80
+ "rope_type": "default"
81
+ },
82
+ "tie_word_embeddings": true,
83
+ "use_cache": true,
84
+ "vocab_size": 248094
85
+ },
86
+ "tie_word_embeddings": true,
87
+ "transformers_version": "5.8.0",
88
+ "video_token_id": 248057,
89
+ "vision_config": {
90
+ "attention_dropout": 0.0,
91
+ "dtype": "bfloat16",
92
+ "hidden_act": "gelu_pytorch_tanh",
93
+ "hidden_size": 1152,
94
+ "image_size": 980,
95
+ "insert_layer_id": 6,
96
+ "intermediate_size": 4304,
97
+ "layer_norm_eps": 1e-06,
98
+ "model_type": "minicpmv4_6_vision",
99
+ "num_attention_heads": 16,
100
+ "num_channels": 3,
101
+ "num_hidden_layers": 27,
102
+ "patch_size": 14,
103
+ "window_kernel_size": [
104
+ 2,
105
+ 2
106
+ ]
107
+ }
108
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 248045,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 248044,
6
+ 248046
7
+ ],
8
+ "repetition_penalty": 1.0,
9
+ "temperature": 0.7,
10
+ "top_k": 0,
11
+ "top_p": 1.0,
12
+ "transformers_version": "5.8.0"
13
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80f9826ee2ac978baf3ed35b75c3a225594915ab257139a92b3bb4e572c4092b
3
+ size 2600957560
processor_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "do_convert_rgb": true,
4
+ "do_normalize": true,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "downsample_mode": "16x",
8
+ "image_mean": [
9
+ 0.5,
10
+ 0.5,
11
+ 0.5
12
+ ],
13
+ "image_processor_type": "MiniCPMV4_6ImageProcessor",
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "max_slice_nums": 9,
20
+ "patch_size": 14,
21
+ "resample": 3,
22
+ "rescale_factor": 0.00392156862745098,
23
+ "scale_resolution": 448,
24
+ "slice_mode": true,
25
+ "use_image_id": true
26
+ },
27
+ "processor_class": "MiniCPMV4_6Processor",
28
+ "video_processor": {
29
+ "do_convert_rgb": true,
30
+ "do_normalize": true,
31
+ "do_rescale": true,
32
+ "do_resize": true,
33
+ "do_sample_frames": true,
34
+ "downsample_mode": "16x",
35
+ "image_mean": [
36
+ 0.5,
37
+ 0.5,
38
+ 0.5
39
+ ],
40
+ "image_std": [
41
+ 0.5,
42
+ 0.5,
43
+ 0.5
44
+ ],
45
+ "max_num_frames": 128,
46
+ "max_slice_nums": 9,
47
+ "patch_size": 14,
48
+ "resample": 3,
49
+ "rescale_factor": 0.00392156862745098,
50
+ "return_metadata": false,
51
+ "scale_resolution": 448,
52
+ "slice_mode": true,
53
+ "stack_frames": 1,
54
+ "use_image_id": true,
55
+ "video_processor_type": "MiniCPMV4_6VideoProcessor"
56
+ }
57
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aab55feffdc02a9680cf244f6fbf6058191a7c4b06b88dae7f4c831f50c9537
3
+ size 19992648
tokenizer_config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "audio_bos_token": "<|audio_start|>",
4
+ "audio_eos_token": "<|audio_end|>",
5
+ "audio_token": "<|audio_pad|>",
6
+ "backend": "tokenizers",
7
+ "bos_token": "<|im_start|>",
8
+ "clean_up_tokenization_spaces": false,
9
+ "eos_token": "<|im_end|>",
10
+ "errors": "replace",
11
+ "image_end_token": "</image>",
12
+ "image_id_end_token": "</image_id>",
13
+ "image_id_start_token": "<image_id>",
14
+ "image_start_token": "<image>",
15
+ "image_token": "<|image_pad|>",
16
+ "is_local": false,
17
+ "local_files_only": false,
18
+ "model_max_length": 262144,
19
+ "model_specific_special_tokens": {
20
+ "audio_bos_token": "<|audio_start|>",
21
+ "audio_eos_token": "<|audio_end|>",
22
+ "audio_token": "<|audio_pad|>",
23
+ "image_end_token": "</image>",
24
+ "image_id_end_token": "</image_id>",
25
+ "image_id_start_token": "<image_id>",
26
+ "image_start_token": "<image>",
27
+ "image_token": "<|image_pad|>",
28
+ "slice_end_token": "</slice>",
29
+ "slice_start_token": "<slice>",
30
+ "video_token": "<|video_pad|>",
31
+ "vision_bos_token": "<|vision_start|>",
32
+ "vision_eos_token": "<|vision_end|>"
33
+ },
34
+ "pad_token": "<|endoftext|>",
35
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
36
+ "processor_class": "MiniCPMV4_6Processor",
37
+ "slice_end_token": "</slice>",
38
+ "slice_start_token": "<slice>",
39
+ "split_special_tokens": false,
40
+ "tokenizer_class": "TokenizersBackend",
41
+ "unk_token": "<unk>",
42
+ "video_token": "<|video_pad|>",
43
+ "vision_bos_token": "<|vision_start|>",
44
+ "vision_eos_token": "<|vision_end|>"
45
+ }