Youssofal committed on
Commit
cc1cd06
·
verified ·
1 Parent(s): 89123c9

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -1,35 +1,3 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  *.safetensors filter=lfs diff=lfs merge=lfs -text
2
+ target/tokenizer.json filter=lfs diff=lfs merge=lfs -text
3
+ assistant/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
README.md ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ license_link: https://ai.google.dev/gemma/docs/gemma_4_license
4
+ base_model:
5
+ - google/gemma-4-31B-it
6
+ - google/gemma-4-31B-it-assistant
7
+ library_name: mlx
8
+ tags:
9
+ - mlx
10
+ - gemma4
11
+ - mtplx
12
+ - speculative-decoding
13
+ - apple-silicon
14
+ - text-generation
15
+ pipeline_tag: text-generation
16
+ ---
17
+
18
+ # Gemma4 MTPLX Optimized Speed
19
+
20
+ This is an **MTPLX pair bundle** for Gemma 4 31B speculative decoding on Apple Silicon.
21
+
22
+ It is not a single vanilla Transformers model directory. The repository contains two MLX-format artifacts:
23
+
24
+ - `target/` - Gemma 4 31B IT target, MLX Q4 affine group-size 64
25
+ - `assistant/` - official Gemma 4 31B assistant drafter, MLX Q6 affine group-size 64
26
+
27
+ Use this pair when decoding throughput matters more than target precision: the target is quantized to 4 bits to maximize speed.
28
+
29
+ ## Source
30
+
31
+ - Target source: `google/gemma-4-31B-it`
32
+ - Target revision: `145dc2508c480a64b47242f160d286cff94a2343`
33
+ - Assistant source: `google/gemma-4-31B-it-assistant`
34
+ - Assistant revision: `cffbbd2cea41ea56a0fa5b0487e0d445121fd204`
35
+
36
+ Both artifacts were converted locally to MLX format.
37
+
38
+ ## Quantization
39
+
40
+ Target:
41
+
42
+ ```text
43
+ bits: 4
44
+ group_size: 64
45
+ mode: affine
46
+ ```
47
+
48
+ Assistant:
49
+
50
+ ```text
51
+ bits: 6
52
+ group_size: 64
53
+ mode: affine
54
+ ```
55
+
56
+ ## MTPLX Usage
57
+
58
+ After downloading this repository, point MTPLX at the two subdirectories:
59
+
60
+ ```bash
61
+ mtplx bench gemma-mtp \
62
+ --target-model ./target \
63
+ --assistant-model ./assistant \
64
+ --prompt-suite mtplx/benchmarks/prompts/flappy.jsonl \
65
+ --max-tokens 1000 \
66
+ --draft-block-sizes 6 \
67
+ --allow-unverified-gemma
68
+ ```
69
+
70
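+ The Gemma 4 assistant is a separate drafter model that proposes blocks of tokens for the target to verify. MTPLX uses exact speculative sampling: each drafted token is accepted based on the ratio of target to assistant probabilities (p/q), and a rejected token is resampled from the residual distribution, so the assistant's distribution may differ from the target's without affecting exactness.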
71
+
72
+ ## Local Benchmark
73
+
74
+ Prompt: single-file HTML5 Canvas Flappy Bird game, capped at 1000 generated tokens.
75
+
76
+ Sampler:
77
+
78
+ ```text
79
+ temperature: 1.0
80
+ top_p: 0.95
81
+ top_k: 64
82
+ seed: 0
83
+ ```
84
+
85
+ Best observed block size:
86
+
87
+ ```text
88
+ block_size: 6
89
+ acceptance: 830 / 846 = 98.11%
90
+ ```
91
+
92
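+ Observed MTPLX throughput samples (the bundled confirmation run measured 24.01 tok/s for plain autoregressive decoding of the same target, i.e. roughly a 1.85x speedup):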
93
+
94
+ ```text
95
+ 43.56 tok/s
96
+ 44.46 tok/s
97
+ 44.07 tok/s
98
+ ```
99
+
100
+ The bundled benchmark JSON files are in `benchmarks/`.
101
+
102
+ ## Notes
103
+
104
+ This release is optimized for MTPLX speed experiments. For a higher-precision target, use `Youssofal/Gemma4-MTPLX-Optimized-Quality`.
105
+
106
+ Gemma 4 is released by Google under the Gemma 4 license terms: https://ai.google.dev/gemma/docs/gemma_4_license
assistant/README.md ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ library_name: mlx
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - mlx
7
+ ---
assistant/config.json ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma4AssistantForCausalLM"
4
+ ],
5
+ "audio_token_id": 258881,
6
+ "backbone_hidden_size": 5376,
7
+ "boa_token_id": 256000,
8
+ "boi_token_id": 255999,
9
+ "centroid_intermediate_top_k": 32,
10
+ "dtype": "bfloat16",
11
+ "eoa_token_id": 258883,
12
+ "eoi_token_id": 258882,
13
+ "eos_token_id": [
14
+ 1,
15
+ 106,
16
+ 50
17
+ ],
18
+ "image_token_id": 258880,
19
+ "model_type": "gemma4_assistant",
20
+ "num_centroids": 2048,
21
+ "quantization": {
22
+ "group_size": 64,
23
+ "bits": 6,
24
+ "mode": "affine"
25
+ },
26
+ "quantization_config": {
27
+ "group_size": 64,
28
+ "bits": 6,
29
+ "mode": "affine"
30
+ },
31
+ "text_config": {
32
+ "_name_or_path": "",
33
+ "architectures": null,
34
+ "attention_bias": false,
35
+ "attention_dropout": 0.0,
36
+ "attention_k_eq_v": true,
37
+ "bos_token_id": 2,
38
+ "chunk_size_feed_forward": 0,
39
+ "dtype": "bfloat16",
40
+ "enable_moe_block": false,
41
+ "eos_token_id": 1,
42
+ "final_logit_softcapping": null,
43
+ "global_head_dim": 512,
44
+ "head_dim": 256,
45
+ "hidden_activation": "gelu_pytorch_tanh",
46
+ "hidden_size": 1024,
47
+ "hidden_size_per_layer_input": 0,
48
+ "id2label": {
49
+ "0": "LABEL_0",
50
+ "1": "LABEL_1"
51
+ },
52
+ "initializer_range": 0.02,
53
+ "intermediate_size": 8192,
54
+ "is_encoder_decoder": false,
55
+ "label2id": {
56
+ "LABEL_0": 0,
57
+ "LABEL_1": 1
58
+ },
59
+ "layer_types": [
60
+ "sliding_attention",
61
+ "sliding_attention",
62
+ "sliding_attention",
63
+ "full_attention"
64
+ ],
65
+ "max_position_embeddings": 262144,
66
+ "model_type": "gemma4_text",
67
+ "moe_intermediate_size": null,
68
+ "num_attention_heads": 32,
69
+ "num_experts": null,
70
+ "num_global_key_value_heads": 4,
71
+ "num_hidden_layers": 4,
72
+ "num_key_value_heads": 16,
73
+ "num_kv_shared_layers": 4,
74
+ "output_attentions": false,
75
+ "output_hidden_states": false,
76
+ "pad_token_id": 0,
77
+ "problem_type": null,
78
+ "return_dict": true,
79
+ "rms_norm_eps": 1e-06,
80
+ "rope_parameters": {
81
+ "full_attention": {
82
+ "partial_rotary_factor": 0.25,
83
+ "rope_theta": 1000000.0,
84
+ "rope_type": "proportional"
85
+ },
86
+ "sliding_attention": {
87
+ "rope_theta": 10000.0,
88
+ "rope_type": "default"
89
+ }
90
+ },
91
+ "sliding_window": 1024,
92
+ "tie_word_embeddings": true,
93
+ "top_k_experts": null,
94
+ "use_bidirectional_attention": null,
95
+ "use_cache": true,
96
+ "use_double_wide_mlp": false,
97
+ "vocab_size": 262144,
98
+ "vocab_size_per_layer_input": 0
99
+ },
100
+ "tie_word_embeddings": true,
101
+ "transformers_version": "5.7.0.dev0",
102
+ "use_ordered_embeddings": false
103
+ }
assistant/generation_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 2,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 1,
6
+ 106,
7
+ 50
8
+ ],
9
+ "is_assistant": true,
10
+ "num_assistant_tokens": 6,
11
+ "num_assistant_tokens_schedule": "constant",
12
+ "pad_token_id": 0,
13
+ "temperature": 1.0,
14
+ "top_k": 64,
15
+ "top_p": 0.95,
16
+ "transformers_version": "5.7.0.dev0"
17
+ }
assistant/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:125bbd650ad1b7223b521ff2db20c8f54bc9ce6d4c16a02342fa48c4b9b6fa81
3
+ size 381516281
assistant/model.safetensors.index.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 381506056,
4
+ "total_parameters": 469518592
5
+ },
6
+ "weight_map": {
7
+ "model.embed_tokens.biases": "model.safetensors",
8
+ "model.embed_tokens.scales": "model.safetensors",
9
+ "model.embed_tokens.weight": "model.safetensors",
10
+ "model.layers.0.input_layernorm.weight": "model.safetensors",
11
+ "model.layers.0.layer_scalar": "model.safetensors",
12
+ "model.layers.0.mlp.down_proj.biases": "model.safetensors",
13
+ "model.layers.0.mlp.down_proj.scales": "model.safetensors",
14
+ "model.layers.0.mlp.down_proj.weight": "model.safetensors",
15
+ "model.layers.0.mlp.gate_proj.biases": "model.safetensors",
16
+ "model.layers.0.mlp.gate_proj.scales": "model.safetensors",
17
+ "model.layers.0.mlp.gate_proj.weight": "model.safetensors",
18
+ "model.layers.0.mlp.up_proj.biases": "model.safetensors",
19
+ "model.layers.0.mlp.up_proj.scales": "model.safetensors",
20
+ "model.layers.0.mlp.up_proj.weight": "model.safetensors",
21
+ "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
22
+ "model.layers.0.post_feedforward_layernorm.weight": "model.safetensors",
23
+ "model.layers.0.pre_feedforward_layernorm.weight": "model.safetensors",
24
+ "model.layers.0.self_attn.o_proj.biases": "model.safetensors",
25
+ "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
26
+ "model.layers.0.self_attn.o_proj.weight": "model.safetensors",
27
+ "model.layers.0.self_attn.q_norm.weight": "model.safetensors",
28
+ "model.layers.0.self_attn.q_proj.biases": "model.safetensors",
29
+ "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
30
+ "model.layers.0.self_attn.q_proj.weight": "model.safetensors",
31
+ "model.layers.1.input_layernorm.weight": "model.safetensors",
32
+ "model.layers.1.layer_scalar": "model.safetensors",
33
+ "model.layers.1.mlp.down_proj.biases": "model.safetensors",
34
+ "model.layers.1.mlp.down_proj.scales": "model.safetensors",
35
+ "model.layers.1.mlp.down_proj.weight": "model.safetensors",
36
+ "model.layers.1.mlp.gate_proj.biases": "model.safetensors",
37
+ "model.layers.1.mlp.gate_proj.scales": "model.safetensors",
38
+ "model.layers.1.mlp.gate_proj.weight": "model.safetensors",
39
+ "model.layers.1.mlp.up_proj.biases": "model.safetensors",
40
+ "model.layers.1.mlp.up_proj.scales": "model.safetensors",
41
+ "model.layers.1.mlp.up_proj.weight": "model.safetensors",
42
+ "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
43
+ "model.layers.1.post_feedforward_layernorm.weight": "model.safetensors",
44
+ "model.layers.1.pre_feedforward_layernorm.weight": "model.safetensors",
45
+ "model.layers.1.self_attn.o_proj.biases": "model.safetensors",
46
+ "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
47
+ "model.layers.1.self_attn.o_proj.weight": "model.safetensors",
48
+ "model.layers.1.self_attn.q_norm.weight": "model.safetensors",
49
+ "model.layers.1.self_attn.q_proj.biases": "model.safetensors",
50
+ "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
51
+ "model.layers.1.self_attn.q_proj.weight": "model.safetensors",
52
+ "model.layers.2.input_layernorm.weight": "model.safetensors",
53
+ "model.layers.2.layer_scalar": "model.safetensors",
54
+ "model.layers.2.mlp.down_proj.biases": "model.safetensors",
55
+ "model.layers.2.mlp.down_proj.scales": "model.safetensors",
56
+ "model.layers.2.mlp.down_proj.weight": "model.safetensors",
57
+ "model.layers.2.mlp.gate_proj.biases": "model.safetensors",
58
+ "model.layers.2.mlp.gate_proj.scales": "model.safetensors",
59
+ "model.layers.2.mlp.gate_proj.weight": "model.safetensors",
60
+ "model.layers.2.mlp.up_proj.biases": "model.safetensors",
61
+ "model.layers.2.mlp.up_proj.scales": "model.safetensors",
62
+ "model.layers.2.mlp.up_proj.weight": "model.safetensors",
63
+ "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
64
+ "model.layers.2.post_feedforward_layernorm.weight": "model.safetensors",
65
+ "model.layers.2.pre_feedforward_layernorm.weight": "model.safetensors",
66
+ "model.layers.2.self_attn.o_proj.biases": "model.safetensors",
67
+ "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
68
+ "model.layers.2.self_attn.o_proj.weight": "model.safetensors",
69
+ "model.layers.2.self_attn.q_norm.weight": "model.safetensors",
70
+ "model.layers.2.self_attn.q_proj.biases": "model.safetensors",
71
+ "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
72
+ "model.layers.2.self_attn.q_proj.weight": "model.safetensors",
73
+ "model.layers.3.input_layernorm.weight": "model.safetensors",
74
+ "model.layers.3.layer_scalar": "model.safetensors",
75
+ "model.layers.3.mlp.down_proj.biases": "model.safetensors",
76
+ "model.layers.3.mlp.down_proj.scales": "model.safetensors",
77
+ "model.layers.3.mlp.down_proj.weight": "model.safetensors",
78
+ "model.layers.3.mlp.gate_proj.biases": "model.safetensors",
79
+ "model.layers.3.mlp.gate_proj.scales": "model.safetensors",
80
+ "model.layers.3.mlp.gate_proj.weight": "model.safetensors",
81
+ "model.layers.3.mlp.up_proj.biases": "model.safetensors",
82
+ "model.layers.3.mlp.up_proj.scales": "model.safetensors",
83
+ "model.layers.3.mlp.up_proj.weight": "model.safetensors",
84
+ "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
85
+ "model.layers.3.post_feedforward_layernorm.weight": "model.safetensors",
86
+ "model.layers.3.pre_feedforward_layernorm.weight": "model.safetensors",
87
+ "model.layers.3.self_attn.o_proj.biases": "model.safetensors",
88
+ "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
89
+ "model.layers.3.self_attn.o_proj.weight": "model.safetensors",
90
+ "model.layers.3.self_attn.q_norm.weight": "model.safetensors",
91
+ "model.layers.3.self_attn.q_proj.biases": "model.safetensors",
92
+ "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
93
+ "model.layers.3.self_attn.q_proj.weight": "model.safetensors",
94
+ "model.norm.weight": "model.safetensors",
95
+ "post_projection.biases": "model.safetensors",
96
+ "post_projection.scales": "model.safetensors",
97
+ "post_projection.weight": "model.safetensors",
98
+ "pre_projection.biases": "model.safetensors",
99
+ "pre_projection.scales": "model.safetensors",
100
+ "pre_projection.weight": "model.safetensors"
101
+ }
102
+ }
assistant/mtplx_artifact.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "note": "Converted locally with MTPLX Gemma4 assistant classes because stock mlx_lm.convert does not support model_type=gemma4_assistant.",
3
+ "precision_policy": "Q6 affine G64 for all quantizable assistant modules, including tied embedding/LM-head path and projections.",
4
+ "quantization": {
5
+ "bits": 6,
6
+ "group_size": 64,
7
+ "mode": "affine"
8
+ },
9
+ "source_path": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-bf16-mlx",
10
+ "source_repo": "google/gemma-4-31B-it-assistant",
11
+ "source_revision": "cffbbd2cea41ea56a0fa5b0487e0d445121fd204"
12
+ }
assistant/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75a6583c1a418e2bbd79c60d95d28e0f5bf549ad3f2990b5bdb5238c6c2bf70c
3
+ size 32169440
assistant/tokenizer_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [],
17
+ "image_token": "<|image|>",
18
+ "is_local": true,
19
+ "local_files_only": false,
20
+ "mask_token": "<mask>",
21
+ "model_max_length": 1000000000000000019884624838656,
22
+ "model_specific_special_tokens": {
23
+ "audio_token": "<|audio|>",
24
+ "boa_token": "<|audio>",
25
+ "boi_token": "<|image>",
26
+ "eoa_token": "<audio|>",
27
+ "eoc_token": "<channel|>",
28
+ "eoi_token": "<image|>",
29
+ "eot_token": "<turn|>",
30
+ "escape_token": "<|\"|>",
31
+ "etc_token": "<tool_call|>",
32
+ "etd_token": "<tool|>",
33
+ "etr_token": "<tool_response|>",
34
+ "image_token": "<|image|>",
35
+ "soc_token": "<|channel>",
36
+ "sot_token": "<|turn>",
37
+ "stc_token": "<|tool_call>",
38
+ "std_token": "<|tool>",
39
+ "str_token": "<|tool_response>",
40
+ "think_token": "<|think|>"
41
+ },
42
+ "pad_token": "<pad>",
43
+ "padding_side": "left",
44
+ "soc_token": "<|channel>",
45
+ "sot_token": "<|turn>",
46
+ "stc_token": "<|tool_call>",
47
+ "std_token": "<|tool>",
48
+ "str_token": "<|tool_response>",
49
+ "think_token": "<|think|>",
50
+ "tokenizer_class": "GemmaTokenizer",
51
+ "unk_token": "<unk>"
52
+ }
benchmarks/flappy1000-targetq4-assistantq6-confirm-block6.json ADDED
@@ -0,0 +1,1183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "arch_id": "gemma4-assistant-mtp",
3
+ "artifacts": {
4
+ "assistant_dtype": null,
5
+ "assistant_format": "q6-g64-affine",
6
+ "assistant_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx",
7
+ "assistant_quantization": {
8
+ "bits": 6,
9
+ "group_size": 64,
10
+ "mode": "affine"
11
+ },
12
+ "disk_ok": true,
13
+ "min_free_gib": 220.0,
14
+ "observed_free_gib": 704.1293067932129,
15
+ "target_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx",
16
+ "target_quantization": {
17
+ "bits": 4,
18
+ "format": "mlx-flat4-g64",
19
+ "group_size": 64,
20
+ "mode": "affine"
21
+ }
22
+ },
23
+ "backend": "gemma4_assistant",
24
+ "benchmark": {
25
+ "draft_block_sizes": [
26
+ 6
27
+ ],
28
+ "draft_sampler": {
29
+ "exactness_note": "Assistant q may differ from target p; MTPLX remains exact because acceptance uses p/q and rejection samples the residual distribution.",
30
+ "inherits_target_sampler": true,
31
+ "temperature": null,
32
+ "top_k": null,
33
+ "top_p": null
34
+ },
35
+ "max_mode": true,
36
+ "max_tokens": 1000,
37
+ "profile": "sustained",
38
+ "prompt_suite": "mtplx/benchmarks/prompts/flappy.jsonl",
39
+ "reasoning": "off",
40
+ "sampler_source": {
41
+ "do_sample": true,
42
+ "local_reference": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx/generation_config.json",
43
+ "name": "official Gemma 4 generation_config.json",
44
+ "temperature": 1.0,
45
+ "top_k": 64,
46
+ "top_p": 0.95
47
+ },
48
+ "seed": 0,
49
+ "temperature": 1.0,
50
+ "top_k": 64,
51
+ "top_p": 0.95
52
+ },
53
+ "blockers": [
54
+ "best speedup 1.852x is below 2.000x",
55
+ "median confirmation speedup 1.852x is below 2.000x"
56
+ ],
57
+ "can_run_now": true,
58
+ "gates": {
59
+ "generated_tokens": 1000,
60
+ "longer_lengths_blocked_until_160_passes": true,
61
+ "median_of_3_min_speedup_vs_ar": 2.0,
62
+ "min_speedup_vs_ar": 2.0,
63
+ "mtp_peak_memory_lte_ar_multiplier": 1.18,
64
+ "mtp_peak_memory_lte_ar_plus_gib": 6
65
+ },
66
+ "official_sources": {
67
+ "assistant": "google/gemma-4-31B-it-assistant",
68
+ "assistant_revision": "cffbbd2cea41ea56a0fa5b0487e0d445121fd204",
69
+ "target": "google/gemma-4-31B-it",
70
+ "target_revision": "145dc2508c480a64b47242f160d286cff94a2343"
71
+ },
72
+ "pair": {
73
+ "assistant_exists": true,
74
+ "assistant_inspection": {
75
+ "architecture": "Gemma4AssistantForCausalLM",
76
+ "architecture_recognized": true,
77
+ "backbone_hidden_size": 5376,
78
+ "compatibility": {
79
+ "arch_id": "gemma4-assistant-mtp",
80
+ "can_run": false,
81
+ "exit_code": 3,
82
+ "message": "Official-style Gemma 4 31B assistant artifact recognized. This is an assistant-backed MTP pair, not a standalone target; MTPLX scaffold is present but QA and the 160-token speed/memory gate are still pending.",
83
+ "mtp_supported": "recognized",
84
+ "recognized": true,
85
+ "recommended_backend": "gemma4_assistant",
86
+ "recommended_profile": "performance-cold",
87
+ "runtime_compatibility": "assistant-pair-qa-pending",
88
+ "runtime_contract": null,
89
+ "runtime_contract_error": null,
90
+ "runtime_contract_path": null,
91
+ "support_level": "architecture-scaffolded-qa-pending",
92
+ "support_notes": "Assistant-backed scaffold for the official dense Gemma 4 31B pair. It remains QA-pending and is not a public runnable backend until 160-token exactness, speed, and memory gates pass.",
93
+ "supported": false,
94
+ "tier": "architecture-compatible-but-unverified",
95
+ "unsafe_force_required": false,
96
+ "unverified_model": true
97
+ },
98
+ "config_exists": true,
99
+ "hidden_size": 1024,
100
+ "layer_types": [
101
+ "sliding_attention",
102
+ "sliding_attention",
103
+ "sliding_attention",
104
+ "full_attention"
105
+ ],
106
+ "model_dir": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx",
107
+ "model_files": [
108
+ "model.safetensors"
109
+ ],
110
+ "model_type": "gemma4_assistant",
111
+ "mtp": {
112
+ "exists": false,
113
+ "expected_tensor_count": 15,
114
+ "extra_keys": [],
115
+ "metadata_only": true,
116
+ "missing_expected_keys": [],
117
+ "mtp_file": "model.safetensors.index.json::embedded",
118
+ "passes_tensor_gate": false,
119
+ "sidecar_format": "bf16",
120
+ "tensor_count": 0,
121
+ "tensors": []
122
+ },
123
+ "mtp_arch": "gemma4-assistant-mtp",
124
+ "mtp_num_hidden_layers": 0,
125
+ "mtp_pattern": null,
126
+ "mtp_supported": "recognized",
127
+ "num_hidden_layers": 4,
128
+ "num_kv_shared_layers": 4,
129
+ "passes_primary_gate": false,
130
+ "quantization": {
131
+ "bits": 6,
132
+ "group_size": 64,
133
+ "mode": "affine"
134
+ },
135
+ "recommended_backend": "gemma4_assistant",
136
+ "recommended_profile": "performance-cold",
137
+ "runtime_compatibility": "assistant-pair-qa-pending",
138
+ "runtime_contract_path": null,
139
+ "sidecars": {
140
+ "preprocessor_config.json": false,
141
+ "processor_config.json": false,
142
+ "video_preprocessor_config.json": false
143
+ },
144
+ "source": "local",
145
+ "support_level": "architecture-scaffolded-qa-pending",
146
+ "support_notes": "Assistant-backed scaffold for the official dense Gemma 4 31B pair. It remains QA-pending and is not a public runnable backend until 160-token exactness, speed, and memory gates pass.",
147
+ "unverified_model": true,
148
+ "use_ordered_embeddings": false,
149
+ "vocab_size": 262144
150
+ },
151
+ "assistant_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx",
152
+ "pair_error": null,
153
+ "pair_valid": true,
154
+ "target_exists": true,
155
+ "target_inspection": {
156
+ "architecture": "Gemma4ForConditionalGeneration",
157
+ "architecture_recognized": false,
158
+ "backbone_hidden_size": null,
159
+ "compatibility": {
160
+ "arch_id": null,
161
+ "can_run": false,
162
+ "exit_code": 2,
163
+ "message": "Model has no MTP head. MTPLX requires an MTP-equipped model.",
164
+ "mtp_supported": "no",
165
+ "recognized": false,
166
+ "recommended_backend": null,
167
+ "recommended_profile": null,
168
+ "runtime_compatibility": "unsupported",
169
+ "runtime_contract": null,
170
+ "runtime_contract_error": null,
171
+ "runtime_contract_path": null,
172
+ "support_level": "unsupported",
173
+ "support_notes": null,
174
+ "supported": false,
175
+ "tier": "no-MTP",
176
+ "unsafe_force_required": false,
177
+ "unverified_model": false
178
+ },
179
+ "config_exists": true,
180
+ "hidden_size": 5376,
181
+ "layer_types": [
182
+ "sliding_attention",
183
+ "sliding_attention",
184
+ "sliding_attention",
185
+ "sliding_attention",
186
+ "sliding_attention",
187
+ "full_attention",
188
+ "sliding_attention",
189
+ "sliding_attention",
190
+ "sliding_attention",
191
+ "sliding_attention",
192
+ "sliding_attention",
193
+ "full_attention",
194
+ "sliding_attention",
195
+ "sliding_attention",
196
+ "sliding_attention",
197
+ "sliding_attention",
198
+ "sliding_attention",
199
+ "full_attention",
200
+ "sliding_attention",
201
+ "sliding_attention",
202
+ "sliding_attention",
203
+ "sliding_attention",
204
+ "sliding_attention",
205
+ "full_attention",
206
+ "sliding_attention",
207
+ "sliding_attention",
208
+ "sliding_attention",
209
+ "sliding_attention",
210
+ "sliding_attention",
211
+ "full_attention",
212
+ "sliding_attention",
213
+ "sliding_attention",
214
+ "sliding_attention",
215
+ "sliding_attention",
216
+ "sliding_attention",
217
+ "full_attention",
218
+ "sliding_attention",
219
+ "sliding_attention",
220
+ "sliding_attention",
221
+ "sliding_attention",
222
+ "sliding_attention",
223
+ "full_attention",
224
+ "sliding_attention",
225
+ "sliding_attention",
226
+ "sliding_attention",
227
+ "sliding_attention",
228
+ "sliding_attention",
229
+ "full_attention",
230
+ "sliding_attention",
231
+ "sliding_attention",
232
+ "sliding_attention",
233
+ "sliding_attention",
234
+ "sliding_attention",
235
+ "full_attention",
236
+ "sliding_attention",
237
+ "sliding_attention",
238
+ "sliding_attention",
239
+ "sliding_attention",
240
+ "sliding_attention",
241
+ "full_attention"
242
+ ],
243
+ "model_dir": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx",
244
+ "model_files": [
245
+ "model-00001-of-00004.safetensors",
246
+ "model-00002-of-00004.safetensors",
247
+ "model-00003-of-00004.safetensors",
248
+ "model-00004-of-00004.safetensors"
249
+ ],
250
+ "model_type": "gemma4_text",
251
+ "mtp": {
252
+ "exists": false,
253
+ "expected_tensor_count": 15,
254
+ "extra_keys": [],
255
+ "metadata_only": true,
256
+ "missing_expected_keys": [],
257
+ "mtp_file": "model.safetensors.index.json::embedded",
258
+ "passes_tensor_gate": false,
259
+ "sidecar_format": "bf16",
260
+ "tensor_count": 0,
261
+ "tensors": []
262
+ },
263
+ "mtp_arch": null,
264
+ "mtp_num_hidden_layers": 0,
265
+ "mtp_pattern": null,
266
+ "mtp_supported": "no",
267
+ "num_hidden_layers": 60,
268
+ "num_kv_shared_layers": 0,
269
+ "passes_primary_gate": false,
270
+ "quantization": {
271
+ "bits": 4,
272
+ "group_size": 64,
273
+ "mode": "affine"
274
+ },
275
+ "recommended_backend": null,
276
+ "recommended_profile": null,
277
+ "runtime_compatibility": "unsupported",
278
+ "runtime_contract_path": null,
279
+ "sidecars": {
280
+ "preprocessor_config.json": false,
281
+ "processor_config.json": false,
282
+ "video_preprocessor_config.json": false
283
+ },
284
+ "source": "local",
285
+ "support_level": "unsupported",
286
+ "support_notes": null,
287
+ "unverified_model": false,
288
+ "use_ordered_embeddings": null,
289
+ "vocab_size": 262144
290
+ },
291
+ "target_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx"
292
+ },
293
+ "planned_commands": {
294
+ "assistant_bf16_snapshot": "uv run python -c \"from huggingface_hub import snapshot_download; snapshot_download(repo_id='google/gemma-4-31B-it-assistant', revision='cffbbd2cea41ea56a0fa5b0487e0d445121fd204', repo_type='model', local_dir='/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx')\"",
295
+ "gate": "mtplx bench gemma-mtp --target-model /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx --assistant-model /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx --profile sustained --max --prompt-suite mtplx/benchmarks/prompts/flappy.jsonl --max-tokens 1000 --temperature 1.0 --top-p 0.95 --top-k 64 --seed 0 --reasoning off --draft-block-sizes 6 --json --output outputs/gemma4/flappy1000-q6assistant-pure-confirm-block6.json",
296
+ "target_flat4_g64": "uv run python -m mlx_lm.convert --hf-path /Users/youssof/Documents/MTPLX/models/.sources/gemma-4-31B-it-145dc2508c48 --mlx-path /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx --quantize --q-bits 4 --q-group-size 64 --q-mode affine",
297
+ "target_revision_download": "uv run python -c \"from huggingface_hub import snapshot_download; snapshot_download(repo_id='google/gemma-4-31B-it', revision='145dc2508c480a64b47242f160d286cff94a2343', repo_type='model', local_dir='/Users/youssof/Documents/MTPLX/models/.sources/gemma-4-31B-it-145dc2508c48')\""
298
+ },
299
+ "qa_pending": true,
300
+ "results": {
301
+ "ar": {
302
+ "active_memory_gib": 16.96542397607118,
303
+ "cache_memory_gib": 1.9759961115196347,
304
+ "decode_s": 41.648729124994134,
305
+ "generated_tokens": 1000,
306
+ "mode": "ar",
307
+ "peak_memory_gib": 17.040895924903452,
308
+ "prefill_s": 0.7671576249995269,
309
+ "tok_s": 24.010336473865717,
310
+ "token_preview": [
311
+ 9996,
312
+ 625,
313
+ 24731,
314
+ 236761,
315
+ 9996,
316
+ 625,
317
+ 24731,
318
+ 236761,
319
+ 9996,
320
+ 625,
321
+ 24731,
322
+ 236761,
323
+ 9996,
324
+ 625,
325
+ 24731,
326
+ 236761
327
+ ]
328
+ },
329
+ "ar_confirmation": [
330
+ {
331
+ "active_memory_gib": 16.96542397607118,
332
+ "cache_memory_gib": 1.9759961115196347,
333
+ "decode_s": 41.648729124994134,
334
+ "generated_tokens": 1000,
335
+ "mode": "ar",
336
+ "peak_memory_gib": 17.040895924903452,
337
+ "prefill_s": 0.7671576249995269,
338
+ "tok_s": 24.010336473865717,
339
+ "token_preview": [
340
+ 9996,
341
+ 625,
342
+ 24731,
343
+ 236761,
344
+ 9996,
345
+ 625,
346
+ 24731,
347
+ 236761,
348
+ 9996,
349
+ 625,
350
+ 24731,
351
+ 236761,
352
+ 9996,
353
+ 625,
354
+ 24731,
355
+ 236761
356
+ ]
357
+ }
358
+ ],
359
+ "best_block_confirmation": [
360
+ {
361
+ "acceptance": 0.9810874704491725,
362
+ "accepted_drafts": 830,
363
+ "active_memory_gib": 17.410899550653994,
364
+ "block_size": 6,
365
+ "cache_memory_gib": 67.3336522737518,
366
+ "decode_s": 22.49344370800827,
367
+ "draft_sampler": {
368
+ "temperature": 1.0,
369
+ "top_k": 64,
370
+ "top_p": 0.95
371
+ },
372
+ "drafted_tokens": 846,
373
+ "generated_tokens": 1000,
374
+ "mode": "mtp",
375
+ "peak_memory_gib": 17.566339950077236,
376
+ "prefill_s": 0.2826114170020446,
377
+ "row_distribution_evals": 0,
378
+ "speedup_vs_ar": 1.8515941651996162,
379
+ "target_distribution_modes": {
380
+ "batched_logits": 170
381
+ },
382
+ "target_sampler": {
383
+ "temperature": 1.0,
384
+ "top_k": 64,
385
+ "top_p": 0.95
386
+ },
387
+ "telemetry": {
388
+ "ar_dense_fallback_calls": 0,
389
+ "decode_dense_fallback_calls": 0,
390
+ "dense_fallback_calls_by_phase": {
391
+ "ar_decode": 0,
392
+ "decode_verify": 0,
393
+ "postcommit": 0,
394
+ "prefill": 0,
395
+ "unknown": 0
396
+ },
397
+ "events": [],
398
+ "paged_active_array_calls_by_phase": {
399
+ "ar_decode": 0,
400
+ "decode_verify": 0,
401
+ "postcommit": 0,
402
+ "prefill": 0,
403
+ "unknown": 0
404
+ },
405
+ "paged_attention_bailouts_by_phase_reason": {
406
+ "ar_decode": {
407
+ "batch_not_1": 0,
408
+ "block_size_mismatch": 0,
409
+ "blocks_invalid": 0,
410
+ "dtype_unsupported": 0,
411
+ "empty_cache": 0,
412
+ "head_dim_unsupported": 0,
413
+ "kernel_unavailable": 0,
414
+ "offset_invalid": 0,
415
+ "partitioned_invalid_output": 0,
416
+ "partitioned_unavailable": 0,
417
+ "q_len_gt_max": 0,
418
+ "q_len_invalid": 0,
419
+ "turboquant_unsupported": 0,
420
+ "unknown": 0,
421
+ "unsupported_mask": 0
422
+ },
423
+ "decode_verify": {
424
+ "batch_not_1": 0,
425
+ "block_size_mismatch": 0,
426
+ "blocks_invalid": 0,
427
+ "dtype_unsupported": 0,
428
+ "empty_cache": 0,
429
+ "head_dim_unsupported": 0,
430
+ "kernel_unavailable": 0,
431
+ "offset_invalid": 0,
432
+ "partitioned_invalid_output": 0,
433
+ "partitioned_unavailable": 0,
434
+ "q_len_gt_max": 0,
435
+ "q_len_invalid": 0,
436
+ "turboquant_unsupported": 0,
437
+ "unknown": 0,
438
+ "unsupported_mask": 0
439
+ },
440
+ "postcommit": {
441
+ "batch_not_1": 0,
442
+ "block_size_mismatch": 0,
443
+ "blocks_invalid": 0,
444
+ "dtype_unsupported": 0,
445
+ "empty_cache": 0,
446
+ "head_dim_unsupported": 0,
447
+ "kernel_unavailable": 0,
448
+ "offset_invalid": 0,
449
+ "partitioned_invalid_output": 0,
450
+ "partitioned_unavailable": 0,
451
+ "q_len_gt_max": 0,
452
+ "q_len_invalid": 0,
453
+ "turboquant_unsupported": 0,
454
+ "unknown": 0,
455
+ "unsupported_mask": 0
456
+ },
457
+ "prefill": {
458
+ "batch_not_1": 0,
459
+ "block_size_mismatch": 0,
460
+ "blocks_invalid": 0,
461
+ "dtype_unsupported": 0,
462
+ "empty_cache": 0,
463
+ "head_dim_unsupported": 0,
464
+ "kernel_unavailable": 0,
465
+ "offset_invalid": 0,
466
+ "partitioned_invalid_output": 0,
467
+ "partitioned_unavailable": 0,
468
+ "q_len_gt_max": 0,
469
+ "q_len_invalid": 0,
470
+ "turboquant_unsupported": 0,
471
+ "unknown": 0,
472
+ "unsupported_mask": 0
473
+ },
474
+ "unknown": {
475
+ "batch_not_1": 0,
476
+ "block_size_mismatch": 0,
477
+ "blocks_invalid": 0,
478
+ "dtype_unsupported": 0,
479
+ "empty_cache": 0,
480
+ "head_dim_unsupported": 0,
481
+ "kernel_unavailable": 0,
482
+ "offset_invalid": 0,
483
+ "partitioned_invalid_output": 0,
484
+ "partitioned_unavailable": 0,
485
+ "q_len_gt_max": 0,
486
+ "q_len_invalid": 0,
487
+ "turboquant_unsupported": 0,
488
+ "unknown": 0,
489
+ "unsupported_mask": 0
490
+ }
491
+ },
492
+ "paged_attention_large_q_path": {
493
+ "dense_forbidden": 0,
494
+ "large_q_split_sdpa_fallback": 0,
495
+ "partitioned_paged": 0,
496
+ "tail_paged": 0,
497
+ "unknown": 0
498
+ },
499
+ "postcommit_dense_fallback_calls": 0,
500
+ "prefill_dense_fallback_calls": 0,
501
+ "trace_events": false
502
+ },
503
+ "timing_s": {
504
+ "accept": 0.015003502820036374,
505
+ "draft": 1.5297440800641198,
506
+ "rollback": 0.00013703109289053828,
507
+ "target_distribution": 20.73823187011294,
508
+ "target_hidden": 0.0,
509
+ "verify": 0.1980874161090469
510
+ },
511
+ "tok_s": 44.457398919489286,
512
+ "token_preview": [
513
+ 9996,
514
+ 625,
515
+ 759,
516
+ 759,
517
+ 759,
518
+ 759,
519
+ 759,
520
+ 759,
521
+ 759,
522
+ 759,
523
+ 759,
524
+ 759,
525
+ 759,
526
+ 759,
527
+ 759,
528
+ 759
529
+ ],
530
+ "verify_calls": 170
531
+ }
532
+ ],
533
+ "best_block_size": 6,
534
+ "best_speedup": 1.8515941651996162,
535
+ "blockers": [
536
+ "best speedup 1.852x is below 2.000x",
537
+ "median confirmation speedup 1.852x is below 2.000x"
538
+ ],
539
+ "draft_sampler": {
540
+ "exactness_note": "Assistant q may differ from target p; MTPLX remains exact because acceptance uses p/q and rejection samples the residual distribution.",
541
+ "inherits_target_sampler": true,
542
+ "temperature": null,
543
+ "top_k": null,
544
+ "top_p": null
545
+ },
546
+ "fan_restore": {
547
+ "after": {
548
+ "actual_max_rpm": 5766,
549
+ "actual_min_rpm": 5419,
550
+ "capacity_max_rpm": 7826,
551
+ "capacity_min_rpm": 7826,
552
+ "fans": [
553
+ {
554
+ "actual_rpm": 5419,
555
+ "max_capacity_rpm": 7826,
556
+ "mode": "auto",
557
+ "raw": {
558
+ "actual_rpm": 5419,
559
+ "index": 0,
560
+ "max_rpm": 7826,
561
+ "min_rpm": 2317,
562
+ "mode": "auto",
563
+ "target_rpm": 4818
564
+ },
565
+ "rpm": 5419,
566
+ "target_rpm": 4818
567
+ },
568
+ {
569
+ "actual_rpm": 5766,
570
+ "max_capacity_rpm": 7826,
571
+ "mode": "auto",
572
+ "raw": {
573
+ "actual_rpm": 5766,
574
+ "index": 1,
575
+ "max_rpm": 7826,
576
+ "min_rpm": 2317,
577
+ "mode": "auto",
578
+ "target_rpm": 5203
579
+ },
580
+ "rpm": 5766,
581
+ "target_rpm": 5203
582
+ }
583
+ ],
584
+ "max_rpm": 5766,
585
+ "min_rpm": 5419,
586
+ "ok": true,
587
+ "raw": {
588
+ "attempts": [
589
+ {
590
+ "command": [
591
+ "/Users/youssof/.mtplx/bin/thermalforge",
592
+ "status"
593
+ ],
594
+ "ok": true,
595
+ "returncode": 0,
596
+ "stderr": "",
597
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 5419,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 4818\n },\n {\n \"actual_rpm\" : 5766,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5203\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.1,\n \"TB0T\" : 32.1,\n \"TCDX\" : 75.2,\n \"TCHP\" : 61.6,\n \"TCMb\" : 89.2,\n \"TG0B\" : 32.1,\n \"TG0H\" : 32,\n \"TG0V\" : 32.1,\n \"TH0x\" : 37.9,\n \"TMVR\" : 67,\n \"TPDX\" : 67.3,\n \"TRDX\" : 80.6,\n \"TS0P\" : 73.2,\n \"Tg0j\" : 78.6,\n \"Tm08\" : 73.9,\n \"Tp04\" : 75.3,\n \"Tp08\" : 74.8,\n \"Tp0C\" : 76.3,\n \"Tp0G\" : 76.4,\n \"Tp0X\" : 75.9\n }\n}"
598
+ }
599
+ ],
600
+ "detection": {
601
+ "available": true,
602
+ "clock_anchor_enabled": false,
603
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
604
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
605
+ "selected": {
606
+ "kind": "thermalforge",
607
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
608
+ "version": {
609
+ "command": [
610
+ "/Users/youssof/.mtplx/bin/thermalforge",
611
+ "--version"
612
+ ],
613
+ "ok": true,
614
+ "returncode": 0,
615
+ "stderr": "",
616
+ "stdout": "0.1.0"
617
+ }
618
+ },
619
+ "tools": [
620
+ {
621
+ "kind": "thermalforge",
622
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
623
+ "version": {
624
+ "command": [
625
+ "/Users/youssof/.mtplx/bin/thermalforge",
626
+ "--version"
627
+ ],
628
+ "ok": true,
629
+ "returncode": 0,
630
+ "stderr": "",
631
+ "stdout": "0.1.0"
632
+ }
633
+ }
634
+ ]
635
+ },
636
+ "ok": true,
637
+ "status": {
638
+ "command": [
639
+ "/Users/youssof/.mtplx/bin/thermalforge",
640
+ "status"
641
+ ],
642
+ "ok": true,
643
+ "returncode": 0,
644
+ "stderr": "",
645
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 5419,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 4818\n },\n {\n \"actual_rpm\" : 5766,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5203\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.1,\n \"TB0T\" : 32.1,\n \"TCDX\" : 75.2,\n \"TCHP\" : 61.6,\n \"TCMb\" : 89.2,\n \"TG0B\" : 32.1,\n \"TG0H\" : 32,\n \"TG0V\" : 32.1,\n \"TH0x\" : 37.9,\n \"TMVR\" : 67,\n \"TPDX\" : 67.3,\n \"TRDX\" : 80.6,\n \"TS0P\" : 73.2,\n \"Tg0j\" : 78.6,\n \"Tm08\" : 73.9,\n \"Tp04\" : 75.3,\n \"Tp08\" : 74.8,\n \"Tp0C\" : 76.3,\n \"Tp0G\" : 76.4,\n \"Tp0X\" : 75.9\n }\n}"
646
+ }
647
+ },
648
+ "target_max_rpm": 5203,
649
+ "target_min_rpm": 4818
650
+ },
651
+ "message": "fan profile restored",
652
+ "ok": true,
653
+ "profile": "silent",
654
+ "set_result": {
655
+ "attempts": [
656
+ {
657
+ "command": [
658
+ "sudo",
659
+ "-n",
660
+ "/Users/youssof/.mtplx/bin/thermalforge",
661
+ "auto"
662
+ ],
663
+ "ok": true,
664
+ "returncode": 0,
665
+ "stderr": "No matching processes were found",
666
+ "stdout": "Fans reset to Apple defaults"
667
+ }
668
+ ],
669
+ "command": [
670
+ "sudo",
671
+ "-n",
672
+ "/Users/youssof/.mtplx/bin/thermalforge",
673
+ "auto"
674
+ ],
675
+ "detection": {
676
+ "available": true,
677
+ "clock_anchor_enabled": false,
678
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
679
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
680
+ "selected": {
681
+ "kind": "thermalforge",
682
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
683
+ "version": {
684
+ "command": [
685
+ "/Users/youssof/.mtplx/bin/thermalforge",
686
+ "--version"
687
+ ],
688
+ "ok": true,
689
+ "returncode": 0,
690
+ "stderr": "",
691
+ "stdout": "0.1.0"
692
+ }
693
+ },
694
+ "tools": [
695
+ {
696
+ "kind": "thermalforge",
697
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
698
+ "version": {
699
+ "command": [
700
+ "/Users/youssof/.mtplx/bin/thermalforge",
701
+ "--version"
702
+ ],
703
+ "ok": true,
704
+ "returncode": 0,
705
+ "stderr": "",
706
+ "stdout": "0.1.0"
707
+ }
708
+ }
709
+ ]
710
+ },
711
+ "dry_run": false,
712
+ "ok": true,
713
+ "profile": "silent"
714
+ }
715
+ },
716
+ "fanmax": {
717
+ "after": {
718
+ "actual_max_rpm": 7406,
719
+ "actual_min_rpm": 7372,
720
+ "capacity_max_rpm": 7826,
721
+ "capacity_min_rpm": 7826,
722
+ "fans": [
723
+ {
724
+ "actual_rpm": 7372,
725
+ "max_capacity_rpm": 7826,
726
+ "mode": "manual",
727
+ "raw": {
728
+ "actual_rpm": 7372,
729
+ "index": 0,
730
+ "max_rpm": 7826,
731
+ "min_rpm": 2317,
732
+ "mode": "manual",
733
+ "target_rpm": 7826
734
+ },
735
+ "rpm": 7372,
736
+ "target_rpm": 7826
737
+ },
738
+ {
739
+ "actual_rpm": 7406,
740
+ "max_capacity_rpm": 7826,
741
+ "mode": "manual",
742
+ "raw": {
743
+ "actual_rpm": 7406,
744
+ "index": 1,
745
+ "max_rpm": 7826,
746
+ "min_rpm": 2317,
747
+ "mode": "manual",
748
+ "target_rpm": 7826
749
+ },
750
+ "rpm": 7406,
751
+ "target_rpm": 7826
752
+ }
753
+ ],
754
+ "max_rpm": 7406,
755
+ "min_rpm": 7372,
756
+ "ok": true,
757
+ "raw": {
758
+ "attempts": [
759
+ {
760
+ "command": [
761
+ "/Users/youssof/.mtplx/bin/thermalforge",
762
+ "status"
763
+ ],
764
+ "ok": true,
765
+ "returncode": 0,
766
+ "stderr": "",
767
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 7372,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n },\n {\n \"actual_rpm\" : 7406,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.2,\n \"TB0T\" : 31.9,\n \"TCDX\" : 63.9,\n \"TCHP\" : 56.9,\n \"TCMb\" : 72.3,\n \"TG0B\" : 31.9,\n \"TG0H\" : 31,\n \"TG0V\" : 31.9,\n \"TH0x\" : 37.2,\n \"TMVR\" : 56.2,\n \"TPDX\" : 61.2,\n \"TRDX\" : 63.1,\n \"TS0P\" : 63.3,\n \"Tg0j\" : 62.4,\n \"Tm08\" : 62.5,\n \"Tp04\" : 63.4,\n \"Tp08\" : 62.8,\n \"Tp0C\" : 63.7,\n \"Tp0G\" : 64.2,\n \"Tp0X\" : 63.2\n }\n}"
768
+ }
769
+ ],
770
+ "detection": {
771
+ "available": true,
772
+ "clock_anchor_enabled": false,
773
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
774
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
775
+ "selected": {
776
+ "kind": "thermalforge",
777
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
778
+ "version": {
779
+ "command": [
780
+ "/Users/youssof/.mtplx/bin/thermalforge",
781
+ "--version"
782
+ ],
783
+ "ok": true,
784
+ "returncode": 0,
785
+ "stderr": "",
786
+ "stdout": "0.1.0"
787
+ }
788
+ },
789
+ "tools": [
790
+ {
791
+ "kind": "thermalforge",
792
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
793
+ "version": {
794
+ "command": [
795
+ "/Users/youssof/.mtplx/bin/thermalforge",
796
+ "--version"
797
+ ],
798
+ "ok": true,
799
+ "returncode": 0,
800
+ "stderr": "",
801
+ "stdout": "0.1.0"
802
+ }
803
+ }
804
+ ]
805
+ },
806
+ "ok": true,
807
+ "status": {
808
+ "command": [
809
+ "/Users/youssof/.mtplx/bin/thermalforge",
810
+ "status"
811
+ ],
812
+ "ok": true,
813
+ "returncode": 0,
814
+ "stderr": "",
815
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 7372,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n },\n {\n \"actual_rpm\" : 7406,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.2,\n \"TB0T\" : 31.9,\n \"TCDX\" : 63.9,\n \"TCHP\" : 56.9,\n \"TCMb\" : 72.3,\n \"TG0B\" : 31.9,\n \"TG0H\" : 31,\n \"TG0V\" : 31.9,\n \"TH0x\" : 37.2,\n \"TMVR\" : 56.2,\n \"TPDX\" : 61.2,\n \"TRDX\" : 63.1,\n \"TS0P\" : 63.3,\n \"Tg0j\" : 62.4,\n \"Tm08\" : 62.5,\n \"Tp04\" : 63.4,\n \"Tp08\" : 62.8,\n \"Tp0C\" : 63.7,\n \"Tp0G\" : 64.2,\n \"Tp0X\" : 63.2\n }\n}"
816
+ }
817
+ },
818
+ "target_max_rpm": 7826,
819
+ "target_min_rpm": 7826
820
+ },
821
+ "baseline": {
822
+ "actual_max_rpm": 5219,
823
+ "actual_min_rpm": 4823,
824
+ "capacity_max_rpm": 7826,
825
+ "capacity_min_rpm": 7826,
826
+ "fans": [
827
+ {
828
+ "actual_rpm": 4823,
829
+ "max_capacity_rpm": 7826,
830
+ "mode": "auto",
831
+ "raw": {
832
+ "actual_rpm": 4823,
833
+ "index": 0,
834
+ "max_rpm": 7826,
835
+ "min_rpm": 2317,
836
+ "mode": "auto",
837
+ "target_rpm": 4826
838
+ },
839
+ "rpm": 4823,
840
+ "target_rpm": 4826
841
+ },
842
+ {
843
+ "actual_rpm": 5219,
844
+ "max_capacity_rpm": 7826,
845
+ "mode": "auto",
846
+ "raw": {
847
+ "actual_rpm": 5219,
848
+ "index": 1,
849
+ "max_rpm": 7826,
850
+ "min_rpm": 2317,
851
+ "mode": "auto",
852
+ "target_rpm": 5212
853
+ },
854
+ "rpm": 5219,
855
+ "target_rpm": 5212
856
+ }
857
+ ],
858
+ "max_rpm": 5219,
859
+ "min_rpm": 4823,
860
+ "ok": true,
861
+ "raw": {
862
+ "attempts": [
863
+ {
864
+ "command": [
865
+ "/Users/youssof/.mtplx/bin/thermalforge",
866
+ "status"
867
+ ],
868
+ "ok": true,
869
+ "returncode": 0,
870
+ "stderr": "",
871
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 4823,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 4826\n },\n {\n \"actual_rpm\" : 5219,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5212\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.2,\n \"TB0T\" : 31.9,\n \"TCDX\" : 63.9,\n \"TCHP\" : 57.2,\n \"TCMb\" : 73.1,\n \"TG0B\" : 31.9,\n \"TG0H\" : 31,\n \"TG0V\" : 31.9,\n \"TH0x\" : 37.3,\n \"TMVR\" : 56.7,\n \"TPDX\" : 61.7,\n \"TRDX\" : 63.6,\n \"TS0P\" : 63.9,\n \"Tg0j\" : 62.9,\n \"Tm08\" : 62.9,\n \"Tp04\" : 63.7,\n \"Tp08\" : 63,\n \"Tp0C\" : 63.9,\n \"Tp0G\" : 64.5,\n \"Tp0X\" : 63.6\n }\n}"
872
+ }
873
+ ],
874
+ "detection": {
875
+ "available": true,
876
+ "clock_anchor_enabled": false,
877
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
878
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
879
+ "selected": {
880
+ "kind": "thermalforge",
881
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
882
+ "version": {
883
+ "command": [
884
+ "/Users/youssof/.mtplx/bin/thermalforge",
885
+ "--version"
886
+ ],
887
+ "ok": true,
888
+ "returncode": 0,
889
+ "stderr": "",
890
+ "stdout": "0.1.0"
891
+ }
892
+ },
893
+ "tools": [
894
+ {
895
+ "kind": "thermalforge",
896
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
897
+ "version": {
898
+ "command": [
899
+ "/Users/youssof/.mtplx/bin/thermalforge",
900
+ "--version"
901
+ ],
902
+ "ok": true,
903
+ "returncode": 0,
904
+ "stderr": "",
905
+ "stdout": "0.1.0"
906
+ }
907
+ }
908
+ ]
909
+ },
910
+ "ok": true,
911
+ "status": {
912
+ "command": [
913
+ "/Users/youssof/.mtplx/bin/thermalforge",
914
+ "status"
915
+ ],
916
+ "ok": true,
917
+ "returncode": 0,
918
+ "stderr": "",
919
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 4823,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 4826\n },\n {\n \"actual_rpm\" : 5219,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5212\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.2,\n \"TB0T\" : 31.9,\n \"TCDX\" : 63.9,\n \"TCHP\" : 57.2,\n \"TCMb\" : 73.1,\n \"TG0B\" : 31.9,\n \"TG0H\" : 31,\n \"TG0V\" : 31.9,\n \"TH0x\" : 37.3,\n \"TMVR\" : 56.7,\n \"TPDX\" : 61.7,\n \"TRDX\" : 63.6,\n \"TS0P\" : 63.9,\n \"Tg0j\" : 62.9,\n \"Tm08\" : 62.9,\n \"Tp04\" : 63.7,\n \"Tp08\" : 63,\n \"Tp0C\" : 63.9,\n \"Tp0G\" : 64.5,\n \"Tp0X\" : 63.6\n }\n}"
920
+ }
921
+ },
922
+ "target_max_rpm": 5212,
923
+ "target_min_rpm": 4826
924
+ },
925
+ "message": "fans ramped to max (actual 7372-7406 RPM; target 7826 RPM)",
926
+ "ok": true,
927
+ "profile": "max",
928
+ "set_result": {
929
+ "attempts": [
930
+ {
931
+ "command": [
932
+ "sudo",
933
+ "-n",
934
+ "/Users/youssof/.mtplx/bin/thermalforge",
935
+ "max"
936
+ ],
937
+ "ok": true,
938
+ "returncode": 0,
939
+ "stderr": "",
940
+ "stdout": "Fan 0: 4823 RPM \u2192 max (7826 RPM)\nFan 1: 5219 RPM \u2192 max (7826 RPM)"
941
+ }
942
+ ],
943
+ "command": [
944
+ "sudo",
945
+ "-n",
946
+ "/Users/youssof/.mtplx/bin/thermalforge",
947
+ "max"
948
+ ],
949
+ "detection": {
950
+ "available": true,
951
+ "clock_anchor_enabled": false,
952
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
953
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
954
+ "selected": {
955
+ "kind": "thermalforge",
956
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
957
+ "version": {
958
+ "command": [
959
+ "/Users/youssof/.mtplx/bin/thermalforge",
960
+ "--version"
961
+ ],
962
+ "ok": true,
963
+ "returncode": 0,
964
+ "stderr": "",
965
+ "stdout": "0.1.0"
966
+ }
967
+ },
968
+ "tools": [
969
+ {
970
+ "kind": "thermalforge",
971
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
972
+ "version": {
973
+ "command": [
974
+ "/Users/youssof/.mtplx/bin/thermalforge",
975
+ "--version"
976
+ ],
977
+ "ok": true,
978
+ "returncode": 0,
979
+ "stderr": "",
980
+ "stdout": "0.1.0"
981
+ }
982
+ }
983
+ ]
984
+ },
985
+ "dry_run": false,
986
+ "ok": true,
987
+ "profile": "max"
988
+ }
989
+ },
990
+ "max_tokens": 1000,
991
+ "median_confirmation_speedup": 1.8515941651996162,
992
+ "memory_gate": {
993
+ "ar_peak_memory_gib": 17.040895924903452,
994
+ "lte_ar_plus_6_gib": true,
995
+ "lte_ar_times_1_18": true,
996
+ "mtp_peak_memory_gib": 17.566339950077236
997
+ },
998
+ "mtp_by_block_size": {
999
+ "6": {
1000
+ "acceptance": 0.9810874704491725,
1001
+ "accepted_drafts": 830,
1002
+ "active_memory_gib": 17.410899550653994,
1003
+ "block_size": 6,
1004
+ "cache_memory_gib": 67.3336522737518,
1005
+ "decode_s": 22.49344370800827,
1006
+ "draft_sampler": {
1007
+ "temperature": 1.0,
1008
+ "top_k": 64,
1009
+ "top_p": 0.95
1010
+ },
1011
+ "drafted_tokens": 846,
1012
+ "generated_tokens": 1000,
1013
+ "mode": "mtp",
1014
+ "peak_memory_gib": 17.566339950077236,
1015
+ "prefill_s": 0.2826114170020446,
1016
+ "row_distribution_evals": 0,
1017
+ "speedup_vs_ar": 1.8515941651996162,
1018
+ "target_distribution_modes": {
1019
+ "batched_logits": 170
1020
+ },
1021
+ "target_sampler": {
1022
+ "temperature": 1.0,
1023
+ "top_k": 64,
1024
+ "top_p": 0.95
1025
+ },
1026
+ "telemetry": {
1027
+ "ar_dense_fallback_calls": 0,
1028
+ "decode_dense_fallback_calls": 0,
1029
+ "dense_fallback_calls_by_phase": {
1030
+ "ar_decode": 0,
1031
+ "decode_verify": 0,
1032
+ "postcommit": 0,
1033
+ "prefill": 0,
1034
+ "unknown": 0
1035
+ },
1036
+ "events": [],
1037
+ "paged_active_array_calls_by_phase": {
1038
+ "ar_decode": 0,
1039
+ "decode_verify": 0,
1040
+ "postcommit": 0,
1041
+ "prefill": 0,
1042
+ "unknown": 0
1043
+ },
1044
+ "paged_attention_bailouts_by_phase_reason": {
1045
+ "ar_decode": {
1046
+ "batch_not_1": 0,
1047
+ "block_size_mismatch": 0,
1048
+ "blocks_invalid": 0,
1049
+ "dtype_unsupported": 0,
1050
+ "empty_cache": 0,
1051
+ "head_dim_unsupported": 0,
1052
+ "kernel_unavailable": 0,
1053
+ "offset_invalid": 0,
1054
+ "partitioned_invalid_output": 0,
1055
+ "partitioned_unavailable": 0,
1056
+ "q_len_gt_max": 0,
1057
+ "q_len_invalid": 0,
1058
+ "turboquant_unsupported": 0,
1059
+ "unknown": 0,
1060
+ "unsupported_mask": 0
1061
+ },
1062
+ "decode_verify": {
1063
+ "batch_not_1": 0,
1064
+ "block_size_mismatch": 0,
1065
+ "blocks_invalid": 0,
1066
+ "dtype_unsupported": 0,
1067
+ "empty_cache": 0,
1068
+ "head_dim_unsupported": 0,
1069
+ "kernel_unavailable": 0,
1070
+ "offset_invalid": 0,
1071
+ "partitioned_invalid_output": 0,
1072
+ "partitioned_unavailable": 0,
1073
+ "q_len_gt_max": 0,
1074
+ "q_len_invalid": 0,
1075
+ "turboquant_unsupported": 0,
1076
+ "unknown": 0,
1077
+ "unsupported_mask": 0
1078
+ },
1079
+ "postcommit": {
1080
+ "batch_not_1": 0,
1081
+ "block_size_mismatch": 0,
1082
+ "blocks_invalid": 0,
1083
+ "dtype_unsupported": 0,
1084
+ "empty_cache": 0,
1085
+ "head_dim_unsupported": 0,
1086
+ "kernel_unavailable": 0,
1087
+ "offset_invalid": 0,
1088
+ "partitioned_invalid_output": 0,
1089
+ "partitioned_unavailable": 0,
1090
+ "q_len_gt_max": 0,
1091
+ "q_len_invalid": 0,
1092
+ "turboquant_unsupported": 0,
1093
+ "unknown": 0,
1094
+ "unsupported_mask": 0
1095
+ },
1096
+ "prefill": {
1097
+ "batch_not_1": 0,
1098
+ "block_size_mismatch": 0,
1099
+ "blocks_invalid": 0,
1100
+ "dtype_unsupported": 0,
1101
+ "empty_cache": 0,
1102
+ "head_dim_unsupported": 0,
1103
+ "kernel_unavailable": 0,
1104
+ "offset_invalid": 0,
1105
+ "partitioned_invalid_output": 0,
1106
+ "partitioned_unavailable": 0,
1107
+ "q_len_gt_max": 0,
1108
+ "q_len_invalid": 0,
1109
+ "turboquant_unsupported": 0,
1110
+ "unknown": 0,
1111
+ "unsupported_mask": 0
1112
+ },
1113
+ "unknown": {
1114
+ "batch_not_1": 0,
1115
+ "block_size_mismatch": 0,
1116
+ "blocks_invalid": 0,
1117
+ "dtype_unsupported": 0,
1118
+ "empty_cache": 0,
1119
+ "head_dim_unsupported": 0,
1120
+ "kernel_unavailable": 0,
1121
+ "offset_invalid": 0,
1122
+ "partitioned_invalid_output": 0,
1123
+ "partitioned_unavailable": 0,
1124
+ "q_len_gt_max": 0,
1125
+ "q_len_invalid": 0,
1126
+ "turboquant_unsupported": 0,
1127
+ "unknown": 0,
1128
+ "unsupported_mask": 0
1129
+ }
1130
+ },
1131
+ "paged_attention_large_q_path": {
1132
+ "dense_forbidden": 0,
1133
+ "large_q_split_sdpa_fallback": 0,
1134
+ "partitioned_paged": 0,
1135
+ "tail_paged": 0,
1136
+ "unknown": 0
1137
+ },
1138
+ "postcommit_dense_fallback_calls": 0,
1139
+ "prefill_dense_fallback_calls": 0,
1140
+ "trace_events": false
1141
+ },
1142
+ "timing_s": {
1143
+ "accept": 0.015003502820036374,
1144
+ "draft": 1.5297440800641198,
1145
+ "rollback": 0.00013703109289053828,
1146
+ "target_distribution": 20.73823187011294,
1147
+ "target_hidden": 0.0,
1148
+ "verify": 0.1980874161090469
1149
+ },
1150
+ "tok_s": 44.457398919489286,
1151
+ "token_preview": [
1152
+ 9996,
1153
+ 625,
1154
+ 759,
1155
+ 759,
1156
+ 759,
1157
+ 759,
1158
+ 759,
1159
+ 759,
1160
+ 759,
1161
+ 759,
1162
+ 759,
1163
+ 759,
1164
+ 759,
1165
+ 759,
1166
+ 759,
1167
+ 759
1168
+ ],
1169
+ "verify_calls": 170
1170
+ }
1171
+ },
1172
+ "passed": false,
1173
+ "prompt_id": "flappy_html5_canvas_game",
1174
+ "prompt_tokens": 119,
1175
+ "sampler": {
1176
+ "seed": 0,
1177
+ "temperature": 1.0,
1178
+ "top_k": 64,
1179
+ "top_p": 0.95
1180
+ }
1181
+ },
1182
+ "status": "failed"
1183
+ }
benchmarks/flappy1000-targetq4-assistantq6-confirm2-block6.json ADDED
@@ -0,0 +1,1183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "arch_id": "gemma4-assistant-mtp",
3
+ "artifacts": {
4
+ "assistant_dtype": null,
5
+ "assistant_format": "q6-g64-affine",
6
+ "assistant_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx",
7
+ "assistant_quantization": {
8
+ "bits": 6,
9
+ "group_size": 64,
10
+ "mode": "affine"
11
+ },
12
+ "disk_ok": true,
13
+ "min_free_gib": 220.0,
14
+ "observed_free_gib": 704.1539611816406,
15
+ "target_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx",
16
+ "target_quantization": {
17
+ "bits": 4,
18
+ "format": "mlx-flat4-g64",
19
+ "group_size": 64,
20
+ "mode": "affine"
21
+ }
22
+ },
23
+ "backend": "gemma4_assistant",
24
+ "benchmark": {
25
+ "draft_block_sizes": [
26
+ 6
27
+ ],
28
+ "draft_sampler": {
29
+ "exactness_note": "Assistant q may differ from target p; MTPLX remains exact because acceptance uses p/q and rejection samples the residual distribution.",
30
+ "inherits_target_sampler": true,
31
+ "temperature": null,
32
+ "top_k": null,
33
+ "top_p": null
34
+ },
35
+ "max_mode": true,
36
+ "max_tokens": 1000,
37
+ "profile": "sustained",
38
+ "prompt_suite": "mtplx/benchmarks/prompts/flappy.jsonl",
39
+ "reasoning": "off",
40
+ "sampler_source": {
41
+ "do_sample": true,
42
+ "local_reference": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx/generation_config.json",
43
+ "name": "official Gemma 4 generation_config.json",
44
+ "temperature": 1.0,
45
+ "top_k": 64,
46
+ "top_p": 0.95
47
+ },
48
+ "seed": 0,
49
+ "temperature": 1.0,
50
+ "top_k": 64,
51
+ "top_p": 0.95
52
+ },
53
+ "blockers": [
54
+ "best speedup 1.923x is below 2.000x",
55
+ "median confirmation speedup 1.923x is below 2.000x"
56
+ ],
57
+ "can_run_now": true,
58
+ "gates": {
59
+ "generated_tokens": 1000,
60
+ "longer_lengths_blocked_until_160_passes": true,
61
+ "median_of_3_min_speedup_vs_ar": 2.0,
62
+ "min_speedup_vs_ar": 2.0,
63
+ "mtp_peak_memory_lte_ar_multiplier": 1.18,
64
+ "mtp_peak_memory_lte_ar_plus_gib": 6
65
+ },
66
+ "official_sources": {
67
+ "assistant": "google/gemma-4-31B-it-assistant",
68
+ "assistant_revision": "cffbbd2cea41ea56a0fa5b0487e0d445121fd204",
69
+ "target": "google/gemma-4-31B-it",
70
+ "target_revision": "145dc2508c480a64b47242f160d286cff94a2343"
71
+ },
72
+ "pair": {
73
+ "assistant_exists": true,
74
+ "assistant_inspection": {
75
+ "architecture": "Gemma4AssistantForCausalLM",
76
+ "architecture_recognized": true,
77
+ "backbone_hidden_size": 5376,
78
+ "compatibility": {
79
+ "arch_id": "gemma4-assistant-mtp",
80
+ "can_run": false,
81
+ "exit_code": 3,
82
+ "message": "Official-style Gemma 4 31B assistant artifact recognized. This is an assistant-backed MTP pair, not a standalone target; MTPLX scaffold is present but QA and the 160-token speed/memory gate are still pending.",
83
+ "mtp_supported": "recognized",
84
+ "recognized": true,
85
+ "recommended_backend": "gemma4_assistant",
86
+ "recommended_profile": "performance-cold",
87
+ "runtime_compatibility": "assistant-pair-qa-pending",
88
+ "runtime_contract": null,
89
+ "runtime_contract_error": null,
90
+ "runtime_contract_path": null,
91
+ "support_level": "architecture-scaffolded-qa-pending",
92
+ "support_notes": "Assistant-backed scaffold for the official dense Gemma 4 31B pair. It remains QA-pending and is not a public runnable backend until 160-token exactness, speed, and memory gates pass.",
93
+ "supported": false,
94
+ "tier": "architecture-compatible-but-unverified",
95
+ "unsafe_force_required": false,
96
+ "unverified_model": true
97
+ },
98
+ "config_exists": true,
99
+ "hidden_size": 1024,
100
+ "layer_types": [
101
+ "sliding_attention",
102
+ "sliding_attention",
103
+ "sliding_attention",
104
+ "full_attention"
105
+ ],
106
+ "model_dir": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx",
107
+ "model_files": [
108
+ "model.safetensors"
109
+ ],
110
+ "model_type": "gemma4_assistant",
111
+ "mtp": {
112
+ "exists": false,
113
+ "expected_tensor_count": 15,
114
+ "extra_keys": [],
115
+ "metadata_only": true,
116
+ "missing_expected_keys": [],
117
+ "mtp_file": "model.safetensors.index.json::embedded",
118
+ "passes_tensor_gate": false,
119
+ "sidecar_format": "bf16",
120
+ "tensor_count": 0,
121
+ "tensors": []
122
+ },
123
+ "mtp_arch": "gemma4-assistant-mtp",
124
+ "mtp_num_hidden_layers": 0,
125
+ "mtp_pattern": null,
126
+ "mtp_supported": "recognized",
127
+ "num_hidden_layers": 4,
128
+ "num_kv_shared_layers": 4,
129
+ "passes_primary_gate": false,
130
+ "quantization": {
131
+ "bits": 6,
132
+ "group_size": 64,
133
+ "mode": "affine"
134
+ },
135
+ "recommended_backend": "gemma4_assistant",
136
+ "recommended_profile": "performance-cold",
137
+ "runtime_compatibility": "assistant-pair-qa-pending",
138
+ "runtime_contract_path": null,
139
+ "sidecars": {
140
+ "preprocessor_config.json": false,
141
+ "processor_config.json": false,
142
+ "video_preprocessor_config.json": false
143
+ },
144
+ "source": "local",
145
+ "support_level": "architecture-scaffolded-qa-pending",
146
+ "support_notes": "Assistant-backed scaffold for the official dense Gemma 4 31B pair. It remains QA-pending and is not a public runnable backend until 160-token exactness, speed, and memory gates pass.",
147
+ "unverified_model": true,
148
+ "use_ordered_embeddings": false,
149
+ "vocab_size": 262144
150
+ },
151
+ "assistant_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx",
152
+ "pair_error": null,
153
+ "pair_valid": true,
154
+ "target_exists": true,
155
+ "target_inspection": {
156
+ "architecture": "Gemma4ForConditionalGeneration",
157
+ "architecture_recognized": false,
158
+ "backbone_hidden_size": null,
159
+ "compatibility": {
160
+ "arch_id": null,
161
+ "can_run": false,
162
+ "exit_code": 2,
163
+ "message": "Model has no MTP head. MTPLX requires an MTP-equipped model.",
164
+ "mtp_supported": "no",
165
+ "recognized": false,
166
+ "recommended_backend": null,
167
+ "recommended_profile": null,
168
+ "runtime_compatibility": "unsupported",
169
+ "runtime_contract": null,
170
+ "runtime_contract_error": null,
171
+ "runtime_contract_path": null,
172
+ "support_level": "unsupported",
173
+ "support_notes": null,
174
+ "supported": false,
175
+ "tier": "no-MTP",
176
+ "unsafe_force_required": false,
177
+ "unverified_model": false
178
+ },
179
+ "config_exists": true,
180
+ "hidden_size": 5376,
181
+ "layer_types": [
182
+ "sliding_attention",
183
+ "sliding_attention",
184
+ "sliding_attention",
185
+ "sliding_attention",
186
+ "sliding_attention",
187
+ "full_attention",
188
+ "sliding_attention",
189
+ "sliding_attention",
190
+ "sliding_attention",
191
+ "sliding_attention",
192
+ "sliding_attention",
193
+ "full_attention",
194
+ "sliding_attention",
195
+ "sliding_attention",
196
+ "sliding_attention",
197
+ "sliding_attention",
198
+ "sliding_attention",
199
+ "full_attention",
200
+ "sliding_attention",
201
+ "sliding_attention",
202
+ "sliding_attention",
203
+ "sliding_attention",
204
+ "sliding_attention",
205
+ "full_attention",
206
+ "sliding_attention",
207
+ "sliding_attention",
208
+ "sliding_attention",
209
+ "sliding_attention",
210
+ "sliding_attention",
211
+ "full_attention",
212
+ "sliding_attention",
213
+ "sliding_attention",
214
+ "sliding_attention",
215
+ "sliding_attention",
216
+ "sliding_attention",
217
+ "full_attention",
218
+ "sliding_attention",
219
+ "sliding_attention",
220
+ "sliding_attention",
221
+ "sliding_attention",
222
+ "sliding_attention",
223
+ "full_attention",
224
+ "sliding_attention",
225
+ "sliding_attention",
226
+ "sliding_attention",
227
+ "sliding_attention",
228
+ "sliding_attention",
229
+ "full_attention",
230
+ "sliding_attention",
231
+ "sliding_attention",
232
+ "sliding_attention",
233
+ "sliding_attention",
234
+ "sliding_attention",
235
+ "full_attention",
236
+ "sliding_attention",
237
+ "sliding_attention",
238
+ "sliding_attention",
239
+ "sliding_attention",
240
+ "sliding_attention",
241
+ "full_attention"
242
+ ],
243
+ "model_dir": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx",
244
+ "model_files": [
245
+ "model-00001-of-00004.safetensors",
246
+ "model-00002-of-00004.safetensors",
247
+ "model-00003-of-00004.safetensors",
248
+ "model-00004-of-00004.safetensors"
249
+ ],
250
+ "model_type": "gemma4_text",
251
+ "mtp": {
252
+ "exists": false,
253
+ "expected_tensor_count": 15,
254
+ "extra_keys": [],
255
+ "metadata_only": true,
256
+ "missing_expected_keys": [],
257
+ "mtp_file": "model.safetensors.index.json::embedded",
258
+ "passes_tensor_gate": false,
259
+ "sidecar_format": "bf16",
260
+ "tensor_count": 0,
261
+ "tensors": []
262
+ },
263
+ "mtp_arch": null,
264
+ "mtp_num_hidden_layers": 0,
265
+ "mtp_pattern": null,
266
+ "mtp_supported": "no",
267
+ "num_hidden_layers": 60,
268
+ "num_kv_shared_layers": 0,
269
+ "passes_primary_gate": false,
270
+ "quantization": {
271
+ "bits": 4,
272
+ "group_size": 64,
273
+ "mode": "affine"
274
+ },
275
+ "recommended_backend": null,
276
+ "recommended_profile": null,
277
+ "runtime_compatibility": "unsupported",
278
+ "runtime_contract_path": null,
279
+ "sidecars": {
280
+ "preprocessor_config.json": false,
281
+ "processor_config.json": false,
282
+ "video_preprocessor_config.json": false
283
+ },
284
+ "source": "local",
285
+ "support_level": "unsupported",
286
+ "support_notes": null,
287
+ "unverified_model": false,
288
+ "use_ordered_embeddings": null,
289
+ "vocab_size": 262144
290
+ },
291
+ "target_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx"
292
+ },
293
+ "planned_commands": {
294
+ "assistant_bf16_snapshot": "uv run python -c \"from huggingface_hub import snapshot_download; snapshot_download(repo_id='google/gemma-4-31B-it-assistant', revision='cffbbd2cea41ea56a0fa5b0487e0d445121fd204', repo_type='model', local_dir='/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx')\"",
295
+ "gate": "mtplx bench gemma-mtp --target-model /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx --assistant-model /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx --profile sustained --max --prompt-suite mtplx/benchmarks/prompts/flappy.jsonl --max-tokens 1000 --temperature 1.0 --top-p 0.95 --top-k 64 --seed 0 --reasoning off --draft-block-sizes 6 --json --output outputs/gemma4/flappy1000-q6assistant-pure-confirm2-block6.json",
296
+ "target_flat4_g64": "uv run python -m mlx_lm.convert --hf-path /Users/youssof/Documents/MTPLX/models/.sources/gemma-4-31B-it-145dc2508c48 --mlx-path /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx --quantize --q-bits 4 --q-group-size 64 --q-mode affine",
297
+ "target_revision_download": "uv run python -c \"from huggingface_hub import snapshot_download; snapshot_download(repo_id='google/gemma-4-31B-it', revision='145dc2508c480a64b47242f160d286cff94a2343', repo_type='model', local_dir='/Users/youssof/Documents/MTPLX/models/.sources/gemma-4-31B-it-145dc2508c48')\""
298
+ },
299
+ "qa_pending": true,
300
+ "results": {
301
+ "ar": {
302
+ "active_memory_gib": 16.96542397607118,
303
+ "cache_memory_gib": 1.9759961115196347,
304
+ "decode_s": 43.638336165997316,
305
+ "generated_tokens": 1000,
306
+ "mode": "ar",
307
+ "peak_memory_gib": 17.040895924903452,
308
+ "prefill_s": 0.7599931250006193,
309
+ "tok_s": 22.915630792981354,
310
+ "token_preview": [
311
+ 9996,
312
+ 625,
313
+ 24731,
314
+ 236761,
315
+ 9996,
316
+ 625,
317
+ 24731,
318
+ 236761,
319
+ 9996,
320
+ 625,
321
+ 24731,
322
+ 236761,
323
+ 9996,
324
+ 625,
325
+ 24731,
326
+ 236761
327
+ ]
328
+ },
329
+ "ar_confirmation": [
330
+ {
331
+ "active_memory_gib": 16.96542397607118,
332
+ "cache_memory_gib": 1.9759961115196347,
333
+ "decode_s": 43.638336165997316,
334
+ "generated_tokens": 1000,
335
+ "mode": "ar",
336
+ "peak_memory_gib": 17.040895924903452,
337
+ "prefill_s": 0.7599931250006193,
338
+ "tok_s": 22.915630792981354,
339
+ "token_preview": [
340
+ 9996,
341
+ 625,
342
+ 24731,
343
+ 236761,
344
+ 9996,
345
+ 625,
346
+ 24731,
347
+ 236761,
348
+ 9996,
349
+ 625,
350
+ 24731,
351
+ 236761,
352
+ 9996,
353
+ 625,
354
+ 24731,
355
+ 236761
356
+ ]
357
+ }
358
+ ],
359
+ "best_block_confirmation": [
360
+ {
361
+ "acceptance": 0.9810874704491725,
362
+ "accepted_drafts": 830,
363
+ "active_memory_gib": 17.410899550653994,
364
+ "block_size": 6,
365
+ "cache_memory_gib": 67.3336522737518,
366
+ "decode_s": 22.690279750007903,
367
+ "draft_sampler": {
368
+ "temperature": 1.0,
369
+ "top_k": 64,
370
+ "top_p": 0.95
371
+ },
372
+ "drafted_tokens": 846,
373
+ "generated_tokens": 1000,
374
+ "mode": "mtp",
375
+ "peak_memory_gib": 17.566339950077236,
376
+ "prefill_s": 0.25343483399774414,
377
+ "row_distribution_evals": 0,
378
+ "speedup_vs_ar": 1.9232171946219445,
379
+ "target_distribution_modes": {
380
+ "batched_logits": 170
381
+ },
382
+ "target_sampler": {
383
+ "temperature": 1.0,
384
+ "top_k": 64,
385
+ "top_p": 0.95
386
+ },
387
+ "telemetry": {
388
+ "ar_dense_fallback_calls": 0,
389
+ "decode_dense_fallback_calls": 0,
390
+ "dense_fallback_calls_by_phase": {
391
+ "ar_decode": 0,
392
+ "decode_verify": 0,
393
+ "postcommit": 0,
394
+ "prefill": 0,
395
+ "unknown": 0
396
+ },
397
+ "events": [],
398
+ "paged_active_array_calls_by_phase": {
399
+ "ar_decode": 0,
400
+ "decode_verify": 0,
401
+ "postcommit": 0,
402
+ "prefill": 0,
403
+ "unknown": 0
404
+ },
405
+ "paged_attention_bailouts_by_phase_reason": {
406
+ "ar_decode": {
407
+ "batch_not_1": 0,
408
+ "block_size_mismatch": 0,
409
+ "blocks_invalid": 0,
410
+ "dtype_unsupported": 0,
411
+ "empty_cache": 0,
412
+ "head_dim_unsupported": 0,
413
+ "kernel_unavailable": 0,
414
+ "offset_invalid": 0,
415
+ "partitioned_invalid_output": 0,
416
+ "partitioned_unavailable": 0,
417
+ "q_len_gt_max": 0,
418
+ "q_len_invalid": 0,
419
+ "turboquant_unsupported": 0,
420
+ "unknown": 0,
421
+ "unsupported_mask": 0
422
+ },
423
+ "decode_verify": {
424
+ "batch_not_1": 0,
425
+ "block_size_mismatch": 0,
426
+ "blocks_invalid": 0,
427
+ "dtype_unsupported": 0,
428
+ "empty_cache": 0,
429
+ "head_dim_unsupported": 0,
430
+ "kernel_unavailable": 0,
431
+ "offset_invalid": 0,
432
+ "partitioned_invalid_output": 0,
433
+ "partitioned_unavailable": 0,
434
+ "q_len_gt_max": 0,
435
+ "q_len_invalid": 0,
436
+ "turboquant_unsupported": 0,
437
+ "unknown": 0,
438
+ "unsupported_mask": 0
439
+ },
440
+ "postcommit": {
441
+ "batch_not_1": 0,
442
+ "block_size_mismatch": 0,
443
+ "blocks_invalid": 0,
444
+ "dtype_unsupported": 0,
445
+ "empty_cache": 0,
446
+ "head_dim_unsupported": 0,
447
+ "kernel_unavailable": 0,
448
+ "offset_invalid": 0,
449
+ "partitioned_invalid_output": 0,
450
+ "partitioned_unavailable": 0,
451
+ "q_len_gt_max": 0,
452
+ "q_len_invalid": 0,
453
+ "turboquant_unsupported": 0,
454
+ "unknown": 0,
455
+ "unsupported_mask": 0
456
+ },
457
+ "prefill": {
458
+ "batch_not_1": 0,
459
+ "block_size_mismatch": 0,
460
+ "blocks_invalid": 0,
461
+ "dtype_unsupported": 0,
462
+ "empty_cache": 0,
463
+ "head_dim_unsupported": 0,
464
+ "kernel_unavailable": 0,
465
+ "offset_invalid": 0,
466
+ "partitioned_invalid_output": 0,
467
+ "partitioned_unavailable": 0,
468
+ "q_len_gt_max": 0,
469
+ "q_len_invalid": 0,
470
+ "turboquant_unsupported": 0,
471
+ "unknown": 0,
472
+ "unsupported_mask": 0
473
+ },
474
+ "unknown": {
475
+ "batch_not_1": 0,
476
+ "block_size_mismatch": 0,
477
+ "blocks_invalid": 0,
478
+ "dtype_unsupported": 0,
479
+ "empty_cache": 0,
480
+ "head_dim_unsupported": 0,
481
+ "kernel_unavailable": 0,
482
+ "offset_invalid": 0,
483
+ "partitioned_invalid_output": 0,
484
+ "partitioned_unavailable": 0,
485
+ "q_len_gt_max": 0,
486
+ "q_len_invalid": 0,
487
+ "turboquant_unsupported": 0,
488
+ "unknown": 0,
489
+ "unsupported_mask": 0
490
+ }
491
+ },
492
+ "paged_attention_large_q_path": {
493
+ "dense_forbidden": 0,
494
+ "large_q_split_sdpa_fallback": 0,
495
+ "partitioned_paged": 0,
496
+ "tail_paged": 0,
497
+ "unknown": 0
498
+ },
499
+ "postcommit_dense_fallback_calls": 0,
500
+ "prefill_dense_fallback_calls": 0,
501
+ "trace_events": false
502
+ },
503
+ "timing_s": {
504
+ "accept": 0.014734539974597283,
505
+ "draft": 1.5304279569390928,
506
+ "rollback": 0.00014716008445248008,
507
+ "target_distribution": 20.938332714038552,
508
+ "target_hidden": 0.0,
509
+ "verify": 0.19443316804245114
510
+ },
511
+ "tok_s": 44.071735166669846,
512
+ "token_preview": [
513
+ 9996,
514
+ 625,
515
+ 759,
516
+ 759,
517
+ 759,
518
+ 759,
519
+ 759,
520
+ 759,
521
+ 759,
522
+ 759,
523
+ 759,
524
+ 759,
525
+ 759,
526
+ 759,
527
+ 759,
528
+ 759
529
+ ],
530
+ "verify_calls": 170
531
+ }
532
+ ],
533
+ "best_block_size": 6,
534
+ "best_speedup": 1.9232171946219445,
535
+ "blockers": [
536
+ "best speedup 1.923x is below 2.000x",
537
+ "median confirmation speedup 1.923x is below 2.000x"
538
+ ],
539
+ "draft_sampler": {
540
+ "exactness_note": "Assistant q may differ from target p; MTPLX remains exact because acceptance uses p/q and rejection samples the residual distribution.",
541
+ "inherits_target_sampler": true,
542
+ "temperature": null,
543
+ "top_k": null,
544
+ "top_p": null
545
+ },
546
+ "fan_restore": {
547
+ "after": {
548
+ "actual_max_rpm": 6341,
549
+ "actual_min_rpm": 5980,
550
+ "capacity_max_rpm": 7826,
551
+ "capacity_min_rpm": 7826,
552
+ "fans": [
553
+ {
554
+ "actual_rpm": 5980,
555
+ "max_capacity_rpm": 7826,
556
+ "mode": "auto",
557
+ "raw": {
558
+ "actual_rpm": 5980,
559
+ "index": 0,
560
+ "max_rpm": 7826,
561
+ "min_rpm": 2317,
562
+ "mode": "auto",
563
+ "target_rpm": 5490
564
+ },
565
+ "rpm": 5980,
566
+ "target_rpm": 5490
567
+ },
568
+ {
569
+ "actual_rpm": 6341,
570
+ "max_capacity_rpm": 7826,
571
+ "mode": "auto",
572
+ "raw": {
573
+ "actual_rpm": 6341,
574
+ "index": 1,
575
+ "max_rpm": 7826,
576
+ "min_rpm": 2317,
577
+ "mode": "auto",
578
+ "target_rpm": 5929
579
+ },
580
+ "rpm": 6341,
581
+ "target_rpm": 5929
582
+ }
583
+ ],
584
+ "max_rpm": 6341,
585
+ "min_rpm": 5980,
586
+ "ok": true,
587
+ "raw": {
588
+ "attempts": [
589
+ {
590
+ "command": [
591
+ "/Users/youssof/.mtplx/bin/thermalforge",
592
+ "status"
593
+ ],
594
+ "ok": true,
595
+ "returncode": 0,
596
+ "stderr": "",
597
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 5980,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5490\n },\n {\n \"actual_rpm\" : 6341,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5929\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.2,\n \"TB0T\" : 32.6,\n \"TCDX\" : 77.2,\n \"TCHP\" : 64.2,\n \"TCMb\" : 92.3,\n \"TG0B\" : 32.5,\n \"TG0H\" : 32,\n \"TG0V\" : 32.5,\n \"TH0x\" : 41.2,\n \"TMVR\" : 69.4,\n \"TPDX\" : 70.4,\n \"TRDX\" : 83.8,\n \"TS0P\" : 76.1,\n \"Tg0j\" : 80.6,\n \"Tm08\" : 76.7,\n \"Tp04\" : 77.7,\n \"Tp08\" : 77.3,\n \"Tp0C\" : 78.1,\n \"Tp0G\" : 78.5,\n \"Tp0X\" : 78.3\n }\n}"
598
+ }
599
+ ],
600
+ "detection": {
601
+ "available": true,
602
+ "clock_anchor_enabled": false,
603
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
604
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
605
+ "selected": {
606
+ "kind": "thermalforge",
607
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
608
+ "version": {
609
+ "command": [
610
+ "/Users/youssof/.mtplx/bin/thermalforge",
611
+ "--version"
612
+ ],
613
+ "ok": true,
614
+ "returncode": 0,
615
+ "stderr": "",
616
+ "stdout": "0.1.0"
617
+ }
618
+ },
619
+ "tools": [
620
+ {
621
+ "kind": "thermalforge",
622
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
623
+ "version": {
624
+ "command": [
625
+ "/Users/youssof/.mtplx/bin/thermalforge",
626
+ "--version"
627
+ ],
628
+ "ok": true,
629
+ "returncode": 0,
630
+ "stderr": "",
631
+ "stdout": "0.1.0"
632
+ }
633
+ }
634
+ ]
635
+ },
636
+ "ok": true,
637
+ "status": {
638
+ "command": [
639
+ "/Users/youssof/.mtplx/bin/thermalforge",
640
+ "status"
641
+ ],
642
+ "ok": true,
643
+ "returncode": 0,
644
+ "stderr": "",
645
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 5980,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5490\n },\n {\n \"actual_rpm\" : 6341,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5929\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.2,\n \"TB0T\" : 32.6,\n \"TCDX\" : 77.2,\n \"TCHP\" : 64.2,\n \"TCMb\" : 92.3,\n \"TG0B\" : 32.5,\n \"TG0H\" : 32,\n \"TG0V\" : 32.5,\n \"TH0x\" : 41.2,\n \"TMVR\" : 69.4,\n \"TPDX\" : 70.4,\n \"TRDX\" : 83.8,\n \"TS0P\" : 76.1,\n \"Tg0j\" : 80.6,\n \"Tm08\" : 76.7,\n \"Tp04\" : 77.7,\n \"Tp08\" : 77.3,\n \"Tp0C\" : 78.1,\n \"Tp0G\" : 78.5,\n \"Tp0X\" : 78.3\n }\n}"
646
+ }
647
+ },
648
+ "target_max_rpm": 5929,
649
+ "target_min_rpm": 5490
650
+ },
651
+ "message": "fan profile restored",
652
+ "ok": true,
653
+ "profile": "silent",
654
+ "set_result": {
655
+ "attempts": [
656
+ {
657
+ "command": [
658
+ "sudo",
659
+ "-n",
660
+ "/Users/youssof/.mtplx/bin/thermalforge",
661
+ "auto"
662
+ ],
663
+ "ok": true,
664
+ "returncode": 0,
665
+ "stderr": "No matching processes were found",
666
+ "stdout": "Fans reset to Apple defaults"
667
+ }
668
+ ],
669
+ "command": [
670
+ "sudo",
671
+ "-n",
672
+ "/Users/youssof/.mtplx/bin/thermalforge",
673
+ "auto"
674
+ ],
675
+ "detection": {
676
+ "available": true,
677
+ "clock_anchor_enabled": false,
678
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
679
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
680
+ "selected": {
681
+ "kind": "thermalforge",
682
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
683
+ "version": {
684
+ "command": [
685
+ "/Users/youssof/.mtplx/bin/thermalforge",
686
+ "--version"
687
+ ],
688
+ "ok": true,
689
+ "returncode": 0,
690
+ "stderr": "",
691
+ "stdout": "0.1.0"
692
+ }
693
+ },
694
+ "tools": [
695
+ {
696
+ "kind": "thermalforge",
697
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
698
+ "version": {
699
+ "command": [
700
+ "/Users/youssof/.mtplx/bin/thermalforge",
701
+ "--version"
702
+ ],
703
+ "ok": true,
704
+ "returncode": 0,
705
+ "stderr": "",
706
+ "stdout": "0.1.0"
707
+ }
708
+ }
709
+ ]
710
+ },
711
+ "dry_run": false,
712
+ "ok": true,
713
+ "profile": "silent"
714
+ }
715
+ },
716
+ "fanmax": {
717
+ "after": {
718
+ "actual_max_rpm": 7385,
719
+ "actual_min_rpm": 7332,
720
+ "capacity_max_rpm": 7826,
721
+ "capacity_min_rpm": 7826,
722
+ "fans": [
723
+ {
724
+ "actual_rpm": 7332,
725
+ "max_capacity_rpm": 7826,
726
+ "mode": "manual",
727
+ "raw": {
728
+ "actual_rpm": 7332,
729
+ "index": 0,
730
+ "max_rpm": 7826,
731
+ "min_rpm": 2317,
732
+ "mode": "manual",
733
+ "target_rpm": 7826
734
+ },
735
+ "rpm": 7332,
736
+ "target_rpm": 7826
737
+ },
738
+ {
739
+ "actual_rpm": 7385,
740
+ "max_capacity_rpm": 7826,
741
+ "mode": "manual",
742
+ "raw": {
743
+ "actual_rpm": 7385,
744
+ "index": 1,
745
+ "max_rpm": 7826,
746
+ "min_rpm": 2317,
747
+ "mode": "manual",
748
+ "target_rpm": 7826
749
+ },
750
+ "rpm": 7385,
751
+ "target_rpm": 7826
752
+ }
753
+ ],
754
+ "max_rpm": 7385,
755
+ "min_rpm": 7332,
756
+ "ok": true,
757
+ "raw": {
758
+ "attempts": [
759
+ {
760
+ "command": [
761
+ "/Users/youssof/.mtplx/bin/thermalforge",
762
+ "status"
763
+ ],
764
+ "ok": true,
765
+ "returncode": 0,
766
+ "stderr": "",
767
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 7332,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n },\n {\n \"actual_rpm\" : 7385,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.1,\n \"TB0T\" : 32.6,\n \"TCDX\" : 82.5,\n \"TCHP\" : 60.5,\n \"TCMb\" : 86.9,\n \"TG0B\" : 32.6,\n \"TG0H\" : 32,\n \"TG0V\" : 32.6,\n \"TH0x\" : 40,\n \"TMVR\" : 59.5,\n \"TPDX\" : 65.2,\n \"TRDX\" : 67.7,\n \"TS0P\" : 66.7,\n \"Tg0j\" : 67.2,\n \"Tm08\" : 67.9,\n \"Tp04\" : 75.1,\n \"Tp08\" : 74.2,\n \"Tp0C\" : 77.5,\n \"Tp0G\" : 76.2,\n \"Tp0X\" : 75.2\n }\n}"
768
+ }
769
+ ],
770
+ "detection": {
771
+ "available": true,
772
+ "clock_anchor_enabled": false,
773
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
774
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
775
+ "selected": {
776
+ "kind": "thermalforge",
777
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
778
+ "version": {
779
+ "command": [
780
+ "/Users/youssof/.mtplx/bin/thermalforge",
781
+ "--version"
782
+ ],
783
+ "ok": true,
784
+ "returncode": 0,
785
+ "stderr": "",
786
+ "stdout": "0.1.0"
787
+ }
788
+ },
789
+ "tools": [
790
+ {
791
+ "kind": "thermalforge",
792
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
793
+ "version": {
794
+ "command": [
795
+ "/Users/youssof/.mtplx/bin/thermalforge",
796
+ "--version"
797
+ ],
798
+ "ok": true,
799
+ "returncode": 0,
800
+ "stderr": "",
801
+ "stdout": "0.1.0"
802
+ }
803
+ }
804
+ ]
805
+ },
806
+ "ok": true,
807
+ "status": {
808
+ "command": [
809
+ "/Users/youssof/.mtplx/bin/thermalforge",
810
+ "status"
811
+ ],
812
+ "ok": true,
813
+ "returncode": 0,
814
+ "stderr": "",
815
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 7332,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n },\n {\n \"actual_rpm\" : 7385,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.1,\n \"TB0T\" : 32.6,\n \"TCDX\" : 82.5,\n \"TCHP\" : 60.5,\n \"TCMb\" : 86.9,\n \"TG0B\" : 32.6,\n \"TG0H\" : 32,\n \"TG0V\" : 32.6,\n \"TH0x\" : 40,\n \"TMVR\" : 59.5,\n \"TPDX\" : 65.2,\n \"TRDX\" : 67.7,\n \"TS0P\" : 66.7,\n \"Tg0j\" : 67.2,\n \"Tm08\" : 67.9,\n \"Tp04\" : 75.1,\n \"Tp08\" : 74.2,\n \"Tp0C\" : 77.5,\n \"Tp0G\" : 76.2,\n \"Tp0X\" : 75.2\n }\n}"
816
+ }
817
+ },
818
+ "target_max_rpm": 7826,
819
+ "target_min_rpm": 7826
820
+ },
821
+ "baseline": {
822
+ "actual_max_rpm": 5367,
823
+ "actual_min_rpm": 4968,
824
+ "capacity_max_rpm": 7826,
825
+ "capacity_min_rpm": 7826,
826
+ "fans": [
827
+ {
828
+ "actual_rpm": 4968,
829
+ "max_capacity_rpm": 7826,
830
+ "mode": "auto",
831
+ "raw": {
832
+ "actual_rpm": 4968,
833
+ "index": 0,
834
+ "max_rpm": 7826,
835
+ "min_rpm": 2317,
836
+ "mode": "auto",
837
+ "target_rpm": 4976
838
+ },
839
+ "rpm": 4968,
840
+ "target_rpm": 4976
841
+ },
842
+ {
843
+ "actual_rpm": 5367,
844
+ "max_capacity_rpm": 7826,
845
+ "mode": "auto",
846
+ "raw": {
847
+ "actual_rpm": 5367,
848
+ "index": 1,
849
+ "max_rpm": 7826,
850
+ "min_rpm": 2317,
851
+ "mode": "auto",
852
+ "target_rpm": 5374
853
+ },
854
+ "rpm": 5367,
855
+ "target_rpm": 5374
856
+ }
857
+ ],
858
+ "max_rpm": 5367,
859
+ "min_rpm": 4968,
860
+ "ok": true,
861
+ "raw": {
862
+ "attempts": [
863
+ {
864
+ "command": [
865
+ "/Users/youssof/.mtplx/bin/thermalforge",
866
+ "status"
867
+ ],
868
+ "ok": true,
869
+ "returncode": 0,
870
+ "stderr": "",
871
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 4968,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 4976\n },\n {\n \"actual_rpm\" : 5367,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5374\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.1,\n \"TB0T\" : 32.6,\n \"TCDX\" : 68.2,\n \"TCHP\" : 60.5,\n \"TCMb\" : 75.8,\n \"TG0B\" : 32.6,\n \"TG0H\" : 32,\n \"TG0V\" : 32.6,\n \"TH0x\" : 39.6,\n \"TMVR\" : 59.7,\n \"TPDX\" : 65.7,\n \"TRDX\" : 67.6,\n \"TS0P\" : 67.4,\n \"Tg0j\" : 67.2,\n \"Tm08\" : 68.6,\n \"Tp04\" : 76.5,\n \"Tp08\" : 75.4,\n \"Tp0C\" : 79.5,\n \"Tp0G\" : 77.4,\n \"Tp0X\" : 76.6\n }\n}"
872
+ }
873
+ ],
874
+ "detection": {
875
+ "available": true,
876
+ "clock_anchor_enabled": false,
877
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
878
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
879
+ "selected": {
880
+ "kind": "thermalforge",
881
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
882
+ "version": {
883
+ "command": [
884
+ "/Users/youssof/.mtplx/bin/thermalforge",
885
+ "--version"
886
+ ],
887
+ "ok": true,
888
+ "returncode": 0,
889
+ "stderr": "",
890
+ "stdout": "0.1.0"
891
+ }
892
+ },
893
+ "tools": [
894
+ {
895
+ "kind": "thermalforge",
896
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
897
+ "version": {
898
+ "command": [
899
+ "/Users/youssof/.mtplx/bin/thermalforge",
900
+ "--version"
901
+ ],
902
+ "ok": true,
903
+ "returncode": 0,
904
+ "stderr": "",
905
+ "stdout": "0.1.0"
906
+ }
907
+ }
908
+ ]
909
+ },
910
+ "ok": true,
911
+ "status": {
912
+ "command": [
913
+ "/Users/youssof/.mtplx/bin/thermalforge",
914
+ "status"
915
+ ],
916
+ "ok": true,
917
+ "returncode": 0,
918
+ "stderr": "",
919
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 4968,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 4976\n },\n {\n \"actual_rpm\" : 5367,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5374\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.1,\n \"TB0T\" : 32.6,\n \"TCDX\" : 68.2,\n \"TCHP\" : 60.5,\n \"TCMb\" : 75.8,\n \"TG0B\" : 32.6,\n \"TG0H\" : 32,\n \"TG0V\" : 32.6,\n \"TH0x\" : 39.6,\n \"TMVR\" : 59.7,\n \"TPDX\" : 65.7,\n \"TRDX\" : 67.6,\n \"TS0P\" : 67.4,\n \"Tg0j\" : 67.2,\n \"Tm08\" : 68.6,\n \"Tp04\" : 76.5,\n \"Tp08\" : 75.4,\n \"Tp0C\" : 79.5,\n \"Tp0G\" : 77.4,\n \"Tp0X\" : 76.6\n }\n}"
920
+ }
921
+ },
922
+ "target_max_rpm": 5374,
923
+ "target_min_rpm": 4976
924
+ },
925
+ "message": "fans ramped to max (actual 7332-7385 RPM; target 7826 RPM)",
926
+ "ok": true,
927
+ "profile": "max",
928
+ "set_result": {
929
+ "attempts": [
930
+ {
931
+ "command": [
932
+ "sudo",
933
+ "-n",
934
+ "/Users/youssof/.mtplx/bin/thermalforge",
935
+ "max"
936
+ ],
937
+ "ok": true,
938
+ "returncode": 0,
939
+ "stderr": "",
940
+ "stdout": "Fan 0: 4968 RPM \u2192 max (7826 RPM)\nFan 1: 5367 RPM \u2192 max (7826 RPM)"
941
+ }
942
+ ],
943
+ "command": [
944
+ "sudo",
945
+ "-n",
946
+ "/Users/youssof/.mtplx/bin/thermalforge",
947
+ "max"
948
+ ],
949
+ "detection": {
950
+ "available": true,
951
+ "clock_anchor_enabled": false,
952
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
953
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
954
+ "selected": {
955
+ "kind": "thermalforge",
956
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
957
+ "version": {
958
+ "command": [
959
+ "/Users/youssof/.mtplx/bin/thermalforge",
960
+ "--version"
961
+ ],
962
+ "ok": true,
963
+ "returncode": 0,
964
+ "stderr": "",
965
+ "stdout": "0.1.0"
966
+ }
967
+ },
968
+ "tools": [
969
+ {
970
+ "kind": "thermalforge",
971
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
972
+ "version": {
973
+ "command": [
974
+ "/Users/youssof/.mtplx/bin/thermalforge",
975
+ "--version"
976
+ ],
977
+ "ok": true,
978
+ "returncode": 0,
979
+ "stderr": "",
980
+ "stdout": "0.1.0"
981
+ }
982
+ }
983
+ ]
984
+ },
985
+ "dry_run": false,
986
+ "ok": true,
987
+ "profile": "max"
988
+ }
989
+ },
990
+ "max_tokens": 1000,
991
+ "median_confirmation_speedup": 1.9232171946219445,
992
+ "memory_gate": {
993
+ "ar_peak_memory_gib": 17.040895924903452,
994
+ "lte_ar_plus_6_gib": true,
995
+ "lte_ar_times_1_18": true,
996
+ "mtp_peak_memory_gib": 17.566339950077236
997
+ },
998
+ "mtp_by_block_size": {
999
+ "6": {
1000
+ "acceptance": 0.9810874704491725,
1001
+ "accepted_drafts": 830,
1002
+ "active_memory_gib": 17.410899550653994,
1003
+ "block_size": 6,
1004
+ "cache_memory_gib": 67.3336522737518,
1005
+ "decode_s": 22.690279750007903,
1006
+ "draft_sampler": {
1007
+ "temperature": 1.0,
1008
+ "top_k": 64,
1009
+ "top_p": 0.95
1010
+ },
1011
+ "drafted_tokens": 846,
1012
+ "generated_tokens": 1000,
1013
+ "mode": "mtp",
1014
+ "peak_memory_gib": 17.566339950077236,
1015
+ "prefill_s": 0.25343483399774414,
1016
+ "row_distribution_evals": 0,
1017
+ "speedup_vs_ar": 1.9232171946219445,
1018
+ "target_distribution_modes": {
1019
+ "batched_logits": 170
1020
+ },
1021
+ "target_sampler": {
1022
+ "temperature": 1.0,
1023
+ "top_k": 64,
1024
+ "top_p": 0.95
1025
+ },
1026
+ "telemetry": {
1027
+ "ar_dense_fallback_calls": 0,
1028
+ "decode_dense_fallback_calls": 0,
1029
+ "dense_fallback_calls_by_phase": {
1030
+ "ar_decode": 0,
1031
+ "decode_verify": 0,
1032
+ "postcommit": 0,
1033
+ "prefill": 0,
1034
+ "unknown": 0
1035
+ },
1036
+ "events": [],
1037
+ "paged_active_array_calls_by_phase": {
1038
+ "ar_decode": 0,
1039
+ "decode_verify": 0,
1040
+ "postcommit": 0,
1041
+ "prefill": 0,
1042
+ "unknown": 0
1043
+ },
1044
+ "paged_attention_bailouts_by_phase_reason": {
1045
+ "ar_decode": {
1046
+ "batch_not_1": 0,
1047
+ "block_size_mismatch": 0,
1048
+ "blocks_invalid": 0,
1049
+ "dtype_unsupported": 0,
1050
+ "empty_cache": 0,
1051
+ "head_dim_unsupported": 0,
1052
+ "kernel_unavailable": 0,
1053
+ "offset_invalid": 0,
1054
+ "partitioned_invalid_output": 0,
1055
+ "partitioned_unavailable": 0,
1056
+ "q_len_gt_max": 0,
1057
+ "q_len_invalid": 0,
1058
+ "turboquant_unsupported": 0,
1059
+ "unknown": 0,
1060
+ "unsupported_mask": 0
1061
+ },
1062
+ "decode_verify": {
1063
+ "batch_not_1": 0,
1064
+ "block_size_mismatch": 0,
1065
+ "blocks_invalid": 0,
1066
+ "dtype_unsupported": 0,
1067
+ "empty_cache": 0,
1068
+ "head_dim_unsupported": 0,
1069
+ "kernel_unavailable": 0,
1070
+ "offset_invalid": 0,
1071
+ "partitioned_invalid_output": 0,
1072
+ "partitioned_unavailable": 0,
1073
+ "q_len_gt_max": 0,
1074
+ "q_len_invalid": 0,
1075
+ "turboquant_unsupported": 0,
1076
+ "unknown": 0,
1077
+ "unsupported_mask": 0
1078
+ },
1079
+ "postcommit": {
1080
+ "batch_not_1": 0,
1081
+ "block_size_mismatch": 0,
1082
+ "blocks_invalid": 0,
1083
+ "dtype_unsupported": 0,
1084
+ "empty_cache": 0,
1085
+ "head_dim_unsupported": 0,
1086
+ "kernel_unavailable": 0,
1087
+ "offset_invalid": 0,
1088
+ "partitioned_invalid_output": 0,
1089
+ "partitioned_unavailable": 0,
1090
+ "q_len_gt_max": 0,
1091
+ "q_len_invalid": 0,
1092
+ "turboquant_unsupported": 0,
1093
+ "unknown": 0,
1094
+ "unsupported_mask": 0
1095
+ },
1096
+ "prefill": {
1097
+ "batch_not_1": 0,
1098
+ "block_size_mismatch": 0,
1099
+ "blocks_invalid": 0,
1100
+ "dtype_unsupported": 0,
1101
+ "empty_cache": 0,
1102
+ "head_dim_unsupported": 0,
1103
+ "kernel_unavailable": 0,
1104
+ "offset_invalid": 0,
1105
+ "partitioned_invalid_output": 0,
1106
+ "partitioned_unavailable": 0,
1107
+ "q_len_gt_max": 0,
1108
+ "q_len_invalid": 0,
1109
+ "turboquant_unsupported": 0,
1110
+ "unknown": 0,
1111
+ "unsupported_mask": 0
1112
+ },
1113
+ "unknown": {
1114
+ "batch_not_1": 0,
1115
+ "block_size_mismatch": 0,
1116
+ "blocks_invalid": 0,
1117
+ "dtype_unsupported": 0,
1118
+ "empty_cache": 0,
1119
+ "head_dim_unsupported": 0,
1120
+ "kernel_unavailable": 0,
1121
+ "offset_invalid": 0,
1122
+ "partitioned_invalid_output": 0,
1123
+ "partitioned_unavailable": 0,
1124
+ "q_len_gt_max": 0,
1125
+ "q_len_invalid": 0,
1126
+ "turboquant_unsupported": 0,
1127
+ "unknown": 0,
1128
+ "unsupported_mask": 0
1129
+ }
1130
+ },
1131
+ "paged_attention_large_q_path": {
1132
+ "dense_forbidden": 0,
1133
+ "large_q_split_sdpa_fallback": 0,
1134
+ "partitioned_paged": 0,
1135
+ "tail_paged": 0,
1136
+ "unknown": 0
1137
+ },
1138
+ "postcommit_dense_fallback_calls": 0,
1139
+ "prefill_dense_fallback_calls": 0,
1140
+ "trace_events": false
1141
+ },
1142
+ "timing_s": {
1143
+ "accept": 0.014734539974597283,
1144
+ "draft": 1.5304279569390928,
1145
+ "rollback": 0.00014716008445248008,
1146
+ "target_distribution": 20.938332714038552,
1147
+ "target_hidden": 0.0,
1148
+ "verify": 0.19443316804245114
1149
+ },
1150
+ "tok_s": 44.071735166669846,
1151
+ "token_preview": [
1152
+ 9996,
1153
+ 625,
1154
+ 759,
1155
+ 759,
1156
+ 759,
1157
+ 759,
1158
+ 759,
1159
+ 759,
1160
+ 759,
1161
+ 759,
1162
+ 759,
1163
+ 759,
1164
+ 759,
1165
+ 759,
1166
+ 759,
1167
+ 759
1168
+ ],
1169
+ "verify_calls": 170
1170
+ }
1171
+ },
1172
+ "passed": false,
1173
+ "prompt_id": "flappy_html5_canvas_game",
1174
+ "prompt_tokens": 119,
1175
+ "sampler": {
1176
+ "seed": 0,
1177
+ "temperature": 1.0,
1178
+ "top_k": 64,
1179
+ "top_p": 0.95
1180
+ }
1181
+ },
1182
+ "status": "failed"
1183
+ }
benchmarks/flappy1000-targetq4-assistantq6-sweep.json ADDED
@@ -0,0 +1,1702 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "arch_id": "gemma4-assistant-mtp",
3
+ "artifacts": {
4
+ "assistant_dtype": null,
5
+ "assistant_format": "q6-g64-affine",
6
+ "assistant_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx",
7
+ "assistant_quantization": {
8
+ "bits": 6,
9
+ "group_size": 64,
10
+ "mode": "affine"
11
+ },
12
+ "disk_ok": true,
13
+ "min_free_gib": 220.0,
14
+ "observed_free_gib": 704.0457382202148,
15
+ "target_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx",
16
+ "target_quantization": {
17
+ "bits": 4,
18
+ "format": "mlx-flat4-g64",
19
+ "group_size": 64,
20
+ "mode": "affine"
21
+ }
22
+ },
23
+ "backend": "gemma4_assistant",
24
+ "benchmark": {
25
+ "draft_block_sizes": [
26
+ 3,
27
+ 4,
28
+ 5,
29
+ 6
30
+ ],
31
+ "draft_sampler": {
32
+ "exactness_note": "Assistant q may differ from target p; MTPLX remains exact because acceptance uses p/q and rejection samples the residual distribution.",
33
+ "inherits_target_sampler": true,
34
+ "temperature": null,
35
+ "top_k": null,
36
+ "top_p": null
37
+ },
38
+ "max_mode": true,
39
+ "max_tokens": 1000,
40
+ "profile": "sustained",
41
+ "prompt_suite": "mtplx/benchmarks/prompts/flappy.jsonl",
42
+ "reasoning": "off",
43
+ "sampler_source": {
44
+ "do_sample": true,
45
+ "local_reference": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx/generation_config.json",
46
+ "name": "official Gemma 4 generation_config.json",
47
+ "temperature": 1.0,
48
+ "top_k": 64,
49
+ "top_p": 0.95
50
+ },
51
+ "seed": 0,
52
+ "temperature": 1.0,
53
+ "top_k": 64,
54
+ "top_p": 0.95
55
+ },
56
+ "blockers": [
57
+ "best speedup 1.995x is below 2.000x",
58
+ "median confirmation speedup 1.995x is below 2.000x"
59
+ ],
60
+ "can_run_now": true,
61
+ "gates": {
62
+ "generated_tokens": 1000,
63
+ "longer_lengths_blocked_until_160_passes": true,
64
+ "median_of_3_min_speedup_vs_ar": 2.0,
65
+ "min_speedup_vs_ar": 2.0,
66
+ "mtp_peak_memory_lte_ar_multiplier": 1.18,
67
+ "mtp_peak_memory_lte_ar_plus_gib": 6
68
+ },
69
+ "official_sources": {
70
+ "assistant": "google/gemma-4-31B-it-assistant",
71
+ "assistant_revision": "cffbbd2cea41ea56a0fa5b0487e0d445121fd204",
72
+ "target": "google/gemma-4-31B-it",
73
+ "target_revision": "145dc2508c480a64b47242f160d286cff94a2343"
74
+ },
75
+ "pair": {
76
+ "assistant_exists": true,
77
+ "assistant_inspection": {
78
+ "architecture": "Gemma4AssistantForCausalLM",
79
+ "architecture_recognized": true,
80
+ "backbone_hidden_size": 5376,
81
+ "compatibility": {
82
+ "arch_id": "gemma4-assistant-mtp",
83
+ "can_run": false,
84
+ "exit_code": 3,
85
+ "message": "Official-style Gemma 4 31B assistant artifact recognized. This is an assistant-backed MTP pair, not a standalone target; MTPLX scaffold is present but QA and the 160-token speed/memory gate are still pending.",
86
+ "mtp_supported": "recognized",
87
+ "recognized": true,
88
+ "recommended_backend": "gemma4_assistant",
89
+ "recommended_profile": "performance-cold",
90
+ "runtime_compatibility": "assistant-pair-qa-pending",
91
+ "runtime_contract": null,
92
+ "runtime_contract_error": null,
93
+ "runtime_contract_path": null,
94
+ "support_level": "architecture-scaffolded-qa-pending",
95
+ "support_notes": "Assistant-backed scaffold for the official dense Gemma 4 31B pair. It remains QA-pending and is not a public runnable backend until 160-token exactness, speed, and memory gates pass.",
96
+ "supported": false,
97
+ "tier": "architecture-compatible-but-unverified",
98
+ "unsafe_force_required": false,
99
+ "unverified_model": true
100
+ },
101
+ "config_exists": true,
102
+ "hidden_size": 1024,
103
+ "layer_types": [
104
+ "sliding_attention",
105
+ "sliding_attention",
106
+ "sliding_attention",
107
+ "full_attention"
108
+ ],
109
+ "model_dir": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx",
110
+ "model_files": [
111
+ "model.safetensors"
112
+ ],
113
+ "model_type": "gemma4_assistant",
114
+ "mtp": {
115
+ "exists": false,
116
+ "expected_tensor_count": 15,
117
+ "extra_keys": [],
118
+ "metadata_only": true,
119
+ "missing_expected_keys": [],
120
+ "mtp_file": "model.safetensors.index.json::embedded",
121
+ "passes_tensor_gate": false,
122
+ "sidecar_format": "bf16",
123
+ "tensor_count": 0,
124
+ "tensors": []
125
+ },
126
+ "mtp_arch": "gemma4-assistant-mtp",
127
+ "mtp_num_hidden_layers": 0,
128
+ "mtp_pattern": null,
129
+ "mtp_supported": "recognized",
130
+ "num_hidden_layers": 4,
131
+ "num_kv_shared_layers": 4,
132
+ "passes_primary_gate": false,
133
+ "quantization": {
134
+ "bits": 6,
135
+ "group_size": 64,
136
+ "mode": "affine"
137
+ },
138
+ "recommended_backend": "gemma4_assistant",
139
+ "recommended_profile": "performance-cold",
140
+ "runtime_compatibility": "assistant-pair-qa-pending",
141
+ "runtime_contract_path": null,
142
+ "sidecars": {
143
+ "preprocessor_config.json": false,
144
+ "processor_config.json": false,
145
+ "video_preprocessor_config.json": false
146
+ },
147
+ "source": "local",
148
+ "support_level": "architecture-scaffolded-qa-pending",
149
+ "support_notes": "Assistant-backed scaffold for the official dense Gemma 4 31B pair. It remains QA-pending and is not a public runnable backend until 160-token exactness, speed, and memory gates pass.",
150
+ "unverified_model": true,
151
+ "use_ordered_embeddings": false,
152
+ "vocab_size": 262144
153
+ },
154
+ "assistant_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx",
155
+ "pair_error": null,
156
+ "pair_valid": true,
157
+ "target_exists": true,
158
+ "target_inspection": {
159
+ "architecture": "Gemma4ForConditionalGeneration",
160
+ "architecture_recognized": false,
161
+ "backbone_hidden_size": null,
162
+ "compatibility": {
163
+ "arch_id": null,
164
+ "can_run": false,
165
+ "exit_code": 2,
166
+ "message": "Model has no MTP head. MTPLX requires an MTP-equipped model.",
167
+ "mtp_supported": "no",
168
+ "recognized": false,
169
+ "recommended_backend": null,
170
+ "recommended_profile": null,
171
+ "runtime_compatibility": "unsupported",
172
+ "runtime_contract": null,
173
+ "runtime_contract_error": null,
174
+ "runtime_contract_path": null,
175
+ "support_level": "unsupported",
176
+ "support_notes": null,
177
+ "supported": false,
178
+ "tier": "no-MTP",
179
+ "unsafe_force_required": false,
180
+ "unverified_model": false
181
+ },
182
+ "config_exists": true,
183
+ "hidden_size": 5376,
184
+ "layer_types": [
185
+ "sliding_attention",
186
+ "sliding_attention",
187
+ "sliding_attention",
188
+ "sliding_attention",
189
+ "sliding_attention",
190
+ "full_attention",
191
+ "sliding_attention",
192
+ "sliding_attention",
193
+ "sliding_attention",
194
+ "sliding_attention",
195
+ "sliding_attention",
196
+ "full_attention",
197
+ "sliding_attention",
198
+ "sliding_attention",
199
+ "sliding_attention",
200
+ "sliding_attention",
201
+ "sliding_attention",
202
+ "full_attention",
203
+ "sliding_attention",
204
+ "sliding_attention",
205
+ "sliding_attention",
206
+ "sliding_attention",
207
+ "sliding_attention",
208
+ "full_attention",
209
+ "sliding_attention",
210
+ "sliding_attention",
211
+ "sliding_attention",
212
+ "sliding_attention",
213
+ "sliding_attention",
214
+ "full_attention",
215
+ "sliding_attention",
216
+ "sliding_attention",
217
+ "sliding_attention",
218
+ "sliding_attention",
219
+ "sliding_attention",
220
+ "full_attention",
221
+ "sliding_attention",
222
+ "sliding_attention",
223
+ "sliding_attention",
224
+ "sliding_attention",
225
+ "sliding_attention",
226
+ "full_attention",
227
+ "sliding_attention",
228
+ "sliding_attention",
229
+ "sliding_attention",
230
+ "sliding_attention",
231
+ "sliding_attention",
232
+ "full_attention",
233
+ "sliding_attention",
234
+ "sliding_attention",
235
+ "sliding_attention",
236
+ "sliding_attention",
237
+ "sliding_attention",
238
+ "full_attention",
239
+ "sliding_attention",
240
+ "sliding_attention",
241
+ "sliding_attention",
242
+ "sliding_attention",
243
+ "sliding_attention",
244
+ "full_attention"
245
+ ],
246
+ "model_dir": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx",
247
+ "model_files": [
248
+ "model-00001-of-00004.safetensors",
249
+ "model-00002-of-00004.safetensors",
250
+ "model-00003-of-00004.safetensors",
251
+ "model-00004-of-00004.safetensors"
252
+ ],
253
+ "model_type": "gemma4_text",
254
+ "mtp": {
255
+ "exists": false,
256
+ "expected_tensor_count": 15,
257
+ "extra_keys": [],
258
+ "metadata_only": true,
259
+ "missing_expected_keys": [],
260
+ "mtp_file": "model.safetensors.index.json::embedded",
261
+ "passes_tensor_gate": false,
262
+ "sidecar_format": "bf16",
263
+ "tensor_count": 0,
264
+ "tensors": []
265
+ },
266
+ "mtp_arch": null,
267
+ "mtp_num_hidden_layers": 0,
268
+ "mtp_pattern": null,
269
+ "mtp_supported": "no",
270
+ "num_hidden_layers": 60,
271
+ "num_kv_shared_layers": 0,
272
+ "passes_primary_gate": false,
273
+ "quantization": {
274
+ "bits": 4,
275
+ "group_size": 64,
276
+ "mode": "affine"
277
+ },
278
+ "recommended_backend": null,
279
+ "recommended_profile": null,
280
+ "runtime_compatibility": "unsupported",
281
+ "runtime_contract_path": null,
282
+ "sidecars": {
283
+ "preprocessor_config.json": false,
284
+ "processor_config.json": false,
285
+ "video_preprocessor_config.json": false
286
+ },
287
+ "source": "local",
288
+ "support_level": "unsupported",
289
+ "support_notes": null,
290
+ "unverified_model": false,
291
+ "use_ordered_embeddings": null,
292
+ "vocab_size": 262144
293
+ },
294
+ "target_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx"
295
+ },
296
+ "planned_commands": {
297
+ "assistant_bf16_snapshot": "uv run python -c \"from huggingface_hub import snapshot_download; snapshot_download(repo_id='google/gemma-4-31B-it-assistant', revision='cffbbd2cea41ea56a0fa5b0487e0d445121fd204', repo_type='model', local_dir='/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx')\"",
298
+ "gate": "mtplx bench gemma-mtp --target-model /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx --assistant-model /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx --profile sustained --max --prompt-suite mtplx/benchmarks/prompts/flappy.jsonl --max-tokens 1000 --temperature 1.0 --top-p 0.95 --top-k 64 --seed 0 --reasoning off --draft-block-sizes 3,4,5,6 --json --output outputs/gemma4/flappy1000-q6assistant-pure-sweep.json",
299
+ "target_flat4_g64": "uv run python -m mlx_lm.convert --hf-path /Users/youssof/Documents/MTPLX/models/.sources/gemma-4-31B-it-145dc2508c48 --mlx-path /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx --quantize --q-bits 4 --q-group-size 64 --q-mode affine",
300
+ "target_revision_download": "uv run python -c \"from huggingface_hub import snapshot_download; snapshot_download(repo_id='google/gemma-4-31B-it', revision='145dc2508c480a64b47242f160d286cff94a2343', repo_type='model', local_dir='/Users/youssof/Documents/MTPLX/models/.sources/gemma-4-31B-it-145dc2508c48')\""
301
+ },
302
+ "qa_pending": true,
303
+ "results": {
304
+ "ar": {
305
+ "active_memory_gib": 16.96541445143521,
306
+ "cache_memory_gib": 1.9738727556541562,
307
+ "decode_s": 45.80454516700411,
308
+ "generated_tokens": 1000,
309
+ "mode": "ar",
310
+ "peak_memory_gib": 17.040886400267482,
311
+ "prefill_s": 1.318873333002557,
312
+ "tok_s": 21.831894550071045,
313
+ "token_preview": [
314
+ 9996,
315
+ 625,
316
+ 24731,
317
+ 236761,
318
+ 9996,
319
+ 625,
320
+ 24731,
321
+ 236761,
322
+ 9996,
323
+ 625,
324
+ 24731,
325
+ 236761,
326
+ 9996,
327
+ 625,
328
+ 24731,
329
+ 236761
330
+ ]
331
+ },
332
+ "ar_confirmation": [
333
+ {
334
+ "active_memory_gib": 16.96541445143521,
335
+ "cache_memory_gib": 1.9738727556541562,
336
+ "decode_s": 45.80454516700411,
337
+ "generated_tokens": 1000,
338
+ "mode": "ar",
339
+ "peak_memory_gib": 17.040886400267482,
340
+ "prefill_s": 1.318873333002557,
341
+ "tok_s": 21.831894550071045,
342
+ "token_preview": [
343
+ 9996,
344
+ 625,
345
+ 24731,
346
+ 236761,
347
+ 9996,
348
+ 625,
349
+ 24731,
350
+ 236761,
351
+ 9996,
352
+ 625,
353
+ 24731,
354
+ 236761,
355
+ 9996,
356
+ 625,
357
+ 24731,
358
+ 236761
359
+ ]
360
+ }
361
+ ],
362
+ "best_block_confirmation": [
363
+ {
364
+ "acceptance": 0.9810874704491725,
365
+ "accepted_drafts": 830,
366
+ "active_memory_gib": 17.41092054359615,
367
+ "block_size": 6,
368
+ "cache_memory_gib": 67.33365368191153,
369
+ "decode_s": 22.95747512500384,
370
+ "draft_sampler": {
371
+ "temperature": 1.0,
372
+ "top_k": 64,
373
+ "top_p": 0.95
374
+ },
375
+ "drafted_tokens": 846,
376
+ "generated_tokens": 1000,
377
+ "mode": "mtp",
378
+ "peak_memory_gib": 17.566345684230328,
379
+ "prefill_s": 0.2074422090081498,
380
+ "row_distribution_evals": 0,
381
+ "speedup_vs_ar": 1.9951908873949589,
382
+ "target_distribution_modes": {
383
+ "batched_logits": 170
384
+ },
385
+ "target_sampler": {
386
+ "temperature": 1.0,
387
+ "top_k": 64,
388
+ "top_p": 0.95
389
+ },
390
+ "telemetry": {
391
+ "ar_dense_fallback_calls": 0,
392
+ "decode_dense_fallback_calls": 0,
393
+ "dense_fallback_calls_by_phase": {
394
+ "ar_decode": 0,
395
+ "decode_verify": 0,
396
+ "postcommit": 0,
397
+ "prefill": 0,
398
+ "unknown": 0
399
+ },
400
+ "events": [],
401
+ "paged_active_array_calls_by_phase": {
402
+ "ar_decode": 0,
403
+ "decode_verify": 0,
404
+ "postcommit": 0,
405
+ "prefill": 0,
406
+ "unknown": 0
407
+ },
408
+ "paged_attention_bailouts_by_phase_reason": {
409
+ "ar_decode": {
410
+ "batch_not_1": 0,
411
+ "block_size_mismatch": 0,
412
+ "blocks_invalid": 0,
413
+ "dtype_unsupported": 0,
414
+ "empty_cache": 0,
415
+ "head_dim_unsupported": 0,
416
+ "kernel_unavailable": 0,
417
+ "offset_invalid": 0,
418
+ "partitioned_invalid_output": 0,
419
+ "partitioned_unavailable": 0,
420
+ "q_len_gt_max": 0,
421
+ "q_len_invalid": 0,
422
+ "turboquant_unsupported": 0,
423
+ "unknown": 0,
424
+ "unsupported_mask": 0
425
+ },
426
+ "decode_verify": {
427
+ "batch_not_1": 0,
428
+ "block_size_mismatch": 0,
429
+ "blocks_invalid": 0,
430
+ "dtype_unsupported": 0,
431
+ "empty_cache": 0,
432
+ "head_dim_unsupported": 0,
433
+ "kernel_unavailable": 0,
434
+ "offset_invalid": 0,
435
+ "partitioned_invalid_output": 0,
436
+ "partitioned_unavailable": 0,
437
+ "q_len_gt_max": 0,
438
+ "q_len_invalid": 0,
439
+ "turboquant_unsupported": 0,
440
+ "unknown": 0,
441
+ "unsupported_mask": 0
442
+ },
443
+ "postcommit": {
444
+ "batch_not_1": 0,
445
+ "block_size_mismatch": 0,
446
+ "blocks_invalid": 0,
447
+ "dtype_unsupported": 0,
448
+ "empty_cache": 0,
449
+ "head_dim_unsupported": 0,
450
+ "kernel_unavailable": 0,
451
+ "offset_invalid": 0,
452
+ "partitioned_invalid_output": 0,
453
+ "partitioned_unavailable": 0,
454
+ "q_len_gt_max": 0,
455
+ "q_len_invalid": 0,
456
+ "turboquant_unsupported": 0,
457
+ "unknown": 0,
458
+ "unsupported_mask": 0
459
+ },
460
+ "prefill": {
461
+ "batch_not_1": 0,
462
+ "block_size_mismatch": 0,
463
+ "blocks_invalid": 0,
464
+ "dtype_unsupported": 0,
465
+ "empty_cache": 0,
466
+ "head_dim_unsupported": 0,
467
+ "kernel_unavailable": 0,
468
+ "offset_invalid": 0,
469
+ "partitioned_invalid_output": 0,
470
+ "partitioned_unavailable": 0,
471
+ "q_len_gt_max": 0,
472
+ "q_len_invalid": 0,
473
+ "turboquant_unsupported": 0,
474
+ "unknown": 0,
475
+ "unsupported_mask": 0
476
+ },
477
+ "unknown": {
478
+ "batch_not_1": 0,
479
+ "block_size_mismatch": 0,
480
+ "blocks_invalid": 0,
481
+ "dtype_unsupported": 0,
482
+ "empty_cache": 0,
483
+ "head_dim_unsupported": 0,
484
+ "kernel_unavailable": 0,
485
+ "offset_invalid": 0,
486
+ "partitioned_invalid_output": 0,
487
+ "partitioned_unavailable": 0,
488
+ "q_len_gt_max": 0,
489
+ "q_len_invalid": 0,
490
+ "turboquant_unsupported": 0,
491
+ "unknown": 0,
492
+ "unsupported_mask": 0
493
+ }
494
+ },
495
+ "paged_attention_large_q_path": {
496
+ "dense_forbidden": 0,
497
+ "large_q_split_sdpa_fallback": 0,
498
+ "partitioned_paged": 0,
499
+ "tail_paged": 0,
500
+ "unknown": 0
501
+ },
502
+ "postcommit_dense_fallback_calls": 0,
503
+ "prefill_dense_fallback_calls": 0,
504
+ "trace_events": false
505
+ },
506
+ "timing_s": {
507
+ "accept": 0.011550506053026766,
508
+ "draft": 1.5224984570086235,
509
+ "rollback": 0.0001419630425516516,
510
+ "target_distribution": 21.21943625298445,
511
+ "target_hidden": 0.0,
512
+ "verify": 0.19109521005884744
513
+ },
514
+ "tok_s": 43.558797060869416,
515
+ "token_preview": [
516
+ 9996,
517
+ 625,
518
+ 759,
519
+ 759,
520
+ 759,
521
+ 759,
522
+ 759,
523
+ 759,
524
+ 759,
525
+ 759,
526
+ 759,
527
+ 759,
528
+ 759,
529
+ 759,
530
+ 759,
531
+ 759
532
+ ],
533
+ "verify_calls": 170
534
+ }
535
+ ],
536
+ "best_block_size": 6,
537
+ "best_speedup": 1.9951908873949589,
538
+ "blockers": [
539
+ "best speedup 1.995x is below 2.000x",
540
+ "median confirmation speedup 1.995x is below 2.000x"
541
+ ],
542
+ "draft_sampler": {
543
+ "exactness_note": "Assistant q may differ from target p; MTPLX remains exact because acceptance uses p/q and rejection samples the residual distribution.",
544
+ "inherits_target_sampler": true,
545
+ "temperature": null,
546
+ "top_k": null,
547
+ "top_p": null
548
+ },
549
+ "fan_restore": {
550
+ "after": {
551
+ "actual_max_rpm": 6643,
552
+ "actual_min_rpm": 6265,
553
+ "capacity_max_rpm": 7826,
554
+ "capacity_min_rpm": 7826,
555
+ "fans": [
556
+ {
557
+ "actual_rpm": 6265,
558
+ "max_capacity_rpm": 7826,
559
+ "mode": "auto",
560
+ "raw": {
561
+ "actual_rpm": 6265,
562
+ "index": 0,
563
+ "max_rpm": 7826,
564
+ "min_rpm": 2317,
565
+ "mode": "auto",
566
+ "target_rpm": 5768
567
+ },
568
+ "rpm": 6265,
569
+ "target_rpm": 5768
570
+ },
571
+ {
572
+ "actual_rpm": 6643,
573
+ "max_capacity_rpm": 7826,
574
+ "mode": "auto",
575
+ "raw": {
576
+ "actual_rpm": 6643,
577
+ "index": 1,
578
+ "max_rpm": 7826,
579
+ "min_rpm": 2317,
580
+ "mode": "auto",
581
+ "target_rpm": 6229
582
+ },
583
+ "rpm": 6643,
584
+ "target_rpm": 6229
585
+ }
586
+ ],
587
+ "max_rpm": 6643,
588
+ "min_rpm": 6265,
589
+ "ok": true,
590
+ "raw": {
591
+ "attempts": [
592
+ {
593
+ "command": [
594
+ "/Users/youssof/.mtplx/bin/thermalforge",
595
+ "status"
596
+ ],
597
+ "ok": true,
598
+ "returncode": 0,
599
+ "stderr": "",
600
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 6265,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5768\n },\n {\n \"actual_rpm\" : 6643,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 6229\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 32.3,\n \"TCDX\" : 73.6,\n \"TCHP\" : 61.1,\n \"TCMb\" : 88.7,\n \"TG0B\" : 32.4,\n \"TG0H\" : 32,\n \"TG0V\" : 32.4,\n \"TH0x\" : 40.9,\n \"TMVR\" : 66.4,\n \"TPDX\" : 68.6,\n \"TRDX\" : 81.9,\n \"TS0P\" : 72.8,\n \"Tg0j\" : 77,\n \"Tm08\" : 73.7,\n \"Tp04\" : 74.2,\n \"Tp08\" : 73.9,\n \"Tp0C\" : 74.8,\n \"Tp0G\" : 75.2,\n \"Tp0X\" : 75\n }\n}"
601
+ }
602
+ ],
603
+ "detection": {
604
+ "available": true,
605
+ "clock_anchor_enabled": false,
606
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
607
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
608
+ "selected": {
609
+ "kind": "thermalforge",
610
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
611
+ "version": {
612
+ "command": [
613
+ "/Users/youssof/.mtplx/bin/thermalforge",
614
+ "--version"
615
+ ],
616
+ "ok": true,
617
+ "returncode": 0,
618
+ "stderr": "",
619
+ "stdout": "0.1.0"
620
+ }
621
+ },
622
+ "tools": [
623
+ {
624
+ "kind": "thermalforge",
625
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
626
+ "version": {
627
+ "command": [
628
+ "/Users/youssof/.mtplx/bin/thermalforge",
629
+ "--version"
630
+ ],
631
+ "ok": true,
632
+ "returncode": 0,
633
+ "stderr": "",
634
+ "stdout": "0.1.0"
635
+ }
636
+ }
637
+ ]
638
+ },
639
+ "ok": true,
640
+ "status": {
641
+ "command": [
642
+ "/Users/youssof/.mtplx/bin/thermalforge",
643
+ "status"
644
+ ],
645
+ "ok": true,
646
+ "returncode": 0,
647
+ "stderr": "",
648
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 6265,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5768\n },\n {\n \"actual_rpm\" : 6643,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 6229\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 32.3,\n \"TCDX\" : 73.6,\n \"TCHP\" : 61.1,\n \"TCMb\" : 88.7,\n \"TG0B\" : 32.4,\n \"TG0H\" : 32,\n \"TG0V\" : 32.4,\n \"TH0x\" : 40.9,\n \"TMVR\" : 66.4,\n \"TPDX\" : 68.6,\n \"TRDX\" : 81.9,\n \"TS0P\" : 72.8,\n \"Tg0j\" : 77,\n \"Tm08\" : 73.7,\n \"Tp04\" : 74.2,\n \"Tp08\" : 73.9,\n \"Tp0C\" : 74.8,\n \"Tp0G\" : 75.2,\n \"Tp0X\" : 75\n }\n}"
649
+ }
650
+ },
651
+ "target_max_rpm": 6229,
652
+ "target_min_rpm": 5768
653
+ },
654
+ "message": "fan profile restored",
655
+ "ok": true,
656
+ "profile": "silent",
657
+ "set_result": {
658
+ "attempts": [
659
+ {
660
+ "command": [
661
+ "sudo",
662
+ "-n",
663
+ "/Users/youssof/.mtplx/bin/thermalforge",
664
+ "auto"
665
+ ],
666
+ "ok": true,
667
+ "returncode": 0,
668
+ "stderr": "No matching processes were found",
669
+ "stdout": "Fans reset to Apple defaults"
670
+ }
671
+ ],
672
+ "command": [
673
+ "sudo",
674
+ "-n",
675
+ "/Users/youssof/.mtplx/bin/thermalforge",
676
+ "auto"
677
+ ],
678
+ "detection": {
679
+ "available": true,
680
+ "clock_anchor_enabled": false,
681
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
682
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
683
+ "selected": {
684
+ "kind": "thermalforge",
685
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
686
+ "version": {
687
+ "command": [
688
+ "/Users/youssof/.mtplx/bin/thermalforge",
689
+ "--version"
690
+ ],
691
+ "ok": true,
692
+ "returncode": 0,
693
+ "stderr": "",
694
+ "stdout": "0.1.0"
695
+ }
696
+ },
697
+ "tools": [
698
+ {
699
+ "kind": "thermalforge",
700
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
701
+ "version": {
702
+ "command": [
703
+ "/Users/youssof/.mtplx/bin/thermalforge",
704
+ "--version"
705
+ ],
706
+ "ok": true,
707
+ "returncode": 0,
708
+ "stderr": "",
709
+ "stdout": "0.1.0"
710
+ }
711
+ }
712
+ ]
713
+ },
714
+ "dry_run": false,
715
+ "ok": true,
716
+ "profile": "silent"
717
+ }
718
+ },
719
+ "fanmax": {
720
+ "after": {
721
+ "actual_max_rpm": 7477,
722
+ "actual_min_rpm": 7385,
723
+ "capacity_max_rpm": 7826,
724
+ "capacity_min_rpm": 7826,
725
+ "fans": [
726
+ {
727
+ "actual_rpm": 7385,
728
+ "max_capacity_rpm": 7826,
729
+ "mode": "manual",
730
+ "raw": {
731
+ "actual_rpm": 7385,
732
+ "index": 0,
733
+ "max_rpm": 7826,
734
+ "min_rpm": 2317,
735
+ "mode": "manual",
736
+ "target_rpm": 7826
737
+ },
738
+ "rpm": 7385,
739
+ "target_rpm": 7826
740
+ },
741
+ {
742
+ "actual_rpm": 7477,
743
+ "max_capacity_rpm": 7826,
744
+ "mode": "manual",
745
+ "raw": {
746
+ "actual_rpm": 7477,
747
+ "index": 1,
748
+ "max_rpm": 7826,
749
+ "min_rpm": 2317,
750
+ "mode": "manual",
751
+ "target_rpm": 7826
752
+ },
753
+ "rpm": 7477,
754
+ "target_rpm": 7826
755
+ }
756
+ ],
757
+ "max_rpm": 7477,
758
+ "min_rpm": 7385,
759
+ "ok": true,
760
+ "raw": {
761
+ "attempts": [
762
+ {
763
+ "command": [
764
+ "/Users/youssof/.mtplx/bin/thermalforge",
765
+ "status"
766
+ ],
767
+ "ok": true,
768
+ "returncode": 0,
769
+ "stderr": "",
770
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 7385,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n },\n {\n \"actual_rpm\" : 7477,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 32.8,\n \"TCDX\" : 64.5,\n \"TCHP\" : 56,\n \"TCMb\" : 71.5,\n \"TG0B\" : 32.8,\n \"TG0H\" : 32,\n \"TG0V\" : 32.8,\n \"TH0x\" : 38.8,\n \"TMVR\" : 55.8,\n \"TPDX\" : 60.8,\n \"TRDX\" : 60.9,\n \"TS0P\" : 61.7,\n \"Tg0j\" : 61,\n \"Tm08\" : 61.6,\n \"Tp04\" : 64.6,\n \"Tp08\" : 64,\n \"Tp0C\" : 65.5,\n \"Tp0G\" : 65.3,\n \"Tp0X\" : 64\n }\n}"
771
+ }
772
+ ],
773
+ "detection": {
774
+ "available": true,
775
+ "clock_anchor_enabled": false,
776
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
777
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
778
+ "selected": {
779
+ "kind": "thermalforge",
780
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
781
+ "version": {
782
+ "command": [
783
+ "/Users/youssof/.mtplx/bin/thermalforge",
784
+ "--version"
785
+ ],
786
+ "ok": true,
787
+ "returncode": 0,
788
+ "stderr": "",
789
+ "stdout": "0.1.0"
790
+ }
791
+ },
792
+ "tools": [
793
+ {
794
+ "kind": "thermalforge",
795
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
796
+ "version": {
797
+ "command": [
798
+ "/Users/youssof/.mtplx/bin/thermalforge",
799
+ "--version"
800
+ ],
801
+ "ok": true,
802
+ "returncode": 0,
803
+ "stderr": "",
804
+ "stdout": "0.1.0"
805
+ }
806
+ }
807
+ ]
808
+ },
809
+ "ok": true,
810
+ "status": {
811
+ "command": [
812
+ "/Users/youssof/.mtplx/bin/thermalforge",
813
+ "status"
814
+ ],
815
+ "ok": true,
816
+ "returncode": 0,
817
+ "stderr": "",
818
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 7385,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n },\n {\n \"actual_rpm\" : 7477,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 32.8,\n \"TCDX\" : 64.5,\n \"TCHP\" : 56,\n \"TCMb\" : 71.5,\n \"TG0B\" : 32.8,\n \"TG0H\" : 32,\n \"TG0V\" : 32.8,\n \"TH0x\" : 38.8,\n \"TMVR\" : 55.8,\n \"TPDX\" : 60.8,\n \"TRDX\" : 60.9,\n \"TS0P\" : 61.7,\n \"Tg0j\" : 61,\n \"Tm08\" : 61.6,\n \"Tp04\" : 64.6,\n \"Tp08\" : 64,\n \"Tp0C\" : 65.5,\n \"Tp0G\" : 65.3,\n \"Tp0X\" : 64\n }\n}"
819
+ }
820
+ },
821
+ "target_max_rpm": 7826,
822
+ "target_min_rpm": 7826
823
+ },
824
+ "baseline": {
825
+ "actual_max_rpm": 6286,
826
+ "actual_min_rpm": 5825,
827
+ "capacity_max_rpm": 7826,
828
+ "capacity_min_rpm": 7826,
829
+ "fans": [
830
+ {
831
+ "actual_rpm": 5825,
832
+ "max_capacity_rpm": 7826,
833
+ "mode": "auto",
834
+ "raw": {
835
+ "actual_rpm": 5825,
836
+ "index": 0,
837
+ "max_rpm": 7826,
838
+ "min_rpm": 2317,
839
+ "mode": "auto",
840
+ "target_rpm": 5822
841
+ },
842
+ "rpm": 5825,
843
+ "target_rpm": 5822
844
+ },
845
+ {
846
+ "actual_rpm": 6286,
847
+ "max_capacity_rpm": 7826,
848
+ "mode": "auto",
849
+ "raw": {
850
+ "actual_rpm": 6286,
851
+ "index": 1,
852
+ "max_rpm": 7826,
853
+ "min_rpm": 2317,
854
+ "mode": "auto",
855
+ "target_rpm": 6287
856
+ },
857
+ "rpm": 6286,
858
+ "target_rpm": 6287
859
+ }
860
+ ],
861
+ "max_rpm": 6286,
862
+ "min_rpm": 5825,
863
+ "ok": true,
864
+ "raw": {
865
+ "attempts": [
866
+ {
867
+ "command": [
868
+ "/Users/youssof/.mtplx/bin/thermalforge",
869
+ "status"
870
+ ],
871
+ "ok": true,
872
+ "returncode": 0,
873
+ "stderr": "",
874
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 5825,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5822\n },\n {\n \"actual_rpm\" : 6286,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 6287\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 32.8,\n \"TCDX\" : 63.9,\n \"TCHP\" : 56.4,\n \"TCMb\" : 73.3,\n \"TG0B\" : 32.8,\n \"TG0H\" : 32,\n \"TG0V\" : 32.8,\n \"TH0x\" : 38.8,\n \"TMVR\" : 56.4,\n \"TPDX\" : 61.2,\n \"TRDX\" : 61.4,\n \"TS0P\" : 62.7,\n \"Tg0j\" : 61.3,\n \"Tm08\" : 61.7,\n \"Tp04\" : 63.4,\n \"Tp08\" : 62.9,\n \"Tp0C\" : 63.8,\n \"Tp0G\" : 64,\n \"Tp0X\" : 63.1\n }\n}"
875
+ }
876
+ ],
877
+ "detection": {
878
+ "available": true,
879
+ "clock_anchor_enabled": false,
880
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
881
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
882
+ "selected": {
883
+ "kind": "thermalforge",
884
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
885
+ "version": {
886
+ "command": [
887
+ "/Users/youssof/.mtplx/bin/thermalforge",
888
+ "--version"
889
+ ],
890
+ "ok": true,
891
+ "returncode": 0,
892
+ "stderr": "",
893
+ "stdout": "0.1.0"
894
+ }
895
+ },
896
+ "tools": [
897
+ {
898
+ "kind": "thermalforge",
899
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
900
+ "version": {
901
+ "command": [
902
+ "/Users/youssof/.mtplx/bin/thermalforge",
903
+ "--version"
904
+ ],
905
+ "ok": true,
906
+ "returncode": 0,
907
+ "stderr": "",
908
+ "stdout": "0.1.0"
909
+ }
910
+ }
911
+ ]
912
+ },
913
+ "ok": true,
914
+ "status": {
915
+ "command": [
916
+ "/Users/youssof/.mtplx/bin/thermalforge",
917
+ "status"
918
+ ],
919
+ "ok": true,
920
+ "returncode": 0,
921
+ "stderr": "",
922
+ "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 5825,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5822\n },\n {\n \"actual_rpm\" : 6286,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 6287\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 32.8,\n \"TCDX\" : 63.9,\n \"TCHP\" : 56.4,\n \"TCMb\" : 73.3,\n \"TG0B\" : 32.8,\n \"TG0H\" : 32,\n \"TG0V\" : 32.8,\n \"TH0x\" : 38.8,\n \"TMVR\" : 56.4,\n \"TPDX\" : 61.2,\n \"TRDX\" : 61.4,\n \"TS0P\" : 62.7,\n \"Tg0j\" : 61.3,\n \"Tm08\" : 61.7,\n \"Tp04\" : 63.4,\n \"Tp08\" : 62.9,\n \"Tp0C\" : 63.8,\n \"Tp0G\" : 64,\n \"Tp0X\" : 63.1\n }\n}"
923
+ }
924
+ },
925
+ "target_max_rpm": 6287,
926
+ "target_min_rpm": 5822
927
+ },
928
+ "message": "fans ramped to max (actual 7385-7477 RPM; target 7826 RPM)",
929
+ "ok": true,
930
+ "profile": "max",
931
+ "set_result": {
932
+ "attempts": [
933
+ {
934
+ "command": [
935
+ "sudo",
936
+ "-n",
937
+ "/Users/youssof/.mtplx/bin/thermalforge",
938
+ "max"
939
+ ],
940
+ "ok": true,
941
+ "returncode": 0,
942
+ "stderr": "",
943
+ "stdout": "Fan 0: 5825 RPM \u2192 max (7826 RPM)\nFan 1: 6286 RPM \u2192 max (7826 RPM)"
944
+ }
945
+ ],
946
+ "command": [
947
+ "sudo",
948
+ "-n",
949
+ "/Users/youssof/.mtplx/bin/thermalforge",
950
+ "max"
951
+ ],
952
+ "detection": {
953
+ "available": true,
954
+ "clock_anchor_enabled": false,
955
+ "clock_anchor_policy": "explicit experimental only; never used for product claims",
956
+ "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.",
957
+ "selected": {
958
+ "kind": "thermalforge",
959
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
960
+ "version": {
961
+ "command": [
962
+ "/Users/youssof/.mtplx/bin/thermalforge",
963
+ "--version"
964
+ ],
965
+ "ok": true,
966
+ "returncode": 0,
967
+ "stderr": "",
968
+ "stdout": "0.1.0"
969
+ }
970
+ },
971
+ "tools": [
972
+ {
973
+ "kind": "thermalforge",
974
+ "path": "/Users/youssof/.mtplx/bin/thermalforge",
975
+ "version": {
976
+ "command": [
977
+ "/Users/youssof/.mtplx/bin/thermalforge",
978
+ "--version"
979
+ ],
980
+ "ok": true,
981
+ "returncode": 0,
982
+ "stderr": "",
983
+ "stdout": "0.1.0"
984
+ }
985
+ }
986
+ ]
987
+ },
988
+ "dry_run": false,
989
+ "ok": true,
990
+ "profile": "max"
991
+ }
992
+ },
993
+ "max_tokens": 1000,
994
+ "median_confirmation_speedup": 1.9951908873949589,
995
+ "memory_gate": {
996
+ "ar_peak_memory_gib": 17.040886400267482,
997
+ "lte_ar_plus_6_gib": true,
998
+ "lte_ar_times_1_18": true,
999
+ "mtp_peak_memory_gib": 17.566345684230328
1000
+ },
1001
+ "mtp_by_block_size": {
1002
+ "3": {
1003
+ "acceptance": 0.991044776119403,
1004
+ "accepted_drafts": 664,
1005
+ "active_memory_gib": 17.409974498674273,
1006
+ "block_size": 3,
1007
+ "cache_memory_gib": 96.58854798320681,
1008
+ "decode_s": 37.873455042004935,
1009
+ "draft_sampler": {
1010
+ "temperature": 1.0,
1011
+ "top_k": 64,
1012
+ "top_p": 0.95
1013
+ },
1014
+ "drafted_tokens": 670,
1015
+ "generated_tokens": 1000,
1016
+ "mode": "mtp",
1017
+ "peak_memory_gib": 17.5211139684543,
1018
+ "prefill_s": 0.3529969999945024,
1019
+ "row_distribution_evals": 0,
1020
+ "speedup_vs_ar": 1.2094102615196556,
1021
+ "target_distribution_modes": {
1022
+ "batched_logits": 335
1023
+ },
1024
+ "target_sampler": {
1025
+ "temperature": 1.0,
1026
+ "top_k": 64,
1027
+ "top_p": 0.95
1028
+ },
1029
+ "telemetry": {
1030
+ "ar_dense_fallback_calls": 0,
1031
+ "decode_dense_fallback_calls": 0,
1032
+ "dense_fallback_calls_by_phase": {
1033
+ "ar_decode": 0,
1034
+ "decode_verify": 0,
1035
+ "postcommit": 0,
1036
+ "prefill": 0,
1037
+ "unknown": 0
1038
+ },
1039
+ "events": [],
1040
+ "paged_active_array_calls_by_phase": {
1041
+ "ar_decode": 0,
1042
+ "decode_verify": 0,
1043
+ "postcommit": 0,
1044
+ "prefill": 0,
1045
+ "unknown": 0
1046
+ },
1047
+ "paged_attention_bailouts_by_phase_reason": {
1048
+ "ar_decode": {
1049
+ "batch_not_1": 0,
1050
+ "block_size_mismatch": 0,
1051
+ "blocks_invalid": 0,
1052
+ "dtype_unsupported": 0,
1053
+ "empty_cache": 0,
1054
+ "head_dim_unsupported": 0,
1055
+ "kernel_unavailable": 0,
1056
+ "offset_invalid": 0,
1057
+ "partitioned_invalid_output": 0,
1058
+ "partitioned_unavailable": 0,
1059
+ "q_len_gt_max": 0,
1060
+ "q_len_invalid": 0,
1061
+ "turboquant_unsupported": 0,
1062
+ "unknown": 0,
1063
+ "unsupported_mask": 0
1064
+ },
1065
+ "decode_verify": {
1066
+ "batch_not_1": 0,
1067
+ "block_size_mismatch": 0,
1068
+ "blocks_invalid": 0,
1069
+ "dtype_unsupported": 0,
1070
+ "empty_cache": 0,
1071
+ "head_dim_unsupported": 0,
1072
+ "kernel_unavailable": 0,
1073
+ "offset_invalid": 0,
1074
+ "partitioned_invalid_output": 0,
1075
+ "partitioned_unavailable": 0,
1076
+ "q_len_gt_max": 0,
1077
+ "q_len_invalid": 0,
1078
+ "turboquant_unsupported": 0,
1079
+ "unknown": 0,
1080
+ "unsupported_mask": 0
1081
+ },
1082
+ "postcommit": {
1083
+ "batch_not_1": 0,
1084
+ "block_size_mismatch": 0,
1085
+ "blocks_invalid": 0,
1086
+ "dtype_unsupported": 0,
1087
+ "empty_cache": 0,
1088
+ "head_dim_unsupported": 0,
1089
+ "kernel_unavailable": 0,
1090
+ "offset_invalid": 0,
1091
+ "partitioned_invalid_output": 0,
1092
+ "partitioned_unavailable": 0,
1093
+ "q_len_gt_max": 0,
1094
+ "q_len_invalid": 0,
1095
+ "turboquant_unsupported": 0,
1096
+ "unknown": 0,
1097
+ "unsupported_mask": 0
1098
+ },
1099
+ "prefill": {
1100
+ "batch_not_1": 0,
1101
+ "block_size_mismatch": 0,
1102
+ "blocks_invalid": 0,
1103
+ "dtype_unsupported": 0,
1104
+ "empty_cache": 0,
1105
+ "head_dim_unsupported": 0,
1106
+ "kernel_unavailable": 0,
1107
+ "offset_invalid": 0,
1108
+ "partitioned_invalid_output": 0,
1109
+ "partitioned_unavailable": 0,
1110
+ "q_len_gt_max": 0,
1111
+ "q_len_invalid": 0,
1112
+ "turboquant_unsupported": 0,
1113
+ "unknown": 0,
1114
+ "unsupported_mask": 0
1115
+ },
1116
+ "unknown": {
1117
+ "batch_not_1": 0,
1118
+ "block_size_mismatch": 0,
1119
+ "blocks_invalid": 0,
1120
+ "dtype_unsupported": 0,
1121
+ "empty_cache": 0,
1122
+ "head_dim_unsupported": 0,
1123
+ "kernel_unavailable": 0,
1124
+ "offset_invalid": 0,
1125
+ "partitioned_invalid_output": 0,
1126
+ "partitioned_unavailable": 0,
1127
+ "q_len_gt_max": 0,
1128
+ "q_len_invalid": 0,
1129
+ "turboquant_unsupported": 0,
1130
+ "unknown": 0,
1131
+ "unsupported_mask": 0
1132
+ }
1133
+ },
1134
+ "paged_attention_large_q_path": {
1135
+ "dense_forbidden": 0,
1136
+ "large_q_split_sdpa_fallback": 0,
1137
+ "partitioned_paged": 0,
1138
+ "tail_paged": 0,
1139
+ "unknown": 0
1140
+ },
1141
+ "postcommit_dense_fallback_calls": 0,
1142
+ "prefill_dense_fallback_calls": 0,
1143
+ "trace_events": false
1144
+ },
1145
+ "timing_s": {
1146
+ "accept": 0.0206884491344681,
1147
+ "draft": 1.4088243619044079,
1148
+ "rollback": 0.0002345078537473455,
1149
+ "target_distribution": 36.03245366179908,
1150
+ "target_hidden": 0.0,
1151
+ "verify": 0.3872644162038341
1152
+ },
1153
+ "tok_s": 26.403717297270966,
1154
+ "token_preview": [
1155
+ 9996,
1156
+ 625,
1157
+ 759,
1158
+ 759,
1159
+ 759,
1160
+ 759,
1161
+ 759,
1162
+ 759,
1163
+ 759,
1164
+ 759,
1165
+ 759,
1166
+ 759,
1167
+ 759,
1168
+ 759,
1169
+ 759,
1170
+ 759
1171
+ ],
1172
+ "verify_calls": 335
1173
+ },
1174
+ "4": {
1175
+ "acceptance": 0.9548969072164949,
1176
+ "accepted_drafts": 741,
1177
+ "active_memory_gib": 17.411866588518023,
1178
+ "block_size": 4,
1179
+ "cache_memory_gib": 96.58724461961538,
1180
+ "decode_s": 36.01139437498932,
1181
+ "draft_sampler": {
1182
+ "temperature": 1.0,
1183
+ "top_k": 64,
1184
+ "top_p": 0.95
1185
+ },
1186
+ "drafted_tokens": 776,
1187
+ "generated_tokens": 1000,
1188
+ "mode": "mtp",
1189
+ "peak_memory_gib": 17.544619735330343,
1190
+ "prefill_s": 0.2180854579928564,
1191
+ "row_distribution_evals": 0,
1192
+ "speedup_vs_ar": 1.2719458927371148,
1193
+ "target_distribution_modes": {
1194
+ "batched_logits": 259
1195
+ },
1196
+ "target_sampler": {
1197
+ "temperature": 1.0,
1198
+ "top_k": 64,
1199
+ "top_p": 0.95
1200
+ },
1201
+ "telemetry": {
1202
+ "ar_dense_fallback_calls": 0,
1203
+ "decode_dense_fallback_calls": 0,
1204
+ "dense_fallback_calls_by_phase": {
1205
+ "ar_decode": 0,
1206
+ "decode_verify": 0,
1207
+ "postcommit": 0,
1208
+ "prefill": 0,
1209
+ "unknown": 0
1210
+ },
1211
+ "events": [],
1212
+ "paged_active_array_calls_by_phase": {
1213
+ "ar_decode": 0,
1214
+ "decode_verify": 0,
1215
+ "postcommit": 0,
1216
+ "prefill": 0,
1217
+ "unknown": 0
1218
+ },
1219
+ "paged_attention_bailouts_by_phase_reason": {
1220
+ "ar_decode": {
1221
+ "batch_not_1": 0,
1222
+ "block_size_mismatch": 0,
1223
+ "blocks_invalid": 0,
1224
+ "dtype_unsupported": 0,
1225
+ "empty_cache": 0,
1226
+ "head_dim_unsupported": 0,
1227
+ "kernel_unavailable": 0,
1228
+ "offset_invalid": 0,
1229
+ "partitioned_invalid_output": 0,
1230
+ "partitioned_unavailable": 0,
1231
+ "q_len_gt_max": 0,
1232
+ "q_len_invalid": 0,
1233
+ "turboquant_unsupported": 0,
1234
+ "unknown": 0,
1235
+ "unsupported_mask": 0
1236
+ },
1237
+ "decode_verify": {
1238
+ "batch_not_1": 0,
1239
+ "block_size_mismatch": 0,
1240
+ "blocks_invalid": 0,
1241
+ "dtype_unsupported": 0,
1242
+ "empty_cache": 0,
1243
+ "head_dim_unsupported": 0,
1244
+ "kernel_unavailable": 0,
1245
+ "offset_invalid": 0,
1246
+ "partitioned_invalid_output": 0,
1247
+ "partitioned_unavailable": 0,
1248
+ "q_len_gt_max": 0,
1249
+ "q_len_invalid": 0,
1250
+ "turboquant_unsupported": 0,
1251
+ "unknown": 0,
1252
+ "unsupported_mask": 0
1253
+ },
1254
+ "postcommit": {
1255
+ "batch_not_1": 0,
1256
+ "block_size_mismatch": 0,
1257
+ "blocks_invalid": 0,
1258
+ "dtype_unsupported": 0,
1259
+ "empty_cache": 0,
1260
+ "head_dim_unsupported": 0,
1261
+ "kernel_unavailable": 0,
1262
+ "offset_invalid": 0,
1263
+ "partitioned_invalid_output": 0,
1264
+ "partitioned_unavailable": 0,
1265
+ "q_len_gt_max": 0,
1266
+ "q_len_invalid": 0,
1267
+ "turboquant_unsupported": 0,
1268
+ "unknown": 0,
1269
+ "unsupported_mask": 0
1270
+ },
1271
+ "prefill": {
1272
+ "batch_not_1": 0,
1273
+ "block_size_mismatch": 0,
1274
+ "blocks_invalid": 0,
1275
+ "dtype_unsupported": 0,
1276
+ "empty_cache": 0,
1277
+ "head_dim_unsupported": 0,
1278
+ "kernel_unavailable": 0,
1279
+ "offset_invalid": 0,
1280
+ "partitioned_invalid_output": 0,
1281
+ "partitioned_unavailable": 0,
1282
+ "q_len_gt_max": 0,
1283
+ "q_len_invalid": 0,
1284
+ "turboquant_unsupported": 0,
1285
+ "unknown": 0,
1286
+ "unsupported_mask": 0
1287
+ },
1288
+ "unknown": {
1289
+ "batch_not_1": 0,
1290
+ "block_size_mismatch": 0,
1291
+ "blocks_invalid": 0,
1292
+ "dtype_unsupported": 0,
1293
+ "empty_cache": 0,
1294
+ "head_dim_unsupported": 0,
1295
+ "kernel_unavailable": 0,
1296
+ "offset_invalid": 0,
1297
+ "partitioned_invalid_output": 0,
1298
+ "partitioned_unavailable": 0,
1299
+ "q_len_gt_max": 0,
1300
+ "q_len_invalid": 0,
1301
+ "turboquant_unsupported": 0,
1302
+ "unknown": 0,
1303
+ "unsupported_mask": 0
1304
+ }
1305
+ },
1306
+ "paged_attention_large_q_path": {
1307
+ "dense_forbidden": 0,
1308
+ "large_q_split_sdpa_fallback": 0,
1309
+ "partitioned_paged": 0,
1310
+ "tail_paged": 0,
1311
+ "unknown": 0
1312
+ },
1313
+ "postcommit_dense_fallback_calls": 0,
1314
+ "prefill_dense_fallback_calls": 0,
1315
+ "trace_events": false
1316
+ },
1317
+ "timing_s": {
1318
+ "accept": 0.01497975607344415,
1319
+ "draft": 1.4912097119231476,
1320
+ "rollback": 0.0002762930525932461,
1321
+ "target_distribution": 34.18285454783472,
1322
+ "target_hidden": 0.0,
1323
+ "verify": 0.3010120859835297
1324
+ },
1325
+ "tok_s": 27.768988603632668,
1326
+ "token_preview": [
1327
+ 9996,
1328
+ 625,
1329
+ 759,
1330
+ 759,
1331
+ 759,
1332
+ 759,
1333
+ 759,
1334
+ 759,
1335
+ 759,
1336
+ 236761,
1337
+ 5715,
1338
+ 236789,
1339
+ 236751,
1340
+ 506,
1341
+ 3772,
1342
+ 236787
1343
+ ],
1344
+ "verify_calls": 259
1345
+ },
1346
+ "5": {
1347
+ "acceptance": 0.8822197055492639,
1348
+ "accepted_drafts": 779,
1349
+ "active_memory_gib": 17.411210460588336,
1350
+ "block_size": 5,
1351
+ "cache_memory_gib": 82.11562378518283,
1352
+ "decode_s": 24.697525832991232,
1353
+ "draft_sampler": {
1354
+ "temperature": 1.0,
1355
+ "top_k": 64,
1356
+ "top_p": 0.95
1357
+ },
1358
+ "drafted_tokens": 883,
1359
+ "generated_tokens": 1000,
1360
+ "mode": "mtp",
1361
+ "peak_memory_gib": 17.551317367702723,
1362
+ "prefill_s": 0.21940916699531954,
1363
+ "row_distribution_evals": 0,
1364
+ "speedup_vs_ar": 1.8546208019678587,
1365
+ "target_distribution_modes": {
1366
+ "batched_logits": 221
1367
+ },
1368
+ "target_sampler": {
1369
+ "temperature": 1.0,
1370
+ "top_k": 64,
1371
+ "top_p": 0.95
1372
+ },
1373
+ "telemetry": {
1374
+ "ar_dense_fallback_calls": 0,
1375
+ "decode_dense_fallback_calls": 0,
1376
+ "dense_fallback_calls_by_phase": {
1377
+ "ar_decode": 0,
1378
+ "decode_verify": 0,
1379
+ "postcommit": 0,
1380
+ "prefill": 0,
1381
+ "unknown": 0
1382
+ },
1383
+ "events": [],
1384
+ "paged_active_array_calls_by_phase": {
1385
+ "ar_decode": 0,
1386
+ "decode_verify": 0,
1387
+ "postcommit": 0,
1388
+ "prefill": 0,
1389
+ "unknown": 0
1390
+ },
1391
+ "paged_attention_bailouts_by_phase_reason": {
1392
+ "ar_decode": {
1393
+ "batch_not_1": 0,
1394
+ "block_size_mismatch": 0,
1395
+ "blocks_invalid": 0,
1396
+ "dtype_unsupported": 0,
1397
+ "empty_cache": 0,
1398
+ "head_dim_unsupported": 0,
1399
+ "kernel_unavailable": 0,
1400
+ "offset_invalid": 0,
1401
+ "partitioned_invalid_output": 0,
1402
+ "partitioned_unavailable": 0,
1403
+ "q_len_gt_max": 0,
1404
+ "q_len_invalid": 0,
1405
+ "turboquant_unsupported": 0,
1406
+ "unknown": 0,
1407
+ "unsupported_mask": 0
1408
+ },
1409
+ "decode_verify": {
1410
+ "batch_not_1": 0,
1411
+ "block_size_mismatch": 0,
1412
+ "blocks_invalid": 0,
1413
+ "dtype_unsupported": 0,
1414
+ "empty_cache": 0,
1415
+ "head_dim_unsupported": 0,
1416
+ "kernel_unavailable": 0,
1417
+ "offset_invalid": 0,
1418
+ "partitioned_invalid_output": 0,
1419
+ "partitioned_unavailable": 0,
1420
+ "q_len_gt_max": 0,
1421
+ "q_len_invalid": 0,
1422
+ "turboquant_unsupported": 0,
1423
+ "unknown": 0,
1424
+ "unsupported_mask": 0
1425
+ },
1426
+ "postcommit": {
1427
+ "batch_not_1": 0,
1428
+ "block_size_mismatch": 0,
1429
+ "blocks_invalid": 0,
1430
+ "dtype_unsupported": 0,
1431
+ "empty_cache": 0,
1432
+ "head_dim_unsupported": 0,
1433
+ "kernel_unavailable": 0,
1434
+ "offset_invalid": 0,
1435
+ "partitioned_invalid_output": 0,
1436
+ "partitioned_unavailable": 0,
1437
+ "q_len_gt_max": 0,
1438
+ "q_len_invalid": 0,
1439
+ "turboquant_unsupported": 0,
1440
+ "unknown": 0,
1441
+ "unsupported_mask": 0
1442
+ },
1443
+ "prefill": {
1444
+ "batch_not_1": 0,
1445
+ "block_size_mismatch": 0,
1446
+ "blocks_invalid": 0,
1447
+ "dtype_unsupported": 0,
1448
+ "empty_cache": 0,
1449
+ "head_dim_unsupported": 0,
1450
+ "kernel_unavailable": 0,
1451
+ "offset_invalid": 0,
1452
+ "partitioned_invalid_output": 0,
1453
+ "partitioned_unavailable": 0,
1454
+ "q_len_gt_max": 0,
1455
+ "q_len_invalid": 0,
1456
+ "turboquant_unsupported": 0,
1457
+ "unknown": 0,
1458
+ "unsupported_mask": 0
1459
+ },
1460
+ "unknown": {
1461
+ "batch_not_1": 0,
1462
+ "block_size_mismatch": 0,
1463
+ "blocks_invalid": 0,
1464
+ "dtype_unsupported": 0,
1465
+ "empty_cache": 0,
1466
+ "head_dim_unsupported": 0,
1467
+ "kernel_unavailable": 0,
1468
+ "offset_invalid": 0,
1469
+ "partitioned_invalid_output": 0,
1470
+ "partitioned_unavailable": 0,
1471
+ "q_len_gt_max": 0,
1472
+ "q_len_invalid": 0,
1473
+ "turboquant_unsupported": 0,
1474
+ "unknown": 0,
1475
+ "unsupported_mask": 0
1476
+ }
1477
+ },
1478
+ "paged_attention_large_q_path": {
1479
+ "dense_forbidden": 0,
1480
+ "large_q_split_sdpa_fallback": 0,
1481
+ "partitioned_paged": 0,
1482
+ "tail_paged": 0,
1483
+ "unknown": 0
1484
+ },
1485
+ "postcommit_dense_fallback_calls": 0,
1486
+ "prefill_dense_fallback_calls": 0,
1487
+ "trace_events": false
1488
+ },
1489
+ "timing_s": {
1490
+ "accept": 0.013920457829954103,
1491
+ "draft": 1.5722365041146986,
1492
+ "rollback": 0.00047238002298399806,
1493
+ "target_distribution": 22.840641582908574,
1494
+ "target_hidden": 0.0,
1495
+ "verify": 0.2542634050187189
1496
+ },
1497
+ "tok_s": 40.489885778930486,
1498
+ "token_preview": [
1499
+ 9996,
1500
+ 625,
1501
+ 759,
1502
+ 759,
1503
+ 759,
1504
+ 759,
1505
+ 759,
1506
+ 759,
1507
+ 759,
1508
+ 236761,
1509
+ 759,
1510
+ 759,
1511
+ 759,
1512
+ 759,
1513
+ 759,
1514
+ 759
1515
+ ],
1516
+ "verify_calls": 221
1517
+ },
1518
+ "6": {
1519
+ "acceptance": 0.9810874704491725,
1520
+ "accepted_drafts": 830,
1521
+ "active_memory_gib": 17.41092054359615,
1522
+ "block_size": 6,
1523
+ "cache_memory_gib": 67.33365368191153,
1524
+ "decode_s": 22.95747512500384,
1525
+ "draft_sampler": {
1526
+ "temperature": 1.0,
1527
+ "top_k": 64,
1528
+ "top_p": 0.95
1529
+ },
1530
+ "drafted_tokens": 846,
1531
+ "generated_tokens": 1000,
1532
+ "mode": "mtp",
1533
+ "peak_memory_gib": 17.566345684230328,
1534
+ "prefill_s": 0.2074422090081498,
1535
+ "row_distribution_evals": 0,
1536
+ "speedup_vs_ar": 1.9951908873949589,
1537
+ "target_distribution_modes": {
1538
+ "batched_logits": 170
1539
+ },
1540
+ "target_sampler": {
1541
+ "temperature": 1.0,
1542
+ "top_k": 64,
1543
+ "top_p": 0.95
1544
+ },
1545
+ "telemetry": {
1546
+ "ar_dense_fallback_calls": 0,
1547
+ "decode_dense_fallback_calls": 0,
1548
+ "dense_fallback_calls_by_phase": {
1549
+ "ar_decode": 0,
1550
+ "decode_verify": 0,
1551
+ "postcommit": 0,
1552
+ "prefill": 0,
1553
+ "unknown": 0
1554
+ },
1555
+ "events": [],
1556
+ "paged_active_array_calls_by_phase": {
1557
+ "ar_decode": 0,
1558
+ "decode_verify": 0,
1559
+ "postcommit": 0,
1560
+ "prefill": 0,
1561
+ "unknown": 0
1562
+ },
1563
+ "paged_attention_bailouts_by_phase_reason": {
1564
+ "ar_decode": {
1565
+ "batch_not_1": 0,
1566
+ "block_size_mismatch": 0,
1567
+ "blocks_invalid": 0,
1568
+ "dtype_unsupported": 0,
1569
+ "empty_cache": 0,
1570
+ "head_dim_unsupported": 0,
1571
+ "kernel_unavailable": 0,
1572
+ "offset_invalid": 0,
1573
+ "partitioned_invalid_output": 0,
1574
+ "partitioned_unavailable": 0,
1575
+ "q_len_gt_max": 0,
1576
+ "q_len_invalid": 0,
1577
+ "turboquant_unsupported": 0,
1578
+ "unknown": 0,
1579
+ "unsupported_mask": 0
1580
+ },
1581
+ "decode_verify": {
1582
+ "batch_not_1": 0,
1583
+ "block_size_mismatch": 0,
1584
+ "blocks_invalid": 0,
1585
+ "dtype_unsupported": 0,
1586
+ "empty_cache": 0,
1587
+ "head_dim_unsupported": 0,
1588
+ "kernel_unavailable": 0,
1589
+ "offset_invalid": 0,
1590
+ "partitioned_invalid_output": 0,
1591
+ "partitioned_unavailable": 0,
1592
+ "q_len_gt_max": 0,
1593
+ "q_len_invalid": 0,
1594
+ "turboquant_unsupported": 0,
1595
+ "unknown": 0,
1596
+ "unsupported_mask": 0
1597
+ },
1598
+ "postcommit": {
1599
+ "batch_not_1": 0,
1600
+ "block_size_mismatch": 0,
1601
+ "blocks_invalid": 0,
1602
+ "dtype_unsupported": 0,
1603
+ "empty_cache": 0,
1604
+ "head_dim_unsupported": 0,
1605
+ "kernel_unavailable": 0,
1606
+ "offset_invalid": 0,
1607
+ "partitioned_invalid_output": 0,
1608
+ "partitioned_unavailable": 0,
1609
+ "q_len_gt_max": 0,
1610
+ "q_len_invalid": 0,
1611
+ "turboquant_unsupported": 0,
1612
+ "unknown": 0,
1613
+ "unsupported_mask": 0
1614
+ },
1615
+ "prefill": {
1616
+ "batch_not_1": 0,
1617
+ "block_size_mismatch": 0,
1618
+ "blocks_invalid": 0,
1619
+ "dtype_unsupported": 0,
1620
+ "empty_cache": 0,
1621
+ "head_dim_unsupported": 0,
1622
+ "kernel_unavailable": 0,
1623
+ "offset_invalid": 0,
1624
+ "partitioned_invalid_output": 0,
1625
+ "partitioned_unavailable": 0,
1626
+ "q_len_gt_max": 0,
1627
+ "q_len_invalid": 0,
1628
+ "turboquant_unsupported": 0,
1629
+ "unknown": 0,
1630
+ "unsupported_mask": 0
1631
+ },
1632
+ "unknown": {
1633
+ "batch_not_1": 0,
1634
+ "block_size_mismatch": 0,
1635
+ "blocks_invalid": 0,
1636
+ "dtype_unsupported": 0,
1637
+ "empty_cache": 0,
1638
+ "head_dim_unsupported": 0,
1639
+ "kernel_unavailable": 0,
1640
+ "offset_invalid": 0,
1641
+ "partitioned_invalid_output": 0,
1642
+ "partitioned_unavailable": 0,
1643
+ "q_len_gt_max": 0,
1644
+ "q_len_invalid": 0,
1645
+ "turboquant_unsupported": 0,
1646
+ "unknown": 0,
1647
+ "unsupported_mask": 0
1648
+ }
1649
+ },
1650
+ "paged_attention_large_q_path": {
1651
+ "dense_forbidden": 0,
1652
+ "large_q_split_sdpa_fallback": 0,
1653
+ "partitioned_paged": 0,
1654
+ "tail_paged": 0,
1655
+ "unknown": 0
1656
+ },
1657
+ "postcommit_dense_fallback_calls": 0,
1658
+ "prefill_dense_fallback_calls": 0,
1659
+ "trace_events": false
1660
+ },
1661
+ "timing_s": {
1662
+ "accept": 0.011550506053026766,
1663
+ "draft": 1.5224984570086235,
1664
+ "rollback": 0.0001419630425516516,
1665
+ "target_distribution": 21.21943625298445,
1666
+ "target_hidden": 0.0,
1667
+ "verify": 0.19109521005884744
1668
+ },
1669
+ "tok_s": 43.558797060869416,
1670
+ "token_preview": [
1671
+ 9996,
1672
+ 625,
1673
+ 759,
1674
+ 759,
1675
+ 759,
1676
+ 759,
1677
+ 759,
1678
+ 759,
1679
+ 759,
1680
+ 759,
1681
+ 759,
1682
+ 759,
1683
+ 759,
1684
+ 759,
1685
+ 759,
1686
+ 759
1687
+ ],
1688
+ "verify_calls": 170
1689
+ }
1690
+ },
1691
+ "passed": false,
1692
+ "prompt_id": "flappy_html5_canvas_game",
1693
+ "prompt_tokens": 119,
1694
+ "sampler": {
1695
+ "seed": 0,
1696
+ "temperature": 1.0,
1697
+ "top_k": 64,
1698
+ "top_p": 0.95
1699
+ }
1700
+ },
1701
+ "status": "failed"
1702
+ }
mtplx_pair.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "format_version": 1,
3
+ "name": "Gemma4 MTPLX Optimized Speed",
4
+ "variant": "speed",
5
+ "layout": {
6
+ "target": "target",
7
+ "assistant": "assistant"
8
+ },
9
+ "source": {
10
+ "target_repo": "google/gemma-4-31B-it",
11
+ "target_revision": "145dc2508c480a64b47242f160d286cff94a2343",
12
+ "assistant_repo": "google/gemma-4-31B-it-assistant",
13
+ "assistant_revision": "cffbbd2cea41ea56a0fa5b0487e0d445121fd204"
14
+ },
15
+ "target": {
16
+ "role": "verifier",
17
+ "model_type": "gemma4",
18
+ "quantization": {
19
+ "bits": 4,
20
+ "group_size": 64,
21
+ "mode": "affine"
22
+ }
23
+ },
24
+ "assistant": {
25
+ "role": "drafter",
26
+ "model_type": "gemma4_assistant",
27
+ "quantization": {
28
+ "bits": 6,
29
+ "group_size": 64,
30
+ "mode": "affine"
31
+ }
32
+ },
33
+ "benchmark": {
34
+ "prompt_suite": "flappy",
35
+ "max_tokens": 1000,
36
+ "temperature": 1.0,
37
+ "top_p": 0.95,
38
+ "top_k": 64,
39
+ "seed": 0,
40
+ "best_block_size": 6,
41
+ "acceptance": {
42
+ "accepted": 830,
43
+ "drafted": 846,
44
+ "ratio": 0.9810874704491725
45
+ },
46
+ "observed_mtp_tok_s": [
47
+ 43.558797060869416,
48
+ 44.457398919489286,
49
+ 44.071735166669846
50
+ ]
51
+ }
52
+ }
target/README.md ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ tags:
4
+ - mlx
5
+ pipeline_tag: text-generation
6
+ library_name: mlx
7
+ ---
target/config.json ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma4ForConditionalGeneration"
4
+ ],
5
+ "audio_config": null,
6
+ "audio_token_id": 258881,
7
+ "boa_token_id": 256000,
8
+ "boi_token_id": 255999,
9
+ "dtype": "bfloat16",
10
+ "eoa_token_id": 258883,
11
+ "eoa_token_index": 258883,
12
+ "eoi_token_id": 258882,
13
+ "eos_token_id": [
14
+ 1,
15
+ 106,
16
+ 50
17
+ ],
18
+ "image_token_id": 258880,
19
+ "initializer_range": 0.02,
20
+ "model_type": "gemma4",
21
+ "quantization": {
22
+ "group_size": 64,
23
+ "bits": 4,
24
+ "mode": "affine"
25
+ },
26
+ "quantization_config": {
27
+ "group_size": 64,
28
+ "bits": 4,
29
+ "mode": "affine"
30
+ },
31
+ "text_config": {
32
+ "attention_bias": false,
33
+ "attention_dropout": 0.0,
34
+ "attention_k_eq_v": true,
35
+ "bos_token_id": 2,
36
+ "dtype": "bfloat16",
37
+ "enable_moe_block": false,
38
+ "eos_token_id": 1,
39
+ "expert_intermediate_size": null,
40
+ "final_logit_softcapping": 30.0,
41
+ "global_head_dim": 512,
42
+ "head_dim": 256,
43
+ "hidden_activation": "gelu_pytorch_tanh",
44
+ "hidden_size": 5376,
45
+ "hidden_size_per_layer_input": 0,
46
+ "initializer_range": 0.02,
47
+ "intermediate_size": 21504,
48
+ "layer_types": [
49
+ "sliding_attention",
50
+ "sliding_attention",
51
+ "sliding_attention",
52
+ "sliding_attention",
53
+ "sliding_attention",
54
+ "full_attention",
55
+ "sliding_attention",
56
+ "sliding_attention",
57
+ "sliding_attention",
58
+ "sliding_attention",
59
+ "sliding_attention",
60
+ "full_attention",
61
+ "sliding_attention",
62
+ "sliding_attention",
63
+ "sliding_attention",
64
+ "sliding_attention",
65
+ "sliding_attention",
66
+ "full_attention",
67
+ "sliding_attention",
68
+ "sliding_attention",
69
+ "sliding_attention",
70
+ "sliding_attention",
71
+ "sliding_attention",
72
+ "full_attention",
73
+ "sliding_attention",
74
+ "sliding_attention",
75
+ "sliding_attention",
76
+ "sliding_attention",
77
+ "sliding_attention",
78
+ "full_attention",
79
+ "sliding_attention",
80
+ "sliding_attention",
81
+ "sliding_attention",
82
+ "sliding_attention",
83
+ "sliding_attention",
84
+ "full_attention",
85
+ "sliding_attention",
86
+ "sliding_attention",
87
+ "sliding_attention",
88
+ "sliding_attention",
89
+ "sliding_attention",
90
+ "full_attention",
91
+ "sliding_attention",
92
+ "sliding_attention",
93
+ "sliding_attention",
94
+ "sliding_attention",
95
+ "sliding_attention",
96
+ "full_attention",
97
+ "sliding_attention",
98
+ "sliding_attention",
99
+ "sliding_attention",
100
+ "sliding_attention",
101
+ "sliding_attention",
102
+ "full_attention",
103
+ "sliding_attention",
104
+ "sliding_attention",
105
+ "sliding_attention",
106
+ "sliding_attention",
107
+ "sliding_attention",
108
+ "full_attention"
109
+ ],
110
+ "max_position_embeddings": 262144,
111
+ "model_type": "gemma4_text",
112
+ "num_attention_heads": 32,
113
+ "num_experts": null,
114
+ "num_global_key_value_heads": 4,
115
+ "num_hidden_layers": 60,
116
+ "num_key_value_heads": 16,
117
+ "num_kv_shared_layers": 0,
118
+ "pad_token_id": 0,
119
+ "rms_norm_eps": 1e-06,
120
+ "rope_parameters": {
121
+ "full_attention": {
122
+ "partial_rotary_factor": 0.25,
123
+ "rope_theta": 1000000.0,
124
+ "rope_type": "proportional"
125
+ },
126
+ "sliding_attention": {
127
+ "rope_theta": 10000.0,
128
+ "rope_type": "default"
129
+ }
130
+ },
131
+ "sliding_window": 1024,
132
+ "tie_word_embeddings": true,
133
+ "top_k_experts": null,
134
+ "use_bidirectional_attention": "vision",
135
+ "use_cache": true,
136
+ "use_double_wide_mlp": false,
137
+ "vocab_size": 262144,
138
+ "vocab_size_per_layer_input": 262144
139
+ },
140
+ "tie_word_embeddings": true,
141
+ "transformers_version": "5.5.0.dev0",
142
+ "video_token_id": 258884,
143
+ "vision_soft_tokens_per_image": 280
144
+ }
target/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 2,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 1,
6
+ 106,
7
+ 50
8
+ ],
9
+ "pad_token_id": 0,
10
+ "temperature": 1.0,
11
+ "top_k": 64,
12
+ "top_p": 0.95,
13
+ "transformers_version": "5.5.0.dev0"
14
+ }
target/model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:988e2b1fd41d93b62b8c432f52f632c43b8cb7f86df4b957db36a3cc0dab40ca
3
+ size 5366617512
target/model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a496a96fbd39cd11a9871f91d026013d91069dcac15f89ca861b93976f3857cf
3
+ size 5361642573
target/model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afa555ff0e1bc458c5b08aeef1f4499dce63e2bfb5d3a2aac716e47c0a5672c1
3
+ size 5367276094
target/model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bde02793a3d2ed3f29c3d1629ea9fbeb9c25720bc42db0eca9720abe094235a
3
+ size 1173848301
target/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
target/mtplx_artifact.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "artifact_schema": "mtplx.gemma4.google.v1",
3
+ "config.json_sha256": "b2a7c25dc48e584d3debc824534a62cd73a801108b1cc7a15d32676195d7f03a",
4
+ "created_at_unix": 1778037794,
5
+ "generation_config.json_sha256": "d4226bbe3117d2d253ba4609720ba82c6c4ce4627a9a6ae05387c78983ac03de",
6
+ "quantization": {
7
+ "bits": 4,
8
+ "format": "mlx-flat4-g64",
9
+ "group_size": 64,
10
+ "mode": "affine"
11
+ },
12
+ "role": "target",
13
+ "source_repo": "google/gemma-4-31B-it",
14
+ "source_revision": "145dc2508c480a64b47242f160d286cff94a2343",
15
+ "source_snapshot": "/Users/youssof/Documents/MTPLX/models/.sources/gemma-4-31B-it-145dc2508c48",
16
+ "tokenizer.json_sha256": "cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f",
17
+ "tokenizer_config.json_sha256": "a284d1243b62be31faa9c13e1c28cece940c4abaa7bd9ad87b94f61b40687200"
18
+ }
target/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
+ size 32169626
target/tokenizer_config.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [
17
+ "<|video|>"
18
+ ],
19
+ "image_token": "<|image|>",
20
+ "is_local": true,
21
+ "local_files_only": false,
22
+ "mask_token": "<mask>",
23
+ "model_max_length": 1000000000000000019884624838656,
24
+ "model_specific_special_tokens": {
25
+ "audio_token": "<|audio|>",
26
+ "boa_token": "<|audio>",
27
+ "boi_token": "<|image>",
28
+ "eoa_token": "<audio|>",
29
+ "eoc_token": "<channel|>",
30
+ "eoi_token": "<image|>",
31
+ "eot_token": "<turn|>",
32
+ "escape_token": "<|\"|>",
33
+ "etc_token": "<tool_call|>",
34
+ "etd_token": "<tool|>",
35
+ "etr_token": "<tool_response|>",
36
+ "image_token": "<|image|>",
37
+ "soc_token": "<|channel>",
38
+ "sot_token": "<|turn>",
39
+ "stc_token": "<|tool_call>",
40
+ "std_token": "<|tool>",
41
+ "str_token": "<|tool_response>",
42
+ "think_token": "<|think|>"
43
+ },
44
+ "pad_token": "<pad>",
45
+ "padding_side": "left",
46
+ "processor_class": "Gemma4Processor",
47
+ "response_schema": {
48
+ "properties": {
49
+ "content": {
50
+ "type": "string"
51
+ },
52
+ "role": {
53
+ "const": "assistant"
54
+ },
55
+ "thinking": {
56
+ "type": "string"
57
+ },
58
+ "tool_calls": {
59
+ "items": {
60
+ "properties": {
61
+ "function": {
62
+ "properties": {
63
+ "arguments": {
64
+ "additionalProperties": {},
65
+ "type": "object",
66
+ "x-parser": "gemma4-tool-call"
67
+ },
68
+ "name": {
69
+ "type": "string"
70
+ }
71
+ },
72
+ "type": "object",
73
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})"
74
+ },
75
+ "type": {
76
+ "const": "function"
77
+ }
78
+ },
79
+ "type": "object"
80
+ },
81
+ "type": "array",
82
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>"
83
+ }
84
+ },
85
+ "type": "object",
86
+ "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?P<content>(?:(?!\\<turn\\|\\>)(?!\\<\\|tool_response\\>).)+)?(?:\\<turn\\|\\>|\\<\\|tool_response\\>)?"
87
+ },
88
+ "soc_token": "<|channel>",
89
+ "sot_token": "<|turn>",
90
+ "stc_token": "<|tool_call>",
91
+ "std_token": "<|tool>",
92
+ "str_token": "<|tool_response>",
93
+ "think_token": "<|think|>",
94
+ "tokenizer_class": "GemmaTokenizer",
95
+ "unk_token": "<unk>"
96
+ }