Qwen3.6-27B-MTPLX-Optimized-Speed / mtplx_runtime.json
finbase0530's picture
Duplicate from Youssofal/Qwen3.6-27B-MTPLX-Optimized-Speed
0aa3682
{
"arch_id": "qwen3-next-mtp",
"artifact_role": "maximum-speed-flat4-candidate",
"base_trunk": "mlx-community/Qwen3.6-27B-4bit",
"exactness_baseline": {
"attention_impl": "mlx_vector_paged",
"context": 64,
"gate": "phase0h-paged-verifier-exactness",
"max_abs_diff": 0.0,
"mode": "decode-from-stock-prefix",
"sample_agreement": 1.0,
"status": "passed",
"topk_overlap_ratio": 1.0,
"total_variation": 0.0,
"verify_tokens": 4
},
"mtp_depth_max": 3,
"mtp_sidecar": "Qwen3.6-27B-MTPLX-CyanKiwi-Packed-BF16-INT4-v3",
"mtplx_version": "0.1.0-preview",
"recommended_draft_lm_head": {
"bits": 3,
"group_size": 64,
"mode": "affine"
},
"recommended_draft_sampler": {
"temperature": 0.7,
"top_k": 20,
"top_p": 0.95
},
"recommended_profile": "performance-cold",
"sampler": {
"temperature": 0.6,
"top_k": 20,
"top_p": 0.95
},
"speed_evidence": {
"acceptance_by_depth": [
1.0,
0.9795918367346939,
0.9387755102040817
],
"accepted_by_depth": [
49,
48,
46
],
"artifacts": [
"/Users/youssof/.mtplx/fanmax_flat4_draft_temp070_cleanapps_20260503-230229.json",
"/Users/youssof/.mtplx/fanmax_flat4_draft_temp070_cleanapps_repeat_20260503-230458.json"
],
"correction_tokens": 3,
"depth": 3,
"draft_lm_head": "3-bit affine group64 draft-only head",
"draft_sampler": {
"temperature": 0.7,
"top_k": 20,
"top_p": 0.95
},
"drafted_by_depth": [
49,
49,
49
],
"enable_thinking": false,
"greedy_diagnostic": {
"accepted_by_depth": [
49,
49,
43
],
"artifact": "/Users/youssof/.mtplx/fanmax_flat4_full_greedy_cleanapps_20260503-230229.json",
"correction_tokens": 0,
"draft_sampler": {
"temperature": 0.0,
"top_k": 0,
"top_p": 1.0
},
"drafted_by_depth": [
51,
51,
51
],
"target_sampler": {
"temperature": 0.0,
"top_k": 0,
"top_p": 1.0
},
"tok_s": 60.108346853627076,
"verify_calls": 51
},
"older_3bit_artifacts": [
"/Users/youssof/.mtplx/fanmax_flat4_cyankiwi_draft_head_3bit_isolated_20260503.json",
"/Users/youssof/.mtplx/fanmax_flat4_cyankiwi_draft_head_3bit_repeat_20260503.json"
],
"older_3bit_tok_s": [
60.03822277200243,
60.06063543439426
],
"previous_best_artifacts": [
"/Users/youssof/.mtplx/fanmax_flat4_draft_temp070_optimized_env_20260503.json",
"/Users/youssof/.mtplx/fanmax_flat4_draft_temp070_repeat_optimized_env_20260503.json"
],
"previous_best_tok_s": [
61.52744855840259,
60.89966247416089
],
"production_cli_contract_checks": [
{
"artifact": "/Users/youssof/.mtplx/fanmax_mtplx_flat4_cyankiwi_cli_contract_perfcold_disablethinking_20260503.json",
"tok_s": 59.51602930081416
},
{
"artifact": "/Users/youssof/.mtplx/fanmax_mtplx_flat4_cyankiwi_cli_contract_perfcold_disablethinking_repeat_20260503.json",
"tok_s": 59.97165246907193
}
],
"profile": "performance-cold",
"timestamp": "2026-05-03T23:07:00+0100",
"tok_s": [
63.05651084048216,
62.885564723941826
],
"verify_calls": 49
},
"verified_on": {
"hardware": "Apple M5 Max, 128 GB unified memory",
"machine_arch": "arm64",
"model": "Qwen3.6-27B-MTPLX-Flat4-CyanKiwiMTP",
"timestamp": "2026-05-03T17:49:00+0100"
}
}