{ "arch_id": "qwen3-next-mtp", "artifact_role": "maximum-speed-flat4-candidate", "base_trunk": "mlx-community/Qwen3.6-27B-4bit", "exactness_baseline": { "attention_impl": "mlx_vector_paged", "context": 64, "gate": "phase0h-paged-verifier-exactness", "max_abs_diff": 0.0, "mode": "decode-from-stock-prefix", "sample_agreement": 1.0, "status": "passed", "topk_overlap_ratio": 1.0, "total_variation": 0.0, "verify_tokens": 4 }, "mtp_depth_max": 3, "mtp_sidecar": "Qwen3.6-27B-MTPLX-CyanKiwi-Packed-BF16-INT4-v3", "mtplx_version": "0.1.0-preview", "precision_policy": { "intended_default_for": [ "m1", "m2" ], "note": "This is a sibling precision variant; it is not a universal speed claim.", "routing": "mtplx start auto-selects this artifact on M1/M2 Apple Silicon", "source_repo": "Youssofal/Qwen3.6-27B-MTPLX-Optimized-Speed", "variant": "fp16" }, "precision_variant": "fp16", "recommended_draft_lm_head": { "bits": 3, "group_size": 64, "mode": "affine" }, "recommended_draft_sampler": { "temperature": 0.7, "top_k": 20, "top_p": 0.95 }, "recommended_profile": "performance-cold", "sampler": { "temperature": 0.6, "top_k": 20, "top_p": 0.95 }, "speed_evidence": { "acceptance_by_depth": [ 1.0, 0.9795918367346939, 0.9387755102040817 ], "accepted_by_depth": [ 49, 48, 46 ], "artifacts": [ "/Users/youssof/.mtplx/fanmax_flat4_draft_temp070_cleanapps_20260503-230229.json", "/Users/youssof/.mtplx/fanmax_flat4_draft_temp070_cleanapps_repeat_20260503-230458.json" ], "correction_tokens": 3, "depth": 3, "draft_lm_head": "3-bit affine group64 draft-only head", "draft_sampler": { "temperature": 0.7, "top_k": 20, "top_p": 0.95 }, "drafted_by_depth": [ 49, 49, 49 ], "enable_thinking": false, "greedy_diagnostic": { "accepted_by_depth": [ 49, 49, 43 ], "artifact": "/Users/youssof/.mtplx/fanmax_flat4_full_greedy_cleanapps_20260503-230229.json", "correction_tokens": 0, "draft_sampler": { "temperature": 0.0, "top_k": 0, "top_p": 1.0 }, "drafted_by_depth": [ 51, 51, 51 ], "target_sampler": { "temperature": 0.0, "top_k": 0, "top_p": 1.0 }, "tok_s": 60.108346853627076, "verify_calls": 51 }, "older_3bit_artifacts": [ "/Users/youssof/.mtplx/fanmax_flat4_cyankiwi_draft_head_3bit_isolated_20260503.json", "/Users/youssof/.mtplx/fanmax_flat4_cyankiwi_draft_head_3bit_repeat_20260503.json" ], "older_3bit_tok_s": [ 60.03822277200243, 60.06063543439426 ], "previous_best_artifacts": [ "/Users/youssof/.mtplx/fanmax_flat4_draft_temp070_optimized_env_20260503.json", "/Users/youssof/.mtplx/fanmax_flat4_draft_temp070_repeat_optimized_env_20260503.json" ], "previous_best_tok_s": [ 61.52744855840259, 60.89966247416089 ], "production_cli_contract_checks": [ { "artifact": "/Users/youssof/.mtplx/fanmax_mtplx_flat4_cyankiwi_cli_contract_perfcold_disablethinking_20260503.json", "tok_s": 59.51602930081416 }, { "artifact": "/Users/youssof/.mtplx/fanmax_mtplx_flat4_cyankiwi_cli_contract_perfcold_disablethinking_repeat_20260503.json", "tok_s": 59.97165246907193 } ], "profile": "performance-cold", "timestamp": "2026-05-03T23:07:00+0100", "tok_s": [ 63.05651084048216, 62.885564723941826 ], "verify_calls": 49 }, "verified_on": { "hardware": "Apple M5 Max, 128 GB unified memory", "machine_arch": "arm64", "model": "Qwen3.6-27B-MTPLX-Flat4-CyanKiwiMTP", "timestamp": "2026-05-03T17:49:00+0100" } }