| { |
| "model_name": "SpecKV-MLP16", |
| "description": "Lightweight acceptance rate predictor for adaptive speculative decoding gamma selection.", |
| "architecture": { |
| "type": "MLPRegressor", |
| "hidden_layers": [16], |
| "activation": "relu", |
| "output": "regression (acceptance rate, 0-1)" |
| }, |
| "input_features": [ |
| {"name": "mean_draft_entropy", "description": "Mean entropy of draft token distributions (bits)", "dtype": "float32"}, |
| {"name": "mean_draft_confidence", "description": "Mean top-1 probability of draft tokens", "dtype": "float32"}, |
| {"name": "max_draft_entropy", "description": "Max entropy across draft tokens in the step (bits)", "dtype": "float32"}, |
| {"name": "min_draft_confidence", "description": "Min top-1 probability across draft tokens in the step", "dtype": "float32"}, |
| {"name": "comp_enc", "description": "Compression level encoding: 0=fp16, 1=int8, 2=nf4", "dtype": "int"}, |
| {"name": "gamma", "description": "Candidate speculation length to evaluate", "dtype": "int"} |
| ], |
| "output": { |
| "name": "predicted_acceptance_rate", |
| "range": [0.0, 1.0], |
| "description": "Predicted fraction of draft tokens the target model will accept" |
| }, |
| "usage": { |
| "gamma_selection": "For each candidate gamma in {2, 4, 6, 8}, predict the acceptance rate, then select the gamma that maximizes the score predicted_ar * gamma + 1 (predicted number of accepted draft tokens, plus one)", |
| "overhead": "0.34 ms per decision (4 forward passes through the MLP, one per candidate gamma)" |
| }, |
| "training": { |
| "data": "5112 step-level records from Phase 2 profiling", |
| "model_pair": "Llama-3.2-1B-Instruct (draft) / Llama-3.2-3B-Instruct (target)", |
| "compression_levels": ["fp16", "int8", "nf4"], |
| "gamma_values": [2, 4, 6, 8], |
| "tasks": ["code", "math", "chat", "summarization"], |
| "framework": "scikit-learn 1.x", |
| "random_seed": 42 |
| }, |
| "performance": { |
| "test_mse": 0.090, |
| "test_correlation": 0.685, |
| "decision_overhead_ms": 0.336, |
| "improvement_over_fixed4": "56.0%", |
| "statistical_significance": "p < 0.001 (paired bootstrap, 10K resamples)" |
| }, |
| "files": { |
| "speckv_mlp16.pkl": "Full sklearn model (pickle format)", |
| "speckv_mlp16_weights.npz": "Raw numpy weights for framework-agnostic loading" |
| }, |
| "license": "MIT", |
| "paper": "SpecKV: Adaptive Speculative Decoding with Compression-Aware Gamma Selection", |
| "repository": "https://github.com/Amorfati123/SpecKV" |
| } |
|
|