Upload 4 files
#1
by Shikhar1 - opened
- config.json +50 -0
- requirements.txt +11 -0
- speckv_mlp16.pkl +3 -0
- speckv_mlp16_weights.npz +3 -0
config.json
ADDED
|
@@ -0,0 +1,50 @@
{
  "model_name": "SpecKV-MLP16",
  "description": "Lightweight acceptance rate predictor for adaptive speculative decoding gamma selection.",
  "architecture": {
    "type": "MLPRegressor",
    "hidden_layers": [16],
    "activation": "relu",
    "output": "regression (acceptance rate, 0-1)"
  },
  "input_features": [
    {"name": "mean_draft_entropy", "description": "Mean entropy of draft token distributions (bits)", "dtype": "float32"},
    {"name": "mean_draft_confidence", "description": "Mean top-1 probability of draft tokens", "dtype": "float32"},
    {"name": "max_draft_entropy", "description": "Max entropy across draft tokens in the step", "dtype": "float32"},
    {"name": "min_draft_confidence", "description": "Min top-1 probability across draft tokens in the step", "dtype": "float32"},
    {"name": "comp_enc", "description": "Compression level encoding: 0=fp16, 1=int8, 2=nf4", "dtype": "int"},
    {"name": "gamma", "description": "Candidate speculation length to evaluate", "dtype": "int"}
  ],
  "output": {
    "name": "predicted_acceptance_rate",
    "range": [0.0, 1.0],
    "description": "Predicted fraction of draft tokens the target model will accept"
  },
  "usage": {
    "gamma_selection": "For each candidate gamma in {2, 4, 6, 8}, predict acceptance rate. Select gamma that maximizes: predicted_ar * gamma + 1",
    "overhead": "0.34ms per decision (4 forward passes through the MLP)"
  },
  "training": {
    "data": "5112 step-level records from Phase 2 profiling",
    "model_pair": "Llama-3.2-1B-Instruct (draft) / Llama-3.2-3B-Instruct (target)",
    "compression_levels": ["fp16", "int8", "nf4"],
    "gamma_values": [2, 4, 6, 8],
    "tasks": ["code", "math", "chat", "summarization"],
    "framework": "scikit-learn 1.x",
    "random_seed": 42
  },
  "performance": {
    "test_mse": 0.090,
    "test_correlation": 0.685,
    "decision_overhead_ms": 0.336,
    "improvement_over_fixed4": "56.0%",
    "statistical_significance": "p < 0.001 (paired bootstrap, 10K resamples)"
  },
  "files": {
    "speckv_mlp16.pkl": "Full sklearn model (pickle format)",
    "speckv_mlp16_weights.npz": "Raw numpy weights for framework-agnostic loading"
  },
  "license": "MIT",
  "paper": "SpecKV: Adaptive Speculative Decoding with Compression-Aware Gamma Selection",
  "repository": "https://github.com/Amorfati123/SpecKV"
}
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
numpy>=1.21.0
scikit-learn>=1.0.0
pandas>=1.3.0
matplotlib>=3.5.0
seaborn>=0.11.0
torch>=2.0.0
transformers>=4.35.0
bitsandbytes>=0.41.0
vllm>=0.7.0
huggingface-hub>=0.19.0
tqdm>=4.60.0
speckv_mlp16.pkl
ADDED
|
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5af096ef29299d70d98dead0a0bf6a611ef4e8cc359ec1cd31129607265ff2f9
size 12187
speckv_mlp16_weights.npz
ADDED
|
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:671fcf2ab0d2591b5d22b28023d89d956002886f764d934484e86d19a552bad8
size 1998