Shikhar1 committed on
Commit
2d86c06
·
1 Parent(s): 70e8fd6

Upload 4 files (#1)

Browse files

- Upload 4 files (6a0834fb2dadb0893ee142572ac1211300dac701)

Files changed (4) hide show
  1. config.json +50 -0
  2. requirements.txt +11 -0
  3. speckv_mlp16.pkl +3 -0
  4. speckv_mlp16_weights.npz +3 -0
config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "SpecKV-MLP16",
3
+ "description": "Lightweight acceptance rate predictor for adaptive speculative decoding gamma selection.",
4
+ "architecture": {
5
+ "type": "MLPRegressor",
6
+ "hidden_layers": [16],
7
+ "activation": "relu",
8
+ "output": "regression (acceptance rate, 0-1)"
9
+ },
10
+ "input_features": [
11
+ {"name": "mean_draft_entropy", "description": "Mean entropy of draft token distributions (bits)", "dtype": "float32"},
12
+ {"name": "mean_draft_confidence", "description": "Mean top-1 probability of draft tokens", "dtype": "float32"},
13
+ {"name": "max_draft_entropy", "description": "Max entropy across draft tokens in the step", "dtype": "float32"},
14
+ {"name": "min_draft_confidence", "description": "Min top-1 probability across draft tokens in the step", "dtype": "float32"},
15
+ {"name": "comp_enc", "description": "Compression level encoding: 0=fp16, 1=int8, 2=nf4", "dtype": "int"},
16
+ {"name": "gamma", "description": "Candidate speculation length to evaluate", "dtype": "int"}
17
+ ],
18
+ "output": {
19
+ "name": "predicted_acceptance_rate",
20
+ "range": [0.0, 1.0],
21
+ "description": "Predicted fraction of draft tokens the target model will accept"
22
+ },
23
+ "usage": {
24
+ "gamma_selection": "For each candidate gamma in {2, 4, 6, 8}, predict the acceptance rate, then select the gamma that maximizes (predicted_ar * gamma) + 1.",
25
+ "overhead": "0.34ms per decision (4 forward passes through the MLP)"
26
+ },
27
+ "training": {
28
+ "data": "5112 step-level records from Phase 2 profiling",
29
+ "model_pair": "Llama-3.2-1B-Instruct (draft) / Llama-3.2-3B-Instruct (target)",
30
+ "compression_levels": ["fp16", "int8", "nf4"],
31
+ "gamma_values": [2, 4, 6, 8],
32
+ "tasks": ["code", "math", "chat", "summarization"],
33
+ "framework": "scikit-learn 1.x",
34
+ "random_seed": 42
35
+ },
36
+ "performance": {
37
+ "test_mse": 0.090,
38
+ "test_correlation": 0.685,
39
+ "decision_overhead_ms": 0.336,
40
+ "improvement_over_fixed4": "56.0%",
41
+ "statistical_significance": "p < 0.001 (paired bootstrap, 10K resamples)"
42
+ },
43
+ "files": {
44
+ "speckv_mlp16.pkl": "Full sklearn model (pickle format)",
45
+ "speckv_mlp16_weights.npz": "Raw numpy weights for framework-agnostic loading"
46
+ },
47
+ "license": "MIT",
48
+ "paper": "SpecKV: Adaptive Speculative Decoding with Compression-Aware Gamma Selection",
49
+ "repository": "https://github.com/Amorfati123/SpecKV"
50
+ }
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ numpy>=1.21.0
2
+ scikit-learn>=1.0.0
3
+ pandas>=1.3.0
4
+ matplotlib>=3.5.0
5
+ seaborn>=0.11.0
6
+ torch>=2.0.0
7
+ transformers>=4.35.0
8
+ bitsandbytes>=0.41.0
9
+ vllm>=0.7.0
10
+ huggingface-hub>=0.19.0
11
+ tqdm>=4.60.0
speckv_mlp16.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5af096ef29299d70d98dead0a0bf6a611ef4e8cc359ec1cd31129607265ff2f9
3
+ size 12187
speckv_mlp16_weights.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671fcf2ab0d2591b5d22b28023d89d956002886f764d934484e86d19a552bad8
3
+ size 1998