Update README
Browse files
- README.md +23 -1
- config.json +1 -0
README.md
CHANGED
|
@@ -17,6 +17,28 @@ tags:
|
|
| 17 |
- Gemma-4-31B-IT
|
| 18 |
- lighthouse
|
| 19 |
pipeline_tag: text-generation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
---
|
| 22 |
|
|
@@ -104,7 +126,7 @@ vllm serve LilaRest/gemma-4-31B-it-NVFP4-turbo \
|
|
| 104 |
|
| 105 |
- `--quantization modelopt` — required, activates NVIDIA's optimized CUTLASS kernels
|
| 106 |
- `--kv-cache-dtype fp8` — halves KV cache memory on Blackwell
|
| 107 |
-
- `--max-model-len 16384` — maximum context length per request.
|
| 108 |
|
| 109 |
## Compatibility
|
| 110 |
|
|
|
|
| 17 |
- Gemma-4-31B-IT
|
| 18 |
- lighthouse
|
| 19 |
pipeline_tag: text-generation
|
| 20 |
+
model-index:
|
| 21 |
+
- name: gemma-4-31B-it-NVFP4-turbo
|
| 22 |
+
results:
|
| 23 |
+
- task:
|
| 24 |
+
type: text-generation
|
| 25 |
+
dataset:
|
| 26 |
+
name: GPQA Diamond
|
| 27 |
+
type: Idavidrein/gpqa
|
| 28 |
+
config: gpqa_diamond
|
| 29 |
+
metrics:
|
| 30 |
+
- name: Accuracy
|
| 31 |
+
type: accuracy
|
| 32 |
+
value: 72.73
|
| 33 |
+
- task:
|
| 34 |
+
type: text-generation
|
| 35 |
+
dataset:
|
| 36 |
+
name: MMLU Pro
|
| 37 |
+
type: TIGER-Lab/MMLU-Pro
|
| 38 |
+
metrics:
|
| 39 |
+
- name: Accuracy
|
| 40 |
+
type: accuracy
|
| 41 |
+
value: 83.93
|
| 42 |
|
| 43 |
---
|
| 44 |
|
|
|
|
| 126 |
|
| 127 |
- `--quantization modelopt` — required, activates NVIDIA's optimized CUTLASS kernels
|
| 128 |
- `--kv-cache-dtype fp8` — halves KV cache memory on Blackwell
|
| 129 |
+
- `--max-model-len 16384` — maximum context length per request. See [Compatibility](#compatibility) for max value per GPU.
|
| 130 |
|
| 131 |
## Compatibility
|
| 132 |
|
config.json
CHANGED
|
@@ -154,6 +154,7 @@
|
|
| 154 |
"model.embed_vision*",
|
| 155 |
"model.vision_tower*"
|
| 156 |
],
|
|
|
|
| 157 |
"quant_algo": "NVFP4",
|
| 158 |
"kv_cache_scheme": {
|
| 159 |
"dynamic": false,
|
|
|
|
| 154 |
"model.embed_vision*",
|
| 155 |
"model.vision_tower*"
|
| 156 |
],
|
| 157 |
+
"bits": 4,
|
| 158 |
"quant_algo": "NVFP4",
|
| 159 |
"kv_cache_scheme": {
|
| 160 |
"dynamic": false,
|