ZAYA1-8B-NVFP4 / quantization_manifest.json
switzerchees's picture
Add ZAYA1-8B NVFP4 quantization
6cd1c27 verified
{
"calibration_samples": 128,
"created_at_unix": 1779205965,
"cuda_runtime": "13.0",
"dtype": "bf16",
"excluded_patterns": [
"*lm_head*",
"*embed_tokens*",
"*router*",
"*conv_qk*"
],
"gpu_capability": [
12,
0
],
"gpu_name": "NVIDIA RTX PRO 6000 Blackwell Workstation Edition",
"max_seq_len": 2048,
"module_summary": {
"linear_expert": 1280,
"linear_lm_head": 1,
"linear_router": 160,
"linear_targeted_estimate": 1480,
"linear_total": 1641
},
"platform": "Linux-6.8.0-107-generic-x86_64-with-glibc2.39",
"python": "3.12.3",
"quantization": "ModelOpt NVFP4_DEFAULT_CFG",
"source_model": "Zyphra/ZAYA1-8B",
"source_revision": "970cfc9f5e7e5a4f5f6f0645955928a9b6a98415",
"tensor_stats": {
"files": [
{
"bytes": 5804183272,
"name": "model.safetensors"
}
],
"tensor_dtypes": {
"torch.bfloat16": 963,
"torch.float32": 3000,
"torch.float8_e4m3fn": 1480,
"torch.uint8": 1480
}
},
"torch": "2.12.0+cu130"
}