switzerchees
/

ZAYA1-8B-NVFP4

Text Generation

8-bit precision

Model card Files Files and versions

ZAYA1-8B-NVFP4 / quantization_manifest.json

switzerchees's picture

Add ZAYA1-8B NVFP4 quantization

6cd1c27 verified 5 days ago

history blame contribute delete

1.05 kB

	{
	"calibration_samples": 128,
	"created_at_unix": 1779205965,
	"cuda_runtime": "13.0",
	"dtype": "bf16",
	"excluded_patterns": [
	"lm_head",
	"embed_tokens",
	"router",
	"conv_qk"
	],
	"gpu_capability": [
	12,
	0
	],
	"gpu_name": "NVIDIA RTX PRO 6000 Blackwell Workstation Edition",
	"max_seq_len": 2048,
	"module_summary": {
	"linear_expert": 1280,
	"linear_lm_head": 1,
	"linear_router": 160,
	"linear_targeted_estimate": 1480,
	"linear_total": 1641
	},
	"platform": "Linux-6.8.0-107-generic-x86_64-with-glibc2.39",
	"python": "3.12.3",
	"quantization": "ModelOpt NVFP4_DEFAULT_CFG",
	"source_model": "Zyphra/ZAYA1-8B",
	"source_revision": "970cfc9f5e7e5a4f5f6f0645955928a9b6a98415",
	"tensor_stats": {
	"files": [
	{
	"bytes": 5804183272,
	"name": "model.safetensors"
	}
	],
	"tensor_dtypes": {
	"torch.bfloat16": 963,
	"torch.float32": 3000,
	"torch.float8_e4m3fn": 1480,
	"torch.uint8": 1480
	}
	},
	"torch": "2.12.0+cu130"
	}