MAC-AutoML
/

Qwen3-VL-2B-Instruct-SplitQuant-w8a8g128

Model card Files Files and versions

Qwen3-VL-2B-Instruct-SplitQuant-w8a8g128 / compressed_config.json

Xinxiong0912's picture

Upload folder using huggingface_hub

38d2a45 verified 19 days ago

history blame contribute delete

1.11 kB

	{
	"format": "mindpipe_hf_save_pretrained_debug",
	"algorithm": "splitquant",
	"model_path": "/mnt/82_store/LLM-weights/Qwen3-VL-2B-Instruct",
	"result_dir": "/mnt/82_store/wxx/HWQuant/Mindpipe/results/Qwen3-VL-2B-Instruct/splitquant/splitquant_w8a8_q16k16v16_seq512",
	"weight_bits": 8,
	"activation_bits": 8,
	"sequence_length": 512,
	"group_size": 128,
	"output_dir": "/mnt/82_store/wxx/HWQuant/Mindpipe/tmp_hf_save/Qwen3-VL-2B-Instruct-SplitQuant-w8a8g128/_pre_save_workflow",
	"pre_save_metrics": {
	"perplexity": 16.175421401474118,
	"evaluation_dataset": "wikitext2",
	"sequence_length": 512,
	"evaluated_chunks": 4,
	"batch_size": 1,
	"elapsed_seconds": 1.6193287540227175,
	"tokens_per_second": 1262.2514081358213,
	"model_path": "/mnt/82_store/LLM-weights/Qwen3-VL-2B-Instruct",
	"quantization_algorithm": "splitquant",
	"weight_bits": 8,
	"activation_bits": 8,
	"device": "cuda:0",
	"dtype": "float16",
	"artifacts_path": "artifacts.json"
	},
	"load_note": "Use MindPipe to rebuild the compression structure before loading the HF state dict."
	}