| { | |
| "format": "mindpipe_hf_save_pretrained_debug", | |
| "algorithm": "splitquant", | |
| "model_path": "/mnt/82_store/LLM-weights/Qwen3-VL-2B-Instruct", | |
| "result_dir": "/mnt/82_store/wxx/HWQuant/Mindpipe/results/Qwen3-VL-2B-Instruct/splitquant/splitquant_w8a8_q16k16v16_seq512", | |
| "weight_bits": 8, | |
| "activation_bits": 8, | |
| "sequence_length": 512, | |
| "group_size": 128, | |
| "output_dir": "/mnt/82_store/wxx/HWQuant/Mindpipe/tmp_hf_save/Qwen3-VL-2B-Instruct-SplitQuant-w8a8g128/_pre_save_workflow", | |
| "pre_save_metrics": { | |
| "perplexity": 16.175421401474118, | |
| "evaluation_dataset": "wikitext2", | |
| "sequence_length": 512, | |
| "evaluated_chunks": 4, | |
| "batch_size": 1, | |
| "elapsed_seconds": 1.6193287540227175, | |
| "tokens_per_second": 1262.2514081358213, | |
| "model_path": "/mnt/82_store/LLM-weights/Qwen3-VL-2B-Instruct", | |
| "quantization_algorithm": "splitquant", | |
| "weight_bits": 8, | |
| "activation_bits": 8, | |
| "device": "cuda:0", | |
| "dtype": "float16", | |
| "artifacts_path": "artifacts.json" | |
| }, | |
| "load_note": "Use MindPipe to rebuild the compression structure before loading the HF state dict." | |
| } |