{ "format": "mindpipe_hf_save_pretrained_debug", "algorithm": "omniquant", "model_path": "/mnt/82_store/LLM-weights/Qwen2.5-VL-7B-Instruct", "result_dir": "/mnt/82_store/wxx/HWQuant/Mindpipe/results/Qwen2.5-VL-7B-Instruct/omniquant/omniquant_w8a8_seq512", "weight_bits": 8, "activation_bits": 8, "sequence_length": 512, "group_size": 128, "output_dir": "/mnt/82_store/wxx/HWQuant/Mindpipe/tmp_hf_save/Qwen2.5-VL-7B-Instruct-OmniQuant-w8a8/_pre_save_workflow", "pre_save_metrics": { "perplexity": 11.527605190928812, "evaluation_dataset": "wikitext2", "sequence_length": 512, "evaluated_chunks": 4, "batch_size": 1, "elapsed_seconds": 0.570933124050498, "tokens_per_second": 3580.1040680540577, "model_path": "/mnt/82_store/LLM-weights/Qwen2.5-VL-7B-Instruct", "quantization_algorithm": "omniquant", "weight_bits": 8, "activation_bits": 8, "device": "cuda:0", "dtype": "float16", "artifacts_path": "artifacts.json" }, "load_note": "Use MindPipe to rebuild the compression structure before loading the HF state dict." }