File size: 1,109 Bytes
38d2a45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
{
  "format": "mindpipe_hf_save_pretrained_debug",
  "algorithm": "splitquant",
  "model_path": "/mnt/82_store/LLM-weights/Qwen3-VL-2B-Instruct",
  "result_dir": "/mnt/82_store/wxx/HWQuant/Mindpipe/results/Qwen3-VL-2B-Instruct/splitquant/splitquant_w8a8_q16k16v16_seq512",
  "weight_bits": 8,
  "activation_bits": 8,
  "sequence_length": 512,
  "group_size": 128,
  "output_dir": "/mnt/82_store/wxx/HWQuant/Mindpipe/tmp_hf_save/Qwen3-VL-2B-Instruct-SplitQuant-w8a8g128/_pre_save_workflow",
  "pre_save_metrics": {
    "perplexity": 16.175421401474118,
    "evaluation_dataset": "wikitext2",
    "sequence_length": 512,
    "evaluated_chunks": 4,
    "batch_size": 1,
    "elapsed_seconds": 1.6193287540227175,
    "tokens_per_second": 1262.2514081358213,
    "model_path": "/mnt/82_store/LLM-weights/Qwen3-VL-2B-Instruct",
    "quantization_algorithm": "splitquant",
    "weight_bits": 8,
    "activation_bits": 8,
    "device": "cuda:0",
    "dtype": "float16",
    "artifacts_path": "artifacts.json"
  },
  "load_note": "Use MindPipe to rebuild the compression structure before loading the HF state dict."
}