{
"dim": 12288,
"n_layers": 88,
"head_dim": 128,
"hidden_dim": 28672,
"n_heads": 96,
"n_kv_heads": 8,
"rope_theta": 1000000.0,
"norm_eps": 1e-05,
"vocab_size": 131072,
"tied_embeddings": false,
"max_position_embeddings": 262144,
"llama_4_scaling": null,
"q_lora_rank": null,
"qk_rope_head_dim": null,
"qk_nope_head_dim": null,
"kv_lora_rank": null,
"v_head_dim": null,
"quantization": {
"config_groups": {
"group_0": {
"format": "pack-quantized",
"input_activations": null,
"output_activations": null,
"targets": [
"Linear"
],
"weights": {
"actorder": null,
"block_structure": null,
"dynamic": false,
"group_size": 128,
"num_bits": 4,
"observer": "memoryless_minmax",
"observer_kwargs": {},
"scale_dtype": null,
"strategy": "group",
"symmetric": true,
"type": "int",
"zp_dtype": null
}
}
},
"format": "pack-quantized",
"global_compression_ratio": null,
"ignore": [
"re:.*vision_encoder.*",
"re:.*vision_tower.*",
"re:.*multi_modal_projector.*",
"re:.*vision_language_adapter.*",
"re:.*patch_merger.*",
"re:.*pre_mm_projector_norm.*",
"lm_head",
"language_model.lm_head"
],
"kv_cache_scheme": null,
"quant_method": "compressed-tensors",
"quantization_status": "compressed"
},
"yarn": {
"original_max_position_embeddings": 4096,
"factor": 64.0,
"apply_scale": true,
"beta": 4.0,
"alpha": 1.0
},
"moe": null,
"vision_encoder": {
"image_token_id": 10,
"image_break_token_id": 12,
"image_end_token_id": 13,
"intermediate_size": 8192,
"num_hidden_layers": 48,
"num_attention_heads": 16,
"mm_projector_id": "patch_merge",
"spatial_merge_size": 2,
"hidden_size": 1664,
"num_channels": 3,
"image_size": 1540,
"max_image_size": 1540,
"patch_size": 14,
"rope_theta": 10000.0,
"add_pre_mm_projector_layer_norm": true,
"adapter_bias": false
}
}