{ "_attn_implementation": "flash_attention_2", "depth": 32, "fullatt_block_indexes": [7, 15, 23, 31], "hidden_act": "silu", "hidden_size": 1280, "in_channels": 3, "in_chans": 3, "intermediate_size": 3420, "model_type": "qwen2_5_vl", "num_heads": 16, "out_hidden_size": 2048, "patch_size": 14, "spatial_merge_size": 2, "spatial_patch_size": 14, "temporal_patch_size": 2, "tokens_per_second": 2, "torch_dtype": "bfloat16", "transformers_version": "4.49.0", "window_size": 112 }