{ "dim": 1024, "n_layers": 28, "n_heads": 16, "head_dim": 128, "n_kv_heads": 8, "vocab_size": 65536, "max_seq_len": 8192, "segm_out_dim": 256, "coord_out_dim": 2048, "size_out_dim": 2048, "coord_token_id": 240, "size_token_id": 241, "seg_token_id": 262, "eos_id": 11, "img_id": 227, "image_cls_token_id": 244, "img_end_id": 230, "spatial_patch_size": 16, "temporal_patch_size": 1, "channel_size": 3 }