{
  "attn_bias": true,
  "d_ff": 4096,
  "d_model": 1536,
  "dropout_p": 0.0,
  "heads_per_group": 1,
  "layer_group_size": 36,
  "mlp_bias": false,
  "norm_eps": 0.0005,
  "norm_include_weight": false,
  "num_groups": 24,
  "num_heads": 24,
  "num_layers": 36,
  "num_output_patches": 1,
  "num_variate_layers_per_group": 1,
  "patch_size": 32,
  "per_dim_scale": true,
  "pre_norm": true,
  "qk_dim": 64,
  "qk_norm": false,
  "qk_norm_include_weight": false,
  "residual_attn_ratio": 5.136215466577748,
  "residual_mult": 0.75,
  "use_xpos": true,
  "v_dim": 64,
  "variate_layer_first": false
}