| { |
| "_class_name": "Transformer2DModel", |
| "_diffusers_version": "0.27.2", |
| "in_channels": 8, |
| "num_layers": 24, |
| "inner_dim": 2560, |
| "attention_head_dim": 128, |
| "num_attention_heads": 20, |
| "mlp_ratio": 2.5, |
| "out_channels": 8, |
| "max_position": 32768, |
| "rope_theta": 1000000.0, |
| "speaker_embedding_dim": 512, |
| "text_embedding_dim": 768, |
| "ssl_encoder_depths": [8, 8], |
| "ssl_names": ["mert", "m-hubert"], |
| "ssl_latent_dims": [1024, 768], |
| "patch_size": [16, 1], |
| "max_height": 16, |
| "max_width": 32768, |
| "lyric_encoder_vocab_size": 6693, |
| "lyric_hidden_size": 1024 |
| } |
|
|