| { | |
| "klass": "<class 'demucs.htdemucs.HTDemucs'>", | |
| "args": [], | |
| "kwargs": { | |
| "sources": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "audio_channels": 2, | |
| "samplerate": 44100, | |
| "channels": 48, | |
| "channels_time": null, | |
| "growth": 2, | |
| "nfft": 4096, | |
| "wiener_iters": 0, | |
| "end_iters": 0, | |
| "wiener_residual": false, | |
| "cac": true, | |
| "depth": 4, | |
| "rewrite": true, | |
| "multi_freqs": [], | |
| "multi_freqs_depth": 3, | |
| "freq_emb": 0.2, | |
| "emb_scale": 10, | |
| "emb_smooth": true, | |
| "kernel_size": 8, | |
| "stride": 4, | |
| "time_stride": 2, | |
| "context": 1, | |
| "context_enc": 0, | |
| "norm_starts": 4, | |
| "norm_groups": 4, | |
| "dconv_mode": 3, | |
| "dconv_depth": 2, | |
| "dconv_comp": 8, | |
| "dconv_init": 0.001, | |
| "bottom_channels": 512, | |
| "t_layers": 5, | |
| "t_hidden_scale": 4.0, | |
| "t_heads": 8, | |
| "t_dropout": 0.02, | |
| "t_layer_scale": true, | |
| "t_gelu": true, | |
| "t_emb": "sin", | |
| "t_max_positions": 10000, | |
| "t_max_period": 10000.0, | |
| "t_weight_pos_embed": 1.0, | |
| "t_cape_mean_normalize": true, | |
| "t_cape_augment": true, | |
| "t_cape_glob_loc_scale": [ | |
| 5000.0, | |
| 1.0, | |
| 1.4 | |
| ], | |
| "t_sin_random_shift": 0, | |
| "t_norm_in": true, | |
| "t_norm_in_group": false, | |
| "t_group_norm": false, | |
| "t_norm_first": true, | |
| "t_norm_out": true, | |
| "t_weight_decay": 0.05, | |
| "t_lr": null, | |
| "t_sparse_self_attn": false, | |
| "t_sparse_cross_attn": false, | |
| "t_mask_type": "diag", | |
| "t_mask_random_seed": 42, | |
| "t_sparse_attn_window": 400, | |
| "t_global_window": 100, | |
| "t_sparsity": 0.95, | |
| "t_auto_sparsity": false, | |
| "t_cross_first": false, | |
| "rescale": 0.1 | |
| } | |
| } |