File size: 1,927 Bytes
5d31cd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
{
    "model_type": "autoencoder",
    "sample_size": 196608,
    "sample_rate": 44100,
    "audio_channels": 2,
    "model": {
        "pretransform": {
            "type": "patched",
            "config": {
                "patch_size": 256,
                "channels": 2
            }
        },
        "encoder": {
            "type": "same",
            "requires_grad": false,
            "config": {
                "in_channels": 512,
                "channels": 256,
                "c_mults": [6],
                "strides": [16],
                "latent_dim": 256,
                "transformer_depths": [12],
                "checkpointing": true,
                "differential": true,
                "dyt": true,
                "dim_heads": 64,
                "sliding_window": [1,1],
                "variable_stride": true,
                "mask_noise": 0.001
            }
        },
        "decoder": {
            "type": "same",
            "requires_grad": false,
            "config": {
                "out_channels": 512,
                "channels": 256,
                "c_mults": [6],
                "strides": [16],
                "latent_dim": 256,
                "transformer_depths": [12],
                "sinusoidal_blocks": [8],
                "checkpointing": false,
                "differential": true,
                "dyt": true,
                "dim_heads": 64,
                "sliding_window": [1,1],
                "variable_stride": true,
                "mask_noise": 0.1
            }
        },
        "bottleneck": {
            "type": "softnorm",
            "config": {
                "dim": 256,
                "noise_augment_dim": 0,
                "noise_regularize": true,
                "auto_scale": true,
                "freeze": true
            }
        },
        "latent_dim": 256,
        "downsampling_ratio": 4096,
        "io_channels": 2
    }
}