AEmotionStudio committed on
Commit
c03e58d
·
verified ·
1 Parent(s): 747375a

Upload model_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. model_config.json +131 -0
model_config.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "diffusion_cond",
3
+ "sample_size": 882000,
4
+ "sample_rate": 44100,
5
+ "audio_channels": 2,
6
+ "model": {
7
+ "pretransform": {
8
+ "type": "autoencoder",
9
+ "iterate_batch": true,
10
+ "config": {
11
+ "encoder": {
12
+ "type": "oobleck",
13
+ "requires_grad": false,
14
+ "config": {
15
+ "in_channels": 2,
16
+ "channels": 128,
17
+ "c_mults": [1, 2, 4, 8, 16],
18
+ "strides": [2, 4, 4, 8, 8],
19
+ "latent_dim": 128,
20
+ "use_snake": true
21
+ }
22
+ },
23
+ "decoder": {
24
+ "type": "oobleck",
25
+ "config": {
26
+ "out_channels": 2,
27
+ "channels": 128,
28
+ "c_mults": [1, 2, 4, 8, 16],
29
+ "strides": [2, 4, 4, 8, 8],
30
+ "latent_dim": 64,
31
+ "use_snake": true,
32
+ "final_tanh": false
33
+ }
34
+ },
35
+ "bottleneck": {
36
+ "type": "vae"
37
+ },
38
+ "latent_dim": 64,
39
+ "downsampling_ratio": 2048,
40
+ "io_channels": 2
41
+ }
42
+ },
43
+ "conditioning": {
44
+ "configs": [
45
+ {
46
+ "id": "prompt",
47
+ "type": "t5",
48
+ "config": {
49
+ "t5_model_name": "t5-base",
50
+ "max_length": 128
51
+ }
52
+ },
53
+ {
54
+ "id": "seconds_start",
55
+ "type": "number",
56
+ "config": {
57
+ "min_val": 0,
58
+ "max_val": 512
59
+ }
60
+ },
61
+ {
62
+ "id": "seconds_total",
63
+ "type": "number",
64
+ "config": {
65
+ "min_val": 0,
66
+ "max_val": 512
67
+ }
68
+ }
69
+ ],
70
+ "cond_dim": 768
71
+ },
72
+ "diffusion": {
73
+ "cross_attention_cond_ids": ["prompt", "seconds_start", "seconds_total"],
74
+ "global_cond_ids": ["seconds_start", "seconds_total"],
75
+ "type": "dit",
76
+ "config": {
77
+ "io_channels": 64,
78
+ "embed_dim": 1536,
79
+ "depth": 24,
80
+ "num_heads": 24,
81
+ "cond_token_dim": 768,
82
+ "global_cond_dim": 1536,
83
+ "project_cond_tokens": false,
84
+ "transformer_type": "continuous_transformer"
85
+ }
86
+ },
87
+ "io_channels": 64
88
+ },
89
+ "training": {
90
+ "use_ema": true,
91
+ "log_loss_info": false,
92
+ "optimizer_configs": {
93
+ "diffusion": {
94
+ "optimizer": {
95
+ "type": "AdamW",
96
+ "config": {
97
+ "lr": 5e-5,
98
+ "betas": [0.9, 0.999],
99
+ "weight_decay": 1e-3
100
+ }
101
+ },
102
+ "scheduler": {
103
+ "type": "InverseLR",
104
+ "config": {
105
+ "inv_gamma": 1000000,
106
+ "power": 0.5,
107
+ "warmup": 0.99
108
+ }
109
+ }
110
+ }
111
+ },
112
+ "demo": {
113
+ "demo_every": 14784,
114
+ "demo_steps": 250,
115
+ "num_demos": 10,
116
+ "demo_cond": [
117
+ {"prompt": "Guitar, Steel Guitar, Mids, Upper Mids, Highs, Pluck, Bright, Clean, simple melody, D minor, 100 BPM, 8 bars", "seconds_start": 0, "seconds_total": 19},
118
+ {"prompt": "Guitar, Steel Guitar, Mids, Upper Mids, Highs, Pluck, Bright, Clean, simple melody, D minor, 100 BPM, 8 bars", "seconds_start": 0, "seconds_total": 19},
119
+ {"prompt": "Bowed Strings, Violin, Cello, Mids, Upper Mids, Staccato, Tight, Focused, Smooth, Present, Near, Ensemble, Wet, catchy dance chord progression, with top dance melody, C# minor, 128 BPM, 8 bars", "seconds_start": 0, "seconds_total": 15},
120
+ {"prompt": "Vocal, Synthetic, Bass, Mids, Upper Mids, Highs, Digital, Retro, Synthetic Vox, D major, 140 BPM, 8 bars", "seconds_start": 0, "seconds_total": 14},
121
+ {"prompt": "Keys, Grand Piano, Highs, Full, Subdued, Wet, Medium Reverb, Medium Phaser, medium speed, off beat, repeating, melody, C minor, 128 BPM, 8 bars", "seconds_start": 0, "seconds_total": 15},
122
+ {"prompt": "Bass, FM Bass, Sub Bass, Bass, Upper Mids, Highs, Thick, Clean, Pitch Bend, Wet, Medium Delay, Medium Reverb, Low Distortion, Phaser, 4 bars, 150 BPM, D minor", "seconds_start": 0, "seconds_total": 6},
123
+ {"prompt": "Wind, World Winds, Flute, Airy, Hollow,Bb major, 110 BPM, 4 bars", "seconds_start": 0, "seconds_total": 8},
124
+ {"prompt": "Bass, Wavetable Bass, Sub Bass, Bass, Upper Mids, Highs, Acid, 8 bars, 128 BPM, E minor", "seconds_start": 0, "seconds_total": 15},
125
+ {"prompt": "Ocarina, Formant Vocal, Warm, Rich, Clean, Medium Reverb F minor, 128 BPM, 8 bars", "seconds_start": 0, "seconds_total": 15},
126
+ {"prompt": "Ocarina, Formant Vocal, Warm, Rich, Clean, Medium Reverb F minor, 128 BPM, 8 bars", "seconds_start": 0, "seconds_total": 15}
127
+ ],
128
+ "demo_cfg_scales": [7]
129
+ }
130
+ }
131
+ }