Demucs (models)
Browse files- .gitattributes +5 -0
- models/HTDemucs_Similarity_Extractor_by_wesleyr36/.gitattributes +35 -0
- models/HTDemucs_Similarity_Extractor_by_wesleyr36/config_htdemucs_similarity.yaml +144 -0
- models/HTDemucs_Similarity_Extractor_by_wesleyr36/model_htdemucs_ep_21_sdr_13.6970.ckpt +3 -0
- models/HTDemucs_Similarity_Extractor_by_wesleyr36/source.txt +1 -0
- models/ht-demucs/.gitattributes +35 -0
- models/ht-demucs/LICENSE +0 -0
- models/ht-demucs/README.md +5 -0
- models/ht-demucs/htdemucs_finetuned%20%281%29.pt +3 -0
- models/ht-demucs/htdemucs_finetuned.pt +3 -0
- models/ht-demucs/source.txt +1 -0
- models/htdemucs (pablebe)/.gitattributes +35 -0
- models/htdemucs (pablebe)/README.md +20 -0
- models/htdemucs (pablebe)/htdemucs_epoch=570-sdr=6.38.ckpt +3 -0
- models/htdemucs (pablebe)/source.txt +1 -0
- models/htdemucs-ort/.gitattributes +36 -0
- models/htdemucs-ort/htdemucs.ort +3 -0
- models/htdemucs-ort/manifest.json +26 -0
- models/htdemucs-ort/source.txt +1 -0
- models/htdemucs_ft/.gitattributes +39 -0
- models/htdemucs_ft/04573f0d-f3cf25b2.th +3 -0
- models/htdemucs_ft/92cfc3b6-ef3bcb9c.th +3 -0
- models/htdemucs_ft/README.md +3 -0
- models/htdemucs_ft/d12395a8-e57c48e6.th +3 -0
- models/htdemucs_ft/f7e0c4bc-ba3fe64a.th +3 -0
- models/htdemucs_ft/htdemucs_ft.yaml +7 -0
- models/htdemucs_ft/source.txt +1 -0
.gitattributes
CHANGED
|
@@ -79,3 +79,8 @@ models/demucs[[:space:]](therealvul)/SFX_separation_epoch_190.th filter=lfs diff
|
|
| 79 |
models/demucs[[:space:]](therealvul)/SFX_separation_epoch_60.th filter=lfs diff=lfs merge=lfs -text
|
| 80 |
models/demucs[[:space:]](therealvul)/SFX_separation_v2_epoch_338.th filter=lfs diff=lfs merge=lfs -text
|
| 81 |
models/demucs[[:space:]](therealvul)/styletts2_enhance_checkpoint_epoch_1.th filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
models/demucs[[:space:]](therealvul)/SFX_separation_epoch_60.th filter=lfs diff=lfs merge=lfs -text
|
| 80 |
models/demucs[[:space:]](therealvul)/SFX_separation_v2_epoch_338.th filter=lfs diff=lfs merge=lfs -text
|
| 81 |
models/demucs[[:space:]](therealvul)/styletts2_enhance_checkpoint_epoch_1.th filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
models/htdemucs_ft/04573f0d-f3cf25b2.th filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
models/htdemucs_ft/92cfc3b6-ef3bcb9c.th filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
models/htdemucs_ft/d12395a8-e57c48e6.th filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
models/htdemucs_ft/f7e0c4bc-ba3fe64a.th filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
models/htdemucs-ort/htdemucs.ort filter=lfs diff=lfs merge=lfs -text
|
models/HTDemucs_Similarity_Extractor_by_wesleyr36/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
models/HTDemucs_Similarity_Extractor_by_wesleyr36/config_htdemucs_similarity.yaml
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
audio:
|
| 2 |
+
chunk_size: 132300 # samplerate * segment
|
| 3 |
+
min_mean_abs: 0.001
|
| 4 |
+
hop_length: 1024
|
| 5 |
+
|
| 6 |
+
training:
|
| 7 |
+
batch_size: 5
|
| 8 |
+
gradient_accumulation_steps: 3
|
| 9 |
+
grad_clip: 0
|
| 10 |
+
segment: 3
|
| 11 |
+
shift: 1
|
| 12 |
+
samplerate: 44100
|
| 13 |
+
channels: 2
|
| 14 |
+
normalize: true
|
| 15 |
+
instruments: ['similarity', 'difference']
|
| 16 |
+
target_instrument: null
|
| 17 |
+
num_epochs: 1000
|
| 18 |
+
num_steps: 1000
|
| 19 |
+
optimizer: prodigy
|
| 20 |
+
lr: 1.0
|
| 21 |
+
patience: 80
|
| 22 |
+
reduce_factor: 0.95
|
| 23 |
+
q: 0.95
|
| 24 |
+
coarse_loss_clip: true
|
| 25 |
+
ema_momentum: 0.999
|
| 26 |
+
other_fix: true # needed when checking on the multisong dataset whether "other" is actually instrumental
|
| 27 |
+
use_amp: false # enable or disable usage of mixed precision (float16) - usually it must be true
|
| 28 |
+
|
| 29 |
+
augmentations:
|
| 30 |
+
enable: false # enable or disable all augmentations (a quick way to turn them all off if needed)
|
| 31 |
+
loudness: true # randomly change the loudness of each stem within the range (loudness_min; loudness_max)
|
| 32 |
+
loudness_min: 0.5
|
| 33 |
+
loudness_max: 1.5
|
| 34 |
+
mixup: true # mix several stems of the same type with some probability (only works for dataset types: 1, 2, 3)
|
| 35 |
+
mixup_probs: [0.2, 0.02]
|
| 36 |
+
mixup_loudness_min: 0.5
|
| 37 |
+
mixup_loudness_max: 1.5
|
| 38 |
+
|
| 39 |
+
inference:
|
| 40 |
+
num_overlap: 4
|
| 41 |
+
batch_size: 18
|
| 42 |
+
|
| 43 |
+
loss_multistft:
|
| 44 |
+
fft_sizes:
|
| 45 |
+
- 2048
|
| 46 |
+
- 4096
|
| 47 |
+
hop_sizes:
|
| 48 |
+
- 1024
|
| 49 |
+
- 2048
|
| 50 |
+
win_lengths:
|
| 51 |
+
- 2048
|
| 52 |
+
- 4096
|
| 53 |
+
window: "hann_window"
|
| 54 |
+
scale: "mel"
|
| 55 |
+
n_bins: 128
|
| 56 |
+
sample_rate: 44100
|
| 57 |
+
perceptual_weighting: true
|
| 58 |
+
w_sc: 1.0
|
| 59 |
+
w_log_mag: 1.0
|
| 60 |
+
w_lin_mag: 0.0
|
| 61 |
+
w_phs: 0.0
|
| 62 |
+
mag_distance: "L1"
|
| 63 |
+
|
| 64 |
+
model: htdemucs
|
| 65 |
+
|
| 66 |
+
htdemucs: # see demucs/htdemucs.py for a detailed description
|
| 67 |
+
# Channels
|
| 68 |
+
channels: 48
|
| 69 |
+
channels_time:
|
| 70 |
+
growth: 2
|
| 71 |
+
# STFT
|
| 72 |
+
num_subbands: 1
|
| 73 |
+
nfft: 4096
|
| 74 |
+
wiener_iters: 0
|
| 75 |
+
end_iters: 0
|
| 76 |
+
wiener_residual: false
|
| 77 |
+
cac: true
|
| 78 |
+
# Main structure
|
| 79 |
+
depth: 4
|
| 80 |
+
rewrite: true
|
| 81 |
+
# Frequency Branch
|
| 82 |
+
multi_freqs: []
|
| 83 |
+
multi_freqs_depth: 3
|
| 84 |
+
freq_emb: 0.2
|
| 85 |
+
emb_scale: 10
|
| 86 |
+
emb_smooth: true
|
| 87 |
+
# Convolutions
|
| 88 |
+
kernel_size: 8
|
| 89 |
+
stride: 4
|
| 90 |
+
time_stride: 2
|
| 91 |
+
context: 1
|
| 92 |
+
context_enc: 0
|
| 93 |
+
# normalization
|
| 94 |
+
norm_starts: 4
|
| 95 |
+
norm_groups: 4
|
| 96 |
+
# DConv residual branch
|
| 97 |
+
dconv_mode: 3
|
| 98 |
+
dconv_depth: 2
|
| 99 |
+
dconv_comp: 8
|
| 100 |
+
dconv_init: 1e-3
|
| 101 |
+
# Before the Transformer
|
| 102 |
+
bottom_channels: 512
|
| 103 |
+
# CrossTransformer
|
| 104 |
+
# ------ Common to all
|
| 105 |
+
# Regular parameters
|
| 106 |
+
t_layers: 5
|
| 107 |
+
t_hidden_scale: 4.0
|
| 108 |
+
t_heads: 8
|
| 109 |
+
t_dropout: 0.0
|
| 110 |
+
t_layer_scale: True
|
| 111 |
+
t_gelu: True
|
| 112 |
+
# ------------- Positional Embedding
|
| 113 |
+
t_emb: sin
|
| 114 |
+
t_max_positions: 10000 # for the scaled embedding
|
| 115 |
+
t_max_period: 10000.0
|
| 116 |
+
t_weight_pos_embed: 1.0
|
| 117 |
+
t_cape_mean_normalize: True
|
| 118 |
+
t_cape_augment: True
|
| 119 |
+
t_cape_glob_loc_scale: [5000.0, 1.0, 1.4]
|
| 120 |
+
t_sin_random_shift: 0
|
| 121 |
+
# ------------- norm before a transformer encoder
|
| 122 |
+
t_norm_in: True
|
| 123 |
+
t_norm_in_group: False
|
| 124 |
+
# ------------- norm inside the encoder
|
| 125 |
+
t_group_norm: False
|
| 126 |
+
t_norm_first: True
|
| 127 |
+
t_norm_out: True
|
| 128 |
+
# ------------- optim
|
| 129 |
+
t_weight_decay: 0.0
|
| 130 |
+
t_lr:
|
| 131 |
+
# ------------- sparsity
|
| 132 |
+
t_sparse_self_attn: False
|
| 133 |
+
t_sparse_cross_attn: False
|
| 134 |
+
t_mask_type: diag
|
| 135 |
+
t_mask_random_seed: 42
|
| 136 |
+
t_sparse_attn_window: 400
|
| 137 |
+
t_global_window: 100
|
| 138 |
+
t_sparsity: 0.95
|
| 139 |
+
t_auto_sparsity: False
|
| 140 |
+
# Cross Encoder First (False)
|
| 141 |
+
t_cross_first: False
|
| 142 |
+
# Weight init
|
| 143 |
+
rescale: 0.1
|
| 144 |
+
|
models/HTDemucs_Similarity_Extractor_by_wesleyr36/model_htdemucs_ep_21_sdr_13.6970.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9970dca36a15c0d0cf3338d24592aa35469697dc3be4b07e9056f5d54b82185
|
| 3 |
+
size 168122809
|
models/HTDemucs_Similarity_Extractor_by_wesleyr36/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://huggingface.co/jarredou/HTDemucs_Similarity_Extractor_by_wesleyr36
|
models/ht-demucs/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
models/ht-demucs/LICENSE
ADDED
|
File without changes
|
models/ht-demucs/README.md
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: other
|
| 3 |
+
license_name: facebook
|
| 4 |
+
license_link: LICENSE
|
| 5 |
+
---
|
models/ht-demucs/htdemucs_finetuned%20%281%29.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9150cd79333a8e8539defbd74073793e9ac85d0851de090371cbf243e4e4028
|
| 3 |
+
size 107715779
|
models/ht-demucs/htdemucs_finetuned.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bfc71644adba7d2065ec8ab1bb8d71d4e68d2fb15ed2c4d284e816b704dc698
|
| 3 |
+
size 107715779
|
models/ht-demucs/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://huggingface.co/raddhuha/HT-Demucs
|
models/htdemucs (pablebe)/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
models/htdemucs (pablebe)/README.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
---
|
| 4 |
+
# Towards Reliable Objective Evaluation Metrics for Generative Singing Voice Separation Models
|
| 5 |
+
This repository contains the model checkpoint for the HTDemucs model trained from scratch for the WASPAA 2025 paper "Towards Reliable Objective Evaluation Metrics for Generative Singing Voice Separation Models" by Paul A. Bereuter, Benjamin Stahl, Mark D. Plumbley and Alois Sontacchi.
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
## Cite This Work:
|
| 9 |
+
If you use this model checkpoint, please cite us with:
|
| 10 |
+
```bib
|
| 11 |
+
@misc{bereuter2025,
|
| 12 |
+
title={Towards Reliable Objective Evaluation Metrics for Generative Singing Voice Separation Models},
|
| 13 |
+
author={Paul A. Bereuter and Benjamin Stahl and Mark D. Plumbley and Alois Sontacchi},
|
| 14 |
+
year={2025},
|
| 15 |
+
eprint={2507.11427},
|
| 16 |
+
archivePrefix={arXiv},
|
| 17 |
+
primaryClass={eess.AS},
|
| 18 |
+
url={https://arxiv.org/abs/2507.11427},
|
| 19 |
+
}
|
| 20 |
+
```
|
models/htdemucs (pablebe)/htdemucs_epoch=570-sdr=6.38.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30d165a32b0a2238210f818c06b4b9f9da0198f2c734a95075ee21077cd596b8
|
| 3 |
+
size 1635710219
|
models/htdemucs (pablebe)/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://huggingface.co/pablebe/htdemucs
|
models/htdemucs-ort/.gitattributes
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
htdemucs.ort filter=lfs diff=lfs merge=lfs -text
|
models/htdemucs-ort/htdemucs.ort
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09dc165512d8ef7480bcb2cacea9dda82d571f8dbf421d8c44a2ca5568bec729
|
| 3 |
+
size 209884896
|
models/htdemucs-ort/manifest.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "HTDemucs-ORT",
|
| 3 |
+
"format": "ort",
|
| 4 |
+
"version": "0.1.0",
|
| 5 |
+
"opset": 18,
|
| 6 |
+
"sample_rate_hz": 44100,
|
| 7 |
+
"window": 343980,
|
| 8 |
+
"hop": 171990,
|
| 9 |
+
"stems": ["drums","bass","other","vocals"],
|
| 10 |
+
"inputs": [
|
| 11 |
+
{ "name": "input", "layout": "BCT", "dtype": "f32", "shape": ["1","2","343980"] },
|
| 12 |
+
{ "name": "x", "layout": "BSCF", "dtype": "f32", "shape": ["1","4","2048","336"] }
|
| 13 |
+
],
|
| 14 |
+
"outputs": [
|
| 15 |
+
{ "name": "add_67", "layout": "BSCT", "dtype": "f32", "shape": ["1","4","2","343980"] }
|
| 16 |
+
],
|
| 17 |
+
"artifacts": [
|
| 18 |
+
{
|
| 19 |
+
"file": "htdemucs.ort",
|
| 20 |
+
"sha256": "09dc165512d8ef7480bcb2cacea9dda82d571f8dbf421d8c44a2ca5568bec729",
|
| 21 |
+
"size_bytes": 209884896,
|
| 22 |
+
"url": "https://huggingface.co/gentij/htdemucs-ort/resolve/main/htdemucs.ort"
|
| 23 |
+
}
|
| 24 |
+
],
|
| 25 |
+
"entry": "htdemucs.ort"
|
| 26 |
+
}
|
models/htdemucs-ort/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://huggingface.co/gentij/htdemucs-ort
|
models/htdemucs_ft/.gitattributes
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
04573f0d-f3cf25b2.th filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
92cfc3b6-ef3bcb9c.th filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
d12395a8-e57c48e6.th filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
f7e0c4bc-ba3fe64a.th filter=lfs diff=lfs merge=lfs -text
|
models/htdemucs_ft/04573f0d-f3cf25b2.th
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3cf25b222c4eed7cd49dd8b2c9597d50c18bd154090f7b919cfa5f93cf22c49
|
| 3 |
+
size 84141271
|
models/htdemucs_ft/92cfc3b6-ef3bcb9c.th
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef3bcb9c8b40d14ae5d51b6db2587339cc12c6b77c0be151ce6d69002e087bf2
|
| 3 |
+
size 84141271
|
models/htdemucs_ft/README.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
---
|
models/htdemucs_ft/d12395a8-e57c48e6.th
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e57c48e6b0e38af4f7118d7bd08c49f0a0c0edf7d09143bdd902ea0d237303e6
|
| 3 |
+
size 84141271
|
models/htdemucs_ft/f7e0c4bc-ba3fe64a.th
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba3fe64ae8ef66ac9a4857222ce48efbdc5eb3ad375cb79dd13debee5aaa4066
|
| 3 |
+
size 84141271
|
models/htdemucs_ft/htdemucs_ft.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
models: ['f7e0c4bc', 'd12395a8', '92cfc3b6', '04573f0d']
|
| 2 |
+
weights: [
|
| 3 |
+
[1., 0., 0., 0.],
|
| 4 |
+
[0., 1., 0., 0.],
|
| 5 |
+
[0., 0., 1., 0.],
|
| 6 |
+
[0., 0., 0., 1.],
|
| 7 |
+
]
|
models/htdemucs_ft/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://huggingface.co/dokodesuka/htdemucs_ft
|