# SoloSpeech-models / config_tsr.yaml
# Source page metadata: user anonymous20250515, commit message "add",
# revision 581320b.
# Configuration for the "base_tsr" system: DDIM settings and the UDiT
# hyperparameters grouped under diffwrap.
# NOTE(review): the nesting here is reconstructed from the section names
# (ddim > diffusers, diffwrap > UDiT); without it those section keys parse
# as empty (null) values. Confirm the hierarchy against the code that
# loads this file.

# Parsed as the float 1.0, not a string; quote it if the loader compares
# versions as text.
version: 1.0

system: "base_tsr"

ddim:
  # Presumably has to agree with diffusers.prediction_type below — confirm.
  v_prediction: true
  # These key names match the constructor arguments of the diffusers
  # DDIMScheduler; presumably forwarded to it as keyword arguments —
  # TODO confirm against the loader.
  diffusers:
    num_train_timesteps: 1000
    beta_schedule: 'scaled_linear'
    beta_start: 0.00085
    beta_end: 0.012
    prediction_type: 'v_prediction'
    rescale_betas_zero_snr: true
    timestep_spacing: 'trailing'
    clip_sample: false

diffwrap:
  # Hyperparameters for the UDiT entry; how they are consumed is not
  # visible from this file.
  UDiT:
    in_chans: 384
    out_chans: 128
    embed_dim: 768
    depth: 12
    num_heads: 12
    mlp_ratio: 4.0
    use_checkpoint: false