# MelodyFlow / config / solver / magnet / magnet_32khz.yaml
#
# Gael Le Lan
#
# Initial commit
#
# 77a1f04 over 1 year ago
#
# 1.94 kB

	# @package __global__

	# This is the training loop solver
	# for the base MusicGen model (text-to-music)
	# on monophonic audio sampled at 32 kHz
	defaults:
	- musicgen/default
	- /model: lm/musicgen_lm
	- override /dset: audio/default
	- _self_

	lm_model: transformer_lm_magnet
	solver: magnet

	autocast: true
	autocast_dtype: float16

	# EnCodec large trained on mono-channel music audio sampled at 32khz
	# with a total stride of 640 leading to 50 frames/s.
	# rvq.n_q=4, rvq.bins=2048, no quantization dropout
	# (transformer_lm card and n_q must be compatible)
	compression_model_checkpoint: //pretrained/facebook/encodec_32khz

	efficient_attention_backend: xformers # restricted attention implementation supports only xformers at the moment

	channels: 1
	sample_rate: 32000

	deadlock:
	use: true # deadlock detection

	dataset:
	batch_size: 192 # 32 GPUs
	sample_on_weight: false # Uniform sampling all the way
	sample_on_duration: false # Uniform sampling all the way

	optim:
	epochs: 500
	optimizer: dadam
	lr: 1
	ema:
	use: true
	updates: 10
	device: cuda

	logging:
	log_tensorboard: true

	schedule:
	lr_scheduler: cosine
	cosine:
	warmup: 4000
	lr_min_ratio: 0.0
	cycle_length: 1.0

	codebooks_pattern:
	modeling: parallel
	parallel:
	empty_initial: -1

	transformer_lm:
	card: 2048
	causal: false
	subcodes_context: 5
	compression_model_framerate: 50 # NOTE: Must match the actual frame rate of the used compression model
	segment_duration: 0
	span_len: -1

	masking:
	span_len: 3

	generate:
	lm:
	max_prompt_len: null
	max_gen_len: null
	remove_prompts: false
	use_sampling: true
	temp: 3.0
	top_k: 0
	top_p: 0.9
	max_cfg_coef: 10.0
	min_cfg_coef: 1.0
	decoding_steps: [60, 10, 10, 10]
	anneal_temp: true
	span_scoring: 'max'
	span_arrangement: 'nonoverlap'
	prompted_samples: false
	samples:
	prompted: false
	unprompted: true