liumaolin

refactor(config): centralize configuration management in `project_config`

8f68d0a 3 months ago

4.27 kB

	#!/usr/bin/env python3
	"""
	GPT-SoVITS 命令行推理工具

	使用方法:
	python infer.py \
	--target-text "要合成的文本" \
	--ref-text "参考音频的文本" \
	--ref-audio "/path/to/reference.wav" \
	--gpt-model "/path/to/gpt_model.ckpt" \
	--sovits-model "/path/to/sovits_model.pth" \
	--output "/path/to/output.wav"
	"""
	import sys
	from pathlib import Path

	import click
	import soundfile as sf

	from project_config import settings
	from training_pipeline.configs import InferenceConfig
	from training_pipeline.stages.inference import create_tts_module, create_inference_config


	def _preview(text: str, limit: int = 50) -> str:
	    """Return ``text`` cut to ``limit`` characters, followed by '...' if it was longer."""
	    return f"{text[:limit]}{'...' if len(text) > limit else ''}"


	# Command-line entry point for single-shot GPT-SoVITS inference.
	#
	# Options (parsed and validated by click before ``main`` runs):
	#   --target-text / -t   text to synthesize (required)
	#   --ref-text / -r      transcript of the reference audio (required)
	#   --ref-audio / -a     path to the reference audio; must exist (required)
	#   --gpt-model / -g     path to the GPT weights; must exist (required)
	#   --sovits-model / -s  path to the SoVITS weights; must exist (required)
	#   --output / -o        destination audio file, default 'output.wav'
	#   --bert-path          BERT directory, default settings.BERT_PRETRAINED_DIR; must exist
	#   --cnhubert-path      HuBERT directory, default settings.SSL_PRETRAINED_DIR; must exist
	#   --text-lang          language of the target text, default 'zh'
	#   --prompt-lang        language of the reference text, default 'zh'
	#
	# Exit status: returns normally after writing the audio file; calls
	# ``sys.exit(1)`` when model loading or synthesis raises, or when the TTS
	# module produces no audio at all.
	@click.command()
	@click.option(
	    '--target-text', '-t',
	    required=True,
	    help='要合成的目标文本'
	)
	@click.option(
	    '--ref-text', '-r',
	    required=True,
	    help='参考音频的文本内容（用于提示模型音色）'
	)
	@click.option(
	    '--ref-audio', '-a',
	    required=True,
	    type=click.Path(exists=True),
	    help='参考音频文件路径（用于提取音色）'
	)
	@click.option(
	    '--gpt-model', '-g',
	    required=True,
	    type=click.Path(exists=True),
	    help='GPT 模型权重路径（.ckpt 文件）'
	)
	@click.option(
	    '--sovits-model', '-s',
	    required=True,
	    type=click.Path(exists=True),
	    help='SoVITS 模型权重路径（.pth 文件）'
	)
	@click.option(
	    '--output', '-o',
	    default='output.wav',
	    type=click.Path(),
	    help='输出音频文件路径（默认: output.wav）'
	)
	@click.option(
	    '--bert-path',
	    default=settings.BERT_PRETRAINED_DIR,
	    type=click.Path(exists=True),
	    help='BERT 预训练模型路径'
	)
	@click.option(
	    '--cnhubert-path',
	    default=settings.SSL_PRETRAINED_DIR,
	    type=click.Path(exists=True),
	    help='Chinese HuBERT 预训练模型路径'
	)
	@click.option(
	    '--text-lang',
	    default='zh',
	    help='目标文本语言（默认: zh）'
	)
	@click.option(
	    '--prompt-lang',
	    default='zh',
	    help='参考文本语言（默认: zh）'
	)
	def main(
	    target_text: str,
	    ref_text: str,
	    ref_audio: str,
	    gpt_model: str,
	    sovits_model: str,
	    output: str,
	    bert_path: str,
	    cnhubert_path: str,
	    text_lang: str,
	    prompt_lang: str,
	):
	    """GPT-SoVITS 命令行推理工具

	    使用指定的 GPT 和 SoVITS 模型，将目标文本合成为语音。
	    需要提供参考音频和对应的文本来指定音色。
	    """
	    # NOTE: the docstring above is rendered by click as the command's --help
	    # text, so it is user-facing output and is intentionally left unchanged.

	    # Echo the effective settings; long texts are shortened to a 50-char preview.
	    click.echo("🎤 GPT-SoVITS 推理")
	    click.echo(f" 目标文本: {_preview(target_text)}")
	    click.echo(f" 参考文本: {_preview(ref_text)}")
	    click.echo(f" 参考音频: {ref_audio}")
	    click.echo(f" GPT 模型: {gpt_model}")
	    click.echo(f" SoVITS 模型: {sovits_model}")
	    click.echo(f" 输出路径: {output}")
	    click.echo()

	    # Make sure the directory that will receive the output file exists.
	    output_path = Path(output)
	    output_path.parent.mkdir(parents=True, exist_ok=True)

	    # Model-level configuration handed to create_tts_module.
	    cfg = InferenceConfig(
	        exp_name="cli_inference",
	        gpt_path=gpt_model,
	        sovits_path=sovits_model,
	        bert_path=bert_path,
	        cnhubert_base_path=cnhubert_path,
	        ref_text=ref_text,
	        ref_audio_path=ref_audio,
	        target_text=target_text,
	    )

	    click.echo("⏳ 正在加载模型...")
	    try:
	        tts_module = create_tts_module(cfg)

	        # Per-request parameters for this single synthesis call.
	        inference_config = create_inference_config(
	            text=target_text,
	            ref_audio_path=ref_audio,
	            prompt_text=ref_text,
	            text_lang=text_lang,
	            prompt_lang=prompt_lang,
	        )

	        click.echo("🔊 正在合成语音...")
	        for item in tts_module.run(inference_config):
	            # Each item carries the sample rate first and the audio data second.
	            sample_rate, audio_data = item[0], item[1]
	            sf.write(str(output_path), audio_data, sample_rate, subtype='PCM_16')
	            break  # only the first result is used
	        else:
	            # The loop ended without producing anything, so no file was
	            # written. Raise so the handler below reports a failure instead
	            # of falling through to the success message.
	            raise RuntimeError("模型未生成任何音频")

	        click.echo(f"✅ 成功！音频已保存至: {output_path.absolute()}")

	    except Exception as e:
	        # Top-level CLI boundary: report the error on stderr and exit non-zero.
	        click.echo(f"❌ 推理失败: {e}", err=True)
	        sys.exit(1)


	# Run the click command only when this file is executed as a script, so that
	# importing the module does not trigger argument parsing or inference.
	if __name__ == '__main__':
	    main()