#!/usr/bin/env python3
"""
GPT-SoVITS 命令行推理工具

使用方法:
    python infer.py \
        --target-text "要合成的文本" \
        --ref-text "参考音频的文本" \
        --ref-audio "/path/to/reference.wav" \
        --gpt-model "/path/to/gpt_model.ckpt" \
        --sovits-model "/path/to/sovits_model.pth" \
        --output "/path/to/output.wav"
"""
import sys
from pathlib import Path

import click
import soundfile as sf

from project_config import settings
from training_pipeline.configs import InferenceConfig
from training_pipeline.stages.inference import create_tts_module, create_inference_config


# CLI entry point: synthesize speech for a target text with GPT-SoVITS.
#
# Flow: echo the received parameters, make sure the output directory exists,
# build an InferenceConfig, create the TTS module, run inference and write the
# first yielded (sample_rate, audio_data) result to `output` with the PCM_16
# subtype. Any exception raised while loading or synthesizing is printed to
# stderr and the process exits with status 1.
#
# Parameters (all supplied by click from the options declared below):
#   target_text   -- text to synthesize.
#   ref_text      -- transcript of the reference audio (used as prompt text).
#   ref_audio     -- path to the reference audio file (must exist).
#   gpt_model     -- path to the GPT weights (must exist).
#   sovits_model  -- path to the SoVITS weights (must exist).
#   output        -- destination audio path (default: output.wav).
#   bert_path     -- BERT model path (default: settings.BERT_PRETRAINED_DIR).
#   cnhubert_path -- HuBERT model path (default: settings.SSL_PRETRAINED_DIR).
#   text_lang     -- language of the target text (default: zh).
#   prompt_lang   -- language of the reference text (default: zh).
#
# NOTE: the function docstring is kept in its original wording on purpose:
# click prints it as the `--help` text, so it is user-facing output.
@click.command()
@click.option(
    '--target-text', '-t',
    required=True,
    help='要合成的目标文本'
)
@click.option(
    '--ref-text', '-r',
    required=True,
    help='参考音频的文本内容（用于提示模型音色）'
)
@click.option(
    '--ref-audio', '-a',
    required=True,
    type=click.Path(exists=True),
    help='参考音频文件路径（用于提取音色）'
)
@click.option(
    '--gpt-model', '-g',
    required=True,
    type=click.Path(exists=True),
    help='GPT 模型权重路径（.ckpt 文件）'
)
@click.option(
    '--sovits-model', '-s',
    required=True,
    type=click.Path(exists=True),
    help='SoVITS 模型权重路径（.pth 文件）'
)
@click.option(
    '--output', '-o',
    default='output.wav',
    type=click.Path(),
    help='输出音频文件路径（默认: output.wav）'
)
@click.option(
    '--bert-path',
    default=settings.BERT_PRETRAINED_DIR,
    type=click.Path(exists=True),
    help='BERT 预训练模型路径'
)
@click.option(
    '--cnhubert-path',
    default=settings.SSL_PRETRAINED_DIR,
    type=click.Path(exists=True),
    help='Chinese HuBERT 预训练模型路径'
)
@click.option(
    '--text-lang',
    default='zh',
    help='目标文本语言（默认: zh）'
)
@click.option(
    '--prompt-lang',
    default='zh',
    help='参考文本语言（默认: zh）'
)
def main(
        target_text: str,
        ref_text: str,
        ref_audio: str,
        gpt_model: str,
        sovits_model: str,
        output: str,
        bert_path: str,
        cnhubert_path: str,
        text_lang: str,
        prompt_lang: str,
) -> None:
    """GPT-SoVITS 命令行推理工具
    
    使用指定的 GPT 和 SoVITS 模型，将目标文本合成为语音。
    需要提供参考音频和对应的文本来指定音色。
    """
    # Echo the parameters; long texts are truncated to 50 characters.
    click.echo("🎤 GPT-SoVITS 推理")
    click.echo(f"  目标文本: {target_text[:50]}{'...' if len(target_text) > 50 else ''}")
    click.echo(f"  参考文本: {ref_text[:50]}{'...' if len(ref_text) > 50 else ''}")
    click.echo(f"  参考音频: {ref_audio}")
    click.echo(f"  GPT 模型: {gpt_model}")
    click.echo(f"  SoVITS 模型: {sovits_model}")
    click.echo(f"  输出路径: {output}")
    click.echo()

    # Make sure the directory that will hold the output file exists.
    output_path = Path(output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Configuration used to construct the TTS module (model and reference paths).
    cfg = InferenceConfig(
        exp_name="cli_inference",
        gpt_path=gpt_model,
        sovits_path=sovits_model,
        bert_path=bert_path,
        cnhubert_base_path=cnhubert_path,
        ref_text=ref_text,
        ref_audio_path=ref_audio,
        target_text=target_text,
    )

    click.echo("⏳ 正在加载模型...")
    try:
        tts_module = create_tts_module(cfg)

        # Per-request configuration for this synthesis call.
        inference_config = create_inference_config(
            text=target_text,
            ref_audio_path=ref_audio,
            prompt_text=ref_text,
            text_lang=text_lang,
            prompt_lang=prompt_lang,
        )

        click.echo("🔊 正在合成语音...")
        # Only the first yielded result is used. A run that yields nothing is
        # treated as a failure; otherwise we would report success without
        # having written any file.
        first_result = next(iter(tts_module.run(inference_config)), None)
        if first_result is None:
            raise RuntimeError("推理未产生任何音频输出")

        sample_rate, audio_data = first_result[0], first_result[1]
        sf.write(str(output_path), audio_data, sample_rate, subtype='PCM_16')

        click.echo(f"✅ 成功！音频已保存至: {output_path.absolute()}")

    except Exception as e:
        # Top-level CLI boundary: report the error on stderr and exit non-zero.
        click.echo(f"❌ 推理失败: {e}", err=True)
        sys.exit(1)


# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()