from heartlib import HeartTranscriptorPipeline import argparse import torch def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--model_path", type=str, required=True) parser.add_argument("--music_path", type=str, default="./assets/output.mp3") return parser.parse_args() if __name__ == "__main__": args = parse_args() pipe = HeartTranscriptorPipeline.from_pretrained( args.model_path, device=torch.device("cuda"), dtype=torch.float16, ) with torch.no_grad(): result = pipe( args.music_path, **{ "max_new_tokens": 256, "num_beams": 2, "task": "transcribe", "condition_on_prev_tokens": False, "compression_ratio_threshold": 1.8, "temperature": (0.0, 0.1, 0.2, 0.4), "logprob_threshold": -1.0, "no_speech_threshold": 0.4, }, ) print(result)