import argparse
from typing import Optional, Sequence

import torch
from heartlib import HeartTranscriptorPipeline
def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command-line options for the transcription script.

    Args:
        argv: Argument strings to parse. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``, so existing zero-argument
            callers behave exactly as before.

    Returns:
        A namespace with two string attributes: ``model_path`` (required)
        and ``music_path`` (defaults to ``"./assets/output.mp3"``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_path",
        type=str,
        required=True,
        help="Path passed to HeartTranscriptorPipeline.from_pretrained.",
    )
    parser.add_argument(
        "--music_path",
        type=str,
        default="./assets/output.mp3",
        help="Audio file to transcribe (default: %(default)s).",
    )
    return parser.parse_args(argv)
def main() -> None:
    """Load the transcription pipeline, run it on one audio file, print the result.

    The model location and the audio file come from the command line
    (see ``parse_args``). The pipeline runs on CUDA in float16 when a CUDA
    device is available; otherwise it falls back to CPU in float32 instead
    of failing at load time.
    """
    args = parse_args()

    # Hard-coding "cuda" makes the script unusable on machines without a GPU.
    # float32 is used on CPU because half precision is a GPU-oriented choice
    # here — NOTE(review): confirm the checkpoint loads correctly in float32.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    dtype = torch.float16 if use_cuda else torch.float32

    pipe = HeartTranscriptorPipeline.from_pretrained(
        args.model_path,
        device=device,
        dtype=dtype,
    )

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        # These options are forwarded verbatim to the pipeline call.
        # NOTE(review): the names resemble Whisper-style generation
        # arguments; their exact semantics are defined by heartlib — confirm
        # against the pipeline's documentation before tuning them.
        result = pipe(
            args.music_path,
            max_new_tokens=256,
            num_beams=2,
            task="transcribe",
            condition_on_prev_tokens=False,
            compression_ratio_threshold=1.8,
            temperature=(0.0, 0.1, 0.2, 0.4),
            logprob_threshold=-1.0,
            no_speech_threshold=0.4,
        )
    print(result)


if __name__ == "__main__":
    main()