cond_gen / audio_tokens.py
Leon299's picture
Add files using upload-large-folder tool
8337fa0 verified
raw
history blame contribute delete
520 Bytes
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Marker strings that add_audio_special_tokens() registers with the tokenizer
# alongside the per-id "<AUDIO_n>" tokens.
# NOTE(review): the names suggest start-of-audio, end-of-audio and a
# masked-audio placeholder -- confirm against the code that consumes them.
SOA_TOKEN = "[SOA]"
EOA_TOKEN = "[EOA]"
MASK_AUDIO_TOKEN = "[MASK_AUDIO]"
def audio_id_to_token(audio_id: int) -> str:
    """Return the token string for a discrete audio id.

    The id is coerced with ``int()`` before formatting, so any value that
    ``int()`` accepts is rendered as a plain decimal number inside the
    ``<AUDIO_...>`` wrapper (note that ``int()`` truncates floats).

    Args:
        audio_id: Index of the audio code to name.

    Returns:
        The string ``"<AUDIO_{n}>"`` where ``n`` is ``int(audio_id)``.

    Raises:
        TypeError, ValueError: Propagated from ``int()`` when ``audio_id``
            cannot be converted to an integer.
    """
    return f"<AUDIO_{int(audio_id)}>"
def add_audio_special_tokens(tokenizer, num_audio_token: int) -> int:
    """Register the audio token vocabulary on ``tokenizer``.

    The tokens are passed to ``tokenizer.add_tokens`` in this order:
    ``<AUDIO_0>`` ... ``<AUDIO_{num_audio_token - 1}>``, then
    ``MASK_AUDIO_TOKEN``, ``SOA_TOKEN`` and ``EOA_TOKEN``. All of them are
    added with ``special_tokens=True``.

    Args:
        tokenizer: Object exposing ``add_tokens(tokens, special_tokens=...)``.
            NOTE(review): presumably a Hugging Face tokenizer -- confirm
            against the caller.
        num_audio_token: Number of per-id audio tokens to create. A value of
            zero or less yields no ``<AUDIO_n>`` tokens, so only the three
            marker tokens are registered.

    Returns:
        Whatever ``tokenizer.add_tokens`` returns, unchanged.
    """
    # One "<AUDIO_n>" entry per audio id, in ascending id order.
    audio_tokens = [audio_id_to_token(i) for i in range(num_audio_token)]
    # Marker tokens always come after the per-id tokens, in this fixed order.
    marker_tokens = [MASK_AUDIO_TOKEN, SOA_TOKEN, EOA_TOKEN]
    return tokenizer.add_tokens(
        audio_tokens + marker_tokens,
        special_tokens=True,
    )