| """Translate english to chinese via a dict.""" |
| from typing import List, Union |
|
|
| from gradiobee.en2zh import en2zh |
| from gradiobee.insert_spaces import insert_spaces |
|
|
|
|
| |
def en2zh_tokens(
    text: Union[str, List[str]],
    dedup: bool = True,
) -> List[List[str]]:
    """Translate English text to Chinese tokens via a dict.

    Args
        text: text to translate, a str or a list of str; it is handed
            to ``en2zh`` unchanged.
        dedup: if True, remove duplicate tokens within each entry,
            keeping the first occurrence of every token.
    Returns
        res: list of list of str/token/char, one inner list per element
            produced by ``en2zh(text)``. Each inner list holds the
            whitespace-separated pieces of ``insert_spaces(element)``.
    """
    # Tokenize once; both the dedup and non-dedup paths start from this.
    token_lists = [insert_spaces(elm).split() for elm in en2zh(text)]

    if not dedup:
        return token_lists

    # dict.fromkeys drops duplicates while keeping first-seen order.
    # list(set(...)) would also dedup, but its ordering is arbitrary and,
    # for str, changes between interpreter runs (hash randomization),
    # making the output non-reproducible.
    return [list(dict.fromkeys(tokens)) for tokens in token_lists]
|
|