GenerTeam committed on
Commit
cac27f2
·
verified ·
1 Parent(s): fabe9ba

Update tokenizer.py

Browse files
Files changed (1) hide show
  1. tokenizer.py +9 -1
tokenizer.py CHANGED
@@ -16,6 +16,7 @@ Supports token_mask for Fine-grained Nucleotide Supervision (FNS):
16
 
17
  import os
18
  import json
 
19
  import itertools
20
  from typing import List, Optional, Tuple, Dict, Union, Any
21
 
@@ -322,7 +323,7 @@ class HybridDNATokenizer(PreTrainedTokenizer):
322
  else:
323
  base_ids = self._base_tokenizer.encode(
324
  segment_content,
325
- add_special_tokens=False
326
  )
327
  token_ids.extend(base_ids)
328
  if return_token_mask:
@@ -430,6 +431,13 @@ class HybridDNATokenizer(PreTrainedTokenizer):
430
  auto_dna_tags: Optional[bool] = None,
431
  **kwargs
432
  ) -> Dict[str, Any]:
 
 
 
 
 
 
 
433
  is_batch = isinstance(text, list)
434
  texts = text if is_batch else [text]
435
 
 
16
 
17
  import os
18
  import json
19
+ import warnings
20
  import itertools
21
  from typing import List, Optional, Tuple, Dict, Union, Any
22
 
 
323
  else:
324
  base_ids = self._base_tokenizer.encode(
325
  segment_content,
326
+ add_special_tokens=add_special_tokens
327
  )
328
  token_ids.extend(base_ids)
329
  if return_token_mask:
 
431
  auto_dna_tags: Optional[bool] = None,
432
  **kwargs
433
  ) -> Dict[str, Any]:
434
+ if add_special_tokens:
435
+ warnings.warn(
436
+ "HybridTokenizer does not support add_special_tokens=True, ignoring.",
437
+ UserWarning
438
+ )
439
+ add_special_tokens = False
440
+
441
  is_batch = isinstance(text, list)
442
  texts = text if is_batch else [text]
443