vishesh-t27 committed on
Commit
e3827cb
·
verified ·
1 Parent(s): bdf2e42

Update tokenization_nandi.py

Browse files
Files changed (1) hide show
  1. tokenization_nandi.py +18 -5
tokenization_nandi.py CHANGED
@@ -91,11 +91,24 @@ class NandiTokenizer(TokenizersBackend):
91
  **kwargs,
92
  )
93
 
94
- def prepare_for_tokenization(self, text, **kwargs):
95
-
96
- if isinstance(text, str):
97
- text = "<|im_start|> " + text
98
- return (text, kwargs)
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  # def encode(
101
  # self,
 
91
  **kwargs,
92
  )
93
 
94
+ def __call__(self, text, *args, **kwargs):
95
+ add_special_tokens = kwargs.get("add_special_tokens", False)
96
+
97
+ def add_prefix(t):
98
+ if isinstance(t, str):
99
+ return "<|im_start|> " + t
100
+ return t
101
+
102
+ # Only inject when special tokens are disabled
103
+ if not add_special_tokens:
104
+ if isinstance(text, list):
105
+ text = [add_prefix(t) for t in text]
106
+ else:
107
+ text = add_prefix(text)
108
+
109
+ return super().__call__(text, *args, **kwargs)
110
+
111
+
112
 
113
  # def encode(
114
  # self,