vishesh-t27 committed on
Commit
138e620
·
verified ·
1 Parent(s): 42aa86f

Update tokenization_nandi.py

Browse files
Files changed (1) hide show
  1. tokenization_nandi.py +28 -28
tokenization_nandi.py CHANGED
@@ -112,34 +112,34 @@ class NandiTokenizer(TokenizersBackend):
112
 
113
 
114
 
115
- # def encode(
116
- # self,
117
- # text,
118
- # text_pair=None,
119
- # add_special_tokens: bool = True,
120
- # padding=False,
121
- # truncation=None,
122
- # max_length=None,
123
- # stride: int = 0,
124
- # padding_side=None,
125
- # return_tensors=None,
126
- # **kwargs,
127
- # ):
128
- # if isinstance(text, str):
129
- # # This is a temporary fix to match the behaviour of the training pipeline
130
- # text = "<|im_start|>" + " " + text
131
- # return super().encode(
132
- # text,
133
- # text_pair=text_pair,
134
- # add_special_tokens=add_special_tokens,
135
- # padding=padding,
136
- # truncation=truncation,
137
- # max_length=max_length,
138
- # stride=stride,
139
- # padding_side=padding_side,
140
- # return_tensors=return_tensors,
141
- # **kwargs,
142
- # )
143
 
144
 
145
  __all__ = ["NandiTokenizer"]
 
112
 
113
 
114
 
115
+ def encode(
116
+ self,
117
+ text,
118
+ text_pair=None,
119
+ add_special_tokens: bool = True,
120
+ padding=False,
121
+ truncation=None,
122
+ max_length=None,
123
+ stride: int = 0,
124
+ padding_side=None,
125
+ return_tensors=None,
126
+ **kwargs,
127
+ ):
128
+ if isinstance(text, str):
129
+ # This is a temporary fix to match the behaviour of the training pipeline
130
+ text = "<|im_start|>" + " " + text
131
+ return super().encode(
132
+ text,
133
+ text_pair=text_pair,
134
+ add_special_tokens=add_special_tokens,
135
+ padding=padding,
136
+ truncation=truncation,
137
+ max_length=max_length,
138
+ stride=stride,
139
+ padding_side=padding_side,
140
+ return_tensors=return_tensors,
141
+ **kwargs,
142
+ )
143
 
144
 
145
  __all__ = ["NandiTokenizer"]