vishesh-t27 committed on
Commit
b118bcb
·
verified ·
1 Parent(s): 5a65c7c

Update tokenization_nandi.py

Browse files
Files changed (1) hide show
  1. tokenization_nandi.py +28 -28
tokenization_nandi.py CHANGED
@@ -110,34 +110,34 @@ class NandiTokenizer(TokenizersBackend):
110
 
111
 
112
 
113
- # def encode(
114
- # self,
115
- # text,
116
- # text_pair=None,
117
- # add_special_tokens: bool = True,
118
- # padding=False,
119
- # truncation=None,
120
- # max_length=None,
121
- # stride: int = 0,
122
- # padding_side=None,
123
- # return_tensors=None,
124
- # **kwargs,
125
- # ):
126
- # if isinstance(text, str):
127
- # # This is a temporary fix to match the behaviour of the training pipeline
128
- # text = "<|im_start|>" + " " + text
129
- # return super().encode(
130
- # text,
131
- # text_pair=text_pair,
132
- # add_special_tokens=add_special_tokens,
133
- # padding=padding,
134
- # truncation=truncation,
135
- # max_length=max_length,
136
- # stride=stride,
137
- # padding_side=padding_side,
138
- # return_tensors=return_tensors,
139
- # **kwargs,
140
- # )
141
 
142
 
143
  __all__ = ["NandiTokenizer"]
 
110
 
111
 
112
 
113
def encode(
    self,
    text,
    text_pair=None,
    add_special_tokens: bool = True,
    padding=False,
    truncation=None,
    max_length=None,
    stride: int = 0,
    padding_side=None,
    return_tensors=None,
    **kwargs,
):
    """Encode ``text``, prefixing plain-string input with ``<|im_start|>``.

    When ``text`` is a ``str``, the literal ``<|im_start|>`` followed by a
    single space is prepended before encoding. Any other input type is
    passed through untouched. All remaining arguments are forwarded
    unchanged to the parent class's ``encode``, and its result is returned
    as-is.

    NOTE(review): the meaning of the forwarded options is defined by the
    parent implementation, which is not visible here -- confirm against
    the base tokenizer's documentation.
    """
    if isinstance(text, str):
        # Temporary workaround: mirror the training pipeline, which is
        # reported to see this marker (plus one space) ahead of the text.
        start_marker = "<|im_start|>" + " "
        text = start_marker + text

    # Gather the explicitly named options once so the delegation call
    # stays compact; extra keyword arguments are forwarded alongside them.
    forwarded_options = {
        "text_pair": text_pair,
        "add_special_tokens": add_special_tokens,
        "padding": padding,
        "truncation": truncation,
        "max_length": max_length,
        "stride": stride,
        "padding_side": padding_side,
        "return_tensors": return_tensors,
    }
    return super().encode(text, **forwarded_options, **kwargs)
141
 
142
 
143
  __all__ = ["NandiTokenizer"]