Spaces:
Sleeping
Sleeping
oyly committed on
Commit ·
af727db
1
Parent(s): 56aad4b
fix tokenizer bug
Browse files
flux/modules/conditioner_lore.py
CHANGED
|
@@ -112,23 +112,24 @@ class HFEmbedder(nn.Module):
|
|
| 112 |
if (words is None) or start_idx<0: # some samples do not need this
|
| 113 |
return [-1]
|
| 114 |
res = []
|
| 115 |
-
|
|
|
|
| 116 |
for i in range(start_idx,len(tokens)):
|
| 117 |
this_token = tokens[i].strip('▁')
|
| 118 |
if this_token == "":
|
| 119 |
continue
|
| 120 |
if words.startswith(this_token):
|
| 121 |
res.append(i)
|
| 122 |
-
|
| 123 |
-
if
|
| 124 |
break
|
| 125 |
else:
|
| 126 |
continue
|
| 127 |
-
if
|
| 128 |
-
|
| 129 |
-
break
|
| 130 |
-
if flag:
|
| 131 |
res.append(i)
|
|
|
|
|
|
|
| 132 |
return res
|
| 133 |
|
| 134 |
for src_words, tgt_words, src_index, tgt_index in replacements:
|
|
|
|
| 112 |
if (words is None) or start_idx<0: # some samples do not need this
|
| 113 |
return [-1]
|
| 114 |
res = []
|
| 115 |
+
l_words = len(words.replace(" ", ""))
|
| 116 |
+
l_find = 0
|
| 117 |
for i in range(start_idx,len(tokens)):
|
| 118 |
this_token = tokens[i].strip('▁')
|
| 119 |
if this_token == "":
|
| 120 |
continue
|
| 121 |
if words.startswith(this_token):
|
| 122 |
res.append(i)
|
| 123 |
+
l_find += len(this_token)
|
| 124 |
+
if l_find >= l_words:
|
| 125 |
break
|
| 126 |
else:
|
| 127 |
continue
|
| 128 |
+
if l_find:
|
| 129 |
+
l_find += len(this_token)
|
|
|
|
|
|
|
| 130 |
res.append(i)
|
| 131 |
+
if l_find >= l_words:
|
| 132 |
+
break
|
| 133 |
return res
|
| 134 |
|
| 135 |
for src_words, tgt_words, src_index, tgt_index in replacements:
|