Rekipjan's picture
Upload folder using huggingface_hub
06bc836 verified
raw
history blame contribute delete
760 Bytes
import kenlm
# Language model used to score candidate words.
lm = kenlm.Model("char.bin")

# Dictionary lines searched for completions; loaded once at start-up.
# The context manager closes the file as soon as it has been read.
with open("dict.txt", encoding="utf-8") as _corpus_file:
    CORPUS = _corpus_file.read().splitlines()
def generate_candidates(prefix, max_n=50, corpus=None):
    """Collect distinct corpus words that start with ``prefix``.

    Args:
        prefix: Leading text every returned word must start with.
        max_n: Maximum number of words to return.
        corpus: Iterable of text lines to search; each line is split on
            whitespace into words. Defaults to the module-level ``CORPUS``.

    Returns:
        A list of at most ``max_n`` unique words, in the order they first
        appear in the corpus.
    """
    lines = CORPUS if corpus is None else corpus
    # A dict keeps first-seen order, so the ``max_n`` cut-off is reproducible;
    # slicing ``list(set(...))`` keeps a subset whose order is not defined.
    seen = {}
    for line in lines:
        # Cheap substring pre-check (could be tightened to startswith):
        # a line without the prefix cannot contain a matching word.
        if prefix not in line:
            continue
        for word in line.split():
            if word.startswith(prefix):
                seen[word] = None
    return list(seen)[:max_n]
def predict(prefix):
    """Rank completion candidates for ``prefix`` by language-model score.

    Returns a list of up to five ``(word, score)`` tuples, highest score
    first.
    """
    # NOTE(review): each candidate is passed to lm.score() as-is. kenlm
    # tokenises its input on whitespace, so if char.bin is a character-level
    # model the candidates may need to be space-separated first -- confirm.
    ranked = sorted(
        ((word, lm.score(word)) for word in generate_candidates(prefix)),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:5]
# Interactive loop: read a prefix, print its ranked candidates, repeat.
while True:
    try:
        p = input("prefix: ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin / Ctrl-D or Ctrl-C: leave the loop quietly instead of
        # terminating with a traceback.
        print()
        break
    res = predict(p)
    print("\n候选:")
    for w, s in res:
        print(w, s)