File size: 760 Bytes
06bc836 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | import kenlm
# KenLM model loaded from "char.bin"; predict() calls lm.score() on each candidate.
lm = kenlm.Model("char.bin")

# One entry per line of dict.txt. The context manager closes the file handle
# as soon as the contents are read instead of leaving it to the garbage
# collector.
with open("dict.txt", encoding="utf-8") as corpus_file:
    CORPUS = corpus_file.read().splitlines()
def generate_candidates(prefix, max_n=50):
    """Return the unique corpus words that start with *prefix*.

    Every line of CORPUS is split on whitespace and each word beginning with
    *prefix* is collected. Duplicates are dropped while keeping the order of
    first appearance, so the result (and which words survive the *max_n*
    cut) is the same on every run.

    Args:
        prefix: Leading text a word must start with. An empty prefix matches
            every word.
        max_n: Slice bound applied to the de-duplicated list.

    Returns:
        A list of distinct matching words, in corpus order, cut to *max_n*.
    """
    cands = []
    for line in CORPUS:
        # Cheap pre-filter: a line without the substring cannot contain a
        # word that starts with it (could be tightened with startswith).
        if prefix not in line:
            continue
        cands.extend(w for w in line.split() if w.startswith(prefix))
    # dict keys keep insertion order, unlike set(), whose iteration order for
    # strings changes with per-process hash randomization.
    return list(dict.fromkeys(cands))[:max_n]
def predict(prefix):
    """Return up to five (candidate, score) pairs for *prefix*, best first.

    Candidates come from generate_candidates(prefix); each one is scored with
    lm.score() and the pairs are ordered by descending score.
    """
    # NOTE(review): the model file is named "char.bin"; if it is a
    # character-level model the word may need to be space-separated before
    # scoring -- confirm against how the model was trained.
    ranked = [(word, lm.score(word)) for word in generate_candidates(prefix)]
    # Stable sort: candidates with equal scores keep their incoming order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:5]
# Interactive prompt: read a prefix, print the top-scored completions, repeat.
# Guarded so that importing this module does not block on input().
if __name__ == "__main__":
    while True:
        try:
            p = input("prefix: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C ends the session without a traceback.
            break
        res = predict(p)
        print("\n候选:")
        for w, s in res:
            print(w, s)
|