"""Interactive prefix completion backed by a KenLM language model.

Candidate words are collected from a word list on disk (``dict.txt``) and
ranked by the score the language model (``char.bin``) assigns to each one.
"""

import kenlm

lm = kenlm.Model("char.bin")

# Read the corpus once at import time; the context manager guarantees the
# file handle is closed instead of being left to the garbage collector.
with open("dict.txt", encoding="utf-8") as _corpus_file:
    CORPUS = _corpus_file.read().splitlines()


def generate_candidates(prefix, max_n=50):
    """Return up to ``max_n`` distinct corpus words that start with ``prefix``.

    Each corpus line is split on whitespace and every resulting word is
    tested against ``prefix``.

    Args:
        prefix: Text the candidate words must start with.
        max_n: Upper bound applied to the result with a slice ``[:max_n]``.

    Returns:
        A list of unique words in first-seen corpus order, so the same
        corpus and prefix always produce the same candidates.
    """
    # A dict is used as an ordered set: de-duplicating through ``set`` would
    # make the truncation below keep an arbitrary, run-dependent subset,
    # because string hashing is randomized per process.
    unique_words = {}
    for line in CORPUS:
        # Cheap substring pre-filter that skips lines which cannot contain a
        # match (could be tightened to a startswith-based check).
        if prefix not in line:
            continue
        for word in line.split():
            if word.startswith(prefix):
                unique_words[word] = None
    return list(unique_words)[:max_n]


def predict(prefix):
    """Return the five best-scoring completions for ``prefix``.

    Args:
        prefix: Text the completions must start with.

    Returns:
        A list of at most five ``(word, score)`` tuples ordered from the
        highest ``lm.score`` value to the lowest.
    """
    # NOTE(review): the whole word is passed to lm.score() as-is. If
    # "char.bin" is a character-level model, the input probably needs to be
    # space-separated characters (" ".join(word)) - confirm against how the
    # model was trained.
    scored = [(word, lm.score(word)) for word in generate_candidates(prefix)]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:5]


def main():
    """Prompt for prefixes in a loop and print the ranked candidates."""
    while True:
        try:
            prefix = input("prefix: ")
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C: finish the prompt line and exit
            # quietly instead of dumping a traceback.
            print()
            break
        print("\n候选:")
        for word, score in predict(prefix):
            print(word, score)


# Only start the interactive loop when executed as a script, so importing
# this module to reuse predict() does not block on input().
if __name__ == "__main__":
    main()