| import kenlm | |
# KenLM language model used to score candidate words; loaded once at import.
lm = kenlm.Model("char.bin")

# Lines of the dictionary file, searched for words matching a prefix.
# Read through a context manager so the file handle is closed right away
# instead of being left for the garbage collector.
with open("dict.txt", encoding="utf-8") as _dict_file:
    CORPUS = _dict_file.read().splitlines()
def generate_candidates(prefix, max_n=50, corpus=None):
    """Collect distinct corpus words that start with ``prefix``.

    Each corpus line is split on whitespace and every resulting word that
    begins with ``prefix`` becomes a candidate. Duplicates are dropped and
    candidates keep the order of their first appearance, so the ``max_n``
    cut-off selects the same words on every run (a plain ``set`` would
    truncate in arbitrary, hash-dependent order).

    Args:
        prefix: Leading text a candidate word must start with. An empty
            prefix matches every word.
        max_n: Upper bound on the number of results, applied as a slice.
        corpus: Optional iterable of text lines to search. When omitted,
            the module-level ``CORPUS`` is used.

    Returns:
        A list of at most ``max_n`` unique candidate words in first-seen
        order.
    """
    lines = CORPUS if corpus is None else corpus

    # dict keys are unique and insertion-ordered: an ordered de-duplication.
    unique = {}
    for line in lines:
        # Cheap substring pre-check; avoids splitting lines that cannot
        # contain a match (could be tightened with startswith).
        if prefix not in line:
            continue
        for word in line.split():
            if word.startswith(prefix):
                unique[word] = None
    return list(unique)[:max_n]
def predict(prefix):
    """Return the five best-scoring completions for ``prefix``.

    Candidates come from ``generate_candidates(prefix)``; each one is
    scored with ``lm.score`` and the results are ordered from the highest
    score to the lowest.

    Args:
        prefix: Text the completions must start with.

    Returns:
        A list of up to five ``(candidate, score)`` tuples, best first.
    """
    # NOTE(review): each candidate is passed to lm.score unchanged. The
    # model file is named "char.bin"; if it is a character-level model the
    # candidates may need to be space-separated first -- TODO confirm.
    ranked = [(word, lm.score(word)) for word in generate_candidates(prefix)]
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:5]
# Interactive loop: read a prefix, then print each predicted word with its
# score. Runs until stdin is closed or the user interrupts.
while True:
    try:
        p = input("prefix: ")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / exhausted piped input / Ctrl-C at the prompt: finish the
        # prompt line and leave the loop instead of dying with a traceback.
        print()
        break
    res = predict(p)
    # The header below is Chinese for "candidates".
    print("\n候选:")
    for w, s in res:
        print(w, s)