Rekipjan
/

Uyghur-Char-KenLM-Input-Method

character-level

Model card Files Files and versions

Uyghur-Char-KenLM-Input-Method / test.py

Rekipjan's picture

Upload folder using huggingface_hub

06bc836 verified 3 days ago

history blame contribute delete

760 Bytes

	import kenlm

	# Language model loaded from the binary file "char.bin" in the working directory.
	# NOTE(review): the file name suggests a character-level model — confirm how it was trained.
	lm = kenlm.Model("char.bin")

	# One entry per line of "dict.txt" (UTF-8). The context manager guarantees
	# the file handle is closed as soon as the contents have been read.
	with open("dict.txt", encoding="utf-8") as _dict_file:
	    CORPUS = _dict_file.read().splitlines()

	def generate_candidates(prefix, max_n=50, corpus=None):
	    """Collect distinct words that start with ``prefix``.

	    Each line is split on whitespace and every resulting word that starts
	    with ``prefix`` becomes a candidate. Duplicates are dropped and the
	    candidates keep the order of their first occurrence, so the result (and
	    in particular which words survive the ``max_n`` cut) is the same on
	    every run.

	    Args:
	        prefix: Text that each returned word must start with.
	        max_n: Upper bound on the number of candidates returned.
	        corpus: Iterable of text lines to search. Defaults to the
	            module-level ``CORPUS`` when omitted.

	    Returns:
	        A list of unique matching words, at most ``max_n`` long.
	    """
	    lines = CORPUS if corpus is None else corpus

	    # A dict is used as an insertion-ordered set: re-assigning an existing
	    # key keeps its original position.
	    unique = {}
	    for line in lines:
	        # Cheap substring pre-filter: a line that does not contain the
	        # prefix anywhere cannot contain a word starting with it.
	        if prefix not in line:
	            continue
	        for word in line.split():
	            if word.startswith(prefix):
	                unique[word] = None

	    return list(unique)[:max_n]


	def predict(prefix):
	    """Rank completions of ``prefix`` by language-model score.

	    Candidates are obtained from ``generate_candidates(prefix)`` and each
	    one is scored with ``lm.score``.

	    Returns:
	        A list of at most five ``(candidate, score)`` tuples, ordered from
	        the highest score to the lowest.
	    """
	    # NOTE(review): every candidate is handed to lm.score as a single
	    # unsplit string. If the model was trained on space-separated
	    # characters, the word may need to be split first — confirm.
	    ranked = [(cand, lm.score(cand)) for cand in generate_candidates(prefix)]

	    # Stable descending sort on the score, then keep the best five.
	    ranked.sort(key=lambda pair: pair[1], reverse=True)
	    return ranked[:5]


	def main():
	    """Run the interactive prompt.

	    Repeatedly reads a prefix from standard input, ranks completions with
	    ``predict`` and prints each candidate with its score. The loop ends
	    when input is exhausted (EOF) or the user interrupts with Ctrl-C.
	    """
	    while True:
	        try:
	            p = input("prefix: ")
	        except (EOFError, KeyboardInterrupt):
	            # Closed stdin / Ctrl-D / Ctrl-C: finish the prompt line and
	            # leave quietly instead of terminating with a traceback.
	            print()
	            break

	        res = predict(p)

	        # The header below is the Chinese word for "candidates".
	        print("\n候选:")
	        for w, s in res:
	            print(w, s)


	# Only start the prompt when executed as a script, so importing this
	# module does not block on input().
	if __name__ == "__main__":
	    main()