| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| import kenlm | |
# FastAPI application instance for this module.
app = FastAPI()

# Language model loaded once at import time from the local "char.bin" file.
lm = kenlm.Model("char.bin")

# Corpus lines are read eagerly at import time; the ``with`` block guarantees
# the file handle is closed instead of waiting for garbage collection.
with open("1.txt", encoding="utf-8") as _corpus_file:
    CORPUS = _corpus_file.read().splitlines()
class Query(BaseModel):
    """Request payload carrying the text to be completed."""

    # Raw user input; `predict` strips surrounding whitespace before using it
    # as the completion prefix.
    text: str
def generate_candidates(prefix, max_n=100, corpus=None):
    """Return up to ``max_n`` distinct corpus words that start with ``prefix``.

    Words are produced by whitespace-splitting each corpus line. The result
    keeps first-occurrence order, so the same corpus always yields the same
    candidates (truncating a ``set`` would pick an arbitrary subset that can
    change between interpreter runs).

    Args:
        prefix: String every returned word must start with. An empty prefix
            matches every word.
        max_n: Maximum number of candidates to return. Values <= 0 yield an
            empty list.
        corpus: Optional iterable of text lines to search. Defaults to the
            module-level ``CORPUS``.

    Returns:
        A list of unique matching words in order of first appearance.
    """
    if max_n <= 0:
        return []

    lines = CORPUS if corpus is None else corpus

    # A dict doubles as an insertion-ordered set.
    seen = {}
    for line in lines:
        for word in line.split():
            if word.startswith(prefix) and word not in seen:
                seen[word] = None
                # Stop scanning as soon as enough candidates are collected.
                if len(seen) >= max_n:
                    return list(seen)
    return list(seen)
def predict(q: Query):
    """Rank completion candidates for the query text and return the top five.

    The query text is stripped of surrounding whitespace and used as a prefix
    for ``generate_candidates``. Each candidate is scored with ``lm.score``
    and the results are ordered from highest to lowest score.

    Args:
        q: Request payload whose ``text`` field holds the prefix to complete.

    Returns:
        A dict with a single key ``"candidates"`` mapping to a list of at
        most five ``{"word": ..., "score": ...}`` dicts, best score first.
    """
    # NOTE(review): no route decorator is visible on this function in this
    # file; confirm the endpoint is registered with ``app`` elsewhere.
    stem = q.text.strip()
    ranked = sorted(
        (
            {"word": word, "score": lm.score(word)}
            for word in generate_candidates(stem)
        ),
        key=lambda entry: entry["score"],
        reverse=True,
    )
    return {"candidates": ranked[:5]}