| from fastapi import FastAPI |
| from huggingface_hub import hf_hub_download |
| import os |
| from pydantic import BaseModel |
| from fastapi.responses import JSONResponse |
|
|
# Build marker printed at import time so deployment logs show which version is live.
print("Version ---- 2")
# FastAPI application instance; route handlers below register against it.
app = FastAPI()
|
|
def download_file_from_hf(repo_id, filename):
    """Fetch *filename* from the Hugging Face Hub repo *repo_id*.

    The file is materialized under ``~/.sinatools`` (created on first use),
    which is where the sinatools modules expect their data files.
    Returns the local filesystem path of the downloaded file.
    """
    cache_dir = os.path.expanduser("~/.sinatools")
    os.makedirs(cache_dir, exist_ok=True)

    # NOTE(review): local_dir_use_symlinks is deprecated (and ignored) in
    # recent huggingface_hub releases; kept so behavior on older versions
    # is unchanged.
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        local_dir=cache_dir,
        local_dir_use_symlinks=False,
    )
|
|
# Pre-fetch the pickle data files into ~/.sinatools before the sinatools
# imports below, since those modules read the files at import time.
_HF_DATA_FILES = [
    ("SinaLab/ALMA", "lemmas_dic.pickle"),
    ("SinaLab/ArabGlossBERT", "two_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "three_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "four_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "five_grams.pickle"),
]
for _repo_id, _filename in _HF_DATA_FILES:
    download_file_from_hf(_repo_id, _filename)
|
|
| from sinatools.morphology.morph_analyzer import analyze |
| from sinatools.utils.tokenizer import sentence_tokenizer |
|
|
class ALMARequest(BaseModel):
    """Request body for the /predict endpoint.

    All fields are plain strings supplied by the client and forwarded to
    sinatools' analyze().
    """

    text: str      # input text; split into sentences before analysis
    language: str  # language identifier, passed through to analyze()
    task: str      # analysis task name, passed through to analyze()
    flag: str      # fourth positional argument forwarded to analyze()
|
|
@app.post("/predict")
def predict(request: ALMARequest):
    """Run sinatools morphological analysis on each sentence of the input.

    The request text is split into sentences, each sentence is analyzed with
    analyze(sentence, language, task, flag), and the per-sentence results are
    returned as JSON with 1-based sentence ids.

    Returns a JSONResponse whose body is
    {"resp": [...], "statusText": "OK", "statusCode": 0}.
    """
    sentences = sentence_tokenizer(
        request.text,
        dot=True,
        new_line=True,
        question_mark=True,
        exclamation_mark=True,
    )

    # enumerate(..., 1) replaces the manual `i + 1` counter; request.flag is
    # declared as `str` on ALMARequest, so the original str(flag) wrapper was
    # a no-op and has been dropped.
    results_with_sentences = [
        {
            "sentence_id": sentence_id,
            "sentence": sentence,
            "lemmatizer_results": analyze(
                sentence, request.language, request.task, request.flag
            ),
        }
        for sentence_id, sentence in enumerate(sentences, 1)
    ]

    content = {"resp": results_with_sentences, "statusText": "OK", "statusCode": 0}

    # media_type is already application/json for JSONResponse; kept explicit
    # so the emitted response is byte-identical to the original.
    return JSONResponse(
        content=content,
        media_type="application/json",
        status_code=200,
    )
|
|