| from fastapi import FastAPI |
| from huggingface_hub import hf_hub_download |
| import os |
| from pydantic import BaseModel |
| from fastapi.responses import JSONResponse |
|
|
# Build marker printed at import time so deployment logs show which version is live.
print("Version ---- 2")
# FastAPI application instance; route handlers below register against it.
app = FastAPI()
|
|
def download_file_from_hf(repo_id, filename):
    """Fetch *filename* from the Hugging Face Hub repo *repo_id*.

    The file is materialized under ``~/.sinatools`` (created on first use),
    which is where the sinatools modules expect their data files.
    Returns the local filesystem path of the downloaded file.
    """
    cache_dir = os.path.expanduser("~/.sinatools")
    os.makedirs(cache_dir, exist_ok=True)

    # NOTE(review): local_dir_use_symlinks is deprecated (and ignored) in
    # recent huggingface_hub releases; kept so behavior on older versions
    # is unchanged.
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        local_dir=cache_dir,
        local_dir_use_symlinks=False,
    )
|
|
# Pre-fetch the pickle data files into ~/.sinatools before the sinatools
# imports below, since those modules read the files at import time.
_HF_DATA_FILES = [
    ("SinaLab/ALMA", "lemmas_dic.pickle"),
    ("SinaLab/ArabGlossBERT", "two_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "three_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "four_grams.pickle"),
    ("SinaLab/ArabGlossBERT", "five_grams.pickle"),
]
for _repo_id, _filename in _HF_DATA_FILES:
    download_file_from_hf(_repo_id, _filename)
|
|
| from sinatools.morphology.morph_analyzer import analyze |
| from sinatools.utils.tokenizer import sentence_tokenizer |
|
|
class ALMARequest(BaseModel):
    """Request body for the /predict endpoint.

    All fields are plain strings supplied by the client and forwarded to
    sinatools' analyze().
    """

    text: str      # input text; split into sentences before analysis
    language: str  # language identifier, passed through to analyze()
    task: str      # analysis task name, passed through to analyze()
    flag: str      # fourth positional argument forwarded to analyze()
|
|
@app.post("/predict")
def predict(request: ALMARequest):
    """Run sinatools morphological analysis on each sentence of the input.

    The request text is split into sentences, each sentence is analyzed with
    analyze(sentence, language, task, flag), and the per-sentence results are
    returned as JSON with 1-based sentence ids.

    Returns a JSONResponse whose body is
    {"resp": [...], "statusText": "OK", "statusCode": 0}.
    """
    sentences = sentence_tokenizer(
        request.text,
        dot=True,
        new_line=True,
        question_mark=True,
        exclamation_mark=True,
    )

    # enumerate(..., 1) replaces the manual `i + 1` counter; request.flag is
    # declared as `str` on ALMARequest, so the original str(flag) wrapper was
    # a no-op and has been dropped.
    results_with_sentences = [
        {
            "sentence_id": sentence_id,
            "sentence": sentence,
            "lemmatizer_results": analyze(
                sentence, request.language, request.task, request.flag
            ),
        }
        for sentence_id, sentence in enumerate(sentences, 1)
    ]

    content = {"resp": results_with_sentences, "statusText": "OK", "statusCode": 0}

    # media_type is already application/json for JSONResponse; kept explicit
    # so the emitted response is byte-identical to the original.
    return JSONResponse(
        content=content,
        media_type="application/json",
        status_code=200,
    )
|
|