| from typing import Any |
|
|
| import datasets |
| import evaluate |
|
|
| from sklearn.metrics import f1_score, accuracy_score |
|
|
|
|
# Human-readable summary of the metric; injected into the ViHSD class docstring
# via the `add_start_docstrings` decorator below.
_DESCRIPTION = """
This metric is used to compute the accuracy and F1 score of models on the ViHSD dataset from [A Large-scale Dataset for Hate Speech Detection on Vietnamese Social Media Texts](https://arxiv.org/abs/2103.11528) by Luu et al. (2021).
The ViHSD dataset is a large-scale dataset for hate speech detection on Vietnamese social media texts.
It contains over 30,000 comments, each labeled as CLEAN, OFFENSIVE, or HATE.
The dataset is used to evaluate the quality of hate speech detection models, including deep learning and transformer models.
"""
|
|
| _KWARGS_DESCRIPTION = """ |
| Args: |
| predictions: list of predictions to score. |
| Each translation should be tokenized into a list of tokens. |
| references: list of lists of references for each translation. |
| Each reference should be tokenized into a list of tokens. |
| Returns: depending on the GLUE subset, one or several of: |
| "accuracy": Accuracy |
| "micro_f1": Micro averaged F1 score |
| "macro_f1": Macro averaged F1 score |
| "weighted_f1": Weighted averaged F1 score |
| """ |
|
|
# BibTeX citation for the ViHSD paper (Luu et al., 2021); surfaced through
# `MetricInfo.citation` in `_info` below.
_CITATION = """
@InProceedings{10.1007/978-3-030-79457-6_35,
author="Luu, Son T.
and Nguyen, Kiet Van
and Nguyen, Ngan Luu-Thuy",
editor="Fujita, Hamido
and Selamat, Ali
and Lin, Jerry Chun-Wei
and Ali, Moonis",
title="A Large-Scale Dataset for Hate Speech Detection on Vietnamese Social Media Texts",
booktitle="Advances and Trends in Artificial Intelligence. Artificial Intelligence Practices",
year="2021",
publisher="Springer International Publishing",
address="Cham",
pages="415--426",
abstract="In recent years, Vietnam witnesses the mass development of social network users on different social platforms such as Facebook, Youtube, Instagram, and Tiktok. On social media, hate speech has become a critical problem for social network users. To solve this problem, we introduce the ViHSD - a human-annotated dataset for automatically detecting hate speech on the social network. This dataset contains over 30,000 comments, each comment in the dataset has one of three labels: CLEAN, OFFENSIVE, or HATE. Besides, we introduce the data creation process for annotating and evaluating the quality of the dataset. Finally, we evaluate the dataset by deep learning and transformer models.",
isbn="978-3-030-79457-6"
}
"""
|
|
|
|
def acc_and_f1(preds, labels):
    """Score predictions against gold labels.

    Returns a dict with plain-float values: overall accuracy followed by the
    micro-, macro-, and weighted-averaged F1 scores (sklearn implementations).
    """
    scores = {"accuracy": float(accuracy_score(y_true=labels, y_pred=preds))}
    for average in ("micro", "macro", "weighted"):
        scores[f"{average}_f1"] = float(
            f1_score(y_true=labels, y_pred=preds, average=average)
        )
    return scores
|
|
|
|
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class ViHSD(evaluate.Metric):
    """Accuracy/F1 metric for the ViHSD Vietnamese hate-speech dataset."""

    def _info(self):
        """Describe the metric: expected input features, docs, and URLs."""
        repo_url = "https://github.com/sonlam1102/vihsd"
        # Both inputs are single integer class labels per example.
        feature_schema = datasets.Features(
            {
                "predictions": datasets.Value("int64"),
                "references": datasets.Value("int64"),
            }
        )
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            homepage=repo_url,
            inputs_description=_KWARGS_DESCRIPTION,
            features=feature_schema,
            codebase_urls=[repo_url],
            reference_urls=[
                repo_url,
                "https://arxiv.org/abs/2103.11528",
            ],
            format="numpy",
        )

    def _compute(
        self,
        predictions: Any = None,
        references: Any = None,
        **kwargs: Any
    ):
        """Delegate scoring to ``acc_and_f1``; extra keyword args are ignored."""
        return acc_and_f1(predictions, references)
|
|