# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
| """TODO: Add a description here.""" |
|
|
| import os |
|
|
| import datasets |
| import evaluate |
|
|


_CITATION = """\
@inproceedings{yao-etal-2019-docred,
    title = "{D}oc{RED}: A Large-Scale Document-Level Relation Extraction Dataset",
    author = "Yao, Yuan and Ye, Deming and Li, Peng and Han, Xu and Lin, Yankai and Liu, Zhenghao and Liu, Zhiyuan and Xie, Lixin and Zhou, Jie and Sun, Maosong",
    booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
    year = "2019",
}
"""


_DESCRIPTION = """\
Evaluation of document-level relation extraction in the DocRED format. The module reports precision, recall and
F1 over predicted (head entity, tail entity, relation) triples, plus Ign F1, which ignores correct triples whose
fact already appears in the annotated training data.
"""


_KWARGS_DESCRIPTION = """
Scores predicted relation triples against reference annotations, following the official DocRED evaluation.
Args:
    predictions: list of predicted documents in DocRED format. Each document is a dict with a "title", a
        "vertexSet" (one entry per entity, each a list of mention dicts) and "labels" holding the parallel
        lists "head", "tail", "relation_id" and "evidence".
    references: list of gold documents in the same format as `predictions`.
    train_data: optional list of annotated training documents, used to compute ign_f1. If omitted, ign_f1
        equals f1.
Returns:
    f1: micro F1 over predicted (title, head, tail, relation) triples,
    precision: micro precision over the same triples,
    recall: micro recall over the same triples,
    ign_f1: F1 ignoring correct triples whose fact already occurs in train_data.
Examples:
    >>> docred_metric = evaluate.load("docred")
    >>> results = docred_metric.compute(predictions=predicted_docs, references=gold_docs)
    >>> sorted(results.keys())
    ['f1', 'ign_f1', 'precision', 'recall']
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class DocRED(evaluate.Metric):
    """DocRED relation extraction metric: precision, recall, F1 and Ign F1 over predicted triples."""

    # Mirrors the DocRED dataset schema: "vertexSet" lists entities, each as a list of mention dicts,
    # and "labels" stores the gold relations as parallel lists.
    dataset_feat = {
        "title": datasets.Value("string"),
        "vertexSet": datasets.Sequence(
            datasets.Sequence(
                {
                    "name": datasets.Value("string"),
                    "sent_id": datasets.Value("int32"),
                    "pos": datasets.Sequence(datasets.Value("int32"), length=2),
                    "type": datasets.Value("string"),
                }
            )
        ),
        "labels": {
            "head": datasets.Sequence(datasets.Value("int32")),
            "tail": datasets.Sequence(datasets.Value("int32")),
            "relation_id": datasets.Sequence(datasets.Value("string")),
            "evidence": datasets.Sequence(datasets.Sequence(datasets.Value("int32"))),
        },
    }
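    # For illustration, a minimal document matching this schema (hypothetical values):
    #     {
    #         "title": "Example",
    #         "vertexSet": [
    #             [{"name": "Alice", "sent_id": 0, "pos": [0, 1], "type": "PER"}],
    #             [{"name": "Paris", "sent_id": 0, "pos": [4, 5], "type": "LOC"}],
    #         ],
    #         "labels": {"head": [0], "tail": [1], "relation_id": ["P19"], "evidence": [[0]]},
    #     }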
    eps = 1e-12  # guards against division by zero when there are no predictions or no gold labels

    def _info(self):
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features({"predictions": self.dataset_feat, "references": self.dataset_feat}),
            homepage="https://github.com/thunlp/DocRED",
            codebase_urls=["https://github.com/thunlp/DocRED"],
            reference_urls=["https://github.com/thunlp/DocRED"],
        )
    def _download_and_prepare(self, dl_manager):
        """Optional: download external resources useful to compute the scores."""
        pass
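
    # Sketch of the ignored-fact bookkeeping with hypothetical names: if the training data links the
    # mentions "Alice" (head) and "Paris" (tail) with relation "P19", _generate_fact records the fact
    # ("Alice", "Paris", "P19"), and a correct prediction of that triple is later discounted for ign_f1.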
    def _generate_fact(self, dataset):
        """Collect every (head name, tail name, relation) fact annotated in `dataset`.

        `dataset` is expected in the raw DocRED JSON format, where each entry of "vertexSet"
        is a list of mention dicts.
        """
        if dataset is None:
            return set()
        facts = set()
        for data in dataset:
            vertexSet = data["vertexSet"]
            labels = self._convert_labels_to_list(data["labels"])
            for label in labels:
                rel = label["relation_id"]
                # Every mention pair of the head and tail entities yields a fact.
                for n1 in vertexSet[label["head"]]:
                    for n2 in vertexSet[label["tail"]]:
                        facts.add((n1["name"], n2["name"], rel))
        return facts

    def _convert_to_relation_set(self, data):
        """Flatten documents into a set of (title, head, tail, relation_id) tuples."""
        relation_set = set()
        for d in data:
            labels = self._convert_labels_to_list(d["labels"])
            for label in labels:
                relation_set.add((d["title"], label["head"], label["tail"], label["relation_id"]))
        return relation_set
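
    # _convert_labels_to_list, below, turns parallel lists into per-relation dicts. For example
    # (hypothetical values), {"head": [0, 1], "tail": [1, 2], "relation_id": ["P19", "P17"]} becomes
    # [{"head": 0, "tail": 1, "relation_id": "P19"}, {"head": 1, "tail": 2, "relation_id": "P17"}].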
    def _convert_labels_to_list(self, labels):
        """Turn a dict of parallel lists into a list of per-relation dicts."""
        keys = list(labels.keys())
        labels = [{key: labels[key][i] for key in keys} for i in range(len(labels[keys[0]]))]
        return labels
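
    # A worked example of the scores computed below (hypothetical numbers): with 4 predicted triples of
    # which 3 are correct against 5 gold triples, precision = 3/4 and recall = 3/5, so f1 = 2/3. If one
    # correct triple states a fact already seen in train_data, the ignored precision drops to
    # (3 - 1) / (4 - 1) = 2/3 and ign_f1 = 2 * (2/3) * (3/5) / ((2/3) + (3/5)) ≈ 0.632.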
    def _compute(self, predictions, references, train_data=None):
        """Returns the scores."""
        # Facts annotated in the training data, used to compute ign_f1.
        fact_in_train_annotated = self._generate_fact(train_data)

        std = {}  # gold (title, relation, head, tail) -> set of gold evidence sentence ids
        tot_evidences = 0
        title2vertexSet = {}

        for x in references:
            title = x["title"]
            vertexSet = x["vertexSet"]
            title2vertexSet[title] = vertexSet
            labels = self._convert_labels_to_list(x["labels"])
            for label in labels:
                r = label["relation_id"]
                h_idx = label["head"]
                t_idx = label["tail"]
                std[(title, r, h_idx, t_idx)] = set(label["evidence"])
                tot_evidences += len(label["evidence"])

        tot_relations = len(std)
        # The flattened tuples below drop the predicted evidence, so keep it in a lookup keyed like `std`.
        pred_evidence = {}
        for x in predictions:
            for label in self._convert_labels_to_list(x["labels"]):
                key = (x["title"], label["relation_id"], label["head"], label["tail"])
                pred_evidence[key] = set(label["evidence"])
        pred_rel = self._convert_to_relation_set(predictions)
        submission_answer = sorted(pred_rel)

        correct_re = 0
        correct_evidence = 0
        pred_evi = 0
        correct_in_train_annotated = 0
        for title, h_idx, t_idx, r in submission_answer:
            # Skip predictions for documents that have no reference annotation.
            if title not in title2vertexSet:
                continue
            vertexSet = title2vertexSet[title]

            evi = pred_evidence.get((title, r, h_idx, t_idx), set())
            pred_evi += len(evi)

            if (title, r, h_idx, t_idx) in std:
                correct_re += 1
                stdevi = std[(title, r, h_idx, t_idx)]
                correct_evidence += len(stdevi & evi)
                # References pass through the `datasets` feature conversion, so each entity arrives as a
                # dict of parallel mention lists; check every head/tail mention-name pair against the
                # training facts.
                in_train_annotated = False
                for n1 in vertexSet[h_idx]["name"]:
                    for n2 in vertexSet[t_idx]["name"]:
                        if (n1, n2, r) in fact_in_train_annotated:
                            in_train_annotated = True
                if in_train_annotated:
                    correct_in_train_annotated += 1

        re_p = 1.0 * correct_re / (len(submission_answer) + self.eps)
        re_r = 1.0 * correct_re / (tot_relations + self.eps)
        if re_p + re_r == 0:
            re_f1 = 0
        else:
            re_f1 = 2.0 * re_p * re_r / (re_p + re_r)

        # Evidence scores are computed but not included in the returned dict.
        evi_p = 1.0 * correct_evidence / pred_evi if pred_evi > 0 else 0
        evi_r = 1.0 * correct_evidence / (tot_evidences + self.eps)
        if evi_p + evi_r == 0:
            evi_f1 = 0
        else:
            evi_f1 = 2.0 * evi_p * evi_r / (evi_p + evi_r)

        # Ignored-setting precision: discount correct predictions whose fact was already
        # annotated in the training data.
        re_p_ignore_train_annotated = (
            1.0
            * (correct_re - correct_in_train_annotated)
            / (len(submission_answer) - correct_in_train_annotated + self.eps)
        )

        if re_p_ignore_train_annotated + re_r == 0:
            re_f1_ignore_train_annotated = 0
        else:
            re_f1_ignore_train_annotated = (
                2.0 * re_p_ignore_train_annotated * re_r / (re_p_ignore_train_annotated + re_r)
            )

        return {"f1": re_f1, "precision": re_p, "recall": re_r, "ign_f1": re_f1_ignore_train_annotated}
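

# Minimal usage sketch (assumes this script is saved locally as "docred.py", and that `gold_docs`,
# `predicted_docs` and `train_docs` are lists of documents in the format described in _KWARGS_DESCRIPTION):
#
#     import evaluate
#
#     docred_metric = evaluate.load("docred.py")
#     results = docred_metric.compute(predictions=predicted_docs, references=gold_docs, train_data=train_docs)
#     print(results)  # e.g. {'f1': 0.667, 'precision': 0.75, 'recall': 0.6, 'ign_f1': 0.632}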