| import torch |
| import prettytable |
| import copy |
| import sys |
| from importlib import import_module |
| from inspect import signature |
| from pathlib import Path |
| from typing import Optional, Union |
|
|
| import numpy as np |
| from scipy.stats import kendalltau, pearsonr, spearmanr |
| from sklearn.metrics import ( |
| confusion_matrix, |
| f1_score, |
| fbeta_score, |
| get_scorer, |
| get_scorer_names, |
| make_scorer, |
| ) |
|
|
|
|
def binary_accuracy(output: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """Compute the accuracy (in percent) for binary classification.

    A prediction counts as positive when its score is ``>= 0.5``. Predictions
    and targets are flattened and compared element-wise.

    Args:
        output (tensor): Scores, one per sample (e.g. shape :math:`(N,)` or
            :math:`(N, 1)`).
        target (tensor): Binary labels with the same number of elements as
            ``output``.

    Returns:
        A 0-dim float tensor holding ``100 * correct / target.size(0)``.
        Call ``.item()`` to obtain a Python float.

    Raises:
        ZeroDivisionError: If the first dimension of ``target`` has size 0.
    """
    with torch.no_grad():
        batch_size = target.size(0)
        # reshape (rather than view) also accepts non-contiguous inputs.
        pred = (output >= 0.5).float().reshape(-1)
        correct = pred.eq(target.reshape(-1)).float().sum()
        correct.mul_(100.0 / batch_size)
        return correct
|
|
|
|
def accuracy(output, target, topk=(1,)):
    r"""
    Computes the accuracy over the k top predictions for the specified values of k

    Args:
        output (tensor): Classification outputs, :math:`(N, C)` where `C = number of classes`
        target (tensor): :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1`
        topk (sequence[int]): The values of k to evaluate. A k larger than `C` is
            treated as `C` instead of raising an error.

    Returns:
        A list with one 0-dim float tensor per entry of ``topk`` (same order), each
        holding the top-k accuracy in percent.

    Raises:
        ZeroDivisionError: If ``target`` is empty along its first dimension.
    """
    with torch.no_grad():
        # torch.topk cannot return more entries than there are classes.
        maxk = min(max(topk), output.size(1))
        batch_size = target.size(0)

        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        # (maxk, N): row r holds every sample's (r + 1)-th best class.
        pred = pred.t()
        correct = pred.eq(target[None])

        # Slicing past maxk is harmless, so k needs no extra clamping here.
        return [
            correct[:k].flatten().sum(dtype=torch.float32) * (100.0 / batch_size)
            for k in topk
        ]
|
|
|
|
class ConfusionMatrix(object):
    """Accumulate a confusion matrix over successive batches.

    ``mat[i, j]`` counts the samples whose target is class ``i`` and whose
    prediction is class ``j``.

    Args:
        num_classes (int): Number of classes; the matrix is
            ``num_classes x num_classes``.
    """

    def __init__(self, num_classes):
        self.num_classes = num_classes
        # Allocated lazily by update() so it is created on the targets' device.
        self.mat = None

    def update(self, target, output):
        """
        Update confusion matrix.

        Args:
            target: ground truth class indices
            output: predicted class indices

        Shape:
            - target: any shape; entries outside ``[0, num_classes)`` are ignored.
            - output: same shape as ``target``; the kept entries must lie in
              ``[0, num_classes)``.
        """
        n = self.num_classes
        if self.mat is None:
            self.mat = torch.zeros((n, n), dtype=torch.int64, device=target.device)
        with torch.no_grad():
            # Drop targets that are not valid class indices (e.g. ignore labels).
            valid = (target >= 0) & (target < n)
            # Encode each (target, prediction) pair as one flat index. Both sides
            # are cast because torch.bincount only accepts integer input.
            inds = n * target[valid].to(torch.int64) + output[valid].to(torch.int64)
            self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n)

    def reset(self):
        """Zero the accumulated counts; a no-op before the first ``update``."""
        if self.mat is not None:
            self.mat.zero_()

    def compute(self):
        """compute global accuracy, per-class accuracy and per-class IoU

        Requires at least one prior ``update`` call. A class with no samples
        yields NaN in the per-class results (0 / 0).

        Returns:
            Tuple ``(acc_global, acc, iu)``: the overall fraction of correct
            predictions, the per-class fraction of correctly predicted targets
            (diagonal / row sum), and the per-class intersection over union.
        """
        h = self.mat.float()
        diag = torch.diag(h)
        acc_global = diag.sum() / h.sum()
        acc = diag / h.sum(1)
        iu = diag / (h.sum(1) + h.sum(0) - diag)
        return acc_global, acc, iu

    def __str__(self):
        acc_global, acc, iu = self.compute()
        return (
            "global correct: {:.1f}\n"
            "average row correct: {}\n"
            "IoU: {}\n"
            "mean IoU: {:.1f}"
        ).format(
            acc_global.item() * 100,
            ["{:.1f}".format(i) for i in (acc * 100).tolist()],
            ["{:.1f}".format(i) for i in (iu * 100).tolist()],
            iu.mean().item() * 100,
        )

    def format(self, classes: list):
        """Get the accuracy and IoU for each class in the table format

        Args:
            classes (list): Class names, in class-index order.

        Returns:
            A string with the global accuracy, mean accuracy and mean IoU (all in
            percent), followed by a table with one row per class.
        """
        acc_global, acc, iu = self.compute()

        table = prettytable.PrettyTable(["class", "acc", "iou"])
        for class_name, per_acc, per_iu in zip(
            classes, (acc * 100).tolist(), (iu * 100).tolist()
        ):
            table.add_row([class_name, per_acc, per_iu])

        return (
            "global correct: {:.1f}\nmean correct:{:.1f}\nmean IoU: {:.1f}\n{}".format(
                acc_global.item() * 100,
                acc.mean().item() * 100,
                iu.mean().item() * 100,
                table.get_string(),
            )
        )
|
|
|
|
def kappa(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    weights: Optional[Union[str, np.ndarray]] = None,
    allow_off_by_one: bool = False,
) -> float:
    """
    Calculate the kappa inter-rater agreement.

    The agreement is calculated between the gold standard and the predicted
    ratings. Potential values range from -1 (representing complete disagreement)
    to 1 (representing complete agreement). A kappa value of 0 is expected if
    all agreement is due to chance.

    In the course of calculating kappa, all items in ``y_true`` and ``y_pred`` will
    first be converted to floats and then rounded to integers.

    It is assumed that y_true and y_pred contain the complete range of possible
    ratings.

    This function contains a combination of code from yorchopolis's kappa-stats
    and Ben Hamner's Metrics projects on Github.

    Parameters
    ----------
    y_true : numpy.ndarray
        The true/actual/gold labels for the data.
    y_pred : numpy.ndarray
        The predicted/observed labels for the data.
    weights : Optional[Union[str, numpy.ndarray]], default=None
        Specifies the weight matrix for the calculation.
        Possible values are: ``None`` (unweighted-kappa), ``"quadratic"``
        (quadratically weighted kappa), ``"linear"`` (linearly weighted kappa),
        and a two-dimensional numpy array (a custom matrix of weights). Each
        weight in this array corresponds to the :math:`w_{ij}` values in the
        Wikipedia description of how to calculate weighted Cohen's kappa.
    allow_off_by_one : bool, default=False
        If true, ratings that are off by one are counted as
        equal, and all other differences are reduced by
        one. For example, 1 and 2 will be considered to be
        equal, whereas 1 and 3 will have a difference of 1
        when building the weights matrix. It has no effect when a
        custom weight matrix is passed.

    Returns
    -------
    float
        The weighted or unweighted kappa score.

    Raises
    ------
    AssertionError
        If ``y_true`` and ``y_pred`` have different lengths.
    ValueError
        If the inputs are empty.
    ValueError
        If labels cannot be converted to int.
    ValueError
        If invalid weight scheme.
    """
    # Raised explicitly instead of via ``assert`` so the check is not stripped
    # under ``python -O``; the exception type is unchanged for callers.
    if len(y_true) != len(y_pred):
        raise AssertionError("y_true and y_pred must have the same length")
    if len(y_true) == 0:
        raise ValueError("kappa is undefined for empty y_true and y_pred")

    # Labels may arrive as strings or floats (e.g. "4.0"): go through float,
    # round, then convert to int.
    try:
        y_true = np.array([int(np.round(float(y))) for y in y_true])
        y_pred = np.array([int(np.round(float(y))) for y in y_pred])
    except ValueError as err:
        raise ValueError(
            "For kappa, the labels should be integers or strings"
            " that can be converted to ints (E.g., '4.0' or "
            "'3')."
        ) from err

    # Shift the ratings so that the lowest observed one becomes 0; they can then
    # be used directly as matrix indices.
    min_rating = int(min(y_true.min(), y_pred.min()))
    max_rating = int(max(y_true.max(), y_pred.max()))
    y_true = y_true - min_rating
    y_pred = y_pred - min_rating
    num_ratings = max_rating - min_rating + 1
    num_scored_items = float(len(y_true))

    # Observed confusion counts: rows are true ratings, columns are predictions.
    # Each (true, pred) pair is encoded as a single flat index.
    counts = np.bincount(
        y_true * num_ratings + y_pred, minlength=num_ratings**2
    ).reshape(num_ratings, num_ratings)

    # A string selects a built-in weighting scheme; anything else that is not
    # None is used as a custom weight matrix.
    if isinstance(weights, str):
        wt_scheme, weights = weights, None
    else:
        wt_scheme = ""

    if weights is None:
        ratings = np.arange(num_ratings)
        diff = np.abs(ratings[:, None] - ratings[None, :])
        if allow_off_by_one:
            # Non-zero differences shrink by one; exact matches stay at zero.
            diff = np.maximum(diff - 1, 0)
        if wt_scheme == "linear":
            kappa_weights = diff.astype(float)
        elif wt_scheme == "quadratic":
            kappa_weights = (diff**2).astype(float)
        elif not wt_scheme:
            kappa_weights = (diff > 0).astype(float)
        else:
            raise ValueError(
                "Invalid weight scheme specified for " f"kappa: {wt_scheme}"
            )
    else:
        kappa_weights = weights

    # Expected agreement under independence: outer product of the marginals.
    hist_true = counts.sum(axis=1) / num_scored_items
    hist_pred = counts.sum(axis=0) / num_scored_items
    expected = np.outer(hist_true, hist_pred)
    observed = counts / num_scored_items

    # With an all-zero weight matrix every pair counts as agreement.
    k = 1.0
    if np.count_nonzero(kappa_weights):
        observed_sum = np.sum(kappa_weights * observed)
        expected_sum = np.sum(kappa_weights * expected)
        k -= observed_sum / expected_sum

    return k
|
|
|
|
def correlation(
    y_true: np.ndarray, y_pred: np.ndarray, corr_type: str = "pearson"
) -> float:
    """
    Calculate given correlation type between ``y_true`` and ``y_pred``.

    ``y_pred`` can be multi-dimensional. If ``y_pred`` is 1-dimensional, it
    may either contain probabilities, most-likely classification labels, or
    regressor predictions. In that case, we simply return the correlation
    between ``y_true`` and ``y_pred``. If ``y_pred`` is multi-dimensional,
    it contains probabilities for multiple classes in which case, we infer the
    most likely labels and then compute the correlation between those and
    ``y_true``.

    Parameters
    ----------
    y_true : numpy.ndarray
        The true/actual/gold labels for the data.
    y_pred : numpy.ndarray
        The predicted/observed labels for the data.
    corr_type : str, default="pearson"
        Which type of correlation to compute. Possible
        choices are "pearson", "spearman", and "kendall_tau".
        Any other value falls back to "pearson".

    Returns
    -------
    float
        correlation value if well-defined, else 0.0
    """
    corr_funcs = {"spearman": spearmanr, "kendall_tau": kendalltau}
    corr_func = corr_funcs.get(corr_type, pearsonr)

    # Convert to a numpy array in case a list was passed.
    y_pred = np.array(y_pred)

    # Multi-dimensional input is a probability array: reduce it to the most
    # likely label per row. 1-dimensional input is used as is.
    if y_pred.ndim > 1:
        y_pred = np.argmax(y_pred, axis=1)

    ret_score = corr_func(y_true, y_pred)[0]

    # The correlation is undefined (NaN) when, e.g., one input is constant;
    # report 0.0 in that case, as documented.
    return 0.0 if np.isnan(ret_score) else ret_score
|
|
|
|
def f1_score_least_frequent(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Calculate F1 score of the least frequent label/class.

    The least frequent label is chosen among the labels that occur in
    ``y_true``; ties are resolved in favor of the smallest label.

    Parameters
    ----------
    y_true : numpy.ndarray
        The true/actual/gold labels for the data.
    y_pred : numpy.ndarray
        The predicted/observed labels for the data.

    Returns
    -------
    float
        F1 score of the least frequent label.
    """
    labels, counts = np.unique(y_true, return_counts=True)
    least_frequent = labels[np.argmin(counts)]
    # Request the score for that label explicitly. Indexing the full per-class
    # array by label value is only valid when the labels are exactly 0..K-1.
    return f1_score(y_true, y_pred, labels=[least_frequent], average=None)[0]
|
|