import os
import gc
import time
import math
from datetime import datetime
from typing import Optional, List, Dict, Union, Any

import torch

from transformers import Trainer, is_datasets_available
from transformers.trainer_utils import (
    PredictionOutput,
    EvalLoopOutput,
    denumpify_detensorize,
    speed_metrics,
)
from transformers.utils import logging

if is_datasets_available():
    import datasets


logger = logging.get_logger(__name__)


class ToMixin:
    def _optimizer_to(self, device: str = "cpu") -> None:
        """
        Move the optimizer state to the specified device.

        Args:
            device (str, optional): The device to move the optimizer state to. Defaults to "cpu".
        """
        for param in self.optimizer.state.values():
            if isinstance(param, torch.Tensor):
                param.data = param.data.to(device)
                if param.grad is not None:
                    param.grad.data = param.grad.data.to(device)
            elif isinstance(param, dict):
                for subparam in param.values():
                    if isinstance(subparam, torch.Tensor):
                        subparam.data = subparam.data.to(device)
                        if subparam.grad is not None:
                            subparam.grad.data = subparam.grad.data.to(device)

    def _scheduler_to(self, device: str = "cpu") -> None:
        """
        Move the scheduler state to the specified device.

        Args:
            device (str, optional): The device to move the scheduler state to. Defaults to "cpu".

        Returns:
            None
        """
        for param in self.lr_scheduler.__dict__.values():
            if isinstance(param, torch.Tensor):
                param.data = param.data.to(device)
                if param.grad is not None:
                    param.grad.data = param.grad.data.to(device)


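# Note: `nn.Module.to()` moves only the model's parameters and buffers;
# optimizer state (e.g. Adam's `exp_avg` / `exp_avg_sq` tensors held in
# `optimizer.state`) and any tensors stored on the scheduler stay on their
# original device, which is why ToMixin walks and moves them explicitly.

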
class BaseReader(Trainer, ToMixin):
    name: Optional[str] = None

    def __init__(
        self,
        *args,
        data_args: Optional[dict] = None,
        eval_examples: Optional["datasets.Dataset"] = None,
        **kwargs,
    ):
        """
        Initializes the BaseReader.

        Args:
            *args: Positional arguments passed to Trainer.__init__.
            data_args (dict, optional): Additional arguments for data loading. Defaults to an empty dict.
            eval_examples (datasets.Dataset, optional): Evaluation examples. Defaults to None.
            **kwargs: Keyword arguments passed to Trainer.__init__.
        """
        super().__init__(*args, **kwargs)
        # Avoid a mutable default argument; fall back to an empty dict.
        self.data_args = data_args if data_args is not None else {}
        self.eval_examples = eval_examples

    def free_memory(self):
        """
        Move the model, optimizer, and scheduler state to the CPU, empty the CUDA cache, and garbage collect.

        This method is useful for freeing up GPU memory before checkpointing the model or saving it to disk.
        """
        self.model.to("cpu")
        self._optimizer_to("cpu")
        self._scheduler_to("cpu")
        torch.cuda.empty_cache()
        gc.collect()

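    # A minimal sketch of the intended `free_memory` flow (the `reader`
    # object and checkpoint path below are hypothetical, for illustration
    # only):
    #
    #     reader.train()
    #     reader.free_memory()        # offload model/optimizer/scheduler to CPU
    #     reader.save_model("ckpt")   # save without holding GPU memory
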
    def postprocess(
        self,
        output: EvalLoopOutput,
        eval_examples: Optional["datasets.Dataset"] = None,
        eval_dataset: Optional["datasets.Dataset"] = None,
        mode: str = "evaluate",
    ) -> Union[Any, PredictionOutput]:
        """
        Postprocess the evaluation loop output.

        This method is called after the evaluation loop has finished and before the evaluation metrics are computed.
        Subclasses can override it to transform the raw loop output before it is passed to the compute_metrics function.

        Args:
            output (EvalLoopOutput): The output of the evaluation loop.
            eval_examples (datasets.Dataset, optional): The raw evaluation examples. Defaults to None.
            eval_dataset (datasets.Dataset, optional): The preprocessed evaluation dataset. Defaults to None.
            mode (str, optional): Either "evaluate" or "predict". Defaults to "evaluate".

        Returns:
            Union[Any, PredictionOutput]: The modified output that will be passed to the compute_metrics function.
        """
        return output

    def evaluate(
        self,
        eval_dataset: Optional["datasets.Dataset"] = None,
        eval_examples: Optional["datasets.Dataset"] = None,
        ignore_keys: Optional[List[str]] = None,
        metric_key_prefix: str = "eval",
    ) -> Dict[str, float]:
        """
        Evaluate the model on the given dataset.

        Args:
            eval_dataset (Optional[datasets.Dataset], optional): The evaluation dataset. Defaults to None.
            eval_examples (Optional[datasets.Dataset], optional): The evaluation examples. Defaults to None.
            ignore_keys (Optional[List[str]], optional): Keys to ignore when calculating metrics. Defaults to None.
            metric_key_prefix (str, optional): The prefix for metric keys. Defaults to "eval".

        Returns:
            Dict[str, float]: The evaluation metrics.
        """
        self._memory_tracker.start()

        eval_dataset = self.eval_dataset if eval_dataset is None else eval_dataset
        eval_dataloader = self.get_eval_dataloader(eval_dataset)
        eval_examples = self.eval_examples if eval_examples is None else eval_examples

        start_time = time.time()

        # Temporarily disable metric computation inside the evaluation loop;
        # metrics are computed after postprocessing instead.
        compute_metrics = self.compute_metrics
        self.compute_metrics = None

        eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
        try:
            output = eval_loop(
                eval_dataloader,
                description="Evaluation",
                prediction_loss_only=True if compute_metrics is None else None,
                ignore_keys=ignore_keys,
                metric_key_prefix=metric_key_prefix,
            )
        finally:
            self.compute_metrics = compute_metrics

        # Restore all columns, since the evaluation loop may have dropped the
        # ones the model does not accept.
        if isinstance(eval_dataset, datasets.Dataset):
            eval_dataset.set_format(
                type=eval_dataset.format["type"],
                columns=list(eval_dataset.features.keys()),
            )

        eval_preds = self.postprocess(output, eval_examples, eval_dataset, mode="evaluate")

        metrics = {}
        if self.compute_metrics is not None:
            metrics = self.compute_metrics(eval_preds)

        metrics = denumpify_detensorize(metrics)

        # Ensure every metric key carries the metric_key_prefix.
        for key in list(metrics.keys()):
            if not key.startswith(f"{metric_key_prefix}_"):
                metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)

        total_batch_size = self.args.eval_batch_size * self.args.world_size
        metrics.update(
            speed_metrics(
                metric_key_prefix,
                start_time,
                num_samples=output.num_samples,
                num_steps=math.ceil(output.num_samples / total_batch_size),
            )
        )

        self.log(metrics)

        # Write the metrics to an (optionally reader-specific) results file.
        filename = "eval_results.txt"
        eval_result_file = f"{self.name}_{filename}" if self.name else filename
        with open(os.path.join(self.args.output_dir, eval_result_file), "w") as writer:
            logger.info("***** Eval results *****")
            writer.write("***** Eval results *****\n")
            writer.write(f"{datetime.now()}\n")
            for key in sorted(metrics.keys()):
                logger.info(f"  {key} = {metrics[key]}")
                writer.write(f"{key} = {metrics[key]}\n")
            writer.write("\n")

        self.control = self.callback_handler.on_evaluate(
            self.args, self.state, self.control, metrics
        )

        self._memory_tracker.stop_and_update_metrics(metrics)

        return metrics

    def predict(
        self,
        test_dataset: "datasets.Dataset",
        test_examples: Optional["datasets.Dataset"] = None,
        ignore_keys: Optional[List[str]] = None,
        metric_key_prefix: str = "test",
        mode: str = "predict",
    ) -> PredictionOutput:
        """
        Predicts on the given test dataset and returns the predictions.

        Args:
            test_dataset (datasets.Dataset): The test dataset.
            test_examples (Optional[datasets.Dataset], optional): The test examples. Defaults to None.
            ignore_keys (Optional[List[str]], optional): Keys to ignore when calculating metrics. Defaults to None.
            metric_key_prefix (str, optional): The prefix for metric keys. Defaults to "test".
            mode (str, optional): The mode of prediction, passed through to postprocess. Defaults to "predict".

        Returns:
            PredictionOutput: The predictions.
        """
        self._memory_tracker.start()

        test_dataloader = self.get_test_dataloader(test_dataset)
        start_time = time.time()

        # Temporarily disable metric computation inside the prediction loop;
        # postprocessing is responsible for shaping the final predictions.
        compute_metrics = self.compute_metrics
        self.compute_metrics = None

        eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
        try:
            output = eval_loop(
                test_dataloader,
                description="Prediction",
                ignore_keys=ignore_keys,
                metric_key_prefix=metric_key_prefix,
            )
        finally:
            self.compute_metrics = compute_metrics

        # Restore all columns, since the prediction loop may have dropped the
        # ones the model does not accept.
        if isinstance(test_dataset, datasets.Dataset):
            test_dataset.set_format(
                type=test_dataset.format["type"],
                columns=list(test_dataset.features.keys()),
            )

        predictions = self.postprocess(output, test_examples, test_dataset, mode=mode)

        self._memory_tracker.stop_and_update_metrics(output.metrics)

        return predictions
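
# ---------------------------------------------------------------------------
# Example: subclassing BaseReader. A minimal sketch -- `SpanReader`,
# `qa_metrics`, and the model/dataset objects below are hypothetical
# stand-ins, not part of this module:
#
#     class SpanReader(BaseReader):
#         name = "span"
#
#         def postprocess(self, output, eval_examples=None, eval_dataset=None,
#                         mode="evaluate"):
#             # Turn raw start/end logits into answer-span predictions here.
#             return output
#
#     reader = SpanReader(
#         model=model,
#         args=training_args,
#         eval_dataset=tokenized_eval_dataset,
#         eval_examples=raw_eval_examples,
#         compute_metrics=qa_metrics,
#     )
#     metrics = reader.evaluate()          # also writes "span_eval_results.txt"
#     predictions = reader.predict(tokenized_test_dataset, raw_test_examples)
# ---------------------------------------------------------------------------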