| |
| import logging |
| import os |
| import os.path as osp |
| from collections import OrderedDict |
| from pathlib import Path |
| from typing import Dict, Optional, Sequence, Union |
|
|
| import numpy as np |
| import torch |
|
|
| from mmengine.fileio import FileClient, dump |
| from mmengine.fileio.io import get_file_backend |
| from mmengine.hooks import Hook |
| from mmengine.logging import print_log |
| from mmengine.registry import HOOKS |
| from mmengine.utils import is_seq_of, scandir |
|
|
# Type of the ``data_batch`` argument received by the ``after_*_iter`` hooks:
# a dict, tuple or list, or ``None`` when no batch is passed.
DATA_BATCH = Union[dict, tuple, list, None]
# Type of ``out_suffix``: a single filename suffix or a sequence of suffixes.
SUFFIX_TYPE = Union[Sequence[str], str]
|
|
|
|
@HOOKS.register_module()
class LoggerHook(Hook):
    """Collect logs from different components of ``Runner`` and write them to
    terminal, JSON file, tensorboard, wandb, etc.

    ``LoggerHook`` is used to record logs formatted by ``LogProcessor`` during
    training/validation/testing phase. It is used to control following
    behaviors:

    - The frequency of logs update in terminal, local, tensorboard, wandb,
      etc.
    - The frequency of show experiment information in terminal.
    - The work directory to save logs.

    Args:
        interval (int): Logging interval (every k iterations).
            Defaults to 10.
        ignore_last (bool): Ignore the log of last iterations in each epoch if
            the number of remaining iterations is less than :attr:`interval`.
            Defaults to True.
        interval_exp_name (int): Logging interval for experiment name. This
            feature is to help users conveniently get the experiment
            information from screen or log file. Defaults to 1000.
        out_dir (str or Path, optional): The root directory to save
            logs. If not specified, nothing is copied in ``after_run``.
            If specified, the ``out_dir`` will be the concatenation
            of ``out_dir`` and the last level directory of ``runner.work_dir``.
            For example, if the input ``out_dir`` is ``./tmp`` and
            ``runner.work_dir`` is ``./work_dir/cur_exp``, then the log will be
            saved in ``./tmp/cur_exp``. Defaults to None.
        out_suffix (Tuple[str] or str): Those files in ``runner._log_dir``
            ending with ``out_suffix`` will be copied to ``out_dir``. Defaults
            to ('.json', '.log', '.py', 'yaml').
        keep_local (bool): Whether to keep local logs in the local machine
            when :attr:`out_dir` is specified. If False, the local log will be
            removed. Defaults to True.
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmengine.fileio.FileClient` for details.
            Defaults to None. It will be deprecated in future. Please use
            `backend_args` instead.
        log_metric_by_epoch (bool): Whether to output metric in validation step
            by epoch. It can be true when running in epoch based runner.
            If set to True, `after_val_epoch` will set `step` to
            ``runner.epoch`` in `runner.visualizer.add_scalars`. Otherwise
            `step` will be ``runner.iter``. Defaults to True.
        backend_args (dict, optional): Arguments to instantiate the
            prefix of uri corresponding backend. Defaults to None.
            New in v0.2.0.

    Raises:
        TypeError: If ``interval``, ``ignore_last``, ``interval_exp_name``,
            ``out_dir``, ``keep_local`` or ``out_suffix`` has the wrong type.
        ValueError: If ``interval`` or ``interval_exp_name`` is not positive,
            if ``file_client_args`` is given without ``out_dir``, or if both
            ``file_client_args`` and ``backend_args`` are given.

    Examples:
        >>> # The simplest LoggerHook config.
        >>> logger_hook_cfg = dict(interval=20)
    """
    priority = 'BELOW_NORMAL'

    def __init__(self,
                 interval: int = 10,
                 ignore_last: bool = True,
                 interval_exp_name: int = 1000,
                 out_dir: Optional[Union[str, Path]] = None,
                 out_suffix: SUFFIX_TYPE = ('.json', '.log', '.py', 'yaml'),
                 keep_local: bool = True,
                 file_client_args: Optional[dict] = None,
                 log_metric_by_epoch: bool = True,
                 backend_args: Optional[dict] = None):

        # ---- argument validation -------------------------------------
        if not isinstance(interval, int):
            raise TypeError('interval must be an integer')
        if interval <= 0:
            raise ValueError('interval must be greater than 0')

        if not isinstance(ignore_last, bool):
            raise TypeError('ignore_last must be a boolean')

        if not isinstance(interval_exp_name, int):
            raise TypeError('interval_exp_name must be an integer')
        if interval_exp_name <= 0:
            raise ValueError('interval_exp_name must be greater than 0')

        if out_dir is not None and not isinstance(out_dir, (str, Path)):
            raise TypeError('out_dir must be a str or Path object')

        if not isinstance(keep_local, bool):
            raise TypeError('keep_local must be a boolean')

        # A file client is only used to reach ``out_dir``, so it makes no
        # sense to configure one without a destination.
        if out_dir is None and file_client_args is not None:
            raise ValueError(
                'file_client_args should be "None" when `out_dir` is not '
                'specified.')

        if file_client_args is not None:
            print_log(
                '"file_client_args" will be deprecated in future. '
                'Please use "backend_args" instead',
                logger='current',
                level=logging.WARNING)
            if backend_args is not None:
                raise ValueError(
                    '"file_client_args" and "backend_args" cannot be set '
                    'at the same time.')

        if not (isinstance(out_suffix, str) or is_seq_of(out_suffix, str)):
            raise TypeError('out_suffix should be a string or a sequence of '
                            f'string, but got {type(out_suffix)}')

        # ---- state ---------------------------------------------------
        self.out_suffix = out_suffix
        self.out_dir = out_dir
        self.interval = interval
        self.ignore_last = ignore_last
        self.interval_exp_name = interval_exp_name
        self.keep_local = keep_local
        self.file_client_args = file_client_args
        # Set in ``before_run`` once the runner's timestamp is available.
        self.json_log_path: Optional[str] = None

        if self.out_dir is not None:
            self.file_client = FileClient.infer_client(file_client_args,
                                                       self.out_dir)
            # The deprecated ``file_client_args`` path reuses the file client
            # as the backend; otherwise the backend is resolved from
            # ``out_dir`` and ``backend_args``.
            if file_client_args is None:
                self.file_backend = get_file_backend(
                    self.out_dir, backend_args=backend_args)
            else:
                self.file_backend = self.file_client

        self.log_metric_by_epoch = log_metric_by_epoch

    def before_run(self, runner) -> None:
        """Resolve the final ``self.out_dir`` and name the JSON log file.

        Args:
            runner (Runner): The runner of the training process.
        """
        if self.out_dir is not None:
            # The final `self.out_dir` is the concatenation of `self.out_dir`
            # and the last level directory of `runner.work_dir`.
            basename = osp.basename(runner.work_dir.rstrip(osp.sep))
            self.out_dir = self.file_backend.join_path(self.out_dir, basename)
            runner.logger.info(
                f'Text logs will be saved to {self.out_dir} after the '
                'training process.')

        self.json_log_path = f'{runner.timestamp}.json'

    def after_train_iter(self,
                         runner,
                         batch_idx: int,
                         data_batch: DATA_BATCH = None,
                         outputs: Optional[dict] = None) -> None:
        """Record logs after training iteration.

        Args:
            runner (Runner): The runner of the training process.
            batch_idx (int): The index of the current batch in the train loop.
            data_batch (dict tuple or list, optional): Data from dataloader.
            outputs (dict, optional): Outputs from model.
        """
        is_last_batch = self.end_of_epoch(runner.train_dataloader, batch_idx)

        # Print the experiment name every ``interval_exp_name`` iterations
        # and at the end of every epoch.
        if (self.every_n_train_iters(runner, self.interval_exp_name)
                or is_last_batch):
            exp_info = f'Exp name: {runner.experiment_name}'
            runner.logger.info(exp_info)

        # Besides the regular interval, the last batch of an epoch is logged
        # when ``ignore_last`` is off or when the dataloader has no more than
        # ``interval`` batches.
        if not self.every_n_inner_iters(batch_idx, self.interval) and not (
                is_last_batch and
            (not self.ignore_last
             or len(runner.train_dataloader) <= self.interval)):
            return

        tag, log_str = runner.log_processor.get_log_after_iter(
            runner, batch_idx, 'train')
        runner.logger.info(log_str)
        runner.visualizer.add_scalars(
            tag, step=runner.iter + 1, file_path=self.json_log_path)

    def after_val_iter(self,
                       runner,
                       batch_idx: int,
                       data_batch: DATA_BATCH = None,
                       outputs: Optional[Sequence] = None) -> None:
        """Record logs after validation iteration.

        Args:
            runner (Runner): The runner of the validation process.
            batch_idx (int): The index of the current batch in the validation
                loop.
            data_batch (dict or tuple or list, optional): Data from dataloader.
                Defaults to None.
            outputs (sequence, optional): Outputs from model.
        """
        if self.every_n_inner_iters(batch_idx, self.interval):
            # Only the formatted text is used here; the tag is discarded.
            _, log_str = runner.log_processor.get_log_after_iter(
                runner, batch_idx, 'val')
            runner.logger.info(log_str)

    def after_test_iter(self,
                        runner,
                        batch_idx: int,
                        data_batch: DATA_BATCH = None,
                        outputs: Optional[Sequence] = None) -> None:
        """Record logs after testing iteration.

        Args:
            runner (Runner): The runner of the testing process.
            batch_idx (int): The index of the current batch in the test loop.
            data_batch (dict or tuple or list, optional): Data from dataloader.
            outputs (sequence, optional): Outputs from model.
        """
        if self.every_n_inner_iters(batch_idx, self.interval):
            # Only the formatted text is used here; the tag is discarded.
            _, log_str = runner.log_processor.get_log_after_iter(
                runner, batch_idx, 'test')
            runner.logger.info(log_str)

    def after_val_epoch(self,
                        runner,
                        metrics: Optional[Dict[str, float]] = None) -> None:
        """Log the validation summary and record its scalars.

        Args:
            runner (Runner): The runner of the validation process.
            metrics (Dict[str, float], optional): Evaluation results of all
                metrics on validation dataset. The keys are the names of the
                metrics, and the values are corresponding results. Not read
                here; the logged values come from ``runner.log_processor``.
        """
        tag, log_str = runner.log_processor.get_log_after_epoch(
            runner, len(runner.val_dataloader), 'val')
        runner.logger.info(log_str)

        # When ``runner._train_loop`` is still a dict or None, step 0 is used
        # instead of reading ``runner.epoch`` / ``runner.iter``.
        # NOTE(review): presumably this avoids forcing the train loop to be
        # built during a validation-only run - confirm against Runner.
        if (isinstance(runner._train_loop, dict)
                or runner._train_loop is None):
            step = 0
        elif self.log_metric_by_epoch:
            step = runner.epoch
        else:
            step = runner.iter
        runner.visualizer.add_scalars(
            tag, step=step, file_path=self.json_log_path)

    def after_test_epoch(self,
                         runner,
                         metrics: Optional[Dict[str, float]] = None) -> None:
        """Log the test summary and dump it to the JSON log file.

        Args:
            runner (Runner): The runner of the testing process.
            metrics (Dict[str, float], optional): Evaluation results of all
                metrics on test dataset. The keys are the names of the
                metrics, and the values are corresponding results. Not read
                here; the logged values come from ``runner.log_processor``.
        """
        tag, log_str = runner.log_processor.get_log_after_epoch(
            runner, len(runner.test_dataloader), 'test', with_non_scalar=True)
        runner.logger.info(log_str)
        # The tag is requested with non-scalar values, so convert it to
        # json-friendly types before dumping.
        dump(
            self._process_tags(tag),
            osp.join(runner.log_dir, self.json_log_path))

    @staticmethod
    def _process_tags(tags: dict):
        """Convert tag values to json-friendly type.

        Lists/tuples become lists, dicts are converted recursively, tensors
        and arrays are converted with ``tolist()``, and real-number or bool
        NumPy scalars are unwrapped to Python scalars. Values of any other
        unsupported type are replaced by ``None``.

        Args:
            tags (dict): Mapping from tag name to value.

        Returns:
            OrderedDict: The converted mapping.
        """

        def process_val(value):
            if isinstance(value, (list, tuple)):
                # Array type of json
                return [process_val(item) for item in value]
            if isinstance(value, dict):
                # Object type of json
                return {k: process_val(v) for k, v in value.items()}
            if isinstance(value, (str, int, float, bool)) or value is None:
                # Other supported type of json
                return value
            if isinstance(value, (torch.Tensor, np.ndarray)):
                return value.tolist()
            if isinstance(value, (np.integer, np.floating, np.bool_)):
                # NumPy scalars such as ``np.float32`` are not instances of
                # the Python scalar types checked above; unwrap them instead
                # of dropping the metric.
                return value.item()
            # Drop unsupported values.
            return None

        return OrderedDict(process_val(tags))

    def after_run(self, runner) -> None:
        """Copy logs to ``self.out_dir`` if ``self.out_dir is not None``

        Args:
            runner (Runner): The runner of the training/testing/validation
                process.
        """
        # Close the visualizer first, whether or not logs are copied.
        runner.visualizer.close()

        if self.out_dir is None:
            return

        local_files = []
        for filename in scandir(runner._log_dir, self.out_suffix, True):
            local_filepath = osp.join(runner._log_dir, filename)
            local_files.append(local_filepath)
            out_filepath = self.file_backend.join_path(self.out_dir, filename)
            with open(local_filepath) as f:
                self.file_backend.put_text(f.read(), out_filepath)

            runner.logger.info(
                f'The file {local_filepath} has been uploaded to '
                f'{out_filepath}.')

            # Announce the removal now; the files are deleted only after the
            # loop, once the file handlers have been closed.
            if not self.keep_local:
                runner.logger.info(f'{local_filepath} was removed due to the '
                                   '`self.keep_local=False`. You can check '
                                   f'the running logs in {out_filepath}')

        if not self.keep_local:
            # Close file handlers before deleting the files they may hold.
            for handler in runner.logger.handlers:
                if isinstance(handler, logging.FileHandler):
                    handler.close()

            for file in local_files:
                os.remove(file)
|
|