|
|
| from abc import ABCMeta, abstractmethod
|
|
|
|
|
class BaseStorageBackend(metaclass=ABCMeta):
    """Common interface that every storage backend has to satisfy.

    A concrete backend must provide two read operations: ``get()``, which
    reads the file as a byte stream, and ``get_text()``, which reads the
    file as text. The class cannot be instantiated until both are
    overridden.
    """

    @abstractmethod
    def get(self, filepath):
        """Read ``filepath`` and return its content as a byte stream."""

    @abstractmethod
    def get_text(self, filepath):
        """Read ``filepath`` and return its content as text."""
|
|
|
|
|
class MemcachedBackend(BaseStorageBackend):
    """Storage backend that fetches values through a memcached client.

    Args:
        server_list_cfg (str): Config file for memcached server list.
        client_cfg (str): Config file for memcached client.
        sys_path (str | None): Additional path to be appended to
            ``sys.path`` before the ``mc`` module is imported.
            Default: None.

    Attributes:
        server_list_cfg (str): Config file for memcached server list.
        client_cfg (str): Config file for memcached client.
    """

    def __init__(self, server_list_cfg, client_cfg, sys_path=None):
        # Extend the module search path first so the ``mc`` import below can
        # find a client library that lives outside the default locations.
        if sys_path is not None:
            import sys
            sys.path.append(sys_path)

        try:
            import mc
        except ImportError:
            raise ImportError('Please install memcached to enable MemcachedBackend.')

        self.server_list_cfg, self.client_cfg = server_list_cfg, client_cfg
        self._client = mc.MemcachedClient.GetInstance(server_list_cfg, client_cfg)

        # One buffer object is created here and handed to every ``Get`` call.
        self._mc_buffer = mc.pyvector()

    def get(self, filepath):
        """Fetch the value stored under ``filepath``.

        Args:
            filepath: Key to look up; it is converted with ``str()`` first.

        Returns:
            The result of ``mc.ConvertBuffer`` applied to the buffer that the
            client filled.
        """
        key = str(filepath)
        import mc
        self._client.Get(key, self._mc_buffer)
        return mc.ConvertBuffer(self._mc_buffer)

    def get_text(self, filepath):
        """Text reading is not supported by this backend.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError
|
|
|
|
|
class HardDiskBackend(BaseStorageBackend):
    """Storage backend that reads files with the built-in ``open()``."""

    def get(self, filepath):
        """Return the whole content of ``filepath`` as bytes.

        Args:
            filepath: Path of the file; it is converted with ``str()`` first.
        """
        with open(str(filepath), 'rb') as stream:
            return stream.read()

    def get_text(self, filepath):
        """Return the whole content of ``filepath`` as text.

        The file is opened in mode ``'r'`` with no explicit encoding.

        Args:
            filepath: Path of the file; it is converted with ``str()`` first.
        """
        with open(str(filepath), 'r') as stream:
            return stream.read()
|
|
|
|
|
class LmdbBackend(BaseStorageBackend):
    """Lmdb storage backend.

    Args:
        db_paths (str | os.PathLike | list | tuple): Lmdb database path, or
            a list/tuple of such paths. Every entry is converted with
            ``str()``.
        client_keys (str | list[str]): Lmdb client keys, one per database
            path. Default: 'default'.
        readonly (bool, optional): Lmdb environment parameter. If True,
            disallow any write operations. Default: True.
        lock (bool, optional): Lmdb environment parameter. If False, when
            concurrent access occurs, do not lock the database.
            Default: False.
        readahead (bool, optional): Lmdb environment parameter. If False,
            disable the OS filesystem readahead mechanism, which may improve
            random read performance when a database is larger than RAM.
            Default: False.
        **kwargs: Extra keyword arguments forwarded to ``lmdb.open``.

    Attributes:
        db_paths (list[str]): Lmdb database paths.
        _client (dict): Maps each client key to the lmdb env opened for the
            database path at the same position.

    Raises:
        ImportError: If the ``lmdb`` package cannot be imported.
        TypeError: If ``db_paths`` is not a str, path-like, list or tuple.
        AssertionError: If ``client_keys`` and ``db_paths`` differ in length.
    """

    def __init__(self, db_paths, client_keys='default', readonly=True, lock=False, readahead=False, **kwargs):
        try:
            import lmdb
        except ImportError:
            raise ImportError('Please install lmdb to enable LmdbBackend.')
        import os

        if isinstance(client_keys, str):
            client_keys = [client_keys]

        # Normalise ``db_paths`` to a list of str. A single path may be a str
        # or a path-like object; unsupported types are rejected explicitly so
        # ``self.db_paths`` can never be left unset.
        if isinstance(db_paths, (str, os.PathLike)):
            self.db_paths = [str(db_paths)]
        elif isinstance(db_paths, (list, tuple)):
            self.db_paths = [str(v) for v in db_paths]
        else:
            raise TypeError('db_paths should be a str, a path-like object or a list/tuple of them, '
                            f'but received {type(db_paths).__name__}.')
        assert len(client_keys) == len(self.db_paths), ('client_keys and db_paths should have the same length, '
                                                        f'but received {len(client_keys)} and {len(self.db_paths)}.')

        self._client = {}
        for client, path in zip(client_keys, self.db_paths):
            self._client[client] = lmdb.open(path, readonly=readonly, lock=lock, readahead=readahead, **kwargs)

    def get(self, filepath, client_key):
        """Get values according to the filepath from one lmdb named client_key.

        Args:
            filepath (str | obj:`Path`): Here, filepath is the lmdb key. It
                is converted with ``str()`` and then encoded as ASCII.
            client_key (str): Used for distinguishing different lmdb envs.

        Returns:
            Whatever ``txn.get`` returns for the encoded key inside a
            read-only transaction.

        Raises:
            AssertionError: If ``client_key`` was not registered at
                construction time.
            UnicodeEncodeError: If the key contains non-ASCII characters.
        """
        filepath = str(filepath)
        assert client_key in self._client, (f'client_key {client_key} is not in lmdb clients.')
        client = self._client[client_key]
        with client.begin(write=False) as txn:
            value_buf = txn.get(filepath.encode('ascii'))
        return value_buf

    def get_text(self, filepath):
        """Text reading is not supported by this backend.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError
|
|
|
|
|
class FileClient:
    """A general file client that reads files through a chosen backend.

    The backend class is looked up by name in ``_backends`` and instantiated
    with the keyword arguments given to the constructor; reads are then
    delegated to that backend object.

    Attributes:
        backend (str): The storage backend type. Options are "disk",
            "memcached" and "lmdb".
        client (:obj:`BaseStorageBackend`): The backend object.
    """

    # Backend name -> backend class.
    _backends = {
        'disk': HardDiskBackend,
        'memcached': MemcachedBackend,
        'lmdb': LmdbBackend,
    }

    def __init__(self, backend='disk', **kwargs):
        backend_cls = self._backends.get(backend)
        if backend_cls is None:
            supported = list(self._backends)
            raise ValueError(f'Backend {backend} is not supported. Currently supported ones'
                             f' are {supported}')
        self.backend = backend
        self.client = backend_cls(**kwargs)

    def get(self, filepath, client_key='default'):
        """Read ``filepath`` through the backend's ``get()``.

        Args:
            filepath: Path or key passed on to the backend.
            client_key (str): Only forwarded when the backend is "lmdb";
                ignored by every other backend. Default: 'default'.

        Returns:
            Whatever the backend's ``get()`` returns.
        """
        if self.backend != 'lmdb':
            return self.client.get(filepath)
        return self.client.get(filepath, client_key)

    def get_text(self, filepath):
        """Read ``filepath`` through the backend's ``get_text()``."""
        return self.client.get_text(filepath)
|
|
|