| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| """Library to run HHsearch from Python.""" |
| import glob |
| import logging |
| import os |
| import subprocess |
| from typing import Sequence |
|
|
| from openfold.data.tools import utils |
|
|
|
|
class HHSearch:
    """Python wrapper of the HHsearch binary."""

    def __init__(
        self,
        *,
        binary_path: str,
        databases: Sequence[str],
        n_cpu: int = 2,
        maxseq: int = 1_000_000,
    ):
        """Initializes the Python HHsearch wrapper.

        Args:
            binary_path: The path to the HHsearch executable. It is stored
                as-is; its existence is not checked here.
            databases: A sequence of HHsearch database paths. This should be
                the common prefix for the database files (i.e. up to but not
                including _hhm.ffindex etc.). A single path passed as a bare
                string is treated as a one-element sequence.
            n_cpu: The number of CPUs to use.
            maxseq: The maximum number of rows in an input alignment. Note
                that this parameter is only supported in HHBlits version 3.1
                and higher.

        Raises:
            ValueError: If no file matching ``<database path>_*`` exists for
                one of the given databases.
        """
        # A bare string is itself a Sequence[str]; iterating it would yield
        # single characters, so wrap it instead of validating each character.
        if isinstance(databases, str):
            databases = [databases]

        self.binary_path = binary_path
        self.databases = databases
        self.n_cpu = n_cpu
        self.maxseq = maxseq

        for database_path in self.databases:
            # Each entry is a prefix; the actual files carry "_<suffix>" names.
            if not glob.glob(database_path + "_*"):
                logging.error(
                    "Could not find HHsearch database %s", database_path
                )
                raise ValueError(
                    f"Could not find HHsearch database {database_path}"
                )

    def query(self, a3m: str) -> str:
        """Queries the database using HHsearch using a given a3m.

        Args:
            a3m: Contents of the query alignment. It is written verbatim to a
                temporary file that is passed to the binary via ``-i``.

        Returns:
            The contents of the ``.hhr`` file written by the binary.

        Raises:
            RuntimeError: If the HHsearch process exits with a non-zero
                return code. The message carries the captured stdout and the
                first 100,000 bytes of stderr.
        """
        with utils.tmpdir_manager(base_dir="/tmp") as query_tmp_dir:
            input_path = os.path.join(query_tmp_dir, "query.a3m")
            hhr_path = os.path.join(query_tmp_dir, "output.hhr")
            with open(input_path, "w") as f:
                f.write(a3m)

            # One "-d <prefix>" pair per configured database.
            db_cmd = []
            for db_path in self.databases:
                db_cmd += ["-d", db_path]

            cmd = [
                self.binary_path,
                "-i",
                input_path,
                "-o",
                hhr_path,
                "-maxseq",
                str(self.maxseq),
                "-cpu",
                str(self.n_cpu),
            ] + db_cmd

            logging.info('Launching subprocess "%s"', " ".join(cmd))
            process = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
            )
            with utils.timing("HHsearch query"):
                # communicate() drains both pipes and waits for the process
                # to exit, so returncode is set once it returns.
                stdout, stderr = process.communicate()
            retcode = process.returncode

            if retcode:
                # stderr is truncated to keep the message bounded. Slicing
                # raw bytes can split a multi-byte character, so decode
                # leniently rather than letting a UnicodeDecodeError hide the
                # real failure.
                raise RuntimeError(
                    "HHSearch failed:\nstdout:\n%s\n\nstderr:\n%s\n"
                    % (
                        stdout.decode("utf-8", errors="replace"),
                        stderr[:100_000].decode("utf-8", errors="replace"),
                    )
                )

            # Read the result before the temporary directory context exits.
            with open(hhr_path) as f:
                hhr = f.read()
        return hhr
|
|