| import os, re |
| import prody as pr |
| from tqdm import tqdm |
| from multiprocessing import Pool |
|
|
|
|
| |
| |
|
|
| def write_file(output_file, outline): |
| buffer = open(output_file, 'w') |
| buffer.write(outline) |
| buffer.close() |
|
|
|
|
| def lig_rename(infile, outfile): |
| |
| lines = open(infile, 'r').readlines() |
| newlines = [] |
| for line in lines: |
| if re.search(r'^HETATM|^ATOM', line): |
| newlines.append(line[:17] + "LIG" + line[20:]) |
| else: |
| newlines.append(line) |
| write_file(outfile, ''.join(newlines)) |
|
|
|
|
| def check_mol(infile, outfile): |
| |
| os.system("cat %s | sed '/LIG/d' > %s" % (infile, outfile)) |
|
|
|
|
| def extract_pocket(protpath, |
| ligpath, |
| cutoff=5.0, |
| protname=None, |
| ligname=None, |
| pdb_pocket_file=None, |
| workdir='.'): |
| """ |
| protpath: the path of protein file (.pdb). |
| ligpath: the path of ligand file (.sdf|.mol2|.pdb). |
| cutoff: the distance range within the ligand to determine the pocket. |
| protname: the name of the protein. |
| ligname: the name of the ligand. |
| workdir: working directory. |
| """ |
| if protname is None: |
| protname = os.path.basename(protpath).split('.')[0] |
| if ligname is None: |
| ligname = os.path.basename(ligpath).split('.')[0] |
|
|
|
|
| if not re.search(r'.pdb$', ligpath): |
| os.system(f"obabel {ligpath} -O {workdir}/{ligname}.pdb") |
| else: |
| os.system(f"cp {ligpath} {workdir}/{ligname}.pdb") |
|
|
| xprot = pr.parsePDB(protpath) |
| |
|
|
| |
| |
| |
| lig_rename("%s/%s.pdb" % (workdir, ligname), "%s/%s2.pdb" % (workdir, ligname)) |
| os.remove("%s/%s.pdb" % (workdir, ligname)) |
| os.rename("%s/%s2.pdb" % (workdir, ligname), "%s/%s.pdb" % (workdir, ligname)) |
| xlig = pr.parsePDB("%s/%s.pdb" % (workdir, ligname)) |
| lresname = xlig.getResnames()[0] |
| xcom = xlig + xprot |
|
|
| |
| ret = xcom.select(f'same residue as exwithin %s of resname %s' % (cutoff, lresname)) |
|
|
| pr.writePDB("%s/%s_pocket_%s_temp.pdb" % (workdir, protname, cutoff), ret) |
| |
|
|
| check_mol("%s/%s_pocket_%s_temp.pdb" % (workdir, protname, cutoff), pdb_pocket_file) |
| os.remove("%s/%s_pocket_%s_temp.pdb" % (workdir, protname, cutoff)) |
|
|
|
|
| def get_fasta_seq(fasta_file): |
| with open(fasta_file) as f: |
| lines = [] |
| for line in f.readlines(): |
| lines.append(line.strip()) |
| fasta = "".join(lines[1:]) |
| return fasta |
|
|
|
|
| def read_fasta_from_protein(pdb_file, lig_file, target_id="test", cutoff=5.0, pdb_pocket_file="test_pocket.pdb", fasta_pocket_file="test_pocket.fasta"): |
| if not os.path.exists(pdb_file): |
| return |
|
|
| try: |
| extract_pocket(pdb_file, |
| lig_file, |
| cutoff=cutoff, |
| protname=target_id, |
| pdb_pocket_file=pdb_pocket_file, |
| ligname=f"{target_id}_ligand") |
| except Exception as e: |
| print(e) |
| return |
|
|
| os.system(f"./pdb2fasta {pdb_pocket_file} > {fasta_pocket_file}") |
| return get_fasta_seq(fasta_pocket_file) |
|
|
|
|
| def read_fasta_from_pocket(pocket_pdb_file, fasta_pocket_file="test_pocket.fasta"): |
| os.system(f"./pdb2fasta {pocket_pdb_file} > {fasta_pocket_file}") |
| return get_fasta_seq(fasta_pocket_file) |
|
|