| import os |
| import pandas as pd |
| import subprocess |
| import torch |
| import esm |
| import numpy as np |
| import shutil |
| from tqdm import tqdm |
|
|
| from joblib import delayed, Parallel |
|
|
| import warnings |
| from Bio import BiopythonWarning, SeqIO |
|
|
| from geometry import * |
|
|
| |
| warnings.filterwarnings('ignore', category=BiopythonWarning) |
|
|
| input_dir="./Data/Baselines_new/Tests" |
| output_dir="/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/Baselines_new/Codesign" |
|
|
| model = esm.pretrained.esmfold_v1() |
| model = model.eval().to('cuda:2') |
|
|
| def process_rf(name='1aze_B'): |
| input_dir=".Data/Baselines_new/Tests" |
| output_dir=".Data/Baselines_new/Codesign" |
| struct_dir = os.path.join(output_dir,name,'rfs_refold') |
| seq_dir = os.path.join(output_dir,name,'mpnns','seqs') |
| os.makedirs(struct_dir,exist_ok=True) |
| seqs = {} |
| for seq_path in os.listdir(seq_dir): |
| tmp_seqs = [] |
| if seq_path.endswith('.fasta'): |
| for record in SeqIO.parse(os.path.join(seq_dir,seq_path), "fasta"): |
| tmp_seqs.append(str(record.seq)) |
| seqs[seq_path.split('.')[0]] = tmp_seqs[-1] |
| for seq_name,seq in seqs.items(): |
| with torch.no_grad(): |
| output = model.infer_pdb(seq) |
| with open(os.path.join(struct_dir,seq_name+'.pdb'),'w') as f: |
| f.write(output) |
|
|
| def process_pg(name='1aze_B',chain_id='A'): |
| input_dir=".Data/Baselines_new/Tests" |
| output_dir=".Data/Baselines_new/Codesign" |
| struct_dir = os.path.join(output_dir,name,'pgs_refold') |
| seq_dir = os.path.join(output_dir,name,'pgs') |
| os.makedirs(struct_dir,exist_ok=True) |
| seqs = {} |
| for seq_path in os.listdir(seq_dir): |
| if seq_path.endswith('.pdb'): |
| seqs[seq_path.split('.')[0]] = get_seq(os.path.join(seq_dir,seq_path),chain_id) |
| for seq_name,seq in seqs.items(): |
| with torch.no_grad(): |
| output = model.infer_pdb(seq) |
| with open(os.path.join(struct_dir,seq_name+'.pdb'),'w') as f: |
| f.write(output) |
|
|
| def refold(name,chain_id,sub_dir): |
| raw_dir = os.path.join('/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/Models_new/Codesign',sub_dir,'pdbs') |
| refold_dir = os.path.join('/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/Models_new/Codesign',sub_dir,'pdbs_refold') |
| os.makedirs(os.path.join(refold_dir,name),exist_ok=True) |
| seqs = {} |
| for seq_path in os.listdir(os.path.join(raw_dir,name)): |
| if seq_path.endswith('.pdb'): |
| seqs[seq_path.split('.')[0]] = get_seq(os.path.join(raw_dir,name,seq_path),chain_id) |
| for seq_name,seq in seqs.items(): |
| with torch.no_grad(): |
| output = model.infer_pdb(seq) |
| with open(os.path.join(refold_dir,name,seq_name+'.pdb'),'w') as f: |
| f.write(output) |