| import os |
| import re |
| import pandas as pd |
| import MDAnalysis as mda |
| from MDAnalysis.analysis import pca, align, rms |
| import numpy as np |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
| import warnings |
| import argparse |
| warnings.filterwarnings("ignore") |
|
|
|
|
| def cal_PCA(md_pdb_path,ref_path,pred_pdb_path,n_components = 2): |
| print("") |
| print('filename=',os.path.basename(ref_path)) |
|
|
| u = mda.Universe(md_pdb_path, md_pdb_path) |
| u_ref = mda.Universe(ref_path, ref_path) |
|
|
| aligner = align.AlignTraj(u, |
| u_ref, |
| select='name CA or name C or name N', |
| in_memory=True).run() |
|
|
| pc = pca.PCA(u, |
| select='name CA or name C or name N', |
| align=False, mean=None, |
| |
| n_components=n_components, |
| ).run() |
|
|
| backbone = u.select_atoms('name CA or name C or name N') |
| n_bb = len(backbone) |
| print('There are {} backbone atoms in the analysis'.format(n_bb)) |
|
|
| for i in range(n_components): |
| print(f"Cumulated variance {i+1}: {pc.cumulated_variance[i]:.3f}") |
|
|
| transformed = pc.transform(backbone, n_components=n_components) |
|
|
| print(transformed.shape) |
|
|
| df = pd.DataFrame(transformed, |
| columns=['PC{}'.format(i+1) for i in range(n_components)]) |
|
|
|
|
| plt.scatter(df['PC1'],df['PC2'],marker='o') |
| plt.show() |
|
|
| output_dir = os.path.dirname(md_pdb_path) |
| output_filename = os.path.basename(md_pdb_path).split('.')[0] |
|
|
| df.to_csv(os.path.join(output_dir, f'{output_filename}_md_pca.csv')) |
| plt.savefig(os.path.join(output_dir, f'{output_filename}_md_pca.png')) |
|
|
|
|
| for k,v in pred_pdb_path.items(): |
| u_pred = mda.Universe(v, v) |
| aligner = align.AlignTraj(u_pred, |
| u_ref, |
| select='name CA or name C or name N', |
| in_memory=True).run() |
| pred_backbone = u_pred.select_atoms('name CA or name C or name N') |
| pred_transformed = pc.transform(pred_backbone, n_components=n_components) |
|
|
| df = pd.DataFrame(pred_transformed, |
| columns=['PC{}'.format(i+1) for i in range(n_components)]) |
|
|
| plt.scatter(df['PC1'],df['PC2'],marker='o') |
| plt.show() |
|
|
| output_dir = os.path.dirname(v) |
| output_filename = os.path.basename(v).split('.')[0] |
| df.to_csv(os.path.join(output_dir, f'{output_filename}_{k}_pca.csv')) |
| plt.savefig(os.path.join(output_dir, f'{output_filename}_{k}_pca.png')) |
| plt.clf() |
|
|
|
|
| if __name__ == '__main__': |
|
|
| parser = argparse.ArgumentParser() |
|
|
| parser.add_argument("--pred_pdb_dir", type=str, default="./inference/test/pred_merge_results") |
| parser.add_argument("--target_dir", type=str, default="./inference/test/target_dir") |
| parser.add_argument("--crystal_dir", type=str, default="./inference/test/crystal_dir") |
|
|
| args = parser.parse_args() |
|
|
|
|
| pred_pdb_path_org={ |
| 'P2DFlow':args.pred_pdb_dir, |
| } |
| md_pdb_path_org = args.target_dir |
| ref_path_org = args.crystal_dir |
|
|
|
|
| for file in os.listdir(md_pdb_path_org): |
| if re.search('\.pdb',file): |
| pred_pdb_path={ |
| 'P2DFlow':'', |
| |
| |
| } |
| for k,v in pred_pdb_path.items(): |
| pred_pdb_path[k]=os.path.join(pred_pdb_path_org[k],file) |
| md_pdb_path = os.path.join(md_pdb_path_org, file) |
| ref_path = os.path.join(ref_path_org, file) |
| cal_PCA(md_pdb_path,ref_path,pred_pdb_path) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|