{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import pathlib\n", "\n", "import pandas as pd\n", "\n", "# Project root. Defaults to the original location; set GIOVANNI_ROOT to run on another machine.\n", "path = pathlib.Path(os.environ.get(\"GIOVANNI_ROOT\", \"/home/ubuntu/giovanni\"))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Compound-vs-genetic-perturbation cosine similarity table from the benchmark output.\n", "sim_df = pd.read_csv(path/\"code/2024_Chandrasekaran_NatureMethods/benchmark/output/compound_genetic_perturbation_cosine_similarity.csv\")\n", "# JUMP-Target-1 compound metadata (tab-separated).\n", "jump_metadata = pd.read_csv(path/\"code/2024_Chandrasekaran_NatureMethods/metadata/external_metadata/JUMP-Target-1_compound_metadata.tsv\", sep=\"\\t\")\n", "\n", "# Rename the key column so it matches the metadata's 'broad_sample'.\n", "sim_df = sim_df.rename(columns={'Metadata_broad_sample': 'broad_sample'})\n", "n_rows_before_merge = len(sim_df)\n", "\n", "# Left join: every similarity row is kept; unmatched compounds get NaN InChIKey/smiles.\n", "sim_df = pd.merge(\n", "    sim_df,\n", "    jump_metadata[['broad_sample', 'InChIKey', 'smiles']],\n", "    on='broad_sample',\n", "    how='left'\n", ")\n", "\n", "# A repeated broad_sample in the metadata would silently duplicate rows in a left join.\n", "assert len(sim_df) == n_rows_before_merge, 'merge changed the row count: broad_sample is not unique in jump_metadata'\n", "\n", "sim_df.to_csv(path/\"data/compound_genetic_perturbation_cosine_similarity_inchikey.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Metadata_Source | \n", "Metadata_Plate | \n", "Metadata_Well | \n", "Metadata_JCP2022 | \n", "Cells_AreaShape_Area | \n", "Cells_AreaShape_BoundingBoxArea | \n", "Cells_AreaShape_BoundingBoxMaximum_X | \n", "Cells_AreaShape_BoundingBoxMaximum_Y | \n", "Cells_AreaShape_BoundingBoxMinimum_X | \n", "Cells_AreaShape_BoundingBoxMinimum_Y | \n", "... | \n", "Nuclei_Texture_Variance_RNA_10_02_256 | \n", "Nuclei_Texture_Variance_RNA_10_03_256 | \n", "Nuclei_Texture_Variance_RNA_3_00_256 | \n", "Nuclei_Texture_Variance_RNA_3_01_256 | \n", "Nuclei_Texture_Variance_RNA_3_02_256 | \n", "Nuclei_Texture_Variance_RNA_3_03_256 | \n", "Nuclei_Texture_Variance_RNA_5_00_256 | \n", "Nuclei_Texture_Variance_RNA_5_01_256 | \n", "Nuclei_Texture_Variance_RNA_5_02_256 | \n", "Nuclei_Texture_Variance_RNA_5_03_256 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "source_2 | \n", "1053597806 | \n", "A01 | \n", "JCP2022_085227 | \n", "-0.988041 | \n", "-0.907001 | \n", "0.096069 | \n", "0.323007 | \n", "0.146602 | \n", "0.534867 | \n", "... | \n", "1.605010 | \n", "1.693312 | \n", "1.615753 | \n", "1.604152 | \n", "1.624943 | \n", "1.612151 | \n", "1.589302 | \n", "1.594195 | \n", "1.627130 | \n", "1.610461 | \n", "
| 1 | \n", "source_2 | \n", "1053597806 | \n", "K22 | \n", "JCP2022_049123 | \n", "-0.247098 | \n", "-0.389633 | \n", "-0.828097 | \n", "0.727857 | \n", "-0.708414 | \n", "0.887224 | \n", "... | \n", "-0.696735 | \n", "-0.547206 | \n", "-0.737023 | \n", "-0.739017 | \n", "-0.741798 | \n", "-0.741577 | \n", "-0.724849 | \n", "-0.714124 | \n", "-0.755699 | \n", "-0.729048 | \n", "
| 2 | \n", "source_2 | \n", "1053597806 | \n", "K21 | \n", "JCP2022_025146 | \n", "0.882814 | \n", "0.635229 | \n", "-0.866758 | \n", "1.593473 | \n", "-0.798442 | \n", "1.682966 | \n", "... | \n", "0.427341 | \n", "0.699385 | \n", "0.506531 | \n", "0.478770 | \n", "0.518382 | \n", "0.476467 | \n", "0.454404 | \n", "0.473200 | \n", "0.500973 | \n", "0.484033 | \n", "
| 3 | \n", "source_2 | \n", "1053597806 | \n", "K20 | \n", "JCP2022_109006 | \n", "1.551945 | \n", "1.409719 | \n", "-0.624934 | \n", "-1.552434 | \n", "-0.634504 | \n", "-1.846982 | \n", "... | \n", "-0.016852 | \n", "0.321772 | \n", "0.019403 | \n", "-0.023218 | \n", "0.025426 | \n", "-0.020011 | \n", "-0.036913 | \n", "-0.064012 | \n", "-0.037481 | \n", "-0.048811 | \n", "
| 4 | \n", "source_2 | \n", "1053597806 | \n", "K19 | \n", "JCP2022_040739 | \n", "0.782228 | \n", "0.394115 | \n", "0.016284 | \n", "0.335031 | \n", "0.009199 | \n", "0.228013 | \n", "... | \n", "0.549137 | \n", "0.589808 | \n", "0.643868 | \n", "0.616816 | \n", "0.655204 | \n", "0.615139 | \n", "0.594326 | \n", "0.588221 | \n", "0.632808 | \n", "0.615094 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 803848 | \n", "source_1 | \n", "UL001799 | \n", "E28 | \n", "JCP2022_113764 | \n", "0.604395 | \n", "0.369966 | \n", "0.079747 | \n", "-1.430108 | \n", "0.084427 | \n", "-1.370574 | \n", "... | \n", "-0.568779 | \n", "-0.627040 | \n", "-0.692929 | \n", "-0.761466 | \n", "-0.735498 | \n", "-0.730493 | \n", "-0.723268 | \n", "-0.689246 | \n", "-0.753452 | \n", "-0.735510 | \n", "
| 803849 | \n", "source_1 | \n", "UL001799 | \n", "E27 | \n", "JCP2022_005165 | \n", "-0.337308 | \n", "-0.179527 | \n", "-0.589389 | \n", "-1.586146 | \n", "-0.523829 | \n", "-1.452674 | \n", "... | \n", "-0.942051 | \n", "-0.893310 | \n", "-1.033766 | \n", "-1.077996 | \n", "-1.055936 | \n", "-1.042059 | \n", "-1.028724 | \n", "-0.999373 | \n", "-1.056726 | \n", "-1.003945 | \n", "
| 803850 | \n", "source_1 | \n", "UL001799 | \n", "E26 | \n", "JCP2022_051483 | \n", "-0.292228 | \n", "-0.463995 | \n", "0.929566 | \n", "0.850108 | \n", "1.035715 | \n", "0.820275 | \n", "... | \n", "-0.542843 | \n", "-0.556913 | \n", "-0.597007 | \n", "-0.645565 | \n", "-0.606680 | \n", "-0.628779 | \n", "-0.611744 | \n", "-0.611571 | \n", "-0.648432 | \n", "-0.631909 | \n", "
| 803851 | \n", "source_1 | \n", "UL001799 | \n", "E35 | \n", "JCP2022_042924 | \n", "0.080451 | \n", "0.101718 | \n", "0.480523 | \n", "-1.644516 | \n", "0.439748 | \n", "-1.469025 | \n", "... | \n", "-0.204452 | \n", "-0.115495 | \n", "-0.218607 | \n", "-0.260423 | \n", "-0.242490 | \n", "-0.273611 | \n", "-0.263938 | \n", "-0.233076 | \n", "-0.275965 | \n", "-0.253517 | \n", "
| 803852 | \n", "source_1 | \n", "UL001799 | \n", "C03 | \n", "JCP2022_025848 | \n", "0.163948 | \n", "0.670563 | \n", "-0.751337 | \n", "0.413076 | \n", "-0.812000 | \n", "0.378351 | \n", "... | \n", "-0.639439 | \n", "-0.705759 | \n", "-0.730660 | \n", "-0.746096 | \n", "-0.771677 | \n", "-0.765681 | \n", "-0.719094 | \n", "-0.677973 | \n", "-0.762016 | \n", "-0.712435 | \n", "
803853 rows × 3184 columns
\n", "| \n", " | drug | \n", "targets | \n", "moa-broad | \n", "moa-fine | \n", "human-approved | \n", "clinical-trials | \n", "gpt-notes-approval | \n", "canonical_smiles | \n", "pubchem_cid | \n", "
|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Talc | \n", "None | \n", "unclear | \n", "unclear | \n", "yes | \n", "yes | \n", "Talc used in pharma and cosmetics; safety unde... | \n", "[OH-].[OH-].[O-][Si]12O[Si]3(O[Si](O1)(O[Si](O... | \n", "165411828.0 | \n", "
| 1 | \n", "Bortezomib | \n", "PSMB5 | \n", "inhibitor/antagonist | \n", "Proteasome inhibitor | \n", "yes | \n", "yes | \n", "Approved for multiple myeloma and mantle cell ... | \n", "B(C(CC(C)C)NC(=O)C(CC1=CC=CC=C1)NC(=O)C2=NC=CN... | \n", "387447.0 | \n", "
| 2 | \n", "Ixazomib | \n", "PSMB5 | \n", "inhibitor/antagonist | \n", "Proteasome inhibitor | \n", "yes | \n", "yes | \n", "Approved for multiple myeloma treatment. | \n", "B(C(CC(C)C)NC(=O)CNC(=O)C1=C(C=CC(=C1)Cl)Cl)(O)O | \n", "25183872.0 | \n", "
| 3 | \n", "Ixazomib citrate | \n", "PSMB1, PSMB2, PSMB5 | \n", "inhibitor/antagonist | \n", "Proteasome inhibitor | \n", "yes | \n", "yes | \n", "Approved for multiple myeloma treatment as par... | \n", "B1(OC(=O)C(O1)(CC(=O)O)CC(=O)O)C(CC(C)C)NC(=O)... | \n", "56844015.0 | \n", "
| 4 | \n", "Lactate (calcium) | \n", "None | \n", "unclear | \n", "unclear | \n", "yes | \n", "yes | \n", "Used in medical settings, but not specifically... | \n", "C.CC(C(=O)[O-])O.[Ca+2] | \n", "168311648.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 374 | \n", "Verteporfin | \n", "YAP1 | \n", "inhibitor/antagonist | \n", "unclear | \n", "yes | \n", "yes | \n", "Used in photodynamic therapy for macular degen... | \n", "None | \n", "NaN | \n", "
| 375 | \n", "Quinidine (15% dihydroquinidine) | \n", "KCNH2 | \n", "inhibitor/antagonist | \n", "unclear | \n", "yes | \n", "yes | \n", "Approved for arrhythmias as part of quinine al... | \n", "COC1=CC2=C(C=CN=C2C=C1)[C@@H]([C@H]3C[C@@H]4CC... | \n", "441074.0 | \n", "
| 376 | \n", "Canagliflozin (hemihydrate) | \n", "SLC5A2 | \n", "inhibitor/antagonist | \n", "Glucose transporter inhibitor | \n", "yes | \n", "yes | \n", "Approved for type 2 diabetes. | \n", "CC1=C(C=C(C=C1)[C@H]2[C@@H]([C@H]([C@@H]([C@H]... | \n", "24997615.0 | \n", "
| 377 | \n", "Osimertinib (mesylate) | \n", "EGFR | \n", "inhibitor/antagonist | \n", "EGFR/ERBB inhibitor | \n", "yes | \n", "yes | \n", "Approved for non-small cell lung cancer treatm... | \n", "CN1C=C(C2=CC=CC=C21)C3=NC(=NC=C3)NC4=C(C=C(C(=... | \n", "78357807.0 | \n", "
| 378 | \n", "γ-Oryzanol | \n", "None | \n", "inhibitor/antagonist | \n", "DNA methyltransferase inhibitor | \n", "no | \n", "yes | \n", "Used in supplements; limited human data. | \n", "C[C@H](CCC=C(C)C)[C@H]1CC[C@@]2([C@@]1(CC[C@]3... | \n", "5282164.0 | \n", "
379 rows × 9 columns
\n", "| \n", " | drug | \n", "targets | \n", "moa-broad | \n", "moa-fine | \n", "human-approved | \n", "clinical-trials | \n", "gpt-notes-approval | \n", "canonical_smiles | \n", "pubchem_cid | \n", "InChIKey | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Talc | \n", "None | \n", "unclear | \n", "unclear | \n", "yes | \n", "yes | \n", "Talc used in pharma and cosmetics; safety unde... | \n", "[OH-].[OH-].[O-][Si]12O[Si]3(O[Si](O1)(O[Si](O... | \n", "165411828.0 | \n", "NaN | \n", "
| 1 | \n", "Bortezomib | \n", "PSMB5 | \n", "inhibitor/antagonist | \n", "Proteasome inhibitor | \n", "yes | \n", "yes | \n", "Approved for multiple myeloma and mantle cell ... | \n", "B(C(CC(C)C)NC(=O)C(CC1=CC=CC=C1)NC(=O)C2=NC=CN... | \n", "387447.0 | \n", "NaN | \n", "
| 2 | \n", "Ixazomib | \n", "PSMB5 | \n", "inhibitor/antagonist | \n", "Proteasome inhibitor | \n", "yes | \n", "yes | \n", "Approved for multiple myeloma treatment. | \n", "B(C(CC(C)C)NC(=O)CNC(=O)C1=C(C=CC(=C1)Cl)Cl)(O)O | \n", "25183872.0 | \n", "NaN | \n", "
| 3 | \n", "Ixazomib citrate | \n", "PSMB1, PSMB2, PSMB5 | \n", "inhibitor/antagonist | \n", "Proteasome inhibitor | \n", "yes | \n", "yes | \n", "Approved for multiple myeloma treatment as par... | \n", "B1(OC(=O)C(O1)(CC(=O)O)CC(=O)O)C(CC(C)C)NC(=O)... | \n", "56844015.0 | \n", "NaN | \n", "
| 4 | \n", "Lactate (calcium) | \n", "None | \n", "unclear | \n", "unclear | \n", "yes | \n", "yes | \n", "Used in medical settings, but not specifically... | \n", "C.CC(C(=O)[O-])O.[Ca+2] | \n", "168311648.0 | \n", "NaN | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 374 | \n", "Verteporfin | \n", "YAP1 | \n", "inhibitor/antagonist | \n", "unclear | \n", "yes | \n", "yes | \n", "Used in photodynamic therapy for macular degen... | \n", "None | \n", "NaN | \n", "NaN | \n", "
| 375 | \n", "Quinidine (15% dihydroquinidine) | \n", "KCNH2 | \n", "inhibitor/antagonist | \n", "unclear | \n", "yes | \n", "yes | \n", "Approved for arrhythmias as part of quinine al... | \n", "COC1=CC2=C(C=CN=C2C=C1)[C@@H]([C@H]3C[C@@H]4CC... | \n", "441074.0 | \n", "LOUPRKONTZGTKE-LHHVKLHASA-N | \n", "
| 376 | \n", "Canagliflozin (hemihydrate) | \n", "SLC5A2 | \n", "inhibitor/antagonist | \n", "Glucose transporter inhibitor | \n", "yes | \n", "yes | \n", "Approved for type 2 diabetes. | \n", "CC1=C(C=C(C=C1)[C@H]2[C@@H]([C@H]([C@@H]([C@H]... | \n", "24997615.0 | \n", "NaN | \n", "
| 377 | \n", "Osimertinib (mesylate) | \n", "EGFR | \n", "inhibitor/antagonist | \n", "EGFR/ERBB inhibitor | \n", "yes | \n", "yes | \n", "Approved for non-small cell lung cancer treatm... | \n", "CN1C=C(C2=CC=CC=C21)C3=NC(=NC=C3)NC4=C(C=C(C(=... | \n", "78357807.0 | \n", "NaN | \n", "
| 378 | \n", "γ-Oryzanol | \n", "None | \n", "inhibitor/antagonist | \n", "DNA methyltransferase inhibitor | \n", "no | \n", "yes | \n", "Used in supplements; limited human data. | \n", "C[C@H](CCC=C(C)C)[C@H]1CC[C@@]2([C@@]1(CC[C@]3... | \n", "5282164.0 | \n", "NaN | \n", "
379 rows × 10 columns
\n", "| \n", " | drug | \n", "InChIKey | \n", "
|---|---|---|
| 36 | \n", "Salicylic acid | \n", "YGSDEFSMJLZEOE-UHFFFAOYSA-N | \n", "
| 58 | \n", "5-Azacytidine | \n", "NMUSYJAQQFHJEW-KVTDHHQDSA-N | \n", "
| 69 | \n", "Larotrectinib | \n", "NYNZQNWKBKUAII-KBXCAEBGSA-N | \n", "
| 72 | \n", "Filgotinib | \n", "RIJLVEAXPNLDTC-UHFFFAOYSA-N | \n", "
| 75 | \n", "Cilostazol | \n", "RRGUKTPIGVIEKM-UHFFFAOYSA-N | \n", "
| 81 | \n", "Lonafarnib | \n", "DHMTURDWPRKSOA-RUZDIDTESA-N | \n", "
| 95 | \n", "Acetohexamide | \n", "VGZSUPCWNCWDAN-UHFFFAOYSA-N | \n", "
| 113 | \n", "Homoharringtonine | \n", "HYFHYPWGAURHIV-JFIAXGOJSA-N | \n", "
| 169 | \n", "Ponatinib | \n", "PHXJVRSECIGDHY-UHFFFAOYSA-N | \n", "
| 187 | \n", "Nevirapine | \n", "NQDJXKOVJZTUJA-UHFFFAOYSA-N | \n", "
| 198 | \n", "Menadione | \n", "MJVAVZPDRWSRRC-UHFFFAOYSA-N | \n", "
| 212 | \n", "Olanzapine | \n", "KVWDHTXUZHCGIO-UHFFFAOYSA-N | \n", "
| 246 | \n", "Dexamethasone | \n", "UREBDLICKHMUKA-CXSFZGCWSA-N | \n", "
| 273 | \n", "Cyclosporin A | \n", "PMATZTZNYRCHOR-CGLBZJNRSA-N | \n", "
| 343 | \n", "Regorafenib | \n", "FNHKPVJBJVTLMP-UHFFFAOYSA-N | \n", "
| 350 | \n", "Tranilast | \n", "NZHGWWWHIYHZNX-CSKARUKUSA-N | \n", "
| 375 | \n", "Quinidine (15% dihydroquinidine) | \n", "LOUPRKONTZGTKE-LHHVKLHASA-N | \n", "
| \n", " | broad_sample | \n", "InChIKey | \n", "pert_iname | \n", "pubchem_cid | \n", "gene | \n", "pert_type | \n", "control_type | \n", "smiles | \n", "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", "BRD-A86665761-001-01-1 | \n", "TZDUHAJSIBHXDL-UHFFFAOYSA-N | \n", "gabapentin-enacarbil | \n", "9883933.0 | \n", "CACNB4 | \n", "trt | \n", "NaN | \n", "CC(C)C(=O)OC(C)OC(=O)NCC1(CC(O)=O)CCCCC1 | \n", "
| 1 | \n", "BRD-A22032524-074-09-9 | \n", "HTIQEAQVCYTUBX-UHFFFAOYSA-N | \n", "amlodipine | \n", "2162.0 | \n", "CACNA2D3 | \n", "trt | \n", "NaN | \n", "CCOC(=O)C1=C(COCCN)NC(C)=C(C1c1ccccc1Cl)C(=O)OC | \n", "
| 2 | \n", "BRD-A01078468-001-14-8 | \n", "PBBGSZCBWVPOOL-UHFFFAOYSA-N | \n", "hexestrol | \n", "3606.0 | \n", "AKR1C1 | \n", "trt | \n", "NaN | \n", "CCC(C(CC)c1ccc(O)cc1)c1ccc(O)cc1 | \n", "
| 3 | \n", "BRD-K48278478-001-01-2 | \n", "LOUPRKONTZGTKE-AFHBHXEDSA-N | \n", "quinine | \n", "94175.0 | \n", "KCNN4 | \n", "trt | \n", "NaN | \n", "COc1ccc2nccc([C@@H](O)[C@H]3C[C@@H]4CC[N@]3C[C... | \n", "
| 4 | \n", "BRD-K36574127-001-01-3 | \n", "NYNZQNWKBKUAII-KBXCAEBGSA-N | \n", "LOXO-101 | \n", "46188928.0 | \n", "NTRK1 | \n", "trt | \n", "NaN | \n", "O[C@H]1CCN(C1)C(=O)Nc1cnn2ccc(nc12)N1CCC[C@@H]... | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 302 | \n", "BRD-K24616672-003-20-1 | \n", "MFDFERRIHVXMIY-UHFFFAOYSA-N | \n", "procaine | \n", "4914.0 | \n", "HTR3A | \n", "trt | \n", "NaN | \n", "CCN(CC)CCOC(=O)c1ccc(N)cc1 | \n", "
| 303 | \n", "BRD-A82396632-008-30-8 | \n", "BYBLEWFAAKGYCD-UHFFFAOYSA-N | \n", "miconazole | \n", "4189.0 | \n", "KCNN1 | \n", "trt | \n", "NaN | \n", "Clc1ccc(COC(Cn2ccnc2)c2ccc(Cl)cc2Cl)c(Cl)c1 | \n", "
| 304 | \n", "BRD-K61250553-003-30-6 | \n", "RDOIQAHITMMDAJ-UHFFFAOYSA-N | \n", "loperamide | \n", "3955.0 | \n", "OPRM1 | \n", "trt | \n", "NaN | \n", "CN(C)C(=O)C(CCN1CCC(O)(CC1)c1ccc(Cl)cc1)(c1ccc... | \n", "
| 305 | \n", "BRD-K70358946-001-17-3 | \n", "CEUORZQYGODEFX-UHFFFAOYSA-N | \n", "aripiprazole | \n", "60795.0 | \n", "HTR3A | \n", "trt | \n", "NaN | \n", "Clc1cccc(N2CCN(CCCCOc3ccc4CCC(=O)Nc4c3)CC2)c1Cl | \n", "
| 306 | \n", "NaN | \n", "IAZDPXIOMUYVGZ-UHFFFAOYSA-N | \n", "DMSO | \n", "679.0 | \n", "NaN | \n", "control | \n", "negcon | \n", "CS(=O)C | \n", "
307 rows × 8 columns
\n", "| \n", " | Cell | \n", "Genetic_Perturbation | \n", "Modality_1_timepoint | \n", "Modality_2_timepoint | \n", "cosine_sim | \n", "broad_sample | \n", "Metadata_matching_target | \n", "InChIKey | \n", "smiles | \n", "
|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "long | \n", "-0.588253 | \n", "BRD-A89164055-001-03-3 | \n", "AKR1B1 | \n", "LXANPKRCLVQAOG-UHFFFAOYSA-N | \n", "Fc1ccc2OCCC3(NC(=O)NC3=O)c2c1 | \n", "
| 1 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "long | \n", "-0.576196 | \n", "BRD-A74391928-051-03-9 | \n", "CACNG1 | \n", "ALOBUEHUHMBRLE-UHFFFAOYSA-N | \n", "CCCCCCCN(CC)CCCC(O)c1ccc(NS(C)(=O)=O)cc1 | \n", "
| 2 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "long | \n", "-0.549354 | \n", "BRD-K08893438-001-06-4 | \n", "RGS4 | \n", "QUIIIYITNGOFEI-UHFFFAOYSA-N | \n", "Cc1ccc(cc1)-n1sc(=O)n(Cc2ccc(F)cc2)c1=O | \n", "
| 3 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "long | \n", "-0.501251 | \n", "BRD-K38512030-001-01-7 | \n", "SLCO2B1 | \n", "HJYYPODYNSCCOU-ODRIEIDWSA-N | \n", "CO[C@H]1\\C=C\\O[C@@]2(C)Oc3c(C2=O)c2c(O)cc(NC(=... | \n", "
| 4 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "long | \n", "-0.433312 | \n", "BRD-K22482860-001-20-6 | \n", "KCNH7 | \n", "NUKYPUAOHBNCPY-UHFFFAOYSA-N | \n", "Nc1ccncc1 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 7451 | \n", "U2OS | \n", "ORF | \n", "short | \n", "short | \n", "0.616746 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 7452 | \n", "U2OS | \n", "ORF | \n", "short | \n", "short | \n", "0.644838 | \n", "BRD-K93779381-001-01-9 | \n", "PRKCE | \n", "VDJHFHXMUKFKET-WDUFCVPESA-N | \n", "C\\C=C(\\C)C(=O)O[C@H]1C(C)=C[C@@]23[C@H](C)C[C@... | \n", "
| 7453 | \n", "U2OS | \n", "ORF | \n", "short | \n", "short | \n", "0.701634 | \n", "BRD-K44227013-001-08-0 | \n", "LYN | \n", "PHXJVRSECIGDHY-UHFFFAOYSA-N | \n", "CN1CCN(Cc2ccc(NC(=O)c3ccc(C)c(c3)C#Cc3cnc4cccn... | \n", "
| 7454 | \n", "U2OS | \n", "ORF | \n", "short | \n", "short | \n", "0.702138 | \n", "BRD-K95785537-001-22-3 | \n", "ABL1 | \n", "PBBRWFOVCUAONR-UHFFFAOYSA-N | \n", "CC(C)(C)n1nc(-c2ccc(Cl)cc2)c2c(N)ncnc12 | \n", "
| 7455 | \n", "U2OS | \n", "ORF | \n", "short | \n", "short | \n", "0.714925 | \n", "BRD-K77060810-001-01-5 | \n", "P2RY12 | \n", "NEMHKCNXXRQYRF-UHFFFAOYSA-N | \n", "CCOC(=O)c1cc(C#N)c(nc1C)N1CCC(CC1)C(=O)NS(=O)(... | \n", "
7456 rows × 9 columns
\n", "| \n", " | Cell | \n", "Genetic_Perturbation | \n", "Modality_1_timepoint | \n", "Modality_2_timepoint | \n", "cosine_sim | \n", "broad_sample | \n", "Metadata_matching_target | \n", "InChIKey | \n", "smiles | \n", "
|---|---|---|---|---|---|---|---|---|---|
| 449 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "long | \n", "0.550335 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 463 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "long | \n", "0.748997 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 902 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "short | \n", "0.536371 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 930 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "short | \n", "0.773190 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 1256 | \n", "A549 | \n", "CRISPR | \n", "short | \n", "long | \n", "0.218241 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 1397 | \n", "A549 | \n", "CRISPR | \n", "short | \n", "long | \n", "0.762776 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 1708 | \n", "A549 | \n", "CRISPR | \n", "short | \n", "short | \n", "0.217543 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 1863 | \n", "A549 | \n", "CRISPR | \n", "short | \n", "short | \n", "0.746296 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 1918 | \n", "A549 | \n", "ORF | \n", "long | \n", "long | \n", "-0.218416 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 2262 | \n", "A549 | \n", "ORF | \n", "long | \n", "long | \n", "0.316084 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 2352 | \n", "A549 | \n", "ORF | \n", "long | \n", "short | \n", "-0.329261 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 2459 | \n", "A549 | \n", "ORF | \n", "long | \n", "short | \n", "-0.079798 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 2893 | \n", "A549 | \n", "ORF | \n", "short | \n", "long | \n", "-0.105228 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 3165 | \n", "A549 | \n", "ORF | \n", "short | \n", "long | \n", "0.293320 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 3316 | \n", "A549 | \n", "ORF | \n", "short | \n", "short | \n", "-0.247933 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 3396 | \n", "A549 | \n", "ORF | \n", "short | \n", "short | \n", "-0.105536 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 4095 | \n", "U2OS | \n", "CRISPR | \n", "long | \n", "long | \n", "0.267729 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 4193 | \n", "U2OS | \n", "CRISPR | \n", "long | \n", "long | \n", "0.683034 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 4325 | \n", "U2OS | \n", "CRISPR | \n", "long | \n", "short | \n", "-0.016004 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 4659 | \n", "U2OS | \n", "CRISPR | \n", "long | \n", "short | \n", "0.891863 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 4994 | \n", "U2OS | \n", "CRISPR | \n", "short | \n", "long | \n", "0.229830 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 5124 | \n", "U2OS | \n", "CRISPR | \n", "short | \n", "long | \n", "0.650440 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 5283 | \n", "U2OS | \n", "CRISPR | \n", "short | \n", "short | \n", "-0.007166 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 5591 | \n", "U2OS | \n", "CRISPR | \n", "short | \n", "short | \n", "0.893123 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 5805 | \n", "U2OS | \n", "ORF | \n", "long | \n", "long | \n", "0.045007 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 6057 | \n", "U2OS | \n", "ORF | \n", "long | \n", "long | \n", "0.608289 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 6117 | \n", "U2OS | \n", "ORF | \n", "long | \n", "short | \n", "-0.187308 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 6515 | \n", "U2OS | \n", "ORF | \n", "long | \n", "short | \n", "0.574759 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 6802 | \n", "U2OS | \n", "ORF | \n", "short | \n", "long | \n", "0.113212 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 6984 | \n", "U2OS | \n", "ORF | \n", "short | \n", "long | \n", "0.592689 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 7093 | \n", "U2OS | \n", "ORF | \n", "short | \n", "short | \n", "-0.097227 | \n", "BRD-K64890080-001-02-1 | \n", "PLK1 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| 7451 | \n", "U2OS | \n", "ORF | \n", "short | \n", "short | \n", "0.616746 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "XQVVPGYIWAGRNI-JOCHJYFZSA-N | \n", "CC[C@H]1N(C2CCCC2)c2nc(Nc3ccc(cc3OC)C(=O)NC3CC... | \n", "
| \n", " | Cell | \n", "Genetic_Perturbation | \n", "Modality_1_timepoint | \n", "Modality_2_timepoint | \n", "cosine_sim | \n", "Metadata_broad_sample | \n", "Metadata_matching_target | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "long | \n", "-0.588253 | \n", "BRD-A89164055-001-03-3 | \n", "AKR1B1 | \n", "
| 1 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "long | \n", "-0.576196 | \n", "BRD-A74391928-051-03-9 | \n", "CACNG1 | \n", "
| 2 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "long | \n", "-0.549354 | \n", "BRD-K08893438-001-06-4 | \n", "RGS4 | \n", "
| 3 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "long | \n", "-0.501251 | \n", "BRD-K38512030-001-01-7 | \n", "SLCO2B1 | \n", "
| 4 | \n", "A549 | \n", "CRISPR | \n", "long | \n", "long | \n", "-0.433312 | \n", "BRD-K22482860-001-20-6 | \n", "KCNH7 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 7451 | \n", "U2OS | \n", "ORF | \n", "short | \n", "short | \n", "0.616746 | \n", "BRD-K64890080-001-02-1 | \n", "BRD4 | \n", "
| 7452 | \n", "U2OS | \n", "ORF | \n", "short | \n", "short | \n", "0.644838 | \n", "BRD-K93779381-001-01-9 | \n", "PRKCE | \n", "
| 7453 | \n", "U2OS | \n", "ORF | \n", "short | \n", "short | \n", "0.701634 | \n", "BRD-K44227013-001-08-0 | \n", "LYN | \n", "
| 7454 | \n", "U2OS | \n", "ORF | \n", "short | \n", "short | \n", "0.702138 | \n", "BRD-K95785537-001-22-3 | \n", "ABL1 | \n", "
| 7455 | \n", "U2OS | \n", "ORF | \n", "short | \n", "short | \n", "0.714925 | \n", "BRD-K77060810-001-01-5 | \n", "P2RY12 | \n", "
7456 rows × 7 columns
\n", "