PROTAC
drug-discovery
xgboost
cheminformatics
ribesstefano committed on
Commit
04f71a6
·
verified ·
1 Parent(s): d22fb6a

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. dc50_best_arch_ensemble/best_config-model=mlp_dc50_protac-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-task=dc50-group=scaffold.yaml +22 -0
  2. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=0_hparams.yaml +60 -0
  3. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=0_state.pt +3 -0
  4. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=10_hparams.yaml +60 -0
  5. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=10_state.pt +3 -0
  6. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=11_hparams.yaml +60 -0
  7. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=11_state.pt +3 -0
  8. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=12_hparams.yaml +60 -0
  9. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=12_state.pt +3 -0
  10. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=13_hparams.yaml +60 -0
  11. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=13_state.pt +3 -0
  12. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=14_hparams.yaml +60 -0
  13. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=14_state.pt +3 -0
  14. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=15_hparams.yaml +60 -0
  15. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=15_state.pt +3 -0
  16. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=16_hparams.yaml +60 -0
  17. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=16_state.pt +3 -0
  18. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=17_hparams.yaml +60 -0
  19. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=17_state.pt +3 -0
  20. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=18_hparams.yaml +60 -0
  21. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=18_state.pt +3 -0
  22. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=19_hparams.yaml +60 -0
  23. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=19_state.pt +3 -0
  24. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=1_hparams.yaml +60 -0
  25. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=1_state.pt +3 -0
  26. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=20_hparams.yaml +60 -0
  27. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=20_state.pt +3 -0
  28. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=21_hparams.yaml +60 -0
  29. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=21_state.pt +3 -0
  30. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=22_hparams.yaml +60 -0
  31. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=22_state.pt +3 -0
  32. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=23_hparams.yaml +60 -0
  33. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=23_state.pt +3 -0
  34. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=24_hparams.yaml +60 -0
  35. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=24_state.pt +3 -0
  36. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=2_hparams.yaml +60 -0
  37. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=2_state.pt +3 -0
  38. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=3_hparams.yaml +60 -0
  39. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=3_state.pt +3 -0
  40. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=4_hparams.yaml +60 -0
  41. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=4_state.pt +3 -0
  42. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=5_hparams.yaml +60 -0
  43. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=5_state.pt +3 -0
  44. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=6_hparams.yaml +60 -0
  45. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=6_state.pt +3 -0
  46. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=7_hparams.yaml +60 -0
  47. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=7_state.pt +3 -0
  48. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=8_hparams.yaml +60 -0
  49. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=8_state.pt +3 -0
  50. dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=9_hparams.yaml +60 -0
dc50_best_arch_ensemble/best_config-model=mlp_dc50_protac-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-task=dc50-group=scaffold.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_config:
2
+ learning_rate: 0.0007485278176691929
3
+ hidden_dim:
4
+ - 256
5
+ head_depth: 1
6
+ dropout: 0.0
7
+ head_dropout: 0.1
8
+ task_type: point
9
+ lr_scheduler_type: reduce_on_plateau
10
+ warmup_ratio: 0.0
11
+ norm_type: layer
12
+ activation: relu
13
+ input_dim: 509
14
+ training_config:
15
+ max_epochs: 200
16
+ gradient_clip_val: 1.0
17
+ enable_checkpointing: true
18
+ enable_progress_bar: false
19
+ enable_model_summary: true
20
+ accelerator: auto
21
+ devices: 1
22
+ precision: 16-mixed
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=0_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=0_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cbee6d87a1698c39cb2896ccd26d43633122a1977deb14e45caa6093cd7d78c
3
+ size 164527
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=10_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=10_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27f73287b64adf1bd821daccc230c4b69c0ab2f462b4ff639d219e5283f85cea
3
+ size 164533
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=11_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=11_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6842739643150101b82136db268accc6c15961e9c8f059649babb12dc5d75aa0
3
+ size 164725
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=12_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=12_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38b31a192842eaacb08d24a50c5b00bd3eaa5e2b4a81655e02f78c1ba0b59aab
3
+ size 164277
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=13_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=13_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:889528f14f5c53f64a764a17120c9de3afc5c362d2b997b8c01f53361f875086
3
+ size 164469
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=14_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=14_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e034797f28804fb7d7819d8fd8d3c45d409fa026646c25788649b167c9286ca1
3
+ size 164533
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=15_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=15_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ab45bec692bdeac8ac9224930401271537d061fe65bf17b1d34e509f28a2c7f
3
+ size 164405
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=16_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=16_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1b1a7285037227eb64829125999f1535444935dcc2eda19cb1b55fc439917cd
3
+ size 164597
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=17_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=17_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e51d09d134f771113e612b23c0fa603a92faa4fbbbd55927b1b1d46e90b055e9
3
+ size 164469
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=18_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=18_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebadedee8a424dca40477dad09b0c5668cb21086b5e4018ba51497bbe1b6900a
3
+ size 164341
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=19_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=19_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6c4d24355023ef142adecbd50b5cee3599da96801eea1e50ff376fd333cda15
3
+ size 164469
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=1_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=1_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d092dde180e964b979b0d6854cd724ba7d2ac89dc01990be3567c27833aafd3
3
+ size 164591
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=20_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=20_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8a701c5d7110e3daf1f0a2d75491e2a7c8f0badace9510758a8f5946d12a763
3
+ size 164661
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=21_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=21_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55593c6ba1509655fec879f4e21511aa54f32cf0875b3ce616ad593d6da666d2
3
+ size 164469
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=22_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=22_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce312732dbb919a9a289cb6d7bac7048a515fa1d5947380c1b7d94cfeb361c79
3
+ size 164469
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=23_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=23_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73581bd6112b82411356bf6bd8c4276347d50699c0e7d82f33a65bf15041b706
3
+ size 164213
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=24_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=24_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdce4d615a7aba08ba30453aac46238c3f372945a86b1f53f053622a00ef5d47
3
+ size 164597
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=2_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=2_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:646473aa2c5aa682b5e2f3e813daed111e6178f18f6fe00d8a620eda877d0ee7
3
+ size 164463
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=3_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=3_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ee0a67b811502da1e0f43c308a182cb6ab40e00ada2397cf9d37578844ed60d
3
+ size 164399
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=4_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=4_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ec0db035fdfa5597e043f22785ceca028050a30fcf89847979d08cdff55954e
3
+ size 164335
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=5_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=5_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9bcc01ad91f4c46feba71f065767eeed083fc823d6c99a1f965751649076271
3
+ size 164527
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=6_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=6_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fff3de2c6222914ff5960fb616ad94b48f5b8b26d5688333bc3a0a4e9d50170
3
+ size 164399
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=7_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=7_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3a39aba80bc245929c05947000682cd53f917bc801d709350b60388cee5d94f
3
+ size 164591
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=8_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=8_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49e39dc8c01636473201b26350131d187f9bf2e5c4fc890bc0fa0457440c8853
3
+ size 164655
dc50_best_arch_ensemble/datamodule-data=all_desc_poi_onehot_lig_onehot_cell_onehot_assay_time_norm_labels_DC50-group=scaffold-fold=9_hparams.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ smiles_col: SMILES
2
+ ligase_col: Ligase_Name
3
+ ligase_sequence_col: Ligase_Sequence
4
+ poi_col: POI_Name
5
+ poi_sequence_col: POI_Sequence
6
+ cell_line_col: Cell_Line_ID
7
+ assay_type_col: Assay
8
+ treatment_time_col: Assay_Time
9
+ treatment_time_dmax_col: Assay_Time
10
+ treatment_time_dc50_col: Assay_Time
11
+ treatment_time_ic50_col: Treatment Time (h) (Cellular activities, IC50)
12
+ labels:
13
+ - DC50
14
+ normalize_labels: true
15
+ standardize_labels: false
16
+ impute_labels: true
17
+ fp_size: 512
18
+ radius: 16
19
+ use_fingerprints: false
20
+ use_descriptors: true
21
+ use_relevant_descriptors: false
22
+ selected_descriptors: null
23
+ use_poi_sequence_embedding: false
24
+ use_poi_name_embedding: true
25
+ use_ligase_name_embedding: true
26
+ use_poi_precomputed_embedding: false
27
+ use_ligase_precomputed_embedding: false
28
+ poi_embeddings_file: null
29
+ poi_embeddings_format: npz
30
+ poi_embeddings_per_residue: true
31
+ poi_residue_pooling: sum
32
+ poi_embeddings_id_type: sequence
33
+ ligase_embeddings_file: null
34
+ ligase_embeddings_format: npz
35
+ ligase_embeddings_per_residue: true
36
+ ligase_residue_pooling: sum
37
+ ligase_embeddings_id_type: sequence
38
+ use_poi_pca: false
39
+ poi_pca_n_components: null
40
+ use_ligase_pca: false
41
+ ligase_pca_n_components: null
42
+ use_cell_description_embedding: false
43
+ use_cell_name_embedding: true
44
+ use_tokenizer: false
45
+ tokenizer_name: google-bert/bert-base-cased
46
+ max_length: 512
47
+ prompt_template: null
48
+ label_task_col: Value_Type
49
+ degrader_type_col: null
50
+ default_degrader_type: PROTAC
51
+ include_prompt: false
52
+ is_bert_multitask: false
53
+ use_assay_type_encoding: true
54
+ use_treatment_time: true
55
+ batch_size: 32
56
+ num_workers: 0
57
+ num_proc: 1
58
+ verbose: 0
59
+ sort_features: true
60
+ categorical_encoding: onehot