#!/bin/bash
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
|
|
# ---------------------------------------------------------------------------
# Locations used by every step below.
#
# HOME_LOC keeps its trailing slash, so the derived paths contain a doubled
# "/" (e.g. "$HOME//PeptiVerse/..."). POSIX pathname resolution treats
# repeated slashes as a single one, so the paths still resolve normally.
# ---------------------------------------------------------------------------
HOME_LOC=~/
SCRIPT_LOC=$HOME_LOC/PeptiVerse/training_classifiers
ALT_EMB_LOC=$HOME_LOC/PeptiVerse/training_data_cleaned
LOG_LOC=$SCRIPT_LOC/src_bash/logs

# month_day stamp embedded in the log file names.
DATE=$(date +%m_%d)

# Every python run appends to a file under LOG_LOC; without the directory
# each of those redirections would fail, so stop right away instead.
if ! mkdir -p -- "$LOG_LOC"; then
  echo "ERROR: cannot create log directory: $LOG_LOC" >&2
  exit 1
fi

# The python entry points are invoked by relative file name, so the working
# directory has to be SCRIPT_LOC before anything else runs.
if ! cd -- "$SCRIPT_LOC"; then
  echo "ERROR: cannot change directory to: $SCRIPT_LOC" >&2
  exit 1
fi
| |
| |
| |
|
|
| |
| |
#######################################
# Run the two bootstrap passes for one trained model: first "--mode ci",
# then the caller-supplied uncertainty mode. All python output (stdout and
# stderr) is appended to a per-model log file.
# Globals:
#   SCRIPT_LOC, LOG_LOC, DATE (read)
# Arguments:
#   $1 - objective name (first path component under SCRIPT_LOC)
#   $2 - embedding tag; the model directory is "<model type>_<tag>"
#   $3 - python script to run
#   $4 - model type
#   $5 - value passed to --mode for the second pass
# Outputs:
#   Progress lines on stdout; failure notices on stderr.
# Returns:
#   0 when the model is skipped or both passes succeed; otherwise the exit
#   status of the last pass that failed.
#######################################
run_bootstrap() {
  local objective=$1
  local wt=$2
  local script=$3
  local model_type=$4
  local unc_mode=$5

  local model_name="${model_type}_${wt}"
  local out_dir="${SCRIPT_LOC}/${objective}/${model_name}"
  local val_preds="${out_dir}/val_predictions.csv"
  local log_file="${LOG_LOC}/${DATE}_ci_${model_type}_${objective}_${wt}.log"

  # Nothing to bootstrap without saved validation predictions.
  if [[ ! -f "$val_preds" ]]; then
    echo " [SKIP bootstrap] val_predictions.csv not found: $val_preds"
    return 0
  fi

  # Both passes share everything except --mode.
  local -a common_args=(
    --val_preds "$val_preds"
    --out_dir "$out_dir"
    --model_name "$model_name"
  )
  local rc=0
  local status=0

  echo " [bootstrap ci] ${model_type} / ${objective} / ${wt}"
  python -u "$script" --mode ci "${common_args[@]}" >> "$log_file" 2>&1
  status=$?
  if (( status != 0 )); then
    # python's own output went to the log, so surface the failure here.
    echo " [FAIL bootstrap ci] ${model_type} / ${objective} / ${wt} (exit ${status}); see ${log_file}" >&2
    rc=$status
  fi

  # The second pass is attempted even if the first one failed.
  echo " [bootstrap unc] ${model_type} / ${objective} / ${wt} (${unc_mode})"
  python -u "$script" --mode "$unc_mode" "${common_args[@]}" >> "$log_file" 2>&1
  status=$?
  if (( status != 0 )); then
    echo " [FAIL bootstrap unc] ${model_type} / ${objective} / ${wt} (exit ${status}); see ${log_file}" >&2
    rc=$status
  fi

  echo " ${out_dir}/"
  return "$rc"
}
|
|
| |
| |
#######################################
# Run refit_ml_walltime.py for one trained model, appending the script's
# stdout and stderr to a per-model log file.
# Globals:
#   SCRIPT_LOC, LOG_LOC, DATE (read)
# Arguments:
#   $1 - objective name (first path component under SCRIPT_LOC)
#   $2 - embedding tag; the model directory is "<model type>_<tag>"
#   $3 - model type
#   $4 - dataset directory; may be empty when no dataset is mapped
# Outputs:
#   Progress lines on stdout; a failure notice on stderr.
# Returns:
#   0 when the run is skipped or succeeds; otherwise python's exit status.
#######################################
run_walltime() {
  local objective=$1
  local wt=$2
  local model_type=$3
  local dataset_path=$4

  local model_dir="${SCRIPT_LOC}/${objective}/${model_type}_${wt}"
  local log_file="${LOG_LOC}/${DATE}_walltime_${model_type}_${objective}_${wt}.log"

  if [[ ! -d "$model_dir" ]]; then
    echo " [SKIP walltime] model_dir not found: $model_dir"
    return 0
  fi

  # An empty path means the caller had no dataset for this combination; say
  # so explicitly rather than printing "dataset not found:" with no path.
  if [[ -z "$dataset_path" ]]; then
    echo " [SKIP walltime] no dataset path configured for ${objective} / ${wt}"
    return 0
  fi

  if [[ ! -d "$dataset_path" ]]; then
    echo " [SKIP walltime] dataset not found: $dataset_path"
    return 0
  fi

  echo " [walltime] ${model_type} / ${objective} / ${wt}"
  # Relative script name: resolved against the current working directory.
  python -u refit_ml_walltime.py \
    --model_dir "$model_dir" \
    --dataset_path "$dataset_path" \
    --logs_dir "$LOG_LOC" \
    >> "$log_file" 2>&1
  local status=$?

  if (( status != 0 )); then
    # python's own output went to the log, so surface the failure here and
    # do not claim that timings were recorded.
    echo " [FAIL walltime] ${model_type} / ${objective} / ${wt} (exit ${status}); see ${log_file}" >&2
    return "$status"
  fi

  # NOTE(review): presumably refit_ml_walltime.py writes this jsonl under
  # --logs_dir; that is not visible from this script - confirm.
  echo " logged to ${LOG_LOC}/${DATE}_wall_clock_ml.jsonl"
}
|
|
| |
| |
| |
| |
#######################################
# Map an "<objective>|<embedding tag>" pair to its dataset directory.
# Globals:
#   HOME_LOC, ALT_EMB_LOC (read)
# Arguments:
#   $1 - objective name
#   $2 - embedding tag
# Outputs:
#   The dataset path on stdout, or an empty line when the pair is unmapped.
# Returns:
#   0 when the pair is mapped, 1 otherwise.
#
# NOTE(review): the driver loops in this file request "solubility|chemberta",
# which has no entry here, while "permeability_penetrance|peptideclm" is
# mapped but never requested - confirm both are intended.
# NOTE(review): the "wt" datasets resolve under
# $HOME_LOC/projects/Classifier_Weight, the others under ALT_EMB_LOC -
# confirm the two roots are meant to differ.
#######################################
get_dataset_path() {
  local objective=$1
  local wt=$2

  local data_loc="${HOME_LOC}/projects/Classifier_Weight/training_data_cleaned"

  case "${objective}|${wt}" in
    # "wt" tag: directories under data_loc.
    "hemolysis|wt")
      echo "${data_loc}/hemolysis/hemo_wt_with_embeddings" ;;
    "nf|wt")
      echo "${data_loc}/nf/nf_wt_with_embeddings" ;;
    "solubility|wt")
      echo "${data_loc}/solubility/sol_wt_with_embeddings" ;;
    "permeability_penetrance|wt")
      echo "${data_loc}/permeability_penetrance/perm_wt_with_embeddings_pooled" ;;

    # "smiles" tag: the *_peptideclm directories under ALT_EMB_LOC.
    "hemolysis|smiles")
      echo "${ALT_EMB_LOC}/hemolysis_peptideclm/hemo_smiles_with_embeddings" ;;
    "nf|smiles")
      echo "${ALT_EMB_LOC}/nf_peptideclm/nf_smiles_with_embeddings" ;;
    "permeability_pampa|smiles")
      echo "${ALT_EMB_LOC}/permeability_pampa_peptideclm/pampa_smiles_with_embeddings" ;;
    "permeability_caco2|smiles")
      echo "${ALT_EMB_LOC}/permeability_caco2_peptideclm/caco2_smiles_with_embeddings" ;;

    # "chemberta" tag (plus one "peptideclm" entry): *_chemberta and
    # permeability_peptideclm directories under ALT_EMB_LOC.
    "hemolysis|chemberta")
      echo "${ALT_EMB_LOC}/hemolysis_chemberta/hemo_smiles_with_embeddings" ;;
    "nf|chemberta")
      echo "${ALT_EMB_LOC}/nf_chemberta/nf_smiles_with_embeddings" ;;
    "permeability_penetrance|chemberta")
      echo "${ALT_EMB_LOC}/permeability_chemberta/perm_smiles_with_embeddings" ;;
    "permeability_penetrance|peptideclm")
      echo "${ALT_EMB_LOC}/permeability_peptideclm/perm_smiles_with_embeddings" ;;
    "permeability_pampa|chemberta")
      echo "${ALT_EMB_LOC}/permeability_pampa_chemberta/pampa_smiles_with_embeddings" ;;
    "permeability_caco2|chemberta")
      echo "${ALT_EMB_LOC}/permeability_caco2_chemberta/caco2_smiles_with_embeddings" ;;

    *)
      # Unmapped pair: same empty line as before, but signal it via status
      # so callers can tell "no mapping" apart from a real path.
      echo ""
      return 1
      ;;
  esac
}
|
|
| |
| |
| |
#######################################
# Run bootstrap + walltime for every objective x embedding tag x model type
# combination of one grid.
# Arguments:
#   $1 - python script handed to run_bootstrap
#   $2 - uncertainty mode handed to run_bootstrap
#   then: objectives... -- embedding tags... -- model types...
# Outputs:
#   One "-- objective / tag / model --" header per combination, followed by
#   whatever run_bootstrap and run_walltime print.
#######################################
run_grid() {
  local script=$1
  local unc_mode=$2
  shift 2

  # Split the remaining arguments into three lists at the "--" separators.
  local -a objectives=() wts=() model_types=()
  while [[ $# -gt 0 && "$1" != "--" ]]; do
    objectives+=("$1")
    shift
  done
  if [[ $# -gt 0 ]]; then shift; fi
  while [[ $# -gt 0 && "$1" != "--" ]]; do
    wts+=("$1")
    shift
  done
  if [[ $# -gt 0 ]]; then shift; fi
  model_types=("$@")

  local objective wt model_type dpath
  for objective in "${objectives[@]}"; do
    for wt in "${wts[@]}"; do
      for model_type in "${model_types[@]}"; do
        echo ""
        echo "-- ${objective} / ${wt} / ${model_type} --"
        run_bootstrap "$objective" "$wt" "$script" "$model_type" "$unc_mode"
        # Empty when the pair has no dataset; run_walltime then skips it.
        dpath=$(get_dataset_path "$objective" "$wt")
        run_walltime "$objective" "$wt" "$model_type" "$dpath"
      done
    done
  done
}

echo ""
echo "============================================================"
echo " SECTION 1: Classification bootstrap + walltime"
echo "============================================================"

CLS_MODEL_TYPES=("svm_gpu" "enet_gpu" "xgb")

# hemolysis / nf are run for all three embedding tags.
run_grid "ml_uncertainty.py" "uncertainty_prob" \
  "hemolysis" "nf" -- \
  "wt" "smiles" "chemberta" -- \
  "${CLS_MODEL_TYPES[@]}"

# solubility / permeability_penetrance are run for wt and chemberta only.
run_grid "ml_uncertainty.py" "uncertainty_prob" \
  "solubility" "permeability_penetrance" -- \
  "wt" "chemberta" -- \
  "${CLS_MODEL_TYPES[@]}"

echo ""
echo "============================================================"
echo " SECTION 2: Regression bootstrap + walltime"
echo "============================================================"

REG_MODEL_TYPES=("svr" "enet_gpu" "xgb")

# Regression objectives use the regression script and residual uncertainty.
run_grid "ml_uncertainty_reg.py" "uncertainty_residual" \
  "permeability_pampa" "permeability_caco2" -- \
  "smiles" "chemberta" -- \
  "${REG_MODEL_TYPES[@]}"

echo ""
echo "============================================================"
echo "All runs completed at $(date)"
echo "============================================================"

# `conda deactivate` only works when conda's shell function has been loaded
# into this shell. No activation is visible in this script, so skip the call
# when the function is absent instead of ending the script with an error.
if [[ "$(type -t conda)" == "function" ]]; then
  conda deactivate
fi
|
|