Sync from GitHub (preserve manual model files)
Browse files- Data/Data Editors/csvCleanup.py +1 -0
- Data/Data Editors/csvCombiner.py +1 -0
- Data/Data Editors/fastaCleanup.py +1 -0
- StreamlitApp/StreamlitApp.py +57 -19
- StreamlitApp/utils/analyze.py +1 -0
- StreamlitApp/utils/optimize.py +1 -0
- StreamlitApp/utils/predict.py +7 -2
- StreamlitApp/utils/rate_limit.py +1 -1
- StreamlitApp/utils/shared_ui.py +6 -1
- StreamlitApp/utils/tsne.py +2 -0
- StreamlitApp/utils/visualize.py +4 -2
Data/Data Editors/csvCleanup.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
# Load data
|
|
|
|
| 1 |
+
# Post-process a combined CSV: drop index noise and duplicate sequences.
|
| 2 |
import pandas as pd
|
| 3 |
|
| 4 |
# Load data
|
Data/Data Editors/csvCombiner.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
from Bio import SeqIO
|
| 3 |
from pathlib import Path
|
|
|
|
| 1 |
+
# Merge AMP / non-AMP FASTA files into one labeled CSV for training or for the app's Data/ directory.
|
| 2 |
import pandas as pd
|
| 3 |
from Bio import SeqIO
|
| 4 |
from pathlib import Path
|
Data/Data Editors/fastaCleanup.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
from Bio import SeqIO
|
| 2 |
import pandas as pd
|
| 3 |
|
|
|
|
| 1 |
+
# Filter FASTA to canonical amino acids and length bounds; emit FASTA + CSV.
|
| 2 |
from Bio import SeqIO
|
| 3 |
import pandas as pd
|
| 4 |
|
StreamlitApp/StreamlitApp.py
CHANGED
|
@@ -78,6 +78,24 @@ def _try_copy_to_clipboard(text: str) -> None:
|
|
| 78 |
except Exception:
|
| 79 |
pass
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
# Configure global app layout once before rendering widgets.
|
| 82 |
st.set_page_config(page_title="PeptideAI", layout="wide")
|
| 83 |
|
|
@@ -91,16 +109,17 @@ if "predictions" not in st.session_state:
|
|
| 91 |
st.session_state.predictions = [] # list of dicts
|
| 92 |
if "predict_ran" not in st.session_state:
|
| 93 |
st.session_state.predict_ran = False
|
| 94 |
-
|
| 95 |
-
|
|
|
|
| 96 |
if "analyze_input" not in st.session_state:
|
| 97 |
st.session_state.analyze_input = "" # last analyze input
|
|
|
|
|
|
|
| 98 |
if "analyze_output" not in st.session_state:
|
| 99 |
st.session_state.analyze_output = None # (label, conf_display, comp, props, analysis)
|
| 100 |
if "optimize_input" not in st.session_state:
|
| 101 |
-
st.session_state.optimize_input = "" # last optimize
|
| 102 |
-
if "optimize_input_widget" not in st.session_state:
|
| 103 |
-
st.session_state.optimize_input_widget = st.session_state.optimize_input
|
| 104 |
if "optimize_output" not in st.session_state:
|
| 105 |
st.session_state.optimize_output = None # (orig_seq, orig_conf, improved_seq, improved_conf, history)
|
| 106 |
if "optimize_last_ran_input" not in st.session_state:
|
|
@@ -111,8 +130,6 @@ if "visualize_df" not in st.session_state:
|
|
| 111 |
st.session_state.visualize_df = None
|
| 112 |
if "visualize_peptide_input" not in st.session_state:
|
| 113 |
st.session_state.visualize_peptide_input = ""
|
| 114 |
-
if "visualize_peptide_input_widget" not in st.session_state:
|
| 115 |
-
st.session_state.visualize_peptide_input_widget = st.session_state.visualize_peptide_input
|
| 116 |
|
| 117 |
# Sidebar route selector drives top-level page rendering.
|
| 118 |
st.sidebar.header("Navigation")
|
|
@@ -135,7 +152,10 @@ if st.sidebar.button("Clear All Fields"):
|
|
| 135 |
"predictions",
|
| 136 |
"predict_ran",
|
| 137 |
"predict_input_widget",
|
|
|
|
| 138 |
"analyze_input",
|
|
|
|
|
|
|
| 139 |
"analyze_output",
|
| 140 |
"optimize_input",
|
| 141 |
"optimize_input_widget",
|
|
@@ -182,18 +202,28 @@ if page == "Predict":
|
|
| 182 |
preset_cols = st.columns(2)
|
| 183 |
with preset_cols[0]:
|
| 184 |
if st.button("Use strong AMP example"):
|
| 185 |
-
|
|
|
|
|
|
|
| 186 |
st.rerun()
|
| 187 |
with preset_cols[1]:
|
| 188 |
if st.button("Use weak sequence example"):
|
| 189 |
-
|
|
|
|
|
|
|
| 190 |
st.rerun()
|
| 191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
seq_input = st.text_area(
|
| 193 |
"Enter peptide sequences (one per line):",
|
| 194 |
height=150,
|
| 195 |
key="predict_input_widget",
|
|
|
|
| 196 |
)
|
|
|
|
| 197 |
uploaded_file = st.file_uploader("Or upload a FASTA/text file", type=["txt", "fasta"])
|
| 198 |
|
| 199 |
# Show quick length guidance before running the model.
|
|
@@ -282,12 +312,16 @@ elif page == "Analyze":
|
|
| 282 |
|
| 283 |
# Match optimizer-like boxed input style for consistent UI spacing.
|
| 284 |
with st.container(border=True):
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
|
|
|
| 288 |
"Enter a peptide sequence to analyze:",
|
| 289 |
-
|
|
|
|
| 290 |
)
|
|
|
|
|
|
|
| 291 |
|
| 292 |
warn = sequence_length_warning(seq)
|
| 293 |
if warn:
|
|
@@ -319,6 +353,7 @@ elif page == "Analyze":
|
|
| 319 |
|
| 320 |
# Save computed payload for display + report exports below.
|
| 321 |
st.session_state.analyze_input = seq
|
|
|
|
| 322 |
st.session_state.analyze_output = (label, conf, conf_display, comp, props, analysis)
|
| 323 |
|
| 324 |
# Render last computed analysis block.
|
|
@@ -513,13 +548,15 @@ elif page == "Optimize":
|
|
| 513 |
st.header("Peptide Optimizer")
|
| 514 |
|
| 515 |
with st.container(border=True):
|
|
|
|
|
|
|
| 516 |
st.text_input(
|
| 517 |
"Enter a peptide sequence to optimize:",
|
| 518 |
key="optimize_input_widget",
|
|
|
|
| 519 |
)
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
st.session_state.optimize_input = seq
|
| 523 |
|
| 524 |
warn_opt = sequence_length_warning(seq) if seq else None
|
| 525 |
if warn_opt:
|
|
@@ -595,13 +632,14 @@ elif page == "Optimize":
|
|
| 595 |
elif page == "Visualize":
|
| 596 |
st.header("Peptide Visualizer")
|
| 597 |
with st.container(border=True):
|
|
|
|
|
|
|
| 598 |
st.text_input(
|
| 599 |
"Enter a peptide sequence to visualize:",
|
| 600 |
key="visualize_peptide_input_widget",
|
|
|
|
| 601 |
)
|
| 602 |
-
|
| 603 |
-
# Mirror widget value into a stable saved key for persistence parity with other pages.
|
| 604 |
-
st.session_state.visualize_peptide_input = st.session_state.get("visualize_peptide_input_widget", "")
|
| 605 |
seq_viz = (st.session_state.get("visualize_peptide_input") or "").strip()
|
| 606 |
clean_viz = "".join(c for c in seq_viz.upper() if not c.isspace())
|
| 607 |
if clean_viz:
|
|
|
|
| 78 |
except Exception:
|
| 79 |
pass
|
| 80 |
|
| 81 |
+
|
| 82 |
+
# Widget keys are cleared when a page is not rendered; these copy text into plain session keys.
|
| 83 |
+
def _sync_predict_input_saved():
|
| 84 |
+
st.session_state.predict_input_saved = st.session_state.get("predict_input_widget", "")
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def _sync_analyze_draft():
|
| 88 |
+
st.session_state.analyze_draft = st.session_state.get("analyze_input_widget", "")
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def _sync_optimize_input():
|
| 92 |
+
st.session_state.optimize_input = st.session_state.get("optimize_input_widget", "")
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _sync_visualize_peptide_input():
|
| 96 |
+
st.session_state.visualize_peptide_input = st.session_state.get("visualize_peptide_input_widget", "")
|
| 97 |
+
|
| 98 |
+
|
| 99 |
# Configure global app layout once before rendering widgets.
|
| 100 |
st.set_page_config(page_title="PeptideAI", layout="wide")
|
| 101 |
|
|
|
|
| 109 |
st.session_state.predictions = [] # list of dicts
|
| 110 |
if "predict_ran" not in st.session_state:
|
| 111 |
st.session_state.predict_ran = False
|
| 112 |
+
# predict_input_saved: survives navigation when Streamlit strips widget keys.
|
| 113 |
+
if "predict_input_saved" not in st.session_state:
|
| 114 |
+
st.session_state.predict_input_saved = ""
|
| 115 |
if "analyze_input" not in st.session_state:
|
| 116 |
st.session_state.analyze_input = "" # last analyze input
|
| 117 |
+
if "analyze_draft" not in st.session_state:
|
| 118 |
+
st.session_state.analyze_draft = "" # typed analyze sequence (persists across pages)
|
| 119 |
if "analyze_output" not in st.session_state:
|
| 120 |
st.session_state.analyze_output = None # (label, conf_display, comp, props, analysis)
|
| 121 |
if "optimize_input" not in st.session_state:
|
| 122 |
+
st.session_state.optimize_input = "" # last optimize sequence (persisted draft)
|
|
|
|
|
|
|
| 123 |
if "optimize_output" not in st.session_state:
|
| 124 |
st.session_state.optimize_output = None # (orig_seq, orig_conf, improved_seq, improved_conf, history)
|
| 125 |
if "optimize_last_ran_input" not in st.session_state:
|
|
|
|
| 130 |
st.session_state.visualize_df = None
|
| 131 |
if "visualize_peptide_input" not in st.session_state:
|
| 132 |
st.session_state.visualize_peptide_input = ""
|
|
|
|
|
|
|
| 133 |
|
| 134 |
# Sidebar route selector drives top-level page rendering.
|
| 135 |
st.sidebar.header("Navigation")
|
|
|
|
| 152 |
"predictions",
|
| 153 |
"predict_ran",
|
| 154 |
"predict_input_widget",
|
| 155 |
+
"predict_input_saved",
|
| 156 |
"analyze_input",
|
| 157 |
+
"analyze_draft",
|
| 158 |
+
"analyze_input_widget",
|
| 159 |
"analyze_output",
|
| 160 |
"optimize_input",
|
| 161 |
"optimize_input_widget",
|
|
|
|
| 202 |
preset_cols = st.columns(2)
|
| 203 |
with preset_cols[0]:
|
| 204 |
if st.button("Use strong AMP example"):
|
| 205 |
+
ex = "RGGRLCYCRGWICFCVGR"
|
| 206 |
+
st.session_state.predict_input_widget = ex
|
| 207 |
+
st.session_state.predict_input_saved = ex
|
| 208 |
st.rerun()
|
| 209 |
with preset_cols[1]:
|
| 210 |
if st.button("Use weak sequence example"):
|
| 211 |
+
ex = "KAEEEVEKNKEEAEEKAEKKIAE"
|
| 212 |
+
st.session_state.predict_input_widget = ex
|
| 213 |
+
st.session_state.predict_input_saved = ex
|
| 214 |
st.rerun()
|
| 215 |
|
| 216 |
+
# Restore textarea after navigating away (widget key may have been dropped).
|
| 217 |
+
if "predict_input_widget" not in st.session_state:
|
| 218 |
+
st.session_state.predict_input_widget = st.session_state.predict_input_saved
|
| 219 |
+
|
| 220 |
seq_input = st.text_area(
|
| 221 |
"Enter peptide sequences (one per line):",
|
| 222 |
height=150,
|
| 223 |
key="predict_input_widget",
|
| 224 |
+
on_change=_sync_predict_input_saved,
|
| 225 |
)
|
| 226 |
+
_sync_predict_input_saved()
|
| 227 |
uploaded_file = st.file_uploader("Or upload a FASTA/text file", type=["txt", "fasta"])
|
| 228 |
|
| 229 |
# Show quick length guidance before running the model.
|
|
|
|
| 312 |
|
| 313 |
# Match optimizer-like boxed input style for consistent UI spacing.
|
| 314 |
with st.container(border=True):
|
| 315 |
+
if "analyze_input_widget" not in st.session_state:
|
| 316 |
+
init = st.session_state.analyze_draft or st.session_state.analyze_input
|
| 317 |
+
st.session_state.analyze_input_widget = init
|
| 318 |
+
st.text_input(
|
| 319 |
"Enter a peptide sequence to analyze:",
|
| 320 |
+
key="analyze_input_widget",
|
| 321 |
+
on_change=_sync_analyze_draft,
|
| 322 |
)
|
| 323 |
+
_sync_analyze_draft()
|
| 324 |
+
seq = st.session_state.analyze_draft
|
| 325 |
|
| 326 |
warn = sequence_length_warning(seq)
|
| 327 |
if warn:
|
|
|
|
| 353 |
|
| 354 |
# Save computed payload for display + report exports below.
|
| 355 |
st.session_state.analyze_input = seq
|
| 356 |
+
st.session_state.analyze_draft = seq
|
| 357 |
st.session_state.analyze_output = (label, conf, conf_display, comp, props, analysis)
|
| 358 |
|
| 359 |
# Render last computed analysis block.
|
|
|
|
| 548 |
st.header("Peptide Optimizer")
|
| 549 |
|
| 550 |
with st.container(border=True):
|
| 551 |
+
if "optimize_input_widget" not in st.session_state:
|
| 552 |
+
st.session_state.optimize_input_widget = st.session_state.optimize_input
|
| 553 |
st.text_input(
|
| 554 |
"Enter a peptide sequence to optimize:",
|
| 555 |
key="optimize_input_widget",
|
| 556 |
+
on_change=_sync_optimize_input,
|
| 557 |
)
|
| 558 |
+
_sync_optimize_input()
|
| 559 |
+
seq = st.session_state.optimize_input
|
|
|
|
| 560 |
|
| 561 |
warn_opt = sequence_length_warning(seq) if seq else None
|
| 562 |
if warn_opt:
|
|
|
|
| 632 |
elif page == "Visualize":
|
| 633 |
st.header("Peptide Visualizer")
|
| 634 |
with st.container(border=True):
|
| 635 |
+
if "visualize_peptide_input_widget" not in st.session_state:
|
| 636 |
+
st.session_state.visualize_peptide_input_widget = st.session_state.visualize_peptide_input
|
| 637 |
st.text_input(
|
| 638 |
"Enter a peptide sequence to visualize:",
|
| 639 |
key="visualize_peptide_input_widget",
|
| 640 |
+
on_change=_sync_visualize_peptide_input,
|
| 641 |
)
|
| 642 |
+
_sync_visualize_peptide_input()
|
|
|
|
|
|
|
| 643 |
seq_viz = (st.session_state.get("visualize_peptide_input") or "").strip()
|
| 644 |
clean_viz = "".join(c for c in seq_viz.upper() if not c.isspace())
|
| 645 |
if clean_viz:
|
StreamlitApp/utils/analyze.py
CHANGED
|
@@ -10,6 +10,7 @@ def aa_composition(sequence):
|
|
| 10 |
|
| 11 |
def compute_properties(sequence):
|
| 12 |
# Compute simple length, mass, hydrophobicity, and net-charge signals.
|
|
|
|
| 13 |
aa_weights = {'A': 89.1, 'R': 174.2, 'N': 132.1, 'D': 133.1, 'C': 121.2,
|
| 14 |
'E': 147.1, 'Q': 146.2, 'G': 75.1, 'H': 155.2, 'I': 131.2,
|
| 15 |
'L': 131.2, 'K': 146.2, 'M': 149.2, 'F': 165.2, 'P': 115.1,
|
|
|
|
| 10 |
|
| 11 |
def compute_properties(sequence):
|
| 12 |
# Compute simple length, mass, hydrophobicity, and net-charge signals.
|
| 13 |
+
# Hydrophobic fraction uses AILMFWYV; charge = K+R+H minus D+E (rough heuristic).
|
| 14 |
aa_weights = {'A': 89.1, 'R': 174.2, 'N': 132.1, 'D': 133.1, 'C': 121.2,
|
| 15 |
'E': 147.1, 'Q': 146.2, 'G': 75.1, 'H': 155.2, 'I': 131.2,
|
| 16 |
'L': 131.2, 'K': 146.2, 'M': 149.2, 'F': 165.2, 'P': 115.1,
|
StreamlitApp/utils/optimize.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
# Heuristic mutation search used by the Optimize page.
|
|
|
|
| 2 |
import random
|
| 3 |
from utils.predict import predict_amp
|
| 4 |
|
|
|
|
| 1 |
# Heuristic mutation search used by the Optimize page.
|
| 2 |
+
# Each round scores single-site mutants with predict_amp; accepts the best gain above the threshold.
|
| 3 |
import random
|
| 4 |
from utils.predict import predict_amp
|
| 5 |
|
StreamlitApp/utils/predict.py
CHANGED
|
@@ -6,11 +6,12 @@ import streamlit as st
|
|
| 6 |
from torch import nn
|
| 7 |
from transformers import BertModel, BertTokenizer
|
| 8 |
|
| 9 |
-
MODEL_INPUT_DIM = 1024
|
| 10 |
MODEL_ARCH = "FastMLP"
|
| 11 |
-
PROTBERT_MODEL_NAME = "Rostlab/prot_bert"
|
| 12 |
|
| 13 |
class FastMLP(nn.Module):
|
|
|
|
| 14 |
def __init__(self, input_dim=MODEL_INPUT_DIM):
|
| 15 |
super(FastMLP, self).__init__()
|
| 16 |
self.layers = nn.Sequential(
|
|
@@ -40,6 +41,7 @@ def _load_checkpoint(path: pathlib.Path):
|
|
| 40 |
|
| 41 |
|
| 42 |
def _infer_first_layer_input_dim(state_dict: dict) -> int | None:
|
|
|
|
| 43 |
w = state_dict.get("layers.0.weight")
|
| 44 |
if w is None:
|
| 45 |
return None
|
|
@@ -49,6 +51,7 @@ def _infer_first_layer_input_dim(state_dict: dict) -> int | None:
|
|
| 49 |
|
| 50 |
|
| 51 |
def _normalize_sequence(sequence: str) -> str:
|
|
|
|
| 52 |
return "".join(c for c in str(sequence).upper() if not c.isspace())
|
| 53 |
|
| 54 |
|
|
@@ -64,6 +67,7 @@ def load_model():
|
|
| 64 |
repo_root / "models" / "ampMLModel.pt",
|
| 65 |
streamlitapp_dir / "models" / "ampMLModel.pt",
|
| 66 |
]
|
|
|
|
| 67 |
model_path = next((p for p in candidates if p.exists()), candidates[0])
|
| 68 |
|
| 69 |
if not model_path.exists():
|
|
@@ -125,6 +129,7 @@ def encode_sequence(seq, model_bundle):
|
|
| 125 |
|
| 126 |
|
| 127 |
def get_embedding_extractor(model_bundle):
|
|
|
|
| 128 |
classifier = model_bundle["classifier"]
|
| 129 |
extractor = torch.nn.Sequential(*list(classifier.layers)[:-1])
|
| 130 |
extractor.eval()
|
|
|
|
| 6 |
from torch import nn
|
| 7 |
from transformers import BertModel, BertTokenizer
|
| 8 |
|
| 9 |
+
MODEL_INPUT_DIM = 1024 # ProtBERT pooled embedding size; MLP first layer must match.
|
| 10 |
MODEL_ARCH = "FastMLP"
|
| 11 |
+
PROTBERT_MODEL_NAME = "Rostlab/prot_bert" # HF id for tokenizer + encoder weights.
|
| 12 |
|
| 13 |
class FastMLP(nn.Module):
|
| 14 |
+
# Small classifier head on top of frozen ProtBERT embeddings at inference.
|
| 15 |
def __init__(self, input_dim=MODEL_INPUT_DIM):
|
| 16 |
super(FastMLP, self).__init__()
|
| 17 |
self.layers = nn.Sequential(
|
|
|
|
| 41 |
|
| 42 |
|
| 43 |
def _infer_first_layer_input_dim(state_dict: dict) -> int | None:
|
| 44 |
+
# Infer MLP input dim from Linear weight shape (out_features, in_features).
|
| 45 |
w = state_dict.get("layers.0.weight")
|
| 46 |
if w is None:
|
| 47 |
return None
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
def _normalize_sequence(sequence: str) -> str:
|
| 54 |
+
# Uppercase + strip whitespace so tokenization matches training conventions.
|
| 55 |
return "".join(c for c in str(sequence).upper() if not c.isspace())
|
| 56 |
|
| 57 |
|
|
|
|
| 67 |
repo_root / "models" / "ampMLModel.pt",
|
| 68 |
streamlitapp_dir / "models" / "ampMLModel.pt",
|
| 69 |
]
|
| 70 |
+
# Prefer first existing path so local / HF layouts both work.
|
| 71 |
model_path = next((p for p in candidates if p.exists()), candidates[0])
|
| 72 |
|
| 73 |
if not model_path.exists():
|
|
|
|
| 129 |
|
| 130 |
|
| 131 |
def get_embedding_extractor(model_bundle):
|
| 132 |
+
# Penultimate MLP activations for t-SNE (same depth as training-time “embedding” use).
|
| 133 |
classifier = model_bundle["classifier"]
|
| 134 |
extractor = torch.nn.Sequential(*list(classifier.layers)[:-1])
|
| 135 |
extractor.eval()
|
StreamlitApp/utils/rate_limit.py
CHANGED
|
@@ -12,7 +12,7 @@ class RateLimiter:
|
|
| 12 |
def allow(self) -> bool:
|
| 13 |
now = time.time()
|
| 14 |
|
| 15 |
-
#
|
| 16 |
while self.calls and self.calls[0] <= now - self.period:
|
| 17 |
self.calls.popleft()
|
| 18 |
if len(self.calls) < self.max_calls:
|
|
|
|
| 12 |
def allow(self) -> bool:
|
| 13 |
now = time.time()
|
| 14 |
|
| 15 |
+
# Sliding window: drop calls older than `period` seconds.
|
| 16 |
while self.calls and self.calls[0] <= now - self.period:
|
| 17 |
self.calls.popleft()
|
| 18 |
if len(self.calls) < self.max_calls:
|
StreamlitApp/utils/shared_ui.py
CHANGED
|
@@ -18,11 +18,12 @@ def predicted_confidence(row: Dict) -> Optional[float]:
|
|
| 18 |
return None
|
| 19 |
if pred == "AMP":
|
| 20 |
return p_amp
|
| 21 |
-
#
|
| 22 |
return 1.0 - p_amp
|
| 23 |
|
| 24 |
|
| 25 |
def format_conf_percent(conf_prob: float, digits: int = 1) -> str:
|
|
|
|
| 26 |
return f"{round(conf_prob * 100, digits)}%"
|
| 27 |
|
| 28 |
|
|
@@ -99,6 +100,7 @@ def mutation_heatmap_html(original: str, final: str) -> str:
|
|
| 99 |
|
| 100 |
|
| 101 |
def mutation_diff_table(original: str, final: str) -> List[Dict]:
|
|
|
|
| 102 |
orig = original or ""
|
| 103 |
fin = final or ""
|
| 104 |
max_len = max(len(orig), len(fin))
|
|
@@ -118,6 +120,7 @@ def mutation_diff_table(original: str, final: str) -> List[Dict]:
|
|
| 118 |
|
| 119 |
|
| 120 |
def _ideal_distance_to_interval(value: float, low: float, high: float) -> float:
|
|
|
|
| 121 |
if low <= value <= high:
|
| 122 |
return 0.0
|
| 123 |
if value < low:
|
|
@@ -172,6 +175,7 @@ def optimization_summary(orig_seq: str, orig_conf: float, final_seq: str, final_
|
|
| 172 |
|
| 173 |
|
| 174 |
def sequence_length_warning(seq: str) -> Optional[str]:
|
|
|
|
| 175 |
if not seq:
|
| 176 |
return None
|
| 177 |
n = len(seq)
|
|
@@ -312,6 +316,7 @@ def build_analysis_summary_text(
|
|
| 312 |
props: Dict,
|
| 313 |
analysis_lines: List[str],
|
| 314 |
) -> str:
|
|
|
|
| 315 |
length = props.get("Length", len(sequence))
|
| 316 |
charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
|
| 317 |
hydro = props.get("Hydrophobic Fraction", props.get("Hydrophobic", 0))
|
|
|
|
| 18 |
return None
|
| 19 |
if pred == "AMP":
|
| 20 |
return p_amp
|
| 21 |
+
# Non-AMP: use complement so “confidence” matches the displayed class.
|
| 22 |
return 1.0 - p_amp
|
| 23 |
|
| 24 |
|
| 25 |
def format_conf_percent(conf_prob: float, digits: int = 1) -> str:
|
| 26 |
+
# Probability in [0,1] -> percent string for UI / exports.
|
| 27 |
return f"{round(conf_prob * 100, digits)}%"
|
| 28 |
|
| 29 |
|
|
|
|
| 100 |
|
| 101 |
|
| 102 |
def mutation_diff_table(original: str, final: str) -> List[Dict]:
|
| 103 |
+
# Side-by-side per-position rows for the optimizer diff expander.
|
| 104 |
orig = original or ""
|
| 105 |
fin = final or ""
|
| 106 |
max_len = max(len(orig), len(fin))
|
|
|
|
| 120 |
|
| 121 |
|
| 122 |
def _ideal_distance_to_interval(value: float, low: float, high: float) -> float:
|
| 123 |
+
# Zero if inside [low, high]; else distance to nearest bound (hydrophobic “ideal band”).
|
| 124 |
if low <= value <= high:
|
| 125 |
return 0.0
|
| 126 |
if value < low:
|
|
|
|
| 175 |
|
| 176 |
|
| 177 |
def sequence_length_warning(seq: str) -> Optional[str]:
|
| 178 |
+
# Soft guardrails for typical AMP length; model itself has no hard cutoff.
|
| 179 |
if not seq:
|
| 180 |
return None
|
| 181 |
n = len(seq)
|
|
|
|
| 316 |
props: Dict,
|
| 317 |
analysis_lines: List[str],
|
| 318 |
) -> str:
|
| 319 |
+
# Flat text blob for Analyze page TXT download.
|
| 320 |
length = props.get("Length", len(sequence))
|
| 321 |
charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
|
| 322 |
hydro = props.get("Hydrophobic Fraction", props.get("Hydrophobic", 0))
|
StreamlitApp/utils/tsne.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
# t-SNE page: optional helper embedding + scatter (StreamlitApp also runs t-SNE inline with Plotly).
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
from sklearn.manifold import TSNE
|
|
@@ -21,6 +22,7 @@ def tsne_visualization(sequences, model):
|
|
| 21 |
|
| 22 |
embeddings = np.vstack(embeddings)
|
| 23 |
|
|
|
|
| 24 |
perplexity = min(30, len(sequences) - 1)
|
| 25 |
if perplexity < 2:
|
| 26 |
st.warning("Need at least 2 sequences for visualization.")
|
|
|
|
| 1 |
# t-SNE page: optional helper embedding + scatter (StreamlitApp also runs t-SNE inline with Plotly).
|
| 2 |
+
# Kept for reuse; main app path uses the same encode_sequence + MLP hidden features.
|
| 3 |
import pandas as pd
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
from sklearn.manifold import TSNE
|
|
|
|
| 22 |
|
| 23 |
embeddings = np.vstack(embeddings)
|
| 24 |
|
| 25 |
+
# Perplexity must be < n_samples; cap at 30 for stability on small sets.
|
| 26 |
perplexity = min(30, len(sequences) - 1)
|
| 27 |
if perplexity < 2:
|
| 28 |
st.warning("Need at least 2 sequences for visualization.")
|
StreamlitApp/utils/visualize.py
CHANGED
|
@@ -18,6 +18,7 @@ _FALLBACK_KNOWN_AMPS: Tuple[str, ...] = (
|
|
| 18 |
)
|
| 19 |
|
| 20 |
def _amp_data_csv_path() -> pathlib.Path:
|
|
|
|
| 21 |
# StreamlitApp/utils/visualize.py -> repo root is parents[2]
|
| 22 |
return pathlib.Path(__file__).resolve().parents[2] / "Data" / "ampData.csv"
|
| 23 |
|
|
@@ -192,7 +193,7 @@ COMPACT_MAP_LEGEND: str = """
|
|
| 192 |
|
| 193 |
|
| 194 |
def plot_helical_wheel(sequence: str, figsize: Tuple[float, float] = (6.2, 6.2)) -> Any:
|
| 195 |
-
#
|
| 196 |
import matplotlib.pyplot as plt
|
| 197 |
from matplotlib import patheffects as pe
|
| 198 |
|
|
@@ -388,6 +389,7 @@ def _helical_wheel_resultant(indices: List[int]) -> float:
|
|
| 388 |
return float(math.hypot(vx, vy))
|
| 389 |
|
| 390 |
|
|
|
|
| 391 |
def build_shape_visual_summary(
|
| 392 |
sequence: str,
|
| 393 |
*,
|
|
@@ -485,7 +487,7 @@ def render_3d_plotly(
|
|
| 485 |
*,
|
| 486 |
height: int = 460,
|
| 487 |
) -> bool:
|
| 488 |
-
#
|
| 489 |
try:
|
| 490 |
import plotly.graph_objects as go
|
| 491 |
import streamlit as st
|
|
|
|
| 18 |
)
|
| 19 |
|
| 20 |
def _amp_data_csv_path() -> pathlib.Path:
|
| 21 |
+
# `Data/ampData.csv`: label=1 rows become KNOWN_AMPS for “similar AMP” lookup.
|
| 22 |
# StreamlitApp/utils/visualize.py -> repo root is parents[2]
|
| 23 |
return pathlib.Path(__file__).resolve().parents[2] / "Data" / "ampData.csv"
|
| 24 |
|
|
|
|
| 193 |
|
| 194 |
|
| 195 |
def plot_helical_wheel(sequence: str, figsize: Tuple[float, float] = (6.2, 6.2)) -> Any:
|
| 196 |
+
# Polar wheel: 100°/residue, same phase as `helix_coordinates` / 3D trace (not a solved structure).
|
| 197 |
import matplotlib.pyplot as plt
|
| 198 |
from matplotlib import patheffects as pe
|
| 199 |
|
|
|
|
| 389 |
return float(math.hypot(vx, vy))
|
| 390 |
|
| 391 |
|
| 392 |
+
# Heuristic bullets from wheel geometry + residue classes; not a second classifier.
|
| 393 |
def build_shape_visual_summary(
|
| 394 |
sequence: str,
|
| 395 |
*,
|
|
|
|
| 487 |
*,
|
| 488 |
height: int = 460,
|
| 489 |
) -> bool:
|
| 490 |
+
# Plotly: CA helix trace + residue markers (same geometry as wheel / 3Dmol).
|
| 491 |
try:
|
| 492 |
import plotly.graph_objects as go
|
| 493 |
import streamlit as st
|