Sync from GitHub (preserve manual model files)
Browse files
- README.md +19 -8
- StreamlitApp/StreamlitApp.py +12 -9
- StreamlitApp/utils/analyze.py +1 -0
- StreamlitApp/utils/optimize.py +1 -0
- StreamlitApp/utils/peptide_extras.py +2 -2
- StreamlitApp/utils/predict.py +1 -0
- StreamlitApp/utils/rateLimit.py +1 -1
- StreamlitApp/utils/ui_helpers.py +2 -0
- StreamlitApp/utils/visualize.py +1 -1
- requirements.txt +9 -9
- space_card.yaml +12 -0
README.md
CHANGED
|
@@ -8,15 +8,26 @@ sdk_version: "1.41.1"
|
|
| 8 |
python_version: "3.13"
|
| 9 |
app_file: StreamlitApp/StreamlitApp.py
|
| 10 |
pinned: false
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
# PeptideAI
|
| 14 |
-
Antimicrobial Peptide (AMP) Prediction App
|
| 15 |
-
A machine learning web app that predicts antimicrobial activity from peptide sequences.
|
| 16 |
-
Built with Python, PyTorch, and Streamlit, it uses ProtBERT embeddings to represent biological sequences and a custom neural network classifier for prediction.
|
| 17 |
-
Includes features for:
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
python_version: "3.13"
|
| 9 |
app_file: StreamlitApp/StreamlitApp.py
|
| 10 |
pinned: false
|
| 11 |
+
short_description: AMP peptide scoring, composition, wheel & 3D views.
|
| 12 |
---
|
| 13 |
|
| 14 |
# PeptideAI
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
+
**Live app:** [huggingface.co/spaces/m0ksh/PeptideAI](https://huggingface.co/spaces/m0ksh/PeptideAI)
|
| 17 |
+
|
| 18 |
+
PeptideAI is a Streamlit app for working with short peptide sequences. It estimates whether a sequence might behave like an antimicrobial peptide (AMP) using a small neural network, and adds views for composition, rough physicochemical numbers, optional mutation search, and helix-style visualization.
|
| 19 |
+
|
| 20 |
+
## What you can do
|
| 21 |
+
|
| 22 |
+
- Get an AMP vs non-AMP prediction with a confidence-style score
|
| 23 |
+
- See amino acid composition and simple properties (length, charge, hydrophobic fraction, mass)
|
| 24 |
+
- Run a greedy “optimize” pass that searches for mutations the model scores higher
|
| 25 |
+
- Visualize a helix-like trace and helical wheel (approximation, not a solved structure)
|
| 26 |
+
- Run t-SNE on embeddings when you have several sequences
|
| 27 |
+
|
| 28 |
+
## Run it on your machine
|
| 29 |
+
|
| 30 |
+
```bash
|
| 31 |
+
pip install -r requirements.txt
|
| 32 |
+
streamlit run StreamlitApp/StreamlitApp.py
|
| 33 |
+
```
|
StreamlitApp/StreamlitApp.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# Main Streamlit entrypoint
|
| 2 |
import streamlit as st
|
| 3 |
import pandas as pd
|
| 4 |
import numpy as np
|
|
@@ -39,7 +39,7 @@ from utils.peptide_extras import (
|
|
| 39 |
)
|
| 40 |
|
| 41 |
try:
|
| 42 |
-
import pyperclip
|
| 43 |
except Exception:
|
| 44 |
pyperclip = None
|
| 45 |
|
|
@@ -68,17 +68,17 @@ st.divider()
|
|
| 68 |
|
| 69 |
# Initialize session keys so navigation keeps user state across pages.
|
| 70 |
if "predictions" not in st.session_state:
|
| 71 |
-
st.session_state.predictions = []
|
| 72 |
if "predict_ran" not in st.session_state:
|
| 73 |
st.session_state.predict_ran = False
|
| 74 |
if "predict_input_widget" not in st.session_state:
|
| 75 |
st.session_state.predict_input_widget = ""
|
| 76 |
if "analyze_input" not in st.session_state:
|
| 77 |
-
st.session_state.analyze_input = ""
|
| 78 |
if "analyze_output" not in st.session_state:
|
| 79 |
-
st.session_state.analyze_output = None
|
| 80 |
if "optimize_input" not in st.session_state:
|
| 81 |
-
st.session_state.optimize_input = ""
|
| 82 |
if "optimize_output" not in st.session_state:
|
| 83 |
st.session_state.optimize_output = None # (orig_seq, orig_conf, improved_seq, improved_conf, history)
|
| 84 |
if "optimize_last_ran_input" not in st.session_state:
|
|
@@ -131,7 +131,7 @@ if st.sidebar.button("Clear All Fields"):
|
|
| 131 |
st.stop()
|
| 132 |
|
| 133 |
|
| 134 |
-
#
|
| 135 |
model = load_model()
|
| 136 |
|
| 137 |
# Shared style tweak keeps expander spacing consistent across pages.
|
|
@@ -473,7 +473,7 @@ elif page == "Analyze":
|
|
| 473 |
mime="text/plain",
|
| 474 |
)
|
| 475 |
|
| 476 |
-
# Optimize page:
|
| 477 |
elif page == "Optimize":
|
| 478 |
st.header("Peptide Optimizer")
|
| 479 |
|
|
@@ -620,6 +620,7 @@ elif page == "Visualize":
|
|
| 620 |
st.markdown(f"- {line}")
|
| 621 |
|
| 622 |
# t-SNE page: embedding projection for multi-sequence exploration.
|
|
|
|
| 623 |
elif page == "t-SNE":
|
| 624 |
st.header("t-SNE Visualizer")
|
| 625 |
st.write("Upload peptide sequences (FASTA or plain list) to embed sequences and explore clusters with t-SNE.")
|
|
@@ -714,12 +715,14 @@ elif page == "t-SNE":
|
|
| 714 |
• Coloring by properties reveals biochemical trends.
|
| 715 |
""")
|
| 716 |
|
| 717 |
-
# About
|
| 718 |
elif page == "About":
|
| 719 |
st.header("About the Project")
|
| 720 |
st.markdown("""
|
| 721 |
PeptideAI is a lightweight Streamlit app for exploring antimicrobial peptide (AMP) sequences.
|
| 722 |
|
|
|
|
|
|
|
| 723 |
It uses a trained neural network to estimate whether a peptide is likely to be antimicrobial, then helps you interpret and improve candidates:
|
| 724 |
- **AMP Predictor**: batch predictions from multi-line or FASTA input, length warnings, persisted results, top-candidate highlight, and CSV export.
|
| 725 |
- **Peptide Analyzer**: single-sequence numerical and textual analysis — AMP prediction, composition, physicochemical table + radar, similarity to known AMPs, and report export.
|
|
|
|
| 1 |
+
# Main Streamlit entrypoint: one file, several “pages” chosen from the sidebar.
|
| 2 |
import streamlit as st
|
| 3 |
import pandas as pd
|
| 4 |
import numpy as np
|
|
|
|
| 39 |
)
|
| 40 |
|
| 41 |
try:
|
| 42 |
+
import pyperclip
|
| 43 |
except Exception:
|
| 44 |
pyperclip = None
|
| 45 |
|
|
|
|
| 68 |
|
| 69 |
# Initialize session keys so navigation keeps user state across pages.
|
| 70 |
if "predictions" not in st.session_state:
|
| 71 |
+
st.session_state.predictions = [] # list of dicts
|
| 72 |
if "predict_ran" not in st.session_state:
|
| 73 |
st.session_state.predict_ran = False
|
| 74 |
if "predict_input_widget" not in st.session_state:
|
| 75 |
st.session_state.predict_input_widget = ""
|
| 76 |
if "analyze_input" not in st.session_state:
|
| 77 |
+
st.session_state.analyze_input = "" # last analyze input
|
| 78 |
if "analyze_output" not in st.session_state:
|
| 79 |
+
st.session_state.analyze_output = None # (label, conf_display, comp, props, analysis)
|
| 80 |
if "optimize_input" not in st.session_state:
|
| 81 |
+
st.session_state.optimize_input = "" # last optimize input
|
| 82 |
if "optimize_output" not in st.session_state:
|
| 83 |
st.session_state.optimize_output = None # (orig_seq, orig_conf, improved_seq, improved_conf, history)
|
| 84 |
if "optimize_last_ran_input" not in st.session_state:
|
|
|
|
| 131 |
st.stop()
|
| 132 |
|
| 133 |
|
| 134 |
+
# Load weights once; every page shares this same model instance.
|
| 135 |
model = load_model()
|
| 136 |
|
| 137 |
# Shared style tweak keeps expander spacing consistent across pages.
|
|
|
|
| 473 |
mime="text/plain",
|
| 474 |
)
|
| 475 |
|
| 476 |
+
# Optimize page: Mutation search with per-step diagnostics.
|
| 477 |
elif page == "Optimize":
|
| 478 |
st.header("Peptide Optimizer")
|
| 479 |
|
|
|
|
| 620 |
st.markdown(f"- {line}")
|
| 621 |
|
| 622 |
# t-SNE page: embedding projection for multi-sequence exploration.
|
| 623 |
+
# --- t-SNE on first-layer activations ---
|
| 624 |
elif page == "t-SNE":
|
| 625 |
st.header("t-SNE Visualizer")
|
| 626 |
st.write("Upload peptide sequences (FASTA or plain list) to embed sequences and explore clusters with t-SNE.")
|
|
|
|
| 715 |
• Coloring by properties reveals biochemical trends.
|
| 716 |
""")
|
| 717 |
|
| 718 |
+
# --- About (overview + disclaimer) ---
|
| 719 |
elif page == "About":
|
| 720 |
st.header("About the Project")
|
| 721 |
st.markdown("""
|
| 722 |
PeptideAI is a lightweight Streamlit app for exploring antimicrobial peptide (AMP) sequences.
|
| 723 |
|
| 724 |
+
A hosted copy may be available at [Hugging Face Spaces](https://huggingface.co/spaces/m0ksh/PeptideAI).
|
| 725 |
+
|
| 726 |
It uses a trained neural network to estimate whether a peptide is likely to be antimicrobial, then helps you interpret and improve candidates:
|
| 727 |
- **AMP Predictor**: batch predictions from multi-line or FASTA input, length warnings, persisted results, top-candidate highlight, and CSV export.
|
| 728 |
- **Peptide Analyzer**: single-sequence numerical and textual analysis — AMP prediction, composition, physicochemical table + radar, similarity to known AMPs, and report export.
|
StreamlitApp/utils/analyze.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
# Sequence composition and physicochemical property helpers.
|
|
|
|
| 2 |
from collections import Counter
|
| 3 |
|
| 4 |
def aa_composition(sequence):
|
|
|
|
| 1 |
# Sequence composition and physicochemical property helpers.
|
| 2 |
+
# Mass and charge are textbook approximations for the UI, not for publishing numbers.
|
| 3 |
from collections import Counter
|
| 4 |
|
| 5 |
def aa_composition(sequence):
|
StreamlitApp/utils/optimize.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
# Heuristic mutation search used by the Optimize page.
|
|
|
|
| 2 |
import random
|
| 3 |
from utils.predict import predict_amp
|
| 4 |
|
|
|
|
| 1 |
# Heuristic mutation search used by the Optimize page.
|
| 2 |
+
# It’s greedy and uses a few residue buckets — fun to play with, not a real design pipeline.
|
| 3 |
import random
|
| 4 |
from utils.predict import predict_amp
|
| 5 |
|
StreamlitApp/utils/peptide_extras.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
# Optional peptide UI helpers: 3D approximation (py3Dmol), known-AMP similarity,
|
| 2 |
-
#
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
import csv
|
|
|
|
| 1 |
+
# Optional peptide UI helpers: 3D approximation (py3Dmol / Plotly), known-AMP similarity, helical wheel, etc.
|
| 2 |
+
# None of this feeds the classifier — it’s for intuition and teaching.
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
import csv
|
StreamlitApp/utils/predict.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
# Model loading, sequence encoding, and AMP inference helpers.
|
|
|
|
| 2 |
import pathlib
|
| 3 |
import numpy as np
|
| 4 |
import torch
|
|
|
|
| 1 |
# Model loading, sequence encoding, and AMP inference helpers.
|
| 2 |
+
# Features are flattened one-hots (length × 20), not transformer embeddings — keeps the app small and CPU-friendly.
|
| 3 |
import pathlib
|
| 4 |
import numpy as np
|
| 5 |
import torch
|
StreamlitApp/utils/rateLimit.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# Simple in-memory sliding-window rate limiter.
|
| 2 |
import time
|
| 3 |
from collections import deque
|
| 4 |
|
|
|
|
| 1 |
+
# Simple in-memory sliding-window rate limiter — good enough for a single Streamlit server.
|
| 2 |
import time
|
| 3 |
from collections import deque
|
| 4 |
|
StreamlitApp/utils/ui_helpers.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
# UI-facing formatting and summary helpers shared across pages.
|
|
|
|
| 2 |
import html as _html
|
| 3 |
from typing import Dict, List, Tuple, Optional
|
| 4 |
|
|
@@ -195,6 +196,7 @@ def sequence_health_label(conf_prob: float, charge: float, hydro_fraction: float
|
|
| 195 |
return "Unlikely AMP", "#d62728"
|
| 196 |
|
| 197 |
|
|
|
|
| 198 |
def build_analysis_insights(
|
| 199 |
label: str,
|
| 200 |
conf: float,
|
|
|
|
| 1 |
# UI-facing formatting and summary helpers shared across pages.
|
| 2 |
+
# Lots of HTML strings here — keep them boring and escaped where it matters.
|
| 3 |
import html as _html
|
| 4 |
from typing import Dict, List, Tuple, Optional
|
| 5 |
|
|
|
|
| 196 |
return "Unlikely AMP", "#d62728"
|
| 197 |
|
| 198 |
|
| 199 |
+
# Plain-language bullets for Analyze — rules of thumb, not a second model.
|
| 200 |
def build_analysis_insights(
|
| 201 |
label: str,
|
| 202 |
conf: float,
|
StreamlitApp/utils/visualize.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
#
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
from sklearn.manifold import TSNE
|
|
|
|
| 1 |
+
# t-SNE helper — uses the first linear layer as a quick embedding; main app duplicates this with Plotly inline.
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
from sklearn.manifold import TSNE
|
requirements.txt
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
-
streamlit
|
| 2 |
-
pandas
|
| 3 |
-
numpy
|
| 4 |
-
torch
|
| 5 |
-
scikit-learn
|
| 6 |
-
matplotlib
|
| 7 |
-
plotly
|
| 8 |
-
requests
|
| 9 |
-
py3dmol
|
|
|
|
| 1 |
+
streamlit #1
|
| 2 |
+
pandas #2
|
| 3 |
+
numpy #3
|
| 4 |
+
torch #4
|
| 5 |
+
scikit-learn #5
|
| 6 |
+
matplotlib #6
|
| 7 |
+
plotly #7
|
| 8 |
+
requests #8
|
| 9 |
+
py3dmol #9
|
space_card.yaml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Mirror of the YAML block at the top of README.md (Hugging Face Spaces reads README only).
|
| 2 |
+
|
| 3 |
+
title: PeptideAI
|
| 4 |
+
emoji: 🔬
|
| 5 |
+
colorFrom: blue
|
| 6 |
+
colorTo: purple
|
| 7 |
+
sdk: streamlit
|
| 8 |
+
sdk_version: "1.41.1"
|
| 9 |
+
python_version: "3.13"
|
| 10 |
+
app_file: StreamlitApp/StreamlitApp.py
|
| 11 |
+
pinned: false
|
| 12 |
+
short_description: AMP peptide scoring, composition, wheel & 3D views.
|