m0ksh committed on
Commit
8ec3495
·
verified ·
1 Parent(s): 09d954a

Sync from GitHub (preserve manual model files)

Browse files
README.md CHANGED
@@ -8,15 +8,26 @@ sdk_version: "1.41.1"
8
  python_version: "3.13"
9
  app_file: StreamlitApp/StreamlitApp.py
10
  pinned: false
 
11
  ---
12
 
13
  # PeptideAI
14
- Antimicrobial Peptide (AMP) Prediction App
15
- A machine learning web app that predicts antimicrobial activity from peptide sequences.
16
- Built with Python, PyTorch, and Streamlit, it uses ProtBERT embeddings to represent biological sequences and a custom neural network classifier for prediction.
17
- Includes features for:
18
 
19
- - AMP probability prediction
20
- - Amino acid composition analysis
21
- - Physicochemical property computation
22
- - t-SNE visualization of embeddings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  python_version: "3.13"
9
  app_file: StreamlitApp/StreamlitApp.py
10
  pinned: false
11
+ short_description: AMP peptide scoring, composition, wheel & 3D views.
12
  ---
13
 
14
  # PeptideAI
 
 
 
 
15
 
16
+ **Live app:** [huggingface.co/spaces/m0ksh/PeptideAI](https://huggingface.co/spaces/m0ksh/PeptideAI)
17
+
18
+ PeptideAI is a Streamlit app for working with short peptide sequences. It estimates whether a sequence might behave like an antimicrobial peptide (AMP) using a small neural network, and adds views for composition, rough physicochemical numbers, optional mutation search, and helix-style visualization.
19
+
20
+ ## What you can do
21
+
22
+ - Get an AMP vs non-AMP prediction with a confidence-style score
23
+ - See amino acid composition and simple properties (length, charge, hydrophobic fraction, mass)
24
+ - Run a greedy “optimize” pass that tries mutations the model likes more
25
+ - Visualize a helix-like trace and helical wheel (approximation, not a solved structure)
26
+ - Run t-SNE on embeddings when you have several sequences
27
+
28
+ ## Run it on your machine
29
+
30
+ ```bash
31
+ pip install -r requirements.txt
32
+ streamlit run StreamlitApp/StreamlitApp.py
33
+ ```
StreamlitApp/StreamlitApp.py CHANGED
@@ -1,4 +1,4 @@
1
- # Main Streamlit entrypoint wiring Predict, Analyze, Optimize, Visualize, and t-SNE pages.
2
  import streamlit as st
3
  import pandas as pd
4
  import numpy as np
@@ -39,7 +39,7 @@ from utils.peptide_extras import (
39
  )
40
 
41
  try:
42
- import pyperclip # Optional; may not exist in all environments.
43
  except Exception:
44
  pyperclip = None
45
 
@@ -68,17 +68,17 @@ st.divider()
68
 
69
  # Initialize session keys so navigation keeps user state across pages.
70
  if "predictions" not in st.session_state:
71
- st.session_state.predictions = [] # list of dicts
72
  if "predict_ran" not in st.session_state:
73
  st.session_state.predict_ran = False
74
  if "predict_input_widget" not in st.session_state:
75
  st.session_state.predict_input_widget = ""
76
  if "analyze_input" not in st.session_state:
77
- st.session_state.analyze_input = "" # last analyze input
78
  if "analyze_output" not in st.session_state:
79
- st.session_state.analyze_output = None # (label, conf_display, comp, props, analysis)
80
  if "optimize_input" not in st.session_state:
81
- st.session_state.optimize_input = "" # last optimize input
82
  if "optimize_output" not in st.session_state:
83
  st.session_state.optimize_output = None # (orig_seq, orig_conf, improved_seq, improved_conf, history)
84
  if "optimize_last_ran_input" not in st.session_state:
@@ -131,7 +131,7 @@ if st.sidebar.button("Clear All Fields"):
131
  st.stop()
132
 
133
 
134
- # Cache model weights once per server process for fast repeated inference.
135
  model = load_model()
136
 
137
  # Shared style tweak keeps expander spacing consistent across pages.
@@ -473,7 +473,7 @@ elif page == "Analyze":
473
  mime="text/plain",
474
  )
475
 
476
- # Optimize page: greedy mutation search with per-step diagnostics.
477
  elif page == "Optimize":
478
  st.header("Peptide Optimizer")
479
 
@@ -620,6 +620,7 @@ elif page == "Visualize":
620
  st.markdown(f"- {line}")
621
 
622
  # t-SNE page: embedding projection for multi-sequence exploration.
 
623
  elif page == "t-SNE":
624
  st.header("t-SNE Visualizer")
625
  st.write("Upload peptide sequences (FASTA or plain list) to embed sequences and explore clusters with t-SNE.")
@@ -714,12 +715,14 @@ elif page == "t-SNE":
714
  • Coloring by properties reveals biochemical trends.
715
  """)
716
 
717
- # About page: quick orientation + disclaimer for new users.
718
  elif page == "About":
719
  st.header("About the Project")
720
  st.markdown("""
721
  PeptideAI is a lightweight Streamlit app for exploring antimicrobial peptide (AMP) sequences.
722
 
 
 
723
  It uses a trained neural network to estimate whether a peptide is likely to be antimicrobial, then helps you interpret and improve candidates:
724
  - **AMP Predictor**: batch predictions from multi-line or FASTA input, length warnings, persisted results, top-candidate highlight, and CSV export.
725
  - **Peptide Analyzer**: single-sequence numerical and textual analysis — AMP prediction, composition, physicochemical table + radar, similarity to known AMPs, and report export.
 
1
+ # Main Streamlit entrypoint: one file, several “pages” chosen from the sidebar.
2
  import streamlit as st
3
  import pandas as pd
4
  import numpy as np
 
39
  )
40
 
41
  try:
42
+ import pyperclip
43
  except Exception:
44
  pyperclip = None
45
 
 
68
 
69
  # Initialize session keys so navigation keeps user state across pages.
70
  if "predictions" not in st.session_state:
71
+ st.session_state.predictions = [] # list of dicts
72
  if "predict_ran" not in st.session_state:
73
  st.session_state.predict_ran = False
74
  if "predict_input_widget" not in st.session_state:
75
  st.session_state.predict_input_widget = ""
76
  if "analyze_input" not in st.session_state:
77
+ st.session_state.analyze_input = "" # last analyze input
78
  if "analyze_output" not in st.session_state:
79
+ st.session_state.analyze_output = None # (label, conf_display, comp, props, analysis)
80
  if "optimize_input" not in st.session_state:
81
+ st.session_state.optimize_input = "" # last optimize input
82
  if "optimize_output" not in st.session_state:
83
  st.session_state.optimize_output = None # (orig_seq, orig_conf, improved_seq, improved_conf, history)
84
  if "optimize_last_ran_input" not in st.session_state:
 
131
  st.stop()
132
 
133
 
134
+ # Load weights once; every page shares this same model instance.
135
  model = load_model()
136
 
137
  # Shared style tweak keeps expander spacing consistent across pages.
 
473
  mime="text/plain",
474
  )
475
 
476
+ # Optimize page: mutation search with per-step diagnostics.
477
  elif page == "Optimize":
478
  st.header("Peptide Optimizer")
479
 
 
620
  st.markdown(f"- {line}")
621
 
622
  # t-SNE page: embedding projection for multi-sequence exploration.
623
+ # --- t-SNE on first-layer activations ---
624
  elif page == "t-SNE":
625
  st.header("t-SNE Visualizer")
626
  st.write("Upload peptide sequences (FASTA or plain list) to embed sequences and explore clusters with t-SNE.")
 
715
  • Coloring by properties reveals biochemical trends.
716
  """)
717
 
718
+ # --- About (overview + disclaimer) ---
719
  elif page == "About":
720
  st.header("About the Project")
721
  st.markdown("""
722
  PeptideAI is a lightweight Streamlit app for exploring antimicrobial peptide (AMP) sequences.
723
 
724
+ A hosted copy may be available at [Hugging Face Spaces](https://huggingface.co/spaces/m0ksh/PeptideAI).
725
+
726
  It uses a trained neural network to estimate whether a peptide is likely to be antimicrobial, then helps you interpret and improve candidates:
727
  - **AMP Predictor**: batch predictions from multi-line or FASTA input, length warnings, persisted results, top-candidate highlight, and CSV export.
728
  - **Peptide Analyzer**: single-sequence numerical and textual analysis — AMP prediction, composition, physicochemical table + radar, similarity to known AMPs, and report export.
StreamlitApp/utils/analyze.py CHANGED
@@ -1,4 +1,5 @@
1
  # Sequence composition and physicochemical property helpers.
 
2
  from collections import Counter
3
 
4
  def aa_composition(sequence):
 
1
  # Sequence composition and physicochemical property helpers.
2
+ # Mass and charge are textbook approximations for the UI, not for publishing numbers.
3
  from collections import Counter
4
 
5
  def aa_composition(sequence):
StreamlitApp/utils/optimize.py CHANGED
@@ -1,4 +1,5 @@
1
  # Heuristic mutation search used by the Optimize page.
 
2
  import random
3
  from utils.predict import predict_amp
4
 
 
1
  # Heuristic mutation search used by the Optimize page.
2
+ # It’s greedy and uses a few residue buckets — fun to play with, not a real design pipeline.
3
  import random
4
  from utils.predict import predict_amp
5
 
StreamlitApp/utils/peptide_extras.py CHANGED
@@ -1,5 +1,5 @@
1
- # Optional peptide UI helpers: 3D approximation (py3Dmol), known-AMP similarity, and residue highlighting.
2
- # This module is UI-oriented and does not alter model loading or prediction logic.
3
  from __future__ import annotations
4
 
5
  import csv
 
1
+ # Optional peptide UI helpers: 3D approximation (py3Dmol / Plotly), known-AMP similarity, helical wheel, etc.
2
+ # None of this feeds the classifier; it’s for intuition and teaching.
3
  from __future__ import annotations
4
 
5
  import csv
StreamlitApp/utils/predict.py CHANGED
@@ -1,4 +1,5 @@
1
  # Model loading, sequence encoding, and AMP inference helpers.
 
2
  import pathlib
3
  import numpy as np
4
  import torch
 
1
  # Model loading, sequence encoding, and AMP inference helpers.
2
+ # Features are flattened one-hots (length × 20), not transformer embeddings — keeps the app small and CPU-friendly.
3
  import pathlib
4
  import numpy as np
5
  import torch
StreamlitApp/utils/rateLimit.py CHANGED
@@ -1,4 +1,4 @@
1
- # Simple in-memory sliding-window rate limiter.
2
  import time
3
  from collections import deque
4
 
 
1
+ # Simple in-memory sliding-window rate limiter — good enough for a single Streamlit server.
2
  import time
3
  from collections import deque
4
 
StreamlitApp/utils/ui_helpers.py CHANGED
@@ -1,4 +1,5 @@
1
  # UI-facing formatting and summary helpers shared across pages.
 
2
  import html as _html
3
  from typing import Dict, List, Tuple, Optional
4
 
@@ -195,6 +196,7 @@ def sequence_health_label(conf_prob: float, charge: float, hydro_fraction: float
195
  return "Unlikely AMP", "#d62728"
196
 
197
 
 
198
  def build_analysis_insights(
199
  label: str,
200
  conf: float,
 
1
  # UI-facing formatting and summary helpers shared across pages.
2
+ # Lots of HTML strings here — keep them boring and escaped where it matters.
3
  import html as _html
4
  from typing import Dict, List, Tuple, Optional
5
 
 
196
  return "Unlikely AMP", "#d62728"
197
 
198
 
199
+ # Plain-language bullets for Analyze — rules of thumb, not a second model.
200
  def build_analysis_insights(
201
  label: str,
202
  conf: float,
StreamlitApp/utils/visualize.py CHANGED
@@ -1,4 +1,4 @@
1
- # Legacy t-SNE helper retained for ad-hoc embedding previews.
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
  from sklearn.manifold import TSNE
 
1
+ # t-SNE helper uses the first linear layer as a quick embedding; main app duplicates this with Plotly inline.
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
  from sklearn.manifold import TSNE
requirements.txt CHANGED
@@ -1,9 +1,9 @@
1
- streamlit
2
- pandas
3
- numpy
4
- torch
5
- scikit-learn
6
- matplotlib
7
- plotly
8
- requests
9
- py3dmol
 
1
+ streamlit #1
2
+ pandas #2
3
+ numpy #3
4
+ torch #4
5
+ scikit-learn #5
6
+ matplotlib #6
7
+ plotly #7
8
+ requests #8
9
+ py3dmol #9
space_card.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Mirror of the YAML block at the top of README.md (Hugging Face Spaces reads README only).
2
+
3
+ title: PeptideAI
4
+ emoji: 🔬
5
+ colorFrom: blue
6
+ colorTo: purple
7
+ sdk: streamlit
8
+ sdk_version: "1.41.1"
9
+ python_version: "3.13"
10
+ app_file: StreamlitApp/StreamlitApp.py
11
+ pinned: false
12
+ short_description: AMP peptide scoring, composition, wheel & 3D views.