# AliSaadatV's picture
# Add protein_aggregator package and example
# 28bbe23 verified
"""
Protein Sequence-Level Prediction with Multiple Token Aggregation Methods.
Extracts residue embeddings from ESM2 (frozen backbone) and performs
sequence-level prediction (e.g., localization) using 6 aggregation strategies:
1. Mean pooling
2. Max pooling
3. CLS token
4. GLOT (cosine-similarity token graph)
5. GLOT-Residue (protein residue contact graph via graphein)
6. Covariance pooling
References:
- GLOT: "Towards Improved Sentence Representations using Token Graphs" (arXiv:2603.03389)
- Covariance Pooling: https://www.goodfire.ai/research/covariance-pooling
- Graphein: https://graphein.ai/
"""
from .model import ProteinSequenceClassifier
from .aggregators import (
MeanPooling,
MaxPooling,
CLSPooling,
GLOTPooling,
GLOTResidueGraphPooling,
CovariancePooling,
)
# Public API of the package: these are the names exported by a star-import.
# The classifier comes first, followed by the six aggregators in the same
# order the module docstring enumerates them.
__all__ = [
    "ProteinSequenceClassifier",
    "MeanPooling",
    "MaxPooling",
    "CLSPooling",
    "GLOTPooling",
    "GLOTResidueGraphPooling",
    "CovariancePooling",
]