| # t-SNE page: optional helper embedding + scatter (StreamlitApp also runs t-SNE inline with Plotly). | |
| # Kept for reuse; main app path uses the same encode_sequence + MLP hidden features. | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| from sklearn.manifold import TSNE | |
| import streamlit as st | |
| import torch | |
| import numpy as np | |
| from utils.predict import encode_sequence, get_embedding_extractor | |
def tsne_visualization(sequences, model):
    """Project learned sequence embeddings to 2D with t-SNE and plot them.

    Encodes each sequence, extracts a hidden-layer embedding from the model,
    reduces the stacked embeddings to 2D with t-SNE, and renders a Streamlit
    scatter chart. Shows a warning and returns early if there are too few
    sequences.

    Args:
        sequences: Sized iterable of sequences accepted by ``encode_sequence``.
        model: Trained model understood by ``encode_sequence`` and
            ``get_embedding_extractor`` (see utils.predict).
    """
    # sklearn requires perplexity < n_samples; capped at 30 for stability on
    # small sets. With the `< 2` floor we effectively need >= 3 sequences.
    # Check BEFORE the expensive embedding loop so we don't waste inference
    # work only to bail out afterwards (the original guard ran too late, and
    # its message wrongly said "at least 2").
    perplexity = min(30, len(sequences) - 1)
    if perplexity < 2:
        st.warning("Need at least 3 sequences for visualization.")
        return
    st.info("Generating embeddings... this may take a moment.")
    embedding_extractor = get_embedding_extractor(model)
    embeddings = []
    for seq in sequences:
        x = torch.tensor(encode_sequence(seq, model), dtype=torch.float32).unsqueeze(0)
        with torch.no_grad():
            # Use an early hidden layer as a compact learned representation.
            emb = embedding_extractor(x)
        # NOTE(review): .numpy() assumes the embedding lives on CPU — confirm
        # the extractor never returns a CUDA tensor.
        embeddings.append(emb.numpy().flatten())
    embeddings = np.vstack(embeddings)
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    reduced = tsne.fit_transform(embeddings)
    df = pd.DataFrame(reduced, columns=["x", "y"])
    st.success("t-SNE visualization complete.")
    st.scatter_chart(df)