kaveh committed
Commit 4d886f4 · 1 Parent(s): 756cec6

Updated. First version.

.dockerignore ADDED
@@ -0,0 +1,12 @@
+ .git
+ .venv
+ venv
+ __pycache__
+ *.py[cod]
+ *.egg-info
+ .pytest_cache
+ .mypy_cache
+ .ruff_cache
+ .cursor
+ *.ipynb_checkpoints
+ notebooks
.gitignore CHANGED
@@ -1,3 +1,10 @@
  __pycache__/
 
- .DS_Store
+ .DS_Store
+ .venv/
+ venv/
+
+ # Precomputed explorer artifacts (regenerate with scripts/precompute_streamlit_cache.py)
+ streamlit_hf/cache/*.pkl
+ streamlit_hf/cache/*.parquet
+ streamlit_hf/cache/*.csv
Dockerfile ADDED
@@ -0,0 +1,17 @@
+ # Hugging Face Spaces (Streamlit + Docker). Port 7860.
+ # Build context: repository root. Upload `streamlit_hf/cache/*` (pickles + parquet) via Git LFS or CI.
+
+ FROM python:3.11-slim-bookworm
+
+ WORKDIR /app
+
+ COPY streamlit_hf/requirements-docker.txt /app/requirements-docker.txt
+ RUN pip install --no-cache-dir -r /app/requirements-docker.txt
+
+ COPY . /app
+
+ ENV PYTHONPATH=/app
+ ENV STREAMLIT_SERVER_HEADLESS=true
+ EXPOSE 7860
+
+ CMD ["streamlit", "run", "streamlit_hf/app.py", "--server.port", "7860", "--server.address", "0.0.0.0", "--browser.gatherUsageStats", "false"]
README.md ADDED
@@ -0,0 +1,59 @@
+ ---
+ title: FateFormer Explorer
+ short_description: Streamlit app to explore multimodal single-cell fate modeling (RNA, ATAC, metabolic flux, attention, and rankings).
+ emoji: 🧬
+ colorFrom: violet
+ colorTo: indigo
+ tags:
+ - streamlit
+ - single-cell
+ - multi-omics
+ - genomics
+ - atac-seq
+ - rna-seq
+ - metabolic-modeling
+ - deep-learning
+ - biology
+ license: mit
+ sdk: docker
+ app_port: 7860
+ ---
+
+ # FateFormerApp
+
+ ## Interactive explorer (Streamlit)
+
+ From the repo root, with the project virtualenv activated:
+
+ ```bash
+ PYTHONPATH=. streamlit run streamlit_hf/app.py
+ ```
+
+ The default local port is **8501**. The **Dockerfile** (and Hugging Face Space card above) use **7860** to match Spaces.
+
+ ### Updating results after new experiments (no code changes)
+
+ The app reads **fixed paths**. Replace files under `streamlit_hf/cache/` using the **same filenames**; then **restart Streamlit** (or do a hard refresh) so the new data loads.
+
+ | File | What it drives |
+ |------|----------------|
+ | `streamlit_hf/cache/latent_umap.pkl` | Single-Cell Explorer (UMAP) |
+ | `streamlit_hf/cache/df_features.parquet` | Feature insights + Flux analysis |
+ | `streamlit_hf/cache/attention_summary.pkl` | “Attention vs prediction” in Feature insights |
+ | `streamlit_hf/cache/attention_feature_ranks.pkl` | Optional; attention lists also live inside `attention_summary.pkl` |
+
+ You can also keep `analysis/df_features.csv` in sync for your own workflows; the UI **prefers** `streamlit_hf/cache/df_features.parquet` when present.
+
+ ### Regenerating caches from this repo
+
+ If you updated checkpoints, fold splits, shift pickles, or DEG tables **inside this project**, run:
+
+ ```bash
+ python scripts/precompute_streamlit_cache.py
+ ```
+
+ That script expects (among others) `ckp/*.pth`, `objects/fold_results_multi.pkl`, `objects/mutlimodal_dataset.pkl`, `objects/fi_shift_*.pkl`, and `objects/degs.pkl`. Point those inputs at your new experiment outputs **before** running the script, or copy your new pickles/CSVs into `streamlit_hf/cache/` manually as in the table above.
+
+ ### Docker / Hugging Face
+
+ See `streamlit_hf/HUGGINGFACE.md` and the root `Dockerfile`.
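The parquet-over-CSV preference described in the README above can be sketched as follows. This is an illustrative loader, not the app's actual code; `load_features` and its arguments are hypothetical names.

```python
import tempfile
from pathlib import Path

import pandas as pd


def load_features(cache_dir: Path, csv_fallback: Path) -> pd.DataFrame:
    """Illustrative loader: prefer the parquet cache, fall back to the CSV copy."""
    parquet = cache_dir / "df_features.parquet"
    if parquet.is_file():
        return pd.read_parquet(parquet)
    return pd.read_csv(csv_fallback)


# Demo with a throwaway CSV standing in for analysis/df_features.csv (toy row, not real data).
tmp = Path(tempfile.mkdtemp())
pd.DataFrame({"feature": ["Gata1"], "mean_rank": [1.0]}).to_csv(tmp / "df_features.csv", index=False)
df = load_features(tmp, tmp / "df_features.csv")  # no parquet present, so the CSV path is taken
```

Keeping the lookup path-based like this is what makes the "replace files, restart, done" update flow work without code changes.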
analysis/df_features.csv CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/analysis_plots.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ # FateFormerApp — training, precompute, and local Streamlit dev
+ torch>=2.1.0
+ numpy>=1.24.0
+ pandas>=2.0.0
+ scipy>=1.11.0
+ scikit-learn>=1.3.0
+ umap-learn>=0.5.5
+ tqdm>=4.66.0
+ anndata>=0.10.0
+ scanpy>=1.9.0
+ statsmodels>=0.14.0
+ tabulate>=0.9.0
+ streamlit>=1.40.0
+ plotly>=5.22.0
+ pyarrow>=14.0.0
scripts/precompute_streamlit_cache.py ADDED
@@ -0,0 +1,486 @@
+ #!/usr/bin/env python3
+ """
+ One-off cache builder for the Streamlit explorer.
+ Run from the repository root:
+     python scripts/precompute_streamlit_cache.py
+     python scripts/precompute_streamlit_cache.py --skip-attention  # faster: reuse objects/fi_shift_*.pkl only for df_features if attention_summary exists
+ """
+
+ from __future__ import annotations
+
+ import argparse
+ import os
+ import pickle
+ import sys
+ from pathlib import Path
+
+ import numpy as np
+ import pandas as pd
+ import torch
+ import umap
+
+ ROOT = Path(__file__).resolve().parents[1]
+ sys.path.insert(0, str(ROOT))
+ os.chdir(ROOT)
+
+ from data import create_dataset  # noqa: E402
+ from interpretation import attentions as att  # noqa: E402
+ from interpretation import latentspace as ls  # noqa: E402
+ from interpretation import predictions as prds  # noqa: E402
+ CACHE = ROOT / "streamlit_hf" / "cache"
+ CACHE.mkdir(parents=True, exist_ok=True)
+
+
+ def replace_fold_results_path(fold_results, ckp_root: str = "ckp"):
+     """Point checkpoints at flat `ckp/multi_seed0_fold{k}.pth` layout in this repo."""
+     for fold in fold_results:
+         ckpt_name = os.path.basename(fold["best_model_path"])
+         fold_token = next((part for part in ckpt_name.split("_") if part.startswith("fold")), "")
+         fold_idx = "".join(ch for ch in fold_token if ch.isdigit())
+         if fold_idx:
+             clean_ckpt_name = f"multi_seed0_fold{fold_idx}.pth"
+         else:
+             clean_ckpt_name = ckpt_name
+         fold["best_model_path"] = os.path.join(ckp_root, clean_ckpt_name)
+     return fold_results
+
+
+ def load_training_context():
+     with open(ROOT / "objects" / "mutlimodal_dataset.pkl", "rb") as f:
+         md = pickle.load(f)
+     X, y_label = md["X"], md["y_label"]
+     b, df_indices, pcts = md["b"], md["df_indices"], md["pcts"]
+
+     y_number = torch.tensor(
+         [{"reprogramming": 1, "dead-end": 0}[i] for i in list(y_label)],
+         dtype=torch.float32,
+     )
+     multimodal_dataset = create_dataset.MultiModalDataset(
+         X, b, y_number, df_indices, pcts, y_label
+     )
+
+     with open(ROOT / "objects" / "fold_results_multi.pkl", "rb") as f:
+         fold_results = pickle.load(f)
+     fold_results = replace_fold_results_path(fold_results)
+
+     share_config = {
+         "d_model": 128,
+         "d_ff": 16,
+         "n_heads": 8,
+         "n_encoder_layers": 2,
+         "n_batches": 3,
+         "dropout_rate": 0.0,
+     }
+     model_config_rna = {"vocab_size": 5914, "seq_len": X[0].shape[1]}
+     model_config_atac = {"vocab_size": 1, "seq_len": X[1].shape[1]}
+     model_config_flux = {"vocab_size": 1, "seq_len": X[2].shape[1]}
+     model_config_multi = {"d_model": 128, "n_heads_cls": 8, "d_ff_cls": 16}
+     model_config = {
+         "Share": share_config,
+         "RNA": model_config_rna,
+         "ATAC": model_config_atac,
+         "Flux": model_config_flux,
+         "Multi": model_config_multi,
+     }
+
+     feature_names = (
+         list(X[0].columns)
+         + ["batch_rna"]
+         + list(X[1].columns)
+         + ["batch_atac"]
+         + list(X[2].columns)
+         + ["batch_flux"]
+     )
+
+     adata_RNA_labelled = None
+     rna_pkl = ROOT / "data" / "datasets" / "rna_labelled.pkl"
+     try:
+         with open(rna_pkl, "rb") as f:
+             adata_RNA_labelled = pickle.load(f)
+     except Exception as e:
+         print(
+             f"Warning: could not load {rna_pkl} ({e}). "
+             "Sample table will omit AnnData-derived metadata (e.g. clone_id)."
+         )
+
+     return (
+         multimodal_dataset,
+         fold_results,
+         model_config,
+         feature_names,
+         adata_RNA_labelled,
+     )
+
+
+ def build_latent_umap(multimodal_dataset, fold_results, model_config, common_samples: bool = False):
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     ls_v, labels, preds = ls.get_latent_space(
+         "Multi",
+         fold_results,
+         multimodal_dataset,
+         model_config,
+         device,
+         common_samples=common_samples,
+     )
+     reducer = umap.UMAP(n_components=2, random_state=0, n_neighbors=30, min_dist=1.0)
+     xy = reducer.fit_transform(ls_v)
+
+     ordered_indices: list[int] = []
+     fold_ids: list[int] = []
+     from interpretation.attentions import filter_idx  # noqa: PLC0415
+     from torch.utils.data import Subset  # noqa: PLC0415
+
+     for fold_idx, fold in enumerate(fold_results):
+         val_idx = fold["val_idx"]
+         if common_samples:
+             val_idx = filter_idx(multimodal_dataset, val_idx)
+         ordered_indices.extend(val_idx)
+         fold_ids.extend([fold_idx + 1] * len(val_idx))
+
+     labels = np.asarray(labels).ravel()
+     preds = np.asarray(preds).ravel().astype(int)
+     label_name = np.where(labels > 0.5, "reprogramming", "dead-end")
+     pred_name = np.where(preds > 0.5, "reprogramming", "dead-end")
+     correct = (preds == labels.astype(int)).astype(np.int8)
+
+     ds = multimodal_dataset
+     batch_no = np.array([int(ds.batch_no[i].item()) for i in ordered_indices], dtype=np.int32)
+     pcts = np.array([float(ds.pcts[i]) for i in ordered_indices], dtype=np.float64)
+
+     modalities = []
+     for i in ordered_indices:
+         has_r = (ds.rna_data[i] != 0).any().item()
+         has_a = (ds.atac_data[i] != 0).any().item()
+         has_f = (ds.flux_data[i] != 0).any().item()
+         s = "".join(c for c, h in (("R", has_r), ("A", has_a), ("F", has_f)) if h)
+         modalities.append(s or "None")
+
+     return {
+         "umap_x": xy[:, 0].astype(np.float32),
+         "umap_y": xy[:, 1].astype(np.float32),
+         "label_name": label_name,
+         "pred_name": pred_name,
+         "correct": correct,
+         "fold": np.array(fold_ids, dtype=np.int32),
+         "batch_no": batch_no,
+         "pct": pcts,
+         "modality": modalities,
+         "dataset_idx": np.array(ordered_indices, dtype=np.int32),
+         "common_samples": common_samples,
+     }
+
+
+ def create_combined_feature_dataframe(
+     fi_shift_rna,
+     fi_shift_atac,
+     fi_shift_flux,
+     fi_att_rna,
+     fi_att_atac,
+     fi_att_flux,
+     df_rna_degs=None,
+     df_atac_degs=None,
+     df_flux_degs=None,
+     remove_batch=True,
+ ):
+     def process_modality(shift_list, att_list, degs_df, modality_name):
+         shift_df = pd.DataFrame(shift_list, columns=["feature", "importance_shift"]).reset_index()
+         shift_df.rename(columns={"index": "rank_shift_in_modal"}, inplace=True)
+         shift_df["rank_shift_in_modal"] += 1
+
+         att_df = pd.DataFrame(att_list, columns=["feature", "importance_att"]).reset_index()
+         att_df.rename(columns={"index": "rank_att_in_modal"}, inplace=True)
+         att_df["rank_att_in_modal"] += 1
+
+         combined_df = pd.merge(shift_df, att_df, on="feature", how="outer")
+         if degs_df is not None:
+             combined_df = pd.merge(combined_df, degs_df, on="feature", how="left")
+         combined_df["modality"] = modality_name
+         return combined_df
+
+     rna_df = process_modality(fi_shift_rna, fi_att_rna, df_rna_degs, "RNA")
+     atac_df = process_modality(fi_shift_atac, fi_att_atac, df_atac_degs, "ATAC")
+     flux_df = process_modality(fi_shift_flux, fi_att_flux, df_flux_degs, "Flux")
+     all_features_df = pd.concat([rna_df, atac_df, flux_df], ignore_index=True)
+
+     if remove_batch:
+         all_features_df = all_features_df[~all_features_df["feature"].str.contains("batch", na=False)]
+
+     max_rank_modal = max(
+         all_features_df["rank_att_in_modal"].max(), all_features_df["rank_shift_in_modal"].max()
+     )
+     all_features_df[["rank_att_in_modal", "rank_shift_in_modal"]] = all_features_df[
+         ["rank_att_in_modal", "rank_shift_in_modal"]
+     ].fillna(max_rank_modal + 1)
+     all_features_df[["rank_att_in_modal", "rank_shift_in_modal"]] = all_features_df[
+         ["rank_att_in_modal", "rank_shift_in_modal"]
+     ].astype("int32")
+
+     all_features_df[["importance_att", "importance_shift"]] = (
+         all_features_df[["importance_att", "importance_shift"]].fillna(0).astype("float64")
+     )
+
+     all_features_df["rank_shift"] = (
+         all_features_df["importance_shift"].rank(ascending=False, method="first").astype("int32")
+     )
+     all_features_df["rank_att"] = (
+         all_features_df["importance_att"].rank(ascending=False, method="first").astype("int32")
+     )
+     all_features_df["mean_rank"] = all_features_df[["rank_att", "rank_shift"]].mean(axis=1)
+
+     top_th = int(all_features_df.shape[0] * 0.1) + 1
+     all_features_df["top_10_pct"] = all_features_df.apply(
+         lambda row: "both"
+         if row["rank_shift"] <= top_th and row["rank_att"] <= top_th
+         else (
+             "shift"
+             if row["rank_shift"] <= top_th
+             else ("att" if row["rank_att"] <= top_th else "None")
+         ),
+         axis=1,
+     )
+
+     float_cols = [
+         col for col in all_features_df.columns if col.startswith(("log_fc", "mean_", "std_", "pval_"))
+     ]
+     if float_cols:
+         all_features_df[float_cols] = all_features_df[float_cols].round(6)
+     all_features_df["importance_att"] = all_features_df["importance_att"].round(6)
+     all_features_df["importance_shift"] = all_features_df["importance_shift"].round(6)
+     all_features_df = all_features_df.sort_values(by="mean_rank", ascending=True)
+
+     cols = [
+         "mean_rank",
+         "feature",
+         "rank_shift",
+         "rank_att",
+         "rank_shift_in_modal",
+         "rank_att_in_modal",
+         "modality",
+         "importance_shift",
+         "importance_att",
+         "top_10_pct",
+         "mean_de",
+         "mean_re",
+         "std_de",
+         "std_re",
+         "pval",
+         "pval_adj",
+         "log_fc",
+         "group",
+         "pval_adj_log",
+         "mean_diff",
+         "pathway",
+         "module",
+     ]
+     for c in cols:
+         if c not in all_features_df.columns:
+             all_features_df[c] = np.nan
+     return all_features_df[cols]
+
+
+ def run_attention_and_fi(
+     multimodal_dataset,
+     fold_results,
+     model_config,
+     feature_names,
+     device: str,
+     adata_rna,
+ ):
+     df_samples = prds.get_sample_predictions_dataframe(
+         model_type="Multi",
+         multimodal_dataset=multimodal_dataset,
+         fold_results=fold_results,
+         model_config=model_config,
+         device=device,
+         batch_size=32,
+         threshold=0.5,
+         adata_rna=adata_rna,
+     )
+     all_indices = df_samples["ind"].tolist()
+     de_preds_indices = df_samples[df_samples["predicted_class"] == "dead-end"]["ind"].tolist()
+     re_preds_indices = df_samples[df_samples["predicted_class"] == "reprogramming"]["ind"].tolist()
+
+     print("Running flow attention (all validation)…")
+     all_layers_all = att.analyze_cls_attention(
+         "Multi",
+         fold_results,
+         multimodal_dataset,
+         model_config,
+         device=device,
+         indices=all_indices,
+         average_heads=False,
+         return_flow_attention=True,
+     )
+     print("Running flow attention (predicted dead-end)…")
+     all_layers_de = att.analyze_cls_attention(
+         "Multi",
+         fold_results,
+         multimodal_dataset,
+         model_config,
+         device=device,
+         indices=de_preds_indices,
+         average_heads=False,
+         return_flow_attention=True,
+     )
+     print("Running flow attention (predicted reprogramming)…")
+     all_layers_re = att.analyze_cls_attention(
+         "Multi",
+         fold_results,
+         multimodal_dataset,
+         model_config,
+         device=device,
+         indices=re_preds_indices,
+         average_heads=False,
+         return_flow_attention=True,
+     )
+
+     rollout_all = att.multimodal_attention_rollout(all_layers_all)
+     rollout_de = att.multimodal_attention_rollout(all_layers_de)
+     rollout_re = att.multimodal_attention_rollout(all_layers_re)
+     rollout_all = rollout_all / rollout_all.sum(dim=-1, keepdim=True)
+     rollout_de = rollout_de / rollout_de.sum(dim=-1, keepdim=True)
+     rollout_re = rollout_re / rollout_re.sum(dim=-1, keepdim=True)
+
+     # Explicit splits (notebook): RNA [:945], ATAC [945:945+884], flux rest
+     i0, i1, i2 = 0, 945, 945 + 884
+
+     def mean_vec(t):
+         return t.mean(dim=0).detach().cpu().numpy()
+
+     rollout_mean = {
+         "all": mean_vec(rollout_all),
+         "dead_end": mean_vec(rollout_de),
+         "reprogramming": mean_vec(rollout_re),
+     }
+
+     top_n_get = None
+     fi = {"all": {}, "dead_end": {}, "reprogramming": {}}
+     for name, tensor in (
+         ("all", rollout_all),
+         ("dead_end", rollout_de),
+         ("reprogramming", rollout_re),
+     ):
+         fi[name]["rna"] = att.get_top_features(
+             tensor[:, i0:i1], feature_names[i0:i1], modality="RNA", top_n=top_n_get
+         )
+         fi[name]["atac"] = att.get_top_features(
+             tensor[:, i1:i2], feature_names[i1:i2], modality="ATAC", top_n=top_n_get
+         )
+         fi[name]["flux"] = att.get_top_features(
+             tensor[:, i2:], feature_names[i2:], modality="Flux", top_n=top_n_get
+         )
+
+     summary = {
+         "feature_names": feature_names,
+         "slices": {
+             "RNA": {"start": i0, "stop": i1},
+             "ATAC": {"start": i1, "stop": i2},
+             "Flux": {"start": i2, "stop": len(feature_names)},
+         },
+         "rollout_mean": rollout_mean,
+         "fi_att": fi,
+     }
+     return summary, df_samples
+
+
+ def main():
+     ap = argparse.ArgumentParser()
+     ap.add_argument("--skip-attention", action="store_true", help="Skip attention if summary exists")
+     ap.add_argument(
+         "--common-samples",
+         action="store_true",
+         help="Use common-samples filter for latent UMAP (default: False, notebook-style)",
+     )
+     args = ap.parse_args()
+     common_samples = args.common_samples
+
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     print(f"Device: {device}")
+
+     (
+         multimodal_dataset,
+         fold_results,
+         model_config,
+         feature_names,
+         adata_RNA_labelled,
+     ) = load_training_context()
+
+     print("Building latent UMAP bundle…")
+     latent = build_latent_umap(
+         multimodal_dataset, fold_results, model_config, common_samples=common_samples
+     )
+     with open(CACHE / "latent_umap.pkl", "wb") as f:
+         pickle.dump(latent, f)
+
+     att_path = CACHE / "attention_summary.pkl"
+     df_samples_path = CACHE / "samples.parquet"
+
+     if args.skip_attention and att_path.is_file():
+         print("Skipping attention (--skip-attention, file exists).")
+         with open(att_path, "rb") as f:
+             summary = pickle.load(f)
+     else:
+         print("Computing attention + rollout (slow)…")
+         summary, df_samples = run_attention_and_fi(
+             multimodal_dataset,
+             fold_results,
+             model_config,
+             feature_names,
+             device,
+             adata_RNA_labelled,
+         )
+         with open(att_path, "wb") as f:
+             pickle.dump(summary, f)
+         with open(CACHE / "attention_feature_ranks.pkl", "wb") as f:
+             pickle.dump(summary["fi_att"], f)
+         df_samples.to_parquet(df_samples_path, index=False)
+
+     if args.skip_attention and att_path.is_file() and not df_samples_path.is_file():
+         df_samples = prds.get_sample_predictions_dataframe(
+             model_type="Multi",
+             multimodal_dataset=multimodal_dataset,
+             fold_results=fold_results,
+             model_config=model_config,
+             device=device,
+             batch_size=32,
+             threshold=0.5,
+             adata_rna=adata_RNA_labelled,
+         )
+         df_samples.to_parquet(df_samples_path, index=False)
+
+     for name in ["fi_shift_rna.pkl", "fi_shift_atac.pkl", "fi_shift_flux.pkl"]:
+         src = ROOT / "objects" / name
+         if not src.is_file():
+             print(f"Warning: missing {src}")
+
+     with open(ROOT / "objects" / "fi_shift_rna.pkl", "rb") as f:
+         fi_shift_rna = pickle.load(f)
+     with open(ROOT / "objects" / "fi_shift_atac.pkl", "rb") as f:
+         fi_shift_atac = pickle.load(f)
+     with open(ROOT / "objects" / "fi_shift_flux.pkl", "rb") as f:
+         fi_shift_flux = pickle.load(f)
+
+     with open(ROOT / "objects" / "degs.pkl", "rb") as f:
+         degs = pickle.load(f)
+     df_rna_degs, df_atac_degs, df_flux_degs = degs[0], degs[1], degs[2]
+
+     fi = summary["fi_att"]
+     df_features = create_combined_feature_dataframe(
+         fi_shift_rna,
+         fi_shift_atac,
+         fi_shift_flux,
+         fi["all"]["rna"],
+         fi["all"]["atac"],
+         fi["all"]["flux"],
+         df_rna_degs,
+         df_atac_degs,
+         df_flux_degs,
+     )
+     df_features.to_parquet(CACHE / "df_features.parquet", index=False)
+     df_features.to_csv(ROOT / "analysis" / "df_features.csv", index=False)
+     print(f"Wrote {CACHE / 'df_features.parquet'} and analysis/df_features.csv")
+     print("Done.")
+
+
+ if __name__ == "__main__":
+     main()
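The `latent_umap.pkl` bundle written by `build_latent_umap` above is a plain dict of equal-length arrays, one entry per validation cell. A consumer can round-trip it through pickle and map it straight onto a DataFrame; the values below are synthetic stand-ins, not real model outputs.

```python
import pickle

import numpy as np
import pandas as pd

# Synthetic stand-in for the dict that build_latent_umap() pickles (toy values only).
bundle = {
    "umap_x": np.array([0.1, 0.9], dtype=np.float32),
    "umap_y": np.array([1.2, -0.3], dtype=np.float32),
    "label_name": np.array(["reprogramming", "dead-end"]),
    "pred_name": np.array(["reprogramming", "reprogramming"]),
    "correct": np.array([1, 0], dtype=np.int8),
    "fold": np.array([1, 2], dtype=np.int32),
}

# Round-trip through pickle, mirroring how the Streamlit side reads latent_umap.pkl.
loaded = pickle.loads(pickle.dumps(bundle))

# Every per-cell key has the same length, so the bundle maps onto one row per cell.
df = pd.DataFrame({k: v for k, v in loaded.items() if np.ndim(v) == 1})
```

Scalar metadata like the `common_samples` flag stays outside the DataFrame, which is why the comprehension keeps only the 1-D entries.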
streamlit_hf/.streamlit/config.toml ADDED
@@ -0,0 +1,13 @@
+ [theme]
+ primaryColor = "#2563eb"
+ backgroundColor = "#f8fafc"
+ secondaryBackgroundColor = "#ffffff"
+ textColor = "#0f172a"
+ font = "sans-serif"
+
+ [server]
+ headless = true
+ # Default CORS + XSRF settings avoid the "enableCORS=false vs XSRF" conflict on localhost.
+
+ [browser]
+ gatherUsageStats = false
streamlit_hf/README.md ADDED
@@ -0,0 +1,36 @@
+ # Hugging Face Space (Docker + Streamlit)
+
+ The **root `README.md`** starts with the YAML card Hugging Face reads for the Space (title, tags, colours, `sdk: docker`, `app_port: 7860`). Copy that block if you maintain a separate Space README.
+
+ ```yaml
+ ---
+ title: FateFormer Explorer
+ short_description: Streamlit app to explore multimodal single-cell fate modeling (RNA, ATAC, metabolic flux, attention, and rankings).
+ emoji: 🧬
+ colorFrom: violet
+ colorTo: indigo
+ tags:
+ - streamlit
+ - single-cell
+ - multi-omics
+ - genomics
+ - atac-seq
+ - rna-seq
+ - metabolic-modeling
+ - deep-learning
+ - biology
+ license: mit
+ sdk: docker
+ app_port: 7860
+ ---
+ ```
+
+ `app_port` **7860** matches the root **`Dockerfile`** (`streamlit ... --server.port 7860`). Local runs use Streamlit’s default **8501** unless you pass `--server.port`.
+
+ ## Before first deploy
+
+ 1. Run locally: `python scripts/precompute_streamlit_cache.py` (requires GPU/CPU time for attention).
+ 2. Commit **`streamlit_hf/cache/`** contents (`latent_umap.pkl`, `attention_summary.pkl`, `attention_feature_ranks.pkl`, `df_features.parquet`, and optionally `samples.parquet` if you use it elsewhere) or attach via **Git LFS** if files are large. These paths are listed in `.gitignore`; use `git add -f streamlit_hf/cache/*` when you want them in the remote.
+ 3. Keep **`ckp/`** model weights available only if you run precompute in CI; the slim Docker image does **not** include PyTorch and expects precomputed caches.
+
+ The repository **`Dockerfile`** at the root builds the Space.
streamlit_hf/__init__.py ADDED
@@ -0,0 +1 @@
+ # Streamlit explorer package (run with PYTHONPATH=repo root).
streamlit_hf/app.py ADDED
@@ -0,0 +1,35 @@
+ """
+ FateFormer: interactive analysis explorer.
+ Run from repository root: PYTHONPATH=. streamlit run streamlit_hf/app.py
+ """
+
+ from pathlib import Path
+
+ import streamlit as st
+
+ _APP_DIR = Path(__file__).resolve().parent
+ _ICON_PATH = _APP_DIR / "static" / "app_icon.svg"
+ _page_icon_kw = {"page_icon": str(_ICON_PATH)} if _ICON_PATH.is_file() else {}
+
+ st.set_page_config(
+     page_title="FateFormer Explorer",
+     layout="wide",
+     initial_sidebar_state="expanded",
+     **_page_icon_kw,
+ )
+
+ _home = str(_APP_DIR / "home.py")
+ _p1 = str(_APP_DIR / "pages" / "1_Single_Cell_Explorer.py")
+ _p2 = str(_APP_DIR / "pages" / "2_Feature_insights.py")
+ _p3 = str(_APP_DIR / "pages" / "3_Flux_analysis.py")
+ _p4 = str(_APP_DIR / "pages" / "4_Gene_expression_analysis.py")
+
+ pages = [
+     st.Page(_home, title="Home", icon=":material/home:", default=True),
+     st.Page(_p1, title="Single-Cell Explorer", icon=":material/scatter_plot:"),
+     st.Page(_p2, title="Feature Insights", icon=":material/analytics:"),
+     st.Page(_p3, title="Flux Analysis", icon=":material/account_tree:"),
+     st.Page(_p4, title="Gene Expression & TF Activity", icon=":material/genetics:"),
+ ]
+ nav = st.navigation(pages)
+ nav.run()
streamlit_hf/cache/.gitkeep ADDED
File without changes
streamlit_hf/home.py ADDED
@@ -0,0 +1,58 @@
+ """Landing content for the FateFormer Streamlit hub."""
+
+ from __future__ import annotations
+
+ import sys
+ from pathlib import Path
+
+ import streamlit as st
+
+ _REPO = Path(__file__).resolve().parents[1]
+ if str(_REPO) not in sys.path:
+     sys.path.insert(0, str(_REPO))
+
+ from streamlit_hf.lib import ui
+
+ _CACHE = Path(__file__).resolve().parent / "cache"
+ _HAS_CACHE = (_CACHE / "latent_umap.pkl").is_file() and (_CACHE / "df_features.parquet").is_file()
+
+ ui.inject_app_styles()
+
+ st.title("FateFormer: interactive analysis")
+ st.caption("Choose a workspace below or use the sidebar. All views use the same precomputed validation results.")
+
+ if not _HAS_CACHE:
+     st.warning(
+         "This deployment does not have precomputed results yet. Ask the maintainer to publish data, then reload."
+     )
+ else:
+     st.success("Precomputed results are available. After a server-side update, refresh the browser to load new plots.")
+
+ st.subheader("Open a page")
+ r1a, r1b, r1c = st.columns(3)
+ with r1a:
+     with st.container(border=True):
+         st.page_link("pages/1_Single_Cell_Explorer.py", label="Single-Cell Explorer", icon=":material/scatter_plot:")
+         st.caption("Latent UMAP: colour by fate, prediction, fold, batch, modalities, or dominant fate emphasis.")
+ with r1b:
+     with st.container(border=True):
+         st.page_link("pages/2_Feature_insights.py", label="Feature Insights", icon=":material/analytics:")
+         st.caption("Shift and attention rankings, cohort comparisons, and full feature tables.")
+ with r1c:
+     with st.container(border=True):
+         st.page_link("pages/3_Flux_analysis.py", label="Flux Analysis", icon=":material/account_tree:")
+         st.caption("Reaction pathways, differential flux, rankings, and model metadata.")
+ r2a, _, _ = st.columns(3)
+ with r2a:
+     with st.container(border=True):
+         st.page_link(
+             "pages/4_Gene_expression_analysis.py",
+             label="Gene Expression & TF Activity",
+             icon=":material/genetics:",
+         )
+         st.caption("Pathway enrichment, motif activity, and gene / motif tables.")
+
+ st.markdown("---")
+ st.markdown(
+     "**Tips:** use chart toolbars for pan/zoom and lasso selection where offered. Tables support search and column sort from the header row."
+ )
streamlit_hf/lib/__init__.py ADDED
File without changes
streamlit_hf/lib/formatters.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Human-readable labels for compact codes used in cached tables."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ # Matches interpretation.predictions._get_modality_info letter codes (R/A/F order).
9
+ # Short table-friendly labels (no long parentheticals).
10
+ _MODALITY_LONG: dict[str, str] = {
11
+ "RAF": "RNA + ATAC + Flux",
12
+ "RA": "RNA + ATAC",
13
+ "RF": "RNA + Flux",
14
+ "AF": "ATAC + Flux",
15
+ "R": "RNA only",
16
+ "A": "ATAC only",
17
+ "F": "Flux only",
18
+ "None": "No modality data",
19
+ "none": "No modality data",
20
+ "nan": "No modality data",
21
+ }
22
+
23
+ # Rename row fields in inspector tables for display.
24
+ _FIELD_DISPLAY: dict[str, str] = {
25
+ "label": "CellTag-Multi label",
26
+ }
27
+
28
+ # Latent explorer: table headers and key–value inspector (exclude non-meaningful / internal cols).
29
+ LATENT_TABLE_RENAME: dict[str, str] = {
30
+ "label": "CellTag-Multi label",
31
+ "predicted_class": "Predicted fate",
32
+ "predicted_value": "Prediction score",
33
+    "correct": "Prediction correct",
+    "pct": "Dominant fate (%)",
+    "modality_label": "Available modalities",
+    "dataset_idx": "Dataset index",
+    "batch_no": "Batch",
+    "fold": "CV fold",
+    "clone_id": "Clone ID",
+    "clone_size": "Clone size",
+    "cell_type": "Cell type",
+}
+
+LATENT_DROP_FROM_TABLES: frozenset[str] = frozenset({"umap_x", "umap_y", "modality", "pct_decile"})
+
+_NAME_MAP = {**_FIELD_DISPLAY, **LATENT_TABLE_RENAME}
+
+
+def _format_scalar(v) -> str:
+    if v is None:
+        return ""
+    if isinstance(v, bool):
+        return "Yes" if v else "No"
+    try:
+        if pd.isna(v):
+            return ""
+    except (ValueError, TypeError):
+        pass
+    if isinstance(v, (float, np.floating)) and np.isnan(v):
+        return ""
+    return str(v)
+
+
+def _field_label(name: str, *, fallback_field_display: bool) -> str:
+    k = str(name)
+    if fallback_field_display:
+        return _NAME_MAP.get(k, _FIELD_DISPLAY.get(k, k))
+    return _NAME_MAP.get(k, k)
+
+
+def expand_modality(code) -> str:
+    """Map R/A/F codes (e.g. RAF, RA) to full names."""
+    if code is None:
+        return _MODALITY_LONG["None"]
+    try:
+        if pd.isna(code):
+            return _MODALITY_LONG["None"]
+    except (ValueError, TypeError):
+        pass
+    if isinstance(code, (float, np.floating)) and np.isnan(code):
+        return _MODALITY_LONG["None"]
+    key = str(code).strip()
+    if not key or key.lower() == "nan":
+        return _MODALITY_LONG["None"]
+    return _MODALITY_LONG.get(key, key)
+
+
+def annotate_modality_column(df, code_col: str = "modality", label_col: str = "modality_label"):
+    """Add human-readable modality column; returns a copy."""
+    out = df.copy()
+    out[label_col] = out[code_col].map(expand_modality)
+    return out
+
+
+def prepare_latent_display_dataframe(df: pd.DataFrame) -> pd.DataFrame:
+    """Drop UMAP / internal columns and rename headers for Selected-points style tables."""
+    drop = [c for c in df.columns if c in LATENT_DROP_FROM_TABLES or str(c).startswith("umap_")]
+    out = df.drop(columns=drop, errors="ignore")
+    return out.rename(columns=LATENT_TABLE_RENAME)
+
+
+def latent_inspector_key_value(series: pd.Series) -> pd.DataFrame:
+    """Key–value inspector row: human names, no UMAP coordinates."""
+    s = series.drop(
+        labels=[c for c in series.index if c in LATENT_DROP_FROM_TABLES or str(c).startswith("umap_")],
+        errors="ignore",
+    )
+    idx = [_field_label(i, fallback_field_display=False) for i in s.index]
+    vals = [_format_scalar(v) for v in s.values]
+    return pd.DataFrame({"Field": idx, "Value": vals})
+
+
+def dataframe_to_arrow_safe_kv(series: pd.Series) -> pd.DataFrame:
+    """Two string columns for Streamlit/PyArrow (avoids mixed-type single column)."""
+    s = series.copy()
+    idx = [_field_label(i, fallback_field_display=True) for i in s.index]
+    vals = [_format_scalar(v) for v in s.values]
+    return pd.DataFrame({"field": idx, "value": vals})
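The helper above exists because a single pandas column holding bools, floats, and NaNs trips up PyArrow serialization when Streamlit renders the table. A self-contained sketch of the same idea (the `to_arrow_safe_kv` name and sample `row` are illustrative; the `_field_label` renaming from above is omitted):

```python
import numpy as np
import pandas as pd


def to_arrow_safe_kv(series: pd.Series) -> pd.DataFrame:
    # Casting every value to a string gives Arrow two homogeneous
    # string columns instead of one mixed-type object column.
    def fmt(v):
        if v is None:
            return ""
        if isinstance(v, bool):          # bool before float: bool is not NaN-able
            return "Yes" if v else "No"
        if isinstance(v, float) and np.isnan(v):
            return ""
        return str(v)

    return pd.DataFrame(
        {
            "field": [str(i) for i in series.index],
            "value": [fmt(v) for v in series.values],
        }
    )


row = pd.Series({"correct": True, "pct": 87.5, "clone_id": None})
kv = to_arrow_safe_kv(row)
```

Both output columns are plain strings, so `st.dataframe(kv)` never hits a mixed-type Arrow conversion error.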
streamlit_hf/lib/io.py ADDED
@@ -0,0 +1,171 @@
+"""Load precomputed explorer artifacts (no torch required at runtime)."""
+
+from __future__ import annotations
+
+import pickle
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+from streamlit_hf.lib.formatters import annotate_modality_column
+from streamlit_hf.lib.reactions import normalize_reaction_key
+
+REPO_ROOT = Path(__file__).resolve().parents[2]
+CACHE_DIR = REPO_ROOT / "streamlit_hf" / "cache"
+METABOLIC_MODEL_METADATA = REPO_ROOT / "data" / "datasets" / "metabolic_model_metadata.csv"
+
+
+def _is_valid_features_csv(path: Path) -> bool:
+    if not path.is_file():
+        return False
+    try:
+        head = pd.read_csv(path, nrows=2)
+    except Exception:
+        return False
+    return "feature" in head.columns and "importance_shift" in head.columns
+
+
+def load_latent_bundle():
+    path = CACHE_DIR / "latent_umap.pkl"
+    if not path.is_file():
+        return None
+    with open(path, "rb") as f:
+        return pickle.load(f)
+
+
+def load_attention_summary():
+    path = CACHE_DIR / "attention_summary.pkl"
+    if not path.is_file():
+        return None
+    with open(path, "rb") as f:
+        return pickle.load(f)
+
+
+def load_samples_df() -> pd.DataFrame | None:
+    pq = CACHE_DIR / "samples.parquet"
+    if pq.is_file():
+        df = pd.read_parquet(pq)
+        return annotate_modality_column(df) if "modality" in df.columns else df
+    return None
+
+
+def _add_within_modality_orders(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Align scatter / table columns with the notebook.
+
+    Parquet from precompute already has rank_shift_in_modal / rank_att_in_modal from the same
+    merge-of-sorted-lists logic as the notebook; do not overwrite those with pandas ranks on
+    rounded importances (tie order can differ and changes the RNA cloud).
+    """
+    out = df.copy()
+    if "modality" not in out.columns:
+        return out
+    if "rank_shift_in_modal" in out.columns and "rank_att_in_modal" in out.columns:
+        out["shift_order_mod"] = out["rank_shift_in_modal"].astype(int)
+        out["attention_order_mod"] = out["rank_att_in_modal"].astype(int)
+    else:
+        g = out.groupby("modality", observed=True)
+        out["shift_order_mod"] = g["importance_shift"].rank(ascending=False, method="first").astype(int)
+        out["attention_order_mod"] = g["importance_att"].rank(ascending=False, method="first").astype(int)
+        out["rank_shift_in_modal"] = out["shift_order_mod"]
+        out["rank_att_in_modal"] = out["attention_order_mod"]
+    if "combined_order_mod" not in out.columns:
+        g = out.groupby("modality", observed=True)
+        out["combined_order_mod"] = g["mean_rank"].rank(ascending=True, method="first").astype(int)
+    return out
+
+
+def load_metabolic_model_metadata() -> pd.DataFrame | None:
+    """Directed reaction edges: substrate → product, grouped by supermodule (see CSV headers)."""
+    if not METABOLIC_MODEL_METADATA.is_file():
+        return None
+    return pd.read_csv(METABOLIC_MODEL_METADATA)
+
+
+def build_metabolic_model_table(
+    meta: pd.DataFrame,
+    flux_df: pd.DataFrame,
+    supermodule_id: int | None = None,
+) -> pd.DataFrame:
+    """
+    Static edge list: substrate → product, reaction label, module class, plus DE / model columns when the
+    reaction string matches a row in the flux feature table.
+    """
+    need = {"Compound_IN_name", "Compound_OUT_name", "rxnName", "Supermodule_id", "Super.Module.class"}
+    if not need.issubset(set(meta.columns)):
+        return pd.DataFrame()
+    m = meta.copy()
+    if supermodule_id is not None:
+        m = m[m["Supermodule_id"] == int(supermodule_id)]
+    if m.empty:
+        return pd.DataFrame()
+
+    fd = flux_df.copy()
+    fd["_rk"] = fd["feature"].map(normalize_reaction_key)
+    fd = fd.drop_duplicates("_rk", keep="first").set_index("_rk", drop=False)
+
+    rows: list[dict] = []
+    for _, r in m.iterrows():
+        k = normalize_reaction_key(str(r["rxnName"]))
+        base = {
+            "Supermodule": r.get("Super.Module.class"),
+            "Module_id": r.get("Module_id"),
+            "Substrate": r["Compound_IN_name"],
+            "Product": r["Compound_OUT_name"],
+            "Reaction": r["rxnName"],
+        }
+        if k in fd.index:
+            row = fd.loc[k]
+            if isinstance(row, pd.DataFrame):
+                row = row.iloc[0]
+            base["log_fc"] = row["log_fc"] if "log_fc" in row.index else None
+            base["pval_adj"] = row["pval_adj"] if "pval_adj" in row.index else None
+            base["mean_rank"] = row["mean_rank"] if "mean_rank" in row.index else None
+            base["pathway"] = row["pathway"] if "pathway" in row.index else None
+        else:
+            base["log_fc"] = None
+            base["pval_adj"] = None
+            base["mean_rank"] = None
+            base["pathway"] = None
+        rows.append(base)
+    return pd.DataFrame(rows)
+
+
+def load_df_features() -> pd.DataFrame | None:
+    pq = CACHE_DIR / "df_features.parquet"
+    if pq.is_file():
+        return _add_within_modality_orders(pd.read_parquet(pq))
+    csv_cache = CACHE_DIR / "df_features.csv"
+    if csv_cache.is_file():
+        return _add_within_modality_orders(pd.read_csv(csv_cache))
+    analysis_csv = REPO_ROOT / "analysis" / "df_features.csv"
+    if _is_valid_features_csv(analysis_csv):
+        return _add_within_modality_orders(pd.read_csv(analysis_csv))
+    return None
+
+
+def latent_join_samples(bundle: dict, samples: pd.DataFrame | None) -> pd.DataFrame:
+    """One row per UMAP point, aligned with bundle arrays."""
+    n = len(bundle["umap_x"])
+    df = pd.DataFrame(
+        {
+            "umap_x": bundle["umap_x"],
+            "umap_y": bundle["umap_y"],
+            "label": bundle["label_name"],
+            "predicted_class": bundle["pred_name"],
+            "correct": bundle["correct"].astype(bool),
+            "fold": bundle["fold"].astype(int),
+            "batch_no": bundle["batch_no"].astype(int),
+            "pct": bundle["pct"],
+            "modality": bundle["modality"],
+            "dataset_idx": bundle["dataset_idx"].astype(int),
+        }
+    )
+    if samples is not None and not samples.empty:
+        s = samples.drop_duplicates(subset=["ind"], keep="first").set_index("ind")
+        extra = s.reindex(df["dataset_idx"].values)
+        for col in ["predicted_value", "clone_id", "clone_size", "cell_type"]:
+            if col in extra.columns:
+                df[col] = extra[col].values
+    return annotate_modality_column(df)
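`latent_join_samples` attaches per-sample metadata by indexing the samples table on `ind`, then reindexing it against `dataset_idx`, so the extra columns stay row-aligned with the UMAP arrays even when indices repeat or are out of order. A minimal sketch of that pattern with made-up data (`umap` and `samples` here are illustrative only):

```python
import pandas as pd

# UMAP rows reference samples by dataset index; the same sample may appear twice.
umap = pd.DataFrame({"dataset_idx": [2, 0, 2]})

# Per-sample metadata keyed by "ind"; duplicate "ind" rows can occur upstream.
samples = pd.DataFrame(
    {
        "ind": [0, 1, 2, 2],
        "cell_type": ["MEF", "iEP", "Fib", "dup"],
    }
)

# Keep the first row per "ind", then reindex to the order of dataset_idx:
# the result has exactly one row per UMAP point, in UMAP row order.
s = samples.drop_duplicates(subset=["ind"], keep="first").set_index("ind")
extra = s.reindex(umap["dataset_idx"].values)
umap["cell_type"] = extra["cell_type"].values
```

Unlike `merge`, `reindex` never reorders or drops the left-hand rows, which is what keeps the joined columns aligned with the pickled UMAP arrays.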
streamlit_hf/lib/pathways.py ADDED
@@ -0,0 +1,133 @@
+"""Pathway enrichment tables (DAVID-style exports) for Reactome and KEGG panels."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+REPO_ROOT = Path(__file__).resolve().parents[2]
+DE_TSV = REPO_ROOT / "analysis" / "de_all_48.tsv"
+RE_TSV = REPO_ROOT / "analysis" / "re_all_48.tsv"
+
+
+def load_de_re_tsv() -> tuple[pd.DataFrame, pd.DataFrame] | None:
+    if not DE_TSV.is_file() or not RE_TSV.is_file():
+        return None
+    return pd.read_csv(DE_TSV, sep="\t"), pd.read_csv(RE_TSV, sep="\t")
+
+
+def preprocess_pathway_file(df: pd.DataFrame, splitter: str) -> pd.DataFrame:
+    out = df.copy()
+    out["Term"] = out["Term"].astype(str).str.split(splitter).str[-1]
+    if splitter == "-":
+        out["Term"] = out["Term"].astype(str).str.split("~").str[-1]
+    out = out[out["Benjamini"] < 0.05].copy()
+    out["Gene Ratio"] = out["Count"] / out["List Total"]
+    return out
+
+
+def merged_reactome_kegg_bubble_frames(
+    de_all: pd.DataFrame, re_all: pd.DataFrame
+) -> tuple[pd.DataFrame, pd.DataFrame]:
+    """Rows for bubble plot (Gene Ratio, Count, Benjamini, Library, Term) per notebook cell 31."""
+    reactome_de = de_all[de_all["Category"] == "REACTOME_PATHWAY"]
+    reactome_re = re_all[re_all["Category"] == "REACTOME_PATHWAY"]
+    kegg_de = de_all[de_all["Category"] == "KEGG_PATHWAY"]
+    kegg_re = re_all[re_all["Category"] == "KEGG_PATHWAY"]
+
+    rde = preprocess_pathway_file(reactome_de, "~")
+    rde["Library"] = "Reactome"
+    rre = preprocess_pathway_file(reactome_re, "~")
+    rre["Library"] = "Reactome"
+    kde = preprocess_pathway_file(kegg_de, ":")
+    kde["Library"] = "KEGG"
+    kre = preprocess_pathway_file(kegg_re, ":")
+    kre["Library"] = "KEGG"
+
+    merged_dead = pd.concat([rde, kde], ignore_index=True)
+    merged_re = pd.concat([rre, kre], ignore_index=True)
+    return merged_dead, merged_re
+
+
+def _preprocess_exploded(df: pd.DataFrame, pval_threshold: float, splitter: str, label: str) -> pd.DataFrame:
+    d = df.copy()
+    d["Term"] = d["Term"].astype(str).str.split(splitter).str[-1]
+    if splitter == "-":
+        d["Term"] = d["Term"].astype(str).str.split("~").str[-1]
+
+    def _trunc(x: str) -> str:
+        return x[:60] + "..." if len(x) > 60 else x
+
+    d["Term"] = d["Term"].map(_trunc)
+    d = d[d["Benjamini"] < pval_threshold]
+    sub = d[["Term", "Genes", "Benjamini"]].copy()
+    sub["Label"] = label
+    exploded = (
+        sub.set_index(["Term", "Benjamini", "Label"])["Genes"].str.split(", ").explode().reset_index()
+    )
+    return exploded
+
+
+def _binary_matrix(data: pd.DataFrame) -> tuple[pd.DataFrame, pd.Series, pd.Series]:
+    binary = pd.crosstab(data["Term"], data["Genes"])
+    labels = data.groupby("Term")["Label"].first()
+    pvals = data.groupby("Term")["Benjamini"].first()
+    return binary, labels, pvals
+
+
+def _sort_matrix(matrix: pd.DataFrame) -> pd.DataFrame:
+    sp = matrix.sum(axis=1).sort_values(ascending=False).index
+    sg = matrix.sum(axis=0).sort_values(ascending=False).index
+    return matrix.loc[sp, sg]
+
+
+def build_merged_pathway_membership(
+    de_all: pd.DataFrame, re_all: pd.DataFrame, pval_threshold: float = 0.05
+) -> tuple[np.ndarray, list[str], list[str]] | None:
+    """
+    Numeric grid for heatmap: values 0=white, 1=dead-end gene, 2=reprogramming gene,
+    3=Reactome library stripe, 4=KEGG library stripe (notebook cell 29).
+    """
+    reactome_de = de_all[de_all["Category"] == "REACTOME_PATHWAY"]
+    reactome_re = re_all[re_all["Category"] == "REACTOME_PATHWAY"]
+    kegg_de = de_all[de_all["Category"] == "KEGG_PATHWAY"]
+    kegg_re = re_all[re_all["Category"] == "KEGG_PATHWAY"]
+
+    rde = _preprocess_exploded(reactome_de, pval_threshold, "~", "Dead-end")
+    rre = _preprocess_exploded(reactome_re, pval_threshold, "~", "Reprogramming")
+    rcomb = pd.concat([rde, rre], ignore_index=True)
+    kde = _preprocess_exploded(kegg_de, pval_threshold, ":", "Dead-end")
+    kre = _preprocess_exploded(kegg_re, pval_threshold, ":", "Reprogramming")
+    kcomb = pd.concat([kde, kre], ignore_index=True)
+
+    rm, rlab, _ = _binary_matrix(rcomb)
+    km, klab, _ = _binary_matrix(kcomb)
+    rm = _sort_matrix(rm)
+    km = _sort_matrix(km)
+
+    reactome_lib = pd.Series("Reactome", index=rm.index)
+    kegg_lib = pd.Series("KEGG", index=km.index)
+    merged = pd.concat([rm, km], axis=0, sort=False).fillna(0)
+    if merged.empty or merged.shape[1] == 0:
+        return None
+    merged_labels = pd.concat([rlab, klab])
+    merged_library = pd.concat([reactome_lib, kegg_lib])
+
+    label_code = {"Dead-end": 1, "Reprogramming": 2}
+    lib_code = {"Reactome": 3, "KEGG": 4}
+
+    gene_cols = list(merged.columns)
+    z = np.zeros((len(merged), len(gene_cols) + 1), dtype=float)
+    for i, term in enumerate(merged.index):
+        lc = label_code.get(str(merged_labels.loc[term]), 0)
+        for j, g in enumerate(gene_cols):
+            v = float(merged.loc[term, g])
+            if v > 0 and lc:
+                z[i, j] = v * lc
+        z[i, -1] = lib_code.get(str(merged_library.loc[term]), 0)
+
+    row_labels = [str(t) for t in merged.index]
+    col_labels = gene_cols + ["Library"]
+    return z, row_labels, col_labels
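The `_preprocess_exploded` / `_binary_matrix` pair relies on the pandas split–explode–crosstab pattern to turn DAVID's comma-joined `Genes` strings into a term × gene membership matrix. A small sketch of that pattern with invented terms and genes (the pathway names here are examples, not rows from the real TSVs):

```python
import pandas as pd

# DAVID-style export: one row per enriched term, genes joined by ", ".
df = pd.DataFrame(
    {
        "Term": ["Glycolysis", "TCA cycle"],
        "Benjamini": [0.01, 0.03],
        "Genes": ["HK1, PKM", "IDH1, PKM, SDHA"],
    }
)

# Split the gene string and explode to one row per (term, gene) pair;
# set_index keeps the per-term metadata attached through the explode.
exploded = (
    df.set_index(["Term", "Benjamini"])["Genes"]
    .str.split(", ")
    .explode()
    .reset_index()
)

# Crosstab gives the 0/1 term x gene membership matrix used for the heatmap.
binary = pd.crosstab(exploded["Term"], exploded["Genes"])
```

From here, sorting rows and columns by their sums (as `_sort_matrix` does) pushes the densest terms and most shared genes to the top-left of the heatmap.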
streamlit_hf/lib/plots.py ADDED
@@ -0,0 +1,1421 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Plotly helpers for the explorer UI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import plotly.express as px
10
+ import plotly.graph_objects as go
11
+ from plotly.subplots import make_subplots
12
+
13
+ from streamlit_hf.lib.reactions import normalize_reaction_key
14
+
15
+ # Matches Streamlit theme primary + slate text; used across Plotly layouts.
16
+ PLOT_FONT = dict(family="Inter, system-ui, sans-serif", size=12)
17
+
18
+ PALETTE = (
19
+ "#2563eb",
20
+ "#dc2626",
21
+ "#059669",
22
+ "#d97706",
23
+ "#7c3aed",
24
+ "#db2777",
25
+ "#0d9488",
26
+ "#4f46e5",
27
+ )
28
+
29
+ MODALITY_COLOR = {"RNA": "#E64B35", "ATAC": "#4DBBD5", "Flux": "#00A087"}
30
+ # Global modality pie only: edit here to try other hues (bars/scatter use MODALITY_COLOR).
31
+ MODALITY_PIE_COLOR = dict(MODALITY_COLOR)
32
+ # Log₂FC heatmaps/sunburst: colours like ggplot2 scale_colour_gradient2 (mid grey at 0).
33
+ LOG_FC_COLOR_MIN = -0.5
34
+ LOG_FC_COLOR_MAX = 0.5
35
+ LOG_FC_DIVERGING_SCALE: list[list] = [
36
+ [0.0, "#1C86EE"],
37
+ [0.5, "#FAFAFA"],
38
+ [1.0, "#FF0000"],
39
+ ]
40
+ # Unicode minus (U+2212) and subscript ₁₀ / ₂ for axes/colorbars.
41
+ LABEL_NEG_LOG10_ADJ_P = "\u2212log\u2081\u2080 adj. p"
42
+ LABEL_LOG2FC = "Log\u2082FC"
43
+ # Cached attention dict uses lowercase modality keys.
44
+ FI_ATT_MOD_KEY = {"RNA": "rna", "ATAC": "atac", "Flux": "flux"}
45
+ # Model appends one batch-embedding token per modality; hide from attention rankings in the UI.
46
+ BATCH_EMBEDDING_FEATURE_NAMES = frozenset({"batch_rna", "batch_atac", "batch_flux"})
47
+
48
+
49
+ def _attention_pairs_skip_batch(pairs: list) -> list:
50
+ return [(n, s) for n, s in pairs if str(n) not in BATCH_EMBEDDING_FEATURE_NAMES]
51
+
52
+
53
+ def rollout_top_features_table(feature_names, vec, top_n: int) -> pd.DataFrame:
54
+ """Top `top_n` rollout weights per modality slice, excluding batch-embedding tokens."""
55
+ names = [str(x) for x in feature_names]
56
+ v = np.asarray(vec, dtype=float)
57
+ rows = [
58
+ (names[i], float(v[i]))
59
+ for i in range(len(names))
60
+ if names[i] not in BATCH_EMBEDDING_FEATURE_NAMES
61
+ ]
62
+ rows.sort(key=lambda x: -x[1])
63
+ rows = rows[:top_n]
64
+ if not rows:
65
+ return pd.DataFrame(columns=["feature", "mean_attention"])
66
+ feat, val = zip(*rows)
67
+ return pd.DataFrame({"feature": list(feat), "mean_attention": list(val)})
68
+
69
+ # Themed continuous scale for dominant-fate % on UMAP (low → high emphasis).
70
+ UMAP_PCT_COLORSCALE: list[list] = [
71
+ [0.0, "#eff6ff"],
72
+ [0.25, "#bfdbfe"],
73
+ [0.55, "#3b82f6"],
74
+ [0.82, "#2563eb"],
75
+ [1.0, "#1e3a8a"],
76
+ ]
77
+
78
+ # Okabe–Ito–style distinct colours (colourblind-friendly) for categorical UMAP hues.
79
+ LATENT_DISCRETE_PALETTE = (
80
+ "#0072B2",
81
+ "#E69F00",
82
+ "#009E73",
83
+ "#CC79A7",
84
+ "#56B4E9",
85
+ "#D55E00",
86
+ "#F0E442",
87
+ "#000000",
88
+ )
89
+
90
+
91
+ def latent_scatter(
92
+ df,
93
+ color_col: str,
94
+ title: str,
95
+ width: int = 720,
96
+ height: int = 520,
97
+ marker_size: float = 5.0,
98
+ marker_opacity: float = 0.78,
99
+ ):
100
+ d = df.copy()
101
+ hover_spec = {
102
+ "umap_x": ":.3f",
103
+ "umap_y": ":.3f",
104
+ "dataset_idx": True,
105
+ "fold": True,
106
+ "batch_no": True,
107
+ "predicted_class": True,
108
+ "label": True,
109
+ "correct": True,
110
+ "pct": ":.2f",
111
+ "modality_label": True,
112
+ "modality": True,
113
+ "predicted_value": ":.3f",
114
+ "clone_id": True,
115
+ "clone_size": True,
116
+ "cell_type": True,
117
+ }
118
+ if "modality_label" in d.columns:
119
+ hover_spec.pop("modality", None)
120
+ hover_data = {k: v for k, v in hover_spec.items() if k in d.columns}
121
+ _disp = {
122
+ "label": "CellTag-Multi label",
123
+ "predicted_class": "Predicted fate",
124
+ "pct": "Dominant fate (%)",
125
+ "modality_label": "Available modalities",
126
+ "dataset_idx": "Dataset index",
127
+ "batch_no": "Batch",
128
+ "fold": "CV fold",
129
+ }
130
+ labels_map = {c: _disp[c] for c in _disp if c in d.columns}
131
+
132
+ continuous = color_col == "pct"
133
+ if color_col == "fold":
134
+ d["_color"] = d["fold"].astype(str)
135
+ color_arg = "_color"
136
+ labels_map["_color"] = "Fold"
137
+ continuous = False
138
+ elif color_col == "batch_no":
139
+ d["_color"] = d["batch_no"].astype(str)
140
+ color_arg = "_color"
141
+ labels_map["_color"] = "Batch"
142
+ continuous = False
143
+ elif color_col == "correct":
144
+ d["_color"] = d["correct"].map({True: "Correct", False: "Wrong"})
145
+ color_arg = "_color"
146
+ labels_map["_color"] = "Prediction"
147
+ continuous = False
148
+ else:
149
+ color_arg = color_col
150
+
151
+ common = dict(
152
+ x="umap_x",
153
+ y="umap_y",
154
+ hover_data=hover_data,
155
+ labels=labels_map,
156
+ title=title,
157
+ width=width,
158
+ height=height,
159
+ )
160
+ if continuous:
161
+ fig = px.scatter(
162
+ d,
163
+ color=color_arg,
164
+ color_continuous_scale=UMAP_PCT_COLORSCALE,
165
+ **common,
166
+ )
167
+ else:
168
+ fig = px.scatter(
169
+ d,
170
+ color=color_arg,
171
+ color_discrete_sequence=list(LATENT_DISCRETE_PALETTE),
172
+ **common,
173
+ )
174
+ fig.update_traces(
175
+ marker=dict(size=marker_size, opacity=marker_opacity, line=dict(width=0.25, color="rgba(255,255,255,0.4)"))
176
+ )
177
+ fig.update_layout(
178
+ template="plotly_white",
179
+ font=PLOT_FONT,
180
+ title_font_size=16,
181
+ margin=dict(l=28, r=20, t=56, b=28),
182
+ legend_title_text="",
183
+ xaxis_title="",
184
+ yaxis_title="",
185
+ )
186
+ fig.update_xaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
187
+ fig.update_yaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
188
+ return fig
189
+
190
+
191
+ def rank_scatter_shift_vs_attention(df_mod, modality: str, width: int = 420, height: int = 440):
192
+ """Attention rank on x, shift rank on y, least-squares trend line, discrete point colours."""
193
+ need = ("shift_order_mod", "attention_order_mod")
194
+ if not all(c in df_mod.columns for c in need):
195
+ return go.Figure()
196
+ sub = df_mod.dropna(subset=list(need)).copy()
197
+ if sub.empty:
198
+ return go.Figure()
199
+ x = sub["attention_order_mod"].astype(float).to_numpy()
200
+ y = sub["shift_order_mod"].astype(float).to_numpy()
201
+ fig = px.scatter(
202
+ sub,
203
+ x="attention_order_mod",
204
+ y="shift_order_mod",
205
+ color="top_10_pct",
206
+ hover_name="feature",
207
+ hover_data={
208
+ "mean_rank": True,
209
+ "importance_shift": ":.4f",
210
+ "importance_att": ":.4f",
211
+ },
212
+ labels={
213
+ "attention_order_mod": "Attention rank",
214
+ "shift_order_mod": "Shift rank",
215
+ },
216
+ width=width,
217
+ height=height,
218
+ color_discrete_map={
219
+ "both": PALETTE[0],
220
+ "shift": PALETTE[1],
221
+ "att": PALETTE[2],
222
+ "None": "#94a3b8",
223
+ },
224
+ )
225
+ fig.update_traces(marker=dict(size=7, opacity=0.62, line=dict(width=0.5, color="rgba(15,23,42,0.28)")))
226
+ if len(x) >= 2 and float(np.ptp(x)) > 0:
227
+ coef = np.polyfit(x, y, 1)
228
+ poly = np.poly1d(coef)
229
+ xs = np.linspace(float(np.min(x)), float(np.max(x)), 100)
230
+ fig.add_trace(
231
+ go.Scatter(
232
+ x=xs,
233
+ y=poly(xs),
234
+ mode="lines",
235
+ name=f"y = {coef[0]:.2f}x + {coef[1]:.2f}",
236
+ line=dict(color="#2563eb", width=2, dash="dash"),
237
+ showlegend=True,
238
+ )
239
+ )
240
+ fig.update_layout(
241
+ template="plotly_white",
242
+ font=PLOT_FONT,
243
+ title=dict(
244
+ text=f"{modality}: shift vs attention (ranks)",
245
+ x=0.5,
246
+ xanchor="center",
247
+ y=0.98,
248
+ yanchor="top",
249
+ font=dict(size=14, family=PLOT_FONT["family"]),
250
+ ),
251
+ margin=dict(l=48, r=20, t=52, b=72),
252
+ legend=dict(orientation="h", yanchor="top", y=-0.2, xanchor="center", x=0.5),
253
+ )
254
+ return fig
255
+
256
+
257
+ def _truncate_label(s: str, max_len: int = 36) -> str:
258
+ s = str(s)
259
+ return s if len(s) <= max_len else s[: max_len - 1] + "…"
260
+
261
+
262
+ def joint_shift_attention_top_features(df_mod, modality: str, top_n: int):
263
+ """
264
+ Top features by mean_rank (lowest = strongest joint shift+attention ranking).
265
+ Shift and attention importances are min–max scaled within this top-N slice for side-by-side comparison.
266
+ """
267
+ need = ("mean_rank", "importance_shift", "importance_att", "feature")
268
+ if not all(c in df_mod.columns for c in need):
269
+ return go.Figure()
270
+ sub = df_mod.nsmallest(top_n, "mean_rank").copy()
271
+ if sub.empty:
272
+ return go.Figure()
273
+
274
+ def _mm(s: pd.Series) -> pd.Series:
275
+ lo, hi = float(s.min()), float(s.max())
276
+ if hi <= lo:
277
+ return pd.Series(0.5, index=s.index)
278
+ return (s.astype(float) - lo) / (hi - lo)
279
+
280
+ sub["_zs"] = _mm(sub["importance_shift"])
281
+ sub["_za"] = _mm(sub["importance_att"])
282
+ # Best (lowest mean_rank) at top of chart; matches shift/attention rows below.
283
+ sub = sub.sort_values("mean_rank", ascending=True)
284
+ feats_full = sub["feature"].astype(str)
285
+ y_disp = feats_full.map(lambda s: _truncate_label(s, 40))
286
+ base = MODALITY_COLOR.get(modality, PALETTE[0])
287
+ att_c = "#475569" if base != "#475569" else "#64748b"
288
+
289
+ margin_l = int(min(380, 64 + 5.8 * max((len(t) for t in y_disp), default=10)))
290
+ h = min(720, 52 + 22 * len(sub))
291
+
292
+ fig = go.Figure()
293
+ fig.add_trace(
294
+ go.Bar(
295
+ name="Shift (scaled)",
296
+ y=y_disp,
297
+ x=sub["_zs"],
298
+ orientation="h",
299
+ marker_color=base,
300
+ customdata=feats_full,
301
+ hovertemplate="<b>%{customdata}</b><br>Shift (scaled): %{x:.3f}<extra></extra>",
302
+ )
303
+ )
304
+ fig.add_trace(
305
+ go.Bar(
306
+ name="Attention (scaled)",
307
+ y=y_disp,
308
+ x=sub["_za"],
309
+ orientation="h",
310
+ marker_color=att_c,
311
+ customdata=feats_full,
312
+ hovertemplate="<b>%{customdata}</b><br>Attention (scaled): %{x:.3f}<extra></extra>",
313
+ )
314
+ )
315
+ fig.update_layout(
316
+ template="plotly_white",
317
+ font=PLOT_FONT,
318
+ title=dict(
319
+ text=f"{modality} · top {top_n}",
320
+ x=0.5,
321
+ xanchor="center",
322
+ y=0.98,
323
+ yanchor="top",
324
+ font=dict(size=14, family=PLOT_FONT["family"]),
325
+ ),
326
+ barmode="group",
327
+ bargap=0.15,
328
+ bargroupgap=0.05,
329
+ width=680,
330
+ height=h,
331
+ margin=dict(l=margin_l, r=12, t=44, b=72),
332
+ xaxis_title="Scaled 0-1 within selection",
333
+ yaxis_title="",
334
+ legend=dict(orientation="h", yanchor="top", y=-0.14, xanchor="center", x=0.5),
335
+ )
336
+ fig.update_yaxes(autorange="reversed", tickfont=dict(size=10))
337
+ return fig
338
+
339
+
340
+ def modality_shift_attention_rank_stats(df_mod) -> dict[str, Any]:
341
+ """Pearson / Spearman between per-modality shift and attention ordinal ranks."""
342
+ from scipy.stats import pearsonr, spearmanr
343
+
344
+ need = ("shift_order_mod", "attention_order_mod")
345
+ if not all(c in df_mod.columns for c in need):
346
+ return {"n": 0}
347
+ sub = df_mod.dropna(subset=list(need))
348
+ n = len(sub)
349
+ if n < 3:
350
+ return {"n": n}
351
+ xs = sub["attention_order_mod"].astype(float)
352
+ ys = sub["shift_order_mod"].astype(float)
353
+ pr, pp = pearsonr(xs, ys)
354
+ sr, sp = spearmanr(xs, ys)
355
+ return {
356
+ "n": n,
357
+ "pearson_r": float(pr),
358
+ "pearson_p": float(pp),
359
+ "spearman_r": float(sr),
360
+ "spearman_p": float(sp),
361
+ }
362
+
363
+
364
+ def rank_bar(
365
+ df_top,
366
+ xcol: str,
367
+ ycol: str,
368
+ title: str,
369
+ color: str = PALETTE[0],
370
+ xaxis_title: str | None = None,
371
+ ):
372
+ d = df_top.sort_values(xcol, ascending=True)
373
+ y_raw = d[ycol].astype(str)
374
+ y_show = y_raw.map(lambda s: _truncate_label(s, 42))
375
+ margin_l = int(min(420, 80 + 5.8 * max((len(s) for s in y_show), default=12)))
376
+ fig = go.Figure(
377
+ go.Bar(
378
+ y=y_show,
379
+ x=d[xcol],
380
+ orientation="h",
381
+ marker_color=color,
382
+ customdata=y_raw,
383
+ hovertemplate="<b>%{customdata}</b><br>%{x:.4g}<extra></extra>",
384
+ )
385
+ )
386
+ xt = xaxis_title if xaxis_title is not None else xcol.replace("_", " ")
387
+ fig.update_layout(
388
+ template="plotly_white",
389
+ font=PLOT_FONT,
390
+ title=title,
391
+ width=680,
392
+ height=min(620, 38 + 20 * len(d)),
393
+ margin=dict(l=margin_l, r=24, t=48, b=40),
394
+ xaxis_title=xt,
395
+ yaxis_title="",
396
+ )
397
+ fig.update_yaxes(tickfont=dict(size=10))
398
+ return fig
399
+
400
+
401
+ def attention_top_comparison(fi_lists: dict, modality: str, top_n: int = 18):
402
+ """fi_lists: cohort -> {rna|atac|flux: [(name, score), ...]}."""
403
+ mk = FI_ATT_MOD_KEY.get(modality, str(modality).lower())
404
+ traces = []
405
+ for key, name, color in (
406
+ ("all", "All validation samples", PALETTE[0]),
407
+ ("dead_end", "Predicted dead-end", PALETTE[1]),
408
+ ("reprogramming", "Predicted reprogramming", PALETTE[2]),
409
+ ):
410
+ cohort = fi_lists.get(key) or {}
411
+ items = _attention_pairs_skip_batch(list(cohort.get(mk, [])))[:top_n]
412
+ if not items:
413
+ continue
414
+ feats, scores = zip(*items)
415
+ traces.append(
416
+ go.Bar(
417
+ name=name,
418
+ x=list(scores),
419
+ y=[f[:52] + ("…" if len(f) > 52 else "") for f in feats],
420
+ orientation="h",
421
+ marker_color=color,
422
+ )
423
+ )
424
+ fig = go.Figure(traces)
425
+ bar_h = max(320, 36 + min(top_n, 20) * 22 * max(1, len(traces)))
426
+    fig.update_layout(
+        barmode="group",
+        template="plotly_white",
+        font=PLOT_FONT,
+        title=f"Top attention (rollout): {modality}",
+        width=520,
+        height=bar_h,
+        margin=dict(l=220, r=24, t=56, b=40),
+        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
+    )
+    if not traces:
+        fig.update_layout(
+            annotations=[
+                dict(
+                    text="No attention list for this modality (re-run precompute).",
+                    xref="paper",
+                    yref="paper",
+                    x=0.5,
+                    y=0.5,
+                    showarrow=False,
+                )
+            ]
+        )
+    else:
+        fig.update_yaxes(autorange="reversed")
+    return fig
+
+
+def attention_cohort_view(
+    fi_lists: dict,
+    modality: str,
+    top_n: int,
+    mode: str,
+):
+    """
+    mode: 'compare': grouped bars for all three cohorts;
+    'all' | 'dead_end' | 'reprogramming': single cohort only.
+    """
+    if mode == "compare":
+        return attention_top_comparison(fi_lists, modality, top_n)
+    mk = FI_ATT_MOD_KEY.get(modality, str(modality).lower())
+    cohort = fi_lists.get(mode) or {}
+    items = _attention_pairs_skip_batch(list(cohort.get(mk, [])))[:top_n]
+    label = {
+        "all": "All validation samples",
+        "dead_end": "Predicted dead-end",
+        "reprogramming": "Predicted reprogramming",
+    }.get(mode, mode)
+    if not items:
+        fig = go.Figure()
+        fig.update_layout(
+            template="plotly_white",
+            font=PLOT_FONT,
+            title=f"{modality} · {label}",
+            annotations=[
+                dict(
+                    text="No items for this cohort.",
+                    xref="paper",
+                    yref="paper",
+                    x=0.5,
+                    y=0.5,
+                    showarrow=False,
+                )
+            ],
+        )
+        return fig
+    feats, scores = zip(*items)
+    fig = go.Figure(
+        go.Bar(
+            x=list(scores),
+            y=[f[:52] + ("…" if len(f) > 52 else "") for f in feats],
+            orientation="h",
+            marker_color=PALETTE[0],
+        )
+    )
+    h = max(280, 40 + min(top_n, 25) * 20)
+    fig.update_layout(
+        template="plotly_white",
+        font=PLOT_FONT,
+        title=f"{modality} · {label}",
+        width=520,
+        height=h,
+        margin=dict(l=220, r=24, t=56, b=40),
+        xaxis_title="Attention weight",
+    )
+    fig.update_yaxes(autorange="reversed")
+    return fig
+
+
+def global_rank_triple_panel(df_features, top_n: int = 20, top_n_pie: int = 100):
+    """
+    Global top-N by latent-shift and by attention (min–max scaled), plus pie of modality mix
+    among the top `top_n_pie` features by mean rank.
+    """
+    d = df_features.copy()
+    for col in ("importance_shift", "importance_att"):
+        min_v, max_v = d[col].min(), d[col].max()
+        if max_v > min_v:
+            d[col + "_norm"] = (d[col] - min_v) / (max_v - min_v)
+        else:
+            d[col + "_norm"] = 0.0
+
+    shift_top = d.nlargest(top_n, "importance_shift")
+    att_top = d.nlargest(top_n, "importance_att")
+    pie_pool = d.nsmallest(top_n_pie, "mean_rank")
+
+    fig = make_subplots(
+        rows=1,
+        cols=3,
+        column_widths=[0.36, 0.36, 0.28],
+        specs=[[{}, {}, {"type": "domain"}]],
+        subplot_titles=(
+            f"Top {top_n} by latent shift (ranked)",
+            f"Top {top_n} by attention (ranked)",
+            f"Top {top_n_pie} by mean rank (modality mix)",
+        ),
+        horizontal_spacing=0.06,
+    )
+
+    fig.add_trace(
+        go.Bar(
+            x=shift_top["importance_shift_norm"],
+            y=shift_top["feature"],
+            orientation="h",
+            marker_color=[MODALITY_COLOR.get(m, "#64748b") for m in shift_top["modality"]],
+            marker_line=dict(color="rgba(15,23,42,0.12)", width=1),
+            showlegend=False,
+            hovertemplate="%{y}<br>scaled shift: %{x:.3f}<extra></extra>",
+        ),
+        row=1,
+        col=1,
+    )
+    fig.add_trace(
+        go.Bar(
+            x=att_top["importance_att_norm"],
+            y=att_top["feature"],
+            orientation="h",
+            marker_color=[MODALITY_COLOR.get(m, "#64748b") for m in att_top["modality"]],
+            marker_line=dict(color="rgba(15,23,42,0.12)", width=1),
+            showlegend=False,
+            hovertemplate="%{y}<br>scaled attention: %{x:.3f}<extra></extra>",
+        ),
+        row=1,
+        col=2,
+    )
+
+    pie_labels = ["RNA", "ATAC", "Flux"]
+    counts = pie_pool["modality"].value_counts()
+    pie_vals = [int(counts.get(lab, 0)) for lab in pie_labels]
+    if sum(pie_vals) == 0:
+        pie_vals = [1, 1, 1]
+
+    fig.add_trace(
+        go.Pie(
+            labels=pie_labels,
+            values=pie_vals,
+            marker=dict(
+                colors=[MODALITY_PIE_COLOR.get(l, "#64748b") for l in pie_labels],
+                line=dict(color="#1e293b", width=1.2),
+            ),
+            textinfo="label+percent",
+            textfont_size=12,
+            hole=0.0,
+            showlegend=False,
+        ),
+        row=1,
+        col=3,
+    )
+
+    fig.update_xaxes(title_text="Min-max scaled shift", row=1, col=1)
+    fig.update_xaxes(title_text="Min-max scaled attention", row=1, col=2)
+    fig.update_yaxes(autorange="reversed", row=1, col=1)
+    fig.update_yaxes(autorange="reversed", row=1, col=2)
+
+    h = max(480, 40 + top_n * 18)
+    fig.update_layout(
+        template="plotly_white",
+        font=PLOT_FONT,
+        height=h,
+        width=min(1280, 400 + top_n * 14),
+        margin=dict(l=40, r=40, t=80, b=40),
+        title_text="Global feature ranking (all modalities)",
+        title_x=0.5,
+    )
+    return fig
+
+
+def _flux_prepare_top_ranked(flux_df: pd.DataFrame, top_n: int, metric: str = "mean_rank") -> pd.DataFrame:
+    sub = flux_df[~flux_df["feature"].astype(str).str.contains("batch", case=False, na=False)].copy()
+    if metric not in sub.columns:
+        metric = "mean_rank"
+    sub = sub.sort_values(metric, ascending=True).head(int(top_n)).copy()
+    if "pathway" in sub.columns:
+        pc = sub["pathway"].value_counts()
+        sub["_pw_n"] = sub["pathway"].map(pc)
+        sub.sort_values(["_pw_n", "pathway"], ascending=[False, True], inplace=True)
+    return sub
+
+
+def flux_pathway_sunburst(flux_df: pd.DataFrame, max_features: int = 55) -> go.Figure:
+    sub = flux_df.dropna(subset=["pathway"]).copy()
+    if sub.empty:
+        return go.Figure()
+    sub = sub.nsmallest(int(max_features), "mean_rank")
+    sub["pathway"] = sub["pathway"].astype(str)
+    sub["_uid"] = np.arange(len(sub))
+    sub["rxn"] = sub.apply(
+        lambda r: f"{_truncate_label(str(r['feature']), 36)} ·{int(r['_uid'])}",
+        axis=1,
+    )
+    mr = sub["mean_rank"].astype(float)
+    sub["w"] = (mr.max() - mr + 1.0).clip(lower=0.5)
+    color_col = "log_fc" if "log_fc" in sub.columns and sub["log_fc"].notna().any() else "mean_rank"
+    sb_kw: dict[str, Any] = {
+        "path": ["pathway", "rxn"],
+        "values": "w",
+        "color": color_col,
+        "hover_data": {"mean_rank": ":.2f", "pval_adj": ":.2e", "feature": True, "w": False, "_uid": False},
+    }
+    if color_col == "log_fc":
+        sb_kw["color_continuous_scale"] = LOG_FC_DIVERGING_SCALE
+        sb_kw["range_color"] = [LOG_FC_COLOR_MIN, LOG_FC_COLOR_MAX]
+    else:
+        sb_kw["color_continuous_scale"] = "Viridis_r"
+    fig = px.sunburst(sub, **sb_kw)
+    fig.update_layout(
+        template="plotly_white",
+        font=PLOT_FONT,
+        margin=dict(l=8, r=8, t=100, b=16),
+        height=min(820, 520 + int(max_features) * 5),
+        title=dict(
+            text="Top flux reactions by model rank, nested under pathway",
+            x=0,
+            xanchor="left",
+            y=0.99,
+            yanchor="top",
+            font=dict(size=13, family=PLOT_FONT["family"]),
+            pad=dict(b=16, l=4),
+        ),
+    )
+    if color_col == "log_fc":
+        fig.update_layout(
+            coloraxis=dict(
+                cmin=LOG_FC_COLOR_MIN,
+                cmax=LOG_FC_COLOR_MAX,
+                colorbar=dict(
+                    title=dict(text=LABEL_LOG2FC, side="right"),
+                    tickformat=".2f",
+                    len=0.38,
+                    thickness=12,
+                    y=0.52,
+                    yanchor="middle",
+                ),
+            )
+        )
+    return fig
+
+
+def flux_volcano(flux_df: pd.DataFrame) -> go.Figure:
+    if "log_fc" not in flux_df.columns:
+        return go.Figure()
+    d = flux_df.dropna(subset=["log_fc"]).copy()
+    if d.empty:
+        return go.Figure()
+    # Drop degenerate rows: ~zero fold-change with exactly-zero adjusted p (numeric artifact / noise).
+    lf = d["log_fc"].astype(float)
+    if "pval_adj" in d.columns:
+        pa = d["pval_adj"].astype(float)
+        bad = np.isfinite(lf) & np.isfinite(pa) & (np.abs(lf) < 1e-10) & (pa <= 0.0)
+        d = d[~bad]
+        if d.empty:
+            return go.Figure()
+    if "pval_adj_log" in d.columns:
+        y = d["pval_adj_log"].astype(float)
+    else:
+        p = d["pval_adj"].astype(float).clip(lower=1e-300)
+        y = -np.log10(p.to_numpy())
+    d = d.assign(_neglogp=y)
+    fig = px.scatter(
+        d,
+        x="log_fc",
+        y="_neglogp",
+        color="mean_rank",
+        color_continuous_scale="Viridis_r",
+        hover_name="feature",
+        hover_data=["pathway", "pval_adj", "group"],
+        labels={
+            "log_fc": LABEL_LOG2FC,
+            "_neglogp": LABEL_NEG_LOG10_ADJ_P,
+            "mean_rank": "Mean rank",
+        },
+    )
+    fig.update_layout(
+        template="plotly_white",
+        font=PLOT_FONT,
+        title="Differential flux vs statistical significance",
+        height=520,
+        margin=dict(l=52, r=24, t=52, b=48),
+        coloraxis_colorbar=dict(
+            title=dict(text="Mean rank", side="right"),
+            thickness=12,
+            len=0.55,
+        ),
+    )
+    return fig
+
+
+def motif_tf_mean_rank_bars(atac_df: pd.DataFrame, top_n: int = 22) -> go.Figure:
+    """Aggregate motif features by TF name (prefix before ``_<motif_id>``); show lowest mean joint rank."""
+    if atac_df.empty or "feature" not in atac_df.columns:
+        return go.Figure()
+
+    def _tf_prefix(feat: str) -> str:
+        s = str(feat)
+        if "_" in s:
+            head, tail = s.rsplit("_", 1)
+            if tail.isdigit():
+                return head
+        return s
+
+    d = atac_df.copy()
+    d["_tf"] = d["feature"].map(_tf_prefix)
+    agg = d.groupby("_tf", as_index=False)["mean_rank"].mean()
+    agg = agg.nsmallest(int(top_n), "mean_rank").sort_values("mean_rank", ascending=True)
+    if agg.empty:
+        return go.Figure()
+    y_show = agg["_tf"].astype(str).map(lambda s: _truncate_label(s, 36))
+    fig = go.Figure(
+        go.Bar(
+            y=y_show,
+            x=agg["mean_rank"],
+            orientation="h",
+            marker_color=MODALITY_COLOR.get("ATAC", PALETTE[0]),
+            customdata=agg["_tf"],
+            hovertemplate="<b>%{customdata}</b><br>Mean mean_rank (across motifs): %{x:.2f}<extra></extra>",
+        )
+    )
+    fig.update_layout(
+        template="plotly_white",
+        font=PLOT_FONT,
+        title=f"TFs by average motif rank (top {top_n} by lowest mean rank)",
+        height=min(640, 48 + 22 * len(agg)),
+        margin=dict(l=160, r=24, t=52, b=40),
+        xaxis_title="Mean of mean_rank over motif instances (lower = stronger)",
+        yaxis_title="",
+    )
+    fig.update_yaxes(autorange="reversed", tickfont=dict(size=10))
+    return fig
+
+
+def motif_chromvar_volcano(atac_df: pd.DataFrame) -> go.Figure:
+    """Motif differential view: mean activity difference (reprogramming − dead-end) vs significance."""
+    need = ("mean_diff", "pval_adj")
+    if not all(c in atac_df.columns for c in need):
+        return go.Figure()
+    d = atac_df.dropna(subset=["mean_diff", "pval_adj"]).copy()
+    if d.empty:
+        return go.Figure()
+    md = d["mean_diff"].astype(float)
+    pa = d["pval_adj"].astype(float)
+    bad = np.isfinite(md) & np.isfinite(pa) & (np.abs(md) < 1e-12) & (pa <= 0.0)
+    d = d[~bad]
+    if d.empty:
+        return go.Figure()
+    if "pval_adj_log" in d.columns:
+        y = d["pval_adj_log"].astype(float)
+    else:
+        p = d["pval_adj"].astype(float).clip(lower=1e-300)
+        y = -np.log10(p.to_numpy())
+    d = d.assign(_y=y)
+    hover_cols = [c for c in ("group", "pval_adj", "mean_rank", "mean_de", "mean_re") if c in d.columns]
+    fig = px.scatter(
+        d,
+        x="mean_diff",
+        y="_y",
+        color="mean_rank",
+        color_continuous_scale="Viridis_r",
+        hover_name="feature",
+        hover_data=hover_cols if hover_cols else None,
+        labels={
+            "mean_diff": "Mean difference (reprogramming − dead-end)",
+            "_y": LABEL_NEG_LOG10_ADJ_P,
+            "mean_rank": "Mean rank",
+        },
+    )
+    fig.update_layout(
+        template="plotly_white",
+        font=PLOT_FONT,
+        title="TF motif differential activity (mean difference vs significance)",
+        height=520,
+        margin=dict(l=52, r=24, t=52, b=48),
+        coloraxis_colorbar=dict(title=dict(text="Mean rank", side="right"), thickness=12, len=0.55),
+    )
+    return fig
+
+
+def notebook_style_activity_scatter(
+    df: pd.DataFrame,
+    title: str,
+    x_title: str,
+    y_title: str,
+) -> go.Figure:
+    """mean_de vs mean_re, colour = pval_adj_log (Reds), marker size ∝ inverse mean_rank."""
+    need = ("mean_de", "mean_re", "mean_rank", "pval_adj_log", "feature", "group")
+    if not all(c in df.columns for c in need):
+        return go.Figure()
+    d = df.dropna(subset=["mean_de", "mean_re", "mean_rank", "pval_adj_log"]).copy()
+    if d.empty:
+        return go.Figure()
+    mx = float(d["mean_rank"].max())
+    d = d.assign(_inv=(mx - d["mean_rank"].astype(float)).clip(lower=0))
+    inv = d["_inv"].astype(float)
+    lo, hi = float(inv.min()), float(inv.max())
+    if hi <= lo:
+        d["_sz"] = 6.0
+    else:
+        d["_sz"] = 3.5 + (inv - lo) / (hi - lo) * 9.0
+
+    fig = px.scatter(
+        d,
+        x="mean_de",
+        y="mean_re",
+        color="pval_adj_log",
+        color_continuous_scale="Reds",
+        size="_sz",
+        size_max=14,
+        hover_name="feature",
+        hover_data={
+            "mean_rank": ":.2f",
+            "group": True,
+            "pval_adj_log": ":.2f",
+            "_inv": False,
+            "_sz": False,
+        },
+        labels={
+            "mean_de": x_title,
+            "mean_re": y_title,
+            "pval_adj_log": "Adj. p-value (log)",
+        },
+    )
+    fig.update_traces(
+        marker=dict(line=dict(width=0.45, color="rgba(255,255,255,0.75)"), opacity=0.9),
+        selector=dict(mode="markers"),
+    )
+    fig.update_layout(
+        template="plotly_white",
+        font=PLOT_FONT,
+        title=title,
+        height=520,
+        margin=dict(l=52, r=24, t=52, b=48),
+        coloraxis_colorbar=dict(title=dict(text="Adj. p (log)", side="right"), thickness=12, len=0.55),
+    )
+    return fig
+
+
+def pathway_bubble_suggested_height(n_paths: int) -> int:
+    """Total figure height for pathway bubble panels (use the max of both cohorts so legends line up)."""
+    n = max(int(n_paths), 1)
+    return max(520, min(1100, 22 * n + 200))
+
+
+def pathway_enrichment_bubble_panel(
+    df: pd.DataFrame,
+    title: str,
+    *,
+    show_colorbar: bool = True,
+    layout_height: int | None = None,
+) -> go.Figure:
+    """Single cohort: Reactome (circle) vs KEGG (square), colour = −log₁₀ Benjamini (scale per panel)."""
+    fig = go.Figure()
+    if df.empty:
+        fig.update_layout(
+            template="plotly_white",
+            font=PLOT_FONT,
+            title=dict(text=title, x=0.5, xanchor="center"),
+            annotations=[
+                dict(
+                    text="No significant pathways (Benjamini–Hochberg q < 0.05)",
+                    xref="paper",
+                    yref="paper",
+                    x=0.5,
+                    y=0.5,
+                    showarrow=False,
+                    font=dict(size=13, color="#64748b"),
+                )
+            ],
+            height=320,
+            margin=dict(l=40, r=40, t=56, b=40),
+        )
+        return fig
+
+    # More genes in the overlap first, then stronger gene ratio (matches enrichment table emphasis).
+    d = df.sort_values(by=["Count", "Gene Ratio"], ascending=[False, False]).reset_index(drop=True)
+    d = d.assign(
+        _neglog=-np.log10(d["Benjamini"].astype(float).clip(lower=1e-300)),
+        _y=np.arange(len(d), dtype=float),
+    )
+    nl = d["_neglog"].astype(float)
+    cmin = float(nl.min())
+    cmax = float(nl.max())
+    if cmax <= cmin:
+        cmax = cmin + 1e-6
+
+    # Single trace: per-panel cmin/cmax so Viridis uses the cohort’s range (shared global max clusters at one hue).
+    sym_map = {"Reactome": "circle", "KEGG": "square"}
+    symbols = [sym_map.get(str(x), "circle") for x in d["Library"].tolist()]
+    sz = np.sqrt(d["Count"].astype(float).clip(lower=1)) * 4.8
+    customdata = np.stack(
+        [d["Count"].to_numpy(), d["_neglog"].to_numpy(), d["Library"].astype(str).to_numpy()],
+        axis=1,
+    )
+    fig.add_trace(
+        go.Scatter(
+            x=d["Gene Ratio"],
+            y=d["_y"],
+            mode="markers",
+            name="Pathways",
+            showlegend=False,
+            marker=dict(
+                size=sz,
+                sizemode="diameter",
+                sizemin=4,
+                symbol=symbols,
+                color=d["_neglog"],
+                cmin=cmin,
+                cmax=cmax,
+                colorscale="Viridis",
+                showscale=bool(show_colorbar),
+                colorbar=dict(
+                    title=dict(
+                        text="\u2212log\u2081\u2080 q",
+                        side="right",
+                    ),
+                    len=0.72,
+                    thickness=12,
+                    y=0.45,
+                    yanchor="middle",
+                    outlinewidth=0,
+                )
+                if show_colorbar
+                else None,
+                line=dict(width=0.75, color="rgba(0,0,0,0.5)"),
+                opacity=0.92,
+            ),
+            text=d["Term"],
+            customdata=customdata,
+            hovertemplate=(
+                "<b>%{text}</b><br>%{customdata[2]}<br>Gene ratio: %{x:.3f}<br>Count: %{customdata[0]}"
+                "<br>\u2212log\u2081\u2080 Benjamini: %{customdata[1]:.2f}<extra></extra>"
+            ),
+        )
+    )
+    for lib, sym in (("Reactome", "circle"), ("KEGG", "square")):
+        if lib not in set(d["Library"].astype(str)):
+            continue
+        fig.add_trace(
+            go.Scatter(
+                x=[None],
+                y=[None],
+                mode="markers",
+                name=lib,
+                marker=dict(
+                    symbol=sym,
+                    size=11,
+                    color="#475569",
+                    line=dict(width=1, color="rgba(0,0,0,0.45)"),
+                ),
+                showlegend=True,
+            )
+        )
+
+    ticktext = [_truncate_label(str(t), 52) for t in d["Term"]]
+    h = int(layout_height) if layout_height is not None else pathway_bubble_suggested_height(len(d))
+    fig.update_yaxes(
+        tickmode="array",
+        tickvals=d["_y"].tolist(),
+        ticktext=ticktext,
+        autorange="reversed",
+        title="",
+    )
+    fig.update_xaxes(title_text="Gene ratio (count ÷ list total)")
+    fig.update_layout(
+        template="plotly_white",
+        font=PLOT_FONT,
+        title=dict(
+            text=title,
+            x=0.5,
+            xanchor="center",
+            yanchor="top",
+            y=0.985,
+            pad=dict(b=0),
+        ),
+        height=h,
+        margin=dict(l=215, r=132, t=48, b=108),
+        legend=dict(
+            orientation="h",
+            yanchor="top",
+            y=-0.11,
+            xanchor="center",
+            x=0.5,
+            bgcolor="rgba(255,255,255,0.92)",
+            bordercolor="rgba(0,0,0,0.08)",
+            borderwidth=1,
+        ),
+        showlegend=True,
+    )
+    return fig
+
+
+def pathway_gene_membership_heatmap(
+    z: np.ndarray, row_labels: list[str], col_labels: list[str]
+) -> go.Figure:
+    """Pathway × gene grid; empty cells transparent; light gaps; legend for category colours."""
+    if z.size == 0:
+        return go.Figure()
+    # Discrete codes 0–4 must not use z/4 (3→0.75 landed in the KEGG band). Map to fixed slots.
+    _z_plot = {0: 0.04, 1: 0.24, 2: 0.44, 3: 0.64, 4: 0.84}
+    zn = np.vectorize(lambda v: _z_plot.get(int(v), 0.04))(z).astype(float)
+    transparent = "rgba(0,0,0,0)"
+    colorscale = [
+        [0.0, transparent],
+        [0.14, transparent],
+        [0.15, "#e69138"],
+        [0.33, "#e69138"],
+        [0.34, "#7eb6d9"],
+        [0.53, "#7eb6d9"],
+        [0.54, "#9ccc65"],
+        [0.73, "#9ccc65"],
+        [0.74, "#283593"],
+        [1.0, "#283593"],
+    ]
+
+    def _cell_hint(v: float) -> str:
+        k = int(round(float(v)))
+        return {
+            0: "",
+            1: "Gene enriched in dead-end contrast",
+            2: "Gene enriched in reprogramming contrast",
+            3: "Reactome pathway set",
+            4: "KEGG pathway set",
+        }.get(k, "")
+
+    z_int = z.astype(int)
+    text_grid = [[_cell_hint(z_int[i, j]) for j in range(z.shape[1])] for i in range(z.shape[0])]
+
+    heat = go.Heatmap(
+        z=zn,
+        x=col_labels,
+        y=row_labels,
+        text=text_grid,
+        colorscale=colorscale,
+        zmin=0,
+        zmax=1,
+        showscale=False,
+        xgap=1,
+        ygap=1,
+        hovertemplate="%{y}<br>%{x}<br>%{text}<extra></extra>",
+    )
+
+    fig = go.Figure(data=[heat])
+
+    n_rows, n_cols = z.shape
+    cell_w = 10
+    cell_h = 20
+    w = int(min(1000, max(460, n_cols * cell_w + 272)))
+    h = int(min(960, max(460, n_rows * cell_h + 128)))
+    fig.update_layout(
+        template="plotly_white",
+        font=PLOT_FONT,
+        title=dict(text="Pathway–gene membership", x=0.5, xanchor="center"),
+        height=h,
+        width=w,
+        margin=dict(l=4, r=168, t=52, b=108),
+        paper_bgcolor="rgba(0,0,0,0)",
+        plot_bgcolor="#f4f6f9",
+        xaxis=dict(side="bottom", tickangle=-50, showgrid=False, zeroline=False),
+        yaxis=dict(
+            tickfont=dict(size=9),
+            showgrid=False,
+            zeroline=False,
+            autorange="reversed",
+        ),
+    )
+
+    legend_markers = [
+        ("Empty cell", "#f1f5f9", "square"),
+        ("Dead-end–linked gene", "#e69138", "square"),
+        ("Reprogramming–linked gene", "#7eb6d9", "square"),
+        ("Reactome (column tag)", "#9ccc65", "square"),
+        ("KEGG (column tag)", "#283593", "square"),
+    ]
+    for name, color, sym in legend_markers:
+        fig.add_trace(
+            go.Scatter(
+                x=[None],
+                y=[None],
+                mode="markers",
+                name=name,
+                marker=dict(size=11, color=color, symbol=sym, line=dict(width=1, color="rgba(0,0,0,0.25)")),
+                showlegend=True,
+            )
+        )
+
+    fig.update_layout(
+        legend=dict(
+            orientation="v",
+            yanchor="top",
+            y=0.98,
+            xanchor="left",
+            x=1.02,
+            bgcolor="rgba(255,255,255,0.92)",
+            bordercolor="rgba(0,0,0,0.08)",
+            borderwidth=1,
+            font=dict(size=11),
+        )
+    )
+    return fig
+
+
+def flux_dead_end_vs_reprogram_scatter(flux_df: pd.DataFrame, max_pathway_colors: int = 12) -> go.Figure:
+    need = ("mean_de", "mean_re")
+    if not all(c in flux_df.columns for c in need):
+        return go.Figure()
+    d = flux_df.dropna(subset=list(need)).copy()
+    if d.empty:
+        return go.Figure()
+    imp = (
+        d["importance_shift"].astype(float).clip(lower=0) * d["importance_att"].astype(float).clip(lower=0)
+    ) ** 0.5
+    q = float(imp.quantile(0.95)) if len(imp) else 1.0
+    d = d.assign(_s=(imp / (q or 1.0)).clip(upper=1) * 20 + 5)
+    pw = d["pathway"].fillna("Unknown").astype(str) if "pathway" in d.columns else pd.Series(
+        ["Unknown"] * len(d), index=d.index
+    )
+    top_pw = pw.value_counts().head(int(max_pathway_colors)).index
+    d = d.assign(_pw_col=pw.where(pw.isin(top_pw), "Other"))
+    uniq = sorted(d["_pw_col"].astype(str).unique(), key=lambda x: (x == "Other", x))
+    pal = list(LATENT_DISCRETE_PALETTE)
+    pw_cmap: dict[str, str] = {}
+    j = 0
+    for name in uniq:
+        if name == "Other":
+            pw_cmap[name] = "#94a3b8"
+        else:
+            pw_cmap[name] = pal[j % len(pal)]
+            j += 1
+    fig = px.scatter(
+        d,
+        x="mean_de",
+        y="mean_re",
+        color="_pw_col",
+        color_discrete_map=pw_cmap,
+        size="_s",
+        hover_name="feature",
+        hover_data=["mean_rank", "log_fc", "pathway"],
+        labels={
+            "mean_de": "Mean flux · dead-end",
+            "mean_re": "Mean flux · reprogramming",
+            "_pw_col": "Pathway",
+        },
+    )
+    fig.update_layout(
+        template="plotly_white",
+        font=PLOT_FONT,
+        height=540,
+        margin=dict(l=52, r=20, t=52, b=40),
+        title="Average measured flux by fate label (each point is one reaction)",
+        legend=dict(orientation="h", yanchor="top", y=-0.28, xanchor="center", x=0.5),
+    )
+    fig.update_traces(marker=dict(opacity=0.75, line=dict(width=0.35, color="rgba(0,0,0,0.3)")))
+    return fig
+
+
+def flux_pathway_mean_rank_violin(flux_df: pd.DataFrame, top_pathways: int = 12) -> go.Figure:
+    sub = flux_df.dropna(subset=["pathway"]).copy()
+    if sub.empty:
+        return go.Figure()
+    top_p = sub["pathway"].astype(str).value_counts().head(int(top_pathways)).index
+    sub = sub[sub["pathway"].astype(str).isin(top_p)]
+    top_list = list(top_p)
+    v_cmap = {p: LATENT_DISCRETE_PALETTE[i % len(LATENT_DISCRETE_PALETTE)] for i, p in enumerate(top_list)}
+    fig = px.violin(
+        sub,
+        x="pathway",
+        y="mean_rank",
+        box=True,
+        points=False,
+        color="pathway",
+        color_discrete_map=v_cmap,
+        labels={"mean_rank": "Mean rank (lower = stronger model focus)", "pathway": "Pathway"},
+    )
+    fig.update_layout(
+        template="plotly_white",
+        font=PLOT_FONT,
+        showlegend=False,
+        height=420,
+        xaxis_tickangle=-32,
+        margin=dict(l=48, r=24, t=48, b=140),
+        title="How joint model rank spreads within high-coverage pathways",
+    )
+    return fig
+
+
+def flux_reaction_annotation_panel(flux_df: pd.DataFrame, top_n: int = 26, metric: str = "mean_rank") -> go.Figure:
+    """Three heatmap columns: pathway (categorical), DE Log₂FC, −log₁₀ adjusted p."""
+    top = _flux_prepare_top_ranked(flux_df, top_n, metric)
+    if top.empty:
+        return go.Figure()
+    n = len(top)
+    pathways = top["pathway"].fillna("Unknown").astype(str).tolist() if "pathway" in top.columns else ["Unknown"] * n
+    uniq = list(dict.fromkeys(pathways))
+    code_map = {u: i for i, u in enumerate(uniq)}
+    codes = np.array([code_map[p] for p in pathways], dtype=float)
+    k = max(len(uniq), 1)
+    qual = list(px.colors.qualitative.Safe) + list(px.colors.qualitative.Dark24) + list(px.colors.qualitative.Light24)
+    if k <= 1:
+        disc_scale = [[0, qual[0]], [1, qual[0]]]
+    else:
+        disc_scale = [[j / (k - 1), qual[j % len(qual)]] for j in range(k)]
+    log_fc = top["log_fc"].fillna(0).astype(float).to_numpy() if "log_fc" in top.columns else np.zeros(n)
+    if "pval_adj_log" in top.columns:
+        pv = top["pval_adj_log"].fillna(0).astype(float).to_numpy()
+    else:
+        pv = -np.log10(top["pval_adj"].astype(float).clip(lower=1e-300).to_numpy())
+    full_features = top["feature"].astype(str).tolist()
+    y_labels = [_truncate_label(str(f), 44) for f in full_features]
+    z_path = codes.reshape(-1, 1)
+    # hovertext (not customdata): subplot heatmaps often render %{customdata[0]} as "-" in the browser.
+    hover_path = [[f"<b>{fn}</b><br>pathway: {pw}"] for fn, pw in zip(full_features, pathways)]
+    hover_lfc = [
+        [f"<b>{fn}</b><br>{LABEL_LOG2FC}: {float(log_fc[i]):.4f}"]
+        for i, fn in enumerate(full_features)
+    ]
+    hover_pv = [
+        [f"<b>{fn}</b><br>{LABEL_NEG_LOG10_ADJ_P}: {float(pv[i]):.2f}"]
+        for i, fn in enumerate(full_features)
+    ]
+    fig = make_subplots(
+        rows=1,
+        cols=3,
+        shared_yaxes=True,
+        horizontal_spacing=0.06,
+        column_widths=[0.24, 0.24, 0.24],
+    )
+    fig.add_trace(
+        go.Heatmap(
+            z=z_path,
+            x=[""],
+            y=y_labels,
+            colorscale=disc_scale,
+            zmin=0,
+            zmax=max(k - 1, 0),
+            showscale=False,
+            hovertext=hover_path,
+            hovertemplate="%{hovertext}<extra></extra>",
+        ),
+        row=1,
+        col=1,
+    )
+    fig.add_trace(
+        go.Heatmap(
+            z=log_fc.reshape(-1, 1),
+            x=[""],
+            y=y_labels,
+            colorscale=LOG_FC_DIVERGING_SCALE,
+            zmin=LOG_FC_COLOR_MIN,
+            zmax=LOG_FC_COLOR_MAX,
+            showscale=True,
+            colorbar=dict(
+                title=dict(text=LABEL_LOG2FC, side="right"),
+                tickformat=".2f",
+                len=0.22,
+                y=0.71,
+                yanchor="middle",
+                x=1.0,
+                xanchor="left",
+                xref="paper",
+                yref="paper",
+                thickness=12,
+            ),
+            hovertext=hover_lfc,
+            hovertemplate="%{hovertext}<extra></extra>",
+        ),
+        row=1,
+        col=2,
+    )
+    fig.add_trace(
+        go.Heatmap(
+            z=pv.reshape(-1, 1),
+            x=[""],
+            y=y_labels,
+            colorscale="Viridis",
+            showscale=True,
+            colorbar=dict(
+                title=dict(text=LABEL_NEG_LOG10_ADJ_P, side="right"),
+                len=0.22,
+                y=0.29,
+                yanchor="middle",
+                x=1.0,
+                xanchor="left",
+                xref="paper",
+                yref="paper",
+                thickness=12,
+            ),
+            hovertext=hover_pv,
+            hovertemplate="%{hovertext}<extra></extra>",
+        ),
+        row=1,
+        col=3,
+    )
+    fig.update_layout(
+        template="plotly_white",
+        font=PLOT_FONT,
+        height=min(820, 120 + n * 22),
+        width=900,
+        margin=dict(l=8, r=108, t=56, b=72),
+        title=dict(
+            text=f"Pathway, {LABEL_LOG2FC}, and significance",
+            x=0,
+            xanchor="left",
+            y=0.995,
+            yanchor="top",
+            font=dict(size=13, family=PLOT_FONT["family"]),
+            pad=dict(b=8, l=4),
+        ),
+    )
+    fig.update_xaxes(side="bottom", title_standoff=8)
+    fig.update_xaxes(title_text="Pathway", row=1, col=1)
+    fig.update_xaxes(title_text=LABEL_LOG2FC, row=1, col=2)
+    fig.update_xaxes(title_text=LABEL_NEG_LOG10_ADJ_P, row=1, col=3)
+    fig.update_yaxes(autorange="reversed")
+    return fig
+
+
+def flux_model_metric_profile(flux_df: pd.DataFrame, top_n: int = 22, metric: str = "mean_rank") -> go.Figure:
+    """Matrix view: scaled shift, attention, model priority, and fate flux contrast."""
+    top = _flux_prepare_top_ranked(flux_df, top_n, metric)
+    if top.empty:
+        return go.Figure()
+
+    def mm(s: pd.Series) -> np.ndarray:
+        v = s.astype(float).to_numpy()
+        lo, hi = float(np.nanmin(v)), float(np.nanmax(v))
+        if hi <= lo or not np.isfinite(lo):
+            return np.zeros_like(v, dtype=float)
+        return (v - lo) / (hi - lo)
+
+    cols: list[np.ndarray] = []
+    labels: list[str] = []
+    for c, lab in (("importance_shift", "Latent shift impact"), ("importance_att", "Attention (rollout)")):
+        if c in top.columns:
+            cols.append(mm(top[c]))
+            labels.append(lab)
+    cols.append(1.0 - mm(top["mean_rank"]))
+    labels.append("Joint priority (1 - scaled mean rank)")
+    if "mean_de" in top.columns and "mean_re" in top.columns:
+        de = top["mean_de"].astype(float).replace(0, np.nan)
+        ratio = (top["mean_re"].astype(float) / (de + 1e-12)).fillna(0)
+        cols.append(mm(ratio))
+        labels.append("RE / DE mean flux (scaled)")
+    z = np.column_stack(cols)
+    full_rxn = top["feature"].astype(str).tolist()
+    x_labels = [_truncate_label(str(f), 34) for f in full_rxn]
+    fig = px.imshow(
+        z.T,
+        x=x_labels,
+        y=labels,
+        aspect="auto",
+        color_continuous_scale="Tealrose",
+        labels=dict(x="Reaction", y="Metric", color="Scaled 0-1 per metric"),
+    )
+    n_met, n_rxn = z.T.shape
+    hover_cd = np.broadcast_to(np.array(full_rxn, dtype=object), (n_met, n_rxn))
+    fig.update_traces(
+        customdata=hover_cd,
+        hovertemplate="<b>%{customdata}</b><br>%{y}<br>scaled: %{z:.3f}<extra></extra>",
+    )
+    fig.update_xaxes(tickangle=-50, side="bottom", title_standoff=12)
+    fig.update_layout(
+        template="plotly_white",
+        font=PLOT_FONT,
+        height=min(380, 140 + len(labels) * 36),
+        margin=dict(l=200, r=28, t=64, b=200),
+        title=dict(
+            text="Reaction profile",
+            x=0,
+            xanchor="left",
+            y=0.98,
+            yanchor="top",
+            font=dict(size=13, family=PLOT_FONT["family"]),
+            pad=dict(b=10, l=4),
+        ),
+    )
+    return fig
+
+
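The min–max scaling used throughout these panels (e.g. in `global_rank_triple_panel` and the `mm` helper of `flux_model_metric_profile`) can be sketched standalone. This is a minimal sketch, not part of the diff; the helper name `min_max_scale` is hypothetical, but the logic mirrors the normalisation above, including the fallback to zeros for a constant column to avoid division by zero.

```python
import pandas as pd


def min_max_scale(s: pd.Series) -> pd.Series:
    """Scale a numeric Series to [0, 1]; constant columns map to 0.0."""
    lo, hi = s.min(), s.max()
    if hi > lo:
        return (s - lo) / (hi - lo)
    # Degenerate case: all values equal, so there is no spread to scale.
    return pd.Series(0.0, index=s.index)


print(min_max_scale(pd.Series([2.0, 4.0, 6.0])).tolist())  # [0.0, 0.5, 1.0]
```

Because each metric is scaled independently, values are only comparable within a column, not across metrics.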
streamlit_hf/lib/reactions.py ADDED
@@ -0,0 +1,12 @@
+"""Shared reaction-string normalisation (flux features vs metabolic metadata)."""
+
+from __future__ import annotations
+
+import re
+
+
+def normalize_reaction_key(name: str) -> str:
+    """Map `A→B` style names to the same key as metadata `A -> B` (case-insensitive)."""
+    t = str(name).strip().replace("→", " -> ")
+    t = re.sub(r"\s+", " ", t)
+    return t.lower()
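To illustrate how `normalize_reaction_key` lets flux feature names and metabolic metadata meet on one key, here is a small usage sketch (the reaction strings are made-up examples, not taken from the dataset):

```python
import re


def normalize_reaction_key(name: str) -> str:
    """Map `A→B` style names to the same key as metadata `A -> B` (case-insensitive)."""
    t = str(name).strip().replace("→", " -> ")
    t = re.sub(r"\s+", " ", t)
    return t.lower()


# An arrow-glyph feature name and a spaced metadata name collapse to the same key.
a = normalize_reaction_key("Glucose→Pyruvate")
b = normalize_reaction_key("  glucose  ->  pyruvate ")
print(a)       # glucose -> pyruvate
print(a == b)  # True
```

Lower-casing plus whitespace collapse means the join is robust to formatting drift between the two sources, at the cost of treating case-distinct names as identical.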
streamlit_hf/lib/ui.py ADDED
@@ -0,0 +1,24 @@
+"""Light shared styles (no heavy themes; keeps default Streamlit + plotly_white)."""
+
+from __future__ import annotations
+
+import streamlit as st
+
+
+def inject_app_styles() -> None:
+    """Panel labels and home cards; safe to call on every rerun (small CSS block)."""
+    st.markdown(
+        """
+        <style>
+        .latent-panel-title {
+            font-size: 0.82rem;
+            font-weight: 600;
+            color: #475569;
+            margin: 0 0 0.35rem 0;
+            letter-spacing: 0.02em;
+        }
+        .latent-panel-title-gap { margin-top: 0.85rem; }
+        </style>
+        """,
+        unsafe_allow_html=True,
+    )
streamlit_hf/pages/1_Single_Cell_Explorer.py ADDED
@@ -0,0 +1,158 @@
+"""Interactive UMAP of multimodal latent space (validation folds)."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import pandas as pd
+import streamlit as st
+
+_REPO = Path(__file__).resolve().parents[2]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+
+from streamlit_hf.lib import formatters
+from streamlit_hf.lib import io
+from streamlit_hf.lib import plots
+from streamlit_hf.lib import ui
+
+ui.inject_app_styles()
+
+st.title("Single-Cell Explorer")
+st.caption("Explore validation cells in 2-D UMAP space: colour and filter to compare fates, predictions, and modalities.")
+
+bundle = io.load_latent_bundle()
+if bundle is None:
+    st.error("Latent maps are not available in this session. Ask the maintainer to publish results, then reload.")
+    st.stop()
+
+samples = io.load_samples_df()
+df = io.latent_join_samples(bundle, samples)
+
+left, right = st.columns([0.36, 0.64], gap="large")
+
+with left:
+    st.markdown('<p class="latent-panel-title">Colour by</p>', unsafe_allow_html=True)
+    color_opt = st.selectbox(
+        "Hue",
+        [
+            "label",
+            "predicted_class",
+            "correct",
+            "fold",
+            "batch_no",
+            "modality_label",
+            "pct",
+        ],
+        format_func=lambda x: {
+            "label": "CellTag-Multi label",
+            "predicted_class": "Predicted fate",
+            "correct": "Prediction correct",
+            "fold": "CV fold",
+            "batch_no": "Batch",
+            "modality_label": "Available modalities",
+            "pct": "Dominant fate %",
+        }[x],
+        label_visibility="collapsed",
+        help="Which variable sets the colour of each point on the UMAP.",
+    )
+
+    st.markdown('<p class="latent-panel-title latent-panel-title-gap">Filters</p>', unsafe_allow_html=True)
+    mod_labels = sorted(df["modality_label"].astype(str).unique())
+    mod_pick = st.multiselect(
+        "Available modalities",
+        mod_labels,
+        default=mod_labels,
+        help="Keep cells whose modality combination matches your selection (RNA/ATAC measured where present; flux inferred).",
+    )
+    only_correct = st.selectbox(
+        "Prediction outcome",
+        ["All", "Correct only", "Wrong only"],
+        help="Restrict to cells where the model was correct, incorrect, or show all.",
+    )
+    folds = sorted(df["fold"].unique())
+    fold_pick = st.multiselect(
76
+ "CV folds",
77
+ folds,
78
+ default=folds,
79
+ help="Validation cross-validation folds to include (each fold’s held-out cells).",
80
+ )
81
+ pct_rng = st.slider(
82
+ "Dominant fate % range",
83
+ 0.0,
84
+ 100.0,
85
+ (0.0, 100.0),
86
+ 1.0,
87
+ help="Keep cells whose dominant lineage probability (percent) falls in this range.",
88
+ )
89
+
90
+ plot_df = df[df["fold"].isin(fold_pick) & df["modality_label"].isin(mod_pick)].copy()
91
+ plot_df = plot_df[(plot_df["pct"] >= pct_rng[0]) & (plot_df["pct"] <= pct_rng[1])]
92
+ if only_correct == "Correct only":
93
+ plot_df = plot_df[plot_df["correct"]]
94
+ elif only_correct == "Wrong only":
95
+ plot_df = plot_df[~plot_df["correct"]]
96
+
97
+ if plot_df.empty:
98
+ st.warning("No points after filters. Relax the filters and try again.")
99
+ st.stop()
100
+
101
+ with right:
102
+ fig = plots.latent_scatter(
103
+ plot_df,
104
+ color_opt,
105
+ title="Validation latent space (UMAP)",
106
+ width=900,
107
+ height=560,
108
+ marker_size=5.8,
109
+ marker_opacity=0.74,
110
+ )
111
+ st.plotly_chart(fig, width="stretch", on_select="rerun", key="latent_pick")
112
+
113
+ st.subheader("Selected points")
114
+ state = st.session_state.get("latent_pick")
115
+ points = []
116
+ if isinstance(state, dict):
117
+ sel = state.get("selection") or {}
118
+ if isinstance(sel, dict):
119
+ points = sel.get("points") or []
120
+ if points:
121
+ idxs = [int(p["point_index"]) for p in points if "point_index" in p]
122
+ idxs = [i for i in idxs if 0 <= i < len(plot_df)]
123
+ if idxs:
124
+ sub = plot_df.iloc[idxs]
125
+ disp = formatters.prepare_latent_display_dataframe(sub)
126
+ st.dataframe(
127
+ disp,
128
+ width="stretch",
129
+ hide_index=True,
130
+ )
131
+ else:
132
+ st.warning(
133
+ "A selection was reported but no valid points matched the current filtered view. "
134
+ "Try selecting again after changing filters, or pick a row via **Inspect by dataset index**."
135
+ )
136
+ else:
137
+ st.info(
138
+ "This table fills in when you **select points on the UMAP**. "
139
+ "In the chart’s top-right toolbar, choose **Box select** or **Lasso select**, "
140
+ "then drag over the dots; the page reruns and rows for those cells appear here. "
141
+ "To inspect one cell without using the lasso, scroll down to **Inspect by dataset index**."
142
+ )
143
+
144
+ st.subheader("Inspect by dataset index")
145
+ pick = st.number_input(
146
+ "Dataset index",
147
+ min_value=int(df["dataset_idx"].min()),
148
+ max_value=int(df["dataset_idx"].max()),
149
+ value=int(df["dataset_idx"].iloc[0]),
150
+ help="Index `ind` in your sample table; aligns one validation cell to this row.",
151
+ )
152
+ row = df[df["dataset_idx"] == pick]
153
+ if not row.empty:
154
+ st.dataframe(
155
+ formatters.latent_inspector_key_value(row.iloc[0]),
156
+ width="stretch",
157
+ hide_index=True,
158
+ )
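
The "Selected points" logic in the page above parses the selection payload Streamlit stores under the chart's key when `on_select="rerun"` is used. The defensive walk (state → `selection` → `points` → in-range `point_index` values) can be isolated as a pure function; the helper name `extract_point_indices` and the sample payload are illustrative, not part of the app.

```python
def extract_point_indices(state, n_rows):
    """Mirror the page's parsing: keep only in-range positional indices
    into the currently filtered dataframe; tolerate missing/None pieces."""
    points = []
    if isinstance(state, dict):
        sel = state.get("selection") or {}
        if isinstance(sel, dict):
            points = sel.get("points") or []
    idxs = [int(p["point_index"]) for p in points if "point_index" in p]
    return [i for i in idxs if 0 <= i < n_rows]


# Rough shape of a selection payload; point 99 is dropped as out of range.
payload = {"selection": {"points": [{"point_index": 2}, {"point_index": 99}]}}
print(extract_point_indices(payload, n_rows=10))  # [2]
print(extract_point_indices(None, n_rows=10))     # []
```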
streamlit_hf/pages/2_Feature_insights.py ADDED
@@ -0,0 +1,294 @@
+ """Multimodal feature importance: ranks, attention by prediction, tables."""
+
+ from __future__ import annotations
+
+ import sys
+ from pathlib import Path
+
+ import pandas as pd
+ import streamlit as st
+
+ _REPO = Path(__file__).resolve().parents[2]
+ if str(_REPO) not in sys.path:
+     sys.path.insert(0, str(_REPO))
+
+ from streamlit_hf.lib import io
+ from streamlit_hf.lib import plots
+ from streamlit_hf.lib import ui
+
+ ui.inject_app_styles()
+
+ st.title("Feature Insights")
+ st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
+
+ df = io.load_df_features()
+ att = io.load_attention_summary()
+
+ if df is None:
+     st.error(
+         "Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
+     )
+     st.stop()
+
+ tab1, tab2, tab3, tab4, tab5 = st.tabs(
+     [
+         "Global overview",
+         "Modality spotlight",
+         "Shift vs attention",
+         "Attention vs prediction",
+         "Full table",
+     ]
+ )
+
+ # ----- Tab 1 -----
+ with tab1:
+     c1, c2 = st.columns(2)
+     with c1:
+         top_n_bars = st.slider(
+             "Top N (shift & attention bars)",
+             10,
+             45,
+             20,
+             key="t1_topn_bars",
+         )
+     with c2:
+         top_n_pie = st.slider(
+             "Pool size (mean-rank pie)",
+             50,
+             250,
+             100,
+             key="t1_topn_pie",
+         )
+     st.plotly_chart(
+         plots.global_rank_triple_panel(df, top_n=top_n_bars, top_n_pie=top_n_pie),
+         width="stretch",
+     )
+     st.caption(
+         "Bars: **global** top features by shift impact and by mean attention (min-max scaled); "
+         "colour = modality. Pie: RNA / ATAC / Flux mix among the lowest mean-rank features in that pool."
+     )
+
+ # ----- Tab 2: RNA / ATAC / Flux columns -----
+ with tab2:
+     st.caption(
+         "**Modality spotlight:** three columns (**RNA**, **ATAC**, **Flux**). Each column only shows features "
+         "from that modality so you can compare shift impact, attention, and joint ranking **within** RNA, ATAC, or flux."
+     )
+     top_n_rank = st.slider("Top N per chart", 10, 55, 20, key="t2_topn")
+     st.subheader("Joint top markers (by mean rank)")
+     st.caption(
+         "The **strongest combined** markers by mean rank (lower mean rank = higher joint shift + attention priority). "
+         "Shift and attention bars are **min-max scaled within this top-N list** (0 to 1) so you can compare them on one axis. "
+         "Hover a bar for the full feature name."
+     )
+     r1a, r1b, r1c = st.columns(3)
+     for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
+         sm = df[df["modality"] == mod]
+         if sm.empty:
+             continue
+         with col:
+             st.plotly_chart(
+                 plots.joint_shift_attention_top_features(sm, mod, top_n_rank),
+                 width="stretch",
+             )
+     st.subheader("Shift importance")
+     r2a, r2b, r2c = st.columns(3)
+     for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
+         sm = df[df["modality"] == mod]
+         if sm.empty:
+             continue
+         colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
+         sub = sm.nlargest(top_n_rank, "importance_shift").sort_values("importance_shift", ascending=True)
+         with col:
+             st.plotly_chart(
+                 plots.rank_bar(
+                     sub,
+                     "importance_shift",
+                     "feature",
+                     f"{mod}: shift · top {top_n_rank}",
+                     colc,
+                     xaxis_title="Latent shift importance",
+                 ),
+                 width="stretch",
+             )
+     st.subheader("Attention importance")
+     r3a, r3b, r3c = st.columns(3)
+     for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
+         sm = df[df["modality"] == mod]
+         if sm.empty:
+             continue
+         colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
+         sub = sm.nlargest(top_n_rank, "importance_att").sort_values("importance_att", ascending=True)
+         with col:
+             st.plotly_chart(
+                 plots.rank_bar(
+                     sub,
+                     "importance_att",
+                     "feature",
+                     f"{mod}: attention · top {top_n_rank}",
+                     colc,
+                     xaxis_title="Attention importance",
+                 ),
+                 width="stretch",
+             )
+
+ # ----- Tab 3 -----
+ with tab3:
+     st.caption(
+         "Each point is **one feature** within its modality. **Attention rank** is on the horizontal axis and **shift rank** "
+         "on the vertical axis (1 = strongest in that modality for that metric). Features near the diagonal rank similarly "
+         "for both; the **red dashed line** is a straight-line trend (least-squares fit) through the cloud."
+     )
+     corr_rows = []
+     for mod in ("RNA", "ATAC", "Flux"):
+         sm = df[df["modality"] == mod]
+         if sm.empty:
+             continue
+         cor = plots.modality_shift_attention_rank_stats(sm)
+         if cor.get("n", 0) >= 3:
+             corr_rows.append(
+                 {
+                     "Modality": mod,
+                     "# features": cor["n"],
+                     "Pearson r": f"{cor['pearson_r']:.3f}",
+                     "Pearson p": f"{cor['pearson_p']:.2e}",
+                     "Spearman ρ": f"{cor['spearman_r']:.3f}",
+                     "Spearman p": f"{cor['spearman_p']:.2e}",
+                 }
+             )
+     if corr_rows:
+         st.dataframe(pd.DataFrame(corr_rows), hide_index=True, width="stretch")
+     rc1, rc2, rc3 = st.columns(3)
+     for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
+         with col:
+             sub_m = df[df["modality"] == mod]
+             st.plotly_chart(
+                 plots.rank_scatter_shift_vs_attention(sub_m, mod),
+                 width="stretch",
+             )
+
+ # ----- Tab 4 -----
+ with tab4:
+     with st.expander("What is this?", expanded=False):
+         st.markdown(
+             "Bars show **mean attention weights** (from rollout) averaged over validation cells, split by **what the "
+             "model predicted** for each cell: all validation cells together, only cells called **dead-end**, or only "
+             "cells called **reprogramming**. This reflects **model behaviour**, not the true fate label."
+         )
+     cohort_mode = st.selectbox(
+         "Cohort view",
+         [
+             "compare",
+             "all",
+             "dead_end",
+             "reprogramming",
+         ],
+         format_func=lambda x: {
+             "compare": "Compare cohorts (grouped bars)",
+             "all": "All validation samples (mean attention)",
+             "dead_end": "Mean attention when prediction = dead-end",
+             "reprogramming": "Mean attention when prediction = reprogramming",
+         }[x],
+         key="t4_cohort",
+         help=(
+             "Choose which validation cells contribute to the average. **All validation samples** uses every validation "
+             "cell. The prediction-specific options use only cells where the model output was dead-end or reprogramming, "
+             "so you can see which features receive more weight when the model leans each way."
+         ),
+     )
+     top_n_att = st.slider("Top N", 6, 28, 15, key="t4_topn")
+     if not att or "fi_att" not in att:
+         st.warning(
+             "Attention summaries are not available in this session. That view needs a full publish from the maintainer."
+         )
+     else:
+         ac1, ac2, ac3 = st.columns(3)
+         for col, mod in zip((ac1, ac2, ac3), ("RNA", "ATAC", "Flux")):
+             with col:
+                 st.plotly_chart(
+                     plots.attention_cohort_view(att["fi_att"], mod, top_n=top_n_att, mode=cohort_mode),
+                     width="stretch",
+                 )
+         if "rollout_mean" in att and "slices" in att:
+             st.subheader("Mean rollout weight")
+             if cohort_mode == "compare":
+                 roll_cohort = st.selectbox(
+                     "Rollout table: average over",
+                     ["all", "dead_end", "reprogramming"],
+                     format_func=lambda x: {
+                         "all": "All validation samples",
+                         "dead_end": "Cells predicted dead-end",
+                         "reprogramming": "Cells predicted reprogramming",
+                     }[x],
+                     key="t4_roll",
+                     help="Pick which validation subset is used for the mean rollout vector in the tables below.",
+                 )
+             else:
+                 roll_cohort = cohort_mode
+                 st.caption(
+                     "Rollout tables use the **same cohort** as the bar charts above (batch-embedding tokens are omitted)."
+                 )
+             rc1, rc2, rc3 = st.columns(3)
+             for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
+                 with col:
+                     rm = att["rollout_mean"]
+                     vec_all = rm.get(roll_cohort)
+                     if vec_all is None:
+                         vec_all = rm["all"]
+                     sl = att["slices"][mod]
+                     vec = vec_all[sl["start"] : sl["stop"]]
+                     names = att["feature_names"][sl["start"] : sl["stop"]]
+                     mini = plots.rollout_top_features_table(names, vec, top_n_att)
+                     st.caption(mod)
+                     st.dataframe(mini, hide_index=True, width="stretch")
+
+ # ----- Tab 5 -----
+ with tab5:
+     scope = st.radio(
+         "Table scope",
+         ["All modalities", "Single modality"],
+         horizontal=True,
+         key="t5_scope",
+     )
+     mod_tbl = "all"
+     if scope == "Single modality":
+         mod_tbl = st.selectbox("Modality", ["RNA", "ATAC", "Flux"], key="t5_mod")
+         tbl = df[df["modality"] == mod_tbl].copy()
+     else:
+         tbl = df.copy()
+     show_cols = [
+         c
+         for c in [
+             "mean_rank",
+             "feature",
+             "modality",
+             "rank_shift_in_modal",
+             "rank_att_in_modal",
+             "combined_order_mod",
+             "rank_shift",
+             "rank_att",
+             "importance_shift",
+             "importance_att",
+             "top_10_pct",
+             "group",
+             "log_fc",
+             "pval_adj",
+             "pathway",
+             "module",
+         ]
+         if c in tbl.columns
+     ]
+     st.caption(
+         "All rows for the chosen scope, sorted by **mean rank** (lower = stronger joint shift + attention priority). "
+         "Use the dataframe search / sort in the table toolbar to narrow down."
+     )
+     full_view = tbl[show_cols].sort_values("mean_rank")
+     st.dataframe(full_view, width="stretch", hide_index=True)
+     suffix = mod_tbl if scope == "Single modality" else "all"
+     st.download_button(
+         "Download table (CSV)",
+         full_view.to_csv(index=False).encode("utf-8"),
+         file_name=f"fateformer_features_{suffix}.csv",
+         mime="text/csv",
+         key="t5_dl",
+     )
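
The captions in this page describe the combined ranking as a **mean rank** over shift rank and attention rank (lower = stronger on both). A minimal sketch of that idea in plain Python follows; the column names `importance_shift` / `importance_att` come from the table above, but the exact tie-handling used by the app is not shown in this diff, so the `dense_rank_desc` helper and the sample values are assumptions for illustration only.

```python
def dense_rank_desc(values):
    """Rank 1 = largest value; ties share a rank (lower rank = stronger)."""
    order = sorted(set(values), reverse=True)
    pos = {v: i + 1 for i, v in enumerate(order)}
    return [pos[v] for v in values]


features = ["featA", "featB", "featC"]          # hypothetical features
shift = [0.9, 0.2, 0.5]                          # importance_shift (made up)
att = [0.9, 0.1, 0.6]                            # importance_att (made up)

rank_shift = dense_rank_desc(shift)              # [1, 3, 2]
rank_att = dense_rank_desc(att)                  # [1, 3, 2]
mean_rank = [(a + b) / 2 for a, b in zip(rank_shift, rank_att)]

# Sort ascending: the lowest mean rank is the strongest joint marker.
ranked = sorted(zip(features, mean_rank), key=lambda t: t[1])
print(ranked)  # [('featA', 1.0), ('featC', 2.0), ('featB', 3.0)]
```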
streamlit_hf/pages/3_Flux_analysis.py ADDED
@@ -0,0 +1,161 @@
+ """Metabolic flux: pathway map, differential views, reaction ranking table, metabolic model metadata."""
+
+ from __future__ import annotations
+
+ import sys
+ from pathlib import Path
+
+ import streamlit as st
+
+ _REPO = Path(__file__).resolve().parents[2]
+ if str(_REPO) not in sys.path:
+     sys.path.insert(0, str(_REPO))
+
+ from streamlit_hf.lib import io
+ from streamlit_hf.lib import plots
+ from streamlit_hf.lib import ui
+
+ ui.inject_app_styles()
+
+ st.title("Flux Analysis")
+ st.caption(
+     "Reaction-level flux: how pathways, statistics, and model rankings line up. "
+     "For global rank bars and shift vs. attention scatter, open **Feature insights**."
+ )
+
+ df = io.load_df_features()
+ if df is None:
+     st.error(
+         "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
+         "fresh results, or ask them to check the deployment."
+     )
+     st.stop()
+
+ flux = df[df["modality"] == "Flux"].copy()
+ if flux.empty:
+     st.warning("There are no flux reactions in the current results.")
+     st.stop()
+
+ meta = io.load_metabolic_model_metadata()
+
+ tab_map, tab_bio, tab_rank, tab_meta = st.tabs(
+     [
+         "Pathway map",
+         "Differential & fate",
+         "Reaction ranking",
+         "Metabolic model metadata",
+     ]
+ )
+
+ with tab_map:
+     st.caption(
+         "**Left:** sunburst of the strongest reactions by mean rank, grouped by pathway. **Right:** heatmaps for the "
+         "same reactions: pathway, differential Log₂FC, and statistical significance, aligned row by row. "
+         "Ranked reaction table: **Reaction ranking**. Curated model edges: **Metabolic model metadata**."
+     )
+     try:
+         c1, c2 = st.columns([1.05, 0.95], gap="medium", vertical_alignment="top")
+     except TypeError:
+         c1, c2 = st.columns([1.05, 0.95], gap="medium")
+     with c1:
+         n_sb = st.slider("Reactions in sunburst", 25, 90, 52, key="flux_sb_n")
+         st.plotly_chart(plots.flux_pathway_sunburst(flux, max_features=n_sb), width="stretch")
+     with c2:
+         top_n_nb = st.slider("Reactions in annotation + profile", 12, 40, 26, key="flux_nb_n")
+         st.plotly_chart(
+             plots.flux_reaction_annotation_panel(flux, top_n=top_n_nb, metric="mean_rank"),
+             width="stretch",
+         )
+         st.plotly_chart(
+             plots.flux_model_metric_profile(flux, top_n=min(top_n_nb, 24), metric="mean_rank"),
+             width="stretch",
+         )
+
+ with tab_bio:
+     st.caption(
+         "**Volcano:** differential Log₂FC versus significance (−log₁₀ adjusted p); colour shows overall mean rank. "
+         "Points with essentially no fold change and a zero adjusted p-value are removed as unreliable. "
+         "**Scatter:** average measured flux in dead-end versus reprogramming cells; point size reflects combined shift "
+         "and attention strength; colours mark pathway (largest groups shown, others grouped as *Other*)."
+     )
+     b1, b2 = st.columns(2)
+     with b1:
+         st.plotly_chart(plots.flux_volcano(flux), width="stretch")
+     with b2:
+         st.plotly_chart(plots.flux_dead_end_vs_reprogram_scatter(flux), width="stretch")
+
+ with tab_rank:
+     st.caption("Filter by reaction name or pathway, then inspect or download the ranked flux table.")
+     q = st.text_input("Substring filter (reaction name)", "", key="flux_q")
+     pw_f = st.multiselect(
+         "Pathway",
+         sorted(flux["pathway"].dropna().unique().astype(str)),
+         default=[],
+         key="flux_pw_f",
+     )
+     show = flux
+     if q.strip():
+         show = show[show["feature"].astype(str).str.contains(q, case=False, na=False)]
+     if pw_f:
+         show = show[show["pathway"].astype(str).isin(pw_f)]
+     cols = [
+         c
+         for c in [
+             "mean_rank",
+             "feature",
+             "rank_shift_in_modal",
+             "rank_att_in_modal",
+             "combined_order_mod",
+             "rank_shift",
+             "rank_att",
+             "importance_shift",
+             "importance_att",
+             "top_10_pct",
+             "mean_de",
+             "mean_re",
+             "group",
+             "log_fc",
+             "pval_adj",
+             "pathway",
+             "module",
+         ]
+         if c in show.columns
+     ]
+     st.dataframe(show[cols].sort_values("mean_rank"), width="stretch", hide_index=True)
+     st.download_button(
+         "Download Flux table (CSV)",
+         show[cols].sort_values("mean_rank").to_csv(index=False).encode("utf-8"),
+         file_name="fateformer_flux_filtered.csv",
+         mime="text/csv",
+         key="flux_dl",
+     )
+
+ with tab_meta:
+     st.caption(
+         "Directed substrate-to-product steps from the reference model, merged with this flux table where reaction names match."
+     )
+     if meta is None or meta.empty:
+         st.warning("Metabolic model metadata is not available in this build.")
+     else:
+         sm_ids = sorted(meta["Supermodule_id"].dropna().unique().astype(int).tolist())
+         graph_labels = ["All modules"]
+         for sid in sm_ids:
+             cls = str(meta.loc[meta["Supermodule_id"] == sid, "Super.Module.class"].iloc[0])
+             graph_labels.append(f"{sid}: {cls}")
+         tix = st.selectbox(
+             "Model scope",
+             range(len(graph_labels)),
+             format_func=lambda i: graph_labels[i],
+             key="flux_model_scope",
+             help="Show every step in the model, or restrict to one functional module.",
+         )
+         supermodule_id = None if tix == 0 else sm_ids[tix - 1]
+         tbl = io.build_metabolic_model_table(meta, flux, supermodule_id=supermodule_id)
+         st.dataframe(tbl, width="stretch", hide_index=True)
+         st.download_button(
+             "Download metabolic model metadata (CSV)",
+             tbl.to_csv(index=False).encode("utf-8"),
+             file_name="fateformer_metabolic_model_edges.csv",
+             mime="text/csv",
+             key="flux_model_dl",
+         )
streamlit_hf/pages/4_Gene_expression_analysis.py ADDED
@@ -0,0 +1,168 @@
+ """Gene expression and TF motif activity: pathway enrichment, chromVAR-style motifs, and tables."""
+
+ from __future__ import annotations
+
+ import sys
+ from pathlib import Path
+
+ import pandas as pd
+ import streamlit as st
+
+ _REPO = Path(__file__).resolve().parents[2]
+ if str(_REPO) not in sys.path:
+     sys.path.insert(0, str(_REPO))
+
+ from streamlit_hf.lib import io
+ from streamlit_hf.lib import pathways as pathway_data
+ from streamlit_hf.lib import plots
+ from streamlit_hf.lib import ui
+
+ ui.inject_app_styles()
+
+ st.title("Gene Expression & TF Activity")
+
+ df = io.load_df_features()
+ if df is None:
+     st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
+     st.stop()
+
+ rna = df[df["modality"] == "RNA"].copy()
+ atac = df[df["modality"] == "ATAC"].copy()
+ if rna.empty and atac.empty:
+     st.warning("No RNA gene or ATAC motif features are available in the current results.")
+     st.stop()
+
+ st.caption(
+     "Pathway enrichment (Reactome / KEGG) and a pathway–gene map; chromVAR-style motif deviations and activity by "
+     "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
+ )
+
+ TABLE_COLS = [
+     "mean_rank",
+     "feature",
+     "rank_shift_in_modal",
+     "rank_att_in_modal",
+     "combined_order_mod",
+     "rank_shift",
+     "rank_att",
+     "importance_shift",
+     "importance_att",
+     "top_10_pct",
+     "mean_de",
+     "mean_re",
+     "group",
+     "log_fc",
+     "pval_adj",
+     "mean_diff",
+     "pval_adj_log",
+ ]
+
+
+ def _table_cols(show: pd.DataFrame) -> list[str]:
+     return [c for c in TABLE_COLS if c in show.columns]
+
+
+ tab_path, tab_motif, tab_gene_tbl, tab_motif_tbl = st.tabs(
+     ["Gene Pathway Enrichment", "Motif Activity", "Gene Table", "Motif Table"]
+ )
+
+ with tab_path:
+     st.caption(
+         "Over-representation of Reactome and KEGG pathways (Benjamini–Hochberg *q* < 0.05). "
+         "The lower panel maps leading genes to pathways; empty grid positions are left clear."
+     )
+     raw = pathway_data.load_de_re_tsv()
+     if raw is None:
+         st.info("Pathway enrichment views are not available in this deployment.")
+     else:
+         de_all, re_all = raw
+         mde, mre = pathway_data.merged_reactome_kegg_bubble_frames(de_all, re_all)
+         bubble_h = max(
+             plots.pathway_bubble_suggested_height(len(mde)),
+             plots.pathway_bubble_suggested_height(len(mre)),
+         )
+         c1, c2 = st.columns(2, gap="medium")
+         with c1:
+             st.plotly_chart(
+                 plots.pathway_enrichment_bubble_panel(
+                     mde,
+                     "Pathway enrichment — dead-end",
+                     show_colorbar=True,
+                     layout_height=bubble_h,
+                 ),
+                 width="stretch",
+             )
+         with c2:
+             st.plotly_chart(
+                 plots.pathway_enrichment_bubble_panel(
+                     mre,
+                     "Pathway enrichment — reprogramming",
+                     show_colorbar=True,
+                     layout_height=bubble_h,
+                 ),
+                 width="stretch",
+             )
+         hm = pathway_data.build_merged_pathway_membership(de_all, re_all)
+         if hm is None:
+             st.info("No pathway–gene matrix could be built from the current enrichment results.")
+         else:
+             z, ylabs, xlabs = hm
+             st.plotly_chart(plots.pathway_gene_membership_heatmap(z, ylabs, xlabs), width="stretch")
+
+ with tab_motif:
+     if atac.empty:
+         st.warning("No motif-level ATAC features are available in the current results.")
+     else:
+         st.caption(
+             "Left: mean motif score difference (reprogramming − dead-end) versus significance. "
+             "Right: mean activity in each fate; colour and size follow the same encoding as in **Feature Insights**."
+         )
+         a1, a2 = st.columns(2, gap="medium")
+         with a1:
+             st.plotly_chart(plots.motif_chromvar_volcano(atac), width="stretch")
+         with a2:
+             st.plotly_chart(
+                 plots.notebook_style_activity_scatter(
+                     atac,
+                     title="TF activity (z-score) by fate",
+                     x_title="Dead-end (TF activity)",
+                     y_title="Reprogramming (TF activity)",
+                 ),
+                 width="stretch",
+             )
+
+ with tab_gene_tbl:
+     if rna.empty:
+         st.warning("No RNA gene features are available in the current results.")
+     else:
+         q = st.text_input("Filter by gene name", "", key="ge_tbl_q")
+         show = rna
+         if q.strip():
+             show = show[show["feature"].astype(str).str.contains(q, case=False, na=False)]
+         cols = _table_cols(show)
+         st.dataframe(show[cols].sort_values("mean_rank"), width="stretch", hide_index=True)
+         st.download_button(
+             "Download table (CSV)",
+             show[cols].sort_values("mean_rank").to_csv(index=False).encode("utf-8"),
+             file_name="gene_expression_table.csv",
+             mime="text/csv",
+             key="ge_tbl_dl",
+         )
+
+ with tab_motif_tbl:
+     if atac.empty:
+         st.warning("No motif-level ATAC features are available in the current results.")
+     else:
+         q = st.text_input("Filter by motif or TF", "", key="tf_tbl_q")
+         show = atac
+         if q.strip():
+             show = show[show["feature"].astype(str).str.contains(q, case=False, na=False)]
+         cols = _table_cols(show)
+         st.dataframe(show[cols].sort_values("mean_rank"), width="stretch", hide_index=True)
+         st.download_button(
+             "Download table (CSV)",
+             show[cols].sort_values("mean_rank").to_csv(index=False).encode("utf-8"),
+             file_name="tf_motif_table.csv",
+             mime="text/csv",
+             key="tf_tbl_dl",
+         )
streamlit_hf/requirements-docker.txt ADDED
@@ -0,0 +1,6 @@
+ # Hugging Face / production image: precomputed cache only (no torch)
+ streamlit>=1.40.0
+ plotly>=5.22.0
+ pandas>=2.0.0
+ numpy>=1.24.0
+ pyarrow>=14.0.0
streamlit_hf/static/app_icon.svg ADDED