| import pandas as pd |
| import tangram as tg |
| import numpy as np |
| import torch |
| import anndata |
| from sklearn.decomposition import PCA |
| from sklearn.neighbors import NearestNeighbors |
|
|
|
|
|
|
def generate_feature_ad(ad_expr, feature_path, sc=False):
    """
    Build an AnnData object holding OmiCLIP text or image embeddings.

    The CSV at ``feature_path`` is read with its first column as the row index.
    Its columns are selected (and ordered) by the observation names of
    ``ad_expr`` and the result is transposed, so every observation of
    ``ad_expr`` becomes one row of the returned object.

    :param ad_expr: AnnData object containing metadata for the dataset.
    :param feature_path: Path to the CSV file containing the features to be loaded.
    :param sc: Boolean flag indicating whether to copy single-cell metadata or ST metadata.
               If True, the whole ``ad_expr.obs`` table is copied. If False (default, ST),
               ``obs['cell_num']``, ``uns['spatial']`` and ``obsm['spatial']`` are copied.
    :return: A new AnnData object with the loaded features and relevant metadata from ad_expr.
    :raises KeyError: Raised by pandas when an observation name of ``ad_expr`` is not a
                      column of the CSV, or when a required ST field is missing.
    """
    obs_names = ad_expr.obs.index

    features = pd.read_csv(feature_path, index_col=0)

    # Select the columns exactly once. The original selected them twice, which
    # was redundant and, with repeated observation names, duplicated the
    # matching columns a second time.
    feature_ad = anndata.AnnData(features[obs_names].T)

    if sc:
        # Single-cell data: carry over the full observation metadata.
        feature_ad.obs = ad_expr.obs.copy()
    else:
        # Spatial data: carry over the per-spot cell count and spatial fields.
        feature_ad.obs['cell_num'] = ad_expr.obs['cell_num'].copy()
        feature_ad.uns['spatial'] = ad_expr.uns['spatial'].copy()
        feature_ad.obsm['spatial'] = ad_expr.obsm['spatial'].copy()

    return feature_ad
|
|
|
|
|
|
def normalize_percentile(df, cols, min_percentile=5, max_percentile=95):
    """
    Clips and normalizes the specified columns of a DataFrame based on percentile thresholds,
    transforming their values to the [0, 1] range.

    The DataFrame is modified in place and also returned. A column whose lower
    and upper percentile values are equal (for example a constant column) has
    no range to scale by; it is set to 0.0 instead of being divided by zero.

    :param df: A pandas DataFrame containing the columns to normalize.
    :type df: pandas.DataFrame
    :param cols: A list of column names in `df` that should be normalized.
    :type cols: list[str]
    :param min_percentile: The lower percentile used for clipping (defaults to 5).
    :type min_percentile: float
    :param max_percentile: The upper percentile used for clipping (defaults to 95).
    :type max_percentile: float
    :return: The same DataFrame with specified columns clipped and normalized.
    :rtype: pandas.DataFrame
    """
    for col in cols:
        # Percentile thresholds are computed on the column before clipping.
        min_val = np.percentile(df[col], min_percentile)
        max_val = np.percentile(df[col], max_percentile)
        value_range = max_val - min_val

        if value_range == 0:
            # Degenerate range: every clipped value equals min_val, so the
            # rescaled value is defined as 0.0 rather than 0/0 (NaN).
            df[col] = 0.0
            continue

        clipped = np.clip(df[col], min_val, max_val)
        df[col] = (clipped - min_val) / value_range

    return df
|
|
|
|
|
|
def cell_type_decompose(sc_ad, st_ad, cell_type_col='cell_type', NMS_mode=False, major_types=None, min_percentile=5, max_percentile=95):
    """
    Performs cell type decomposition on spatial data (ST or image) with single-cell data.

    The single-cell object is mapped onto the spatial object with Tangram in
    "clusters" mode on CPU, and the cell type annotation is then projected to
    space. The projected scores are read from ``st_ad.obsm['tangram_ct_pred']``.

    When ``NMS_mode`` is True, the scores of ``major_types`` are clipped and
    rescaled to [0, 1] with ``normalize_percentile``. For every spot only the
    highest-scoring major type keeps its score (ties are all kept); the other
    major types are set to 0.

    NOTE: in NMS mode ``st_ad.obs`` is replaced by the prediction table, so any
    columns previously stored in ``st_ad.obs`` are no longer present afterwards.
    ``normalize_percentile`` also rescales the prediction table in place.

    :param sc_ad: AnnData object containing single-cell meta data.
    :param st_ad: AnnData object containing spatial data (ST or image) meta data.
    :param cell_type_col: The column name in `sc_ad.obs` that contains cell type annotations. Default is 'cell_type'.
    :param NMS_mode: Boolean flag to apply Non-Maximum Suppression (NMS) mode. Default is False.
    :param major_types: Major cell types used for NMS mode. A single name or an iterable of
                        names; None (default) uses every predicted cell type column.
    :param min_percentile: The lower percentile used for clipping (defaults to 5).
    :param max_percentile: The upper percentile used for clipping (defaults to 95).
    :return: The spatial AnnData object with projected cell type annotations.
    """
    # Tangram preprocessing of both objects; genes=None lets Tangram choose
    # the genes used for training.
    tg.pp_adatas(sc_ad, st_ad, genes=None)

    ad_map = tg.map_cells_to_space(
        sc_ad, st_ad,
        mode="clusters",
        cluster_label=cell_type_col,
        device='cpu',
        scale=False,
        density_prior='uniform',
        random_state=10,
        verbose=False,
    )

    tg.project_cell_annotations(ad_map, st_ad, annotation=cell_type_col)

    if NMS_mode:
        predictions = st_ad.obsm['tangram_ct_pred']

        # Resolve the set of cell types to suppress over. The original crashed
        # with a TypeError when major_types was left at its default of None.
        if major_types is None:
            major_types = list(predictions.columns)
        elif isinstance(major_types, str):
            major_types = [major_types]
        else:
            major_types = list(major_types)

        st_ad.obs = normalize_percentile(predictions, major_types, min_percentile, max_percentile)

        major_scores = st_ad.obsm['tangram_ct_pred'][major_types].copy()

        # Keep a score only where it equals the row-wise maximum.
        row_max = major_scores.max(axis=1)
        st_ad.obs[major_types] = major_scores.where(major_scores.eq(row_max, axis=0), other=0)

    return st_ad
|
|
|
|
|
|
def assign_cells_to_spots(cell_locs, spot_locs, patch_size=16):
    """
    Link every cell to the spots whose centre lies within half a patch size of it.

    A radius-neighbour index is fitted on the spot coordinates and then queried
    with the cell coordinates, using ``patch_size * 0.5`` as the search radius.

    :param cell_locs: Numpy array of shape (n_cells, 2) with the x, y coordinates of the cells.
    :param spot_locs: Numpy array of shape (n_spots, 2) with the x, y coordinates of the spots.
    :param patch_size: The diameter of the spot patch. The radius used for assignment is half of this value.
    :return: A sparse connectivity matrix with one row per cell and one column per spot.
             An entry is 1 if the cell is assigned to that spot, 0 otherwise.
    """
    search_radius = patch_size * 0.5

    # Index the spots, then look up which spots fall within the radius of each cell.
    spot_index = NearestNeighbors(radius=search_radius).fit(spot_locs)

    return spot_index.radius_neighbors_graph(cell_locs, mode='connectivity')
|
|
|
|
|
|