# Source code for cellrefiner.tools._contact_communication

from typing import Optional
import numpy as np
import pandas as pd
from anndata import AnnData
from ._cell_shape_modeling import SEM
from scipy.sparse import csr_matrix



# [docs]
def contact_communication(
        df_ligrec: pd.DataFrame,
        adata: AnnData,
        sem: Optional[SEM] = None,
        contact_key: Optional[str] = 'contacts',
        lr_delimiter: str = '-',
        heteromeric_delimiter: str = '_'
    ) -> None:
    '''
    Contact-based communication inference.

    For every pair of cells that are in contact and every ligand-receptor
    pair, the communication strength is the ligand expression in the sender
    cell multiplied by the receptor expression in the receiver cell. The
    expression of a heteromeric complex is the product of its subunits.

    Parameters
    ----------
    df_ligrec : DataFrame
        Dataframe where each row corresponds to a ligand-receptor pair with ligands, receptors, and the associated signaling pathways in the three columns, respectively.
    adata : Anndata
        Anndata object, must contain cell-cell contact information in `.obsp[contact_key]` if `sem` is None.

        May be passed as None when `sem` is provided, in which case `sem.adata` is used.
    sem : SEM, optional
        Cell shape model object that contains cell-cell contact matrix and associated AnnData.

        If provided, contact matrix will be obtained from `sem.contact_matrix`
        (`sem.compute_contact()` is called first when it is None).

        If both `sem` and `adata` are provided, results are written to `adata`
        and a warning is issued when `adata` is not `sem.adata`.
    contact_key : str, default 'contacts'
        Key in `adata.obsp` containing the cell-cell contact matrix. Only used when `sem` is None.
    lr_delimiter : str, default '-'
        Delimiter used to construct ligand-receptor pair names in output.
    heteromeric_delimiter : str, default '_'
        Delimiter used to separate subunits in heteromeric complexes within `df_ligrec`.

        For example, if a receptor complex is 'TGFBR1_TGFBR2', this parameter should be '_'.

    Returns
    -------
    None. Sets the following fields in adata

        add `.obsp['{ligand}{lr_delimiter}{receptor}']`, contact-based communication matrix via ligand-receptor pairs
        (rows are sender cells, columns are receiver cells). Pairs with no non-zero signal are skipped.

        add `.obsp['{pathway}']`, pathway-level contact-based communication matrix

        add `.obsp['total']`, sum of all pathway communication matrices

        add `.obsm['sender_signal']`, dataFrame with sender communication strengths per cell

        add `.obsm['receiver_signal']`, dataFrame with receiver communication strengths per cell

        add `.uns['contact_signal_info']`, metadata of the analysis
            - 'lr_pair': List of L-R pair names
            - 'pathway': List of pathway names
            - 'total': ['total']
            - 'db': Filtered ligand-receptor database

    Raises
    ------
    ValueError
        If `df_ligrec` has no rows, if no contact matrix is available from
        `sem`, or if the contact matrix is not (n_cells, n_cells).

    Examples
    --------
    >>> db_lr = cr.pp.ligand_receptor_database()
    >>> db_lr = cr.pp.filter_lr_database(db_lr,adata_cr, min_cell_pct=0.01)
    >>> cr.tl.contact_communication(db_lr, adata)
    '''
    # Imported locally so this block does not depend on the module import list.
    import warnings

    if df_ligrec.shape[0] == 0:
        raise ValueError("empty ligand-receptor DB")
    if sem is None:  # sem is not provided, using adata contact matrix
        contact_matrix = adata.obsp[contact_key]
    else:  # sem is provided
        if adata is None:  # adata is not provided, use sem.adata
            adata = sem.adata
        elif adata is not sem.adata:
            # Actually emit the warning; a bare `Warning(...)` expression only
            # builds an exception object and discards it.
            warnings.warn(
                'Provide adata is not an attribute of sem, sem.adata will be unchanged',
                stacklevel=2)
        if sem.contact_matrix is None:
            print('compute cell-cell contact')
            # Presumably populates sem.contact_matrix - verify against SEM.
            sem.compute_contact()
        contact_matrix = sem.contact_matrix
        if contact_matrix is None:
            raise ValueError(
                "sem.contact_matrix is None after sem.compute_contact()")
    df_ligrec = df_ligrec.copy()

    # get cell pair index
    nc = adata.shape[0]
    # Normalise to CSR so .indices/.indptr below are row-major whatever
    # sparse (or dense) format was stored.
    contact_matrix = csr_matrix(contact_matrix)
    if contact_matrix.shape != (nc, nc):
        raise ValueError(
            f"contact matrix has shape {contact_matrix.shape}, "
            f"expected {(nc, nc)}")
    indices = contact_matrix.indices
    indptr = contact_matrix.indptr
    # ci[k] / cj[k] are the sender (row) / receiver (column) of the k-th contact.
    ci = np.repeat(np.arange(nc), np.diff(indptr))
    cj = indices

    # contact signal
    lr_keys = []
    n_pairs = df_ligrec.shape[0]
    keep = np.ones(n_pairs, dtype=bool)
    # Per-cell expression of each ligand/receptor complex, computed once even
    # when the same complex appears in several rows.
    expr_cache = {}
    # ligand-receptors pairs
    for row in range(n_pairs):
        ligand = df_ligrec.iloc[row, 0]
        receptor = df_ligrec.iloc[row, 1]
        for name in (ligand, receptor):
            if name not in expr_cache:
                expr_cache[name] = _complex_expression(
                    adata, name, heteromeric_delimiter)
        strength = expr_cache[ligand][ci] * expr_cache[receptor][cj]
        key = f'{ligand}{lr_delimiter}{receptor}'
        # .copy() is necessary. eliminate_zeros() removes indices and indptr inplace
        sig_mat = csr_matrix(
            (strength, indices.copy(), indptr.copy()), shape=(nc, nc))
        sig_mat.eliminate_zeros()
        keep[row] = sig_mat.nnz > 0
        if keep[row]:
            adata.obsp[key] = sig_mat
            lr_keys.append(key)
    # After filtering, row k of df_ligrec corresponds to lr_keys[k].
    df_ligrec = df_ligrec[keep]

    # pathway and total
    pathways = df_ligrec.iloc[:, 2]
    pth_keys = pathways.unique().tolist()
    # Start from an empty matrix so 'total' exists even when no pair survived.
    total = csr_matrix((nc, nc))
    for pth in pth_keys:
        data = csr_matrix((nc, nc))
        for k in np.where(pathways == pth)[0]:
            data = data + adata.obsp[lr_keys[k]]
        adata.obsp[pth] = data
        total = total + data
    adata.obsp['total'] = total

    # contact signal information
    adata.uns['contact_signal_info'] = {
        'lr_pair': lr_keys, 'pathway': pth_keys, 'total': ['total'], 'db': df_ligrec}
    print("add .uns['contact_signal_info']")

    # receiver/sender signal
    signal_list = lr_keys + pth_keys + ['total']
    sdim = len(signal_list)
    signal_vec_s = np.zeros((nc, sdim))
    signal_vec_r = np.zeros((nc, sdim))
    for si, signal in enumerate(signal_list):
        mat = adata.obsp[signal]
        # Sum on the sparse matrix directly; densifying an nc x nc matrix per
        # signal is prohibitively expensive for large datasets.
        signal_vec_s[:, si] = np.asarray(mat.sum(axis=1)).ravel()  # sender signal
        signal_vec_r[:, si] = np.asarray(mat.sum(axis=0)).ravel()  # receiver signal
    df_s = pd.DataFrame(index=adata.obs.index,
                        columns=signal_list, data=signal_vec_s)
    df_r = pd.DataFrame(index=adata.obs.index,
                        columns=signal_list, data=signal_vec_r)
    adata.obsm['sender_signal'] = df_s
    adata.obsm['receiver_signal'] = df_r
    print("add .obsm['sender_signal'], .obsm['receiver_signal']")


def _complex_expression(adata, complex_name, heteromeric_delimiter):
    # Per-cell expression of a ligand/receptor: the product over its subunits
    # (a single gene is a one-subunit complex). Works for sparse or dense X.
    subunits = complex_name.split(heteromeric_delimiter)
    expr = adata[:, subunits].X
    if hasattr(expr, 'toarray'):
        expr = expr.toarray()
    return np.prod(np.asarray(expr), axis=1)




# [docs]
def cluster_communication(adata: AnnData,
                          cluster_key: str,
                          signal: str,
                          n_permutations: int = 100,
                          seed: int = 0):
    """
    Summarize cell-cell communication to cluster-cluster communication and compute p-values
    by permuting cell/spot labels.

    Parameters
    ----------
    adata : Anndata
        Anndata object, must contain cluster labels in `.obs[cluster_key]`
        and a cell-cell communication matrix in `.obsp[signal]`.
    cluster_key : str
        Key in `.obs` that contains cell type annotations. A categorical
        column is used as is; any other column is converted to categorical
        to obtain the list of clusters.
    signal : str
        Key in `.obsp` that contains communication matrix
    n_permutations : int, default 100
        Number of label permutations for computing the p-value.
    seed : int, default 0
        Seed for the random generator that permutes the labels.

    Returns
    -------
    None. Sets the following fields in adata

        add `.uns['{cluster_key}-{signal}']`, cluster-level communication via {signal}
            - .uns['{cluster_key}-{signal}']['communication_matrix'], cluster-level communication matrix
            - .uns['{cluster_key}-{signal}']['communication_pvalue'], p-values

    Examples
    --------
    >>> cr.tl.cluster_communication(adata,cluster_key = 'cell_type',signal = 'NOTCH')
    """
    labels = adata.obs[cluster_key]
    # `.cat` only exists on categorical columns; convert plain string/int
    # label columns instead of failing with AttributeError.
    if not isinstance(labels.dtype, pd.CategoricalDtype):
        labels = labels.astype('category')
    cluster_list = list(labels.cat.categories)
    cluster_cell = labels.to_numpy()
    sig_mat = adata.obsp[signal]
    rng = np.random.default_rng(seed)
    tmp_df, tmp_p_value = summarize_cluster(
        sig_mat, cluster_cell, cluster_list, rng, n_permutations=n_permutations)
    adata.uns[cluster_key+'-' + signal] = {'communication_matrix': tmp_df, 'communication_pvalue': tmp_p_value}



def summarize_cluster(X, clusterid, clusternames, rng, n_permutations):
    """
    Aggregate a cell-by-cell signaling matrix into cluster-by-cluster means
    and estimate permutation p-values.

    Parameters
    ----------
    X : sparse matrix or ndarray
        Cell-cell signaling matrix, indexed as X[sender, receiver].
    clusterid : ndarray
        Cluster label of every cell, in the row/column order of `X`.
    clusternames : list
        Cluster labels to summarize; defines row/column order of the output.
    rng : numpy.random.Generator
        Generator used to permute `clusterid`.
    n_permutations : int
        Number of label permutations.

    Returns
    -------
    df_cluster : DataFrame
        Entry [a, b] is the mean of `X` over rows labelled `a` and columns
        labelled `b`; NaN when either cluster has no cells.
    df_p_value : DataFrame
        Entry [a, b] is the fraction of permutations whose permuted mean is
        >= the observed mean; NaN when the observed mean is NaN or when
        `n_permutations` is not positive.
    """
    n = len(clusternames)
    X_cluster = _cluster_means(X, clusterid, clusternames)

    # Count, per cluster pair, the permutations that reach the observed mean.
    exceed = np.zeros([n, n], float)
    for _ in range(n_permutations):
        X_cluster_perm = _cluster_means(
            X, rng.permutation(clusterid), clusternames)
        exceed[X_cluster_perm >= X_cluster] += 1.0

    if n_permutations > 0:
        p_cluster = exceed / n_permutations
    else:
        # No permutations: p-values are undefined rather than 0/0.
        p_cluster = np.full([n, n], np.nan)
    # An undefined mean (empty cluster) must not be reported as p = 0.
    p_cluster[np.isnan(X_cluster)] = np.nan

    df_cluster = pd.DataFrame(
        data=X_cluster, index=clusternames, columns=clusternames)
    df_p_value = pd.DataFrame(
        data=p_cluster, index=clusternames, columns=clusternames)
    return df_cluster, df_p_value


def _cluster_means(X, labels, names):
    # Mean of X over every (row cluster, column cluster) block; NaN for blocks
    # involving a cluster without cells.
    groups = [np.flatnonzero(labels == name) for name in names]
    k = len(names)
    means = np.full([k, k], np.nan)
    for a, rows in enumerate(groups):
        if rows.size == 0:
            continue
        X_rows = X[rows, :]  # slice rows once per sender cluster
        for b, cols in enumerate(groups):
            if cols.size:
                means[a, b] = X_rows[:, cols].mean()
    return means