Skip to content

SNF

Similarity Network Fusion and spectral embedding.

Functions

motco.stats.snf.get_affinity_matrix(dats, K=20, eps=0.5)

Estimate the affinity matrix for all datasets in dats from the squared Euclidean distance.

Parameters:

Name Type Description Default
dats list[ndarray]

list of data sets to estimate the affinity matrix.

required
K int

Number of nearest neighbors (K) to use. Default 20.

20
eps float

Normalization factor. Recommended between 0.3 and 0.8. Default 0.5.

0.5

Returns:

Name Type Description
Ws list[ndarray]

list of affinity matrices

Source code in src/motco/stats/snf.py
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def get_affinity_matrix(
    dats: list[np.ndarray], K: int = 20, eps: float = 0.5
) -> list[np.ndarray]:
    """
    Estimate the affinity matrix for all datasets in dats from the squared Euclidean
    distance.

    Each dataset is validated first; then, for every dataset, the pairwise
    squared Euclidean distance between its rows is computed and handed to
    ``_affinity_matrix`` together with ``K`` and ``eps``.

    Parameters
    ----------
    dats: list[np.ndarray]
        list of data sets to estimate the affinity matrix. All data sets must
        have the same number of rows and contain only finite values.
    K: int
        Number of K nearest neighbors to use. Must satisfy
        ``1 <= K < number of rows``. Default 20.
    eps: float
        Normalization factor. Recommended between 0.3 and 0.8. Must be
        positive. Default 0.5.

    Returns
    -------
    Ws: list[np.ndarray]
        list of affinity matrices, one per data set, in the order of ``dats``.

    Raises
    ------
    ValueError
        If ``dats`` is empty, the data sets differ in their number of rows,
        a data set contains NaN or Inf values, ``K`` is smaller than 1 or not
        smaller than the number of rows, or ``eps`` is not positive.
    """
    if not dats:
        raise ValueError("dats must contain at least one dataset.")

    # Convert every dataset exactly once; the converted arrays are reused for
    # both the validation pass and the distance computation below.
    arrs = [np.asarray(dat) for dat in dats]

    _n = arrs[0].shape[0]
    for _i, _arr in enumerate(arrs):
        if _arr.shape[0] != _n:
            raise ValueError(
                f"dats[{_i}] has {_arr.shape[0]} rows but dats[0] has {_n} rows — "
                "all datasets must have the same number of rows."
            )
        if not np.all(np.isfinite(_arr)):
            raise ValueError(f"dats[{_i}] contains NaN or Inf values.")

    # A neighbourhood needs at least one neighbour; zero or negative values
    # would otherwise be passed on to the kernel computation unchecked.
    if K < 1:
        raise ValueError(f"K={K} must be at least 1.")
    if K >= _n:
        raise ValueError(
            f"K={K} must be less than the number of samples ({_n})."
        )
    # Written as "not eps > 0" so that a NaN eps is rejected as well
    # (every comparison with NaN is False, so "eps <= 0" would let it through).
    if not eps > 0:
        raise ValueError(f"eps={eps} must be positive.")

    Ws: list[np.ndarray] = []
    for arr in arrs:
        # cdist returns Euclidean distances; square them element-wise.
        euc_dist = cdist(arr, arr, metric="euclidean") ** 2
        Ws.append(_affinity_matrix(euc_dist, K, eps))

    return Ws

motco.stats.snf.SNF(Ws, k=20, t=20)

Similarity Network Fusion (SNF) across multiple affinity matrices.

The algorithm iteratively performs cross-diffusion using k-nearest neighbor sparse kernels and averages information from the other networks, producing a fused similarity matrix.

Parameters:

Name Type Description Default
Ws list[ndarray]

List of affinity matrices to fuse. All matrices must have the same shape (n_samples x n_samples) and be symmetric.

required
k int

Number of nearest neighbors for the sparse kernels. Default 20.

20
t int

Number of cross-diffusion iterations. Default 20.

20

Returns:

Name Type Description
Pc ndarray

Fused similarity matrix (n_samples x n_samples).

Source code in src/motco/stats/snf.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def SNF(Ws: list[np.ndarray], k: int = 20, t: int = 20) -> np.ndarray:
    """
    Similarity Network Fusion (SNF) across multiple affinity matrices.

    The algorithm iteratively performs cross-diffusion using k-nearest
    neighbor sparse kernels and averages information from the other
    networks, producing a fused similarity matrix.

    Parameters
    ----------
    Ws: list[np.ndarray]
        List of affinity matrices to fuse. All matrices must have the same
        shape (n_samples x n_samples) and be symmetric.
    k: int
        Number of nearest neighbors for the sparse kernels. Must satisfy
        ``1 <= k < n_samples``. Default 20.
    t: int
        Number of cross-diffusion iterations. Default 20.

    Returns
    -------
    Pc: np.ndarray
        Fused similarity matrix (n_samples x n_samples).

    Raises
    ------
    ValueError
        If fewer than two matrices are given, a matrix is not a square 2-D
        array, the matrices differ in shape, or ``k`` is smaller than 1 or
        not smaller than the number of samples.
    """
    nw = len(Ws)
    if nw < 2:
        raise ValueError("SNF requires at least two affinity matrices.")

    ref_shape = Ws[0].shape
    for _i, _W in enumerate(Ws):
        # Check dimensionality before indexing shape[1] so that non-2-D input
        # is reported as a ValueError rather than a bare IndexError.
        if len(_W.shape) != 2 or _W.shape[0] != _W.shape[1]:
            raise ValueError(
                f"Ws[{_i}] is not square: shape {_W.shape}."
            )
        if _W.shape[0] != ref_shape[0]:
            raise ValueError(
                f"Ws[{_i}] has shape {_W.shape} but Ws[0] has shape {ref_shape} — "
                "all matrices must have the same shape."
            )
    _n = ref_shape[0]

    if k < 1:
        raise ValueError(f"k={k} must be at least 1.")
    if k >= _n:
        raise ValueError(
            f"k={k} must be less than the number of samples ({_n})."
        )

    Ps: list[np.ndarray] = []
    Ss: list[np.ndarray] = []

    for W in Ws:
        Pi = _full_kernel(W)
        # Symmetrize the full kernel before deriving the sparse kernel.
        Pi = (Pi + Pi.T) / 2
        Ps.append(Pi)
        Ss.append(_sparse_kernel(Pi, k))

    # Current diffusion state of every network. The matrices are only read
    # and each update produces a new array, so no defensive copies are needed.
    state: list[np.ndarray] = Ps

    for _ in range(t):
        updated: list[np.ndarray] = []
        for j in range(nw):
            # Average of all other networks, taken from the previous
            # iteration's state so that every network is updated synchronously.
            others = [state[m] for m in range(nw) if m != j]
            M: np.ndarray = np.stack(others).mean(axis=0)
            updated.append(_full_kernel(Ss[j] @ M @ Ss[j].T))
        state = updated

    # Fused matrix: element-wise mean of the final per-network states.
    Pc: np.ndarray = np.stack(state).mean(axis=0)
    return Pc

motco.stats.snf.get_spectral(aff, n_components=10)

Calculate spectral embedding from an affinity/similarity matrix.

Parameters:

Name Type Description Default
aff ndarray

Affinity matrix to calculate the spectral embedding.

required
n_components int

Number of spectral embedding components. Default 10.

10

Returns:

Name Type Description
embedding ndarray

Spectral embedding.

Source code in src/motco/stats/snf.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
def get_spectral(aff: np.ndarray, n_components: int = 10) -> np.ndarray:
    """
    Compute a spectral embedding of an affinity/similarity matrix.

    Thin wrapper that forwards to ``spectral_embedding`` with a fixed
    ``random_state`` of 1548, so repeated calls use the same seed.

    Parameters
    ----------
    aff: np.ndarray
        Affinity matrix to embed.
    n_components: int
        Number of spectral embedding components. Default 10.

    Returns
    -------
    embedding: np.ndarray
        Spectral embedding as returned by ``spectral_embedding``.
    """
    return spectral_embedding(
        aff,
        n_components=n_components,
        random_state=1548,
    )