Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 38 additions & 9 deletions graphtools/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,16 +1201,45 @@ def build_landmark_op(self):
n_samples = self.data.shape[0]
rng = np.random.default_rng(self.random_state)
landmark_indices = rng.choice(n_samples, self.n_landmark, replace=False)
data = (
self.data if not hasattr(self, "data_nu") else self.data_nu
) # because of the scaling to review
if (
n_samples > 5000 and self.distance == "euclidean"
): # sklearn.euclidean_distances is faster than cdist for big dataset
distances = euclidean_distances(data, data[landmark_indices])
precomputed = getattr(self, "precomputed", None)

if precomputed is not None:
# Use affinities from the kernel computed from the precomputed matrix to avoid Euclidean fallback
landmark_affinities = self.kernel[:, landmark_indices]

if sparse.issparse(landmark_affinities):
landmark_affinities = landmark_affinities.tocsr()
cluster_assignments = np.asarray(
landmark_affinities.argmax(axis=1)
).reshape(-1)
row_max = matrix.to_array(
landmark_affinities.max(axis=1)
).reshape(-1)
else:
landmark_affinities = np.asarray(landmark_affinities)
cluster_assignments = np.argmax(landmark_affinities, axis=1)
row_max = np.max(landmark_affinities, axis=1)

if np.any(row_max == 0):
warnings.warn(
"Some samples have zero affinity to all randomly selected landmarks; "
"increase n_landmark or ensure the affinity matrix connects all points.",
RuntimeWarning,
)
self._clusters = cluster_assignments
else:
distances = cdist(data, data[landmark_indices], metric=self.distance)
self._clusters = np.argmin(distances, axis=1)
data = (
self.data if not hasattr(self, "data_nu") else self.data_nu
) # because of the scaling to review
if (
n_samples > 5000 and self.distance == "euclidean"
): # sklearn.euclidean_distances is faster than cdist for big dataset
distances = euclidean_distances(data, data[landmark_indices])
else:
distances = cdist(
data, data[landmark_indices], metric=self.distance
)
self._clusters = np.argmin(distances, axis=1)

else:
with _logger.log_task("SVD"):
Expand Down
203 changes: 203 additions & 0 deletions test/test_random_landmarking.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from load_tests import generate_swiss_roll
from load_tests import graphtools
from load_tests import np
from load_tests import sp

import pygsp
import warnings
Expand Down Expand Up @@ -405,6 +406,208 @@ def test_random_landmarking_distance_parameter_consistency():
assert len(G.clusters) == small_data.shape[0]


def test_random_landmarking_with_precomputed_affinity():
    """Random landmarking should work with precomputed affinity matrices"""
    # Hand-built affinity over six points forming loosely connected groups.
    base = np.array(
        [
            [1.0, 0.8, 0.1, 0.0, 0.0, 0.0],
            [0.8, 1.0, 0.2, 0.0, 0.0, 0.0],
            [0.1, 0.2, 1.0, 0.9, 0.4, 0.0],
            [0.0, 0.0, 0.9, 1.0, 0.5, 0.2],
            [0.0, 0.0, 0.4, 0.5, 1.0, 0.9],
            [0.0, 0.0, 0.0, 0.2, 0.9, 1.0],
        ]
    )
    affinity = (base + base.T) / 2  # ensure symmetry
    n_landmark = 3
    random_state = 42

    G = graphtools.Graph(
        affinity,
        precomputed="affinity",
        random_landmarking=True,
        n_landmark=n_landmark,
        random_state=random_state,
        knn=3,
        thresh=0,
    )

    # Accessing landmark_op forces landmark construction.
    _ = G.landmark_op

    # Replay the same RNG draw to recover which landmarks were chosen.
    chosen = np.random.default_rng(random_state).choice(
        affinity.shape[0], n_landmark, replace=False
    )
    expected_clusters = np.asarray(G.kernel[:, chosen].argmax(axis=1)).reshape(-1)

    assert np.array_equal(G.clusters, expected_clusters)
    assert G.transitions.shape == (affinity.shape[0], n_landmark)
    assert G.landmark_op.shape == (n_landmark, n_landmark)

Comment on lines +409 to +447
Copy link

Copilot AI Dec 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While this test covers precomputed="affinity", there is no test coverage for precomputed="distance". Since the implementation in graphs.py handles precomputed distance matrices by converting them to affinity matrices in build_kernel(), it would be valuable to add a similar test case that verifies random landmarking works correctly with precomputed distance matrices as well.

Copilot uses AI. Check for mistakes.
Comment on lines +409 to +447
Copy link

Copilot AI Dec 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test only covers dense precomputed affinity matrices. Consider adding test coverage for sparse precomputed affinity matrices to ensure the sparse matrix handling code path (lines 1210-1217 in graphs.py) works correctly with random landmarking.

Copilot uses AI. Check for mistakes.
Comment on lines +409 to +447
Copy link

Copilot AI Dec 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding a test case for the edge case where some samples have zero affinity to all randomly selected landmarks. This would test the warning logic implemented in lines 1223-1228 of graphs.py, which is currently not covered by these tests.

Copilot uses AI. Check for mistakes.

def test_random_landmarking_with_precomputed_distance():
    """Random landmarking should work with precomputed distance matrices"""
    # Symmetric distance matrix: three tight pairs, far from each other.
    dist = np.array(
        [
            [0, 1, 4, 4, 4, 4],
            [1, 0, 4, 4, 4, 4],
            [4, 4, 0, 1, 4, 4],
            [4, 4, 1, 0, 4, 4],
            [4, 4, 4, 4, 0, 1],
            [4, 4, 4, 4, 1, 0],
        ]
    )

    n_landmark = 3
    random_state = 42

    G = graphtools.Graph(
        dist,
        precomputed="distance",
        random_landmarking=True,
        n_landmark=n_landmark,
        random_state=random_state,
        bandwidth=1,  # deterministic affinity: exp(-dist)
        decay=1,
        thresh=0,
        knn=3,
    )

    # Accessing landmark_op forces landmark construction.
    _ = G.landmark_op

    # Replay the same RNG draw to recover which landmarks were chosen.
    chosen = np.random.default_rng(random_state).choice(
        dist.shape[0], n_landmark, replace=False
    )
    expected_clusters = np.asarray(G.kernel[:, chosen].argmax(axis=1)).reshape(-1)

    assert np.array_equal(G.clusters, expected_clusters)
    assert G.transitions.shape == (dist.shape[0], n_landmark)
    assert G.landmark_op.shape == (n_landmark, n_landmark)

Comment on lines +449 to +489
Copy link

Copilot AI Dec 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test only covers dense precomputed distance matrices. Consider adding test coverage for sparse precomputed distance matrices to ensure the sparse matrix code path works correctly with random landmarking.

Copilot uses AI. Check for mistakes.

def test_random_landmarking_with_sparse_precomputed_affinity():
    """Random landmarking should work with sparse precomputed affinity matrices"""
    # Same dense affinity as the dense test, but fed in as CSR to exercise
    # the sparse code path.
    base = np.array(
        [
            [1.0, 0.8, 0.1, 0.0, 0.0, 0.0],
            [0.8, 1.0, 0.2, 0.0, 0.0, 0.0],
            [0.1, 0.2, 1.0, 0.9, 0.4, 0.0],
            [0.0, 0.0, 0.9, 1.0, 0.5, 0.2],
            [0.0, 0.0, 0.4, 0.5, 1.0, 0.9],
            [0.0, 0.0, 0.0, 0.2, 0.9, 1.0],
        ]
    )
    affinity = (base + base.T) / 2  # ensure symmetry
    n_landmark = 3
    random_state = 42

    G = graphtools.Graph(
        sp.csr_matrix(affinity),
        precomputed="affinity",
        random_landmarking=True,
        n_landmark=n_landmark,
        random_state=random_state,
        knn=3,
        thresh=0,
    )

    # Accessing landmark_op forces landmark construction.
    _ = G.landmark_op

    # Replay the same RNG draw to recover which landmarks were chosen.
    chosen = np.random.default_rng(random_state).choice(
        affinity.shape[0], n_landmark, replace=False
    )
    expected_clusters = np.asarray(G.kernel[:, chosen].argmax(axis=1)).reshape(-1)

    assert np.array_equal(G.clusters, expected_clusters)
    assert G.transitions.shape == (affinity.shape[0], n_landmark)
    assert G.landmark_op.shape == (n_landmark, n_landmark)


def test_random_landmarking_with_sparse_precomputed_distance():
    """Random landmarking should work with sparse precomputed distance matrices"""
    # Same distances as the dense test, but fed in as CSR to exercise the
    # sparse code path.
    dist = np.array(
        [
            [0, 1, 4, 4, 4, 4],
            [1, 0, 4, 4, 4, 4],
            [4, 4, 0, 1, 4, 4],
            [4, 4, 1, 0, 4, 4],
            [4, 4, 4, 4, 0, 1],
            [4, 4, 4, 4, 1, 0],
        ]
    )

    n_landmark = 3
    random_state = 42

    G = graphtools.Graph(
        sp.csr_matrix(dist),
        precomputed="distance",
        random_landmarking=True,
        n_landmark=n_landmark,
        random_state=random_state,
        bandwidth=1,  # deterministic affinity: exp(-dist)
        decay=1,
        thresh=0,
        knn=3,
    )

    # Accessing landmark_op forces landmark construction.
    _ = G.landmark_op

    # Replay the same RNG draw to recover which landmarks were chosen.
    chosen = np.random.default_rng(random_state).choice(
        dist.shape[0], n_landmark, replace=False
    )
    expected_clusters = np.asarray(G.kernel[:, chosen].argmax(axis=1)).reshape(-1)

    assert np.array_equal(G.clusters, expected_clusters)
    assert G.transitions.shape == (dist.shape[0], n_landmark)
    assert G.landmark_op.shape == (n_landmark, n_landmark)


def test_random_landmarking_zero_affinity_warning():
    """Test warning when samples have zero affinity to all landmarks"""
    # Create an affinity matrix where point 5 has no connection to other points
    affinity = np.array(
        [
            [1.0, 0.8, 0.1, 0.0, 0.0, 0.0],
            [0.8, 1.0, 0.2, 0.0, 0.0, 0.0],
            [0.1, 0.2, 1.0, 0.9, 0.4, 0.0],
            [0.0, 0.0, 0.9, 1.0, 0.5, 0.0],
            [0.0, 0.0, 0.4, 0.5, 1.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 1.0],  # isolated point
        ]
    )
    affinity = (affinity + affinity.T) / 2  # ensure symmetry
    n_landmark = 2
    random_state = 42  # This seed selects landmarks that don't include point 5

    # Should warn about zero affinity
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        G = graphtools.Graph(
            affinity,
            precomputed="affinity",
            n_landmark=n_landmark,
            random_landmarking=True,
            random_state=random_state,
            knn=3,
            thresh=0,
        )
        _ = G.landmark_op

    # With simplefilter("always"), every warning emitted during graph
    # construction is recorded, so asserting on the total count
    # (len(w) == 1) is brittle: an unrelated deprecation or efficiency
    # warning would break the test. Filter for the specific warning
    # instead and assert it was raised exactly once.
    matching = [
        rec
        for rec in w
        if issubclass(rec.category, RuntimeWarning)
        and "zero affinity to all randomly selected landmarks" in str(rec.message)
    ]
    assert len(matching) == 1


#############
# Test API
#############
Expand Down
Loading