From 2d65034a23b5fa1ca0df1625f93ff755f06c5629 Mon Sep 17 00:00:00 2001
From: MattScicluna <mattcscicluna@gmail.com>
Date: Thu, 18 Dec 2025 13:48:10 -0500
Subject: [PATCH 1/4] added random_landmarking support for precomputed
 distance/affinity

---
 graphtools/graphs.py            | 47 ++++++++++++++++++++++++++-------
 test/test_random_landmarking.py | 40 ++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 9 deletions(-)

diff --git a/graphtools/graphs.py b/graphtools/graphs.py
index c53a1dc..403a540 100644
--- a/graphtools/graphs.py
+++ b/graphtools/graphs.py
@@ -1201,16 +1201,45 @@ def build_landmark_op(self):
                 n_samples = self.data.shape[0]
                 rng = np.random.default_rng(self.random_state)
                 landmark_indices = rng.choice(n_samples, self.n_landmark, replace=False)
-                data = (
-                    self.data if not hasattr(self, "data_nu") else self.data_nu
-                )  # because of the scaling to review
-                if (
-                    n_samples > 5000 and self.distance == "euclidean"
-                ):  # sklearn.euclidean_distances is faster than cdist for big dataset
-                    distances = euclidean_distances(data, data[landmark_indices])
+                precomputed = getattr(self, "precomputed", None)
+
+                if precomputed is not None:
+                    # Use the precomputed affinities/distances directly to avoid Euclidean fallback
+                    landmark_affinities = self.kernel[:, landmark_indices]
+
+                    if sparse.issparse(landmark_affinities):
+                        landmark_affinities = landmark_affinities.tocsr()
+                        cluster_assignments = np.asarray(
+                            landmark_affinities.argmax(axis=1)
+                        ).reshape(-1)
+                        row_max = matrix.to_array(
+                            landmark_affinities.max(axis=1)
+                        ).reshape(-1)
+                    else:
+                        landmark_affinities = np.asarray(landmark_affinities)
+                        cluster_assignments = np.argmax(landmark_affinities, axis=1)
+                        row_max = np.max(landmark_affinities, axis=1)
+
+                    if np.any(row_max == 0):
+                        warnings.warn(
+                            "Some samples have zero affinity to all randomly selected landmarks; "
+                            "increase n_landmark or ensure the affinity matrix connects all points.",
+                            RuntimeWarning,
+                        )
+                    self._clusters = cluster_assignments
                 else:
-                    distances = cdist(data, data[landmark_indices], metric=self.distance)
-                self._clusters = np.argmin(distances, axis=1)
+                    data = (
+                        self.data if not hasattr(self, "data_nu") else self.data_nu
+                    )  # because of the scaling to review
+                    if (
+                        n_samples > 5000 and self.distance == "euclidean"
+                    ):  # sklearn.euclidean_distances is faster than cdist for big dataset
+                        distances = euclidean_distances(data, data[landmark_indices])
+                    else:
+                        distances = cdist(
+                            data, data[landmark_indices], metric=self.distance
+                        )
+                    self._clusters = np.argmin(distances, axis=1)
 
             else:
                 with _logger.log_task("SVD"):
diff --git a/test/test_random_landmarking.py b/test/test_random_landmarking.py
index 329fb98..0e23577 100644
--- a/test/test_random_landmarking.py
+++ b/test/test_random_landmarking.py
@@ -405,6 +405,46 @@ def test_random_landmarking_distance_parameter_consistency():
         assert len(G.clusters) == small_data.shape[0]
 
 
+def test_random_landmarking_with_precomputed_affinity():
+    """Random landmarking should work with precomputed affinity matrices"""
+    affinity = np.array(
+        [
+            [1.0, 0.8, 0.1, 0.0, 0.0, 0.0],
+            [0.8, 1.0, 0.2, 0.0, 0.0, 0.0],
+            [0.1, 0.2, 1.0, 0.9, 0.4, 0.0],
+            [0.0, 0.0, 0.9, 1.0, 0.5, 0.2],
+            [0.0, 0.0, 0.4, 0.5, 1.0, 0.9],
+            [0.0, 0.0, 0.0, 0.2, 0.9, 1.0],
+        ]
+    )
+    affinity = (affinity + affinity.T) / 2  # ensure symmetry
+    n_landmark = 3
+    random_state = 42
+
+    G = graphtools.Graph(
+        affinity,
+        precomputed="affinity",
+        n_landmark=n_landmark,
+        random_landmarking=True,
+        random_state=random_state,
+        knn=3,
+        thresh=0,
+    )
+
+    # Trigger landmark construction
+    _ = G.landmark_op
+
+    rng = np.random.default_rng(random_state)
+    landmark_indices = rng.choice(affinity.shape[0], n_landmark, replace=False)
+    expected_clusters = np.asarray(
+        G.kernel[:, landmark_indices].argmax(axis=1)
+    ).reshape(-1)
+
+    assert np.array_equal(G.clusters, expected_clusters)
+    assert G.transitions.shape == (affinity.shape[0], n_landmark)
+    assert G.landmark_op.shape == (n_landmark, n_landmark)
+
+
 #############
 # Test API
 #############

From 45c96cae335a295719b19d47a5e1032fb67a3edd Mon Sep 17 00:00:00 2001
From: MattScicluna <mattcscicluna@gmail.com>
Date: Thu, 18 Dec 2025 14:16:36 -0500
Subject: [PATCH 2/4] added random landmarking precomputed distance test

---
 test/test_random_landmarking.py | 42 +++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/test/test_random_landmarking.py b/test/test_random_landmarking.py
index 0e23577..80db6fa 100644
--- a/test/test_random_landmarking.py
+++ b/test/test_random_landmarking.py
@@ -445,6 +445,48 @@ def test_random_landmarking_with_precomputed_affinity():
     assert G.landmark_op.shape == (n_landmark, n_landmark)
 
 
+def test_random_landmarking_with_precomputed_distance():
+    """Random landmarking should work with precomputed distance matrices"""
+    dist = np.array(
+        [
+            [0, 1, 4, 4, 4, 4],
+            [1, 0, 4, 4, 4, 4],
+            [4, 4, 0, 1, 4, 4],
+            [4, 4, 1, 0, 4, 4],
+            [4, 4, 4, 4, 0, 1],
+            [4, 4, 4, 4, 1, 0],
+        ]
+    )
+
+    n_landmark = 3
+    random_state = 42
+
+    G = graphtools.Graph(
+        dist,
+        precomputed="distance",
+        n_landmark=n_landmark,
+        random_landmarking=True,
+        random_state=random_state,
+        bandwidth=1,  # deterministic affinity: exp(-dist)
+        decay=1,
+        thresh=0,
+        knn=3,
+    )
+
+    # Trigger landmark construction
+    _ = G.landmark_op
+
+    rng = np.random.default_rng(random_state)
+    landmark_indices = rng.choice(dist.shape[0], n_landmark, replace=False)
+    expected_clusters = np.asarray(
+        G.kernel[:, landmark_indices].argmax(axis=1)
+    ).reshape(-1)
+
+    assert np.array_equal(G.clusters, expected_clusters)
+    assert G.transitions.shape == (dist.shape[0], n_landmark)
+    assert G.landmark_op.shape == (n_landmark, n_landmark)
+
+
 #############
 # Test API
 #############

From 4be342ab5855c820ec4382d31eb933a5019d0d07 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 19:30:37 +0000
Subject: [PATCH 3/4] Initial plan


From 78f0c8949e82c99d8f7db8bb5658e4b4401d5e78 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 19:35:21 +0000
Subject: [PATCH 4/4] Add test coverage for sparse matrices and zero affinity
 edge case, improve comment clarity

Co-authored-by: MattScicluna <19255250+MattScicluna@users.noreply.github.com>
---
 graphtools/graphs.py            |   2 +-
 test/test_random_landmarking.py | 121 ++++++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/graphtools/graphs.py b/graphtools/graphs.py
index 403a540..d0317d2 100644
--- a/graphtools/graphs.py
+++ b/graphtools/graphs.py
@@ -1204,7 +1204,7 @@ def build_landmark_op(self):
                 precomputed = getattr(self, "precomputed", None)
 
                 if precomputed is not None:
-                    # Use the precomputed affinities/distances directly to avoid Euclidean fallback
+                    # Use affinities from the kernel computed from the precomputed matrix to avoid Euclidean fallback
                     landmark_affinities = self.kernel[:, landmark_indices]
 
                     if sparse.issparse(landmark_affinities):
diff --git a/test/test_random_landmarking.py b/test/test_random_landmarking.py
index 80db6fa..38fb3f3 100644
--- a/test/test_random_landmarking.py
+++ b/test/test_random_landmarking.py
@@ -8,6 +8,7 @@
 from load_tests import generate_swiss_roll
 from load_tests import graphtools
 from load_tests import np
+from load_tests import sp
 
 import pygsp
 import warnings
@@ -487,6 +488,126 @@ def test_random_landmarking_with_precomputed_distance():
     assert G.landmark_op.shape == (n_landmark, n_landmark)
 
 
+def test_random_landmarking_with_sparse_precomputed_affinity():
+    """Random landmarking should work with sparse precomputed affinity matrices"""
+    affinity = np.array(
+        [
+            [1.0, 0.8, 0.1, 0.0, 0.0, 0.0],
+            [0.8, 1.0, 0.2, 0.0, 0.0, 0.0],
+            [0.1, 0.2, 1.0, 0.9, 0.4, 0.0],
+            [0.0, 0.0, 0.9, 1.0, 0.5, 0.2],
+            [0.0, 0.0, 0.4, 0.5, 1.0, 0.9],
+            [0.0, 0.0, 0.0, 0.2, 0.9, 1.0],
+        ]
+    )
+    affinity = (affinity + affinity.T) / 2  # ensure symmetry
+    affinity_sparse = sp.csr_matrix(affinity)
+    n_landmark = 3
+    random_state = 42
+
+    G = graphtools.Graph(
+        affinity_sparse,
+        precomputed="affinity",
+        n_landmark=n_landmark,
+        random_landmarking=True,
+        random_state=random_state,
+        knn=3,
+        thresh=0,
+    )
+
+    # Trigger landmark construction
+    _ = G.landmark_op
+
+    rng = np.random.default_rng(random_state)
+    landmark_indices = rng.choice(affinity.shape[0], n_landmark, replace=False)
+    expected_clusters = np.asarray(
+        G.kernel[:, landmark_indices].argmax(axis=1)
+    ).reshape(-1)
+
+    assert np.array_equal(G.clusters, expected_clusters)
+    assert G.transitions.shape == (affinity.shape[0], n_landmark)
+    assert G.landmark_op.shape == (n_landmark, n_landmark)
+
+
+def test_random_landmarking_with_sparse_precomputed_distance():
+    """Random landmarking should work with sparse precomputed distance matrices"""
+    dist = np.array(
+        [
+            [0, 1, 4, 4, 4, 4],
+            [1, 0, 4, 4, 4, 4],
+            [4, 4, 0, 1, 4, 4],
+            [4, 4, 1, 0, 4, 4],
+            [4, 4, 4, 4, 0, 1],
+            [4, 4, 4, 4, 1, 0],
+        ]
+    )
+    dist_sparse = sp.csr_matrix(dist)
+
+    n_landmark = 3
+    random_state = 42
+
+    G = graphtools.Graph(
+        dist_sparse,
+        precomputed="distance",
+        n_landmark=n_landmark,
+        random_landmarking=True,
+        random_state=random_state,
+        bandwidth=1,  # deterministic affinity: exp(-dist)
+        decay=1,
+        thresh=0,
+        knn=3,
+    )
+
+    # Trigger landmark construction
+    _ = G.landmark_op
+
+    rng = np.random.default_rng(random_state)
+    landmark_indices = rng.choice(dist.shape[0], n_landmark, replace=False)
+    expected_clusters = np.asarray(
+        G.kernel[:, landmark_indices].argmax(axis=1)
+    ).reshape(-1)
+
+    assert np.array_equal(G.clusters, expected_clusters)
+    assert G.transitions.shape == (dist.shape[0], n_landmark)
+    assert G.landmark_op.shape == (n_landmark, n_landmark)
+
+
+def test_random_landmarking_zero_affinity_warning():
+    """Test warning when samples have zero affinity to all landmarks"""
+    # Create an affinity matrix where point 5 has no connection to other points
+    affinity = np.array(
+        [
+            [1.0, 0.8, 0.1, 0.0, 0.0, 0.0],
+            [0.8, 1.0, 0.2, 0.0, 0.0, 0.0],
+            [0.1, 0.2, 1.0, 0.9, 0.4, 0.0],
+            [0.0, 0.0, 0.9, 1.0, 0.5, 0.0],
+            [0.0, 0.0, 0.4, 0.5, 1.0, 0.0],
+            [0.0, 0.0, 0.0, 0.0, 0.0, 1.0],  # isolated point
+        ]
+    )
+    affinity = (affinity + affinity.T) / 2  # ensure symmetry
+    n_landmark = 2
+    random_state = 42  # This seed selects landmarks that don't include point 5
+
+    # Should warn about zero affinity
+    with warnings.catch_warnings(record=True) as w:
+        warnings.simplefilter("always")
+        G = graphtools.Graph(
+            affinity,
+            precomputed="affinity",
+            n_landmark=n_landmark,
+            random_landmarking=True,
+            random_state=random_state,
+            knn=3,
+            thresh=0,
+        )
+        _ = G.landmark_op
+
+        assert len(w) == 1
+        assert issubclass(w[0].category, RuntimeWarning)
+        assert "zero affinity to all randomly selected landmarks" in str(w[0].message)
+
+
 #############
 # Test API
 #############