Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 38 additions & 9 deletions graphtools/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,16 +1201,45 @@ def build_landmark_op(self):
n_samples = self.data.shape[0]
rng = np.random.default_rng(self.random_state)
landmark_indices = rng.choice(n_samples, self.n_landmark, replace=False)
data = (
self.data if not hasattr(self, "data_nu") else self.data_nu
) # because of the scaling to review
if (
n_samples > 5000 and self.distance == "euclidean"
): # sklearn.euclidean_distances is faster than cdist for big dataset
distances = euclidean_distances(data, data[landmark_indices])
precomputed = getattr(self, "precomputed", None)

if precomputed is not None:
# Use affinities from the kernel computed from the precomputed matrix to avoid Euclidean fallback
landmark_affinities = self.kernel[:, landmark_indices]

if sparse.issparse(landmark_affinities):
landmark_affinities = landmark_affinities.tocsr()
cluster_assignments = np.asarray(
landmark_affinities.argmax(axis=1)
).reshape(-1)
row_max = matrix.to_array(
landmark_affinities.max(axis=1)
).reshape(-1)
else:
landmark_affinities = np.asarray(landmark_affinities)
cluster_assignments = np.argmax(landmark_affinities, axis=1)
row_max = np.max(landmark_affinities, axis=1)

if np.any(row_max == 0):
warnings.warn(
"Some samples have zero affinity to all randomly selected landmarks; "
"increase n_landmark or ensure the affinity matrix connects all points.",
RuntimeWarning,
)
self._clusters = cluster_assignments
else:
distances = cdist(data, data[landmark_indices], metric=self.distance)
self._clusters = np.argmin(distances, axis=1)
data = (
self.data if not hasattr(self, "data_nu") else self.data_nu
) # because of the scaling to review
if (
n_samples > 5000 and self.distance == "euclidean"
): # sklearn.euclidean_distances is faster than cdist for big dataset
distances = euclidean_distances(data, data[landmark_indices])
else:
distances = cdist(
data, data[landmark_indices], metric=self.distance
)
self._clusters = np.argmin(distances, axis=1)

else:
with _logger.log_task("SVD"):
Expand Down
203 changes: 203 additions & 0 deletions test/test_random_landmarking.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from load_tests import generate_swiss_roll
from load_tests import graphtools
from load_tests import np
from load_tests import sp

import pygsp
import warnings
Expand Down Expand Up @@ -405,6 +406,208 @@ def test_random_landmarking_distance_parameter_consistency():
assert len(G.clusters) == small_data.shape[0]


def test_random_landmarking_with_precomputed_affinity():
    """Random landmarking should work with precomputed affinity matrices"""
    # Hand-built affinity over six points forming loosely connected groups.
    base = np.array(
        [
            [1.0, 0.8, 0.1, 0.0, 0.0, 0.0],
            [0.8, 1.0, 0.2, 0.0, 0.0, 0.0],
            [0.1, 0.2, 1.0, 0.9, 0.4, 0.0],
            [0.0, 0.0, 0.9, 1.0, 0.5, 0.2],
            [0.0, 0.0, 0.4, 0.5, 1.0, 0.9],
            [0.0, 0.0, 0.0, 0.2, 0.9, 1.0],
        ]
    )
    affinity = (base + base.T) / 2  # ensure symmetry
    n_landmark = 3
    random_state = 42

    G = graphtools.Graph(
        affinity,
        precomputed="affinity",
        random_landmarking=True,
        n_landmark=n_landmark,
        random_state=random_state,
        knn=3,
        thresh=0,
    )

    # Accessing landmark_op forces landmark construction.
    _ = G.landmark_op

    # Replay the same RNG draw to recover which landmarks were chosen.
    chosen = np.random.default_rng(random_state).choice(
        affinity.shape[0], n_landmark, replace=False
    )
    expected_clusters = np.asarray(G.kernel[:, chosen].argmax(axis=1)).reshape(-1)

    assert np.array_equal(G.clusters, expected_clusters)
    assert G.transitions.shape == (affinity.shape[0], n_landmark)
    assert G.landmark_op.shape == (n_landmark, n_landmark)

Comment on lines +409 to +447
Copy link

Copilot AI Dec 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While this test covers precomputed="affinity", there is no test coverage for precomputed="distance". Since the implementation in graphs.py handles precomputed distance matrices by converting them to affinity matrices in build_kernel(), it would be valuable to add a similar test case that verifies random landmarking works correctly with precomputed distance matrices as well.

Copilot uses AI. Check for mistakes.
Comment on lines +409 to +447
Copy link

Copilot AI Dec 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test only covers dense precomputed affinity matrices. Consider adding test coverage for sparse precomputed affinity matrices to ensure the sparse matrix handling code path (lines 1210-1217 in graphs.py) works correctly with random landmarking.

Copilot uses AI. Check for mistakes.
Comment on lines +409 to +447
Copy link

Copilot AI Dec 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding a test case for the edge case where some samples have zero affinity to all randomly selected landmarks. This would test the warning logic implemented in lines 1223-1228 of graphs.py, which is currently not covered by these tests.

Copilot uses AI. Check for mistakes.

def test_random_landmarking_with_precomputed_distance():
    """Random landmarking should work with precomputed distance matrices"""
    # Symmetric distance matrix: three tight pairs, far from each other.
    dist = np.array(
        [
            [0, 1, 4, 4, 4, 4],
            [1, 0, 4, 4, 4, 4],
            [4, 4, 0, 1, 4, 4],
            [4, 4, 1, 0, 4, 4],
            [4, 4, 4, 4, 0, 1],
            [4, 4, 4, 4, 1, 0],
        ]
    )

    n_landmark = 3
    random_state = 42

    G = graphtools.Graph(
        dist,
        precomputed="distance",
        random_landmarking=True,
        n_landmark=n_landmark,
        random_state=random_state,
        bandwidth=1,  # deterministic affinity: exp(-dist)
        decay=1,
        thresh=0,
        knn=3,
    )

    # Accessing landmark_op forces landmark construction.
    _ = G.landmark_op

    # Replay the same RNG draw to recover which landmarks were chosen.
    chosen = np.random.default_rng(random_state).choice(
        dist.shape[0], n_landmark, replace=False
    )
    expected_clusters = np.asarray(G.kernel[:, chosen].argmax(axis=1)).reshape(-1)

    assert np.array_equal(G.clusters, expected_clusters)
    assert G.transitions.shape == (dist.shape[0], n_landmark)
    assert G.landmark_op.shape == (n_landmark, n_landmark)

Comment on lines +449 to +489
Copy link

Copilot AI Dec 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test only covers dense precomputed distance matrices. Consider adding test coverage for sparse precomputed distance matrices to ensure the sparse matrix code path works correctly with random landmarking.

Copilot uses AI. Check for mistakes.

def test_random_landmarking_with_sparse_precomputed_affinity():
    """Random landmarking should work with sparse precomputed affinity matrices"""
    # Same dense affinity as the dense test, but fed in as CSR to exercise
    # the sparse code path.
    base = np.array(
        [
            [1.0, 0.8, 0.1, 0.0, 0.0, 0.0],
            [0.8, 1.0, 0.2, 0.0, 0.0, 0.0],
            [0.1, 0.2, 1.0, 0.9, 0.4, 0.0],
            [0.0, 0.0, 0.9, 1.0, 0.5, 0.2],
            [0.0, 0.0, 0.4, 0.5, 1.0, 0.9],
            [0.0, 0.0, 0.0, 0.2, 0.9, 1.0],
        ]
    )
    affinity = (base + base.T) / 2  # ensure symmetry
    n_landmark = 3
    random_state = 42

    G = graphtools.Graph(
        sp.csr_matrix(affinity),
        precomputed="affinity",
        random_landmarking=True,
        n_landmark=n_landmark,
        random_state=random_state,
        knn=3,
        thresh=0,
    )

    # Accessing landmark_op forces landmark construction.
    _ = G.landmark_op

    # Replay the same RNG draw to recover which landmarks were chosen.
    chosen = np.random.default_rng(random_state).choice(
        affinity.shape[0], n_landmark, replace=False
    )
    expected_clusters = np.asarray(G.kernel[:, chosen].argmax(axis=1)).reshape(-1)

    assert np.array_equal(G.clusters, expected_clusters)
    assert G.transitions.shape == (affinity.shape[0], n_landmark)
    assert G.landmark_op.shape == (n_landmark, n_landmark)


def test_random_landmarking_with_sparse_precomputed_distance():
    """Random landmarking should work with sparse precomputed distance matrices"""
    # Same distances as the dense test, but fed in as CSR to exercise the
    # sparse code path.
    dist = np.array(
        [
            [0, 1, 4, 4, 4, 4],
            [1, 0, 4, 4, 4, 4],
            [4, 4, 0, 1, 4, 4],
            [4, 4, 1, 0, 4, 4],
            [4, 4, 4, 4, 0, 1],
            [4, 4, 4, 4, 1, 0],
        ]
    )

    n_landmark = 3
    random_state = 42

    G = graphtools.Graph(
        sp.csr_matrix(dist),
        precomputed="distance",
        random_landmarking=True,
        n_landmark=n_landmark,
        random_state=random_state,
        bandwidth=1,  # deterministic affinity: exp(-dist)
        decay=1,
        thresh=0,
        knn=3,
    )

    # Accessing landmark_op forces landmark construction.
    _ = G.landmark_op

    # Replay the same RNG draw to recover which landmarks were chosen.
    chosen = np.random.default_rng(random_state).choice(
        dist.shape[0], n_landmark, replace=False
    )
    expected_clusters = np.asarray(G.kernel[:, chosen].argmax(axis=1)).reshape(-1)

    assert np.array_equal(G.clusters, expected_clusters)
    assert G.transitions.shape == (dist.shape[0], n_landmark)
    assert G.landmark_op.shape == (n_landmark, n_landmark)


def test_random_landmarking_zero_affinity_warning():
    """Test warning when samples have zero affinity to all landmarks"""
    # Create an affinity matrix where point 5 has no connection to other points
    affinity = np.array(
        [
            [1.0, 0.8, 0.1, 0.0, 0.0, 0.0],
            [0.8, 1.0, 0.2, 0.0, 0.0, 0.0],
            [0.1, 0.2, 1.0, 0.9, 0.4, 0.0],
            [0.0, 0.0, 0.9, 1.0, 0.5, 0.0],
            [0.0, 0.0, 0.4, 0.5, 1.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 1.0],  # isolated point
        ]
    )
    affinity = (affinity + affinity.T) / 2  # ensure symmetry
    n_landmark = 2
    random_state = 42  # This seed selects landmarks that don't include point 5

    # Should warn about zero affinity
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        G = graphtools.Graph(
            affinity,
            precomputed="affinity",
            n_landmark=n_landmark,
            random_landmarking=True,
            random_state=random_state,
            knn=3,
            thresh=0,
        )
        _ = G.landmark_op

    # With simplefilter("always"), every warning emitted during graph
    # construction is recorded, so asserting on the total count
    # (len(w) == 1) is brittle: an unrelated deprecation or efficiency
    # warning would break the test. Filter for the specific warning
    # instead and assert it was raised exactly once.
    matching = [
        rec
        for rec in w
        if issubclass(rec.category, RuntimeWarning)
        and "zero affinity to all randomly selected landmarks" in str(rec.message)
    ]
    assert len(matching) == 1


#############
# Test API
#############
Expand Down
Loading