From 84d385c9e87e533f3f588b8d24cc75086ac2ad8d Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 11 Sep 2025 19:57:09 +0200 Subject: [PATCH] Add more tests for clonotype clustering --- src/scirpy/tests/test_clonotypes.py | 37 ++++++++++++++++++++++++++++- src/scirpy/tl/_clonotypes.py | 3 ++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/scirpy/tests/test_clonotypes.py b/src/scirpy/tests/test_clonotypes.py index 081abff00..da34eaa63 100644 --- a/src/scirpy/tests/test_clonotypes.py +++ b/src/scirpy/tests/test_clonotypes.py @@ -163,6 +163,14 @@ def test_clonotypes_end_to_end1(adata_define_clonotypes): [0, 0, 0, 1, 0, np.nan, 0, 0, 0, 0, 2], [8, 8, 8, 1, 8, np.nan, 8, 8, 8, 8, 1], ), + ( + "VDJ", + "primary_only", + False, + None, + [0, 0, 0, 1, 0, np.nan, 0, 2, 3, 3, np.nan], + [5, 5, 5, 1, 5, np.nan, 5, 1, 2, 2, np.nan], + ), ( "VDJ", "primary_only", @@ -171,6 +179,33 @@ def test_clonotypes_end_to_end1(adata_define_clonotypes): [0, 0, 0, 1, 0, np.nan, 0, 2, 3, 4, 5], [5, 5, 5, 1, 5, np.nan, 5, 1, 1, 1, 1], ), + ( + "VJ", + "primary_only", + False, + None, + [0, 0, 1, 1, 0, np.nan, 0, 0, 0, np.nan, np.nan], + [6, 6, 2, 2, 6, np.nan, 6, 6, 6, np.nan, np.nan], + ), + ( + "VJ", + "all", + False, + None, + [0, 0, 1, 1, 2, np.nan, 2, 0, 2, np.nan, np.nan], + [3, 3, 2, 2, 3, np.nan, 3, 3, 3, np.nan, np.nan], + ), + ( + "VJ", + "all", + True, + None, + # TODO: the last two cells get assigned clonotype cluster 5 because they have a v gene, but no junction_aa sequence + # Do we want that? Or should it be np.nan if no sequence exists? Typically this would + # be filtered out before. + [0, 1, 2, 3, 4, np.nan, 4, 1, 4, np.nan, np.nan], + [1, 1, 2, 1, 3, np.nan, 3, 2, 3, np.nan, np.nan], + ), # v gene and receptor type ( "any", @@ -206,7 +241,7 @@ def test_clonotype_clusters_end_to_end( same_v_gene=same_v_gene, ) # type: ignore print(clonotypes) - npt.assert_equal(list(clonotypes.values), [str(x) if not np.isnan(x) else x for x in expected]) + assert list(clonotypes.values) == [str(x) if not np.isnan(x) else x for x in expected] npt.assert_almost_equal(clonotype_size.values, expected_size) diff --git a/src/scirpy/tl/_clonotypes.py b/src/scirpy/tl/_clonotypes.py index 528a67f93..7f8e67b75 100644 --- a/src/scirpy/tl/_clonotypes.py +++ b/src/scirpy/tl/_clonotypes.py @@ -125,7 +125,7 @@ def _validate_parameters( params: DataHandler, - reference: DataHandler, + reference: DataHandler | None, receptor_arms, dual_ir, within_group, @@ -138,6 +138,7 @@ def _validate_parameters( def _get_db_name(): try: + assert reference is not None return reference.adata.uns["DB"]["name"] except KeyError: raise ValueError(