From a5ed6195dce169df607018959e7b25d014fd8f7f Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Wed, 31 Dec 2025 21:29:36 -0800 Subject: [PATCH 1/4] implement combine generic for compressed lists --- src/compressed_lists/base.py | 37 ++++++++++++++++++++ src/compressed_lists/partition.py | 58 +++++++++++++++++++++++++++++-- tests/test_generics.py | 23 ++++++++++++ 3 files changed, 115 insertions(+), 3 deletions(-) diff --git a/src/compressed_lists/base.py b/src/compressed_lists/base.py index daa4994..9211115 100644 --- a/src/compressed_lists/base.py +++ b/src/compressed_lists/base.py @@ -594,3 +594,40 @@ def empty(cls, n: int): _range_lengths = [0] * n return CompressedList(unlist_data=[], partitioning=Partitioning(ends=_range_lengths)) + + ####################### + ######>> extend <<##### + ####################### + + def extend(self, other: CompressedList, in_place: bool = False) -> CompressedList: + """ + Args: + other: + Some CompressedList object. + + in_place: + Whether to perform the modification in place. + + Returns: + A ``CompressedList`` where items in ``other`` are added to the end. If + ``in_place = False``, this is a new object, otherwise a reference + to the current object is returned. 
+ """ + output = self._define_output(in_place) + + output._unlist_data = ut.combine_sequences(output._unlist_data, other._unlist_data) + output._partitioning = ut.combine_sequences(output._partitioning, other._partitioning) + + return output + + +@ut.combine_sequences.register(CompressedList) +def _register_combine_patitioning(*x: CompressedList) -> CompressedList: + if not x: + raise ValueError("Cannot combine an empty object") + + output = x[0].copy() + for i in range(1, len(x)): + output.extend(x[i], in_place=True) + + return output diff --git a/src/compressed_lists/partition.py b/src/compressed_lists/partition.py index 7d25adf..231814f 100644 --- a/src/compressed_lists/partition.py +++ b/src/compressed_lists/partition.py @@ -27,7 +27,7 @@ class Partitioning: """ def __init__( - self, ends: Sequence[int], names: Optional[Union[ut.Names, Sequence[str]]] = None, validate: bool = True + self, ends: Sequence[int], names: Optional[Union[ut.Names, Sequence[str]]] = None, _validate: bool = True ): """Initialize a Partitioning object. @@ -38,7 +38,7 @@ def __init__( names: Optional names for each partition. - validate: + _validate: Internal use only. 
""" self._ends = np.array(ends, dtype=np.int64) @@ -52,7 +52,7 @@ def __init__( if names is not None: self._names = ut.Names(names) - if validate: + if _validate: _validate_names(names, len(ends)) @classmethod @@ -115,6 +115,7 @@ def __deepcopy__(self, memo=None, _nil=[]): return current_class_const( ends=_ends_copy, names=_names_copy, + _validate=False, ) def __copy__(self): @@ -126,6 +127,7 @@ def __copy__(self): return current_class_const( ends=self._ends, names=self._names, + _validate=False, ) def copy(self): @@ -289,3 +291,53 @@ def get_starts(self) -> np.ndarray: def starts(self) -> np.ndarray: """Alias for :py:attr:`~get_starts`, provided for back-compatibility.""" return self.get_starts() + + ####################### + ######>> extend <<##### + ####################### + + def extend(self, other: Partitioning, in_place: bool = False) -> Partitioning: + """ + Args: + other: + Some Paritioning object. + + in_place: + Whether to perform the modification in place. + + Returns: + A ``Partitioning`` where items in ``other`` are added to the end. If + ``in_place = False``, this is a new object, otherwise a reference + to the current object is returned. 
+ """ + output = self._define_output(in_place) + previous_len = output.get_nobj() + + output._ends = ut.combine_sequences(output._ends, (other._ends + previous_len)) + output._starts = ut.combine_sequences(output._starts, (other._starts + previous_len)) + + if output._names is None and other._names is None: + output._names = None + else: + if output._names is None: + output._names = ut.Names([""] * (len(output._ends) - len(other._ends))) # one per partition + output._names.extend(other._names) + elif other._names is None: + _names = ut.Names([""] * len(other)) + output._names.extend(_names) + else: + output._names.extend(other._names) + + return output + + +@ut.combine_sequences.register(Partitioning) +def _register_combine_patitioning(*x: Partitioning) -> Partitioning: + if not x: + raise ValueError("Cannot combine an empty sequence") + + output = x[0].copy() + for i in range(1, len(x)): + output.extend(x[i], in_place=True) + + return output diff --git a/tests/test_generics.py b/tests/test_generics.py index 57c01f1..21625f8 100644 --- a/tests/test_generics.py +++ b/tests/test_generics.py @@ -84,3 +84,26 @@ def test_generic_register_helper_errors(): with pytest.raises(ValueError, match="'groups_or_paritions' must be a group vector or a Partition object"): _generic_register_helper([1, 2, 3], {"a": 1}) + + +def test_paritioning_combine(): + p1 = Partitioning(ends=[3, 5, 9], names=["1", "2", "3"]) + p2 = Partitioning(ends=[3, 5, 9]) + + combi = ut.combine_sequences(p1, p2) + + assert isinstance(combi, Partitioning) + assert len(combi) == 6 + assert np.allclose(combi.get_ends(), [3, 5, 9, 12, 14, 18]) + assert list(combi.get_names()) == ["1", "2", "3", "", "", ""] + + +def test_compressed_list_combine(): + f1 = CompressedFloatList.from_list([[1.1, 1.2], [2.1, 2.2, 2.3], [3]], ["fruits1", "fruits2", "fruits3"]) + + combi = ut.combine_sequences(f1, f1) + + assert isinstance(combi, CompressedFloatList) + assert len(combi) == 6 + assert np.allclose(combi.get_partitioning().get_ends(), [2, 5, 6, 8, 11, 12]) + assert 
list(combi.get_names()) == ["fruits1", "fruits2", "fruits3", "fruits1", "fruits2", "fruits3"] From b1c54649344e66b6ad926c51aad180d5e30b7491 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 Jan 2026 05:30:03 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_base.py b/tests/test_base.py index 8a5be20..e58b17c 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -52,6 +52,7 @@ def test_base_list_empty_classmeth(): assert isinstance(subset, CompressedList) assert len(subset) == 2 + def test_base_set_names(base_list): new_names = ["X", "Y", "Z"] From 4fdd4355d8259d5a1f44f9fc99be5b090432f7b3 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Wed, 31 Dec 2025 21:32:27 -0800 Subject: [PATCH 3/4] update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8c3390..b75685b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,11 @@ # Changelog -## Version 0.4.0 - 0.4.1 +## Version 0.4.0 - 0.4.2 - Classes extend `BiocObject` from biocutils. `metadata` is a named list. - Update actions to run from 3.10-3.14 - Support empty compressed list objects of size `n`. +- Implement combine generic for compressed lists. 
## Version 0.3.0 From 4c4e4ebde9c8dbb669ca5e7b6073123c13680513 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Wed, 31 Dec 2025 22:35:13 -0800 Subject: [PATCH 4/4] a better test to improve coverage --- tests/test_generics.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_generics.py b/tests/test_generics.py index 21625f8..b333d78 100644 --- a/tests/test_generics.py +++ b/tests/test_generics.py @@ -100,10 +100,11 @@ def test_paritioning_combine(): def test_compressed_list_combine(): f1 = CompressedFloatList.from_list([[1.1, 1.2], [2.1, 2.2, 2.3], [3]], ["fruits1", "fruits2", "fruits3"]) + f2 = CompressedFloatList.from_list([[1.1, 1.2], [2.1, 2.2, 2.3], [3]]) - combi = ut.combine_sequences(f1, f1) + combi = ut.combine_sequences(f1, f2) assert isinstance(combi, CompressedFloatList) assert len(combi) == 6 assert np.allclose(combi.get_partitioning().get_ends(), [2, 5, 6, 8, 11, 12]) - assert list(combi.get_names()) == ["fruits1", "fruits2", "fruits3", "fruits1", "fruits2", "fruits3"] + assert list(combi.get_names()) == ["fruits1", "fruits2", "fruits3", "", "", ""]