From c3c564b5b993a85242d9edb7df8f4fbdf3dfcbe9 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 20 Jul 2025 10:11:38 -0700 Subject: [PATCH 1/6] Vector Index get Edge Case --- src/hdmf/common/table.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index f4b2cd011..8b4592a21 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -207,18 +207,27 @@ def get(self, arg, **kwargs): # Note: len(indices) == 0 for test_to_hierarchical_dataframe_empty_tables. # This is an edge case test for to_hierarchical_dataframe() on empty tables. # When len(indices) == 0, ret is expected to be an empty list, defined above. - try: - data = self.target.get(slice(None), **kwargs) - except IndexError: - """ - Note: TODO: test_to_hierarchical_dataframe_indexed_dtr_on_last_level. - This is the old way to get the data and not an untested feature. - """ - for i in indices: - ret.append(self.__getitem_helper(i, **kwargs)) - - return ret - + # try: + # data = self.target.get(slice(None), **kwargs) + # except IndexError: + # """ + # Note: TODO: test_to_hierarchical_dataframe_indexed_dtr_on_last_level. + # This is the old way to get the data and not an untested feature. + # """ + # for i in indices: + # ret.append(self.__getitem_helper(i, **kwargs)) + # + # return ret + if isinstance(self, VectorIndex): + # Note: VectorIndex gets messy as it is using the index for a DynamicTableRegion + # to then index a DynamicTable. The data in VectorIndex is being used as slice. + # This means when we create slice we want to keep the natural behavior of leaving + # the last one out. + # slice(None) is for when the data are not indices and you want everything. + slicing = slice(0, self.data[-1]) + else: + slicing = slice(None) + data = self.target.get(slicing, **kwargs) slices = [self.__get_slice(i) for i in indices] if isinstance(data, pd.DataFrame): ret = [data.iloc[s] for s in slices] @@ -1094,6 +1103,7 @@ def __get_selection_as_dict(self, arg, df, index, exclude=None, **kwargs): ret = OrderedDict() try: # index with a python slice or single int to select one or multiple rows + # breakpoint() ret['id'] = self.id[arg] for name in self.colnames: if name in exclude: From c0dcdd272220a3e8795b416c71ea3821ca9c9a8b Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 20 Jul 2025 10:12:00 -0700 Subject: [PATCH 2/6] Vector Index get Edge Case --- src/hdmf/common/table.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 8b4592a21..a1aff87e6 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -204,20 +204,6 @@ def get(self, arg, **kwargs): indices = arg ret = list() if len(indices) > 0: - # Note: len(indices) == 0 for test_to_hierarchical_dataframe_empty_tables. - # This is an edge case test for to_hierarchical_dataframe() on empty tables. - # When len(indices) == 0, ret is expected to be an empty list, defined above. - # try: - # data = self.target.get(slice(None), **kwargs) - # except IndexError: - # """ - # Note: TODO: test_to_hierarchical_dataframe_indexed_dtr_on_last_level. - # This is the old way to get the data and not an untested feature. - # """ - # for i in indices: - # ret.append(self.__getitem_helper(i, **kwargs)) - # - # return ret if isinstance(self, VectorIndex): # Note: VectorIndex gets messy as it is using the index for a DynamicTableRegion # to then index a DynamicTable. The data in VectorIndex is being used as slice. From ad86ae43680ff787ec3e740a4d145a03f23fcf83 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Tue, 29 Jul 2025 17:19:50 -0700 Subject: [PATCH 3/6] Update table.py --- src/hdmf/common/table.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index a1aff87e6..c5c35e386 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -1089,7 +1089,6 @@ def __get_selection_as_dict(self, arg, df, index, exclude=None, **kwargs): ret = OrderedDict() try: # index with a python slice or single int to select one or multiple rows - # breakpoint() ret['id'] = self.id[arg] for name in self.colnames: if name in exclude: From 13fd5f4e15cd0902db6f7e1aa4b58bc43de4d4c1 Mon Sep 17 00:00:00 2001 From: rly Date: Fri, 3 Oct 2025 23:10:54 -0700 Subject: [PATCH 4/6] Fix incorrect DTR in linkedtables test --- tests/unit/common/test_linkedtables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/common/test_linkedtables.py b/tests/unit/common/test_linkedtables.py index 3c1c63170..d8d48e6b2 100644 --- a/tests/unit/common/test_linkedtables.py +++ b/tests/unit/common/test_linkedtables.py @@ -445,7 +445,7 @@ def test_to_hierarchical_dataframe_indexed_dtr_on_last_level(self): p1 = DynamicTable(name='parent_table', description='parent_table', columns=[VectorData(name='p1', description='p1', data=np.arange(3)), dtr_p1, vi_dtr_p1]) # Super-parent table - dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=np.arange(4), table=p1) + dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=np.arange(3), table=p1) vi_dtr_sp = VectorIndex(name='sl1_index', data=[1, 2, 3], target=dtr_sp) spt = DynamicTable(name='super_parent_table', description='super_parent_table', columns=[VectorData(name='sp1', description='sp1', data=np.arange(3)), dtr_sp, vi_dtr_sp]) From 2199237ea7dda53fc3693eccf7f3d9587a89a02d Mon Sep 17 00:00:00 2001 From: rly Date: Fri, 3 Oct 2025 23:11:23 -0700 Subject: [PATCH 5/6] Clean up VectorIndex.get --- src/hdmf/common/table.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index c5c35e386..278ba9dbf 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -204,16 +204,8 @@ def get(self, arg, **kwargs): indices = arg ret = list() if len(indices) > 0: - if isinstance(self, VectorIndex): - # Note: VectorIndex gets messy as it is using the index for a DynamicTableRegion - # to then index a DynamicTable. The data in VectorIndex is being used as slice. - # This means when we create slice we want to keep the natural behavior of leaving - # the last one out. - # slice(None) is for when the data are not indices and you want everything. - slicing = slice(0, self.data[-1]) - else: - slicing = slice(None) - data = self.target.get(slicing, **kwargs) + # Load the entire target table at once to avoid multiple I/O calls + data = self.target.get(slice(None), **kwargs) slices = [self.__get_slice(i) for i in indices] if isinstance(data, pd.DataFrame): ret = [data.iloc[s] for s in slices] From 2b1350117c159580a5b8e9d0f242e20d25d4cba3 Mon Sep 17 00:00:00 2001 From: rly Date: Fri, 3 Oct 2025 23:20:15 -0700 Subject: [PATCH 6/6] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b0ac581b..89b20d757 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Fixed - Fixed copying of `TypeMap` and `TypeConfigurator`. Previously, the same global `TypeConfigurator` instance was used in all copies of a `TypeMap`. @rly [#1302](https://github.com/hdmf-dev/hdmf/pull/1302) - Fixed `get_data_shape` to use `Data.data.shape` instead of `Data.shape`, which may be overridden by subclasses. @rly [#1311](https://github.com/hdmf-dev/hdmf/pull/1311) +- Fixed a broken test and refactored `VectorIndex.get`. @rly, @mavaylon1 [#1293](https://github.com/hdmf-dev/hdmf/pull/1293) ### Added - Added a check for a compound datatype that is not defined in the schema or spec. This is currently not supported. @mavaylon1 [#1276](https://github.com/hdmf-dev/hdmf/pull/1276)