From 67dd1fa05601f01522757d9a132a8810e76f992d Mon Sep 17 00:00:00 2001 From: aliraeisdanaei Date: Sun, 26 Dec 2021 18:47:03 -0500 Subject: [PATCH 1/5] Adding a test case for the issue 28570 --- .../tests/groupby/aggregate/test_aggregate.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 2ab553434873c..67a19efa979f1 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -62,6 +62,33 @@ def test_agg_ser_multi_key(df): tm.assert_series_equal(results, expected) +def test_agg_different_partials(): + # for issue 28570 + + def add_top_column(df, top_col, inplace=False): + if not inplace: + df = df.copy() + df.columns = pd.MultiIndex.from_product([[top_col], df.columns]) + return df + quant50 = partial(np.percentile, q=50) + quant50.__name__ = "quant50" + quant70 = partial(np.percentile, q=70) + quant70.__name__ = "quant70" + + test = pd.DataFrame({'col1': ['a', 'a', 'b', 'b', 'b'], 'col2': [1,2,3,4,5]}) + test = test.groupby('col1').agg({'col2': [quant50, quant70]}) + # print(test) + + expected_df = pd.DataFrame({'col1': ['a', 'b'], + 'quant50': [1.5, 4.0], + 'quant70': [1.7, 4.4]}) + expected_df = expected_df.set_index('col1') + expected_df = add_top_column(expected_df, 'col2') + # print(expected_df) + + assert(test.equals(expected_df)) + + def test_groupby_aggregation_mixed_dtype(): # GH 6212 expected = DataFrame( From 98034796a45ee3f6b0e6a672876212a53b6b0987 Mon Sep 17 00:00:00 2001 From: aliraeisdanaei Date: Sun, 26 Dec 2021 19:08:17 -0500 Subject: [PATCH 2/5] Made some PEP 8 changes, and fixed the assert line. 
--- .../tests/groupby/aggregate/test_aggregate.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 67a19efa979f1..8fc5df8f86ee0 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -70,23 +70,22 @@ def add_top_column(df, top_col, inplace=False): df = df.copy() df.columns = pd.MultiIndex.from_product([[top_col], df.columns]) return df + quant50 = partial(np.percentile, q=50) quant50.__name__ = "quant50" quant70 = partial(np.percentile, q=70) quant70.__name__ = "quant70" - test = pd.DataFrame({'col1': ['a', 'a', 'b', 'b', 'b'], 'col2': [1,2,3,4,5]}) - test = test.groupby('col1').agg({'col2': [quant50, quant70]}) - # print(test) + test = pd.DataFrame({"col1": ["a", "a", "b", "b", "b"], "col2": [1, 2, 3, 4, 5]}) + test = test.groupby("col1").agg({"col2": [quant50, quant70]}) - expected_df = pd.DataFrame({'col1': ['a', 'b'], - 'quant50': [1.5, 4.0], - 'quant70': [1.7, 4.4]}) - expected_df = expected_df.set_index('col1') - expected_df = add_top_column(expected_df, 'col2') - # print(expected_df) + expected_df = pd.DataFrame( + {"col1": ["a", "b"], "quant50": [1.5, 4.0], "quant70": [1.7, 4.4]} + ) + expected_df = expected_df.set_index("col1") + expected_df = add_top_column(expected_df, "col2") - assert(test.equals(expected_df)) + tm.assert_frame_equal(test, expected_df) def test_groupby_aggregation_mixed_dtype(): From 4d4a02349a0320c4adcf5350292abca49b8c8999 Mon Sep 17 00:00:00 2001 From: aliraeisdanaei Date: Sun, 26 Dec 2021 22:36:18 -0500 Subject: [PATCH 3/5] Removed unnecessary lines from function --- pandas/tests/groupby/aggregate/test_aggregate.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 8fc5df8f86ee0..80e0f4dbe4661 100644 --- 
a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -65,9 +65,7 @@ def test_agg_ser_multi_key(df): def test_agg_different_partials(): # for issue 28570 - def add_top_column(df, top_col, inplace=False): - if not inplace: - df = df.copy() + def add_top_column(df, top_col): df.columns = pd.MultiIndex.from_product([[top_col], df.columns]) return df From 48213b0a447f36956c6b184d13881c95658a8a6d Mon Sep 17 00:00:00 2001 From: aliraeisdanaei Date: Sun, 2 Jan 2022 16:09:57 -0500 Subject: [PATCH 4/5] Made minor changes to naming and took out the imbedded function in the test. --- pandas/tests/groupby/aggregate/test_aggregate.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 80e0f4dbe4661..0f6eab8a16af3 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -65,25 +65,21 @@ def test_agg_ser_multi_key(df): def test_agg_different_partials(): # for issue 28570 - def add_top_column(df, top_col): - df.columns = pd.MultiIndex.from_product([[top_col], df.columns]) - return df - quant50 = partial(np.percentile, q=50) quant50.__name__ = "quant50" quant70 = partial(np.percentile, q=70) quant70.__name__ = "quant70" - test = pd.DataFrame({"col1": ["a", "a", "b", "b", "b"], "col2": [1, 2, 3, 4, 5]}) - test = test.groupby("col1").agg({"col2": [quant50, quant70]}) + df = pd.DataFrame({"col1": ["a", "a", "b", "b", "b"], "col2": [1, 2, 3, 4, 5]}) + result = df.groupby("col1").agg({"col2": [quant50, quant70]}) - expected_df = pd.DataFrame( + expected = pd.DataFrame( {"col1": ["a", "b"], "quant50": [1.5, 4.0], "quant70": [1.7, 4.4]} ) - expected_df = expected_df.set_index("col1") - expected_df = add_top_column(expected_df, "col2") + expected = expected.set_index("col1") + expected.columns = pd.MultiIndex.from_product([["col2"], 
expected.columns]) - tm.assert_frame_equal(test, expected_df) + tm.assert_frame_equal(result, expected) def test_groupby_aggregation_mixed_dtype(): From 85dfe6c6a6220c1803042a827c42efba20adf2b4 Mon Sep 17 00:00:00 2001 From: aliraeisdanaei Date: Mon, 10 Jan 2022 10:16:04 -0500 Subject: [PATCH 5/5] Change comment to allow for easier grepping --- pandas/tests/groupby/aggregate/test_aggregate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 0f6eab8a16af3..4932f77ec0908 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -63,7 +63,7 @@ def test_agg_ser_multi_key(df): def test_agg_different_partials(): - # for issue 28570 + # GH 28570 quant50 = partial(np.percentile, q=50) quant50.__name__ = "quant50"