@@ -406,33 +406,52 @@ def test_mixed_groupings(normalize, expected_label, expected_values):
406406
407407
@pytest.mark.parametrize(
    "test, columns, expected_names",
    [
        ("repeat", list("abbde"), ["a", None, "d", "b", "b", "e"]),
        (
            "level",
            list("abcd") + ["level_1"],
            ["a", None, "d", "b", "c", "level_1"],
        ),
    ],
)
@pytest.mark.parametrize("as_index", [False, True])
def test_column_label_duplicates(test, columns, expected_names, as_index):
    # GH 44992
    # Test for duplicate input column labels and generated duplicate labels
    #
    # Parameters
    #   test           : case identifier only; not read by the body.
    #                    "repeat" -> the input frame has the label "b" twice.
    #                    "level"  -> the input frame has a column "level_1",
    #                                the same label the as_index=False result
    #                                uses for the unnamed list grouper.
    #   columns        : column labels given to the two-row input frame.
    #   expected_names : level names of the expected MultiIndex; the unnamed
    #                    list grouper sits at position 1 and is None.
    #   as_index       : forwarded to groupby; True expects a Series,
    #                    False expects a DataFrame.
    #
    # The previous version of this test expected as_index=False to raise
    # ValueError matching "cannot insert"; it now expects a frame whose
    # column labels may contain duplicates.
    df = DataFrame([[1, 3, 5, 7, 9], [2, 4, 6, 8, 10]], columns=columns)

    # One tuple per input row: the three grouping keys ("a", the list
    # grouper [0, 1], "d") first, then the remaining column values.
    expected_data = [(1, 0, 7, 3, 5, 9), (2, 1, 8, 4, 6, 10)]

    result = df.groupby(["a", [0, 1], "d"], as_index=as_index).value_counts()

    if as_index:
        # Every row is unique, so each combination is counted once.
        expected = Series(
            data=(1, 1),
            index=MultiIndex.from_tuples(expected_data, names=expected_names),
        )
        tm.assert_series_equal(result, expected)
    else:
        # The keys become ordinary columns followed by a "count" column.
        expected_data = [list(row) + [1] for row in expected_data]
        expected_columns = list(expected_names)
        # The unnamed list grouper is labelled "level_1" in the frame result.
        expected_columns[1] = "level_1"
        expected_columns.append("count")
        expected = DataFrame(expected_data, columns=expected_columns)
        tm.assert_frame_equal(result, expected)
438+
439+
@pytest.mark.parametrize(
    "normalize, expected_label",
    [
        (False, "count"),
        (True, "proportion"),
    ],
)
def test_result_label_duplicates(normalize, expected_label):
    # Test for result column label duplicating an input column label
    #
    # Parameters
    #   normalize      : forwarded to value_counts.
    #   expected_label : label of the third input column, which is also the
    #                    label named in the expected error message
    #                    ("count" for normalize=False, "proportion" for
    #                    normalize=True).
    gb = DataFrame([[1, 2, 3]], columns=["a", "b", expected_label]).groupby(
        "a", as_index=False
    )
    # `match` is applied as a regular-expression search, so the literal must
    # reproduce the error text exactly (no space before the closing quote).
    msg = f"Column label '{expected_label}' is duplicate of result column"
    with pytest.raises(ValueError, match=msg):
        gb.value_counts(normalize=normalize)
436455
437456
438457def test_ambiguous_grouping ():
0 commit comments