@@ -2595,6 +2595,35 @@ def get_stats(group):
25952595 result = self .df .groupby (cats ).D .apply (get_stats )
25962596 self .assertEqual (result .index .names [0 ], 'C' )
25972597
def test_apply_categorical_data(self):
    # GH 10138
    # Group on two categorical keys, one of which declares a category
    # ('b') that never occurs in the data, and check the resulting index
    # for both ordered and unordered categoricals.
    for ordered in (True, False):
        dense_cat = Categorical(['a', 'b', 'c'], ordered=ordered)
        # every value is 'a'; 'b' is a declared but unobserved category
        sparse_cat = Categorical(['a'] * 3, categories=['a', 'b'],
                                 ordered=ordered)
        frame = DataFrame({'missing': sparse_cat,
                           'dense': dense_cat,
                           'values': np.arange(len(dense_cat))})
        gb = frame.groupby(['missing', 'dense'])
        key_names = ['missing', 'dense']

        # reductions are expected to keep the rows for the unobserved
        # category 'b', filled with NaN
        full_index = MultiIndex.from_product(
            [['a', 'b'], ['a', 'b', 'c']], names=key_names)
        expected_frame = DataFrame([0, 1, 2, np.nan, np.nan, np.nan],
                                   index=full_index,
                                   columns=['values'])

        result = gb.apply(lambda x: np.mean(x))
        assert_frame_equal(result, expected_frame)

        result = gb.mean()
        assert_frame_equal(result, expected_frame)

        result = gb.agg(np.mean)
        assert_frame_equal(result, expected_frame)

        # an apply that returns a constant per group is expected to be
        # indexed only by the ('a', *) combinations present in the data
        observed_index = MultiIndex.from_product(
            [['a'], ['a', 'b', 'c']], names=key_names)
        expected_series = Series(1, index=observed_index)
        result = gb.apply(lambda x: 1)
        assert_series_equal(result, expected_series)
25982627 def test_apply_corner_cases (self ):
25992628 # #535, can't use sliding iterator
26002629
0 commit comments