|
32 | 32 | SpecificationError, |
33 | 33 | ) |
34 | 34 | from pandas.util._decorators import ( |
35 | | - Appender, |
36 | | - Substitution, |
37 | 35 | doc, |
38 | 36 | set_module, |
39 | 37 | ) |
|
71 | 69 | from pandas.core.groupby.groupby import ( |
72 | 70 | GroupBy, |
73 | 71 | GroupByPlot, |
74 | | - _transform_template, |
75 | 72 | ) |
76 | 73 | from pandas.core.indexes.api import ( |
77 | 74 | Index, |
@@ -702,9 +699,141 @@ def _wrap_applied_output( |
702 | 699 | """ |
703 | 700 | ) |
704 | 701 |
|
705 | | - @Substitution(klass="Series", example=__examples_series_doc) |
706 | | - @Appender(_transform_template) |
707 | 702 | def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): |
| 703 | + """ |
| 704 | + Call function producing a same-indexed Series on each group. |
| 705 | +
|
| 706 | + Returns a Series having the same indexes as the original object |
| 707 | + filled with the transformed values. |
| 708 | +
|
| 709 | + Parameters |
| 710 | + ---------- |
| 711 | + func : function, str |
| 712 | + Function to apply to each group. |
| 713 | + See the Notes section below for requirements. |
| 714 | +
|
| 715 | + Accepted inputs are: |
| 716 | +
|
| 717 | + - String |
| 718 | + - Python function |
| 719 | + - Numba JIT function with ``engine='numba'`` specified. |
| 720 | +
|
| 721 | + Only passing a single function is supported with this engine. |
| 722 | + If the ``'numba'`` engine is chosen, the function must be |
| 723 | + a user defined function with ``values`` and ``index`` as the |
| 724 | + first and second arguments respectively in the function signature. |
| 725 | + Each group's index will be passed to the user defined function |
| 726 | + and optionally available for use. |
| 727 | +
|
| 728 | + If a string is chosen, then it needs to be the name |
| 729 | + of the groupby method you want to use. |
| 730 | + *args |
| 731 | + Positional arguments to pass to func. |
| 732 | + engine : str, default None |
| 733 | + * ``'cython'`` : Runs the function through C-extensions from cython. |
| 734 | + * ``'numba'`` : Runs the function through JIT compiled code from numba. |
| 735 | + * ``None`` : Defaults to ``'cython'`` |
| 736 | + or the global setting ``compute.use_numba`` |
| 737 | +
|
| 738 | + engine_kwargs : dict, default None |
| 739 | + * For ``'cython'`` engine, there are no accepted ``engine_kwargs``. |
| 740 | + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` |
| 741 | + and ``parallel`` dictionary keys. The values must either be ``True`` or |
| 742 | + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is |
| 743 | + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be |
| 744 | + applied to the function. |
| 745 | +
|
| 746 | + **kwargs |
| 747 | + Keyword arguments to be passed into func. |
| 748 | +
|
| 749 | + Returns |
| 750 | + ------- |
| 751 | + Series |
| 752 | + Series with the same indexes as the original object filled |
| 753 | + with transformed values. |
| 754 | +
|
| 755 | + See Also |
| 756 | + -------- |
| 757 | + Series.groupby.apply : Apply function ``func`` group-wise and combine |
| 758 | + the results together. |
| 759 | + Series.groupby.aggregate : Aggregate using one or more operations. |
| 760 | + Series.transform : Call ``func`` on self producing a Series with the |
| 761 | + same axis shape as self. |
| 762 | +
|
| 763 | + Notes |
| 764 | + ----- |
| 765 | + Each group is endowed with the attribute 'name' in case you need to know |
| 766 | + which group you are working on. |
| 767 | +
|
| 768 | + The current implementation imposes three requirements on f: |
| 769 | +
|
| 770 | + * f must return a value that either has the same shape as the input |
| 771 | + subframe or can be broadcast to the shape of the input subframe. |
| 772 | + For example, if `f` returns a scalar it will be broadcast to have the |
| 773 | + same shape as the input subframe. |
| 774 | + * if this is a DataFrame, f must support application column-by-column |
| 775 | + in the subframe. If f also supports application to the entire subframe, |
| 776 | + then a fast path is used starting from the second chunk. |
| 777 | + * f must not mutate groups. Mutation is not supported and may |
| 778 | + produce unexpected results. See :ref:`gotchas.udf-mutation` for more details. |
| 779 | +
|
| 780 | + When using ``engine='numba'``, there will be no "fall back" behavior internally. |
| 781 | + The group data and group index will be passed as numpy arrays to the JITed |
| 782 | + user defined function, and no alternative execution attempts will be tried. |
| 783 | +
|
| 784 | + The resulting dtype will reflect the return value of the passed ``func``, |
| 785 | + see the examples below. |
| 786 | +
|
| 787 | + .. versionchanged:: 2.0.0 |
| 788 | +
|
| 789 | + When using ``.transform`` on a grouped DataFrame and |
| 790 | + the transformation function returns a DataFrame, |
| 791 | + pandas now aligns the result's index with the input's index. |
| 792 | + You can call ``.to_numpy()`` on the result of |
| 793 | + the transformation function to avoid alignment. |
| 794 | +
|
| 795 | + Examples |
| 796 | + -------- |
| 797 | +
|
| 798 | + >>> ser = pd.Series( |
| 799 | + ... [390.0, 350.0, 30.0, 20.0], |
| 800 | + ... index=["Falcon", "Falcon", "Parrot", "Parrot"], |
| 801 | + ... name="Max Speed", |
| 802 | + ... ) |
| 803 | + >>> grouped = ser.groupby([1, 1, 2, 2]) |
| 804 | + >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) |
| 805 | + Falcon 0.707107 |
| 806 | + Falcon -0.707107 |
| 807 | + Parrot 0.707107 |
| 808 | + Parrot -0.707107 |
| 809 | + Name: Max Speed, dtype: float64 |
| 810 | +
|
| 811 | + Broadcast result of the transformation |
| 812 | +
|
| 813 | + >>> grouped.transform(lambda x: x.max() - x.min()) |
| 814 | + Falcon 40.0 |
| 815 | + Falcon 40.0 |
| 816 | + Parrot 10.0 |
| 817 | + Parrot 10.0 |
| 818 | + Name: Max Speed, dtype: float64 |
| 819 | +
|
| 820 | + >>> grouped.transform("mean") |
| 821 | + Falcon 370.0 |
| 822 | + Falcon 370.0 |
| 823 | + Parrot 25.0 |
| 824 | + Parrot 25.0 |
| 825 | + Name: Max Speed, dtype: float64 |
| 826 | +
|
| 827 | + The resulting dtype will reflect the return value of the passed ``func``, |
| 828 | + for example: |
| 829 | +
|
| 830 | + >>> grouped.transform(lambda x: x.astype(int).max()) |
| 831 | + Falcon 390 |
| 832 | + Falcon 390 |
| 833 | + Parrot 30 |
| 834 | + Parrot 30 |
| 835 | + Name: Max Speed, dtype: int64 |
| 836 | + """ |
708 | 837 | return self._transform( |
709 | 838 | func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs |
710 | 839 | ) |
@@ -2423,9 +2552,152 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): |
2423 | 2552 | """ |
2424 | 2553 | ) |
2425 | 2554 |
|
2426 | | - @Substitution(klass="DataFrame", example=__examples_dataframe_doc) |
2427 | | - @Appender(_transform_template) |
2428 | 2555 | def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): |
| 2556 | + """ |
| 2557 | + Call function producing a same-indexed DataFrame on each group. |
| 2558 | +
|
| 2559 | + Returns a DataFrame having the same indexes as the original object |
| 2560 | + filled with the transformed values. |
| 2561 | +
|
| 2562 | + Parameters |
| 2563 | + ---------- |
| 2564 | + func : function, str |
| 2565 | + Function to apply to each group. |
| 2566 | + See the Notes section below for requirements. |
| 2567 | +
|
| 2568 | + Accepted inputs are: |
| 2569 | +
|
| 2570 | + - String |
| 2571 | + - Python function |
| 2572 | + - Numba JIT function with ``engine='numba'`` specified. |
| 2573 | +
|
| 2574 | + Only passing a single function is supported with this engine. |
| 2575 | + If the ``'numba'`` engine is chosen, the function must be |
| 2576 | + a user defined function with ``values`` and ``index`` as the |
| 2577 | + first and second arguments respectively in the function signature. |
| 2578 | + Each group's index will be passed to the user defined function |
| 2579 | + and optionally available for use. |
| 2580 | +
|
| 2581 | + If a string is chosen, then it needs to be the name |
| 2582 | + of the groupby method you want to use. |
| 2583 | + *args |
| 2584 | + Positional arguments to pass to func. |
| 2585 | + engine : str, default None |
| 2586 | + * ``'cython'`` : Runs the function through C-extensions from cython. |
| 2587 | + * ``'numba'`` : Runs the function through JIT compiled code from numba. |
| 2588 | + * ``None`` : Defaults to ``'cython'`` |
| 2589 | + or the global setting ``compute.use_numba`` |
| 2590 | +
|
| 2591 | + engine_kwargs : dict, default None |
| 2592 | + * For ``'cython'`` engine, there are no accepted ``engine_kwargs``. |
| 2593 | + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` |
| 2594 | + and ``parallel`` dictionary keys. The values must either be ``True`` or |
| 2595 | + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is |
| 2596 | + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be |
| 2597 | + applied to the function. |
| 2598 | +
|
| 2599 | + **kwargs |
| 2600 | + Keyword arguments to be passed into func. |
| 2601 | +
|
| 2602 | + Returns |
| 2603 | + ------- |
| 2604 | + DataFrame |
| 2605 | + DataFrame with the same indexes as the original object filled |
| 2606 | + with transformed values. |
| 2607 | +
|
| 2608 | + See Also |
| 2609 | + -------- |
| 2610 | + DataFrame.groupby.apply : Apply function ``func`` group-wise and combine |
| 2611 | + the results together. |
| 2612 | + DataFrame.groupby.aggregate : Aggregate using one or more operations. |
| 2613 | + DataFrame.transform : Call ``func`` on self producing a DataFrame with the |
| 2614 | + same axis shape as self. |
| 2615 | +
|
| 2616 | + Notes |
| 2617 | + ----- |
| 2618 | + Each group is endowed with the attribute 'name' in case you need to know |
| 2619 | + which group you are working on. |
| 2620 | +
|
| 2621 | + The current implementation imposes three requirements on f: |
| 2622 | +
|
| 2623 | + * f must return a value that either has the same shape as the input |
| 2624 | + subframe or can be broadcast to the shape of the input subframe. |
| 2625 | + For example, if `f` returns a scalar it will be broadcast to have the |
| 2626 | + same shape as the input subframe. |
| 2627 | + * if this is a DataFrame, f must support application column-by-column |
| 2628 | + in the subframe. If f also supports application to the entire subframe, |
| 2629 | + then a fast path is used starting from the second chunk. |
| 2630 | + * f must not mutate groups. Mutation is not supported and may |
| 2631 | + produce unexpected results. See :ref:`gotchas.udf-mutation` for more details. |
| 2632 | +
|
| 2633 | + When using ``engine='numba'``, there will be no "fall back" behavior internally. |
| 2634 | + The group data and group index will be passed as numpy arrays to the JITed |
| 2635 | + user defined function, and no alternative execution attempts will be tried. |
| 2636 | +
|
| 2637 | + The resulting dtype will reflect the return value of the passed ``func``, |
| 2638 | + see the examples below. |
| 2639 | +
|
| 2640 | + .. versionchanged:: 2.0.0 |
| 2641 | +
|
| 2642 | + When using ``.transform`` on a grouped DataFrame |
| 2643 | + and the transformation function returns a DataFrame, |
| 2644 | + pandas now aligns the result's index with the input's index. |
| 2645 | + You can call ``.to_numpy()`` on the result of the |
| 2646 | + transformation function to avoid alignment. |
| 2647 | +
|
| 2648 | + Examples |
| 2649 | + -------- |
| 2650 | +
|
| 2651 | + >>> df = pd.DataFrame( |
| 2652 | + ... { |
| 2653 | + ... "A": ["foo", "bar", "foo", "bar", "foo", "bar"], |
| 2654 | + ... "B": ["one", "one", "two", "three", "two", "two"], |
| 2655 | + ... "C": [1, 5, 5, 2, 5, 5], |
| 2656 | + ... "D": [2.0, 5.0, 8.0, 1.0, 2.0, 9.0], |
| 2657 | + ... } |
| 2658 | + ... ) |
| 2659 | + >>> grouped = df.groupby("A")[["C", "D"]] |
| 2660 | + >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) |
| 2661 | + C D |
| 2662 | + 0 -1.154701 -0.577350 |
| 2663 | + 1 0.577350 0.000000 |
| 2664 | + 2 0.577350 1.154701 |
| 2665 | + 3 -1.154701 -1.000000 |
| 2666 | + 4 0.577350 -0.577350 |
| 2667 | + 5 0.577350 1.000000 |
| 2668 | +
|
| 2669 | + Broadcast result of the transformation |
| 2670 | +
|
| 2671 | + >>> grouped.transform(lambda x: x.max() - x.min()) |
| 2672 | + C D |
| 2673 | + 0 4.0 6.0 |
| 2674 | + 1 3.0 8.0 |
| 2675 | + 2 4.0 6.0 |
| 2676 | + 3 3.0 8.0 |
| 2677 | + 4 4.0 6.0 |
| 2678 | + 5 3.0 8.0 |
| 2679 | +
|
| 2680 | + >>> grouped.transform("mean") |
| 2681 | + C D |
| 2682 | + 0 3.666667 4.0 |
| 2683 | + 1 4.000000 5.0 |
| 2684 | + 2 3.666667 4.0 |
| 2685 | + 3 4.000000 5.0 |
| 2686 | + 4 3.666667 4.0 |
| 2687 | + 5 4.000000 5.0 |
| 2688 | +
|
| 2689 | + The resulting dtype will reflect the return value of the passed ``func``, |
| 2690 | + for example: |
| 2691 | +
|
| 2692 | + >>> grouped.transform(lambda x: x.astype(int).max()) |
| 2693 | + C D |
| 2694 | + 0 5 8 |
| 2695 | + 1 5 9 |
| 2696 | + 2 5 8 |
| 2697 | + 3 5 9 |
| 2698 | + 4 5 8 |
| 2699 | + 5 5 9 |
| 2700 | + """ |
2429 | 2701 | return self._transform( |
2430 | 2702 | func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs |
2431 | 2703 | ) |
|
0 commit comments