@@ -669,10 +669,45 @@ def group_mean(floating[:, ::1] out,
669669 int64_t[::1] counts ,
670670 ndarray[floating , ndim = 2 ] values,
671671 const intp_t[::1] labels ,
672- Py_ssize_t min_count = - 1 ) -> None:
672+ Py_ssize_t min_count = - 1 ,
673+ bint is_datetimelike = False ,
674+ const uint8_t[:, ::1] mask = None ,
675+ uint8_t[:, ::1] result_mask = None
676+ ) -> None:
677+ """
678+ Compute the mean per label given a label assignment for each value.
679+ NaN values are ignored.
680+
681+ Parameters
682+ ----------
683+ out : np.ndarray[floating]
684+ Values into which this method will write its results.
685+ counts : np.ndarray[int64]
686+ A zeroed array of the same shape as labels,
687+ populated by group sizes during the algorithm.
688+ values : np.ndarray[floating]
689+ 2-d array of the values to find the mean of.
690+ labels : np.ndarray[np.intp]
691+ Array containing a unique label for each group, with its
692+ ordering matching up to the corresponding record in `values`.
693+ min_count : Py_ssize_t
694+ Only used in add and prod. Always -1.
695+ is_datetimelike : bool
696+ True if `values` contains datetime-like entries.
697+ mask : ndarray[bool, ndim=2], optional
698+ Not used.
699+ result_mask : ndarray[bool, ndim=2], optional
700+ Not used.
701+
702+ Notes
703+ -----
704+ This method modifies the `out` parameter rather than returning an object.
705+ `counts` is modified to hold group sizes.
706+ """
707+
673708 cdef:
674709 Py_ssize_t i , j , N , K , lab , ncounts = len (counts)
675- floating val , count , y , t
710+ floating val , count , y , t , nan_val
676711 floating[:, ::1] sumx , compensation
677712 int64_t[:, ::1] nobs
678713 Py_ssize_t len_values = len (values), len_labels = len (labels)
@@ -682,12 +717,13 @@ def group_mean(floating[:, ::1] out,
682717 if len_values != len_labels:
683718 raise ValueError("len(index ) != len(labels )")
684719
685- nobs = np.zeros((< object > out).shape, dtype = np.int64)
686720 # the below is equivalent to `np.zeros_like(out )` but faster
721+ nobs = np.zeros((< object > out).shape, dtype = np.int64)
687722 sumx = np.zeros((< object > out).shape, dtype = (< object > out).base.dtype)
688723 compensation = np.zeros((< object > out).shape, dtype = (< object > out).base.dtype)
689724
690725 N , K = (< object > values).shape
726+ nan_val = NPY_NAT if is_datetimelike else NAN
691727
692728 with nogil:
693729 for i in range(N ):
@@ -699,7 +735,7 @@ def group_mean(floating[:, ::1] out,
699735 for j in range (K):
700736 val = values[i, j]
701737 # not nan
702- if val == val:
738+ if val == val and not (is_datetimelike and val == NPY_NAT) :
703739 nobs[lab, j] += 1
704740 y = val - compensation[lab, j]
705741 t = sumx[lab, j] + y
@@ -710,7 +746,7 @@ def group_mean(floating[:, ::1] out,
710746 for j in range (K):
711747 count = nobs[i, j]
712748 if nobs[i, j] == 0 :
713- out[i, j] = NAN
749+ out[i, j] = nan_val
714750 else :
715751 out[i, j] = sumx[i, j] / count
716752
0 commit comments