Skip to content

Commit 8229ea9

Browse files
committed
BUG: Fix pd.col logical operators and NA comparison inconsistency
- Add `__and__`, `__rand__`, `__or__`, `__ror__`, `__xor__`, `__rxor__`, and `__invert__` methods to the `Expression` class so that conditions in `pd.col` expressions can be combined with the `&`, `|`, `^`, and `~` operators (GH#63322).
- Fix `comp_method_OBJECT_ARRAY` to return a `BooleanArray` when the input contains `pd.NA` values, so that NA is propagated in comparison results instead of being reported as `False` (GH#63328).
1 parent 944c527 commit 8229ea9

File tree

2 files changed

+56
-1
lines changed

2 files changed

+56
-1
lines changed

pandas/core/col.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@
3737
"__lt__": "<",
3838
"__eq__": "==",
3939
"__ne__": "!=",
40+
"__and__": "&",
41+
"__rand__": "&",
42+
"__or__": "|",
43+
"__ror__": "|",
44+
"__xor__": "^",
45+
"__rxor__": "^",
4046
}
4147

4248

@@ -157,6 +163,28 @@ def __mod__(self, other: Any) -> Expression:
157163
def __rmod__(self, other: Any) -> Expression:
    """Handle the reflected modulo operator (``other % self``).

    Delegates to ``_with_binary_op`` using the ``"__rmod__"`` operator name.
    """
    op_name = "__rmod__"
    return self._with_binary_op(op_name, other)
159165

166+
# Logical ops
167+
def __and__(self, other: Any) -> Expression:
    """Handle the ``&`` operator (``self & other``).

    Delegates to ``_with_binary_op`` using the ``"__and__"`` operator name.
    """
    op_name = "__and__"
    return self._with_binary_op(op_name, other)
169+
170+
def __rand__(self, other: Any) -> Expression:
    """Handle the reflected ``&`` operator (``other & self``).

    Delegates to ``_with_binary_op`` using the ``"__rand__"`` operator name.
    """
    op_name = "__rand__"
    return self._with_binary_op(op_name, other)
172+
173+
def __or__(self, other: Any) -> Expression:
    """Handle the ``|`` operator (``self | other``).

    Delegates to ``_with_binary_op`` using the ``"__or__"`` operator name.
    """
    op_name = "__or__"
    return self._with_binary_op(op_name, other)
175+
176+
def __ror__(self, other: Any) -> Expression:
    """Handle the reflected ``|`` operator (``other | self``).

    Delegates to ``_with_binary_op`` using the ``"__ror__"`` operator name.
    """
    op_name = "__ror__"
    return self._with_binary_op(op_name, other)
178+
179+
def __xor__(self, other: Any) -> Expression:
    """Handle the ``^`` operator (``self ^ other``).

    Delegates to ``_with_binary_op`` using the ``"__xor__"`` operator name.
    """
    op_name = "__xor__"
    return self._with_binary_op(op_name, other)
181+
182+
def __rxor__(self, other: Any) -> Expression:
    """Handle the reflected ``^`` operator (``other ^ self``).

    Delegates to ``_with_binary_op`` using the ``"__rxor__"`` operator name.
    """
    op_name = "__rxor__"
    return self._with_binary_op(op_name, other)
184+
185+
def __invert__(self) -> Expression:
    """Handle the unary ``~`` operator.

    Returns a new ``Expression`` whose callable evaluates this expression on
    its argument and applies ``~`` to the result, and whose repr string is
    this expression's repr string wrapped as ``(~...)``.
    """

    def _inverted(df):
        # Evaluate the wrapped expression first, then invert its result.
        evaluated = self(df)
        return ~evaluated

    inverted_repr = f"(~{self._repr_str})"
    return Expression(_inverted, inverted_repr)
187+
160188
def __array_ufunc__(
161189
self, ufunc: Callable[..., Any], method: str, *inputs: Any, **kwargs: Any
162190
) -> Expression:

pandas/core/ops/array_ops.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ def fill_binop(left, right, fill_value):
113113

114114

115115
def comp_method_OBJECT_ARRAY(op, x, y):
116+
from pandas._libs import missing as libmissing
117+
from pandas.core.arrays import BooleanArray
118+
116119
if isinstance(y, list):
117120
# e.g. test_tuple_categories
118121
y = construct_1d_object_array_from_listlike(y)
@@ -129,7 +132,31 @@ def comp_method_OBJECT_ARRAY(op, x, y):
129132
result = libops.vec_compare(x.ravel(), y.ravel(), op)
130133
else:
131134
result = libops.scalar_compare(x.ravel(), y, op)
132-
return result.reshape(x.shape)
135+
result = result.reshape(x.shape)
136+
137+
# GH#63328: Check if there are pd.NA values in the input and return
138+
# BooleanArray to properly propagate NA in comparisons
139+
x_has_na = any(val is libmissing.NA for val in x.ravel())
140+
y_has_na = (
141+
is_scalar(y) and y is libmissing.NA
142+
) or (
143+
isinstance(y, np.ndarray)
144+
and any(val is libmissing.NA for val in y.ravel())
145+
)
146+
147+
if x_has_na or y_has_na:
148+
# Create a mask for NA values
149+
mask = np.array([val is libmissing.NA for val in x.ravel()], dtype=bool)
150+
if isinstance(y, np.ndarray):
151+
mask = mask | np.array(
152+
[val is libmissing.NA for val in y.ravel()], dtype=bool
153+
)
154+
elif y is libmissing.NA:
155+
mask = np.ones(x.shape, dtype=bool)
156+
mask = mask.reshape(x.shape)
157+
return BooleanArray(result, mask, copy=False)
158+
159+
return result
133160

134161

135162
def _masked_arith_op(x: np.ndarray, y, op) -> np.ndarray:

0 commit comments

Comments
 (0)