Skip to content

Commit 290c62e

Browse files
committed
Disallows dropping duplicate keys when using full outer join
1 parent 276dc6a commit 290c62e

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

python/tests/test_dataframe.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -647,6 +647,35 @@ def test_unnest_without_nulls(nested_df):
647647
assert result.column(1) == pa.array([7, 8, 8, 9, 9, 9])
648648

649649

650+
def test_join_full():
    """Check a full outer join on ``num`` and a follow-up key merge.

    Two small frames registered as ``l`` and ``r`` are joined with
    ``how="full"``. The joined result is expected to keep a ``None`` in
    ``num`` for the row that only exists on the left side. A second step
    then uses ``coalesce(l.num, r.num)`` to build one fully populated key
    column.
    """
    ctx = SessionContext()

    # Left input: keys 1, 2, 3 paired with a name.
    left_batch = pa.RecordBatch.from_arrays(
        [pa.array([1, 2, 3]), pa.array(["a", "b", "c"])],
        names=["num", "name"],
    )
    # Right input: keys 1, 3, 5 paired with a boolean value.
    right_batch = pa.RecordBatch.from_arrays(
        [pa.array([1, 3, 5]), pa.array([True, True, False])],
        names=["num", "value"],
    )

    left_df = ctx.create_dataframe([[left_batch]], "l")
    right_df = ctx.create_dataframe([[right_batch]], "r")

    joined = left_df.join(right_df, on="num", how="full")

    expected_joined = {
        "num": [1, 3, None, 5],
        "name": ["a", "c", "b", None],
        "value": [True, True, None, False],
    }
    assert expected_joined == joined.to_pydict()

    # Show how a user can post-process the joined frame: coalesce the two
    # key columns into one, so the left-only key 2 appears instead of None.
    merged = joined.select_exprs(
        "coalesce(l.num, r.num) as num",
        "l.name",
        "r.value",
    )

    expected_merged = {
        "num": [1, 3, 2, 5],
        "name": ["a", "c", "b", None],
        "value": [True, True, None, False],
    }
    assert expected_merged == merged.to_pydict()
677+
678+
650679
def test_join():
651680
ctx = SessionContext()
652681

src/dataframe.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,8 @@ impl PyDataFrame {
676676
None,
677677
)?;
678678

679-
if drop_duplicate_keys {
679+
let allow_drop_duplicate_keys = drop_duplicate_keys && join_type != JoinType::Full;
680+
if allow_drop_duplicate_keys {
680681
let mutual_keys = left_keys
681682
.iter()
682683
.zip(right_keys.iter())

0 commit comments

Comments
 (0)