Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4746,6 +4746,19 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}

// setcc X, 0, setlt --> X (when X is all sign bits)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't there already a similar check special cased for zero or one boolean content? Can these be merged?

//
// When we know that X has 0 or -1 in each lane, this comparison will produce
// X. This is only true when boolean contents are represented via 0s and -1s.
if (OpVT.isVector() && VT == OpVT &&
// Check that the result of setcc is 0 or -1 in each lane.
getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent &&
// Match only for checks X < 0
Cond == ISD::SETLT && isNullOrNullSplat(N1) &&
// The identity holds iff we know all sign bits for all lanes.
DAG.ComputeNumSignBits(N0) == N0.getScalarValueSizeInBits())
return N0;

// FIXME: Support vectors.
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
Expand Down
118 changes: 118 additions & 0 deletions llvm/test/CodeGen/AArch64/setcc-redundant-cmlt.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s

; Baseline case: the operand of the `slt 0` compare is a sign-extended <4 x i1>
; compare result, so every i32 lane is either 0 or -1. The CHECK lines expect
; only the original cmgt feeding the bsl, with no extra cmlt against zero.
define <4 x i32> @direct_setcc_lt0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: direct_setcc_lt0:
; CHECK: // %bb.0:
; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
%cmp = icmp slt <4 x i32> %a, %b
%sext = sext <4 x i1> %cmp to <4 x i32>
%lt0 = icmp slt <4 x i32> %sext, zeroinitializer
%sel = select <4 x i1> %lt0, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %sel
}

; The sign-extended compare result is splatted from lane 2 before being
; compared `slt 0`. Every lane of the splat is still 0 or -1, and the CHECK
; lines expect cmgt, dup and bsl with no extra cmlt against zero.
define <4 x i32> @shuffle_setcc_lt0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: shuffle_setcc_lt0:
; CHECK: // %bb.0:
; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT: dup v0.4s, v0.s[2]
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
%cmp = icmp slt <4 x i32> %a, %b
%sext = sext <4 x i1> %cmp to <4 x i32>
%dup = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
%lt0 = icmp slt <4 x i32> %dup, zeroinitializer
%sel = select <4 x i1> %lt0, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %sel
}

; Commuted form of the sign test: the zero vector is the left operand of an
; `sgt` compare (0 > X), which is equivalent to X < 0. The CHECK lines expect
; only cmgt and bsl, i.e. no extra compare against zero in this form either.
define <4 x i32> @direct_setcc_0gt(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: direct_setcc_0gt:
; CHECK: // %bb.0:
; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
%cmp = icmp slt <4 x i32> %a, %b
%sext = sext <4 x i1> %cmp to <4 x i32>
%gt0 = icmp sgt <4 x i32> zeroinitializer, %sext
%sel = select <4 x i1> %gt0, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %sel
}

; Same pattern as the baseline sign test on a sign-extended compare result,
; but with <8 x i16> lanes instead of <4 x i32>. The CHECK lines expect a
; single cmgt on .8h lanes followed by bsl, with no extra cmlt against zero.
define <8 x i16> @direct_setcc_lt0_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: direct_setcc_lt0_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
%cmp = icmp slt <8 x i16> %a, %b
%sext = sext <8 x i1> %cmp to <8 x i16>
%lt0 = icmp slt <8 x i16> %sext, zeroinitializer
%sel = select <8 x i1> %lt0, <8 x i16> %x, <8 x i16> %y
ret <8 x i16> %sel
}

; The sign-extended compare result goes through a lane-reversing shuffle
; (mask <3, 2, 1, 0>) rather than a splat. Permuting lanes that are each 0 or
; -1 still yields only 0 / -1 lanes. The CHECK lines expect cmgt, the shuffle
; lowered as rev64 + ext, and bsl, with no extra cmlt against zero.
define <4 x i32> @non_splat_shuffle(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: non_splat_shuffle:
; CHECK: // %bb.0:
; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT: rev64 v0.4s, v0.4s
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
%cmp = icmp slt <4 x i32> %a, %b
%sext = sext <4 x i1> %cmp to <4 x i32>
%shuf = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%lt0 = icmp slt <4 x i32> %shuf, zeroinitializer
%sel = select <4 x i1> %lt0, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %sel
}

; Narrowing bitcast: the <4 x i32> 0 / -1 mask is reinterpreted as <16 x i8>.
; Each i8 lane is a slice of an all-zeros or all-ones i32 lane, so it is itself
; 0 or -1. The CHECK lines expect only cmgt (on .4s lanes) and bsl, with no
; extra cmlt against zero on the i8 lanes.
define <16 x i8> @bitcast_narrow(<4 x i32> %a, <4 x i32> %b, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: bitcast_narrow:
; CHECK: // %bb.0:
; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
%cmp = icmp slt <4 x i32> %a, %b
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc = bitcast <4 x i32> %sext to <16 x i8>
%lt0 = icmp slt <16 x i8> %bc, zeroinitializer
%sel = select <16 x i1> %lt0, <16 x i8> %x, <16 x i8> %y
ret <16 x i8> %sel
}

; Chained case: the <4 x i32> 0 / -1 mask is first splatted from lane 2 and
; then bitcast to the narrower <8 x i16> element type before the `slt 0`
; compare. The CHECK lines expect cmgt, dup and bsl, with no extra cmlt
; against zero.
define <8 x i16> @chain_shuffle_bitcast(<4 x i32> %a, <4 x i32> %b, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: chain_shuffle_bitcast:
; CHECK: // %bb.0:
; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT: dup v0.4s, v0.s[2]
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
%cmp = icmp slt <4 x i32> %a, %b
%sext = sext <4 x i1> %cmp to <4 x i32>
%shuf = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
%bc = bitcast <4 x i32> %shuf to <8 x i16>
%lt0 = icmp slt <8 x i16> %bc, zeroinitializer
%sel = select <8 x i1> %lt0, <8 x i16> %x, <8 x i16> %y
ret <8 x i16> %sel
}

; NEGATIVE TEST: Widening bitcast should NOT be optimized
; Here the 0 / -1 mask has i8 lanes and is bitcast to wider i32 lanes. Four
; independent i8 lanes make up each i32 lane, so an i32 lane can hold a mix of
; 0x00 and 0xFF bytes and is not guaranteed to be 0 or -1. The sign test is
; therefore not an identity, and the CHECK lines expect the
; `cmlt v0.4s, v0.4s, #0` to remain.
define <4 x i32> @bitcast_widen_negative(<16 x i8> %a, <16 x i8> %b, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: bitcast_widen_negative:
; CHECK: // %bb.0:
; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
%cmp = icmp slt <16 x i8> %a, %b
%sext = sext <16 x i1> %cmp to <16 x i8>
%bc = bitcast <16 x i8> %sext to <4 x i32>
%lt0 = icmp slt <4 x i32> %bc, zeroinitializer
%sel = select <4 x i1> %lt0, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %sel
}