diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index bf26fec287636..ece2ea66dafee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4746,6 +4746,19 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
     }
   }
 
+  // setcc X, 0, setlt --> X (when X is all sign bits)
+  //
+  // When we know that X has 0 or -1 in each lane, this comparison will produce
+  // X. This is only true when boolean contents are represented as 0 and -1.
+  if (OpVT.isVector() && VT == OpVT &&
+      // Check that the result of setcc is 0 or -1.
+      getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent &&
+      // Match only comparisons of the form X < 0.
+      Cond == ISD::SETLT && isNullOrNullSplat(N1) &&
+      // The identity holds iff every lane is known to be all sign bits.
+      DAG.ComputeNumSignBits(N0) == N0.getScalarValueSizeInBits())
+    return N0;
+
   // FIXME: Support vectors.
   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
     const APInt &C1 = N1C->getAPIntValue();
diff --git a/llvm/test/CodeGen/AArch64/setcc-redundant-cmlt.ll b/llvm/test/CodeGen/AArch64/setcc-redundant-cmlt.ll
new file mode 100644
index 0000000000000..060a11c085130
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/setcc-redundant-cmlt.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define <4 x i32> @direct_setcc_lt0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: direct_setcc_lt0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-NEXT:    ret
+  %cmp = icmp slt <4 x i32> %a, %b
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %lt0 = icmp slt <4 x i32> %sext, zeroinitializer
+  %sel = select <4 x i1> %lt0, <4 x i32> %x, <4 x i32> %y
+  ret <4 x i32> %sel
+}
+
+define <4 x i32> @shuffle_setcc_lt0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: shuffle_setcc_lt0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    dup v0.4s, v0.s[2]
+; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-NEXT:    ret
+  %cmp = icmp slt <4 x i32> %a, %b
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %dup = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  %lt0 = icmp slt <4 x i32> %dup, zeroinitializer
+  %sel = select <4 x i1> %lt0, <4 x i32> %x, <4 x i32> %y
+  ret <4 x i32> %sel
+}
+
+define <4 x i32> @direct_setcc_0gt(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: direct_setcc_0gt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-NEXT:    ret
+  %cmp = icmp slt <4 x i32> %a, %b
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %gt0 = icmp sgt <4 x i32> zeroinitializer, %sext
+  %sel = select <4 x i1> %gt0, <4 x i32> %x, <4 x i32> %y
+  ret <4 x i32> %sel
+}
+
+define <8 x i16> @direct_setcc_lt0_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: direct_setcc_lt0_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmgt v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-NEXT:    ret
+  %cmp = icmp slt <8 x i16> %a, %b
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  %lt0 = icmp slt <8 x i16> %sext, zeroinitializer
+  %sel = select <8 x i1> %lt0, <8 x i16> %x, <8 x i16> %y
+  ret <8 x i16> %sel
+}
+
+define <4 x i32> @non_splat_shuffle(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: non_splat_shuffle:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    rev64 v0.4s, v0.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-NEXT:    ret
+  %cmp = icmp slt <4 x i32> %a, %b
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %shuf = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %lt0 = icmp slt <4 x i32> %shuf, zeroinitializer
+  %sel = select <4 x i1> %lt0, <4 x i32> %x, <4 x i32> %y
+  ret <4 x i32> %sel
+}
+
+define <16 x i8> @bitcast_narrow(<4 x i32> %a, <4 x i32> %b, <16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: bitcast_narrow:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-NEXT:    ret
+  %cmp = icmp slt <4 x i32> %a, %b
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %bc = bitcast <4 x i32> %sext to <16 x i8>
+  %lt0 = icmp slt <16 x i8> %bc, zeroinitializer
+  %sel = select <16 x i1> %lt0, <16 x i8> %x, <16 x i8> %y
+  ret <16 x i8> %sel
+}
+
+define <8 x i16> @chain_shuffle_bitcast(<4 x i32> %a, <4 x i32> %b, <8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: chain_shuffle_bitcast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    dup v0.4s, v0.s[2]
+; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-NEXT:    ret
+  %cmp = icmp slt <4 x i32> %a, %b
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %shuf = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  %bc = bitcast <4 x i32> %shuf to <8 x i16>
+  %lt0 = icmp slt <8 x i16> %bc, zeroinitializer
+  %sel = select <8 x i1> %lt0, <8 x i16> %x, <8 x i16> %y
+  ret <8 x i16> %sel
+}
+
+; NEGATIVE TEST: Widening bitcast should NOT be optimized
+define <4 x i32> @bitcast_widen_negative(<16 x i8> %a, <16 x i8> %b, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: bitcast_widen_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmgt v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
+; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-NEXT:    ret
+  %cmp = icmp slt <16 x i8> %a, %b
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  %bc = bitcast <16 x i8> %sext to <4 x i32>
+  %lt0 = icmp slt <4 x i32> %bc, zeroinitializer
+  %sel = select <4 x i1> %lt0, <4 x i32> %x, <4 x i32> %y
+  ret <4 x i32> %sel
+}
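
A minimal standalone sketch of the fold in isolation (not part of the patch; the function name @ashr_then_lt0 is made up). The arithmetic shift right by 31 forces every lane of %s to be 0 or -1, so DAG.ComputeNumSignBits(%s) equals the scalar bit width and the new code should fold the "X < 0" setcc to %s itself, leaving just an sshr in the llc output:

define <4 x i32> @ashr_then_lt0(<4 x i32> %x) {
  ; Each lane of %s is all sign bits: either 0 or -1.
  %s = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  ; With ZeroOrNegativeOneBooleanContent, this comparison (a v4i32 setcc once
  ; the sext is merged into it) reproduces %s exactly, so it should fold away.
  %c = icmp slt <4 x i32> %s, zeroinitializer
  %r = sext <4 x i1> %c to <4 x i32>
  ret <4 x i32> %r
}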