Skip to content

Commit 43fb6f1

Browse files
[SelectionDAG] Eliminate redundant setcc on vector comparison results
For vector values whose lanes are each known to be either 0 or -1, a signed compare against zero (X < 0) is an identity operation, provided the target represents vector booleans as 0/-1.
1 parent 5c6918f commit 43fb6f1

File tree

2 files changed

+131
-0
lines changed

2 files changed

+131
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4748,6 +4748,19 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
47484748
}
47494749
}
47504750

4751+
// setcc X, 0, setlt --> X (when X is all sign bits)
4752+
//
4753+
// When we know that X has 0 or -1 in each lane, this comparison will produce
4754+
// X. This is only true when boolean contents are represented via 0s and -1s.
4755+
if (OpVT.isVector() && VT == OpVT &&
4756+
// Check that the result of setcc is 0 or -1 in every lane.
4757+
getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent &&
4758+
// Match only for checks X < 0
4759+
Cond == ISD::SETLT && isNullOrNullSplat(N1) &&
4760+
// The identity holds only if every bit of each lane is known to equal the sign bit (i.e. each lane is 0 or -1).
4761+
DAG.ComputeNumSignBits(N0) == N0.getScalarValueSizeInBits())
4762+
return N0;
4763+
47514764
// FIXME: Support vectors.
47524765
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
47534766
const APInt &C1 = N1C->getAPIntValue();
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
3+
4+
define <4 x i32> @direct_setcc_lt0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
5+
; CHECK-LABEL: direct_setcc_lt0:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
8+
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
9+
; CHECK-NEXT: ret
10+
%cmp = icmp slt <4 x i32> %a, %b
11+
%sext = sext <4 x i1> %cmp to <4 x i32>
12+
%lt0 = icmp slt <4 x i32> %sext, zeroinitializer
13+
%sel = select <4 x i1> %lt0, <4 x i32> %x, <4 x i32> %y
14+
ret <4 x i32> %sel
15+
}
16+
17+
define <4 x i32> @shuffle_setcc_lt0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
18+
; CHECK-LABEL: shuffle_setcc_lt0:
19+
; CHECK: // %bb.0:
20+
; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
21+
; CHECK-NEXT: dup v0.4s, v0.s[2]
22+
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
23+
; CHECK-NEXT: ret
24+
%cmp = icmp slt <4 x i32> %a, %b
25+
%sext = sext <4 x i1> %cmp to <4 x i32>
26+
%dup = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
27+
%lt0 = icmp slt <4 x i32> %dup, zeroinitializer
28+
%sel = select <4 x i1> %lt0, <4 x i32> %x, <4 x i32> %y
29+
ret <4 x i32> %sel
30+
}
31+
32+
define <4 x i32> @direct_setcc_0gt(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
33+
; CHECK-LABEL: direct_setcc_0gt:
34+
; CHECK: // %bb.0:
35+
; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
36+
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
37+
; CHECK-NEXT: ret
38+
%cmp = icmp slt <4 x i32> %a, %b
39+
%sext = sext <4 x i1> %cmp to <4 x i32>
40+
%gt0 = icmp sgt <4 x i32> zeroinitializer, %sext
41+
%sel = select <4 x i1> %gt0, <4 x i32> %x, <4 x i32> %y
42+
ret <4 x i32> %sel
43+
}
44+
45+
define <8 x i16> @direct_setcc_lt0_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %x, <8 x i16> %y) {
46+
; CHECK-LABEL: direct_setcc_lt0_v8i16:
47+
; CHECK: // %bb.0:
48+
; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h
49+
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
50+
; CHECK-NEXT: ret
51+
%cmp = icmp slt <8 x i16> %a, %b
52+
%sext = sext <8 x i1> %cmp to <8 x i16>
53+
%lt0 = icmp slt <8 x i16> %sext, zeroinitializer
54+
%sel = select <8 x i1> %lt0, <8 x i16> %x, <8 x i16> %y
55+
ret <8 x i16> %sel
56+
}
57+
58+
define <4 x i32> @non_splat_shuffle(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
59+
; CHECK-LABEL: non_splat_shuffle:
60+
; CHECK: // %bb.0:
61+
; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
62+
; CHECK-NEXT: rev64 v0.4s, v0.4s
63+
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
64+
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
65+
; CHECK-NEXT: ret
66+
%cmp = icmp slt <4 x i32> %a, %b
67+
%sext = sext <4 x i1> %cmp to <4 x i32>
68+
%shuf = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
69+
%lt0 = icmp slt <4 x i32> %shuf, zeroinitializer
70+
%sel = select <4 x i1> %lt0, <4 x i32> %x, <4 x i32> %y
71+
ret <4 x i32> %sel
72+
}
73+
74+
define <16 x i8> @bitcast_narrow(<4 x i32> %a, <4 x i32> %b, <16 x i8> %x, <16 x i8> %y) {
75+
; CHECK-LABEL: bitcast_narrow:
76+
; CHECK: // %bb.0:
77+
; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
78+
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
79+
; CHECK-NEXT: ret
80+
%cmp = icmp slt <4 x i32> %a, %b
81+
%sext = sext <4 x i1> %cmp to <4 x i32>
82+
%bc = bitcast <4 x i32> %sext to <16 x i8>
83+
%lt0 = icmp slt <16 x i8> %bc, zeroinitializer
84+
%sel = select <16 x i1> %lt0, <16 x i8> %x, <16 x i8> %y
85+
ret <16 x i8> %sel
86+
}
87+
88+
define <8 x i16> @chain_shuffle_bitcast(<4 x i32> %a, <4 x i32> %b, <8 x i16> %x, <8 x i16> %y) {
89+
; CHECK-LABEL: chain_shuffle_bitcast:
90+
; CHECK: // %bb.0:
91+
; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
92+
; CHECK-NEXT: dup v0.4s, v0.s[2]
93+
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
94+
; CHECK-NEXT: ret
95+
%cmp = icmp slt <4 x i32> %a, %b
96+
%sext = sext <4 x i1> %cmp to <4 x i32>
97+
%shuf = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
98+
%bc = bitcast <4 x i32> %shuf to <8 x i16>
99+
%lt0 = icmp slt <8 x i16> %bc, zeroinitializer
100+
%sel = select <8 x i1> %lt0, <8 x i16> %x, <8 x i16> %y
101+
ret <8 x i16> %sel
102+
}
103+
104+
; NEGATIVE TEST: A widening bitcast must NOT be optimized, because an i32 lane assembled from four i8 mask lanes is not necessarily all-zeros or all-ones.
105+
define <4 x i32> @bitcast_widen_negative(<16 x i8> %a, <16 x i8> %b, <4 x i32> %x, <4 x i32> %y) {
106+
; CHECK-LABEL: bitcast_widen_negative:
107+
; CHECK: // %bb.0:
108+
; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b
109+
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
110+
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
111+
; CHECK-NEXT: ret
112+
%cmp = icmp slt <16 x i8> %a, %b
113+
%sext = sext <16 x i1> %cmp to <16 x i8>
114+
%bc = bitcast <16 x i8> %sext to <4 x i32>
115+
%lt0 = icmp slt <4 x i32> %bc, zeroinitializer
116+
%sel = select <4 x i1> %lt0, <4 x i32> %x, <4 x i32> %y
117+
ret <4 x i32> %sel
118+
}

0 commit comments

Comments
 (0)