1818define i32 @reduce_and4(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
1919; SSE2-LABEL: @reduce_and4(
2020; SSE2-NEXT: entry:
21- ; SSE2-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
22- ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
23- ; SSE2-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
24- ; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
25- ; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP3]]
21+ ; SSE2-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
22+ ; SSE2-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
23+ ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
24+ ; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
25+ ; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP3]]
2626; SSE2-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
2727; SSE2-NEXT: ret i32 [[OP_RDX1]]
2828;
@@ -40,11 +40,11 @@ define i32 @reduce_and4(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <
4040;
4141; AVX-LABEL: @reduce_and4(
4242; AVX-NEXT: entry:
43- ; AVX-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
44- ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
45- ; AVX-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
46- ; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
47- ; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP3]]
43+ ; AVX-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
44+ ; AVX-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
45+ ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
46+ ; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
47+ ; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP3]]
4848; AVX-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
4949; AVX-NEXT: ret i32 [[OP_RDX1]]
5050;
@@ -94,11 +94,11 @@ entry:
9494
9595define i32 @reduce_and4_transpose(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
9696; SSE2-LABEL: @reduce_and4_transpose(
97- ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
98- ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
99- ; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
100- ; SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
101- ; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[TMP4]]
97+ ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
98+ ; SSE2-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
99+ ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
100+ ; SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
101+ ; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP4]]
102102; SSE2-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
103103; SSE2-NEXT: ret i32 [[OP_RDX1]]
104104;
@@ -114,11 +114,11 @@ define i32 @reduce_and4_transpose(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i
114114; SSE42-NEXT: ret i32 [[OP_RDX3]]
115115;
116116; AVX-LABEL: @reduce_and4_transpose(
117- ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
118- ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
119- ; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
120- ; AVX-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
121- ; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[TMP4]]
117+ ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
118+ ; AVX-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
119+ ; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
120+ ; AVX-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
121+ ; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP4]]
122122; AVX-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
123123; AVX-NEXT: ret i32 [[OP_RDX1]]
124124;
0 commit comments