@@ -314,8 +314,8 @@ define void @vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2(ptr %in.v
314314;
315315; AVX512F-LABEL: vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2:
316316; AVX512F: # %bb.0:
317- ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
318- ; AVX512F-NEXT: vpaddb (%rsi), %xmm0 , %xmm0
317+ ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
318+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm0 , %ymm0
319319; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,10,11,0,1,14,15,u,u,u,u,u,u,u,u]
320320; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
321321; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
@@ -324,8 +324,8 @@ define void @vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2(ptr %in.v
324324;
325325; AVX512DQ-LABEL: vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2:
326326; AVX512DQ: # %bb.0:
327- ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
328- ; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0 , %xmm0
327+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
328+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0 , %ymm0
329329; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,10,11,0,1,14,15,u,u,u,u,u,u,u,u]
330330; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
331331; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
@@ -981,7 +981,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
981981; AVX512F-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,5,0,7]
982982; AVX512F-NEXT: vmovdqa (%rdi), %ymm1
983983; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
984- ; AVX512F-NEXT: vpermd %ymm1 , %ymm0 , %ymm0
984+ ; AVX512F-NEXT: vpermd %zmm1 , %zmm0 , %zmm0
985985; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
986986; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
987987; AVX512F-NEXT: vzeroupper
@@ -992,7 +992,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
992992; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,5,0,7]
993993; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm1
994994; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
995- ; AVX512DQ-NEXT: vpermd %ymm1 , %ymm0 , %ymm0
995+ ; AVX512DQ-NEXT: vpermd %zmm1 , %zmm0 , %zmm0
996996; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
997997; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
998998; AVX512DQ-NEXT: vzeroupper
@@ -3507,12 +3507,13 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
35073507;
35083508; AVX512F-LABEL: vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12:
35093509; AVX512F: # %bb.0:
3510- ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
3510+ ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3511+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
35113512; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
35123513; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3513- ; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3514+ ; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm2
3515+ ; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7],ymm2[8],ymm1[9],ymm2[10],ymm1[11],ymm2[12],ymm1[13],ymm2[14],ymm1[15]
35143516; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
3515- ; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
35163517; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
35173518; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
35183519; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3522,12 +3523,13 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
35223523;
35233524; AVX512DQ-LABEL: vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12:
35243525; AVX512DQ: # %bb.0:
3525- ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
3526+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
3527+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
35263528; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
35273529; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3528- ; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
3530+ ; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm2
3531+ ; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7],ymm2[8],ymm1[9],ymm2[10],ymm1[11],ymm2[12],ymm1[13],ymm2[14],ymm1[15]
35293532; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
3530- ; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
35313533; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
35323534; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
35333535; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3766,10 +3768,10 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
37663768;
37673769; AVX512F-LABEL: vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6:
37683770; AVX512F: # %bb.0:
3769- ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
3771+ ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3772+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
37703773; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
37713774; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3772- ; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
37733775; AVX512F-NEXT: vpbroadcastq %xmm0, %ymm2
37743776; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3],ymm2[4],ymm1[5,6,7],ymm2[8],ymm1[9,10,11],ymm2[12],ymm1[13,14,15]
37753777; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
@@ -3782,10 +3784,10 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
37823784;
37833785; AVX512DQ-LABEL: vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6:
37843786; AVX512DQ: # %bb.0:
3785- ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
3787+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
3788+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
37863789; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
37873790; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3788- ; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
37893791; AVX512DQ-NEXT: vpbroadcastq %xmm0, %ymm2
37903792; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3],ymm2[4],ymm1[5,6,7],ymm2[8],ymm1[9,10,11],ymm2[12],ymm1[13,14,15]
37913793; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
@@ -4145,9 +4147,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
41454147;
41464148; AVX512F-LABEL: vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2:
41474149; AVX512F: # %bb.0:
4148- ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
4150+ ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
4151+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
41494152; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
4150- ; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
41514153; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
41524154; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
41534155; AVX512F-NEXT: vpbroadcastw %xmm0, %xmm0
@@ -4159,9 +4161,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
41594161;
41604162; AVX512DQ-LABEL: vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2:
41614163; AVX512DQ: # %bb.0:
4162- ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
4164+ ; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
4165+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
41634166; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
4164- ; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
41654167; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
41664168; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
41674169; AVX512DQ-NEXT: vpbroadcastw %xmm0, %xmm0
0 commit comments