diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 32ea2198f7898..da5f0776f1679 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2210,6 +2210,22 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     return Result;
   if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
     return NewShuffle;
+
+  SmallVector<int> NewMask(Mask.begin(), Mask.end());
+  std::rotate(NewMask.begin(), NewMask.begin() + NewMask.size() / 2,
+              NewMask.end());
+  if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, NewMask, VT, V1, V2, DAG)) ||
+      (Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, NewMask, VT, V1, V2, DAG)) ||
+      (Result = lowerVECTOR_SHUFFLE_VILVH(DL, NewMask, VT, V1, V2, DAG)) ||
+      (Result = lowerVECTOR_SHUFFLE_VILVL(DL, NewMask, VT, V1, V2, DAG)) ||
+      (Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, NewMask, VT, V1, V2, DAG)) ||
+      (Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, NewMask, VT, V1, V2, DAG))) {
+    Result =
+        DAG.getVectorShuffle(MVT::v2i64, DL, DAG.getBitcast(MVT::v2i64, Result),
+                             DAG.getUNDEF(MVT::v2i64), {1, 0});
+    return DAG.getBitcast(VT, Result);
+  }
+
   if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG,
                                           Subtarget)))
     return Result;
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
index 4034773a8a1ff..73d5fdc262ca1 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
@@ -121,3 +121,131 @@ define <2 x double> @shufflodector_pack_od_v2f64(<2 x double> %a, <2 x double> %
   %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x double> %c
 }
+
+;; vpackev.b
+define <16 x i8> @shufflevector_pack_ev_v16i8_rotate(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v16i8_rotate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackev.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30, i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22>
+  ret <16 x i8> %c
+}
+
+;; vpackev.h
+define <8 x i16> @shufflevector_pack_ev_v8i16_rotate(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v8i16_rotate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackev.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 6, i32 14, i32 0, i32 8, i32 2, i32 10>
+  ret <8 x i16> %c
+}
+
+;; vpackev.w
+define <4 x i32> @shufflevector_pack_ev_v4i32_rotate(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v4i32_rotate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackev.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 0, i32 4>
+  ret <4 x i32> %c
+}
+
+;; vpickev.d/vpackev.d/vilvl.d
+define <2 x i64> @shufflevector_pack_ev_v2i64_rotate(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v2i64_rotate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackev.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+  %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
+  ret <2 x i64> %c
+}
+
+;; vpackev.w
+define <4 x float> @shufflevector_pack_ev_v4f32_rotate(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v4f32_rotate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackev.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 0, i32 4>
+  ret <4 x float> %c
+}
+
+;; vpickev.d/vpackev.d/vilvl.d
+define <2 x double> @shufflevector_pack_ev_v2f64_rotate(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v2f64_rotate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackev.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+  %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 0>
+  ret <2 x double> %c
+}
+
+;; vpackod.b
+define <16 x i8> @shufflevector_pack_od_v16i8_rotate(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v16i8_rotate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackod.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31, i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23>
+  ret <16 x i8> %c
+}
+
+;; vpackod.h
+define <8 x i16> @shufflevector_pack_od_v8i16_rotate(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v8i16_rotate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackod.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 13, i32 7, i32 15, i32 1, i32 9, i32 3, i32 11>
+  ret <8 x i16> %c
+}
+
+;; vpackod.w
+define <4 x i32> @shufflevector_pack_od_v4i32_rotate(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v4i32_rotate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackod.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 7, i32 1, i32 5>
+  ret <4 x i32> %c
+}
+
+;; vpickod.d/vpackod.d/vilvh.d
+define <2 x i64> @shufflodector_pack_od_v2i64_rotate(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shufflodector_pack_od_v2i64_rotate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackod.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+  %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
+  ret <2 x i64> %c
+}
+
+;; vpackod.w
+define <4 x float> @shufflodector_pack_od_v4f32_rotate(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflodector_pack_od_v4f32_rotate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackod.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 7, i32 1, i32 5>
+  ret <4 x float> %c
+}
+
+;; vpickod.d/vpackod.d/vilvh.d
+define <2 x double> @shufflodector_pack_od_v2f64_rotate(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: shufflodector_pack_od_v2f64_rotate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackod.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+  %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 1>
+  ret <2 x double> %c
+}