From 4c1c21bc6f210abca8a1da45cc6aedaac8d67204 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Tue, 9 Dec 2025 19:55:57 +0800 Subject: [PATCH 1/2] part of tests --- .../lsx/ir-instruction/shuffle-as-vpack.ll | 142 ++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll index 4034773a8a1ff..1a733492e961f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll @@ -121,3 +121,145 @@ define <2 x double> @shufflodector_pack_od_v2f64(<2 x double> %a, <2 x double> % %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %c } + +;; vpackev.b +define <16 x i8> @shufflevector_pack_ev_v16i8_rotate(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v16i8_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI12_0) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI12_0) +; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +;; vpackev.h +define <8 x i16> @shufflevector_pack_ev_v8i16_rotate(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v8i16_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI13_0) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI13_0) +; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +;; vpackev.w +define <4 x i32> @shufflevector_pack_ev_v4i32_rotate(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v4i32_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI14_0) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI14_0) +; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 
+; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +;; vpickev.d/vpackev.d/vilvl.d +define <2 x i64> @shufflevector_pack_ev_v2i64_rotate(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v2i64_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret + %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %c +} + +;; vpackev.w +define <4 x float> @shufflevector_pack_ev_v4f32_rotate(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v4f32_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI16_0) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI16_0) +; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} + +;; vpickev.d/vpackev.d/vilvl.d +define <2 x double> @shufflevector_pack_ev_v2f64_rotate(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v2f64_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret + %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %c +} + +;; vpackod.b +define <16 x i8> @shufflevector_pack_od_v16i8_rotate(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_pack_od_v16i8_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI18_0) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI18_0) +; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +;; vpackod.h +define <8 x i16> @shufflevector_pack_od_v8i16_rotate(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_pack_od_v8i16_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI19_0) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI19_0) +; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, 
$vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +;; vpackod.w +define <4 x i32> @shufflevector_pack_od_v4i32_rotate(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_pack_od_v4i32_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI20_0) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI20_0) +; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +;; vpickod.d/vpackod.d/vilvh.d +define <2 x i64> @shufflodector_pack_od_v2i64_rotate(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shufflodector_pack_od_v2i64_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackod.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret + %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %c +} + +;; vpackod.w +define <4 x float> @shufflodector_pack_od_v4f32_rotate(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflodector_pack_od_v4f32_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI22_0) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI22_0) +; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} + +;; vpickod.d/vpackod.d/vilvh.d +define <2 x double> @shufflodector_pack_od_v2f64_rotate(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: shufflodector_pack_od_v2f64_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackod.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret + %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %c +} From 437229b5ad52ab887aa73275bf29c8611a500901 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Tue, 9 Dec 2025 19:56:21 +0800 Subject: [PATCH 2/2] WIP: future work for shufflevector Needs: - Cleaner lowering logic and more tests. - More careful analysis: the current matching conditions are ad hoc and were written without systematic consideration. - LASX support. 
--- .../LoongArch/LoongArchISelLowering.cpp | 16 +++++++ .../lsx/ir-instruction/shuffle-as-vpack.ll | 46 +++++++------------ 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 32ea2198f7898..da5f0776f1679 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2210,6 +2210,22 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, return Result; if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG)) return NewShuffle; + + SmallVector NewMask(Mask.begin(), Mask.end()); + std::rotate(NewMask.begin(), NewMask.begin() + NewMask.size() / 2, + NewMask.end()); + if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, NewMask, VT, V1, V2, DAG)) || + (Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, NewMask, VT, V1, V2, DAG)) || + (Result = lowerVECTOR_SHUFFLE_VILVH(DL, NewMask, VT, V1, V2, DAG)) || + (Result = lowerVECTOR_SHUFFLE_VILVL(DL, NewMask, VT, V1, V2, DAG)) || + (Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, NewMask, VT, V1, V2, DAG)) || + (Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, NewMask, VT, V1, V2, DAG))) { + Result = + DAG.getVectorShuffle(MVT::v2i64, DL, DAG.getBitcast(MVT::v2i64, Result), + DAG.getUNDEF(MVT::v2i64), {1, 0}); + return DAG.getBitcast(VT, Result); + } + if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget))) return Result; diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll index 1a733492e961f..73d5fdc262ca1 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll @@ -126,9 +126,8 @@ define <2 x double> @shufflodector_pack_od_v2f64(<2 x double> %a, <2 x double> % define <16 x i8> @shufflevector_pack_ev_v16i8_rotate(<16 x i8> 
%a, <16 x i8> %b) { ; CHECK-LABEL: shufflevector_pack_ev_v16i8_rotate: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI12_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI12_0) -; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vpackev.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1 ; CHECK-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %c @@ -138,10 +137,8 @@ define <16 x i8> @shufflevector_pack_ev_v16i8_rotate(<16 x i8> %a, <16 x i8> %b) define <8 x i16> @shufflevector_pack_ev_v8i16_rotate(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: shufflevector_pack_ev_v8i16_rotate: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI13_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI13_0) -; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0 -; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: vpackev.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1 ; CHECK-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %c @@ -151,10 +148,8 @@ define <8 x i16> @shufflevector_pack_ev_v8i16_rotate(<8 x i16> %a, <8 x i16> %b) define <4 x i32> @shufflevector_pack_ev_v4i32_rotate(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: shufflevector_pack_ev_v4i32_rotate: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI14_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI14_0) -; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 -; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: vpackev.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1 ; CHECK-NEXT: ret %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -174,10 +169,8 @@ define <2 x i64> @shufflevector_pack_ev_v2i64_rotate(<2 x i64> %a, <2 x i64> %b) define <4 x float> @shufflevector_pack_ev_v4f32_rotate(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: shufflevector_pack_ev_v4f32_rotate: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI16_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI16_0) -; CHECK-NEXT: vshuf.w 
$vr2, $vr1, $vr0 -; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: vpackev.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1 ; CHECK-NEXT: ret %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %c @@ -197,9 +190,8 @@ define <2 x double> @shufflevector_pack_ev_v2f64_rotate(<2 x double> %a, <2 x do define <16 x i8> @shufflevector_pack_od_v16i8_rotate(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: shufflevector_pack_od_v16i8_rotate: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI18_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI18_0) -; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vpackod.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1 ; CHECK-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %c @@ -209,10 +201,8 @@ define <16 x i8> @shufflevector_pack_od_v16i8_rotate(<16 x i8> %a, <16 x i8> %b) define <8 x i16> @shufflevector_pack_od_v8i16_rotate(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: shufflevector_pack_od_v8i16_rotate: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI19_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI19_0) -; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0 -; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: vpackod.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1 ; CHECK-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %c @@ -222,10 +212,8 @@ define <8 x i16> @shufflevector_pack_od_v8i16_rotate(<8 x i16> %a, <8 x i16> %b) define <4 x i32> @shufflevector_pack_od_v4i32_rotate(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: shufflevector_pack_od_v4i32_rotate: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI20_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI20_0) -; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 -; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: vpackod.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1 ; CHECK-NEXT: ret %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret 
<4 x i32> %c @@ -245,10 +233,8 @@ define <2 x i64> @shufflodector_pack_od_v2i64_rotate(<2 x i64> %a, <2 x i64> %b) define <4 x float> @shufflodector_pack_od_v4f32_rotate(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: shufflodector_pack_od_v4f32_rotate: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI22_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI22_0) -; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 -; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: vpackod.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1 ; CHECK-NEXT: ret %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %c