From dc4777e7be48c13d7fe0ff3d26d0c50a60551ed5 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 29 Nov 2025 18:54:23 -0500 Subject: [PATCH 01/16] [AArch64] Add tests for roundeven+conversion fusion --- .../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll | 372 ++++++++++++++++++ 1 file changed, 372 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll index a729772f2897a..55bc436824504 100644 --- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll @@ -543,6 +543,154 @@ define double @fcvtau_dd_round_simd(double %a) { ret double %bc } +define double @fcvtns_ds_roundeven_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_ds_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn s0, s0 +; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_ds_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn s0, s0 +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtns_sd_roundeven_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_sd_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn d0, d0 +; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_sd_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn d0, d0 +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtns_ss_roundeven_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_ss_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn s0, s0 +; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0 +; CHECK-NOFPRCVT-NEXT: ret 
+; +; CHECK-LABEL: fcvtns_ss_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn s0, s0 +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtns_dd_roundeven_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_dd_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn d0, d0 +; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_dd_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn d0, d0 +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtnu_ds_roundeven_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_ds_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn s0, s0 +; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_ds_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn s0, s0 +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtnu_sd_roundeven_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_sd_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn d0, d0 +; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_sd_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn d0, d0 +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtnu_ss_roundeven_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_ss_roundeven_simd: +; 
CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn s0, s0 +; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_ss_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn s0, s0 +; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = fptoui float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtnu_dd_roundeven_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_dd_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn d0, d0 +; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_dd_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn d0, d0 +; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = fptoui double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} define double @fcvtms_ds_round_simd(float %a) { ; CHECK-NOFPRCVT-LABEL: fcvtms_ds_round_simd: @@ -1342,6 +1490,230 @@ define double @fcvtau_dd_simd(double %a) { ret double %bc } +define float @fcvtns_sh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_sh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn h0, h0 +; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_sh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn h0, h0 +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtns_dh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_dh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn h0, h0 +; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_dh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn h0, h0 +; CHECK-NEXT: 
fcvtzs d0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtns_ds_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_ds_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn s0, s0 +; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_ds_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn s0, s0 +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtns_sd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_sd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn d0, d0 +; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_sd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn d0, d0 +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtns_ss_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_ss_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn s0, s0 +; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_ss_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn s0, s0 +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtns_dd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_dd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn d0, d0 +; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0 +; CHECK-NOFPRCVT-NEXT: ret 
+; +; CHECK-LABEL: fcvtns_dd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn d0, d0 +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtnu_sh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_sh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn h0, h0 +; CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_sh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn h0, h0 +; CHECK-NEXT: fcvtzu s0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtnu_dh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_dh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn h0, h0 +; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_dh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn h0, h0 +; CHECK-NEXT: fcvtzu d0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtnu_ds_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_ds_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn s0, s0 +; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_ds_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn s0, s0 +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtnu_sd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: 
fcvtnu_sd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn d0, d0 +; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_sd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn d0, d0 +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtnu_ss_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_ss_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn s0, s0 +; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_ss_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn s0, s0 +; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtnu_dd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_dd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: frintn d0, d0 +; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_dd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn d0, d0 +; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + define float @fcvtms_sh_simd(half %a) { ; CHECK-NOFPRCVT-LABEL: fcvtms_sh_simd: ; CHECK-NOFPRCVT: // %bb.0: From c4763b27468f0de5da5dfde42bebd0c796bcad28 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 29 Nov 2025 18:54:40 -0500 Subject: [PATCH 02/16] [AArch64] Add tests for vector round+conversion fusion --- .../CodeGen/AArch64/arm64-vcvt-fused-round.ll | 882 ++++++++++++++++++ 1 file changed, 882 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll diff --git 
a/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll new file mode 100644 index 0000000000000..63638adbc6174 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll @@ -0,0 +1,882 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-NO16 +; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 + +; +; Tests for fused round + convert to int patterns (FCVTAS, FCVTAU, FCVTMS, FCVTMU, etc.) +; + +; +; round + signed -> fcvtas +; + +define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtas_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.2s, v0.2s +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A) + %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtas_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A) + %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtas_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A) + %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +; +; round + unsigned -> fcvtau +; + +define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtau_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.2s, v0.2s +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A) + %tmp2 = fptoui <2 x float> %tmp1 to <2 
x i32> + ret <2 x i32> %tmp2 +} + +define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtau_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A) + %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtau_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A) + %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +; +; roundeven + signed -> fcvtns +; + +define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtns_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.2s, v0.2s +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A) + %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtns_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A) + %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtns_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A) + %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +; +; roundeven + unsigned -> fcvtnu +; + +define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtnu_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.2s, v0.2s +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> 
@llvm.roundeven.v2f32(<2 x float> %A) + %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtnu_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A) + %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtnu_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A) + %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +; +; floor + signed -> fcvtms +; + +define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtms_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.2s, v0.2s +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A) + %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtms_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A) + %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtms_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A) + %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +; +; floor + unsigned -> fcvtmu +; + +define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtmu_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.2s, v0.2s +; CHECK-NEXT: 
fcvtzu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A) + %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <4 x i32> @fcvtmu_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtmu_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A) + %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtmu_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A) + %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +; +; ceil + signed -> fcvtps +; + +define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtps_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp v0.2s, v0.2s +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A) + %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtps_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A) + %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtps_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A) + %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +; +; ceil + unsigned -> fcvtpu +; + +define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtpu_2s: +; CHECK: // %bb.0: +; 
CHECK-NEXT: frintp v0.2s, v0.2s +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A) + %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtpu_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A) + %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtpu_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A) + %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +; +; trunc + signed -> fcvtzs (already the default, but test the fusion) +; + +define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtzs_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.2s, v0.2s +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A) + %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <4 x i32> @fcvtzs_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtzs_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A) + %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtzs_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A) + %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +; +; trunc + unsigned -> fcvtzu +; + +define <2 x 
i32> @fcvtzu_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtzu_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.2s, v0.2s +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A) + %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <4 x i32> @fcvtzu_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtzu_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A) + %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtzu_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A) + %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +; +; f16 tests (require +fullfp16) +; + +define <4 x i16> @fcvtas_4h(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtas_4h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frinta v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: xtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtas_4h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frinta v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.round.v4f16(<4 x half> %A) + %tmp2 = fptosi <4 x half> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <8 x i16> @fcvtas_8h(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtas_8h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frinta v1.4s, v1.4s +; CHECK-NO16-NEXT: frinta v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; 
CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h +; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtas_8h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frinta v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.round.v8f16(<8 x half> %A) + %tmp2 = fptosi <8 x half> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +define <4 x i16> @fcvtau_4h(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtau_4h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frinta v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: xtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtau_4h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frinta v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.round.v4f16(<4 x half> %A) + %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <8 x i16> @fcvtau_8h(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtau_8h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frinta v1.4s, v1.4s +; CHECK-NO16-NEXT: frinta v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h +; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtau_8h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frinta v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> 
@llvm.round.v8f16(<8 x half> %A) + %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +define <4 x i16> @fcvtns_4h(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtns_4h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintn v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: xtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtns_4h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintn v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A) + %tmp2 = fptosi <4 x half> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <8 x i16> @fcvtns_8h(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtns_8h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintn v1.4s, v1.4s +; CHECK-NO16-NEXT: frintn v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h +; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtns_8h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintn v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A) + %tmp2 = fptosi <8 x half> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +define <4 x i16> @fcvtnu_4h(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtnu_4h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintn v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: xtn v0.4h, 
v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtnu_4h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintn v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A) + %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <8 x i16> @fcvtnu_8h(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtnu_8h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintn v1.4s, v1.4s +; CHECK-NO16-NEXT: frintn v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h +; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtnu_8h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintn v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A) + %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +define <4 x i16> @fcvtms_4h(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtms_4h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintm v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: xtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtms_4h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintm v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A) + %tmp2 = fptosi <4 x half> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <8 x i16> @fcvtms_8h(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtms_8h: +; CHECK-NO16: // %bb.0: +; 
CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintm v1.4s, v1.4s +; CHECK-NO16-NEXT: frintm v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h +; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtms_8h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintm v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A) + %tmp2 = fptosi <8 x half> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +define <4 x i16> @fcvtmu_4h(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtmu_4h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintm v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: xtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtmu_4h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintm v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A) + %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <8 x i16> @fcvtmu_8h(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtmu_8h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintm v1.4s, v1.4s +; CHECK-NO16-NEXT: frintm v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h +; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; 
CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtmu_8h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintm v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A) + %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +define <4 x i16> @fcvtps_4h(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtps_4h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintp v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: xtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtps_4h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintp v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.ceil.v4f16(<4 x half> %A) + %tmp2 = fptosi <4 x half> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <8 x i16> @fcvtps_8h(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtps_8h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintp v1.4s, v1.4s +; CHECK-NO16-NEXT: frintp v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h +; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtps_8h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintp v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.ceil.v8f16(<8 x half> %A) + %tmp2 = fptosi <8 x half> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +define <4 x i16> @fcvtpu_4h(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtpu_4h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl 
v0.4s, v0.4h +; CHECK-NO16-NEXT: frintp v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: xtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtpu_4h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintp v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.ceil.v4f16(<4 x half> %A) + %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <8 x i16> @fcvtpu_8h(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtpu_8h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintp v1.4s, v1.4s +; CHECK-NO16-NEXT: frintp v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h +; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtpu_8h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintp v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.ceil.v8f16(<8 x half> %A) + %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +define <4 x i16> @fcvtzs_4h(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtzs_4h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintz v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: xtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzs_4h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintz v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.trunc.v4f16(<4 x half> %A) + 
%tmp2 = fptosi <4 x half> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <8 x i16> @fcvtzs_8h(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtzs_8h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintz v1.4s, v1.4s +; CHECK-NO16-NEXT: frintz v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h +; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzs_8h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintz v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.trunc.v8f16(<8 x half> %A) + %tmp2 = fptosi <8 x half> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +define <4 x i16> @fcvtzu_4h(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtzu_4h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintz v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: xtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_4h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintz v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.trunc.v4f16(<4 x half> %A) + %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <8 x i16> @fcvtzu_8h(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtzu_8h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintz v1.4s, v1.4s +; CHECK-NO16-NEXT: frintz v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl2 v0.4s, 
v1.8h +; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_8h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintz v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.trunc.v8f16(<8 x half> %A) + %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +; Intrinsic declarations +declare <2 x float> @llvm.round.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.round.v4f32(<4 x float>) nounwind readnone +declare <2 x double> @llvm.round.v2f64(<2 x double>) nounwind readnone +declare <4 x half> @llvm.round.v4f16(<4 x half>) nounwind readnone +declare <8 x half> @llvm.round.v8f16(<8 x half>) nounwind readnone + +declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) nounwind readnone +declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) nounwind readnone +declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) nounwind readnone +declare <8 x half> @llvm.roundeven.v8f16(<8 x half>) nounwind readnone + +declare <2 x float> @llvm.floor.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone +declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone +declare <4 x half> @llvm.floor.v4f16(<4 x half>) nounwind readnone +declare <8 x half> @llvm.floor.v8f16(<8 x half>) nounwind readnone + +declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone +declare <4 x half> @llvm.ceil.v4f16(<4 x half>) nounwind readnone +declare <8 x half> @llvm.ceil.v8f16(<8 x half>) nounwind readnone + +declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind 
readnone +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone +declare <4 x half> @llvm.trunc.v4f16(<4 x half>) nounwind readnone +declare <8 x half> @llvm.trunc.v8f16(<8 x half>) nounwind readnone From f6c8b7803fefca2f8182605d9d1ba35b49b2c5c8 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 29 Nov 2025 18:58:03 -0500 Subject: [PATCH 03/16] [AArch64] Add float-to-int codegen pattern for roundeven+fptoi --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 18 +-- .../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll | 120 ++++++------------ 2 files changed, 50 insertions(+), 88 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index da93a2b13fc11..e94c2e06d3594 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6817,14 +6817,16 @@ multiclass FPToIntegerPats(INST # v1i64) f64:$Rn)>; } -defm : FPToIntegerPats; -defm : FPToIntegerPats; -defm : FPToIntegerPats; -defm : FPToIntegerPats; -defm : FPToIntegerPats; -defm : FPToIntegerPats; -defm : FPToIntegerPats; -defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; // f16 -> s16 conversions let Predicates = [HasFullFP16] in { diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll index 55bc436824504..48e7972b04a6c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll @@ -546,15 +546,13 @@ define double @fcvtau_dd_round_simd(double %a) { define double @fcvtns_ds_roundeven_simd(float %a) { ; CHECK-NOFPRCVT-LABEL: fcvtns_ds_roundeven_simd: ; CHECK-NOFPRCVT: // %bb.0: -; 
CHECK-NOFPRCVT-NEXT: frintn s0, s0 -; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0 +; CHECK-NOFPRCVT-NEXT: fcvtns x8, s0 ; CHECK-NOFPRCVT-NEXT: fmov d0, x8 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtns_ds_roundeven_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn s0, s0 -; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: fcvtns d0, s0 ; CHECK-NEXT: ret %r = call float @llvm.roundeven.f32(float %a) %i = fptosi float %r to i64 @@ -565,15 +563,13 @@ define double @fcvtns_ds_roundeven_simd(float %a) { define float @fcvtns_sd_roundeven_simd(double %a) { ; CHECK-NOFPRCVT-LABEL: fcvtns_sd_roundeven_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn d0, d0 -; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0 +; CHECK-NOFPRCVT-NEXT: fcvtns w8, d0 ; CHECK-NOFPRCVT-NEXT: fmov s0, w8 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtns_sd_roundeven_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn d0, d0 -; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: fcvtns s0, d0 ; CHECK-NEXT: ret %r = call double @llvm.roundeven.f64(double %a) %i = fptosi double %r to i32 @@ -584,14 +580,12 @@ define float @fcvtns_sd_roundeven_simd(double %a) { define float @fcvtns_ss_roundeven_simd(float %a) { ; CHECK-NOFPRCVT-LABEL: fcvtns_ss_roundeven_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn s0, s0 -; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0 +; CHECK-NOFPRCVT-NEXT: fcvtns s0, s0 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtns_ss_roundeven_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn s0, s0 -; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: fcvtns s0, s0 ; CHECK-NEXT: ret %r = call float @llvm.roundeven.f32(float %a) %i = fptosi float %r to i32 @@ -602,14 +596,12 @@ define float @fcvtns_ss_roundeven_simd(float %a) { define double @fcvtns_dd_roundeven_simd(double %a) { ; CHECK-NOFPRCVT-LABEL: fcvtns_dd_roundeven_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn d0, d0 -; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0 +; CHECK-NOFPRCVT-NEXT: fcvtns d0, d0 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: 
fcvtns_dd_roundeven_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn d0, d0 -; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: fcvtns d0, d0 ; CHECK-NEXT: ret %r = call double @llvm.roundeven.f64(double %a) %i = fptosi double %r to i64 @@ -621,15 +613,13 @@ define double @fcvtns_dd_roundeven_simd(double %a) { define double @fcvtnu_ds_roundeven_simd(float %a) { ; CHECK-NOFPRCVT-LABEL: fcvtnu_ds_roundeven_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn s0, s0 -; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0 +; CHECK-NOFPRCVT-NEXT: fcvtnu x8, s0 ; CHECK-NOFPRCVT-NEXT: fmov d0, x8 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtnu_ds_roundeven_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn s0, s0 -; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: fcvtnu d0, s0 ; CHECK-NEXT: ret %r = call float @llvm.roundeven.f32(float %a) %i = fptoui float %r to i64 @@ -640,15 +630,13 @@ define double @fcvtnu_ds_roundeven_simd(float %a) { define float @fcvtnu_sd_roundeven_simd(double %a) { ; CHECK-NOFPRCVT-LABEL: fcvtnu_sd_roundeven_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn d0, d0 -; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0 +; CHECK-NOFPRCVT-NEXT: fcvtnu w8, d0 ; CHECK-NOFPRCVT-NEXT: fmov s0, w8 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtnu_sd_roundeven_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn d0, d0 -; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: fcvtnu s0, d0 ; CHECK-NEXT: ret %r = call double @llvm.roundeven.f64(double %a) %i = fptoui double %r to i32 @@ -659,14 +647,12 @@ define float @fcvtnu_sd_roundeven_simd(double %a) { define float @fcvtnu_ss_roundeven_simd(float %a) { ; CHECK-NOFPRCVT-LABEL: fcvtnu_ss_roundeven_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn s0, s0 -; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0 +; CHECK-NOFPRCVT-NEXT: fcvtnu s0, s0 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtnu_ss_roundeven_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn s0, s0 -; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: fcvtnu s0, s0 ; CHECK-NEXT: ret %r = call float 
@llvm.roundeven.f32(float %a) %i = fptoui float %r to i32 @@ -677,14 +663,12 @@ define float @fcvtnu_ss_roundeven_simd(float %a) { define double @fcvtnu_dd_roundeven_simd(double %a) { ; CHECK-NOFPRCVT-LABEL: fcvtnu_dd_roundeven_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn d0, d0 -; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0 +; CHECK-NOFPRCVT-NEXT: fcvtnu d0, d0 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtnu_dd_roundeven_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn d0, d0 -; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: fcvtnu d0, d0 ; CHECK-NEXT: ret %r = call double @llvm.roundeven.f64(double %a) %i = fptoui double %r to i64 @@ -1493,15 +1477,13 @@ define double @fcvtau_dd_simd(double %a) { define float @fcvtns_sh_simd(half %a) { ; CHECK-NOFPRCVT-LABEL: fcvtns_sh_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn h0, h0 -; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0 +; CHECK-NOFPRCVT-NEXT: fcvtns w8, h0 ; CHECK-NOFPRCVT-NEXT: fmov s0, w8 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtns_sh_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn h0, h0 -; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: fcvtns s0, h0 ; CHECK-NEXT: ret %r = call half @llvm.roundeven.f16(half %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) @@ -1512,15 +1494,13 @@ define float @fcvtns_sh_simd(half %a) { define double @fcvtns_dh_simd(half %a) { ; CHECK-NOFPRCVT-LABEL: fcvtns_dh_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn h0, h0 -; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0 +; CHECK-NOFPRCVT-NEXT: fcvtns x8, h0 ; CHECK-NOFPRCVT-NEXT: fmov d0, x8 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtns_dh_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn h0, h0 -; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: fcvtns d0, h0 ; CHECK-NEXT: ret %r = call half @llvm.roundeven.f16(half %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) @@ -1531,15 +1511,13 @@ define double @fcvtns_dh_simd(half %a) { define double @fcvtns_ds_simd(float %a) { ; 
CHECK-NOFPRCVT-LABEL: fcvtns_ds_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn s0, s0 -; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0 +; CHECK-NOFPRCVT-NEXT: fcvtns x8, s0 ; CHECK-NOFPRCVT-NEXT: fmov d0, x8 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtns_ds_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn s0, s0 -; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: fcvtns d0, s0 ; CHECK-NEXT: ret %r = call float @llvm.roundeven.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) @@ -1550,15 +1528,13 @@ define double @fcvtns_ds_simd(float %a) { define float @fcvtns_sd_simd(double %a) { ; CHECK-NOFPRCVT-LABEL: fcvtns_sd_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn d0, d0 -; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0 +; CHECK-NOFPRCVT-NEXT: fcvtns w8, d0 ; CHECK-NOFPRCVT-NEXT: fmov s0, w8 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtns_sd_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn d0, d0 -; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: fcvtns s0, d0 ; CHECK-NEXT: ret %r = call double @llvm.roundeven.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) @@ -1569,14 +1545,12 @@ define float @fcvtns_sd_simd(double %a) { define float @fcvtns_ss_simd(float %a) { ; CHECK-NOFPRCVT-LABEL: fcvtns_ss_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn s0, s0 -; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0 +; CHECK-NOFPRCVT-NEXT: fcvtns s0, s0 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtns_ss_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn s0, s0 -; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: fcvtns s0, s0 ; CHECK-NEXT: ret %r = call float @llvm.roundeven.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) @@ -1587,14 +1561,12 @@ define float @fcvtns_ss_simd(float %a) { define double @fcvtns_dd_simd(double %a) { ; CHECK-NOFPRCVT-LABEL: fcvtns_dd_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn d0, d0 -; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0 +; CHECK-NOFPRCVT-NEXT: fcvtns d0, d0 ; CHECK-NOFPRCVT-NEXT: ret ; ; 
CHECK-LABEL: fcvtns_dd_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn d0, d0 -; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: fcvtns d0, d0 ; CHECK-NEXT: ret %r = call double @llvm.roundeven.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) @@ -1605,15 +1577,13 @@ define double @fcvtns_dd_simd(double %a) { define float @fcvtnu_sh_simd(half %a) { ; CHECK-NOFPRCVT-LABEL: fcvtnu_sh_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn h0, h0 -; CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0 +; CHECK-NOFPRCVT-NEXT: fcvtnu w8, h0 ; CHECK-NOFPRCVT-NEXT: fmov s0, w8 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtnu_sh_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn h0, h0 -; CHECK-NEXT: fcvtzu s0, h0 +; CHECK-NEXT: fcvtnu s0, h0 ; CHECK-NEXT: ret %r = call half @llvm.roundeven.f16(half %a) nounwind readnone %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) @@ -1624,15 +1594,13 @@ define float @fcvtnu_sh_simd(half %a) { define double @fcvtnu_dh_simd(half %a) { ; CHECK-NOFPRCVT-LABEL: fcvtnu_dh_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn h0, h0 -; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0 +; CHECK-NOFPRCVT-NEXT: fcvtnu x8, h0 ; CHECK-NOFPRCVT-NEXT: fmov d0, x8 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtnu_dh_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn h0, h0 -; CHECK-NEXT: fcvtzu d0, h0 +; CHECK-NEXT: fcvtnu d0, h0 ; CHECK-NEXT: ret %r = call half @llvm.roundeven.f16(half %a) nounwind readnone %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) @@ -1643,15 +1611,13 @@ define double @fcvtnu_dh_simd(half %a) { define double @fcvtnu_ds_simd(float %a) { ; CHECK-NOFPRCVT-LABEL: fcvtnu_ds_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn s0, s0 -; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0 +; CHECK-NOFPRCVT-NEXT: fcvtnu x8, s0 ; CHECK-NOFPRCVT-NEXT: fmov d0, x8 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtnu_ds_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn s0, s0 -; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: fcvtnu d0, s0 ; CHECK-NEXT: ret %r = 
call float @llvm.roundeven.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) @@ -1662,15 +1628,13 @@ define double @fcvtnu_ds_simd(float %a) { define float @fcvtnu_sd_simd(double %a) { ; CHECK-NOFPRCVT-LABEL: fcvtnu_sd_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn d0, d0 -; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0 +; CHECK-NOFPRCVT-NEXT: fcvtnu w8, d0 ; CHECK-NOFPRCVT-NEXT: fmov s0, w8 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtnu_sd_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn d0, d0 -; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: fcvtnu s0, d0 ; CHECK-NEXT: ret %r = call double @llvm.roundeven.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) @@ -1681,14 +1645,12 @@ define float @fcvtnu_sd_simd(double %a) { define float @fcvtnu_ss_simd(float %a) { ; CHECK-NOFPRCVT-LABEL: fcvtnu_ss_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn s0, s0 -; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0 +; CHECK-NOFPRCVT-NEXT: fcvtnu s0, s0 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtnu_ss_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn s0, s0 -; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: fcvtnu s0, s0 ; CHECK-NEXT: ret %r = call float @llvm.roundeven.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) @@ -1699,14 +1661,12 @@ define float @fcvtnu_ss_simd(float %a) { define double @fcvtnu_dd_simd(double %a) { ; CHECK-NOFPRCVT-LABEL: fcvtnu_dd_simd: ; CHECK-NOFPRCVT: // %bb.0: -; CHECK-NOFPRCVT-NEXT: frintn d0, d0 -; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0 +; CHECK-NOFPRCVT-NEXT: fcvtnu d0, d0 ; CHECK-NOFPRCVT-NEXT: ret ; ; CHECK-LABEL: fcvtnu_dd_simd: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn d0, d0 -; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: fcvtnu d0, d0 ; CHECK-NEXT: ret %r = call double @llvm.roundeven.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) From d5b88fc7716052ea3dd5507d68b9db80d59bddaa Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 29 Nov 2025 19:59:53 -0500 Subject: [PATCH 04/16] [AArch64] 
Use vector rounding conversion instructions --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 27 ++++ .../CodeGen/AArch64/arm64-vcvt-fused-round.ll | 130 ++++++------------ llvm/test/CodeGen/AArch64/shuffle-tbl34.ll | 26 ++-- 3 files changed, 76 insertions(+), 107 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index e94c2e06d3594..02ae9546f7ccf 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5830,6 +5830,33 @@ multiclass SIMDTwoVectorFPToIntSatPats; defm : SIMDTwoVectorFPToIntSatPats; +// Fused round + convert to int patterns for vectors +multiclass SIMDTwoVectorFPToIntRoundPats { + let Predicates = [HasFullFP16] in { + def : Pat<(v4i16 (to_int (round v4f16:$Rn))), + (!cast(INST # v4f16) v4f16:$Rn)>; + def : Pat<(v8i16 (to_int (round v8f16:$Rn))), + (!cast(INST # v8f16) v8f16:$Rn)>; + } + def : Pat<(v2i32 (to_int (round v2f32:$Rn))), + (!cast(INST # v2f32) v2f32:$Rn)>; + def : Pat<(v4i32 (to_int (round v4f32:$Rn))), + (!cast(INST # v4f32) v4f32:$Rn)>; + def : Pat<(v2i64 (to_int (round v2f64:$Rn))), + (!cast(INST # v2f64) v2f64:$Rn)>; +} + +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; + def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>; def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>; def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>; diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll index 63638adbc6174..5a2bc3bb80225 100644 --- 
a/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll @@ -13,8 +13,7 @@ define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtas_2s: ; CHECK: // %bb.0: -; CHECK-NEXT: frinta v0.2s, v0.2s -; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: fcvtas v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A) %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> @@ -24,8 +23,7 @@ define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind { define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtas_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: frinta v0.4s, v0.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtas v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A) %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> @@ -35,8 +33,7 @@ define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind { define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtas_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: frinta v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtas v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A) %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64> @@ -50,8 +47,7 @@ define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind { define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtau_2s: ; CHECK: // %bb.0: -; CHECK-NEXT: frinta v0.2s, v0.2s -; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: fcvtau v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A) %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> @@ -61,8 +57,7 @@ define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind { define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtau_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: frinta v0.4s, v0.4s -; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtau v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> 
@llvm.round.v4f32(<4 x float> %A) %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> @@ -72,8 +67,7 @@ define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind { define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtau_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: frinta v0.2d, v0.2d -; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtau v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A) %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64> @@ -87,8 +81,7 @@ define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind { define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtns_2s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn v0.2s, v0.2s -; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: fcvtns v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A) %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> @@ -98,8 +91,7 @@ define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind { define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtns_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn v0.4s, v0.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtns v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A) %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> @@ -109,8 +101,7 @@ define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind { define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtns_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtns v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A) %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64> @@ -124,8 +115,7 @@ define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind { define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtnu_2s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn v0.2s, v0.2s -; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: fcvtnu v0.2s, v0.2s ; 
CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A) %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> @@ -135,8 +125,7 @@ define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind { define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtnu_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn v0.4s, v0.4s -; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtnu v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A) %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> @@ -146,8 +135,7 @@ define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind { define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtnu_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn v0.2d, v0.2d -; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtnu v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A) %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64> @@ -161,8 +149,7 @@ define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind { define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtms_2s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.2s, v0.2s -; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: fcvtms v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A) %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> @@ -172,8 +159,7 @@ define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind { define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtms_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.4s, v0.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtms v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A) %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> @@ -183,8 +169,7 @@ define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind { define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtms_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; 
CHECK-NEXT: fcvtms v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A) %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64> @@ -198,8 +183,7 @@ define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind { define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtmu_2s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.2s, v0.2s -; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: fcvtmu v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A) %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> @@ -209,8 +193,7 @@ define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind { define <4 x i32> @fcvtmu_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtmu_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.4s, v0.4s -; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtmu v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A) %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> @@ -220,8 +203,7 @@ define <4 x i32> @fcvtmu_4s(<4 x float> %A) nounwind { define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtmu_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.2d, v0.2d -; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtmu v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A) %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64> @@ -235,8 +217,7 @@ define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind { define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtps_2s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintp v0.2s, v0.2s -; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: fcvtps v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A) %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> @@ -246,8 +227,7 @@ define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind { define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtps_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintp v0.4s, v0.4s -; CHECK-NEXT: 
fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtps v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A) %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> @@ -257,8 +237,7 @@ define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind { define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtps_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: frintp v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtps v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A) %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64> @@ -272,8 +251,7 @@ define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind { define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtpu_2s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintp v0.2s, v0.2s -; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: fcvtpu v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A) %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> @@ -283,8 +261,7 @@ define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind { define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtpu_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintp v0.4s, v0.4s -; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtpu v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A) %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> @@ -294,8 +271,7 @@ define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind { define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtpu_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: frintp v0.2d, v0.2d -; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtpu v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A) %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64> @@ -309,7 +285,6 @@ define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind { define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtzs_2s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintz v0.2s, 
v0.2s ; CHECK-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A) @@ -320,7 +295,6 @@ define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind { define <4 x i32> @fcvtzs_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtzs_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintz v0.4s, v0.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A) @@ -331,7 +305,6 @@ define <4 x i32> @fcvtzs_4s(<4 x float> %A) nounwind { define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtzs_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: frintz v0.2d, v0.2d ; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A) @@ -346,7 +319,6 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind { define <2 x i32> @fcvtzu_2s(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtzu_2s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintz v0.2s, v0.2s ; CHECK-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A) @@ -357,7 +329,6 @@ define <2 x i32> @fcvtzu_2s(<2 x float> %A) nounwind { define <4 x i32> @fcvtzu_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtzu_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: frintz v0.4s, v0.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A) @@ -368,7 +339,6 @@ define <4 x i32> @fcvtzu_4s(<4 x float> %A) nounwind { define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtzu_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: frintz v0.2d, v0.2d ; CHECK-NEXT: fcvtzu v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A) @@ -393,8 +363,7 @@ define <4 x i16> @fcvtas_4h(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtas_4h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frinta v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtas v0.4h, v0.4h ; CHECK-FP16-NEXT: 
ret %tmp1 = call <4 x half> @llvm.round.v4f16(<4 x half> %A) %tmp2 = fptosi <4 x half> %tmp1 to <4 x i16> @@ -419,8 +388,7 @@ define <8 x i16> @fcvtas_8h(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtas_8h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frinta v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtas v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.round.v8f16(<8 x half> %A) %tmp2 = fptosi <8 x half> %tmp1 to <8 x i16> @@ -440,8 +408,7 @@ define <4 x i16> @fcvtau_4h(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtau_4h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frinta v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtau v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.round.v4f16(<4 x half> %A) %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16> @@ -466,8 +433,7 @@ define <8 x i16> @fcvtau_8h(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtau_8h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frinta v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtau v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.round.v8f16(<8 x half> %A) %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16> @@ -487,8 +453,7 @@ define <4 x i16> @fcvtns_4h(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtns_4h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintn v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtns v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A) %tmp2 = fptosi <4 x half> %tmp1 to <4 x i16> @@ -513,8 +478,7 @@ define <8 x i16> @fcvtns_8h(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtns_8h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintn v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtns v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A) %tmp2 = fptosi <8 x half> %tmp1 to <8 x i16> @@ 
-534,8 +498,7 @@ define <4 x i16> @fcvtnu_4h(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtnu_4h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintn v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtnu v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A) %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16> @@ -560,8 +523,7 @@ define <8 x i16> @fcvtnu_8h(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtnu_8h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintn v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtnu v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A) %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16> @@ -581,8 +543,7 @@ define <4 x i16> @fcvtms_4h(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtms_4h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintm v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtms v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A) %tmp2 = fptosi <4 x half> %tmp1 to <4 x i16> @@ -607,8 +568,7 @@ define <8 x i16> @fcvtms_8h(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtms_8h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintm v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtms v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A) %tmp2 = fptosi <8 x half> %tmp1 to <8 x i16> @@ -628,8 +588,7 @@ define <4 x i16> @fcvtmu_4h(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtmu_4h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintm v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtmu v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A) %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16> @@ -654,8 +613,7 @@ define <8 x i16> @fcvtmu_8h(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtmu_8h: ; 
CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintm v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtmu v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A) %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16> @@ -675,8 +633,7 @@ define <4 x i16> @fcvtps_4h(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtps_4h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintp v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtps v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.ceil.v4f16(<4 x half> %A) %tmp2 = fptosi <4 x half> %tmp1 to <4 x i16> @@ -701,8 +658,7 @@ define <8 x i16> @fcvtps_8h(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtps_8h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintp v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtps v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.ceil.v8f16(<8 x half> %A) %tmp2 = fptosi <8 x half> %tmp1 to <8 x i16> @@ -722,8 +678,7 @@ define <4 x i16> @fcvtpu_4h(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtpu_4h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintp v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtpu v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.ceil.v4f16(<4 x half> %A) %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16> @@ -748,8 +703,7 @@ define <8 x i16> @fcvtpu_8h(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtpu_8h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintp v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtpu v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.ceil.v8f16(<8 x half> %A) %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16> @@ -769,7 +723,6 @@ define <4 x i16> @fcvtzs_4h(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtzs_4h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintz v0.4h, v0.4h ; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h ; CHECK-FP16-NEXT: ret 
%tmp1 = call <4 x half> @llvm.trunc.v4f16(<4 x half> %A) @@ -795,7 +748,6 @@ define <8 x i16> @fcvtzs_8h(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtzs_8h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintz v0.8h, v0.8h ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.trunc.v8f16(<8 x half> %A) @@ -816,7 +768,6 @@ define <4 x i16> @fcvtzu_4h(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtzu_4h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintz v0.4h, v0.4h ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.trunc.v4f16(<4 x half> %A) @@ -842,7 +793,6 @@ define <8 x i16> @fcvtzu_8h(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtzu_8h: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintz v0.8h, v0.8h ; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.trunc.v8f16(<8 x half> %A) diff --git a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll index fb571eff39fe5..9f4f00fda7cdf 100644 --- a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll +++ b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=aarch64 < %s | FileCheck %s ; CHECK: .LCPI0_0: @@ -700,23 +700,15 @@ define <16 x i8> @insert4_v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8 define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l249, <2 x double> %l267, <2 x double> %l285, <2 x double> %l303, <2 x double> %l321, <2 x double> %l339) { ; CHECK-LABEL: test: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.2d, v0.2d -; CHECK-NEXT: frintm v4.2d, v4.2d +; CHECK-NEXT: fcvtms v0.2d, v0.2d +; CHECK-NEXT: fcvtms v4.2d, v4.2d ; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: frintm v1.2d, v1.2d -; CHECK-NEXT: frintm v5.2d, 
v5.2d -; CHECK-NEXT: frintm v2.2d, v2.2d -; CHECK-NEXT: frintm v6.2d, v6.2d -; CHECK-NEXT: frintm v3.2d, v3.2d -; CHECK-NEXT: frintm v7.2d, v7.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: fcvtzs v5.2d, v5.2d -; CHECK-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-NEXT: fcvtzs v6.2d, v6.2d -; CHECK-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtms v1.2d, v1.2d +; CHECK-NEXT: fcvtms v5.2d, v5.2d +; CHECK-NEXT: fcvtms v2.2d, v2.2d +; CHECK-NEXT: fcvtms v6.2d, v6.2d +; CHECK-NEXT: fcvtms v3.2d, v3.2d +; CHECK-NEXT: fcvtms v7.2d, v7.2d ; CHECK-NEXT: xtn v16.2s, v0.2d ; CHECK-NEXT: xtn v20.2s, v4.2d ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0] From e56d806c43a2ef1fa5c00de3f854349d4f43da20 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Tue, 2 Dec 2025 21:20:39 -0500 Subject: [PATCH 05/16] [AArch64] Add saturating vector float->int tests; remove intrinsic declarations --- .../CodeGen/AArch64/arm64-vcvt-fused-round.ll | 888 +++++++++++++++++- 1 file changed, 858 insertions(+), 30 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll index 5a2bc3bb80225..c7cb1acecb13c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll @@ -20,6 +20,18 @@ define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind { ret <2 x i32> %tmp2 } +define <2 x i32> @fcvtas_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtas_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.2s, v0.2s +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtas_4s: ; CHECK: // %bb.0: @@ -30,6 +42,18 @@ define <4 x i32> @fcvtas_4s(<4 x float> %A) 
nounwind { ret <4 x i32> %tmp2 } +define <4 x i32> @fcvtas_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtas_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtas_2d: ; CHECK: // %bb.0: @@ -40,6 +64,18 @@ define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp2 } +define <2 x i64> @fcvtas_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtas_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + ; ; round + unsigned -> fcvtau ; @@ -54,6 +90,18 @@ define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind { ret <2 x i32> %tmp2 } +define <2 x i32> @fcvtau_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtau_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.2s, v0.2s +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtau_4s: ; CHECK: // %bb.0: @@ -64,6 +112,18 @@ define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind { ret <4 x i32> %tmp2 } +define <4 x i32> @fcvtau_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtau_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> 
%tmp2 +} + + define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtau_2d: ; CHECK: // %bb.0: @@ -74,6 +134,18 @@ define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp2 } +define <2 x i64> @fcvtau_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtau_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + ; ; roundeven + signed -> fcvtns ; @@ -88,6 +160,18 @@ define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind { ret <2 x i32> %tmp2 } +define <2 x i32> @fcvtns_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtns_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.2s, v0.2s +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtns_4s: ; CHECK: // %bb.0: @@ -98,6 +182,18 @@ define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind { ret <4 x i32> %tmp2 } +define <4 x i32> @fcvtns_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtns_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtns_2d: ; CHECK: // %bb.0: @@ -108,6 +204,18 @@ define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp2 } +define <2 x i64> @fcvtns_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtns_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.2d, v0.2d +; 
CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + ; ; roundeven + unsigned -> fcvtnu ; @@ -122,6 +230,18 @@ define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind { ret <2 x i32> %tmp2 } +define <2 x i32> @fcvtnu_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtnu_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.2s, v0.2s +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtnu_4s: ; CHECK: // %bb.0: @@ -132,6 +252,18 @@ define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind { ret <4 x i32> %tmp2 } +define <4 x i32> @fcvtnu_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtnu_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtnu_2d: ; CHECK: // %bb.0: @@ -142,6 +274,18 @@ define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp2 } +define <2 x i64> @fcvtnu_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtnu_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + ; ; floor + signed -> fcvtms ; @@ -156,6 +300,18 @@ define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind { ret <2 x i32> %tmp2 } +define <2 
x i32> @fcvtms_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtms_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.2s, v0.2s +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtms_4s: ; CHECK: // %bb.0: @@ -166,6 +322,18 @@ define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind { ret <4 x i32> %tmp2 } +define <4 x i32> @fcvtms_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtms_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtms_2d: ; CHECK: // %bb.0: @@ -176,6 +344,18 @@ define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp2 } +define <2 x i64> @fcvtms_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtms_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + ; ; floor + unsigned -> fcvtmu ; @@ -190,6 +370,18 @@ define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind { ret <2 x i32> %tmp2 } +define <2 x i32> @fcvtmu_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtmu_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.2s, v0.2s +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + define <4 x i32> 
@fcvtmu_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtmu_4s: ; CHECK: // %bb.0: @@ -200,6 +392,18 @@ define <4 x i32> @fcvtmu_4s(<4 x float> %A) nounwind { ret <4 x i32> %tmp2 } +define <4 x i32> @fcvtmu_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtmu_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtmu_2d: ; CHECK: // %bb.0: @@ -210,6 +414,18 @@ define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp2 } +define <2 x i64> @fcvtmu_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtmu_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + ; ; ceil + signed -> fcvtps ; @@ -224,6 +440,18 @@ define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind { ret <2 x i32> %tmp2 } +define <2 x i32> @fcvtps_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtps_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp v0.2s, v0.2s +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtps_4s: ; CHECK: // %bb.0: @@ -234,6 +462,18 @@ define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind { ret <4 x i32> %tmp2 } +define <4 x i32> @fcvtps_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtps_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret 
+ %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtps_2d: ; CHECK: // %bb.0: @@ -244,6 +484,18 @@ define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp2 } +define <2 x i64> @fcvtps_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtps_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + ; ; ceil + unsigned -> fcvtpu ; @@ -258,6 +510,18 @@ define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind { ret <2 x i32> %tmp2 } +define <2 x i32> @fcvtpu_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtpu_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp v0.2s, v0.2s +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtpu_4s: ; CHECK: // %bb.0: @@ -268,6 +532,18 @@ define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind { ret <4 x i32> %tmp2 } +define <4 x i32> @fcvtpu_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtpu_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtpu_2d: ; CHECK: // %bb.0: @@ -278,6 +554,18 @@ define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp2 } +define <2 x i64> 
@fcvtpu_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtpu_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + ; ; trunc + signed -> fcvtzs (already the default, but test the fusion) ; @@ -292,6 +580,18 @@ define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind { ret <2 x i32> %tmp2 } +define <2 x i32> @fcvtzs_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtzs_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.2s, v0.2s +; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + define <4 x i32> @fcvtzs_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtzs_4s: ; CHECK: // %bb.0: @@ -302,6 +602,18 @@ define <4 x i32> @fcvtzs_4s(<4 x float> %A) nounwind { ret <4 x i32> %tmp2 } +define <4 x i32> @fcvtzs_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtzs_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtzs_2d: ; CHECK: // %bb.0: @@ -312,6 +624,18 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp2 } +define <2 x i64> @fcvtzs_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtzs_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> 
%tmp2 +} + + ; ; trunc + unsigned -> fcvtzu ; @@ -326,6 +650,18 @@ define <2 x i32> @fcvtzu_2s(<2 x float> %A) nounwind { ret <2 x i32> %tmp2 } +define <2 x i32> @fcvtzu_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtzu_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.2s, v0.2s +; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + define <4 x i32> @fcvtzu_4s(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtzu_4s: ; CHECK: // %bb.0: @@ -336,6 +672,18 @@ define <4 x i32> @fcvtzu_4s(<4 x float> %A) nounwind { ret <4 x i32> %tmp2 } +define <4 x i32> @fcvtzu_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtzu_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtzu_2d: ; CHECK: // %bb.0: @@ -346,6 +694,18 @@ define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp2 } +define <2 x i64> @fcvtzu_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtzu_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz v0.2d, v0.2d +; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + ; ; f16 tests (require +fullfp16) ; @@ -370,6 +730,28 @@ define <4 x i16> @fcvtas_4h(<4 x half> %A) nounwind { ret <4 x i16> %tmp2 } +define <4 x i16> @fcvtas_4h_sat(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtas_4h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frinta v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, 
v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtas_4h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frinta v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.round.v4f16(<4 x half> %A) + %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1) + ret <4 x i16> %tmp2 +} + + define <8 x i16> @fcvtas_8h(<8 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtas_8h: ; CHECK-NO16: // %bb.0: @@ -395,6 +777,34 @@ define <8 x i16> @fcvtas_8h(<8 x half> %A) nounwind { ret <8 x i16> %tmp2 } +define <8 x i16> @fcvtas_8h_sat(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtas_8h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frinta v1.4s, v1.4s +; CHECK-NO16-NEXT: frinta v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h +; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: sqxtn2 v0.8h, v1.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtas_8h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frinta v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.round.v8f16(<8 x half> %A) + %tmp2 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %tmp1) + ret <8 x i16> %tmp2 +} + + define <4 x i16> @fcvtau_4h(<4 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtau_4h: ; CHECK-NO16: // %bb.0: @@ -415,6 +825,28 @@ define <4 x i16> @fcvtau_4h(<4 x half> %A) nounwind { ret <4 x i16> %tmp2 } +define <4 x i16> @fcvtau_4h_sat(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtau_4h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; 
CHECK-NO16-NEXT: frinta v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtau_4h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frinta v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.round.v4f16(<4 x half> %A) + %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %tmp1) + ret <4 x i16> %tmp2 +} + + define <8 x i16> @fcvtau_8h(<8 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtau_8h: ; CHECK-NO16: // %bb.0: @@ -440,6 +872,34 @@ define <8 x i16> @fcvtau_8h(<8 x half> %A) nounwind { ret <8 x i16> %tmp2 } +define <8 x i16> @fcvtau_8h_sat(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtau_8h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frinta v1.4s, v1.4s +; CHECK-NO16-NEXT: frinta v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h +; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: uqxtn2 v0.8h, v1.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtau_8h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frinta v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.round.v8f16(<8 x half> %A) + %tmp2 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1) + ret <8 x i16> %tmp2 +} + + define <4 x i16> @fcvtns_4h(<4 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtns_4h: ; CHECK-NO16: // %bb.0: @@ -460,6 +920,28 @@ define <4 x i16> @fcvtns_4h(<4 x half> %A) nounwind { ret <4 x i16> %tmp2 } +define <4 x i16> @fcvtns_4h_sat(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: 
fcvtns_4h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintn v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtns_4h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintn v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A) + %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1) + ret <4 x i16> %tmp2 +} + + define <8 x i16> @fcvtns_8h(<8 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtns_8h: ; CHECK-NO16: // %bb.0: @@ -485,6 +967,34 @@ define <8 x i16> @fcvtns_8h(<8 x half> %A) nounwind { ret <8 x i16> %tmp2 } +define <8 x i16> @fcvtns_8h_sat(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtns_8h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintn v1.4s, v1.4s +; CHECK-NO16-NEXT: frintn v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h +; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: sqxtn2 v0.8h, v1.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtns_8h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintn v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A) + %tmp2 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %tmp1) + ret <8 x i16> %tmp2 +} + + define <4 x i16> @fcvtnu_4h(<4 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtnu_4h: ; CHECK-NO16: // %bb.0: @@ -505,6 +1015,28 @@ define <4 x i16> @fcvtnu_4h(<4 x half> %A) nounwind { ret <4 x i16> %tmp2 } 
+define <4 x i16> @fcvtnu_4h_sat(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtnu_4h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintn v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtnu_4h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintn v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A) + %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %tmp1) + ret <4 x i16> %tmp2 +} + + define <8 x i16> @fcvtnu_8h(<8 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtnu_8h: ; CHECK-NO16: // %bb.0: @@ -530,6 +1062,34 @@ define <8 x i16> @fcvtnu_8h(<8 x half> %A) nounwind { ret <8 x i16> %tmp2 } +define <8 x i16> @fcvtnu_8h_sat(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtnu_8h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintn v1.4s, v1.4s +; CHECK-NO16-NEXT: frintn v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h +; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: uqxtn2 v0.8h, v1.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtnu_8h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintn v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A) + %tmp2 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1) + ret <8 x i16> %tmp2 +} + + define <4 x i16> @fcvtms_4h(<4 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtms_4h: ; CHECK-NO16: // %bb.0: @@ -550,6 +1110,28 
@@ define <4 x i16> @fcvtms_4h(<4 x half> %A) nounwind { ret <4 x i16> %tmp2 } +define <4 x i16> @fcvtms_4h_sat(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtms_4h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintm v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtms_4h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintm v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A) + %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1) + ret <4 x i16> %tmp2 +} + + define <8 x i16> @fcvtms_8h(<8 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtms_8h: ; CHECK-NO16: // %bb.0: @@ -575,6 +1157,34 @@ define <8 x i16> @fcvtms_8h(<8 x half> %A) nounwind { ret <8 x i16> %tmp2 } +define <8 x i16> @fcvtms_8h_sat(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtms_8h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintm v1.4s, v1.4s +; CHECK-NO16-NEXT: frintm v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h +; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: sqxtn2 v0.8h, v1.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtms_8h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintm v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A) + %tmp2 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %tmp1) + ret <8 x i16> %tmp2 +} + + define <4 x i16> @fcvtmu_4h(<4 x half> %A) nounwind { ; 
CHECK-NO16-LABEL: fcvtmu_4h: ; CHECK-NO16: // %bb.0: @@ -595,6 +1205,28 @@ define <4 x i16> @fcvtmu_4h(<4 x half> %A) nounwind { ret <4 x i16> %tmp2 } +define <4 x i16> @fcvtmu_4h_sat(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtmu_4h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintm v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtmu_4h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintm v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A) + %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %tmp1) + ret <4 x i16> %tmp2 +} + + define <8 x i16> @fcvtmu_8h(<8 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtmu_8h: ; CHECK-NO16: // %bb.0: @@ -620,6 +1252,34 @@ define <8 x i16> @fcvtmu_8h(<8 x half> %A) nounwind { ret <8 x i16> %tmp2 } +define <8 x i16> @fcvtmu_8h_sat(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtmu_8h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintm v1.4s, v1.4s +; CHECK-NO16-NEXT: frintm v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h +; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: uqxtn2 v0.8h, v1.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtmu_8h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintm v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A) + %tmp2 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1) + ret <8 x 
i16> %tmp2 +} + + define <4 x i16> @fcvtps_4h(<4 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtps_4h: ; CHECK-NO16: // %bb.0: @@ -640,6 +1300,28 @@ define <4 x i16> @fcvtps_4h(<4 x half> %A) nounwind { ret <4 x i16> %tmp2 } +define <4 x i16> @fcvtps_4h_sat(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtps_4h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintp v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtps_4h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintp v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.ceil.v4f16(<4 x half> %A) + %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1) + ret <4 x i16> %tmp2 +} + + define <8 x i16> @fcvtps_8h(<8 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtps_8h: ; CHECK-NO16: // %bb.0: @@ -665,6 +1347,34 @@ define <8 x i16> @fcvtps_8h(<8 x half> %A) nounwind { ret <8 x i16> %tmp2 } +define <8 x i16> @fcvtps_8h_sat(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtps_8h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintp v1.4s, v1.4s +; CHECK-NO16-NEXT: frintp v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h +; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: sqxtn2 v0.8h, v1.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtps_8h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintp v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.ceil.v8f16(<8 x half> %A) + %tmp2 = call 
<8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %tmp1) + ret <8 x i16> %tmp2 +} + + define <4 x i16> @fcvtpu_4h(<4 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtpu_4h: ; CHECK-NO16: // %bb.0: @@ -685,6 +1395,28 @@ define <4 x i16> @fcvtpu_4h(<4 x half> %A) nounwind { ret <4 x i16> %tmp2 } +define <4 x i16> @fcvtpu_4h_sat(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtpu_4h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintp v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtpu_4h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintp v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.ceil.v4f16(<4 x half> %A) + %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %tmp1) + ret <4 x i16> %tmp2 +} + + define <8 x i16> @fcvtpu_8h(<8 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtpu_8h: ; CHECK-NO16: // %bb.0: @@ -710,6 +1442,34 @@ define <8 x i16> @fcvtpu_8h(<8 x half> %A) nounwind { ret <8 x i16> %tmp2 } +define <8 x i16> @fcvtpu_8h_sat(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtpu_8h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintp v1.4s, v1.4s +; CHECK-NO16-NEXT: frintp v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h +; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: uqxtn2 v0.8h, v1.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtpu_8h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintp v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + 
%tmp1 = call <8 x half> @llvm.ceil.v8f16(<8 x half> %A) + %tmp2 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1) + ret <8 x i16> %tmp2 +} + + define <4 x i16> @fcvtzs_4h(<4 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtzs_4h: ; CHECK-NO16: // %bb.0: @@ -730,6 +1490,28 @@ define <4 x i16> @fcvtzs_4h(<4 x half> %A) nounwind { ret <4 x i16> %tmp2 } +define <4 x i16> @fcvtzs_4h_sat(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtzs_4h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintz v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzs_4h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintz v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.trunc.v4f16(<4 x half> %A) + %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1) + ret <4 x i16> %tmp2 +} + + define <8 x i16> @fcvtzs_8h(<8 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtzs_8h: ; CHECK-NO16: // %bb.0: @@ -755,6 +1537,34 @@ define <8 x i16> @fcvtzs_8h(<8 x half> %A) nounwind { ret <8 x i16> %tmp2 } +define <8 x i16> @fcvtzs_8h_sat(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtzs_8h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintz v1.4s, v1.4s +; CHECK-NO16-NEXT: frintz v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h +; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: sqxtn2 v0.8h, v1.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzs_8h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintz v0.8h, 
v0.8h +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.trunc.v8f16(<8 x half> %A) + %tmp2 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %tmp1) + ret <8 x i16> %tmp2 +} + + define <4 x i16> @fcvtzu_4h(<4 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtzu_4h: ; CHECK-NO16: // %bb.0: @@ -775,6 +1585,28 @@ define <4 x i16> @fcvtzu_4h(<4 x half> %A) nounwind { ret <4 x i16> %tmp2 } +define <4 x i16> @fcvtzu_4h_sat(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtzu_4h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintz v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_4h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintz v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.trunc.v4f16(<4 x half> %A) + %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %tmp1) + ret <4 x i16> %tmp2 +} + + define <8 x i16> @fcvtzu_8h(<8 x half> %A) nounwind { ; CHECK-NO16-LABEL: fcvtzu_8h: ; CHECK-NO16: // %bb.0: @@ -800,33 +1632,29 @@ define <8 x i16> @fcvtzu_8h(<8 x half> %A) nounwind { ret <8 x i16> %tmp2 } -; Intrinsic declarations -declare <2 x float> @llvm.round.v2f32(<2 x float>) nounwind readnone -declare <4 x float> @llvm.round.v4f32(<4 x float>) nounwind readnone -declare <2 x double> @llvm.round.v2f64(<2 x double>) nounwind readnone -declare <4 x half> @llvm.round.v4f16(<4 x half>) nounwind readnone -declare <8 x half> @llvm.round.v8f16(<8 x half>) nounwind readnone - -declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) nounwind readnone -declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) nounwind readnone -declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) nounwind readnone -declare <4 x half> @llvm.roundeven.v4f16(<4 x 
half>) nounwind readnone -declare <8 x half> @llvm.roundeven.v8f16(<8 x half>) nounwind readnone - -declare <2 x float> @llvm.floor.v2f32(<2 x float>) nounwind readnone -declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone -declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone -declare <4 x half> @llvm.floor.v4f16(<4 x half>) nounwind readnone -declare <8 x half> @llvm.floor.v8f16(<8 x half>) nounwind readnone - -declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone -declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone -declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone -declare <4 x half> @llvm.ceil.v4f16(<4 x half>) nounwind readnone -declare <8 x half> @llvm.ceil.v8f16(<8 x half>) nounwind readnone - -declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone -declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone -declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone -declare <4 x half> @llvm.trunc.v4f16(<4 x half>) nounwind readnone -declare <8 x half> @llvm.trunc.v8f16(<8 x half>) nounwind readnone +define <8 x i16> @fcvtzu_8h_sat(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtzu_8h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintz v1.4s, v1.4s +; CHECK-NO16-NEXT: frintz v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h +; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: uqxtn2 v0.8h, v1.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_8h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintz v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.trunc.v8f16(<8 x half> %A) + %tmp2 = 
call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1) + ret <8 x i16> %tmp2 +} From ea855433058be38ae68c3a57debca6a206051e5a Mon Sep 17 00:00:00 2001 From: valadaptive Date: Tue, 2 Dec 2025 21:35:46 -0500 Subject: [PATCH 06/16] [AArch64] Actually optimize all the vector float->int patterns --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 54 ++++++-- .../CodeGen/AArch64/arm64-vcvt-fused-round.ll | 130 ++++++------------ 2 files changed, 79 insertions(+), 105 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 02ae9546f7ccf..0df70cace9892 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5831,12 +5831,22 @@ defm : SIMDTwoVectorFPToIntSatPats; defm : SIMDTwoVectorFPToIntSatPats; // Fused round + convert to int patterns for vectors -multiclass SIMDTwoVectorFPToIntRoundPats { +multiclass SIMDTwoVectorFPToIntRoundPats { let Predicates = [HasFullFP16] in { - def : Pat<(v4i16 (to_int (round v4f16:$Rn))), - (!cast(INST # v4f16) v4f16:$Rn)>; - def : Pat<(v8i16 (to_int (round v8f16:$Rn))), - (!cast(INST # v8f16) v8f16:$Rn)>; + def : Pat<(v4i16 (to_int (round v4f16:$Rn))), + (!cast(INST # v4f16) v4f16:$Rn)>; + def : Pat<(v8i16 (to_int (round v8f16:$Rn))), + (!cast(INST # v8f16) v8f16:$Rn)>; + + def : Pat<(v4i16 (to_int_sat (round v4f16:$Rn), i16)), + (!cast(INST # v4f16) v4f16:$Rn)>; + def : Pat<(v8i16 (to_int_sat (round v8f16:$Rn), i16)), + (!cast(INST # v8f16) v8f16:$Rn)>; + + def : Pat<(v4i16 (to_int_sat_gi (round v4f16:$Rn))), + (!cast(INST # v4f16) v4f16:$Rn)>; + def : Pat<(v8i16 (to_int_sat_gi (round v8f16:$Rn))), + (!cast(INST # v8f16) v8f16:$Rn)>; } def : Pat<(v2i32 (to_int (round v2f32:$Rn))), (!cast(INST # v2f32) v2f32:$Rn)>; @@ -5844,18 +5854,32 @@ multiclass SIMDTwoVectorFPToIntRoundPats(INST # v4f32) v4f32:$Rn)>; def : Pat<(v2i64 (to_int (round v2f64:$Rn))), (!cast(INST # v2f64) v2f64:$Rn)>; + + def : Pat<(v2i32 (to_int_sat 
(round v2f32:$Rn), i32)), + (!cast(INST # v2f32) v2f32:$Rn)>; + def : Pat<(v4i32 (to_int_sat (round v4f32:$Rn), i32)), + (!cast(INST # v4f32) v4f32:$Rn)>; + def : Pat<(v2i64 (to_int_sat (round v2f64:$Rn), i64)), + (!cast(INST # v2f64) v2f64:$Rn)>; + + def : Pat<(v2i32 (to_int_sat_gi (round v2f32:$Rn))), + (!cast(INST # v2f32) v2f32:$Rn)>; + def : Pat<(v4i32 (to_int_sat_gi (round v4f32:$Rn))), + (!cast(INST # v4f32) v4f32:$Rn)>; + def : Pat<(v2i64 (to_int_sat_gi (round v2f64:$Rn))), + (!cast(INST # v2f64) v2f64:$Rn)>; } -defm : SIMDTwoVectorFPToIntRoundPats; -defm : SIMDTwoVectorFPToIntRoundPats; -defm : SIMDTwoVectorFPToIntRoundPats; -defm : SIMDTwoVectorFPToIntRoundPats; -defm : SIMDTwoVectorFPToIntRoundPats; -defm : SIMDTwoVectorFPToIntRoundPats; -defm : SIMDTwoVectorFPToIntRoundPats; -defm : SIMDTwoVectorFPToIntRoundPats; -defm : SIMDTwoVectorFPToIntRoundPats; -defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>; def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>; diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll index c7cb1acecb13c..bfc96216928fc 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll @@ -23,8 +23,7 @@ define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind { define <2 x i32> @fcvtas_2s_sat(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtas_2s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frinta v0.2s, v0.2s -; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: 
fcvtas v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A) %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1) @@ -45,8 +44,7 @@ define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind { define <4 x i32> @fcvtas_4s_sat(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtas_4s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frinta v0.4s, v0.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtas v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A) %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1) @@ -67,8 +65,7 @@ define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind { define <2 x i64> @fcvtas_2d_sat(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtas_2d_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frinta v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtas v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A) %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1) @@ -93,8 +90,7 @@ define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind { define <2 x i32> @fcvtau_2s_sat(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtau_2s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frinta v0.2s, v0.2s -; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: fcvtau v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A) %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1) @@ -115,8 +111,7 @@ define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind { define <4 x i32> @fcvtau_4s_sat(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtau_4s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frinta v0.4s, v0.4s -; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtau v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A) %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1) @@ -137,8 +132,7 @@ define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind { define <2 x 
i64> @fcvtau_2d_sat(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtau_2d_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frinta v0.2d, v0.2d -; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtau v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A) %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1) @@ -163,8 +157,7 @@ define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind { define <2 x i32> @fcvtns_2s_sat(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtns_2s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn v0.2s, v0.2s -; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: fcvtns v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A) %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1) @@ -185,8 +178,7 @@ define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind { define <4 x i32> @fcvtns_4s_sat(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtns_4s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn v0.4s, v0.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtns v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A) %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1) @@ -207,8 +199,7 @@ define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind { define <2 x i64> @fcvtns_2d_sat(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtns_2d_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtns v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A) %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1) @@ -233,8 +224,7 @@ define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind { define <2 x i32> @fcvtnu_2s_sat(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtnu_2s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn v0.2s, v0.2s -; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: fcvtnu v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 
x float> @llvm.roundeven.v2f32(<2 x float> %A) %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1) @@ -255,8 +245,7 @@ define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind { define <4 x i32> @fcvtnu_4s_sat(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtnu_4s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn v0.4s, v0.4s -; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtnu v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A) %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1) @@ -277,8 +266,7 @@ define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind { define <2 x i64> @fcvtnu_2d_sat(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtnu_2d_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintn v0.2d, v0.2d -; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtnu v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A) %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1) @@ -303,8 +291,7 @@ define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind { define <2 x i32> @fcvtms_2s_sat(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtms_2s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.2s, v0.2s -; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: fcvtms v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A) %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1) @@ -325,8 +312,7 @@ define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind { define <4 x i32> @fcvtms_4s_sat(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtms_4s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.4s, v0.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtms v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A) %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1) @@ -347,8 +333,7 @@ define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind { define <2 x i64> @fcvtms_2d_sat(<2 x double> %A) 
nounwind { ; CHECK-LABEL: fcvtms_2d_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtms v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A) %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1) @@ -373,8 +358,7 @@ define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind { define <2 x i32> @fcvtmu_2s_sat(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtmu_2s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.2s, v0.2s -; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: fcvtmu v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A) %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1) @@ -395,8 +379,7 @@ define <4 x i32> @fcvtmu_4s(<4 x float> %A) nounwind { define <4 x i32> @fcvtmu_4s_sat(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtmu_4s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.4s, v0.4s -; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtmu v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A) %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1) @@ -417,8 +400,7 @@ define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind { define <2 x i64> @fcvtmu_2d_sat(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtmu_2d_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.2d, v0.2d -; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtmu v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A) %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1) @@ -443,8 +425,7 @@ define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind { define <2 x i32> @fcvtps_2s_sat(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtps_2s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintp v0.2s, v0.2s -; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: fcvtps v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A) %tmp2 
= call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1) @@ -465,8 +446,7 @@ define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind { define <4 x i32> @fcvtps_4s_sat(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtps_4s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintp v0.4s, v0.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtps v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A) %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1) @@ -487,8 +467,7 @@ define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind { define <2 x i64> @fcvtps_2d_sat(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtps_2d_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintp v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtps v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A) %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1) @@ -513,8 +492,7 @@ define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind { define <2 x i32> @fcvtpu_2s_sat(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtpu_2s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintp v0.2s, v0.2s -; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: fcvtpu v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A) %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1) @@ -535,8 +513,7 @@ define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind { define <4 x i32> @fcvtpu_4s_sat(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtpu_4s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintp v0.4s, v0.4s -; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtpu v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A) %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1) @@ -557,8 +534,7 @@ define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind { define <2 x i64> @fcvtpu_2d_sat(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtpu_2d_sat: ; CHECK: // %bb.0: -; 
CHECK-NEXT: frintp v0.2d, v0.2d -; CHECK-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-NEXT: fcvtpu v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A) %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1) @@ -583,7 +559,6 @@ define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind { define <2 x i32> @fcvtzs_2s_sat(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtzs_2s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintz v0.2s, v0.2s ; CHECK-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A) @@ -605,7 +580,6 @@ define <4 x i32> @fcvtzs_4s(<4 x float> %A) nounwind { define <4 x i32> @fcvtzs_4s_sat(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtzs_4s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintz v0.4s, v0.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A) @@ -627,7 +601,6 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind { define <2 x i64> @fcvtzs_2d_sat(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtzs_2d_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintz v0.2d, v0.2d ; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A) @@ -653,7 +626,6 @@ define <2 x i32> @fcvtzu_2s(<2 x float> %A) nounwind { define <2 x i32> @fcvtzu_2s_sat(<2 x float> %A) nounwind { ; CHECK-LABEL: fcvtzu_2s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintz v0.2s, v0.2s ; CHECK-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-NEXT: ret %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A) @@ -675,7 +647,6 @@ define <4 x i32> @fcvtzu_4s(<4 x float> %A) nounwind { define <4 x i32> @fcvtzu_4s_sat(<4 x float> %A) nounwind { ; CHECK-LABEL: fcvtzu_4s_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintz v0.4s, v0.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-NEXT: ret %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A) @@ -697,7 +668,6 @@ define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind { define <2 x i64> 
@fcvtzu_2d_sat(<2 x double> %A) nounwind { ; CHECK-LABEL: fcvtzu_2d_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: frintz v0.2d, v0.2d ; CHECK-NEXT: fcvtzu v0.2d, v0.2d ; CHECK-NEXT: ret %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A) @@ -743,8 +713,7 @@ define <4 x i16> @fcvtas_4h_sat(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtas_4h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frinta v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtas v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.round.v4f16(<4 x half> %A) %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1) @@ -796,8 +765,7 @@ define <8 x i16> @fcvtas_8h_sat(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtas_8h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frinta v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtas v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.round.v8f16(<8 x half> %A) %tmp2 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %tmp1) @@ -838,8 +806,7 @@ define <4 x i16> @fcvtau_4h_sat(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtau_4h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frinta v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtau v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.round.v4f16(<4 x half> %A) %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %tmp1) @@ -891,8 +858,7 @@ define <8 x i16> @fcvtau_8h_sat(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtau_8h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frinta v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtau v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.round.v8f16(<8 x half> %A) %tmp2 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1) @@ -933,8 +899,7 @@ define <4 x i16> @fcvtns_4h_sat(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtns_4h_sat: ; 
CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintn v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtns v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A) %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1) @@ -986,8 +951,7 @@ define <8 x i16> @fcvtns_8h_sat(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtns_8h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintn v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtns v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A) %tmp2 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %tmp1) @@ -1028,8 +992,7 @@ define <4 x i16> @fcvtnu_4h_sat(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtnu_4h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintn v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtnu v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A) %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %tmp1) @@ -1081,8 +1044,7 @@ define <8 x i16> @fcvtnu_8h_sat(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtnu_8h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintn v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtnu v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A) %tmp2 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1) @@ -1123,8 +1085,7 @@ define <4 x i16> @fcvtms_4h_sat(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtms_4h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintm v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtms v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A) %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1) @@ -1176,8 +1137,7 @@ define <8 x i16> 
@fcvtms_8h_sat(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtms_8h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintm v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtms v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A) %tmp2 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %tmp1) @@ -1218,8 +1178,7 @@ define <4 x i16> @fcvtmu_4h_sat(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtmu_4h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintm v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtmu v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A) %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %tmp1) @@ -1271,8 +1230,7 @@ define <8 x i16> @fcvtmu_8h_sat(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtmu_8h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintm v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtmu v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A) %tmp2 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1) @@ -1313,8 +1271,7 @@ define <4 x i16> @fcvtps_4h_sat(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtps_4h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintp v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtps v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.ceil.v4f16(<4 x half> %A) %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1) @@ -1366,8 +1323,7 @@ define <8 x i16> @fcvtps_8h_sat(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtps_8h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintp v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtps v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.ceil.v8f16(<8 x half> %A) %tmp2 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 
x half> %tmp1) @@ -1408,8 +1364,7 @@ define <4 x i16> @fcvtpu_4h_sat(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtpu_4h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintp v0.4h, v0.4h -; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: fcvtpu v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.ceil.v4f16(<4 x half> %A) %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %tmp1) @@ -1461,8 +1416,7 @@ define <8 x i16> @fcvtpu_8h_sat(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtpu_8h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintp v0.8h, v0.8h -; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: fcvtpu v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.ceil.v8f16(<8 x half> %A) %tmp2 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1) @@ -1503,7 +1457,6 @@ define <4 x i16> @fcvtzs_4h_sat(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtzs_4h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintz v0.4h, v0.4h ; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.trunc.v4f16(<4 x half> %A) @@ -1556,7 +1509,6 @@ define <8 x i16> @fcvtzs_8h_sat(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtzs_8h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintz v0.8h, v0.8h ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x half> @llvm.trunc.v8f16(<8 x half> %A) @@ -1598,7 +1550,6 @@ define <4 x i16> @fcvtzu_4h_sat(<4 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtzu_4h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintz v0.4h, v0.4h ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h ; CHECK-FP16-NEXT: ret %tmp1 = call <4 x half> @llvm.trunc.v4f16(<4 x half> %A) @@ -1651,7 +1602,6 @@ define <8 x i16> @fcvtzu_8h_sat(<8 x half> %A) nounwind { ; ; CHECK-FP16-LABEL: fcvtzu_8h_sat: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: frintz v0.8h, v0.8h ; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h ; CHECK-FP16-NEXT: ret %tmp1 = call <8 x 
half> @llvm.trunc.v8f16(<8 x half> %A) From 9507254ec41406679cb9ae4da4eda092e45018c4 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Thu, 4 Dec 2025 06:27:32 -0500 Subject: [PATCH 07/16] [AArch64] Add more roundeven+conversion tests These currently aren't compiled into fcvtn* instructions because the libcall isn't properly lowered. --- .../AArch64/round-fptosi-sat-scalar.ll | 89 +++++++++++++++++++ .../AArch64/round-fptoui-sat-scalar.ll | 89 +++++++++++++++++++ 2 files changed, 178 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll index b7fae2bff6876..ade261dec644c 100644 --- a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll @@ -434,6 +434,92 @@ entry: ret i64 %i } +; Round to nearest, ties to even (fcvtns). + +define i32 @testnswh(half %a) { +; CHECK-CVT-LABEL: testnswh: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintn s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs w0, s0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: testnswh: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtns w0, h0 +; CHECK-FP16-NEXT: ret +entry: + %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + ret i32 %i +} + +define i64 @testnsxh(half %a) { +; CHECK-CVT-LABEL: testnsxh: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintn s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs x0, s0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: testnsxh: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtns x0, h0 +; CHECK-FP16-NEXT: ret +entry: + %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + ret i64 %i +} + +define i32 @testnsws(float %a) { +; 
CHECK-LABEL: testnsws: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtns w0, s0 +; CHECK-NEXT: ret +entry: + %r = call float @roundevenf(float %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + ret i32 %i +} + +define i64 @testnsxs(float %a) { +; CHECK-LABEL: testnsxs: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtns x0, s0 +; CHECK-NEXT: ret +entry: + %r = call float @roundevenf(float %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) + ret i64 %i +} + +define i32 @testnswd(double %a) { +; CHECK-LABEL: testnswd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtns w0, d0 +; CHECK-NEXT: ret +entry: + %r = call double @roundeven(double %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + ret i32 %i +} + +define i64 @testnsxd(double %a) { +; CHECK-LABEL: testnsxd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtns x0, d0 +; CHECK-NEXT: ret +entry: + %r = call double @roundeven(double %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + ret i64 %i +} + declare i32 @llvm.fptosi.sat.i32.bf16 (bfloat) declare i64 @llvm.fptosi.sat.i64.bf16 (bfloat) declare i32 @llvm.fptosi.sat.i32.f16 (half) @@ -451,11 +537,14 @@ declare half @llvm.floor.f16(half) nounwind readnone declare half @llvm.ceil.f16(half) nounwind readnone declare half @llvm.trunc.f16(half) nounwind readnone declare half @llvm.round.f16(half) nounwind readnone +declare half @llvm.roundeven.f16(half) nounwind readnone declare float @floorf(float) nounwind readnone declare float @ceilf(float) nounwind readnone declare float @truncf(float) nounwind readnone declare float @roundf(float) nounwind readnone +declare float @roundevenf(float) nounwind readnone declare double @floor(double) nounwind readnone declare double @ceil(double) nounwind readnone declare double @trunc(double) nounwind readnone declare double @round(double) nounwind readnone +declare double @roundeven(double) nounwind readnone diff --git 
a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll index 21382e2802e4a..d5759f7fe5430 100644 --- a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll @@ -346,6 +346,92 @@ entry: ret i64 %i } +; Round to nearest, ties to even (fcvtnu). + +define i32 @testnuwh(half %a) { +; CHECK-CVT-LABEL: testnuwh: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintn s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu w0, s0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: testnuwh: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtnu w0, h0 +; CHECK-FP16-NEXT: ret +entry: + %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + ret i32 %i +} + +define i64 @testnuxh(half %a) { +; CHECK-CVT-LABEL: testnuxh: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintn s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu x0, s0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: testnuxh: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtnu x0, h0 +; CHECK-FP16-NEXT: ret +entry: + %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + ret i64 %i +} + +define i32 @testnuws(float %a) { +; CHECK-LABEL: testnuws: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtnu w0, s0 +; CHECK-NEXT: ret +entry: + %r = call float @roundevenf(float %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) + ret i32 %i +} + +define i64 @testnuxs(float %a) { +; CHECK-LABEL: testnuxs: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtnu x0, s0 +; CHECK-NEXT: ret +entry: + %r = call float @roundevenf(float %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + ret i64 %i +} + 
+define i32 @testnuwd(double %a) { +; CHECK-LABEL: testnuwd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtnu w0, d0 +; CHECK-NEXT: ret +entry: + %r = call double @roundeven(double %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + ret i32 %i +} + +define i64 @testnuxd(double %a) { +; CHECK-LABEL: testnuxd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtnu x0, d0 +; CHECK-NEXT: ret +entry: + %r = call double @roundeven(double %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) + ret i64 %i +} + declare i32 @llvm.fptoui.sat.i32.f16 (half) declare i64 @llvm.fptoui.sat.i64.f16 (half) declare i32 @llvm.fptoui.sat.i32.f32 (float) @@ -357,11 +443,14 @@ declare half @llvm.floor.f16(half) nounwind readnone declare half @llvm.ceil.f16(half) nounwind readnone declare half @llvm.trunc.f16(half) nounwind readnone declare half @llvm.round.f16(half) nounwind readnone +declare half @llvm.roundeven.f16(half) nounwind readnone declare float @floorf(float) nounwind readnone declare float @ceilf(float) nounwind readnone declare float @truncf(float) nounwind readnone declare float @roundf(float) nounwind readnone +declare float @roundevenf(float) nounwind readnone declare double @floor(double) nounwind readnone declare double @ceil(double) nounwind readnone declare double @trunc(double) nounwind readnone declare double @round(double) nounwind readnone +declare double @roundeven(double) nounwind readnone From 1eaf0d4edc771b3e8e6a531240b80a37ecc30d49 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Thu, 4 Dec 2025 07:21:00 -0500 Subject: [PATCH 08/16] [AArch64] Regen roundeven tests We can't get that nice codegen yet; those changes should've been for a separate PR --- .../AArch64/round-fptosi-sat-scalar.ll | 28 ++++++++++++++++--- .../AArch64/round-fptoui-sat-scalar.ll | 28 ++++++++++++++++--- 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll 
b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll index ade261dec644c..aedeccc6d566a 100644 --- a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll @@ -479,7 +479,12 @@ entry: define i32 @testnsws(float %a) { ; CHECK-LABEL: testnsws: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtns w0, s0 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl roundevenf +; CHECK-NEXT: fcvtzs w0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %r = call float @roundevenf(float %a) nounwind readnone @@ -490,7 +495,12 @@ entry: define i64 @testnsxs(float %a) { ; CHECK-LABEL: testnsxs: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtns x0, s0 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl roundevenf +; CHECK-NEXT: fcvtzs x0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %r = call float @roundevenf(float %a) nounwind readnone @@ -501,7 +511,12 @@ entry: define i32 @testnswd(double %a) { ; CHECK-LABEL: testnswd: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtns w0, d0 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: fcvtzs w0, d0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %r = call double @roundeven(double %a) nounwind readnone @@ -512,7 +527,12 @@ entry: define i64 @testnsxd(double %a) { ; CHECK-LABEL: testnsxd: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtns x0, d0 +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: fcvtzs x0, d0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %r = call double @roundeven(double %a) nounwind readnone diff --git a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll index d5759f7fe5430..da2c228296957 100644 --- a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll @@ -391,7 +391,12 @@ entry: define i32 @testnuws(float %a) { ; CHECK-LABEL: testnuws: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtnu w0, s0 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl roundevenf +; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %r = call float @roundevenf(float %a) nounwind readnone @@ -402,7 +407,12 @@ entry: define i64 @testnuxs(float %a) { ; CHECK-LABEL: testnuxs: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtnu x0, s0 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl roundevenf +; CHECK-NEXT: fcvtzu x0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %r = call float @roundevenf(float %a) nounwind readnone @@ -413,7 +423,12 @@ entry: define i32 @testnuwd(double %a) { ; CHECK-LABEL: testnuwd: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtnu w0, d0 +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %r = call double @roundeven(double %a) nounwind readnone @@ -424,7 +439,12 @@ entry: define i64 @testnuxd(double %a) { ; CHECK-LABEL: testnuxd: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtnu x0, d0 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: fcvtzu x0, d0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %r = call double @roundeven(double %a) nounwind readnone From 0886610a2ddd4b7610e26ed8ea595ba355ccfbf6 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Thu, 4 Dec 2025 11:57:12 -0500 Subject: [PATCH 09/16] [AArch64] Add global-isel to new tests --- llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll index bfc96216928fc..780bdebd832b6 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-NO16 ; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 ; ; Tests for fused round + convert to int patterns (FCVTAS, FCVTAU, FCVTMS, FCVTMU, etc.) 
From 9ca61d11654efc7d89be54179a29c212aa2f83ec Mon Sep 17 00:00:00 2001 From: valadaptive Date: Tue, 9 Dec 2025 10:15:35 -0500 Subject: [PATCH 10/16] [AArch64] Use roundeven intrinsic for tests --- .../AArch64/round-fptosi-sat-scalar.ll | 38 ++++--------------- .../AArch64/round-fptoui-sat-scalar.ll | 38 ++++--------------- 2 files changed, 16 insertions(+), 60 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll index aedeccc6d566a..e17fd84478628 100644 --- a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll @@ -479,15 +479,10 @@ entry: define i32 @testnsws(float %a) { ; CHECK-LABEL: testnsws: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl roundevenf -; CHECK-NEXT: fcvtzs w0, s0 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: fcvtns w0, s0 ; CHECK-NEXT: ret entry: - %r = call float @roundevenf(float %a) nounwind readnone + %r = call float @llvm.roundeven.f32(float %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) ret i32 %i } @@ -495,15 +490,10 @@ entry: define i64 @testnsxs(float %a) { ; CHECK-LABEL: testnsxs: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl roundevenf -; CHECK-NEXT: fcvtzs x0, s0 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: fcvtns x0, s0 ; CHECK-NEXT: ret entry: - %r = call float @roundevenf(float %a) nounwind readnone + %r = call float @llvm.roundeven.f32(float %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) ret i64 %i } @@ -511,15 +501,10 @@ entry: define i32 @testnswd(double %a) { ; CHECK-LABEL: testnswd: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl roundeven -; CHECK-NEXT: fcvtzs w0, d0 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: fcvtns w0, d0 ; CHECK-NEXT: ret entry: - %r = call double @roundeven(double %a) nounwind readnone + %r = call double @llvm.roundeven.f64(double %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) ret i32 %i } @@ -527,15 +512,10 @@ entry: define i64 @testnsxd(double %a) { ; CHECK-LABEL: testnsxd: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl roundeven -; CHECK-NEXT: fcvtzs x0, d0 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: fcvtns x0, d0 ; CHECK-NEXT: ret entry: - %r = call double @roundeven(double %a) nounwind readnone + %r = call double @llvm.roundeven.f64(double %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) ret i64 %i } @@ -562,9 +542,7 @@ declare float @floorf(float) nounwind readnone declare float @ceilf(float) nounwind readnone declare float @truncf(float) nounwind readnone declare float @roundf(float) nounwind readnone -declare float @roundevenf(float) nounwind readnone declare double @floor(double) nounwind readnone declare double @ceil(double) nounwind readnone declare double @trunc(double) nounwind readnone declare double @round(double) nounwind readnone -declare double @roundeven(double) nounwind readnone diff --git a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll index da2c228296957..33ea696472882 100644 --- a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll @@ -391,15 +391,10 @@ entry: define i32 @testnuws(float %a) { ; CHECK-LABEL: testnuws: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl roundevenf -; CHECK-NEXT: fcvtzu w0, s0 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: fcvtnu w0, s0 ; CHECK-NEXT: ret entry: - %r = call float @roundevenf(float %a) nounwind readnone + %r = call float @llvm.roundeven.f32(float %a) nounwind readnone %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) ret i32 %i } @@ -407,15 +402,10 @@ entry: define i64 @testnuxs(float %a) { ; CHECK-LABEL: testnuxs: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl roundevenf -; CHECK-NEXT: fcvtzu x0, s0 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: fcvtnu x0, s0 ; CHECK-NEXT: ret entry: - %r = call float @roundevenf(float %a) nounwind readnone + %r = call float @llvm.roundeven.f32(float %a) nounwind readnone %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) ret i64 %i } @@ -423,15 +413,10 @@ entry: define i32 @testnuwd(double %a) { ; CHECK-LABEL: testnuwd: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl roundeven -; CHECK-NEXT: fcvtzu w0, d0 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: fcvtnu w0, d0 ; CHECK-NEXT: ret entry: - %r = call double @roundeven(double %a) nounwind readnone + %r = call double @llvm.roundeven.f64(double %a) nounwind readnone %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) ret i32 %i } @@ -439,15 +424,10 @@ entry: define i64 @testnuxd(double %a) { ; CHECK-LABEL: testnuxd: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl roundeven -; CHECK-NEXT: fcvtzu x0, d0 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: fcvtnu x0, d0 ; CHECK-NEXT: ret entry: - %r = call double @roundeven(double %a) nounwind readnone + %r = call double @llvm.roundeven.f64(double %a) nounwind readnone %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) ret i64 %i } @@ -468,9 +448,7 @@ declare float @floorf(float) nounwind readnone declare float @ceilf(float) nounwind readnone declare float @truncf(float) nounwind readnone declare float @roundf(float) nounwind readnone -declare float @roundevenf(float) nounwind readnone declare double @floor(double) nounwind readnone declare double @ceil(double) nounwind readnone declare double @trunc(double) nounwind readnone declare double @round(double) nounwind readnone -declare double @roundeven(double) nounwind readnone From f62a1122b42a1c1bc9507893b6c000751a64584b Mon Sep 17 00:00:00 2001 From: valadaptive Date: Tue, 9 Dec 2025 10:22:33 -0500 Subject: [PATCH 11/16] [AArch64] Add non-saturating roundeven+fpto[su]i tests --- llvm/test/CodeGen/AArch64/round-conv.ll | 82 +++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/round-conv.ll b/llvm/test/CodeGen/AArch64/round-conv.ll index 5ed7d9409e3dd..84cc516a56c05 100644 --- a/llvm/test/CodeGen/AArch64/round-conv.ll +++ b/llvm/test/CodeGen/AArch64/round-conv.ll @@ -250,6 +250,16 @@ entry: ret i32 %conv } +; CHECK-LABEL: testnsws: +; CHECK: fcvtns w0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i32 @testnsws(float %a) { +entry: + %call = call float @llvm.roundeven.f32(float %a) + %conv = fptosi float %call to i32 + ret i32 %conv +} + ; CHECK-LABEL: testasxs: ; CHECK: fcvtas x0, s0 ; CHECK-NOT: frintx {{s[0-9]+}}, s0 @@ -260,6 +270,16 @@ entry: ret i64 %conv } +; CHECK-LABEL: testnsxs: +; CHECK: fcvtns x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 
+define i64 @testnsxs(float %a) { +entry: + %call = call float @llvm.roundeven.f32(float %a) + %conv = fptosi float %call to i64 + ret i64 %conv +} + ; CHECK-LABEL: testaswd: ; CHECK: fcvtas w0, d0 ; CHECK-NOT: frintx {{d[0-9]+}}, d0 @@ -270,6 +290,16 @@ entry: ret i32 %conv } +; CHECK-LABEL: testnswd: +; CHECK: fcvtns w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testnswd(double %a) { +entry: + %call = call double @llvm.roundeven.f64(double %a) + %conv = fptosi double %call to i32 + ret i32 %conv +} + ; CHECK-LABEL: testasxd: ; CHECK: fcvtas x0, d0 ; CHECK-NOT: frintx {{d[0-9]+}}, d0 @@ -280,6 +310,16 @@ entry: ret i64 %conv } +; CHECK-LABEL: testnsxd: +; CHECK: fcvtns x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testnsxd(double %a) { +entry: + %call = call double @llvm.roundeven.f64(double %a) + %conv = fptosi double %call to i64 + ret i64 %conv +} + ; CHECK-LABEL: testauws: ; CHECK: fcvtau w0, s0 ; CHECK-NOT: frintx {{s[0-9]+}}, s0 @@ -290,6 +330,16 @@ entry: ret i32 %conv } +; CHECK-LABEL: testnuws: +; CHECK: fcvtnu w0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i32 @testnuws(float %a) { +entry: + %call = call float @llvm.roundeven.f32(float %a) + %conv = fptoui float %call to i32 + ret i32 %conv +} + ; CHECK-LABEL: testauxs: ; CHECK: fcvtau x0, s0 ; CHECK-NOT: frintx {{s[0-9]+}}, s0 @@ -300,6 +350,16 @@ entry: ret i64 %conv } +; CHECK-LABEL: testnuxs: +; CHECK: fcvtnu x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i64 @testnuxs(float %a) { +entry: + %call = call float @llvm.roundeven.f32(float %a) + %conv = fptoui float %call to i64 + ret i64 %conv +} + ; CHECK-LABEL: testauwd: ; CHECK: fcvtau w0, d0 ; CHECK-NOT: frintx {{d[0-9]+}}, d0 @@ -310,6 +370,16 @@ entry: ret i32 %conv } +; CHECK-LABEL: testnuwd: +; CHECK: fcvtnu w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testnuwd(double %a) { +entry: + %call = call double @llvm.roundeven.f64(double %a) + %conv = fptoui double %call to i32 + ret i32 %conv +} + ; 
CHECK-LABEL: testauxd: ; CHECK: fcvtau x0, d0 ; CHECK-NOT: frintx {{d[0-9]+}}, d0 @@ -320,6 +390,16 @@ entry: ret i64 %conv } +; CHECK-LABEL: testnuxd: +; CHECK: fcvtnu x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testnuxd(double %a) { +entry: + %call = call double @llvm.roundeven.f64(double %a) + %conv = fptoui double %call to i64 + ret i64 %conv +} + declare float @floorf(float) nounwind readnone declare double @floor(double) nounwind readnone declare float @ceilf(float) nounwind readnone @@ -328,3 +408,5 @@ declare float @truncf(float) nounwind readnone declare double @trunc(double) nounwind readnone declare float @roundf(float) nounwind readnone declare double @round(double) nounwind readnone +declare float @llvm.roundeven.f32(float) +declare double @llvm.roundeven.f64(double) From dd5b29a4059c4bfde47ae46802e2a76d34ad94ce Mon Sep 17 00:00:00 2001 From: valadaptive Date: Tue, 9 Dec 2025 10:45:21 -0500 Subject: [PATCH 12/16] [AArch64] Use intrinsics for all the rounding tests --- .../AArch64/round-fptosi-sat-scalar.ll | 95 +++++++------------ .../AArch64/round-fptoui-sat-scalar.ll | 80 ++++++---------- 2 files changed, 64 insertions(+), 111 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll index e17fd84478628..e49ec8144f678 100644 --- a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll @@ -21,7 +21,7 @@ define i32 @testmswbf(bfloat %a) { ; CHECK-NEXT: fcvtzs w0, s0 ; CHECK-NEXT: ret entry: - %r = call bfloat @llvm.floor.bf16(bfloat %a) nounwind readnone + %r = call bfloat @llvm.floor.bf16(bfloat %a) %i = call i32 @llvm.fptosi.sat.i32.bf16(bfloat %r) ret i32 %i } @@ -43,7 +43,7 @@ define i64 @testmsxbf(bfloat %a) { ; CHECK-NEXT: fcvtzs x0, s0 ; CHECK-NEXT: ret entry: - %r = call bfloat @llvm.floor.bf16(bfloat %a) nounwind readnone + %r = call bfloat @llvm.floor.bf16(bfloat %a) %i = call i64 
@llvm.fptosi.sat.i64.bf16(bfloat %r) ret i64 %i } @@ -63,7 +63,7 @@ define i32 @testmswh(half %a) { ; CHECK-FP16-NEXT: fcvtms w0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) ret i32 %i } @@ -83,7 +83,7 @@ define i64 @testmsxh(half %a) { ; CHECK-FP16-NEXT: fcvtms x0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) ret i64 %i } @@ -94,7 +94,7 @@ define i32 @testmsws(float %a) { ; CHECK-NEXT: fcvtms w0, s0 ; CHECK-NEXT: ret entry: - %r = call float @floorf(float %a) nounwind readnone + %r = call float @llvm.floor.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) ret i32 %i } @@ -105,7 +105,7 @@ define i64 @testmsxs(float %a) { ; CHECK-NEXT: fcvtms x0, s0 ; CHECK-NEXT: ret entry: - %r = call float @floorf(float %a) nounwind readnone + %r = call float @llvm.floor.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) ret i64 %i } @@ -116,7 +116,7 @@ define i32 @testmswd(double %a) { ; CHECK-NEXT: fcvtms w0, d0 ; CHECK-NEXT: ret entry: - %r = call double @floor(double %a) nounwind readnone + %r = call double @llvm.floor.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) ret i32 %i } @@ -127,7 +127,7 @@ define i64 @testmsxd(double %a) { ; CHECK-NEXT: fcvtms x0, d0 ; CHECK-NEXT: ret entry: - %r = call double @floor(double %a) nounwind readnone + %r = call double @llvm.floor.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) ret i64 %i } @@ -151,7 +151,7 @@ define i32 @testpswbf(bfloat %a) { ; CHECK-NEXT: fcvtzs w0, s0 ; CHECK-NEXT: ret entry: - %r = call bfloat @llvm.ceil.bf16(bfloat %a) nounwind readnone + %r = call bfloat @llvm.ceil.bf16(bfloat %a) %i = call i32 @llvm.fptosi.sat.i32.bf16(bfloat %r) ret i32 %i } @@ -173,7 +173,7 @@ define i64 
@testpsxbf(bfloat %a) { ; CHECK-NEXT: fcvtzs x0, s0 ; CHECK-NEXT: ret entry: - %r = call bfloat @llvm.ceil.bf16(bfloat %a) nounwind readnone + %r = call bfloat @llvm.ceil.bf16(bfloat %a) %i = call i64 @llvm.fptosi.sat.i64.bf16(bfloat %r) ret i64 %i } @@ -193,7 +193,7 @@ define i32 @testpswh(half %a) { ; CHECK-FP16-NEXT: fcvtps w0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) ret i32 %i } @@ -213,7 +213,7 @@ define i64 @testpsxh(half %a) { ; CHECK-FP16-NEXT: fcvtps x0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) ret i64 %i } @@ -224,7 +224,7 @@ define i32 @testpsws(float %a) { ; CHECK-NEXT: fcvtps w0, s0 ; CHECK-NEXT: ret entry: - %r = call float @ceilf(float %a) nounwind readnone + %r = call float @llvm.ceil.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) ret i32 %i } @@ -235,7 +235,7 @@ define i64 @testpsxs(float %a) { ; CHECK-NEXT: fcvtps x0, s0 ; CHECK-NEXT: ret entry: - %r = call float @ceilf(float %a) nounwind readnone + %r = call float @llvm.ceil.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) ret i64 %i } @@ -246,7 +246,7 @@ define i32 @testpswd(double %a) { ; CHECK-NEXT: fcvtps w0, d0 ; CHECK-NEXT: ret entry: - %r = call double @ceil(double %a) nounwind readnone + %r = call double @llvm.ceil.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) ret i32 %i } @@ -257,7 +257,7 @@ define i64 @testpsxd(double %a) { ; CHECK-NEXT: fcvtps x0, d0 ; CHECK-NEXT: ret entry: - %r = call double @ceil(double %a) nounwind readnone + %r = call double @llvm.ceil.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) ret i64 %i } @@ -279,7 +279,7 @@ define i32 @testzswh(half %a) { ; CHECK-FP16-NEXT: fcvtzs w0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call 
half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) ret i32 %i } @@ -299,7 +299,7 @@ define i64 @testzsxh(half %a) { ; CHECK-FP16-NEXT: fcvtzs x0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) ret i64 %i } @@ -310,7 +310,7 @@ define i32 @testzsws(float %a) { ; CHECK-NEXT: fcvtzs w0, s0 ; CHECK-NEXT: ret entry: - %r = call float @truncf(float %a) nounwind readnone + %r = call float @llvm.trunc.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) ret i32 %i } @@ -321,7 +321,7 @@ define i64 @testzsxs(float %a) { ; CHECK-NEXT: fcvtzs x0, s0 ; CHECK-NEXT: ret entry: - %r = call float @truncf(float %a) nounwind readnone + %r = call float @llvm.trunc.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) ret i64 %i } @@ -332,7 +332,7 @@ define i32 @testzswd(double %a) { ; CHECK-NEXT: fcvtzs w0, d0 ; CHECK-NEXT: ret entry: - %r = call double @trunc(double %a) nounwind readnone + %r = call double @llvm.trunc.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) ret i32 %i } @@ -343,7 +343,7 @@ define i64 @testzsxd(double %a) { ; CHECK-NEXT: fcvtzs x0, d0 ; CHECK-NEXT: ret entry: - %r = call double @trunc(double %a) nounwind readnone + %r = call double @llvm.trunc.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) ret i64 %i } @@ -365,7 +365,7 @@ define i32 @testaswh(half %a) { ; CHECK-FP16-NEXT: fcvtas w0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) ret i32 %i } @@ -385,7 +385,7 @@ define i64 @testasxh(half %a) { ; CHECK-FP16-NEXT: fcvtas x0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) 
%i = call i64 @llvm.fptosi.sat.i64.f16(half %r) ret i64 %i } @@ -396,7 +396,7 @@ define i32 @testasws(float %a) { ; CHECK-NEXT: fcvtas w0, s0 ; CHECK-NEXT: ret entry: - %r = call float @roundf(float %a) nounwind readnone + %r = call float @llvm.round.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) ret i32 %i } @@ -407,7 +407,7 @@ define i64 @testasxs(float %a) { ; CHECK-NEXT: fcvtas x0, s0 ; CHECK-NEXT: ret entry: - %r = call float @roundf(float %a) nounwind readnone + %r = call float @llvm.round.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) ret i64 %i } @@ -418,7 +418,7 @@ define i32 @testaswd(double %a) { ; CHECK-NEXT: fcvtas w0, d0 ; CHECK-NEXT: ret entry: - %r = call double @round(double %a) nounwind readnone + %r = call double @llvm.round.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) ret i32 %i } @@ -429,7 +429,7 @@ define i64 @testasxd(double %a) { ; CHECK-NEXT: fcvtas x0, d0 ; CHECK-NEXT: ret entry: - %r = call double @round(double %a) nounwind readnone + %r = call double @llvm.round.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) ret i64 %i } @@ -451,7 +451,7 @@ define i32 @testnswh(half %a) { ; CHECK-FP16-NEXT: fcvtns w0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %r = call half @llvm.roundeven.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) ret i32 %i } @@ -471,7 +471,7 @@ define i64 @testnsxh(half %a) { ; CHECK-FP16-NEXT: fcvtns x0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %r = call half @llvm.roundeven.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) ret i64 %i } @@ -482,7 +482,7 @@ define i32 @testnsws(float %a) { ; CHECK-NEXT: fcvtns w0, s0 ; CHECK-NEXT: ret entry: - %r = call float @llvm.roundeven.f32(float %a) nounwind readnone + %r = call float @llvm.roundeven.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) ret i32 %i } @@ -493,7 
+493,7 @@ define i64 @testnsxs(float %a) { ; CHECK-NEXT: fcvtns x0, s0 ; CHECK-NEXT: ret entry: - %r = call float @llvm.roundeven.f32(float %a) nounwind readnone + %r = call float @llvm.roundeven.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) ret i64 %i } @@ -504,7 +504,7 @@ define i32 @testnswd(double %a) { ; CHECK-NEXT: fcvtns w0, d0 ; CHECK-NEXT: ret entry: - %r = call double @llvm.roundeven.f64(double %a) nounwind readnone + %r = call double @llvm.roundeven.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) ret i32 %i } @@ -515,34 +515,7 @@ define i64 @testnsxd(double %a) { ; CHECK-NEXT: fcvtns x0, d0 ; CHECK-NEXT: ret entry: - %r = call double @llvm.roundeven.f64(double %a) nounwind readnone + %r = call double @llvm.roundeven.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) ret i64 %i } - -declare i32 @llvm.fptosi.sat.i32.bf16 (bfloat) -declare i64 @llvm.fptosi.sat.i64.bf16 (bfloat) -declare i32 @llvm.fptosi.sat.i32.f16 (half) -declare i64 @llvm.fptosi.sat.i64.f16 (half) -declare i32 @llvm.fptosi.sat.i32.f32 (float) -declare i64 @llvm.fptosi.sat.i64.f32 (float) -declare i32 @llvm.fptosi.sat.i32.f64 (double) -declare i64 @llvm.fptosi.sat.i64.f64 (double) - -declare bfloat @llvm.floor.bf16(bfloat) nounwind readnone -declare bfloat @llvm.ceil.bf16(bfloat) nounwind readnone -declare bfloat @llvm.trunc.bf16(bfloat) nounwind readnone -declare bfloat @llvm.round.bf16(bfloat) nounwind readnone -declare half @llvm.floor.f16(half) nounwind readnone -declare half @llvm.ceil.f16(half) nounwind readnone -declare half @llvm.trunc.f16(half) nounwind readnone -declare half @llvm.round.f16(half) nounwind readnone -declare half @llvm.roundeven.f16(half) nounwind readnone -declare float @floorf(float) nounwind readnone -declare float @ceilf(float) nounwind readnone -declare float @truncf(float) nounwind readnone -declare float @roundf(float) nounwind readnone -declare double @floor(double) nounwind readnone -declare double 
@ceil(double) nounwind readnone -declare double @trunc(double) nounwind readnone -declare double @round(double) nounwind readnone diff --git a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll index 33ea696472882..a445ecc8172a1 100644 --- a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll @@ -19,7 +19,7 @@ define i32 @testmuwh(half %a) { ; CHECK-FP16-NEXT: fcvtmu w0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) ret i32 %i } @@ -39,7 +39,7 @@ define i64 @testmuxh(half %a) { ; CHECK-FP16-NEXT: fcvtmu x0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) ret i64 %i } @@ -50,7 +50,7 @@ define i32 @testmuws(float %a) { ; CHECK-NEXT: fcvtmu w0, s0 ; CHECK-NEXT: ret entry: - %r = call float @floorf(float %a) nounwind readnone + %r = call float @llvm.floor.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) ret i32 %i } @@ -61,7 +61,7 @@ define i64 @testmuxs(float %a) { ; CHECK-NEXT: fcvtmu x0, s0 ; CHECK-NEXT: ret entry: - %r = call float @floorf(float %a) nounwind readnone + %r = call float @llvm.floor.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) ret i64 %i } @@ -72,7 +72,7 @@ define i32 @testmuwd(double %a) { ; CHECK-NEXT: fcvtmu w0, d0 ; CHECK-NEXT: ret entry: - %r = call double @floor(double %a) nounwind readnone + %r = call double @llvm.floor.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) ret i32 %i } @@ -83,7 +83,7 @@ define i64 @testmuxd(double %a) { ; CHECK-NEXT: fcvtmu x0, d0 ; CHECK-NEXT: ret entry: - %r = call double @floor(double %a) nounwind readnone + %r = call double @llvm.floor.f64(double %a) %i = call i64 
@llvm.fptoui.sat.i64.f64(double %r) ret i64 %i } @@ -105,7 +105,7 @@ define i32 @testpuwh(half %a) { ; CHECK-FP16-NEXT: fcvtpu w0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) ret i32 %i } @@ -125,7 +125,7 @@ define i64 @testpuxh(half %a) { ; CHECK-FP16-NEXT: fcvtpu x0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) ret i64 %i } @@ -136,7 +136,7 @@ define i32 @testpuws(float %a) { ; CHECK-NEXT: fcvtpu w0, s0 ; CHECK-NEXT: ret entry: - %r = call float @ceilf(float %a) nounwind readnone + %r = call float @llvm.ceil.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) ret i32 %i } @@ -147,7 +147,7 @@ define i64 @testpuxs(float %a) { ; CHECK-NEXT: fcvtpu x0, s0 ; CHECK-NEXT: ret entry: - %r = call float @ceilf(float %a) nounwind readnone + %r = call float @llvm.ceil.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) ret i64 %i } @@ -158,7 +158,7 @@ define i32 @testpuwd(double %a) { ; CHECK-NEXT: fcvtpu w0, d0 ; CHECK-NEXT: ret entry: - %r = call double @ceil(double %a) nounwind readnone + %r = call double @llvm.ceil.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) ret i32 %i } @@ -169,7 +169,7 @@ define i64 @testpuxd(double %a) { ; CHECK-NEXT: fcvtpu x0, d0 ; CHECK-NEXT: ret entry: - %r = call double @ceil(double %a) nounwind readnone + %r = call double @llvm.ceil.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) ret i64 %i } @@ -191,7 +191,7 @@ define i32 @testzuwh(half %a) { ; CHECK-FP16-NEXT: fcvtzu w0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) ret i32 %i } @@ -211,7 +211,7 @@ define i64 @testzuxh(half %a) { ; 
CHECK-FP16-NEXT: fcvtzu x0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) ret i64 %i } @@ -222,7 +222,7 @@ define i32 @testzuws(float %a) { ; CHECK-NEXT: fcvtzu w0, s0 ; CHECK-NEXT: ret entry: - %r = call float @truncf(float %a) nounwind readnone + %r = call float @llvm.trunc.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) ret i32 %i } @@ -233,7 +233,7 @@ define i64 @testzuxs(float %a) { ; CHECK-NEXT: fcvtzu x0, s0 ; CHECK-NEXT: ret entry: - %r = call float @truncf(float %a) nounwind readnone + %r = call float @llvm.trunc.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) ret i64 %i } @@ -244,7 +244,7 @@ define i32 @testzuwd(double %a) { ; CHECK-NEXT: fcvtzu w0, d0 ; CHECK-NEXT: ret entry: - %r = call double @trunc(double %a) nounwind readnone + %r = call double @llvm.trunc.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) ret i32 %i } @@ -255,7 +255,7 @@ define i64 @testzuxd(double %a) { ; CHECK-NEXT: fcvtzu x0, d0 ; CHECK-NEXT: ret entry: - %r = call double @trunc(double %a) nounwind readnone + %r = call double @llvm.trunc.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) ret i64 %i } @@ -277,7 +277,7 @@ define i32 @testauwh(half %a) { ; CHECK-FP16-NEXT: fcvtau w0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) ret i32 %i } @@ -297,7 +297,7 @@ define i64 @testauxh(half %a) { ; CHECK-FP16-NEXT: fcvtau x0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) ret i64 %i } @@ -308,7 +308,7 @@ define i32 @testauws(float %a) { ; CHECK-NEXT: fcvtau w0, s0 ; CHECK-NEXT: ret entry: - %r = call float @roundf(float %a) 
nounwind readnone + %r = call float @llvm.round.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) ret i32 %i } @@ -319,7 +319,7 @@ define i64 @testauxs(float %a) { ; CHECK-NEXT: fcvtau x0, s0 ; CHECK-NEXT: ret entry: - %r = call float @roundf(float %a) nounwind readnone + %r = call float @llvm.round.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) ret i64 %i } @@ -330,7 +330,7 @@ define i32 @testauwd(double %a) { ; CHECK-NEXT: fcvtau w0, d0 ; CHECK-NEXT: ret entry: - %r = call double @round(double %a) nounwind readnone + %r = call double @llvm.round.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) ret i32 %i } @@ -341,7 +341,7 @@ define i64 @testauxd(double %a) { ; CHECK-NEXT: fcvtau x0, d0 ; CHECK-NEXT: ret entry: - %r = call double @round(double %a) nounwind readnone + %r = call double @llvm.round.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) ret i64 %i } @@ -363,7 +363,7 @@ define i32 @testnuwh(half %a) { ; CHECK-FP16-NEXT: fcvtnu w0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %r = call half @llvm.roundeven.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) ret i32 %i } @@ -383,7 +383,7 @@ define i64 @testnuxh(half %a) { ; CHECK-FP16-NEXT: fcvtnu x0, h0 ; CHECK-FP16-NEXT: ret entry: - %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %r = call half @llvm.roundeven.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) ret i64 %i } @@ -394,7 +394,7 @@ define i32 @testnuws(float %a) { ; CHECK-NEXT: fcvtnu w0, s0 ; CHECK-NEXT: ret entry: - %r = call float @llvm.roundeven.f32(float %a) nounwind readnone + %r = call float @llvm.roundeven.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) ret i32 %i } @@ -405,7 +405,7 @@ define i64 @testnuxs(float %a) { ; CHECK-NEXT: fcvtnu x0, s0 ; CHECK-NEXT: ret entry: - %r = call float @llvm.roundeven.f32(float %a) nounwind readnone + %r = call float 
@llvm.roundeven.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) ret i64 %i } @@ -416,7 +416,7 @@ define i32 @testnuwd(double %a) { ; CHECK-NEXT: fcvtnu w0, d0 ; CHECK-NEXT: ret entry: - %r = call double @llvm.roundeven.f64(double %a) nounwind readnone + %r = call double @llvm.roundeven.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) ret i32 %i } @@ -427,28 +427,8 @@ define i64 @testnuxd(double %a) { ; CHECK-NEXT: fcvtnu x0, d0 ; CHECK-NEXT: ret entry: - %r = call double @llvm.roundeven.f64(double %a) nounwind readnone + %r = call double @llvm.roundeven.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) ret i64 %i } -declare i32 @llvm.fptoui.sat.i32.f16 (half) -declare i64 @llvm.fptoui.sat.i64.f16 (half) -declare i32 @llvm.fptoui.sat.i32.f32 (float) -declare i64 @llvm.fptoui.sat.i64.f32 (float) -declare i32 @llvm.fptoui.sat.i32.f64 (double) -declare i64 @llvm.fptoui.sat.i64.f64 (double) - -declare half @llvm.floor.f16(half) nounwind readnone -declare half @llvm.ceil.f16(half) nounwind readnone -declare half @llvm.trunc.f16(half) nounwind readnone -declare half @llvm.round.f16(half) nounwind readnone -declare half @llvm.roundeven.f16(half) nounwind readnone -declare float @floorf(float) nounwind readnone -declare float @ceilf(float) nounwind readnone -declare float @truncf(float) nounwind readnone -declare float @roundf(float) nounwind readnone -declare double @floor(double) nounwind readnone -declare double @ceil(double) nounwind readnone -declare double @trunc(double) nounwind readnone -declare double @round(double) nounwind readnone From bc5441b025ae4aac854c8d654af939a16c5eb964 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Tue, 9 Dec 2025 10:47:01 -0500 Subject: [PATCH 13/16] [AArch64] Add GlobalISel to fpto[su]i+round tests --- .../AArch64/round-fptosi-sat-scalar.ll | 245 ++++++++++++++++++ .../AArch64/round-fptoui-sat-scalar.ll | 181 +++++++++++++ 2 files changed, 426 insertions(+) diff --git 
a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll index e49ec8144f678..995798af6b6b3 100644 --- a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 +; RUN: llc < %s -mtriple aarch64 -global-isel -global-isel-abort=2 -mattr=+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK-GI ; Round towards minus infinity (fcvtms). @@ -20,6 +21,22 @@ define i32 @testmswbf(bfloat %a) { ; CHECK-NEXT: shll v0.4s, v0.4h, #16 ; CHECK-NEXT: fcvtzs w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmswbf: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-GI-NEXT: mov w8, #32767 // =0x7fff +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: frintm s0, s0 +; CHECK-GI-NEXT: fmov w9, s0 +; CHECK-GI-NEXT: ubfx w10, w9, #16, #1 +; CHECK-GI-NEXT: add w8, w9, w8 +; CHECK-GI-NEXT: add w8, w10, w8 +; CHECK-GI-NEXT: lsr w8, w8, #16 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret entry: %r = call bfloat @llvm.floor.bf16(bfloat %a) %i = call i32 @llvm.fptosi.sat.i32.bf16(bfloat %r) @@ -42,6 +59,22 @@ define i64 @testmsxbf(bfloat %a) { ; CHECK-NEXT: shll v0.4s, v0.4h, #16 ; CHECK-NEXT: fcvtzs x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmsxbf: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-GI-NEXT: mov w8, #32767 // =0x7fff +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: frintm s0, s0 +; CHECK-GI-NEXT: fmov w9, s0 +; CHECK-GI-NEXT: ubfx w10, w9, #16, #1 +; CHECK-GI-NEXT: add w8, w9, w8 +; CHECK-GI-NEXT: add w8, w10, w8 
+; CHECK-GI-NEXT: lsr w8, w8, #16 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret entry: %r = call bfloat @llvm.floor.bf16(bfloat %a) %i = call i64 @llvm.fptosi.sat.i64.bf16(bfloat %r) @@ -62,6 +95,12 @@ define i32 @testmswh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtms w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testmswh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintm h0, h0 +; CHECK-GI-NEXT: fcvtzs w0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.floor.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) @@ -82,6 +121,12 @@ define i64 @testmsxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtms x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testmsxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintm h0, h0 +; CHECK-GI-NEXT: fcvtzs x0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.floor.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) @@ -93,6 +138,12 @@ define i32 @testmsws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtms w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmsws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintm s0, s0 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.floor.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) @@ -104,6 +155,12 @@ define i64 @testmsxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtms x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmsxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintm s0, s0 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.floor.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) @@ -115,6 +172,12 @@ define i32 @testmswd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtms w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmswd: +; CHECK-GI: // %bb.0: // %entry +; 
CHECK-GI-NEXT: frintm d0, d0 +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.floor.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) @@ -126,6 +189,12 @@ define i64 @testmsxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtms x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmsxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintm d0, d0 +; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.floor.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) @@ -150,6 +219,22 @@ define i32 @testpswbf(bfloat %a) { ; CHECK-NEXT: shll v0.4s, v0.4h, #16 ; CHECK-NEXT: fcvtzs w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpswbf: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-GI-NEXT: mov w8, #32767 // =0x7fff +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: frintp s0, s0 +; CHECK-GI-NEXT: fmov w9, s0 +; CHECK-GI-NEXT: ubfx w10, w9, #16, #1 +; CHECK-GI-NEXT: add w8, w9, w8 +; CHECK-GI-NEXT: add w8, w10, w8 +; CHECK-GI-NEXT: lsr w8, w8, #16 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret entry: %r = call bfloat @llvm.ceil.bf16(bfloat %a) %i = call i32 @llvm.fptosi.sat.i32.bf16(bfloat %r) @@ -172,6 +257,22 @@ define i64 @testpsxbf(bfloat %a) { ; CHECK-NEXT: shll v0.4s, v0.4h, #16 ; CHECK-NEXT: fcvtzs x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpsxbf: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-GI-NEXT: mov w8, #32767 // =0x7fff +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: frintp s0, s0 +; CHECK-GI-NEXT: fmov w9, s0 +; CHECK-GI-NEXT: ubfx w10, w9, #16, #1 +; CHECK-GI-NEXT: add w8, w9, w8 +; CHECK-GI-NEXT: add w8, w10, w8 +; CHECK-GI-NEXT: lsr w8, w8, #16 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; 
CHECK-GI-NEXT: ret entry: %r = call bfloat @llvm.ceil.bf16(bfloat %a) %i = call i64 @llvm.fptosi.sat.i64.bf16(bfloat %r) @@ -192,6 +293,12 @@ define i32 @testpswh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtps w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testpswh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintp h0, h0 +; CHECK-GI-NEXT: fcvtzs w0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.ceil.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) @@ -212,6 +319,12 @@ define i64 @testpsxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtps x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testpsxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintp h0, h0 +; CHECK-GI-NEXT: fcvtzs x0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.ceil.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) @@ -223,6 +336,12 @@ define i32 @testpsws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtps w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpsws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintp s0, s0 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.ceil.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) @@ -234,6 +353,12 @@ define i64 @testpsxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtps x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpsxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintp s0, s0 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.ceil.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) @@ -245,6 +370,12 @@ define i32 @testpswd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtps w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpswd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintp d0, d0 +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.ceil.f64(double %a) %i = call i32 
@llvm.fptosi.sat.i32.f64(double %r) @@ -256,6 +387,12 @@ define i64 @testpsxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtps x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpsxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintp d0, d0 +; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.ceil.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) @@ -278,6 +415,12 @@ define i32 @testzswh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzs w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testzswh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintz h0, h0 +; CHECK-GI-NEXT: fcvtzs w0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.trunc.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) @@ -298,6 +441,12 @@ define i64 @testzsxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzs x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testzsxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintz h0, h0 +; CHECK-GI-NEXT: fcvtzs x0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.trunc.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) @@ -309,6 +458,12 @@ define i32 @testzsws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzs w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzsws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintz s0, s0 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.trunc.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) @@ -320,6 +475,12 @@ define i64 @testzsxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzs x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzsxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintz s0, s0 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.trunc.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) @@ -331,6 +492,12 @@ define i32 @testzswd(double 
%a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzs w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzswd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintz d0, d0 +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.trunc.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) @@ -342,6 +509,12 @@ define i64 @testzsxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzs x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzsxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintz d0, d0 +; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.trunc.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) @@ -364,6 +537,12 @@ define i32 @testaswh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtas w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testaswh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frinta h0, h0 +; CHECK-GI-NEXT: fcvtzs w0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.round.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) @@ -384,6 +563,12 @@ define i64 @testasxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtas x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testasxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frinta h0, h0 +; CHECK-GI-NEXT: fcvtzs x0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.round.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) @@ -395,6 +580,12 @@ define i32 @testasws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtas w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testasws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frinta s0, s0 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.round.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) @@ -406,6 +597,12 @@ define i64 @testasxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtas x0, s0 ; CHECK-NEXT: ret 
+; +; CHECK-GI-LABEL: testasxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frinta s0, s0 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.round.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) @@ -417,6 +614,12 @@ define i32 @testaswd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtas w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testaswd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frinta d0, d0 +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.round.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) @@ -428,6 +631,12 @@ define i64 @testasxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtas x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testasxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frinta d0, d0 +; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.round.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) @@ -450,6 +659,12 @@ define i32 @testnswh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtns w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testnswh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintn h0, h0 +; CHECK-GI-NEXT: fcvtzs w0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.roundeven.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) @@ -470,6 +685,12 @@ define i64 @testnsxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtns x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testnsxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintn h0, h0 +; CHECK-GI-NEXT: fcvtzs x0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.roundeven.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) @@ -481,6 +702,12 @@ define i32 @testnsws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtns w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnsws: +; CHECK-GI: // %bb.0: // %entry +; 
CHECK-GI-NEXT: frintn s0, s0 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.roundeven.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) @@ -492,6 +719,12 @@ define i64 @testnsxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtns x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnsxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintn s0, s0 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.roundeven.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) @@ -503,6 +736,12 @@ define i32 @testnswd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtns w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnswd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintn d0, d0 +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.roundeven.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) @@ -514,6 +753,12 @@ define i64 @testnsxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtns x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnsxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintn d0, d0 +; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.roundeven.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) diff --git a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll index a445ecc8172a1..ff470da44e0de 100644 --- a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 +; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fullfp16 2>&1 
| FileCheck %s --check-prefixes=CHECK-GI ; Round towards minus infinity (fcvtmu). @@ -18,6 +19,12 @@ define i32 @testmuwh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtmu w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testmuwh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintm h0, h0 +; CHECK-GI-NEXT: fcvtzu w0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.floor.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) @@ -38,6 +45,12 @@ define i64 @testmuxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtmu x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testmuxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintm h0, h0 +; CHECK-GI-NEXT: fcvtzu x0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.floor.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) @@ -49,6 +62,12 @@ define i32 @testmuws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtmu w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmuws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintm s0, s0 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.floor.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) @@ -60,6 +79,12 @@ define i64 @testmuxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtmu x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmuxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintm s0, s0 +; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.floor.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) @@ -71,6 +96,12 @@ define i32 @testmuwd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtmu w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmuwd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintm d0, d0 +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.floor.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) @@ -82,6 
+113,12 @@ define i64 @testmuxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtmu x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmuxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintm d0, d0 +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.floor.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) @@ -104,6 +141,12 @@ define i32 @testpuwh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtpu w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testpuwh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintp h0, h0 +; CHECK-GI-NEXT: fcvtzu w0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.ceil.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) @@ -124,6 +167,12 @@ define i64 @testpuxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtpu x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testpuxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintp h0, h0 +; CHECK-GI-NEXT: fcvtzu x0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.ceil.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) @@ -135,6 +184,12 @@ define i32 @testpuws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtpu w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpuws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintp s0, s0 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.ceil.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) @@ -146,6 +201,12 @@ define i64 @testpuxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtpu x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpuxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintp s0, s0 +; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.ceil.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) @@ -157,6 +218,12 @@ define i32 @testpuwd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: 
fcvtpu w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpuwd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintp d0, d0 +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.ceil.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) @@ -168,6 +235,12 @@ define i64 @testpuxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtpu x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpuxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintp d0, d0 +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.ceil.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) @@ -190,6 +263,12 @@ define i32 @testzuwh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testzuwh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintz h0, h0 +; CHECK-GI-NEXT: fcvtzu w0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.trunc.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) @@ -210,6 +289,12 @@ define i64 @testzuxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testzuxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintz h0, h0 +; CHECK-GI-NEXT: fcvtzu x0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.trunc.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) @@ -221,6 +306,12 @@ define i32 @testzuws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzu w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzuws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintz s0, s0 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.trunc.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) @@ -232,6 +323,12 @@ define i64 @testzuxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzu x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzuxs: +; CHECK-GI: // 
%bb.0: // %entry +; CHECK-GI-NEXT: frintz s0, s0 +; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.trunc.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) @@ -243,6 +340,12 @@ define i32 @testzuwd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzu w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzuwd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintz d0, d0 +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.trunc.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) @@ -254,6 +357,12 @@ define i64 @testzuxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzu x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzuxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintz d0, d0 +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.trunc.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) @@ -276,6 +385,12 @@ define i32 @testauwh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtau w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testauwh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frinta h0, h0 +; CHECK-GI-NEXT: fcvtzu w0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.round.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) @@ -296,6 +411,12 @@ define i64 @testauxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtau x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testauxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frinta h0, h0 +; CHECK-GI-NEXT: fcvtzu x0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.round.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) @@ -307,6 +428,12 @@ define i32 @testauws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtau w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testauws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frinta s0, s0 +; CHECK-GI-NEXT: fcvtzu w0, 
s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.round.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) @@ -318,6 +445,12 @@ define i64 @testauxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtau x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testauxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frinta s0, s0 +; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.round.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) @@ -329,6 +462,12 @@ define i32 @testauwd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtau w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testauwd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frinta d0, d0 +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.round.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) @@ -340,6 +479,12 @@ define i64 @testauxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtau x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testauxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frinta d0, d0 +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.round.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) @@ -362,6 +507,12 @@ define i32 @testnuwh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtnu w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testnuwh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintn h0, h0 +; CHECK-GI-NEXT: fcvtzu w0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.roundeven.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) @@ -382,6 +533,12 @@ define i64 @testnuxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtnu x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testnuxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintn h0, h0 +; CHECK-GI-NEXT: fcvtzu x0, h0 +; CHECK-GI-NEXT: ret entry: %r = call half @llvm.roundeven.f16(half 
%a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) @@ -393,6 +550,12 @@ define i32 @testnuws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtnu w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnuws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintn s0, s0 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.roundeven.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) @@ -404,6 +567,12 @@ define i64 @testnuxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtnu x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnuxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintn s0, s0 +; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: ret entry: %r = call float @llvm.roundeven.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) @@ -415,6 +584,12 @@ define i32 @testnuwd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtnu w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnuwd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintn d0, d0 +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.roundeven.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) @@ -426,6 +601,12 @@ define i64 @testnuxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtnu x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnuxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: frintn d0, d0 +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret entry: %r = call double @llvm.roundeven.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) From fb752c5f5f1edba39755dd375b5b64e40c74371f Mon Sep 17 00:00:00 2001 From: valadaptive Date: Tue, 9 Dec 2025 11:09:08 -0500 Subject: [PATCH 14/16] [AArch64][GlobalISel] Add patterns for scalar float-to-int conversions --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 15 ++++ .../AArch64/round-fptosi-sat-scalar.ll | 78 ++++++------------- .../AArch64/round-fptoui-sat-scalar.ll | 78 ++++++------------- 3 
files changed, 63 insertions(+), 108 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 0df70cace9892..a7d3a2ad424b8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6835,6 +6835,21 @@ multiclass FPToIntegerPats(INST # UXDr) f64:$Rn)>; + let Predicates = [HasFullFP16] in { + def : Pat<(i32 (to_int_sat_gi (round f16:$Rn))), + (!cast<Instruction>(INST # UWHr) f16:$Rn)>; + def : Pat<(i64 (to_int_sat_gi (round f16:$Rn))), + (!cast<Instruction>(INST # UXHr) f16:$Rn)>; + } + def : Pat<(i32 (to_int_sat_gi (round f32:$Rn))), + (!cast<Instruction>(INST # UWSr) f32:$Rn)>; + def : Pat<(i64 (to_int_sat_gi (round f32:$Rn))), + (!cast<Instruction>(INST # UXSr) f32:$Rn)>; + def : Pat<(i32 (to_int_sat_gi (round f64:$Rn))), + (!cast<Instruction>(INST # UWDr) f64:$Rn)>; + def : Pat<(i64 (to_int_sat_gi (round f64:$Rn))), + (!cast<Instruction>(INST # UXDr) f64:$Rn)>; + // For global-isel we can use register classes to determine // which FCVT instruction to use. 
let Predicates = [HasFPRCVT] in { diff --git a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll index 995798af6b6b3..3a742fa64f71c 100644 --- a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll @@ -98,8 +98,7 @@ define i32 @testmswh(half %a) { ; ; CHECK-GI-LABEL: testmswh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintm h0, h0 -; CHECK-GI-NEXT: fcvtzs w0, h0 +; CHECK-GI-NEXT: fcvtms w0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.floor.f16(half %a) @@ -124,8 +123,7 @@ define i64 @testmsxh(half %a) { ; ; CHECK-GI-LABEL: testmsxh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintm h0, h0 -; CHECK-GI-NEXT: fcvtzs x0, h0 +; CHECK-GI-NEXT: fcvtms x0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.floor.f16(half %a) @@ -141,8 +139,7 @@ define i32 @testmsws(float %a) { ; ; CHECK-GI-LABEL: testmsws: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintm s0, s0 -; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: fcvtms w0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float @llvm.floor.f32(float %a) @@ -158,8 +155,7 @@ define i64 @testmsxs(float %a) { ; ; CHECK-GI-LABEL: testmsxs: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintm s0, s0 -; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: fcvtms x0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float @llvm.floor.f32(float %a) @@ -175,8 +171,7 @@ define i32 @testmswd(double %a) { ; ; CHECK-GI-LABEL: testmswd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintm d0, d0 -; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: fcvtms w0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.floor.f64(double %a) @@ -192,8 +187,7 @@ define i64 @testmsxd(double %a) { ; ; CHECK-GI-LABEL: testmsxd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintm d0, d0 -; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: fcvtms x0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.floor.f64(double %a) 
@@ -296,8 +290,7 @@ define i32 @testpswh(half %a) { ; ; CHECK-GI-LABEL: testpswh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintp h0, h0 -; CHECK-GI-NEXT: fcvtzs w0, h0 +; CHECK-GI-NEXT: fcvtps w0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.ceil.f16(half %a) @@ -322,8 +315,7 @@ define i64 @testpsxh(half %a) { ; ; CHECK-GI-LABEL: testpsxh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintp h0, h0 -; CHECK-GI-NEXT: fcvtzs x0, h0 +; CHECK-GI-NEXT: fcvtps x0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.ceil.f16(half %a) @@ -339,8 +331,7 @@ define i32 @testpsws(float %a) { ; ; CHECK-GI-LABEL: testpsws: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintp s0, s0 -; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: fcvtps w0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float @llvm.ceil.f32(float %a) @@ -356,8 +347,7 @@ define i64 @testpsxs(float %a) { ; ; CHECK-GI-LABEL: testpsxs: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintp s0, s0 -; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: fcvtps x0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float @llvm.ceil.f32(float %a) @@ -373,8 +363,7 @@ define i32 @testpswd(double %a) { ; ; CHECK-GI-LABEL: testpswd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintp d0, d0 -; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: fcvtps w0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.ceil.f64(double %a) @@ -390,8 +379,7 @@ define i64 @testpsxd(double %a) { ; ; CHECK-GI-LABEL: testpsxd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintp d0, d0 -; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: fcvtps x0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.ceil.f64(double %a) @@ -418,7 +406,6 @@ define i32 @testzswh(half %a) { ; ; CHECK-GI-LABEL: testzswh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintz h0, h0 ; CHECK-GI-NEXT: fcvtzs w0, h0 ; CHECK-GI-NEXT: ret entry: @@ -444,7 +431,6 @@ define i64 @testzsxh(half %a) { ; ; CHECK-GI-LABEL: testzsxh: ; CHECK-GI: // %bb.0: // %entry 
-; CHECK-GI-NEXT: frintz h0, h0 ; CHECK-GI-NEXT: fcvtzs x0, h0 ; CHECK-GI-NEXT: ret entry: @@ -461,7 +447,6 @@ define i32 @testzsws(float %a) { ; ; CHECK-GI-LABEL: testzsws: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintz s0, s0 ; CHECK-GI-NEXT: fcvtzs w0, s0 ; CHECK-GI-NEXT: ret entry: @@ -478,7 +463,6 @@ define i64 @testzsxs(float %a) { ; ; CHECK-GI-LABEL: testzsxs: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintz s0, s0 ; CHECK-GI-NEXT: fcvtzs x0, s0 ; CHECK-GI-NEXT: ret entry: @@ -495,7 +479,6 @@ define i32 @testzswd(double %a) { ; ; CHECK-GI-LABEL: testzswd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintz d0, d0 ; CHECK-GI-NEXT: fcvtzs w0, d0 ; CHECK-GI-NEXT: ret entry: @@ -512,7 +495,6 @@ define i64 @testzsxd(double %a) { ; ; CHECK-GI-LABEL: testzsxd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintz d0, d0 ; CHECK-GI-NEXT: fcvtzs x0, d0 ; CHECK-GI-NEXT: ret entry: @@ -540,8 +522,7 @@ define i32 @testaswh(half %a) { ; ; CHECK-GI-LABEL: testaswh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frinta h0, h0 -; CHECK-GI-NEXT: fcvtzs w0, h0 +; CHECK-GI-NEXT: fcvtas w0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.round.f16(half %a) @@ -566,8 +547,7 @@ define i64 @testasxh(half %a) { ; ; CHECK-GI-LABEL: testasxh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frinta h0, h0 -; CHECK-GI-NEXT: fcvtzs x0, h0 +; CHECK-GI-NEXT: fcvtas x0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.round.f16(half %a) @@ -583,8 +563,7 @@ define i32 @testasws(float %a) { ; ; CHECK-GI-LABEL: testasws: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frinta s0, s0 -; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: fcvtas w0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float @llvm.round.f32(float %a) @@ -600,8 +579,7 @@ define i64 @testasxs(float %a) { ; ; CHECK-GI-LABEL: testasxs: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frinta s0, s0 -; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: fcvtas x0, s0 ; CHECK-GI-NEXT: ret entry: %r = 
call float @llvm.round.f32(float %a) @@ -617,8 +595,7 @@ define i32 @testaswd(double %a) { ; ; CHECK-GI-LABEL: testaswd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frinta d0, d0 -; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: fcvtas w0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.round.f64(double %a) @@ -634,8 +611,7 @@ define i64 @testasxd(double %a) { ; ; CHECK-GI-LABEL: testasxd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frinta d0, d0 -; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: fcvtas x0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.round.f64(double %a) @@ -662,8 +638,7 @@ define i32 @testnswh(half %a) { ; ; CHECK-GI-LABEL: testnswh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintn h0, h0 -; CHECK-GI-NEXT: fcvtzs w0, h0 +; CHECK-GI-NEXT: fcvtns w0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.roundeven.f16(half %a) @@ -688,8 +663,7 @@ define i64 @testnsxh(half %a) { ; ; CHECK-GI-LABEL: testnsxh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintn h0, h0 -; CHECK-GI-NEXT: fcvtzs x0, h0 +; CHECK-GI-NEXT: fcvtns x0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.roundeven.f16(half %a) @@ -705,8 +679,7 @@ define i32 @testnsws(float %a) { ; ; CHECK-GI-LABEL: testnsws: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintn s0, s0 -; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: fcvtns w0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float @llvm.roundeven.f32(float %a) @@ -722,8 +695,7 @@ define i64 @testnsxs(float %a) { ; ; CHECK-GI-LABEL: testnsxs: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintn s0, s0 -; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: fcvtns x0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float @llvm.roundeven.f32(float %a) @@ -739,8 +711,7 @@ define i32 @testnswd(double %a) { ; ; CHECK-GI-LABEL: testnswd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintn d0, d0 -; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: fcvtns w0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double 
@llvm.roundeven.f64(double %a) @@ -756,8 +727,7 @@ define i64 @testnsxd(double %a) { ; ; CHECK-GI-LABEL: testnsxd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintn d0, d0 -; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: fcvtns x0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.roundeven.f64(double %a) diff --git a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll index ff470da44e0de..b409658e1cd33 100644 --- a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll @@ -22,8 +22,7 @@ define i32 @testmuwh(half %a) { ; ; CHECK-GI-LABEL: testmuwh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintm h0, h0 -; CHECK-GI-NEXT: fcvtzu w0, h0 +; CHECK-GI-NEXT: fcvtmu w0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.floor.f16(half %a) @@ -48,8 +47,7 @@ define i64 @testmuxh(half %a) { ; ; CHECK-GI-LABEL: testmuxh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintm h0, h0 -; CHECK-GI-NEXT: fcvtzu x0, h0 +; CHECK-GI-NEXT: fcvtmu x0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.floor.f16(half %a) @@ -65,8 +63,7 @@ define i32 @testmuws(float %a) { ; ; CHECK-GI-LABEL: testmuws: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintm s0, s0 -; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: fcvtmu w0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float @llvm.floor.f32(float %a) @@ -82,8 +79,7 @@ define i64 @testmuxs(float %a) { ; ; CHECK-GI-LABEL: testmuxs: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintm s0, s0 -; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: fcvtmu x0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float @llvm.floor.f32(float %a) @@ -99,8 +95,7 @@ define i32 @testmuwd(double %a) { ; ; CHECK-GI-LABEL: testmuwd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintm d0, d0 -; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: fcvtmu w0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.floor.f64(double %a) @@ 
-116,8 +111,7 @@ define i64 @testmuxd(double %a) { ; ; CHECK-GI-LABEL: testmuxd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintm d0, d0 -; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: fcvtmu x0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.floor.f64(double %a) @@ -144,8 +138,7 @@ define i32 @testpuwh(half %a) { ; ; CHECK-GI-LABEL: testpuwh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintp h0, h0 -; CHECK-GI-NEXT: fcvtzu w0, h0 +; CHECK-GI-NEXT: fcvtpu w0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.ceil.f16(half %a) @@ -170,8 +163,7 @@ define i64 @testpuxh(half %a) { ; ; CHECK-GI-LABEL: testpuxh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintp h0, h0 -; CHECK-GI-NEXT: fcvtzu x0, h0 +; CHECK-GI-NEXT: fcvtpu x0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.ceil.f16(half %a) @@ -187,8 +179,7 @@ define i32 @testpuws(float %a) { ; ; CHECK-GI-LABEL: testpuws: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintp s0, s0 -; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: fcvtpu w0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float @llvm.ceil.f32(float %a) @@ -204,8 +195,7 @@ define i64 @testpuxs(float %a) { ; ; CHECK-GI-LABEL: testpuxs: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintp s0, s0 -; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: fcvtpu x0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float @llvm.ceil.f32(float %a) @@ -221,8 +211,7 @@ define i32 @testpuwd(double %a) { ; ; CHECK-GI-LABEL: testpuwd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintp d0, d0 -; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: fcvtpu w0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.ceil.f64(double %a) @@ -238,8 +227,7 @@ define i64 @testpuxd(double %a) { ; ; CHECK-GI-LABEL: testpuxd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintp d0, d0 -; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: fcvtpu x0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.ceil.f64(double %a) @@ -266,7 +254,6 @@ define i32 
@testzuwh(half %a) { ; ; CHECK-GI-LABEL: testzuwh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintz h0, h0 ; CHECK-GI-NEXT: fcvtzu w0, h0 ; CHECK-GI-NEXT: ret entry: @@ -292,7 +279,6 @@ define i64 @testzuxh(half %a) { ; ; CHECK-GI-LABEL: testzuxh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintz h0, h0 ; CHECK-GI-NEXT: fcvtzu x0, h0 ; CHECK-GI-NEXT: ret entry: @@ -309,7 +295,6 @@ define i32 @testzuws(float %a) { ; ; CHECK-GI-LABEL: testzuws: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintz s0, s0 ; CHECK-GI-NEXT: fcvtzu w0, s0 ; CHECK-GI-NEXT: ret entry: @@ -326,7 +311,6 @@ define i64 @testzuxs(float %a) { ; ; CHECK-GI-LABEL: testzuxs: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintz s0, s0 ; CHECK-GI-NEXT: fcvtzu x0, s0 ; CHECK-GI-NEXT: ret entry: @@ -343,7 +327,6 @@ define i32 @testzuwd(double %a) { ; ; CHECK-GI-LABEL: testzuwd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintz d0, d0 ; CHECK-GI-NEXT: fcvtzu w0, d0 ; CHECK-GI-NEXT: ret entry: @@ -360,7 +343,6 @@ define i64 @testzuxd(double %a) { ; ; CHECK-GI-LABEL: testzuxd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintz d0, d0 ; CHECK-GI-NEXT: fcvtzu x0, d0 ; CHECK-GI-NEXT: ret entry: @@ -388,8 +370,7 @@ define i32 @testauwh(half %a) { ; ; CHECK-GI-LABEL: testauwh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frinta h0, h0 -; CHECK-GI-NEXT: fcvtzu w0, h0 +; CHECK-GI-NEXT: fcvtau w0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.round.f16(half %a) @@ -414,8 +395,7 @@ define i64 @testauxh(half %a) { ; ; CHECK-GI-LABEL: testauxh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frinta h0, h0 -; CHECK-GI-NEXT: fcvtzu x0, h0 +; CHECK-GI-NEXT: fcvtau x0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.round.f16(half %a) @@ -431,8 +411,7 @@ define i32 @testauws(float %a) { ; ; CHECK-GI-LABEL: testauws: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frinta s0, s0 -; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: fcvtau w0, s0 ; CHECK-GI-NEXT: ret entry: 
%r = call float @llvm.round.f32(float %a) @@ -448,8 +427,7 @@ define i64 @testauxs(float %a) { ; ; CHECK-GI-LABEL: testauxs: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frinta s0, s0 -; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: fcvtau x0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float @llvm.round.f32(float %a) @@ -465,8 +443,7 @@ define i32 @testauwd(double %a) { ; ; CHECK-GI-LABEL: testauwd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frinta d0, d0 -; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: fcvtau w0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.round.f64(double %a) @@ -482,8 +459,7 @@ define i64 @testauxd(double %a) { ; ; CHECK-GI-LABEL: testauxd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frinta d0, d0 -; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: fcvtau x0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.round.f64(double %a) @@ -510,8 +486,7 @@ define i32 @testnuwh(half %a) { ; ; CHECK-GI-LABEL: testnuwh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintn h0, h0 -; CHECK-GI-NEXT: fcvtzu w0, h0 +; CHECK-GI-NEXT: fcvtnu w0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.roundeven.f16(half %a) @@ -536,8 +511,7 @@ define i64 @testnuxh(half %a) { ; ; CHECK-GI-LABEL: testnuxh: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintn h0, h0 -; CHECK-GI-NEXT: fcvtzu x0, h0 +; CHECK-GI-NEXT: fcvtnu x0, h0 ; CHECK-GI-NEXT: ret entry: %r = call half @llvm.roundeven.f16(half %a) @@ -553,8 +527,7 @@ define i32 @testnuws(float %a) { ; ; CHECK-GI-LABEL: testnuws: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintn s0, s0 -; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: fcvtnu w0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float @llvm.roundeven.f32(float %a) @@ -570,8 +543,7 @@ define i64 @testnuxs(float %a) { ; ; CHECK-GI-LABEL: testnuxs: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintn s0, s0 -; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: fcvtnu x0, s0 ; CHECK-GI-NEXT: ret entry: %r = call float 
@llvm.roundeven.f32(float %a) @@ -587,8 +559,7 @@ define i32 @testnuwd(double %a) { ; ; CHECK-GI-LABEL: testnuwd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintn d0, d0 -; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: fcvtnu w0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.roundeven.f64(double %a) @@ -604,8 +575,7 @@ define i64 @testnuxd(double %a) { ; ; CHECK-GI-LABEL: testnuxd: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: frintn d0, d0 -; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: fcvtnu x0, d0 ; CHECK-GI-NEXT: ret entry: %r = call double @llvm.roundeven.f64(double %a) From 4ac866287f496afb77f6a5a465304f7faa5f099b Mon Sep 17 00:00:00 2001 From: valadaptive Date: Wed, 10 Dec 2025 12:44:44 -0500 Subject: [PATCH 15/16] [AArch64] Remove `nounwind readnone` from some tests #171288 changed the tests to call into floating-point intrinsics instead of libcalls, and these attributes are already implied. --- .../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll | 40 ++++++------ llvm/test/CodeGen/AArch64/round-conv.ll | 64 +++++++++---------- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll index 48e7972b04a6c..3f6ad552ec200 100644 --- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll @@ -1285,7 +1285,7 @@ define float @fcvtas_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtas s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1302,7 +1302,7 @@ define double @fcvtas_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtas d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) %bc = 
bitcast i64 %i to double ret double %bc @@ -1385,7 +1385,7 @@ define float @fcvtau_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtau s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1402,7 +1402,7 @@ define double @fcvtau_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtau d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -1485,7 +1485,7 @@ define float @fcvtns_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtns s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %r = call half @llvm.roundeven.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1502,7 +1502,7 @@ define double @fcvtns_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtns d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %r = call half @llvm.roundeven.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -1585,7 +1585,7 @@ define float @fcvtnu_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtnu s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %r = call half @llvm.roundeven.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1602,7 +1602,7 @@ define double @fcvtnu_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtnu d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.roundeven.f16(half %a) nounwind readnone + %r = call half @llvm.roundeven.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret 
double %bc @@ -1685,7 +1685,7 @@ define float @fcvtms_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtms s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1702,7 +1702,7 @@ define double @fcvtms_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtms d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -1785,7 +1785,7 @@ define float @fcvtmu_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtmu s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1802,7 +1802,7 @@ define double @fcvtmu_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtmu d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -1885,7 +1885,7 @@ define float @fcvtps_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtps s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1902,7 +1902,7 @@ define double @fcvtps_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtps d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -1985,7 +1985,7 @@ define float @fcvtpu_sh_simd(half 
%a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtpu s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -2002,7 +2002,7 @@ define double @fcvtpu_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtpu d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -2085,7 +2085,7 @@ define float @fcvtzs_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtzs s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -2102,7 +2102,7 @@ define double @fcvtzs_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtzs d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -2185,7 +2185,7 @@ define float @fcvtzu_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtzu s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -2202,7 +2202,7 @@ define double @fcvtzu_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtzu d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc diff --git a/llvm/test/CodeGen/AArch64/round-conv.ll b/llvm/test/CodeGen/AArch64/round-conv.ll index 
9559a9c4b978a..66e5297aa2d68 100644 --- a/llvm/test/CodeGen/AArch64/round-conv.ll +++ b/llvm/test/CodeGen/AArch64/round-conv.ll @@ -5,7 +5,7 @@ ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testmsws(float %a) { entry: - %call = call float @llvm.floor.f32(float %a) nounwind readnone + %call = call float @llvm.floor.f32(float %a) %conv = fptosi float %call to i32 ret i32 %conv } @@ -15,7 +15,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testmsxs(float %a) { entry: - %call = call float @llvm.floor.f32(float %a) nounwind readnone + %call = call float @llvm.floor.f32(float %a) %conv = fptosi float %call to i64 ret i64 %conv } @@ -25,7 +25,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testmswd(double %a) { entry: - %call = call double @llvm.floor.f64(double %a) nounwind readnone + %call = call double @llvm.floor.f64(double %a) %conv = fptosi double %call to i32 ret i32 %conv } @@ -35,7 +35,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testmsxd(double %a) { entry: - %call = call double @llvm.floor.f64(double %a) nounwind readnone + %call = call double @llvm.floor.f64(double %a) %conv = fptosi double %call to i64 ret i64 %conv } @@ -45,7 +45,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testmuws(float %a) { entry: - %call = call float @llvm.floor.f32(float %a) nounwind readnone + %call = call float @llvm.floor.f32(float %a) %conv = fptoui float %call to i32 ret i32 %conv } @@ -55,7 +55,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testmuxs(float %a) { entry: - %call = call float @llvm.floor.f32(float %a) nounwind readnone + %call = call float @llvm.floor.f32(float %a) %conv = fptoui float %call to i64 ret i64 %conv } @@ -65,7 +65,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testmuwd(double %a) { entry: - %call = call double @llvm.floor.f64(double %a) nounwind readnone + %call = call double @llvm.floor.f64(double %a) %conv = fptoui double %call to i32 ret i32 %conv } @@ -75,7 +75,7 @@ 
entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testmuxd(double %a) { entry: - %call = call double @llvm.floor.f64(double %a) nounwind readnone + %call = call double @llvm.floor.f64(double %a) %conv = fptoui double %call to i64 ret i64 %conv } @@ -85,7 +85,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testpsws(float %a) { entry: - %call = call float @llvm.ceil.f32(float %a) nounwind readnone + %call = call float @llvm.ceil.f32(float %a) %conv = fptosi float %call to i32 ret i32 %conv } @@ -95,7 +95,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testpsxs(float %a) { entry: - %call = call float @llvm.ceil.f32(float %a) nounwind readnone + %call = call float @llvm.ceil.f32(float %a) %conv = fptosi float %call to i64 ret i64 %conv } @@ -105,7 +105,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testpswd(double %a) { entry: - %call = call double @llvm.ceil.f64(double %a) nounwind readnone + %call = call double @llvm.ceil.f64(double %a) %conv = fptosi double %call to i32 ret i32 %conv } @@ -115,7 +115,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testpsxd(double %a) { entry: - %call = call double @llvm.ceil.f64(double %a) nounwind readnone + %call = call double @llvm.ceil.f64(double %a) %conv = fptosi double %call to i64 ret i64 %conv } @@ -125,7 +125,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testpuws(float %a) { entry: - %call = call float @llvm.ceil.f32(float %a) nounwind readnone + %call = call float @llvm.ceil.f32(float %a) %conv = fptoui float %call to i32 ret i32 %conv } @@ -135,7 +135,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testpuxs(float %a) { entry: - %call = call float @llvm.ceil.f32(float %a) nounwind readnone + %call = call float @llvm.ceil.f32(float %a) %conv = fptoui float %call to i64 ret i64 %conv } @@ -145,7 +145,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testpuwd(double %a) { entry: - %call = call double @llvm.ceil.f64(double %a) nounwind 
readnone + %call = call double @llvm.ceil.f64(double %a) %conv = fptoui double %call to i32 ret i32 %conv } @@ -155,7 +155,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testpuxd(double %a) { entry: - %call = call double @llvm.ceil.f64(double %a) nounwind readnone + %call = call double @llvm.ceil.f64(double %a) %conv = fptoui double %call to i64 ret i64 %conv } @@ -165,7 +165,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testzsws(float %a) { entry: - %call = call float @llvm.trunc.f32(float %a) nounwind readnone + %call = call float @llvm.trunc.f32(float %a) %conv = fptosi float %call to i32 ret i32 %conv } @@ -175,7 +175,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testzsxs(float %a) { entry: - %call = call float @llvm.trunc.f32(float %a) nounwind readnone + %call = call float @llvm.trunc.f32(float %a) %conv = fptosi float %call to i64 ret i64 %conv } @@ -185,7 +185,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testzswd(double %a) { entry: - %call = call double @llvm.trunc.f64(double %a) nounwind readnone + %call = call double @llvm.trunc.f64(double %a) %conv = fptosi double %call to i32 ret i32 %conv } @@ -195,7 +195,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testzsxd(double %a) { entry: - %call = call double @llvm.trunc.f64(double %a) nounwind readnone + %call = call double @llvm.trunc.f64(double %a) %conv = fptosi double %call to i64 ret i64 %conv } @@ -205,7 +205,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testzuws(float %a) { entry: - %call = call float @llvm.trunc.f32(float %a) nounwind readnone + %call = call float @llvm.trunc.f32(float %a) %conv = fptoui float %call to i32 ret i32 %conv } @@ -215,7 +215,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testzuxs(float %a) { entry: - %call = call float @llvm.trunc.f32(float %a) nounwind readnone + %call = call float @llvm.trunc.f32(float %a) %conv = fptoui float %call to i64 ret i64 %conv } @@ -225,7 +225,7 @@ 
entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testzuwd(double %a) { entry: - %call = call double @llvm.trunc.f64(double %a) nounwind readnone + %call = call double @llvm.trunc.f64(double %a) %conv = fptoui double %call to i32 ret i32 %conv } @@ -235,7 +235,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testzuxd(double %a) { entry: - %call = call double @llvm.trunc.f64(double %a) nounwind readnone + %call = call double @llvm.trunc.f64(double %a) %conv = fptoui double %call to i64 ret i64 %conv } @@ -245,7 +245,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testasws(float %a) { entry: - %call = call float @llvm.round.f32(float %a) nounwind readnone + %call = call float @llvm.round.f32(float %a) %conv = fptosi float %call to i32 ret i32 %conv } @@ -265,7 +265,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testasxs(float %a) { entry: - %call = call float @llvm.round.f32(float %a) nounwind readnone + %call = call float @llvm.round.f32(float %a) %conv = fptosi float %call to i64 ret i64 %conv } @@ -285,7 +285,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testaswd(double %a) { entry: - %call = call double @llvm.round.f64(double %a) nounwind readnone + %call = call double @llvm.round.f64(double %a) %conv = fptosi double %call to i32 ret i32 %conv } @@ -305,7 +305,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testasxd(double %a) { entry: - %call = call double @llvm.round.f64(double %a) nounwind readnone + %call = call double @llvm.round.f64(double %a) %conv = fptosi double %call to i64 ret i64 %conv } @@ -325,7 +325,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testauws(float %a) { entry: - %call = call float @llvm.round.f32(float %a) nounwind readnone + %call = call float @llvm.round.f32(float %a) %conv = fptoui float %call to i32 ret i32 %conv } @@ -345,7 +345,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testauxs(float %a) { entry: - %call = call float 
@llvm.round.f32(float %a) nounwind readnone + %call = call float @llvm.round.f32(float %a) %conv = fptoui float %call to i64 ret i64 %conv } @@ -365,7 +365,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testauwd(double %a) { entry: - %call = call double @llvm.round.f64(double %a) nounwind readnone + %call = call double @llvm.round.f64(double %a) %conv = fptoui double %call to i32 ret i32 %conv } @@ -385,7 +385,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testauxd(double %a) { entry: - %call = call double @llvm.round.f64(double %a) nounwind readnone + %call = call double @llvm.round.f64(double %a) %conv = fptoui double %call to i64 ret i64 %conv } From 9aa714c996dcf94cb472ffcf73d9bebbb9694e28 Mon Sep 17 00:00:00 2001 From: valadaptive <79560998+valadaptive@users.noreply.github.com> Date: Tue, 16 Dec 2025 09:15:27 -0500 Subject: [PATCH 16/16] Update llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll Co-authored-by: Lukacma --- llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll index 3a742fa64f71c..0286febd01f4b 100644 --- a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 -; RUN: llc < %s -mtriple aarch64 -global-isel -global-isel-abort=2 -mattr=+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK-GI +; RUN: llc < %s -mtriple aarch64 -global-isel -global-isel-abort=2 -mattr=+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; Round towards minus infinity (fcvtms).