-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[AArch64] Optimize more floating-point round+convert combinations into fcvt instructions #170018
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
dc4777e
c4763b2
f6c8b78
d5b88fc
e56d806
ea85543
9507254
1eaf0d4
0886610
9ca61d1
f62a112
dd5b29a
bc5441b
fb752c5
7f64305
4ac8662
9aa714c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5830,6 +5830,33 @@ multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, | |
| defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">; | ||
| defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">; | ||
|
|
||
| // Fused round + convert to int patterns for vectors. | ||
| // Each pattern matches (to_int (round $Rn)) on one vector type and selects the | ||
| // single instruction named INST # <vector type suffix>, taking $Rn directly. | ||
| //   to_int - FP-to-integer conversion node to match (outer node). | ||
| //   round  - FP rounding node to match (inner node, applied to $Rn). | ||
| //   INST   - instruction name prefix; the vector type suffix is appended. | ||
| // The v4f16/v8f16 patterns are only available under HasFullFP16. | ||
| multiclass SIMDTwoVectorFPToIntRoundPats<SDNode to_int, SDNode round, string INST> { | ||
| let Predicates = [HasFullFP16] in { | ||
| def : Pat<(v4i16 (to_int (round v4f16:$Rn))), | ||
| (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>; | ||
| def : Pat<(v8i16 (to_int (round v8f16:$Rn))), | ||
| (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>; | ||
| } | ||
| def : Pat<(v2i32 (to_int (round v2f32:$Rn))), | ||
| (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>; | ||
| def : Pat<(v4i32 (to_int (round v4f32:$Rn))), | ||
| (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>; | ||
| def : Pat<(v2i64 (to_int (round v2f64:$Rn))), | ||
| (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>; | ||
| } | ||
|
|
||
| // Instantiate the fused vector patterns for each rounding node, once for the | ||
| // signed (fp_to_sint) and once for the unsigned (fp_to_uint) conversion: | ||
| //   fceil -> FCVTP{S,U}, ffloor -> FCVTM{S,U}, ftrunc -> FCVTZ{S,U}, | ||
| //   fround -> FCVTA{S,U}, froundeven -> FCVTN{S,U}. | ||
| defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, fceil, "FCVTPS">; | ||
| defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, fceil, "FCVTPU">; | ||
| defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, ffloor, "FCVTMS">; | ||
| defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, ffloor, "FCVTMU">; | ||
| defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, ftrunc, "FCVTZS">; | ||
| defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, ftrunc, "FCVTZU">; | ||
| defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, fround, "FCVTAS">; | ||
| defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, fround, "FCVTAU">; | ||
| defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, froundeven, "FCVTNS">; | ||
| defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, froundeven, "FCVTNU">; | ||
|
|
||
| def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>; | ||
| def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>; | ||
| def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>; | ||
|
|
@@ -6817,14 +6844,16 @@ multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode to_int_sat_g | |
| (!cast<Instruction>(INST # v1i64) f64:$Rn)>; | ||
| } | ||
|
|
||
| defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fceil, "FCVTPS">; | ||
| defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fceil, "FCVTPU">; | ||
| defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ffloor, "FCVTMS">; | ||
| defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ffloor, "FCVTMU">; | ||
| defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ftrunc, "FCVTZS">; | ||
| defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ftrunc, "FCVTZU">; | ||
| defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fround, "FCVTAS">; | ||
| defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fround, "FCVTAU">; | ||
| defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fceil, "FCVTPS">; | ||
| defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fceil, "FCVTPU">; | ||
| defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ffloor, "FCVTMS">; | ||
| defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ffloor, "FCVTMU">; | ||
| defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ftrunc, "FCVTZS">; | ||
| defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ftrunc, "FCVTZU">; | ||
| defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fround, "FCVTAS">; | ||
| defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fround, "FCVTAU">; | ||
| defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, froundeven, "FCVTNS">; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should also add tests for non-bitcast patterns. You can use the existing conversion tests in llvm/test/CodeGen/AArch64/round-conv.ll and llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll if it helps.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That was a good catch; I actually discovered that the […] For now, I've added tests to round-fptosi-sat-scalar.ll and round-fptoui-sat-scalar.ll. The generated code isn't where we want it to be yet, but that's because of the missing SelectionDAG lowering. The round-conv.ll tests have manually written check lines, so I haven't added anything there for now. |
||
| defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, froundeven, "FCVTNU">; | ||
|
|
||
| // f16 -> s16 conversions | ||
| let Predicates = [HasFullFP16] in { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a reason patterns for saturating nodes are not added here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure what happened. I redid this part to just copy all the patterns from
SIMDTwoVectorFPToIntSatPats, but with the additional non-saturating ops and the rounding function as part of the patterns. I'll admit I'm not sure what the GlobalISel versions are (the _gi-suffixed ones), but they're present too now. Is there any logic to why some saturating ops are part of their own multiclass (
FPToIntegerIntPats + FPToIntegerSatPats) and some are part of the same multiclass as their non-saturating counterparts (FPToIntegerPats)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As far as I understand, the _gi nodes are the nodes that GlobalISel produces when lowering the saturating conversion. As for why they couldn't reuse the SDAG ones, I have no idea. But since you added patterns for GlobalISel, I think we should test those as well by adding a GlobalISel RUN line to the vcvt-fused-round test.
As for why patterns are sometimes split, I have no idea. I have just added bitconvert patterns to the classes as they were.