Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 76 additions & 8 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -5848,6 +5848,57 @@ multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, SDNode to_int_sat_gi,
defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;

// Fused round + convert to int patterns for vectors
multiclass SIMDTwoVectorFPToIntRoundPats<SDNode to_int, SDNode to_int_sat, SDNode to_int_sat_gi, SDNode round, string INST> {
let Predicates = [HasFullFP16] in {
def : Pat<(v4i16 (to_int (round v4f16:$Rn))),
(!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
def : Pat<(v8i16 (to_int (round v8f16:$Rn))),
(!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;

def : Pat<(v4i16 (to_int_sat (round v4f16:$Rn), i16)),
(!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
def : Pat<(v8i16 (to_int_sat (round v8f16:$Rn), i16)),
(!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;

def : Pat<(v4i16 (to_int_sat_gi (round v4f16:$Rn))),
(!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
def : Pat<(v8i16 (to_int_sat_gi (round v8f16:$Rn))),
(!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
}
def : Pat<(v2i32 (to_int (round v2f32:$Rn))),
(!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
def : Pat<(v4i32 (to_int (round v4f32:$Rn))),
(!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
def : Pat<(v2i64 (to_int (round v2f64:$Rn))),
(!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;

def : Pat<(v2i32 (to_int_sat (round v2f32:$Rn), i32)),
(!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
def : Pat<(v4i32 (to_int_sat (round v4f32:$Rn), i32)),
(!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
def : Pat<(v2i64 (to_int_sat (round v2f64:$Rn), i64)),
(!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;

def : Pat<(v2i32 (to_int_sat_gi (round v2f32:$Rn))),
(!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
def : Pat<(v4i32 (to_int_sat_gi (round v4f32:$Rn))),
(!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
def : Pat<(v2i64 (to_int_sat_gi (round v2f64:$Rn))),
(!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
}

defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fceil, "FCVTPS">;
defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fceil, "FCVTPU">;
defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ffloor, "FCVTMS">;
defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ffloor, "FCVTMU">;
defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ftrunc, "FCVTZS">;
defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ftrunc, "FCVTZU">;
defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fround, "FCVTAS">;
defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fround, "FCVTAU">;
defm : SIMDTwoVectorFPToIntRoundPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, froundeven, "FCVTNS">;
defm : SIMDTwoVectorFPToIntRoundPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, froundeven, "FCVTNU">;

def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
Expand Down Expand Up @@ -6801,6 +6852,21 @@ multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode to_int_sat_g
def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;

let Predicates = [HasFullFP16] in {
def : Pat<(i32 (to_int_sat_gi (round f16:$Rn))),
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
def : Pat<(i64 (to_int_sat_gi (round f16:$Rn))),
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
}
def : Pat<(i32 (to_int_sat_gi (round f32:$Rn))),
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
def : Pat<(i64 (to_int_sat_gi (round f32:$Rn))),
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
def : Pat<(i32 (to_int_sat_gi (round f64:$Rn))),
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
def : Pat<(i64 (to_int_sat_gi (round f64:$Rn))),
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;

// For global-isel we can use register classes to determine
// which FCVT instruction to use.
let Predicates = [HasFPRCVT] in {
Expand Down Expand Up @@ -6834,14 +6900,16 @@ multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode to_int_sat_g
(!cast<Instruction>(INST # v1i64) f64:$Rn)>;
}

defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fceil, "FCVTPS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fceil, "FCVTPU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ffloor, "FCVTMS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ffloor, "FCVTMU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ftrunc, "FCVTZS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ftrunc, "FCVTZU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fround, "FCVTAU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fceil, "FCVTPS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fceil, "FCVTPU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ffloor, "FCVTMS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ffloor, "FCVTMU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ftrunc, "FCVTZS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ftrunc, "FCVTZU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fround, "FCVTAU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, froundeven, "FCVTNS">;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also add tests for non-bitcast patterns. You can existing tests conversion llvm/test/CodeGen/AArch64/round-conv.ll and /home/marluk01/llvm-project/cvt-round/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll if it helps.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That was a good catch; I actually discovered that the roundeven/roundevenf/roundevenl libcalls are missing from SelectionDAGBuilder::visitCall and can't get lowered into LLVM intrinsics! Since that's a target-agnostic issue and this PR only touches AArch64, I'm going to fix that in a separate PR.

For now, I've added tests to round-fptosi-sat-scalar.ll and round-fptoui-sat-scalar.ll. The generated code isn't where we want it to be yet, but that's because of the missing SelectionDAG lowering. The round-conv.ll tests have manually-written test lines, so I haven't added anything there for now.

defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, froundeven, "FCVTNU">;

let Predicates = [HasFPRCVT] in {
def : Pat<(f32 (bitconvert (i32 (any_lround f16:$Rn)))),
Expand Down
Loading
Loading