|
20 | 20 | #include "clang/Basic/TargetBuiltins.h" |
21 | 21 | #include "clang/CIR/Dialect/IR/CIRTypes.h" |
22 | 22 | #include "clang/CIR/MissingFeatures.h" |
| 23 | +#include "llvm/Support/ErrorHandling.h" |
23 | 24 |
|
24 | 25 | using namespace clang; |
25 | 26 | using namespace clang::CIRGen; |
@@ -163,6 +164,41 @@ static mlir::Value emitX86CompressExpand(CIRGenBuilderTy &builder, |
163 | 164 | mlir::ValueRange{source, mask, maskValue}); |
164 | 165 | } |
165 | 166 |
|
| 167 | +static mlir::Value getBoolMaskVecValue(CIRGenBuilderTy &builder, |
| 168 | + mlir::Location loc, mlir::Value mask, |
| 169 | + unsigned numElems) { |
| 170 | + |
| 171 | + cir::BoolType boolTy = builder.getBoolTy(); |
| 172 | + auto maskTy = cir::VectorType::get( |
| 173 | + boolTy, cast<cir::IntType>(mask.getType()).getWidth()); |
| 174 | + mlir::Value maskVec = builder.createBitcast(mask, maskTy); |
| 175 | + |
| 176 | + if (numElems < 8) { |
| 177 | + SmallVector<mlir::Attribute> indices; |
| 178 | + indices.reserve(numElems); |
| 179 | + mlir::Type i32Ty = builder.getSInt32Ty(); |
| 180 | + for (auto i : llvm::seq<unsigned>(0, numElems)) |
| 181 | + indices.push_back(cir::IntAttr::get(i32Ty, i)); |
| 182 | + |
| 183 | + maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices); |
| 184 | + } |
| 185 | + return maskVec; |
| 186 | +} |
| 187 | + |
| 188 | +static mlir::Value emitX86Select(CIRGenBuilderTy &builder, mlir::Location loc, |
| 189 | + mlir::Value mask, mlir::Value op0, |
| 190 | + mlir::Value op1) { |
| 191 | + auto constOp = mlir::dyn_cast_or_null<cir::ConstantOp>(mask.getDefiningOp()); |
| 192 | + // If the mask is all ones just return first argument. |
| 193 | + if (constOp && constOp.isAllOnesValue()) |
| 194 | + return op0; |
| 195 | + |
| 196 | + mask = getBoolMaskVecValue(builder, loc, mask, |
| 197 | + cast<cir::VectorType>(op0.getType()).getSize()); |
| 198 | + |
| 199 | + return builder.createSelect(loc, mask, op0, op1); |
| 200 | +} |
| 201 | + |
166 | 202 | static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder, |
167 | 203 | mlir::Location loc, |
168 | 204 | const std::string &intrinsicName, |
@@ -1076,7 +1112,31 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, |
1076 | 1112 | case X86::BI__builtin_ia32_extractf64x2_256_mask: |
1077 | 1113 | case X86::BI__builtin_ia32_extracti64x2_256_mask: |
1078 | 1114 | case X86::BI__builtin_ia32_extractf64x2_512_mask: |
1079 | | - case X86::BI__builtin_ia32_extracti64x2_512_mask: |
| 1115 | + case X86::BI__builtin_ia32_extracti64x2_512_mask: { |
| 1116 | + mlir::Location loc = getLoc(expr->getExprLoc()); |
| 1117 | + cir::VectorType dstTy = cast<cir::VectorType>(convertType(expr->getType())); |
| 1118 | + unsigned numElts = dstTy.getSize(); |
| 1119 | + unsigned srcNumElts = cast<cir::VectorType>(ops[0].getType()).getSize(); |
| 1120 | + unsigned subVectors = srcNumElts / numElts; |
| 1121 | + assert(llvm::isPowerOf2_32(subVectors) && "Expected power of 2 subvectors"); |
| 1122 | + unsigned index = |
| 1123 | + ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue(); |
| 1124 | + |
| 1125 | + index &= subVectors - 1; // Remove any extra bits. |
| 1126 | + index *= numElts; |
| 1127 | + |
| 1128 | + int64_t indices[16]; |
| 1129 | + std::iota(indices, indices + numElts, index); |
| 1130 | + |
| 1131 | + mlir::Value poison = |
| 1132 | + builder.getConstant(loc, cir::PoisonAttr::get(ops[0].getType())); |
| 1133 | + mlir::Value res = builder.createVecShuffle(loc, ops[0], poison, |
| 1134 | + ArrayRef(indices, numElts)); |
| 1135 | + if (ops.size() == 4) |
| 1136 | + res = emitX86Select(builder, loc, ops[3], res, ops[2]); |
| 1137 | + |
| 1138 | + return res; |
| 1139 | + } |
1080 | 1140 | case X86::BI__builtin_ia32_vinsertf128_pd256: |
1081 | 1141 | case X86::BI__builtin_ia32_vinsertf128_ps256: |
1082 | 1142 | case X86::BI__builtin_ia32_vinsertf128_si256: |
|
0 commit comments