Skip to content

Commit 120deff

Browse files
committed
merge main into amd-staging
2 parents 1a77aed + 1d7d83d commit 120deff

File tree

145 files changed

+10979
-2799
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

145 files changed

+10979
-2799
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 7 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,6 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
156156
def rcpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
157157
def rsqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
158158
def rsqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
159-
def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
160-
def sqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
161159
}
162160

163161
let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
@@ -170,8 +168,6 @@ let Features = "sse2", Attributes = [NoThrow] in {
170168

171169
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
172170
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
173-
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
174-
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
175171
def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">;
176172
def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
177173
def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">;
@@ -513,8 +509,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
513509
}
514510

515511
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
516-
def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">;
517-
def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
518512
def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
519513
def rcpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
520514
def roundpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
@@ -716,11 +710,13 @@ let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
716710
def gatherq_d : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int const *, _Vector<2, long long int>, _Vector<4, int>, _Constant char)">;
717711
}
718712

719-
let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
713+
let Features = "f16c",
714+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
720715
def vcvtps2ph : X86Builtin<"_Vector<8, short>(_Vector<4, float>, _Constant int)">;
721716
}
722717

723-
let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
718+
let Features = "f16c",
719+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
724720
def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int)">;
725721
}
726722

@@ -3310,15 +3306,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
33103306
def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">;
33113307
}
33123308

3313-
let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
3309+
let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
33143310
def vpmultishiftqb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
33153311
}
33163312

3317-
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
3313+
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
33183314
def vpmultishiftqb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
33193315
}
33203316

3321-
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
3317+
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
33223318
def vpmultishiftqb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
33233319
}
33243320

@@ -3358,10 +3354,6 @@ let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<5
33583354
def dpbf16ps_512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<32, __bf16>, _Vector<32, __bf16>)">;
33593355
}
33603356

3361-
let Features = "avx512bf16", Attributes = [NoThrow, Const] in {
3362-
def cvtsbf162ss_32 : X86Builtin<"float(__bf16)">;
3363-
}
3364-
33653357
let Features = "avx512vp2intersect", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
33663358
def vp2intersect_q_512 : X86Builtin<"void(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char *, unsigned char *)">;
33673359
}
@@ -3539,14 +3531,6 @@ let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<1
35393531
def reducesh_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int, _Constant int)">;
35403532
}
35413533

3542-
let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
3543-
def sqrtph : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>)">;
3544-
}
3545-
3546-
let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
3547-
def sqrtph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>)">;
3548-
}
3549-
35503534
let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
35513535
def sqrtph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Constant int)">;
35523536
}
@@ -5065,15 +5049,3 @@ let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>
50655049
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
50665050
def vgetmantbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">;
50675051
}
5068-
5069-
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
5070-
def vsqrtbf16 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>)">;
5071-
}
5072-
5073-
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
5074-
def vsqrtbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>)">;
5075-
}
5076-
5077-
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
5078-
def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">;
5079-
}

clang/lib/AST/ByteCode/Interp.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,8 +1435,12 @@ static bool getField(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
14351435
return false;
14361436

14371437
if (Ptr.isIntegralPointer()) {
1438-
S.Stk.push<Pointer>(Ptr.asIntPointer().atOffset(S.getASTContext(), Off));
1439-
return true;
1438+
if (std::optional<IntPointer> IntPtr =
1439+
Ptr.asIntPointer().atOffset(S.getASTContext(), Off)) {
1440+
S.Stk.push<Pointer>(std::move(*IntPtr));
1441+
return true;
1442+
}
1443+
return false;
14401444
}
14411445

14421446
if (!Ptr.isBlockPointer()) {
@@ -2081,15 +2085,15 @@ bool InvalidShuffleVectorIndex(InterpState &S, CodePtr OpPC, uint32_t Index) {
20812085

20822086
bool CheckPointerToIntegralCast(InterpState &S, CodePtr OpPC,
20832087
const Pointer &Ptr, unsigned BitWidth) {
2088+
const SourceInfo &E = S.Current->getSource(OpPC);
2089+
S.CCEDiag(E, diag::note_constexpr_invalid_cast)
2090+
<< 2 << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
2091+
20842092
if (Ptr.isDummy())
20852093
return false;
20862094
if (Ptr.isFunctionPointer())
20872095
return true;
20882096

2089-
const SourceInfo &E = S.Current->getSource(OpPC);
2090-
S.CCEDiag(E, diag::note_constexpr_invalid_cast)
2091-
<< 2 << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
2092-
20932097
if (Ptr.isBlockPointer() && !Ptr.isZero()) {
20942098
// Only allow based lvalue casts if they are lossless.
20952099
if (S.getASTContext().getTargetInfo().getPointerWidth(LangAS::Default) !=

clang/lib/AST/ByteCode/Interp.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2646,10 +2646,6 @@ template <PrimType Name, class T = typename PrimConv<Name>::T>
26462646
bool CastPointerIntegral(InterpState &S, CodePtr OpPC) {
26472647
const Pointer &Ptr = S.Stk.pop<Pointer>();
26482648

2649-
S.CCEDiag(S.Current->getSource(OpPC), diag::note_constexpr_invalid_cast)
2650-
<< diag::ConstexprInvalidCastKind::ThisConversionOrReinterpret
2651-
<< S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
2652-
26532649
if (!CheckPointerToIntegralCast(S, OpPC, Ptr, T::bitWidth()))
26542650
return Invalid(S, OpPC);
26552651

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3527,6 +3527,147 @@ static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
35273527
}
35283528

35293529
pushInteger(S, RetMask, Call->getType());
3530+
return true;
3531+
}
3532+
3533+
/// Constant-evaluates the F16C float -> half conversion builtins
/// (vcvtps2ph / vcvtps2ph256).
///
/// The call takes a vector of floats and a rounding-control immediate. Each
/// source element is converted to half precision and its raw bit pattern is
/// stored into the integer result vector; any result lanes beyond the number
/// of source elements are set to zero.
///
/// Returns false (after emitting note_constexpr_dynamic_rounding) when the
/// immediate selects MXCSR rounding, the call is FP-constrained, and a
/// conversion does not report APFloat::opOK.
static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
                                           const CallExpr *Call) {
  // Operands: the float vector, then the rounding-control immediate.
  assert(Call->getNumArgs() == 2);

  // The immediate is popped first, then the source vector; the destination
  // is only peeked so it remains on the stack as the call's result.
  APSInt Imm = popToAPSInt(S, Call->getArg(1));
  const Pointer &Src = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  assert(Src.getFieldDesc()->isPrimitiveArray());
  assert(Dst.getFieldDesc()->isPrimitiveArray());

  const unsigned NumSrcLanes =
      Call->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
  const unsigned NumDstLanes =
      Call->getType()->castAs<VectorType>()->getNumElements();

  const auto &ASTCtx = S.getASTContext();
  const llvm::fltSemantics &HalfSem =
      ASTCtx.getFloatTypeSemantics(ASTCtx.HalfTy);

  // Bit 2 of the immediate requests the MXCSR (dynamic) rounding mode; the
  // low two bits otherwise select a static rounding mode.
  int ImmVal = Imm.getZExtValue();
  const bool UseMXCSR = (ImmVal & 4) != 0;
  const bool IsFPConstrained =
      Call->getFPFeaturesInEffect(ASTCtx.getLangOpts()).isFPConstrained();

  // Static rounding modes indexed by imm[1:0].
  static constexpr llvm::RoundingMode ImmRoundingModes[4] = {
      llvm::RoundingMode::NearestTiesToEven,
      llvm::RoundingMode::TowardNegative,
      llvm::RoundingMode::TowardPositive,
      llvm::RoundingMode::TowardZero,
  };

  // With MXCSR rounding the trial conversion uses nearest-ties-to-even: an
  // exact conversion yields the same result under every rounding mode.
  const llvm::RoundingMode RM = UseMXCSR
                                    ? llvm::RoundingMode::NearestTiesToEven
                                    : ImmRoundingModes[ImmVal & 3];

  PrimType DstElemT =
      *S.getContext().classify(Dst.getFieldDesc()->getElemQualType());

  for (unsigned Lane = 0; Lane != NumSrcLanes; ++Lane) {
    APFloat Half = Src.elem<Floating>(Lane).getAPFloat();

    bool LosesInfo;
    const APFloat::opStatus Status = Half.convert(HalfSem, RM, &LosesInfo);

    // A dynamically-rounded conversion can only be folded when the result
    // does not depend on the (unknown) runtime rounding mode.
    const bool NeedsRuntimeRounding =
        UseMXCSR && IsFPConstrained && Status != APFloat::opOK;
    if (NeedsRuntimeRounding) {
      S.FFDiag(S.Current->getSource(OpPC),
               diag::note_constexpr_dynamic_rounding);
      return false;
    }

    // Store the half value's raw bits as an integer element.
    INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
      Dst.elem<T>(Lane) = T::from(Half.bitcastToAPInt().getZExtValue());
    });
  }

  // Zero any destination lanes that have no corresponding source element
  // (e.g. vcvtps2ph turns 4 floats into an 8 x short result).
  for (unsigned Lane = NumSrcLanes; Lane < NumDstLanes; ++Lane) {
    INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(Lane) = T::from(0); });
  }

  Dst.initializeAllElements();
  return true;
}
3620+
3621+
/// Constant-evaluates the AVX512-VBMI vpmultishiftqb builtins.
///
/// Both operands are byte vectors of equal length. For every 64-bit group,
/// each result byte is formed from 8 consecutive bits of the second
/// operand's qword, starting at the bit offset given by the low 6 bits of
/// the matching control byte in the first operand and wrapping around
/// modulo 64.
///
/// Returns false without touching the stack if either argument is not a
/// vector type.
static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC,
                                              const CallExpr *Call) {
  assert(Call->getNumArgs() == 2);

  const QualType CtrlTy = Call->getArg(0)->getType();
  const QualType DataTy = Call->getArg(1)->getType();
  if (!(CtrlTy->isVectorType() && DataTy->isVectorType()))
    return false;

  // Arguments come off the stack in reverse: data (arg 1) first, then the
  // control bytes (arg 0).
  const Pointer &DataVec = S.Stk.pop<Pointer>();
  const Pointer &CtrlVec = S.Stk.pop<Pointer>();
  const auto *VecTy = CtrlTy->castAs<VectorType>();
  assert(VecTy->getNumElements() ==
         DataTy->castAs<VectorType>()->getNumElements());

  PrimType ElemT = *S.getContext().classify(VecTy->getElementType());

  constexpr unsigned BytesPerQWord = 8;
  constexpr unsigned BitsPerByte = 8;
  const unsigned NumQWords = VecTy->getNumElements() / BytesPerQWord;
  const Pointer &Dst = S.Stk.peek<Pointer>();

  for (unsigned QW = 0; QW != NumQWords; ++QW) {
    const unsigned Base = QW * BytesPerQWord;

    // Assemble the data qword, element 0 in the least significant byte.
    uint64_t Packed = 0;
    for (unsigned J = 0; J != BytesPerQWord; ++J) {
      const unsigned Idx = Base + J;
      INT_TYPE_SWITCH(ElemT, {
        Packed |= (static_cast<uint64_t>(DataVec.elem<T>(Idx)) & 0xFF)
                  << (J * BitsPerByte);
      });
    }

    // Produce one output byte per control byte of this qword.
    for (unsigned J = 0; J != BytesPerQWord; ++J) {
      const unsigned Idx = Base + J;

      // Only the low 6 bits of the control byte select the start bit.
      uint64_t Start = 0;
      INT_TYPE_SWITCH(ElemT, {
        Start = static_cast<uint64_t>(CtrlVec.elem<T>(Idx)) & 0x3F;
      });

      // Collect 8 bits beginning at Start, wrapping within the qword.
      uint64_t OutByte = 0;
      for (unsigned Bit = 0; Bit != BitsPerByte; ++Bit) {
        const uint64_t SrcBit = (Packed >> ((Start + Bit) & 0x3F)) & 1;
        OutByte |= SrcBit << Bit;
      }

      INT_TYPE_SWITCH(ElemT, { Dst.elem<T>(Idx) = T::from(OutByte); });
    }
  }

  Dst.initializeAllElements();

  return true;
}
@@ -4756,6 +4897,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
47564897
return std::make_pair(0, static_cast<int>(LaneOffset + Index));
47574898
});
47584899

4900+
case X86::BI__builtin_ia32_vpmultishiftqb128:
4901+
case X86::BI__builtin_ia32_vpmultishiftqb256:
4902+
case X86::BI__builtin_ia32_vpmultishiftqb512:
4903+
return interp__builtin_ia32_multishiftqb(S, OpPC, Call);
47594904
case X86::BI__builtin_ia32_kandqi:
47604905
case X86::BI__builtin_ia32_kandhi:
47614906
case X86::BI__builtin_ia32_kandsi:
@@ -4898,6 +5043,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
48985043
case X86::BI__builtin_ia32_insert128i256:
48995044
return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);
49005045

5046+
case clang::X86::BI__builtin_ia32_vcvtps2ph:
5047+
case clang::X86::BI__builtin_ia32_vcvtps2ph256:
5048+
return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call);
5049+
49015050
case X86::BI__builtin_ia32_vec_ext_v4hi:
49025051
case X86::BI__builtin_ia32_vec_ext_v16qi:
49035052
case X86::BI__builtin_ia32_vec_ext_v8hi:

clang/lib/AST/ByteCode/Pointer.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -895,8 +895,8 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
895895
return Result;
896896
}
897897

898-
IntPointer IntPointer::atOffset(const ASTContext &ASTCtx,
899-
unsigned Offset) const {
898+
std::optional<IntPointer> IntPointer::atOffset(const ASTContext &ASTCtx,
899+
unsigned Offset) const {
900900
if (!this->Desc)
901901
return *this;
902902
const Record *R = this->Desc->ElemRecord;
@@ -914,6 +914,9 @@ IntPointer IntPointer::atOffset(const ASTContext &ASTCtx,
914914
return *this;
915915

916916
const FieldDecl *FD = F->Decl;
917+
if (FD->getParent()->isInvalidDecl())
918+
return std::nullopt;
919+
917920
const ASTRecordLayout &Layout = ASTCtx.getASTRecordLayout(FD->getParent());
918921
unsigned FieldIndex = FD->getFieldIndex();
919922
uint64_t FieldOffset =

clang/lib/AST/ByteCode/Pointer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ struct IntPointer {
4747
const Descriptor *Desc;
4848
uint64_t Value;
4949

50-
IntPointer atOffset(const ASTContext &ASTCtx, unsigned Offset) const;
50+
std::optional<IntPointer> atOffset(const ASTContext &ASTCtx,
51+
unsigned Offset) const;
5152
IntPointer baseCast(const ASTContext &ASTCtx, unsigned BaseOffset) const;
5253
};
5354

0 commit comments

Comments
 (0)