[Attributor][AMDGPU] Propagate alignment through llvm.amdgcn.make.buffer.rsrc

Summary of the hunks below:
  * AMDGPUAttributor.cpp (runImpl): adds &AAAlign::ID to the attribute-ID
    list and requests an AAAlign for `Ptr` when `Ptr` is an
    amdgcn_make_buffer_rsrc intrinsic call.
  * AttributorAttributes.cpp (getKnownAlignForUse): a use by
    amdgcn_make_buffer_rsrc returns the known alignment of the AAAlign
    queried at the intrinsic's own value position.
  * AttributorAttributes.cpp (AAAlignCallSiteReturned): the existing AlignAA
    check no longer also tests isValidState(), and a new
    amdgcn_make_buffer_rsrc case clamps the state to the assumed alignment of
    operand 0.
  * Tests: new attr-amdgpu-align.ll; the tag-invariant-loads.ll expectations
    gain `align 4` on the pointer argument.

Review notes (confirm before applying):
  * The explicit template arguments on the two runImpl context lines
    (<AAAddressSpace>, <AANoAliasAddrSpace>) and the leading whitespace of
    all context lines must match the pre-image exactly.
  * The `index` blob ids are carried over unchanged.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 56ab040706a13..d3505cf38fce8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1603,7 +1603,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
        &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
        &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
        &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
-       &AAAMDGPUClusterDims::ID});
+       &AAAMDGPUClusterDims::ID, &AAAlign::ID});
 
   AttributorConfig AC(CGUpdater);
   AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1661,6 +1661,10 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
       if (Ptr) {
         A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
         A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
+        if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
+          if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
+            A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
+        }
       }
     }
   }
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index a6ac7610a2c7a..074d8edeee4fd 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5220,6 +5220,13 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
         return AlignAA->getKnownAlign().value();
       break;
     }
+    case Intrinsic::amdgcn_make_buffer_rsrc: {
+      const auto *AlignAA = A.getAAFor<AAAlign>(
+          QueryingAA, IRPosition::value(*II), DepClassTy::NONE);
+      if (AlignAA)
+        return AlignAA->getKnownAlign().value();
+      break;
+    }
     default:
       break;
     }
@@ -5543,7 +5550,7 @@ struct AAAlignCallSiteReturned final
         const auto *AlignAA =
             A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
                                 DepClassTy::REQUIRED);
-        if (AlignAA && AlignAA->isValidState()) {
+        if (AlignAA) {
           Alignment = std::max(AlignAA->getAssumedAlign(), Alignment);
           Valid = true;
         }
@@ -5554,6 +5561,18 @@ struct AAAlignCallSiteReturned final
               std::min(this->getAssumedAlign(), Alignment).value());
         break;
       }
+      // FIXME: Introduce target-specific sub-attributes and have getAAFor
+      // create the appropriate sub-attribute to handle target-specific
+      // intrinsics.
+      case Intrinsic::amdgcn_make_buffer_rsrc: {
+        const auto *AlignAA =
+            A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
+                                DepClassTy::REQUIRED);
+        if (AlignAA)
+          return clampStateAndIndicateChange<StateType>(
+              this->getState(), AlignAA->getAssumedAlign().value());
+        break;
+      }
       default:
         break;
       }
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
new file mode 100644
index 0000000000000..29845e649a6c6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s
+
+define float @align_back_prop(ptr addrspace(1) align 4 %x) {
+; CHECK-LABEL: define float @align_back_prop(
+; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i64 256, i32 0)
+; CHECK-NEXT:    [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
+; CHECK-NEXT:    ret float [[Y]]
+;
+  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i64 256, i32 0)
+  %y = load float, ptr addrspace(7) %fat.ptr, align 8
+  ret float %y
+}
+
+define float @align_forward_prop(ptr addrspace(1) align 8 %x) {
+; CHECK-LABEL: define float @align_forward_prop(
+; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i64 256, i32 0)
+; CHECK-NEXT:    [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
+; CHECK-NEXT:    ret float [[Y]]
+;
+  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i64 256, i32 0)
+  %y = load float, ptr addrspace(7) %fat.ptr, align 4
+  ret float %y
+}
+
+define float @align_mix_prop(ptr addrspace(1) align 4 %x) {
+; CHECK-LABEL: define float @align_mix_prop(
+; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i64 256, i32 0)
+; CHECK-NEXT:    [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
+; CHECK-NEXT:    [[Z:%.*]] = load float, ptr addrspace(1) [[X]], align 8
+; CHECK-NEXT:    ret float [[Z]]
+;
+  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i64 256, i32 0)
+  %y = load float, ptr addrspace(7) %fat.ptr, align 2
+  %z = load float, ptr addrspace(1) %x, align 8
+  ret float %z
+}
diff --git a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
index 1ab607465dbbb..af877edee59c9 100644
--- a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
+++ b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
@@ -305,7 +305,7 @@ define amdgpu_kernel void @test_call_untouched_ptr() {
 
 define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer(
-; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-SAME: ptr addrspace(1) nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
 ; AMDGCN-NEXT:    [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11:[0-9]+]]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4
 ; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR7]]
@@ -320,7 +320,7 @@ define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
 
 define amdgpu_kernel void @test_make_buffer_noalias(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer_noalias(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
 ; AMDGCN-NEXT:    [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i64 noundef 0, i32 noundef 0) #[[ATTR11]]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4, !invariant.load [[META0]]
 ; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR7]]