Skip to content

Commit 63a8908

Browse files
[1/n] L0 Immediate Commandlist improvements
Add indirect allocs to residency in flushTask

Related-To: LOCI-1988
Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@intel.com>
1 parent efdfdeb commit 63a8908

File tree

5 files changed

+125
-20
lines changed

5 files changed

+125
-20
lines changed

level_zero/core/source/cmdlist/cmdlist.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,4 +163,26 @@ void CommandList::migrateSharedAllocations() {
163163
}
164164
}
165165

166+
// Handles residency of indirectly accessed unified-memory allocations for this
// command list. Does nothing unless hasIndirectAllocationsAllowed() is true.
// Two strategies are used:
//  - "pack": the SVM allocations manager is asked to make the indirect
//    allocations resident on this->csr;
//  - otherwise: internal allocations matching this list's unified-memory
//    controls are added to the command container's residency container.
// this->csr is dereferenced on the pack path, so it must be assigned before
// this method is called.
void CommandList::handleIndirectAllocationResidency() {
167+
bool indirectAllocationsAllowed = this->hasIndirectAllocationsAllowed();
168+
NEO::Device *neoDevice = this->device->getNEODevice();
169+
if (indirectAllocationsAllowed) {
170+
auto svmAllocsManager = this->device->getDriverHandle()->getSvmAllocsManager();
171+
// Default strategy is whatever the memory manager reports for this root device.
auto submitAsPack = this->device->getDriverHandle()->getMemoryManager()->allowIndirectAllocationsAsPack(neoDevice->getRootDeviceIndex());
172+
// Any debug-flag value other than -1 overrides the default; non-zero selects pack mode.
if (NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) {
173+
submitAsPack = !!NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get();
174+
}
175+
176+
if (submitAsPack) {
177+
// Passes the CSR's currently peeked task count plus one
// (presumably the task count of the upcoming submission - confirm).
svmAllocsManager->makeIndirectAllocationsResident(*(this->csr), this->csr->peekTaskCount() + 1u);
178+
} else {
179+
UnifiedMemoryControls unifiedMemoryControls = this->getUnifiedMemoryControls();
180+
181+
// The mask generated from the controls selects which internal allocations are added.
svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(),
182+
this->commandContainer.getResidencyContainer(),
183+
unifiedMemoryControls.generateMask());
184+
}
185+
}
186+
}
187+
166188
} // namespace L0

level_zero/core/source/cmdlist/cmdlist.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2021 Intel Corporation
2+
* Copyright (C) 2020-2022 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -200,6 +200,8 @@ struct CommandList : _ze_command_list_handle_t {
200200
return indirectAllocationsAllowed;
201201
}
202202

203+
void handleIndirectAllocationResidency();
204+
203205
NEO::PreemptionMode obtainFunctionPreemptionMode(Kernel *kernel);
204206

205207
std::vector<Kernel *> &getPrintfFunctionContainer() {

level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
6565

6666
auto lockCSR = this->csr->obtainUniqueOwnership();
6767

68+
this->handleIndirectAllocationResidency();
69+
6870
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadScratchSize());
6971

7072
if (performMigration) {

level_zero/core/source/cmdqueue/cmdqueue_hw.inl

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -168,24 +168,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
168168
for (auto i = 0u; i < numCommandLists; i++) {
169169
auto commandList = CommandList::fromHandle(phCommandLists[i]);
170170

171-
bool indirectAllocationsAllowed = commandList->hasIndirectAllocationsAllowed();
172-
if (indirectAllocationsAllowed) {
173-
auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
174-
auto submitAsPack = device->getDriverHandle()->getMemoryManager()->allowIndirectAllocationsAsPack(neoDevice->getRootDeviceIndex());
175-
if (NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) {
176-
submitAsPack = !!NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get();
177-
}
178-
179-
if (submitAsPack) {
180-
svmAllocsManager->makeIndirectAllocationsResident(*csr, csr->peekTaskCount() + 1u);
181-
} else {
182-
UnifiedMemoryControls unifiedMemoryControls = commandList->getUnifiedMemoryControls();
183-
184-
svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(),
185-
commandList->commandContainer.getResidencyContainer(),
186-
unifiedMemoryControls.generateMask());
187-
}
188-
}
171+
commandList->csr = csr;
172+
commandList->handleIndirectAllocationResidency();
189173

190174
totalCmdBuffers += commandList->commandContainer.getCmdBufferAllocations().size();
191175
spaceForResidency += commandList->commandContainer.getResidencyContainer().size();
@@ -212,7 +196,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
212196
}
213197

214198
partitionCount = std::max(partitionCount, commandList->partitionCount);
215-
commandList->csr = csr;
216199
commandList->makeResidentAndMigrate(performMigration);
217200
}
218201

level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,102 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir
934934
commandQueue->destroy();
935935
}
936936

937+
// With EnableFlushTaskSubmission set, appends a kernel launch to an immediate
// command list while the kernel allows indirect device allocations and the mock
// memory manager's overrideAllocateAsPackReturn is 1. Expects the device
// allocation to be resident with the objectAlwaysResident task count.
HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndirectAllocationsAsPackWhenIndirectAccessIsUsedThenWholePackIsMadeResidentWithImmediateCommandListAndFlushTask) {
938+
DebugManagerStateRestore restorer;
939+
NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);
940+
941+
// Mock CSR bound to the default engine's OS context; in this test it is only
// used further down to look up the OS context id for the residency checks.
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
942+
csr.initializeTagAllocation();
943+
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
944+
945+
ze_result_t returnValue;
946+
ze_command_queue_desc_t desc = {};
947+
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
948+
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily,
949+
device,
950+
&desc,
951+
false,
952+
NEO::EngineGroupType::Compute,
953+
returnValue));
954+
ASSERT_NE(nullptr, commandList);
955+
EXPECT_EQ(1u, commandList->cmdListType);
956+
EXPECT_NE(nullptr, commandList->cmdQImmediate);
957+
958+
void *deviceAlloc = nullptr;
959+
ze_device_mem_alloc_desc_t deviceDesc = {};
960+
auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &deviceAlloc);
961+
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
962+
963+
// Look up the graphics allocation backing deviceAlloc for this root device.
auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
964+
ASSERT_NE(nullptr, gpuAlloc);
965+
966+
createKernel();
967+
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
968+
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
969+
970+
// Force the mock memory manager's pack-submission answer to 1
// (presumably "allowed" - confirm against MockMemoryManager).
static_cast<MockMemoryManager *>(driverHandle.get()->getMemoryManager())->overrideAllocateAsPackReturn = 1u;
971+
972+
ze_group_count_t groupCount{1, 1, 1};
973+
result = commandList->appendLaunchKernel(kernel->toHandle(),
974+
&groupCount,
975+
nullptr,
976+
0,
977+
nullptr);
978+
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
979+
980+
// After the append, the allocation must be resident for that OS context id and
// carry the objectAlwaysResident residency task count.
EXPECT_TRUE(gpuAlloc->isResident(csr.getOsContext().getContextId()));
981+
EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, gpuAlloc->getResidencyTaskCount(csr.getOsContext().getContextId()));
982+
983+
device->getDriverHandle()->getSvmAllocsManager()->freeSVMAlloc(deviceAlloc);
984+
}
985+
986+
// With EnableFlushTaskSubmission set and MakeIndirectAllocationsResidentAsPack
// forced to 0, appends a kernel launch to an immediate command list while the
// kernel allows indirect device allocations. Only the success of
// appendLaunchKernel is checked; there are no residency expectations.
HWTEST_F(CommandQueueIndirectAllocations, givenImmediateCommandListAndFlushTaskWithIndirectAllocsAsPackDisabledThenLaunchKernelWorks) {
987+
DebugManagerStateRestore restorer;
988+
NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);
989+
NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0);
990+
991+
// NOTE(review): this mock CSR is set up but not referenced again in this test.
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
992+
csr.initializeTagAllocation();
993+
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
994+
995+
ze_result_t returnValue;
996+
ze_command_queue_desc_t desc = {};
997+
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
998+
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily,
999+
device,
1000+
&desc,
1001+
false,
1002+
NEO::EngineGroupType::Compute,
1003+
returnValue));
1004+
ASSERT_NE(nullptr, commandList);
1005+
EXPECT_EQ(1u, commandList->cmdListType);
1006+
EXPECT_NE(nullptr, commandList->cmdQImmediate);
1007+
1008+
void *deviceAlloc = nullptr;
1009+
ze_device_mem_alloc_desc_t deviceDesc = {};
1010+
auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &deviceAlloc);
1011+
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
1012+
1013+
// Look up the graphics allocation backing deviceAlloc for this root device.
auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
1014+
ASSERT_NE(nullptr, gpuAlloc);
1015+
1016+
createKernel();
1017+
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
1018+
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
1019+
1020+
// The mock memory manager is told to answer 1 here, while the debug flag above is 0;
// the flag is expected to take precedence and select the non-pack path - confirm.
static_cast<MockMemoryManager *>(driverHandle.get()->getMemoryManager())->overrideAllocateAsPackReturn = 1u;
1021+
1022+
ze_group_count_t groupCount{1, 1, 1};
1023+
result = commandList->appendLaunchKernel(kernel->toHandle(),
1024+
&groupCount,
1025+
nullptr,
1026+
0,
1027+
nullptr);
1028+
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
1029+
1030+
device->getDriverHandle()->getSvmAllocsManager()->freeSVMAlloc(deviceAlloc);
1031+
}
1032+
9371033
using DeviceCreateCommandQueueTest = Test<DeviceFixture>;
9381034
TEST_F(DeviceCreateCommandQueueTest, givenLowPriorityDescWhenCreateCommandQueueIsCalledThenLowPriorityCsrIsAssigned) {
9391035
ze_command_queue_desc_t desc{};

0 commit comments

Comments
 (0)