Skip to content

Commit 9d8ce7a

Browse files
Command container appends BB_END on cmd buffer allocation end
When the linear stream created for the command container does not have enough space for a command plus BB_END, it programs BB_END and allocates a new command buffer allocation. In this case, the pointer returned from getSpace points to storage in the new command buffer allocation. Related-To: NEO-5707 Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
1 parent 92316c4 commit 9d8ce7a

31 files changed

+262
-306
lines changed

level_zero/core/source/cmdlist/cmdlist_hw.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2021 Intel Corporation
2+
* Copyright (C) 2020-2022 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -159,7 +159,6 @@ struct CommandListCoreFamily : CommandListImp {
159159
ze_result_t reset() override;
160160
ze_result_t executeCommandListImmediate(bool performMigration) override;
161161
size_t getReserveSshSize();
162-
void increaseCommandStreamSpace(size_t commandSize);
163162

164163
protected:
165164
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
332332
if (this->partitionCount > 1) {
333333
estimateSize += estimateBufferSizeMultiTileBarrier(hwInfo);
334334
}
335-
increaseCommandStreamSpace(estimateSize);
336335

337336
for (uint32_t i = 0u; i < packetsToReset; i++) {
338337
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
@@ -896,13 +895,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(uintptr_t
896895
commandContainer.addToResidencyContainer(clearColorAllocation);
897896

898897
NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties};
899-
bool blitterDirectSubmission = true; // assume direct submission enabled, since usually MI_BATCH_BUFFER_START is bigger than MI_BATCH_BUFFER_END
900-
size_t estimatedSize = NEO::BlitCommandsHelper<GfxFamily>::template BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitPropertiesContainer,
901-
false,
902-
false,
903-
blitterDirectSubmission,
904-
*device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
905-
increaseCommandStreamSpace(estimatedSize);
906898

907899
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
908900

@@ -946,13 +938,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
946938
}
947939

948940
NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties};
949-
bool blitterDirectSubmission = true; // assume direct submission enabled, since usually MI_BATCH_BUFFER_START is bigger than MI_BATCH_BUFFER_END
950-
size_t estimatedSize = NEO::BlitCommandsHelper<GfxFamily>::template BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitPropertiesContainer,
951-
false,
952-
false,
953-
blitterDirectSubmission,
954-
*device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
955-
increaseCommandStreamSpace(estimatedSize);
956941

957942
appendEventForProfiling(hSignalEvent, true);
958943
bool copyRegionPreferred = NEO::BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(copySizeModified, *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
@@ -1684,11 +1669,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_
16841669
if (isCopyOnly()) {
16851670
NEO::MiFlushArgs args;
16861671
args.commandWithPostSync = true;
1687-
increaseCommandStreamSpace(NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite());
16881672
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), baseAddr, Event::STATE_SIGNALED,
16891673
args, hwInfo);
16901674
} else {
1691-
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo));
16921675
NEO::PipeControlArgs args;
16931676
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
16941677
if (this->partitionCount > 1) {
@@ -1839,7 +1822,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
18391822
if (isCopyOnly()) {
18401823
NEO::MiFlushArgs args;
18411824
args.commandWithPostSync = true;
1842-
increaseCommandStreamSpace(NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite());
18431825
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), ptrOffset(baseAddr, eventSignalOffset),
18441826
Event::STATE_SIGNALED, args, hwInfo);
18451827
} else {
@@ -1851,7 +1833,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
18511833
event->setPacketsInUse(this->partitionCount);
18521834
}
18531835
if (applyScope || event->isEventTimestampFlagSet()) {
1854-
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo));
18551836
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
18561837
*commandContainer.getCommandStream(),
18571838
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
@@ -1860,7 +1841,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
18601841
hwInfo,
18611842
args);
18621843
} else {
1863-
increaseCommandStreamSpace(NEO::EncodeStoreMemory<GfxFamily>::getStoreDataImmSize());
18641844
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(
18651845
*commandContainer.getCommandStream(),
18661846
ptrOffset(baseAddr, eventSignalOffset),
@@ -1928,7 +1908,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
19281908
estimatedBufferSize += NEO::EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait();
19291909
}
19301910
}
1931-
increaseCommandStreamSpace(estimatedBufferSize);
19321911

19331912
if (dcFlushRequired) {
19341913
if (isCopyOnly()) {
@@ -2204,17 +2183,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reserveSpace(size_t size, void
22042183
return ZE_RESULT_SUCCESS;
22052184
}
22062185

2207-
template <GFXCORE_FAMILY gfxCoreFamily>
2208-
void CommandListCoreFamily<gfxCoreFamily>::increaseCommandStreamSpace(size_t commandSize) {
2209-
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
2210-
size_t estimatedSizeRequired = commandSize + sizeof(MI_BATCH_BUFFER_END);
2211-
if (commandContainer.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) {
2212-
auto bbEnd = commandContainer.getCommandStream()->template getSpaceForCmd<MI_BATCH_BUFFER_END>();
2213-
*bbEnd = GfxFamily::cmdInitBatchBufferEnd;
2214-
commandContainer.allocateNextCommandBuffer();
2215-
}
2216-
}
2217-
22182186
template <GFXCORE_FAMILY gfxCoreFamily>
22192187
ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions) {
22202188
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
@@ -2353,9 +2321,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
23532321
const auto &hwInfo = this->device->getHwInfo();
23542322
if (!hSignalEvent) {
23552323
if (isCopyOnly()) {
2356-
size_t estimatedSizeRequired = NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
2357-
increaseCommandStreamSpace(estimatedSizeRequired);
2358-
23592324
NEO::MiFlushArgs args;
23602325
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo);
23612326
} else {

level_zero/core/source/cmdlist/cmdlist_hw_base.inl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -183,9 +183,6 @@ void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {}
183183

184184
template <GFXCORE_FAMILY gfxCoreFamily>
185185
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
186-
size_t estimatedSizeRequired = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
187-
increaseCommandStreamSpace(estimatedSizeRequired);
188-
189186
NEO::PipeControlArgs args = createBarrierFlags();
190187
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
191188
}

level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
134134

135135
const auto &hwInfo = this->device->getHwInfo();
136136
if (NEO::DebugManager.flags.ForcePipeControlPriorToWalker.get()) {
137-
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl());
138-
139137
NEO::PipeControlArgs args;
140138
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
141139
}
@@ -245,8 +243,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
245243
event->setPacketsInUse(partitionCount);
246244
}
247245
if (L3FlushEnable) {
248-
size_t estimatedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
249-
increaseCommandStreamSpace(estimatedSize);
250246
programEventL3Flush<gfxCoreFamily>(hEvent, this->device, partitionCount, commandContainer);
251247
}
252248
}
@@ -302,16 +298,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
302298

303299
template <GFXCORE_FAMILY gfxCoreFamily>
304300
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
305-
size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize();
306-
increaseCommandStreamSpace(estimatedSizeRequired);
307301
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
308302
partitionDataSize);
309303
}
310304

311305
template <GFXCORE_FAMILY gfxCoreFamily>
312306
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
313-
const size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize();
314-
increaseCommandStreamSpace(estimatedSizeRequired);
315307
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
316308
NEO::ImplicitScalingDispatch<GfxFamily>::getPostSyncOffset());
317309
}
@@ -320,14 +312,9 @@ template <GFXCORE_FAMILY gfxCoreFamily>
320312
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
321313
if (this->partitionCount > 1) {
322314
auto neoDevice = device->getNEODevice();
323-
auto &hwInfo = neoDevice->getHardwareInfo();
324-
325-
increaseCommandStreamSpace(estimateBufferSizeMultiTileBarrier(hwInfo));
326315
appendMultiTileBarrier(*neoDevice);
327316
} else {
328317
NEO::PipeControlArgs args = createBarrierFlags();
329-
size_t estimatedSizeRequired = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
330-
increaseCommandStreamSpace(estimatedSizeRequired);
331318
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
332319
}
333320
}

level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,6 @@ ze_result_t CommandListCoreFamily<IGFX_XE_HPC_CORE>::appendMemoryPrefetch(const
4444

4545
NEO::LinearStream &cmdStream = *commandContainer.getCommandStream();
4646

47-
size_t estimatedSizeRequired = NEO::EncodeMemoryPrefetch<GfxFamily>::getSizeForMemoryPrefetch(size);
48-
increaseCommandStreamSpace(estimatedSizeRequired);
49-
5047
NEO::EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(cmdStream, *gpuAlloc, static_cast<uint32_t>(size), offset, hwInfo);
5148

5249
return ZE_RESULT_SUCCESS;
@@ -56,9 +53,6 @@ template <>
5653
void CommandListCoreFamily<IGFX_XE_HPC_CORE>::applyMemoryRangesBarrier(uint32_t numRanges,
5754
const size_t *pRangeSizes,
5855
const void **pRanges) {
59-
60-
increaseCommandStreamSpace(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl());
61-
6256
NEO::PipeControlArgs args;
6357
args.hdcPipelineFlush = true;
6458
args.unTypedDataPortCacheFlush = true;

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp

Lines changed: 2 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,9 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAp
106106
const auto streamCpu = stream->getCpuBase();
107107

108108
Vec3<size_t> groupCount{1, 1, 1};
109-
auto requiredSizeEstimate = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
110-
device->getNEODevice(), {0, 0, 0}, groupCount, false, false, false, kernel.get(), false);
109+
auto sizeLeftInStream = sizeof(MI_BATCH_BUFFER_END);
111110
auto available = stream->getAvailableSpace();
112-
stream->getSpace(available - requiredSizeEstimate + 1);
111+
stream->getSpace(available - sizeLeftInStream);
113112
auto bbEndPosition = stream->getSpace(0);
114113

115114
const uint32_t threadGroupDimensions[3] = {1, 1, 1};
@@ -236,38 +235,6 @@ HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDe
236235
EXPECT_NE(initialAllocation, reallocatedAllocation);
237236
}
238237

239-
HWTEST2_F(CommandListAppendLaunchKernel, WhenAppendingFunctionThenUsedCmdBufferSizeDoesNotExceedEstimate, IsAtLeastSkl) {
240-
createKernel();
241-
ze_group_count_t groupCount{1, 1, 1};
242-
243-
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
244-
ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
245-
ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
246-
247-
auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
248-
249-
auto result = commandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, false);
250-
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
251-
252-
auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
253-
auto estimate = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
254-
device->getNEODevice(), Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1), false, false, false, kernel.get(), false);
255-
256-
EXPECT_LE(sizeAfter - sizeBefore, estimate);
257-
258-
sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
259-
260-
result = commandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, true, false, false);
261-
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
262-
263-
sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
264-
estimate = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
265-
device->getNEODevice(), Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1), false, false, false, kernel.get(), false);
266-
267-
EXPECT_LE(sizeAfter - sizeBefore, estimate);
268-
EXPECT_LE(sizeAfter - sizeBefore, estimate);
269-
}
270-
271238
HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenEventsWhenAppendingKernelThenPostSyncToEventIsGenerated) {
272239
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
273240
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
@@ -689,19 +656,10 @@ HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWithImplicitArgsWhe
689656
auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &alloc);
690657
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
691658

692-
auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
693-
694659
result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
695660
static_cast<ze_group_count_t *>(alloc),
696661
nullptr, 0, nullptr);
697662
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
698-
699-
auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
700-
auto estimate = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
701-
device->getNEODevice(), Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1), false, false, true, &kernel, false);
702-
703-
EXPECT_LE(sizeAfter - sizeBefore, estimate);
704-
705663
auto heap = commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT);
706664
uint64_t pImplicitArgsGPUVA = heap->getGraphicsAllocation()->getGpuAddress() + kernel.getSizeForImplicitArgsPatching() - sizeof(ImplicitArgs);
707665
auto workDimStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
@@ -869,19 +827,11 @@ HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWhenAppendingThenWo
869827
auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &alloc);
870828
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
871829

872-
auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
873-
874830
result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
875831
static_cast<ze_group_count_t *>(alloc),
876832
nullptr, 0, nullptr);
877833
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
878834

879-
auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed();
880-
auto estimate = NEO::EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(
881-
device->getNEODevice(), Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1), false, false, true, &kernel, false);
882-
883-
EXPECT_LE(sizeAfter - sizeBefore, estimate);
884-
885835
kernel.groupSize[2] = 2;
886836
result = commandList->appendLaunchKernelIndirect(kernel.toHandle(),
887837
static_cast<ze_group_count_t *>(alloc),

0 commit comments

Comments
 (0)