1 /*
2 * Copyright (C) 2019-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "shared/source/program/sync_buffer_handler.h"
9 #include "shared/test/common/mocks/ult_device_factory.h"
10 #include "shared/test/common/test_macros/test.h"
11
12 #include "opencl/source/api/api.h"
13 #include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h"
14 #include "opencl/test/unit_test/mocks/mock_command_queue.h"
15 #include "opencl/test/unit_test/mocks/mock_kernel.h"
16 #include "opencl/test/unit_test/mocks/mock_mdi.h"
17 #include "opencl/test/unit_test/mocks/mock_platform.h"
18
19 #include "engine_node.h"
20
21 using namespace NEO;
22
// Test shim that widens access to SyncBufferHandler's protected state
// (buffer capacity, current usage, and the backing graphics allocation)
// so the tests below can inspect and mutate it directly.
class MockSyncBufferHandler : public SyncBufferHandler {
  public:
    using SyncBufferHandler::bufferSize;
    using SyncBufferHandler::graphicsAllocation;
    using SyncBufferHandler::usedBufferSize;
};
29
30 class SyncBufferEnqueueHandlerTest : public EnqueueHandlerTest {
31 public:
SetUp()32 void SetUp() override {
33 hardwareInfo = *defaultHwInfo;
34 hardwareInfo.capabilityTable.blitterOperationsSupported = true;
35 uint64_t hwInfoConfig = defaultHardwareInfoConfigTable[productFamily];
36 hardwareInfoSetup[productFamily](&hardwareInfo, true, hwInfoConfig);
37 SetUpImpl(&hardwareInfo);
38 }
39
TearDown()40 void TearDown() override {
41 context->decRefInternal();
42 delete pClDevice;
43 pClDevice = nullptr;
44 pDevice = nullptr;
45 }
46
SetUpImpl(const NEO::HardwareInfo * hardwareInfo)47 void SetUpImpl(const NEO::HardwareInfo *hardwareInfo) {
48 pDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(hardwareInfo);
49 ASSERT_NE(nullptr, pDevice);
50 pClDevice = new MockClDevice{pDevice};
51 ASSERT_NE(nullptr, pClDevice);
52
53 auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver();
54 pTagMemory = commandStreamReceiver.getTagAddress();
55 ASSERT_NE(nullptr, const_cast<uint32_t *>(pTagMemory));
56
57 context = new NEO::MockContext(pClDevice);
58 }
59 };
60
61 class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest {
62 public:
SetUp()63 void SetUp() override {}
TearDown()64 void TearDown() override {}
65
66 template <typename FamilyType>
SetUpT()67 void SetUpT() {
68 SyncBufferEnqueueHandlerTest::SetUp();
69 kernelInternals = std::make_unique<MockKernelWithInternals>(*pClDevice, context);
70 kernelInternals->kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
71 kernel = kernelInternals->mockKernel;
72 kernel->executionType = KernelExecutionType::Concurrent;
73 commandQueue = reinterpret_cast<MockCommandQueue *>(new MockCommandQueueHw<FamilyType>(context, pClDevice, 0));
74 hwHelper = &HwHelper::get(pClDevice->getHardwareInfo().platform.eRenderCoreFamily);
75 if (hwHelper->isCooperativeEngineSupported(pClDevice->getHardwareInfo())) {
76 commandQueue->gpgpuEngine = &pClDevice->getEngine(aub_stream::EngineType::ENGINE_CCS, EngineUsage::Cooperative);
77 }
78 }
79
80 template <typename FamilyType>
TearDownT()81 void TearDownT() {
82 commandQueue->release();
83 kernelInternals.reset();
84 SyncBufferEnqueueHandlerTest::TearDown();
85 }
86
patchAllocateSyncBuffer()87 void patchAllocateSyncBuffer() {
88 kernelInternals->kernelInfo.setSyncBuffer(sizeof(uint8_t), 0, 0);
89 }
90
getSyncBufferHandler()91 MockSyncBufferHandler *getSyncBufferHandler() {
92 return reinterpret_cast<MockSyncBufferHandler *>(pDevice->syncBufferHandler.get());
93 }
94
enqueueNDCount()95 cl_int enqueueNDCount() {
96 return clEnqueueNDCountKernelINTEL(commandQueue, kernelInternals->mockMultiDeviceKernel, workDim, gwOffset, workgroupCount, lws, 0, nullptr, nullptr);
97 }
98
isCooperativeDispatchSupported()99 bool isCooperativeDispatchSupported() {
100 auto engineGroupType = hwHelper->getEngineGroupType(commandQueue->getGpgpuEngine().getEngineType(),
101 commandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo);
102 return hwHelper->isCooperativeDispatchSupported(engineGroupType, pDevice->getHardwareInfo());
103 }
104
105 const cl_uint workDim = 1;
106 const size_t gwOffset[3] = {0, 0, 0};
107 const size_t workItemsCount = 16;
108 const size_t lws[3] = {workItemsCount, 1, 1};
109 size_t workgroupCount[3] = {workItemsCount, 1, 1};
110 std::unique_ptr<MockKernelWithInternals> kernelInternals;
111 MockKernel *kernel;
112 MockCommandQueue *commandQueue;
113 HwHelper *hwHelper;
114 };
115
HWTEST_TEMPLATED_F(SyncBufferHandlerTest,GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferIsUsed)116 HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferIsUsed) {
117 patchAllocateSyncBuffer();
118
119 enqueueNDCount();
120 auto syncBufferHandler = getSyncBufferHandler();
121 EXPECT_EQ(workItemsCount, syncBufferHandler->usedBufferSize);
122
123 commandQueue->flush();
124
125 auto pCsr = commandQueue->getGpgpuEngine().commandStreamReceiver;
126 EXPECT_EQ(syncBufferHandler->graphicsAllocation->getTaskCount(pCsr->getOsContext().getContextId()),
127 static_cast<UltCommandStreamReceiver<FamilyType> *>(pCsr)->latestSentTaskCount);
128 }
129
HWTEST_TEMPLATED_F(SyncBufferHandlerTest,GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferOffsetIsProperlyAligned)130 HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferOffsetIsProperlyAligned) {
131 patchAllocateSyncBuffer();
132
133 workgroupCount[0] = 1;
134 enqueueNDCount();
135
136 auto syncBufferHandler = getSyncBufferHandler();
137 EXPECT_EQ(CommonConstants::maximalSizeOfAtomicType, syncBufferHandler->usedBufferSize);
138
139 enqueueNDCount();
140 EXPECT_EQ(2u * CommonConstants::maximalSizeOfAtomicType, syncBufferHandler->usedBufferSize);
141 }
142
HWTEST_TEMPLATED_F(SyncBufferHandlerTest,GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated)143 HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated) {
144 auto retVal = enqueueNDCount();
145 EXPECT_EQ(CL_SUCCESS, retVal);
146 EXPECT_EQ(nullptr, getSyncBufferHandler());
147 }
148
HWTEST_TEMPLATED_F(SyncBufferHandlerTest,GivenDefaultKernelUsingSyncBufferWhenEnqueuingKernelThenErrorIsReturnedAndSyncBufferIsNotCreated)149 HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenDefaultKernelUsingSyncBufferWhenEnqueuingKernelThenErrorIsReturnedAndSyncBufferIsNotCreated) {
150 patchAllocateSyncBuffer();
151 kernel->executionType = KernelExecutionType::Default;
152
153 auto retVal = enqueueNDCount();
154 EXPECT_EQ(CL_INVALID_KERNEL, retVal);
155 EXPECT_EQ(nullptr, getSyncBufferHandler());
156 }
157
HWTEST_TEMPLATED_F(SyncBufferHandlerTest,GivenConcurrentKernelWithAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsCreated)158 HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsCreated) {
159 patchAllocateSyncBuffer();
160 auto retVal = enqueueNDCount();
161 EXPECT_EQ(CL_SUCCESS, retVal);
162 EXPECT_NE(nullptr, getSyncBufferHandler());
163 }
164
HWTEST_TEMPLATED_F(SyncBufferHandlerTest,GivenMaxWorkgroupCountWhenEnqueuingConcurrentKernelThenSuccessIsReturned)165 HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingConcurrentKernelThenSuccessIsReturned) {
166 auto maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue);
167 workgroupCount[0] = maxWorkGroupCount;
168
169 auto retVal = enqueueNDCount();
170 EXPECT_EQ(CL_SUCCESS, retVal);
171 }
172
HWTEST_TEMPLATED_F(SyncBufferHandlerTest,GivenTooHighWorkgroupCountWhenEnqueuingConcurrentKernelThenErrorIsReturned)173 HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenTooHighWorkgroupCountWhenEnqueuingConcurrentKernelThenErrorIsReturned) {
174 size_t maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue);
175 workgroupCount[0] = maxWorkGroupCount + 1;
176
177 auto retVal = enqueueNDCount();
178 EXPECT_EQ(CL_INVALID_VALUE, retVal);
179 }
180
HWTEST_TEMPLATED_F(SyncBufferHandlerTest,GivenSyncBufferFullWhenEnqueuingKernelThenNewBufferIsAllocated)181 HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSyncBufferFullWhenEnqueuingKernelThenNewBufferIsAllocated) {
182 patchAllocateSyncBuffer();
183 enqueueNDCount();
184 auto syncBufferHandler = getSyncBufferHandler();
185
186 syncBufferHandler->usedBufferSize = syncBufferHandler->bufferSize;
187 enqueueNDCount();
188 EXPECT_EQ(workItemsCount, syncBufferHandler->usedBufferSize);
189 }
190
HWTEST_TEMPLATED_F(SyncBufferHandlerTest,GivenSshRequiredWhenPatchingSyncBufferThenSshIsProperlyPatched)191 HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSshRequiredWhenPatchingSyncBufferThenSshIsProperlyPatched) {
192 using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
193 kernelInternals->kernelInfo.setBufferAddressingMode(KernelDescriptor::BindfulAndStateless);
194
195 patchAllocateSyncBuffer();
196
197 pDevice->allocateSyncBufferHandler();
198 auto syncBufferHandler = getSyncBufferHandler();
199 auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(kernel->getSurfaceStateHeap(),
200 kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.bindful));
201 auto bufferAddress = syncBufferHandler->graphicsAllocation->getGpuAddress();
202 surfaceState->setSurfaceBaseAddress(bufferAddress + 1);
203 auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
204 EXPECT_NE(bufferAddress, surfaceAddress);
205
206 kernel->patchSyncBuffer(syncBufferHandler->graphicsAllocation, syncBufferHandler->usedBufferSize);
207 surfaceAddress = surfaceState->getSurfaceBaseAddress();
208 EXPECT_EQ(bufferAddress, surfaceAddress);
209 }
210
TEST(SyncBufferHandlerDeviceTest,GivenRootDeviceWhenAllocateSyncBufferIsCalledTwiceThenTheObjectIsCreatedOnlyOnce)211 TEST(SyncBufferHandlerDeviceTest, GivenRootDeviceWhenAllocateSyncBufferIsCalledTwiceThenTheObjectIsCreatedOnlyOnce) {
212 const size_t testUsedBufferSize = 100;
213 MockDevice rootDevice;
214 rootDevice.allocateSyncBufferHandler();
215 auto syncBufferHandler = reinterpret_cast<MockSyncBufferHandler *>(rootDevice.syncBufferHandler.get());
216
217 ASSERT_NE(syncBufferHandler->usedBufferSize, testUsedBufferSize);
218 syncBufferHandler->usedBufferSize = testUsedBufferSize;
219
220 rootDevice.allocateSyncBufferHandler();
221 syncBufferHandler = reinterpret_cast<MockSyncBufferHandler *>(rootDevice.syncBufferHandler.get());
222
223 EXPECT_EQ(testUsedBufferSize, syncBufferHandler->usedBufferSize);
224 }
225
TEST(SyncBufferHandlerDeviceTest,GivenSubDeviceWhenAllocateSyncBufferIsCalledTwiceThenTheObjectIsCreatedOnlyOnce)226 TEST(SyncBufferHandlerDeviceTest, GivenSubDeviceWhenAllocateSyncBufferIsCalledTwiceThenTheObjectIsCreatedOnlyOnce) {
227 UltDeviceFactory ultDeviceFactory{1, 2};
228 auto pSubDevice = ultDeviceFactory.subDevices[0];
229 pSubDevice->allocateSyncBufferHandler();
230 auto syncBufferHandler = reinterpret_cast<MockSyncBufferHandler *>(pSubDevice->syncBufferHandler.get());
231
232 const size_t testUsedBufferSize = 100;
233 ASSERT_NE(syncBufferHandler->usedBufferSize, testUsedBufferSize);
234 syncBufferHandler->usedBufferSize = testUsedBufferSize;
235
236 pSubDevice->allocateSyncBufferHandler();
237 syncBufferHandler = reinterpret_cast<MockSyncBufferHandler *>(pSubDevice->syncBufferHandler.get());
238
239 EXPECT_EQ(testUsedBufferSize, syncBufferHandler->usedBufferSize);
240 }
241