1 /*
2  * Copyright (C) 2019-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "shared/source/program/sync_buffer_handler.h"
9 #include "shared/test/common/mocks/ult_device_factory.h"
10 #include "shared/test/common/test_macros/test.h"
11 
12 #include "opencl/source/api/api.h"
13 #include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h"
14 #include "opencl/test/unit_test/mocks/mock_command_queue.h"
15 #include "opencl/test/unit_test/mocks/mock_kernel.h"
16 #include "opencl/test/unit_test/mocks/mock_mdi.h"
17 #include "opencl/test/unit_test/mocks/mock_platform.h"
18 
19 #include "engine_node.h"
20 
21 using namespace NEO;
22 
23 class MockSyncBufferHandler : public SyncBufferHandler {
24   public:
25     using SyncBufferHandler::bufferSize;
26     using SyncBufferHandler::graphicsAllocation;
27     using SyncBufferHandler::usedBufferSize;
28 };
29 
30 class SyncBufferEnqueueHandlerTest : public EnqueueHandlerTest {
31   public:
SetUp()32     void SetUp() override {
33         hardwareInfo = *defaultHwInfo;
34         hardwareInfo.capabilityTable.blitterOperationsSupported = true;
35         uint64_t hwInfoConfig = defaultHardwareInfoConfigTable[productFamily];
36         hardwareInfoSetup[productFamily](&hardwareInfo, true, hwInfoConfig);
37         SetUpImpl(&hardwareInfo);
38     }
39 
TearDown()40     void TearDown() override {
41         context->decRefInternal();
42         delete pClDevice;
43         pClDevice = nullptr;
44         pDevice = nullptr;
45     }
46 
SetUpImpl(const NEO::HardwareInfo * hardwareInfo)47     void SetUpImpl(const NEO::HardwareInfo *hardwareInfo) {
48         pDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(hardwareInfo);
49         ASSERT_NE(nullptr, pDevice);
50         pClDevice = new MockClDevice{pDevice};
51         ASSERT_NE(nullptr, pClDevice);
52 
53         auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver();
54         pTagMemory = commandStreamReceiver.getTagAddress();
55         ASSERT_NE(nullptr, const_cast<uint32_t *>(pTagMemory));
56 
57         context = new NEO::MockContext(pClDevice);
58     }
59 };
60 
61 class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest {
62   public:
SetUp()63     void SetUp() override {}
TearDown()64     void TearDown() override {}
65 
66     template <typename FamilyType>
SetUpT()67     void SetUpT() {
68         SyncBufferEnqueueHandlerTest::SetUp();
69         kernelInternals = std::make_unique<MockKernelWithInternals>(*pClDevice, context);
70         kernelInternals->kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
71         kernel = kernelInternals->mockKernel;
72         kernel->executionType = KernelExecutionType::Concurrent;
73         commandQueue = reinterpret_cast<MockCommandQueue *>(new MockCommandQueueHw<FamilyType>(context, pClDevice, 0));
74         hwHelper = &HwHelper::get(pClDevice->getHardwareInfo().platform.eRenderCoreFamily);
75         if (hwHelper->isCooperativeEngineSupported(pClDevice->getHardwareInfo())) {
76             commandQueue->gpgpuEngine = &pClDevice->getEngine(aub_stream::EngineType::ENGINE_CCS, EngineUsage::Cooperative);
77         }
78     }
79 
80     template <typename FamilyType>
TearDownT()81     void TearDownT() {
82         commandQueue->release();
83         kernelInternals.reset();
84         SyncBufferEnqueueHandlerTest::TearDown();
85     }
86 
patchAllocateSyncBuffer()87     void patchAllocateSyncBuffer() {
88         kernelInternals->kernelInfo.setSyncBuffer(sizeof(uint8_t), 0, 0);
89     }
90 
getSyncBufferHandler()91     MockSyncBufferHandler *getSyncBufferHandler() {
92         return reinterpret_cast<MockSyncBufferHandler *>(pDevice->syncBufferHandler.get());
93     }
94 
enqueueNDCount()95     cl_int enqueueNDCount() {
96         return clEnqueueNDCountKernelINTEL(commandQueue, kernelInternals->mockMultiDeviceKernel, workDim, gwOffset, workgroupCount, lws, 0, nullptr, nullptr);
97     }
98 
isCooperativeDispatchSupported()99     bool isCooperativeDispatchSupported() {
100         auto engineGroupType = hwHelper->getEngineGroupType(commandQueue->getGpgpuEngine().getEngineType(),
101                                                             commandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo);
102         return hwHelper->isCooperativeDispatchSupported(engineGroupType, pDevice->getHardwareInfo());
103     }
104 
105     const cl_uint workDim = 1;
106     const size_t gwOffset[3] = {0, 0, 0};
107     const size_t workItemsCount = 16;
108     const size_t lws[3] = {workItemsCount, 1, 1};
109     size_t workgroupCount[3] = {workItemsCount, 1, 1};
110     std::unique_ptr<MockKernelWithInternals> kernelInternals;
111     MockKernel *kernel;
112     MockCommandQueue *commandQueue;
113     HwHelper *hwHelper;
114 };
115 
// Enqueues a concurrent kernel that has a sync buffer argument and checks that
// the handler accounted for the enqueue and that, after a flush, the sync
// buffer allocation carries the task count most recently sent by the CSR.
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferIsUsed) {
    patchAllocateSyncBuffer();

    enqueueNDCount();
    auto syncBufferHandler = getSyncBufferHandler();
    // Abort this test with a regular failure rather than dereferencing a null
    // handler if the enqueue did not create one.
    ASSERT_NE(nullptr, syncBufferHandler);
    EXPECT_EQ(workItemsCount, syncBufferHandler->usedBufferSize);

    commandQueue->flush();

    auto pCsr = commandQueue->getGpgpuEngine().commandStreamReceiver;
    EXPECT_EQ(syncBufferHandler->graphicsAllocation->getTaskCount(pCsr->getOsContext().getContextId()),
              static_cast<UltCommandStreamReceiver<FamilyType> *>(pCsr)->latestSentTaskCount);
}
129 
// With a single work group per enqueue, the used size is expected to advance by
// CommonConstants::maximalSizeOfAtomicType on every enqueue.
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferOffsetIsProperlyAligned) {
    patchAllocateSyncBuffer();

    workgroupCount[0] = 1;
    enqueueNDCount();

    auto syncBufferHandler = getSyncBufferHandler();
    // Abort this test with a regular failure rather than dereferencing a null
    // handler if the enqueue did not create one.
    ASSERT_NE(nullptr, syncBufferHandler);
    EXPECT_EQ(CommonConstants::maximalSizeOfAtomicType, syncBufferHandler->usedBufferSize);

    enqueueNDCount();
    EXPECT_EQ(2u * CommonConstants::maximalSizeOfAtomicType, syncBufferHandler->usedBufferSize);
}
142 
// The kernel has no sync buffer argument here, so the enqueue is expected to
// succeed while the device still holds no sync buffer handler.
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated) {
    const cl_int status = enqueueNDCount();

    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(nullptr, getSyncBufferHandler());
}
148 
// A kernel with the Default execution type that has a sync buffer argument is
// expected to be rejected with CL_INVALID_KERNEL and to leave the device
// without a sync buffer handler.
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenDefaultKernelUsingSyncBufferWhenEnqueuingKernelThenErrorIsReturnedAndSyncBufferIsNotCreated) {
    patchAllocateSyncBuffer();
    kernel->executionType = KernelExecutionType::Default;

    const cl_int status = enqueueNDCount();

    EXPECT_EQ(CL_INVALID_KERNEL, status);
    EXPECT_EQ(nullptr, getSyncBufferHandler());
}
157 
// Enqueuing a concurrent kernel that has a sync buffer argument is expected to
// succeed and to leave a sync buffer handler on the device.
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsCreated) {
    patchAllocateSyncBuffer();

    const cl_int status = enqueueNDCount();

    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_NE(nullptr, getSyncBufferHandler());
}
164 
// Requests exactly the work group count the kernel reports as its maximum and
// expects the enqueue to succeed.
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingConcurrentKernelThenSuccessIsReturned) {
    workgroupCount[0] = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue);

    const cl_int status = enqueueNDCount();
    EXPECT_EQ(CL_SUCCESS, status);
}
172 
// Requests one work group more than the kernel reports as its maximum and
// expects the enqueue to be rejected with CL_INVALID_VALUE.
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenTooHighWorkgroupCountWhenEnqueuingConcurrentKernelThenErrorIsReturned) {
    // Held as size_t so the increment below is performed in size_t.
    const size_t reportedMaximum = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue);
    workgroupCount[0] = reportedMaximum + 1;

    const cl_int status = enqueueNDCount();
    EXPECT_EQ(CL_INVALID_VALUE, status);
}
180 
// Marks the sync buffer as completely used and checks that after the next
// enqueue the used size equals the size consumed by that single enqueue.
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSyncBufferFullWhenEnqueuingKernelThenNewBufferIsAllocated) {
    patchAllocateSyncBuffer();
    enqueueNDCount();
    auto syncBufferHandler = getSyncBufferHandler();
    // Abort this test with a regular failure rather than dereferencing a null
    // handler if the enqueue did not create one.
    ASSERT_NE(nullptr, syncBufferHandler);

    // Pretend every byte of the current buffer is already taken.
    syncBufferHandler->usedBufferSize = syncBufferHandler->bufferSize;
    enqueueNDCount();
    EXPECT_EQ(workItemsCount, syncBufferHandler->usedBufferSize);
}
190 
// With bindful-and-stateless addressing, patchSyncBuffer is expected to write
// the sync buffer's GPU address into the surface state located at the
// kernel's bindful sync buffer offset.
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSshRequiredWhenPatchingSyncBufferThenSshIsProperlyPatched) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    kernelInternals->kernelInfo.setBufferAddressingMode(KernelDescriptor::BindfulAndStateless);

    patchAllocateSyncBuffer();

    pDevice->allocateSyncBufferHandler();
    auto handler = getSyncBufferHandler();

    const auto bindfulOffset = kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.bindful;
    auto renderSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(kernel->getSurfaceStateHeap(), bindfulOffset));
    auto expectedAddress = handler->graphicsAllocation->getGpuAddress();

    // Seed the surface state with a different address so the patch is observable.
    renderSurfaceState->setSurfaceBaseAddress(expectedAddress + 1);
    auto programmedAddress = renderSurfaceState->getSurfaceBaseAddress();
    EXPECT_NE(expectedAddress, programmedAddress);

    kernel->patchSyncBuffer(handler->graphicsAllocation, handler->usedBufferSize);
    programmedAddress = renderSurfaceState->getSurfaceBaseAddress();
    EXPECT_EQ(expectedAddress, programmedAddress);
}
210 
// Writes a marker value into the handler after the first allocation call and
// checks it is still there after the second call on the same root device.
TEST(SyncBufferHandlerDeviceTest, GivenRootDeviceWhenAllocateSyncBufferIsCalledTwiceThenTheObjectIsCreatedOnlyOnce) {
    const size_t markerSize = 100;
    MockDevice rootDevice;

    rootDevice.allocateSyncBufferHandler();
    auto handler = reinterpret_cast<MockSyncBufferHandler *>(rootDevice.syncBufferHandler.get());

    // The marker must differ from the initial value, otherwise the final check proves nothing.
    ASSERT_NE(handler->usedBufferSize, markerSize);
    handler->usedBufferSize = markerSize;

    rootDevice.allocateSyncBufferHandler();
    handler = reinterpret_cast<MockSyncBufferHandler *>(rootDevice.syncBufferHandler.get());

    EXPECT_EQ(markerSize, handler->usedBufferSize);
}
225 
// Same check as for the root device, performed on the first sub device created
// by the UltDeviceFactory: a marker written after the first allocation call
// must still be present after the second call.
TEST(SyncBufferHandlerDeviceTest, GivenSubDeviceWhenAllocateSyncBufferIsCalledTwiceThenTheObjectIsCreatedOnlyOnce) {
    const size_t markerSize = 100;
    UltDeviceFactory ultDeviceFactory{1, 2};
    auto subDevice = ultDeviceFactory.subDevices[0];

    subDevice->allocateSyncBufferHandler();
    auto handler = reinterpret_cast<MockSyncBufferHandler *>(subDevice->syncBufferHandler.get());

    // The marker must differ from the initial value, otherwise the final check proves nothing.
    ASSERT_NE(handler->usedBufferSize, markerSize);
    handler->usedBufferSize = markerSize;

    subDevice->allocateSyncBufferHandler();
    handler = reinterpret_cast<MockSyncBufferHandler *>(subDevice->syncBufferHandler.get());

    EXPECT_EQ(markerSize, handler->usedBufferSize);
}
241