1 /*
2  * Copyright (C) 2018-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "shared/source/gmm_helper/gmm.h"
9 #include "shared/source/gmm_helper/gmm_helper.h"
10 #include "shared/source/helpers/ptr_math.h"
11 #include "shared/source/memory_manager/surface.h"
12 #include "shared/source/memory_manager/unified_memory_manager.h"
13 #include "shared/test/common/helpers/debug_manager_state_restore.h"
14 #include "shared/test/common/test_macros/test.h"
15 
16 #include "opencl/source/kernel/kernel.h"
17 #include "opencl/test/unit_test/fixtures/buffer_fixture.h"
18 #include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
19 #include "opencl/test/unit_test/fixtures/context_fixture.h"
20 #include "opencl/test/unit_test/mocks/mock_kernel.h"
21 #include "opencl/test/unit_test/mocks/mock_program.h"
22 #include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
23 
24 #include "gtest/gtest.h"
25 
26 using namespace NEO;
27 
28 class BufferSetArgTest : public ContextFixture,
29                          public ClDeviceFixture,
30                          public testing::Test {
31 
32     using ContextFixture::SetUp;
33 
34   public:
BufferSetArgTest()35     BufferSetArgTest() {}
36 
37   protected:
SetUp()38     void SetUp() override {
39         ClDeviceFixture::SetUp();
40         cl_device_id device = pClDevice;
41         ContextFixture::SetUp(1, &device);
42         pKernelInfo = std::make_unique<MockKernelInfo>();
43         pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
44 
45         constexpr uint32_t sizeOfPointer = sizeof(void *);
46         pKernelInfo->addArgBuffer(0, 0x10, sizeOfPointer);
47         pKernelInfo->addArgBuffer(1, 0x20, sizeOfPointer);
48         pKernelInfo->addArgBuffer(2, 0x30, sizeOfPointer);
49 
50         pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
51         pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
52 
53         pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice));
54 
55         retVal = CL_INVALID_VALUE;
56         pMultiDeviceKernel = MultiDeviceKernel::create<MockKernel>(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal);
57         pKernel = static_cast<MockKernel *>(pMultiDeviceKernel->getKernel(rootDeviceIndex));
58         ASSERT_NE(nullptr, pKernel);
59         ASSERT_EQ(CL_SUCCESS, retVal);
60         pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData));
61 
62         pKernel->setKernelArgHandler(1, &Kernel::setArgBuffer);
63         pKernel->setKernelArgHandler(2, &Kernel::setArgBuffer);
64         pKernel->setKernelArgHandler(0, &Kernel::setArgBuffer);
65 
66         BufferDefaults::context = new MockContext(pClDevice);
67         buffer = BufferHelper<>::create(BufferDefaults::context);
68     }
69 
TearDown()70     void TearDown() override {
71         delete buffer;
72         delete BufferDefaults::context;
73         delete pMultiDeviceKernel;
74 
75         delete pProgram;
76         ContextFixture::TearDown();
77         ClDeviceFixture::TearDown();
78     }
79 
80     cl_int retVal = CL_SUCCESS;
81     MockProgram *pProgram;
82     MultiDeviceKernel *pMultiDeviceKernel = nullptr;
83     MockKernel *pKernel = nullptr;
84     std::unique_ptr<MockKernelInfo> pKernelInfo;
85     SKernelBinaryHeaderCommon kernelHeader;
86     char surfaceStateHeap[0x80];
87     char pCrossThreadData[64];
88     Buffer *buffer = nullptr;
89 };
90 
// Buffer::setArgStateless must write the GPU address of the buffer's graphics
// allocation into the cross-thread-data slot of kernel argument 0.
TEST_F(BufferSetArgTest, WhenSettingKernelArgBufferThenGpuAddressIsSet) {
    const auto rootIdx = pClDevice->getRootDeviceIndex();
    const auto &argDescriptor = pKernelInfo->argAsPtr(0);
    auto patchLocation = (void **)(pKernel->getCrossThreadData() + argDescriptor.stateless);

    buffer->setArgStateless(patchLocation, argDescriptor.pointerSize, rootIdx, false);

    const auto expectedAddress = reinterpret_cast<void *>(buffer->getGraphicsAllocation(rootIdx)->getGpuAddress());
    EXPECT_EQ(expectedAddress, *patchLocation);
}
99 
// setArgBuffer must reject an argument size that differs from sizeof(cl_mem).
TEST_F(BufferSetArgTest, givenInvalidSizeWhenSettingKernelArgBufferThenReturnClInvalidArgSize) {
    cl_mem memObj = buffer;
    const size_t invalidArgSize = sizeof(cl_mem) + 1;

    // Pass the address of the handle, as every other setArgBuffer call in this
    // file does, so that only the size is invalid in this scenario.
    const cl_int err = pKernel->setArgBuffer(0, invalidArgSize, &memObj);

    EXPECT_EQ(CL_INVALID_ARG_SIZE, err);
}
105 
// With a bindful surface-state offset configured for argument 0, passing a null
// argument value must program a NULL-type / RAW-format surface state.
HWTEST_F(BufferSetArgTest, givenSetArgBufferWhenNullArgStatefulThenProgramNullSurfaceState) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;

    pKernelInfo->argAsPtr(0).bindful = 0;
    cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), nullptr);

    EXPECT_EQ(CL_SUCCESS, ret);

    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful));
    // Each local is named after the getter it comes from.
    const auto surfaceType = surfaceState->getSurfaceType();
    const auto surfaceFormat = surfaceState->getSurfaceFormat();

    EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfaceType);
    EXPECT_EQ(SURFACE_FORMAT::SURFACE_FORMAT_RAW, surfaceFormat);
}
122 
// Argument 0 is marked as a constant-address-space buffer and the allocation
// size is reduced by one byte; the programmed surface state must still carry
// one of the two buffer MOCS values reported by the GMM helper.
HWTEST_F(BufferSetArgTest, givenSetKernelArgOnReadOnlyBufferThatIsMisalingedWhenSurfaceStateIsSetThenCachingIsOn) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    pKernelInfo->setAddressQualifier(0, KernelArgMetadata::AddrConstant);
    pKernelInfo->argAsPtr(0).bindful = 0;

    // Shrink the allocation by a single byte.
    auto allocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    allocation->setSize(allocation->getUnderlyingBufferSize() - 1);

    cl_mem memObj = buffer;
    const cl_int status = pKernel->setArgBuffer(0, sizeof(cl_mem), &memObj);
    EXPECT_EQ(CL_SUCCESS, status);

    auto programmedState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
        ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful));
    const auto programmedMocs = programmedState->getMemoryObjectControlState();

    auto gmmHelper = pDevice->getGmmHelper();
    const auto bufferMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
    const auto constBufferMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);

    const bool matchesKnownMocs = (bufferMocs == programmedMocs) || (constBufferMocs == programmedMocs);
    EXPECT_TRUE(matchesKnownMocs);
}
145 
// In stateless buffer addressing mode, passing a null argument value must leave
// the surface state heap contents unchanged.
HWTEST_F(BufferSetArgTest, givenSetArgBufferWithNullArgStatelessThenDontProgramNullSurfaceState) {
    // Snapshot the heap so any write performed by setArgBuffer can be detected.
    char sshOriginal[sizeof(surfaceStateHeap)];
    memcpy(sshOriginal, surfaceStateHeap, sizeof(surfaceStateHeap));

    pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;

    cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), nullptr);

    EXPECT_EQ(CL_SUCCESS, ret);

    // NOTE(review): this compares the fixture's surfaceStateHeap array. If the
    // kernel keeps its own copy of the heap, the check cannot observe a write -
    // confirm against Kernel's heap handling.
    EXPECT_EQ(0, memcmp(sshOriginal, surfaceStateHeap, sizeof(surfaceStateHeap)));
}
161 
// A buffer whose default Gmm has compression enabled is set as argument 0
// twice: first before the argument is marked buffer-stateful (surface state must
// report AUX_NONE), then after setBufferStateful(0) (aux mode must be enabled).
HWTEST_F(BufferSetArgTest, givenNonPureStatefulArgWhenCompressedBufferIsSetThenSetNonAuxMode) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    pKernelInfo->argAsPtr(0).bindful = 0;

    // Attach a fresh Gmm to the allocation and flag it as compressed.
    auto allocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    allocation->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), allocation->getUnderlyingBuffer(), buffer->getSize(), 0, false));
    auto compressedGmm = allocation->getDefaultGmm();
    compressedGmm->isCompressionEnabled = true;

    cl_mem memObj = buffer;

    // First pass: argument not yet marked as buffer-stateful.
    cl_int status = pKernel->setArgBuffer(0, sizeof(cl_mem), &memObj);
    EXPECT_EQ(CL_SUCCESS, status);

    auto programmedState = reinterpret_cast<RENDER_SURFACE_STATE *>(
        ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful));
    const bool auxDisabled =
        RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == programmedState->getAuxiliarySurfaceMode();
    EXPECT_TRUE(auxDisabled);

    // Second pass: same buffer after the argument is marked buffer-stateful.
    pKernelInfo->setBufferStateful(0);
    status = pKernel->setArgBuffer(0, sizeof(cl_mem), &memObj);
    EXPECT_EQ(CL_SUCCESS, status);

    const bool auxEnabled = EncodeSurfaceState<FamilyType>::isAuxModeEnabled(programmedState, allocation->getDefaultGmm());
    EXPECT_TRUE(auxEnabled);
}
183 
// When setArgStateless is called with its last argument set to true, the value
// patched into cross-thread data must be the allocation's GPU address minus the
// GPU base address configured on that allocation.
TEST_F(BufferSetArgTest, Given32BitAddressingWhenSettingArgStatelessThenGpuAddressIsSetCorrectly) {
    const auto rootIdx = pClDevice->getRootDeviceIndex();
    auto patchLocation = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless);

    // Use a quarter of the GPU address as the base address.
    const auto baseAddress = buffer->getGraphicsAllocation(rootIdx)->getGpuAddress() >> 2;
    buffer->getGraphicsAllocation(rootIdx)->setGpuBaseAddress(baseAddress);

    buffer->setArgStateless(patchLocation, pKernelInfo->argAsPtr(0).pointerSize, rootIdx, true);

    const auto expectedValue = reinterpret_cast<void *>(buffer->getGraphicsAllocation(rootIdx)->getGpuAddress() - baseAddress);
    EXPECT_EQ(expectedValue, *patchLocation);
}
194 
// A sub-buffer created at a non-zero origin must patch the parent allocation's
// GPU address plus that origin into the kernel argument slot.
TEST_F(BufferSetArgTest, givenBufferWhenOffsetedSubbufferIsPassedToSetKernelArgThenCorrectGpuVAIsPatched) {
    cl_buffer_region region{};
    region.origin = 0xc0;
    region.size = 32;

    cl_int error = CL_SUCCESS;
    // unique_ptr releases the sub-buffer on every exit path of the test body.
    std::unique_ptr<Buffer> subBuffer(buffer->createSubBuffer(buffer->getFlags(), buffer->getFlagsIntel(), &region, error));

    ASSERT_NE(nullptr, subBuffer.get());
    // The error out-parameter must not report a failure for a successful creation.
    EXPECT_EQ(CL_SUCCESS, error);

    EXPECT_EQ(ptrOffset(buffer->getCpuAddress(), region.origin), subBuffer->getCpuAddress());

    auto pKernelArg = (void **)(pKernel->getCrossThreadData() +
                                pKernelInfo->argAsPtr(0).stateless);

    subBuffer->setArgStateless(pKernelArg, pKernelInfo->argAsPtr(0).pointerSize, pClDevice->getRootDeviceIndex(), false);

    EXPECT_EQ(reinterpret_cast<void *>(subBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress() + region.origin), *pKernelArg);
}
214 
// With a 4-byte pointer size, setArgStateless must overwrite only the lower
// 4 bytes of the argument slot and leave the upper 4 bytes untouched.
TEST_F(BufferSetArgTest, givenCurbeTokenThatSizeIs4BytesWhenStatelessArgIsPatchedThenOnly4BytesArePatchedInCurbe) {
    auto patchLocation = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless);

    // Pre-fill the 8-byte slot with all ones so untouched bytes are recognizable.
    auto slotAsQword = reinterpret_cast<uint64_t *>(patchLocation);
    *slotAsQword = 0xffffffffffffffffull;

    constexpr uint32_t patchSize = sizeof(uint32_t);
    pKernelInfo->argAsPtr(0).pointerSize = patchSize;

    buffer->setArgStateless(patchLocation, patchSize, pClDevice->getRootDeviceIndex(), false);

    // Split the slot into halves: the upper half must still hold the fill
    // pattern, the lower half the truncated GPU address.
    const auto gpuAddress = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress();
    const uint32_t expectedLowerHalf = static_cast<uint32_t>(gpuAddress);

    const uint64_t slotValue = *slotAsQword;
    const uint32_t upperHalf = slotValue >> 32;
    const uint32_t lowerHalf = (slotValue & 0xffffffff);

    EXPECT_EQ(0xffffffff, upperHalf);
    EXPECT_EQ(expectedLowerHalf, lowerHalf);
}
238 
// Setting the buffer through the clSetKernelArg API must patch the allocation's
// "address to patch" into argument 0 and make the kernel report exactly one
// residency surface.
TEST_F(BufferSetArgTest, WhenSettingKernelArgThenAddressToPatchIsSetCorrectlyAndSurfacesSet) {
    cl_mem memObj = buffer;

    retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memObj), &memObj);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto patchLocation = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless);
    const auto expectedAddress =
        reinterpret_cast<void *>(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch());
    EXPECT_EQ(expectedAddress, *patchLocation);

    std::vector<Surface *> residencySurfaces;
    pKernel->getResidency(residencySurfaces);
    EXPECT_EQ(1u, residencySurfaces.size());

    // The test deletes the Surface objects placed in the vector by getResidency.
    for (Surface *entry : residencySurfaces) {
        delete entry;
    }
}
262 
// Setting an SVM allocation as argument 0 must patch the SVM pointer itself into
// cross-thread data and make the kernel report exactly one residency surface.
TEST_F(BufferSetArgTest, GivenSvmPointerWhenSettingKernelArgThenAddressToPatchIsSetCorrectlyAndSurfacesSet) {
    REQUIRE_SVM_OR_SKIP(pDevice);

    auto svmManager = pContext->getSVMAllocsManager();
    void *svmPointer = svmManager->createSVMAlloc(256, {}, pContext->getRootDeviceIndices(), pContext->getDeviceBitfields());
    EXPECT_NE(nullptr, svmPointer);

    auto svmAllocData = svmManager->getSVMAlloc(svmPointer);
    ASSERT_NE(nullptr, svmAllocData);
    GraphicsAllocation *svmGpuAllocation = svmAllocData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex());
    EXPECT_NE(nullptr, svmGpuAllocation);

    retVal = pKernel->setArgSvmAlloc(0, svmPointer, svmGpuAllocation);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto patchLocation = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless);
    EXPECT_EQ(svmPointer, *patchLocation);

    std::vector<Surface *> residencySurfaces;
    pKernel->getResidency(residencySurfaces);
    EXPECT_EQ(1u, residencySurfaces.size());
    // The test deletes the Surface objects placed in the vector by getResidency.
    for (Surface *entry : residencySurfaces) {
        delete entry;
    }

    svmManager->freeSVMAlloc(svmPointer);
}
293 
// getKernelArg must return the same cl_mem handle that was passed to setArg.
TEST_F(BufferSetArgTest, WhenGettingKernelArgThenBufferIsReturned) {
    cl_mem bufferHandle = buffer;

    retVal = pKernel->setArg(0, sizeof(bufferHandle), &bufferHandle);
    ASSERT_EQ(CL_SUCCESS, retVal);

    const auto storedArg = pKernel->getKernelArg(0);
    EXPECT_EQ(bufferHandle, storedArg);
}
305 
// With the AddPatchInfoCommentsForAUBDump debug flag enabled, setting a buffer
// argument must record exactly one patch-info entry describing the patch from
// the buffer allocation into the kernel's cross-thread data.
TEST_F(BufferSetArgTest, givenKernelArgBufferWhenAddPathInfoDataIsSetThenPatchInfoDataIsCollected) {
    // Presumably restores the debug flags when it goes out of scope - see
    // DebugManagerStateRestore.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true);

    cl_mem bufferHandle = buffer;
    retVal = pKernel->setArg(0, sizeof(bufferHandle), &bufferHandle);
    ASSERT_EQ(CL_SUCCESS, retVal);

    const auto &patchInfoList = pKernel->getPatchInfoDataList();
    ASSERT_EQ(1u, patchInfoList.size());

    const auto expectedSource = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch();
    const auto expectedTarget = reinterpret_cast<uint64_t>(pKernel->getCrossThreadData());

    EXPECT_EQ(PatchInfoAllocationType::KernelArg, patchInfoList[0].sourceType);
    EXPECT_EQ(PatchInfoAllocationType::IndirectObjectHeap, patchInfoList[0].targetType);
    EXPECT_EQ(expectedSource, patchInfoList[0].sourceAllocation);
    EXPECT_EQ(expectedTarget, patchInfoList[0].targetAllocation);
    EXPECT_EQ(0u, patchInfoList[0].sourceAllocationOffset);
}
325 
// Without enabling the AddPatchInfoCommentsForAUBDump debug flag, setting a
// buffer argument must not record any patch-info entries.
TEST_F(BufferSetArgTest, givenKernelArgBufferWhenAddPathInfoDataIsNotSetThenPatchInfoDataIsNotCollected) {
    cl_mem bufferHandle = buffer;

    retVal = pKernel->setArg(0, sizeof(bufferHandle), &bufferHandle);
    ASSERT_EQ(CL_SUCCESS, retVal);

    const auto recordedEntries = pKernel->getPatchInfoDataList().size();
    EXPECT_EQ(0u, recordedEntries);
}
337