1 /*
2 * Copyright (C) 2018-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "shared/source/gmm_helper/gmm.h"
9 #include "shared/source/gmm_helper/gmm_helper.h"
10 #include "shared/source/helpers/ptr_math.h"
11 #include "shared/source/memory_manager/surface.h"
12 #include "shared/source/memory_manager/unified_memory_manager.h"
13 #include "shared/test/common/helpers/debug_manager_state_restore.h"
14 #include "shared/test/common/test_macros/test.h"
15
16 #include "opencl/source/kernel/kernel.h"
17 #include "opencl/test/unit_test/fixtures/buffer_fixture.h"
18 #include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
19 #include "opencl/test/unit_test/fixtures/context_fixture.h"
20 #include "opencl/test/unit_test/mocks/mock_kernel.h"
21 #include "opencl/test/unit_test/mocks/mock_program.h"
22 #include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
23
24 #include "gtest/gtest.h"
25
26 using namespace NEO;
27
28 class BufferSetArgTest : public ContextFixture,
29 public ClDeviceFixture,
30 public testing::Test {
31
32 using ContextFixture::SetUp;
33
34 public:
BufferSetArgTest()35 BufferSetArgTest() {}
36
37 protected:
SetUp()38 void SetUp() override {
39 ClDeviceFixture::SetUp();
40 cl_device_id device = pClDevice;
41 ContextFixture::SetUp(1, &device);
42 pKernelInfo = std::make_unique<MockKernelInfo>();
43 pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
44
45 constexpr uint32_t sizeOfPointer = sizeof(void *);
46 pKernelInfo->addArgBuffer(0, 0x10, sizeOfPointer);
47 pKernelInfo->addArgBuffer(1, 0x20, sizeOfPointer);
48 pKernelInfo->addArgBuffer(2, 0x30, sizeOfPointer);
49
50 pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
51 pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
52
53 pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice));
54
55 retVal = CL_INVALID_VALUE;
56 pMultiDeviceKernel = MultiDeviceKernel::create<MockKernel>(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal);
57 pKernel = static_cast<MockKernel *>(pMultiDeviceKernel->getKernel(rootDeviceIndex));
58 ASSERT_NE(nullptr, pKernel);
59 ASSERT_EQ(CL_SUCCESS, retVal);
60 pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData));
61
62 pKernel->setKernelArgHandler(1, &Kernel::setArgBuffer);
63 pKernel->setKernelArgHandler(2, &Kernel::setArgBuffer);
64 pKernel->setKernelArgHandler(0, &Kernel::setArgBuffer);
65
66 BufferDefaults::context = new MockContext(pClDevice);
67 buffer = BufferHelper<>::create(BufferDefaults::context);
68 }
69
TearDown()70 void TearDown() override {
71 delete buffer;
72 delete BufferDefaults::context;
73 delete pMultiDeviceKernel;
74
75 delete pProgram;
76 ContextFixture::TearDown();
77 ClDeviceFixture::TearDown();
78 }
79
80 cl_int retVal = CL_SUCCESS;
81 MockProgram *pProgram;
82 MultiDeviceKernel *pMultiDeviceKernel = nullptr;
83 MockKernel *pKernel = nullptr;
84 std::unique_ptr<MockKernelInfo> pKernelInfo;
85 SKernelBinaryHeaderCommon kernelHeader;
86 char surfaceStateHeap[0x80];
87 char pCrossThreadData[64];
88 Buffer *buffer = nullptr;
89 };
90
// Calls Buffer::setArgStateless for argument 0 with the final flag set to false
// and expects the patched cross-thread-data slot to hold the allocation's GPU address.
TEST_F(BufferSetArgTest, WhenSettingKernelArgBufferThenGpuAddressIsSet) {
    const auto rootIndex = pClDevice->getRootDeviceIndex();
    const auto &argDescriptor = pKernelInfo->argAsPtr(0);
    auto patchLocation = reinterpret_cast<void **>(pKernel->getCrossThreadData() + argDescriptor.stateless);

    buffer->setArgStateless(patchLocation, argDescriptor.pointerSize, rootIndex, false);

    const auto gpuAddress = buffer->getGraphicsAllocation(rootIndex)->getGpuAddress();
    EXPECT_EQ(reinterpret_cast<void *>(gpuAddress), *patchLocation);
}
99
// Passing an argument size other than sizeof(cl_mem) must be rejected with
// CL_INVALID_ARG_SIZE.
TEST_F(BufferSetArgTest, givenInvalidSizeWhenSettingKernelArgBufferThenReturnClInvalidArgSize) {
    cl_mem arg = buffer;
    // The argument value is a pointer to the cl_mem handle, matching every other
    // setArgBuffer call in this file; only the size is deliberately wrong.
    cl_int err = pKernel->setArgBuffer(0, sizeof(cl_mem) + 1, &arg);
    EXPECT_EQ(CL_INVALID_ARG_SIZE, err);
}
105
// With argument 0 given a surface-state offset of 0, setting a null buffer
// argument must succeed and leave a NULL-type, RAW-format surface state at that
// offset in the kernel's surface state heap.
HWTEST_F(BufferSetArgTest, givenSetArgBufferWhenNullArgStatefulThenProgramNullSurfaceState) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;

    pKernelInfo->argAsPtr(0).bindful = 0;
    cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), nullptr);

    EXPECT_EQ(CL_SUCCESS, ret);

    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful));
    auto surfaceType = surfaceState->getSurfaceType();
    auto surfaceFormat = surfaceState->getSurfaceFormat();

    EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfaceType);
    EXPECT_EQ(SURFACE_FORMAT::SURFACE_FORMAT_RAW, surfaceFormat);
}
122
// Marks argument 0 with the constant address qualifier, shrinks the allocation
// size by one byte, sets the buffer as the argument and accepts either the
// OCL_BUFFER or the OCL_BUFFER_CONST MOCS value in the resulting surface state.
HWTEST_F(BufferSetArgTest, givenSetKernelArgOnReadOnlyBufferThatIsMisalingedWhenSurfaceStateIsSetThenCachingIsOn) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    pKernelInfo->setAddressQualifier(0, KernelArgMetadata::AddrConstant);
    pKernelInfo->argAsPtr(0).bindful = 0;

    // Reduce the reported size by one byte relative to the underlying buffer size.
    auto allocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    const auto shrunkSize = allocation->getUnderlyingBufferSize() - 1;
    allocation->setSize(shrunkSize);

    cl_mem memHandle = buffer;
    const cl_int status = pKernel->setArgBuffer(0, sizeof(cl_mem), &memHandle);
    EXPECT_EQ(CL_SUCCESS, status);

    const auto sshOffset = pKernelInfo->argAsPtr(0).bindful;
    auto programmedState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(), sshOffset));
    const auto programmedMocs = programmedState->getMemoryObjectControlState();

    auto gmmHelper = pDevice->getGmmHelper();
    const auto bufferMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
    const auto constBufferMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);

    const bool mocsIsAccepted = (bufferMocs == programmedMocs) || (constBufferMocs == programmedMocs);
    EXPECT_TRUE(mocsIsAccepted);
}
145
// With the buffer addressing mode switched to Stateless, setting a null buffer
// argument must succeed and leave the surface state heap bytes untouched.
HWTEST_F(BufferSetArgTest, givenSetArgBufferWithNullArgStatelessThenDontProgramNullSurfaceState) {
    // Snapshot the heap contents before the call so any write can be detected.
    // NOTE(review): this compares the fixture's surfaceStateHeap array (what
    // heapInfo.pSsh points at), while the stateful tests inspect
    // pKernel->getSurfaceStateHeap() - confirm this is the buffer that should be checked.
    char sshOriginal[sizeof(surfaceStateHeap)];
    memcpy(sshOriginal, surfaceStateHeap, sizeof(surfaceStateHeap));

    pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;

    cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), nullptr);

    EXPECT_EQ(CL_SUCCESS, ret);

    EXPECT_EQ(memcmp(sshOriginal, surfaceStateHeap, sizeof(surfaceStateHeap)), 0);
}
161
// Attaches a Gmm with compression enabled to the buffer's allocation and sets the
// buffer as argument 0 twice: first as-is, expecting AUX_NONE in the surface state,
// then after setBufferStateful(0), expecting isAuxModeEnabled to report true.
HWTEST_F(BufferSetArgTest, givenNonPureStatefulArgWhenCompressedBufferIsSetThenSetNonAuxMode) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;

    pKernelInfo->argAsPtr(0).bindful = 0;

    auto allocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    allocation->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), allocation->getUnderlyingBuffer(), buffer->getSize(), 0, false));
    allocation->getDefaultGmm()->isCompressionEnabled = true;

    cl_mem memHandle = buffer;

    // First pass: argument not yet marked as buffer-stateful.
    cl_int status = pKernel->setArgBuffer(0, sizeof(cl_mem), &memHandle);
    EXPECT_EQ(CL_SUCCESS, status);

    auto programmedState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful));
    const bool auxIsNone = (AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == programmedState->getAuxiliarySurfaceMode());
    EXPECT_TRUE(auxIsNone);

    // Second pass: same argument after setBufferStateful(0).
    pKernelInfo->setBufferStateful(0);
    status = pKernel->setArgBuffer(0, sizeof(cl_mem), &memHandle);
    EXPECT_EQ(CL_SUCCESS, status);

    const bool auxIsEnabled = EncodeSurfaceState<FamilyType>::isAuxModeEnabled(programmedState, allocation->getDefaultGmm());
    EXPECT_TRUE(auxIsEnabled);
}
183
// Sets a GPU base address on the allocation (GPU address shifted right by two),
// calls setArgStateless with the final flag set to true, and expects the patched
// value to equal the allocation's GPU address minus that base.
TEST_F(BufferSetArgTest, Given32BitAddressingWhenSettingArgStatelessThenGpuAddressIsSetCorrectly) {
    const auto rootIndex = pClDevice->getRootDeviceIndex();
    const auto &argDescriptor = pKernelInfo->argAsPtr(0);
    auto patchLocation = reinterpret_cast<void **>(pKernel->getCrossThreadData() + argDescriptor.stateless);

    const auto baseAddress = buffer->getGraphicsAllocation(rootIndex)->getGpuAddress() >> 2;
    buffer->getGraphicsAllocation(rootIndex)->setGpuBaseAddress(baseAddress);

    buffer->setArgStateless(patchLocation, argDescriptor.pointerSize, rootIndex, true);

    const auto expectedAddress = buffer->getGraphicsAllocation(rootIndex)->getGpuAddress() - baseAddress;
    EXPECT_EQ(reinterpret_cast<void *>(expectedAddress), *patchLocation);
}
194
// Creates a sub-buffer at offset 0xc0 of the fixture buffer, checks its CPU
// address is the parent's CPU address plus that offset, then patches it as a
// stateless argument and expects the GPU address plus the same offset.
TEST_F(BufferSetArgTest, givenBufferWhenOffsetedSubbufferIsPassedToSetKernelArgThenCorrectGpuVAIsPatched) {
    cl_buffer_region region{};
    region.origin = 0xc0;
    region.size = 32;
    cl_int error = CL_SUCCESS;
    auto subBuffer = buffer->createSubBuffer(buffer->getFlags(), buffer->getFlagsIntel(), &region, error);

    ASSERT_NE(nullptr, subBuffer);
    EXPECT_EQ(CL_SUCCESS, error);

    EXPECT_EQ(ptrOffset(buffer->getCpuAddress(), region.origin), subBuffer->getCpuAddress());

    auto pKernelArg = reinterpret_cast<void **>(pKernel->getCrossThreadData() +
                                                pKernelInfo->argAsPtr(0).stateless);

    subBuffer->setArgStateless(pKernelArg, pKernelInfo->argAsPtr(0).pointerSize, pClDevice->getRootDeviceIndex(), false);

    EXPECT_EQ(reinterpret_cast<void *>(subBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress() + region.origin), *pKernelArg);
    delete subBuffer;
}
214
// Pre-fills the 8 bytes at argument 0's cross-thread-data slot with all ones,
// patches the buffer with a 4-byte pointer size, and verifies that only the low
// 4 bytes changed while the high 4 bytes still hold the fill pattern.
TEST_F(BufferSetArgTest, givenCurbeTokenThatSizeIs4BytesWhenStatelessArgIsPatchedThenOnly4BytesArePatchedInCurbe) {
    auto patchLocation = reinterpret_cast<void **>(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless);

    // Fill all 8 bytes of the slot with 0xffffffffffffffff.
    auto slotAsQword = reinterpret_cast<uint64_t *>(patchLocation);
    const uint64_t allOnes = 0xffffffffffffffffull;
    *slotAsQword = allOnes;

    constexpr uint32_t fourBytes = sizeof(uint32_t);
    pKernelInfo->argAsPtr(0).pointerSize = fourBytes;

    buffer->setArgStateless(patchLocation, fourBytes, pClDevice->getRootDeviceIndex(), false);

    // Only the low 4 bytes may have been overwritten.
    const auto gpuAddress = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress();
    const uint32_t expectedLowDword = static_cast<uint32_t>(gpuAddress);

    const uint64_t patchedQword = *slotAsQword;
    const uint32_t highDword = static_cast<uint32_t>(patchedQword >> 32);
    const uint32_t lowDword = static_cast<uint32_t>(patchedQword & 0xffffffff);

    EXPECT_EQ(0xffffffff, highDword);
    EXPECT_EQ(expectedLowDword, lowDword);
}
238
// Sets the buffer through the clSetKernelArg API entry point, then checks that
// the cross-thread-data slot holds the allocation's address-to-patch and that
// the kernel reports exactly one residency surface.
TEST_F(BufferSetArgTest, WhenSettingKernelArgThenAddressToPatchIsSetCorrectlyAndSurfacesSet) {
    cl_mem memHandle = buffer;

    retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memHandle), &memHandle);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto patchLocation = reinterpret_cast<void **>(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless);
    const auto addressToPatch = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch();
    EXPECT_EQ(reinterpret_cast<void *>(addressToPatch), *patchLocation);

    std::vector<Surface *> residencySurfaces;
    pKernel->getResidency(residencySurfaces);
    EXPECT_EQ(1u, residencySurfaces.size());

    // The test deletes the surfaces returned by getResidency itself.
    for (auto *entry : residencySurfaces) {
        delete entry;
    }
}
262
// Allocates 256 bytes through the context's SVM allocations manager, sets the
// pointer as argument 0 via setArgSvmAlloc, then checks that the cross-thread-data
// slot holds the SVM pointer and that one residency surface is reported.
TEST_F(BufferSetArgTest, GivenSvmPointerWhenSettingKernelArgThenAddressToPatchIsSetCorrectlyAndSurfacesSet) {
    REQUIRE_SVM_OR_SKIP(pDevice);

    auto svmManager = pContext->getSVMAllocsManager();
    void *svmPointer = svmManager->createSVMAlloc(256, {}, pContext->getRootDeviceIndices(), pContext->getDeviceBitfields());
    EXPECT_NE(nullptr, svmPointer);

    auto svmEntry = pContext->getSVMAllocsManager()->getSVMAlloc(svmPointer);
    ASSERT_NE(nullptr, svmEntry);
    GraphicsAllocation *svmAllocation = svmEntry->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex());
    EXPECT_NE(nullptr, svmAllocation);

    retVal = pKernel->setArgSvmAlloc(0, svmPointer, svmAllocation);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto patchLocation = reinterpret_cast<void **>(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless);
    EXPECT_EQ(svmPointer, *patchLocation);

    std::vector<Surface *> residencySurfaces;
    pKernel->getResidency(residencySurfaces);
    EXPECT_EQ(1u, residencySurfaces.size());
    // The test deletes the surfaces returned by getResidency itself.
    for (auto *entry : residencySurfaces) {
        delete entry;
    }

    pContext->getSVMAllocsManager()->freeSVMAlloc(svmPointer);
}
293
// After setting the buffer as argument 0 through setArg, getKernelArg(0) must
// return the same cl_mem handle.
TEST_F(BufferSetArgTest, WhenGettingKernelArgThenBufferIsReturned) {
    cl_mem memHandle = buffer;

    retVal = pKernel->setArg(0, sizeof(memHandle), &memHandle);
    ASSERT_EQ(CL_SUCCESS, retVal);

    const auto storedArg = pKernel->getKernelArg(0);
    EXPECT_EQ(memHandle, storedArg);
}
305
// With the AddPatchInfoCommentsForAUBDump debug flag enabled, setting a buffer
// argument must record exactly one patch-info entry describing the patch from
// the buffer allocation into the kernel's cross-thread data.
TEST_F(BufferSetArgTest, givenKernelArgBufferWhenAddPathInfoDataIsSetThenPatchInfoDataIsCollected) {
    // Declared first so the debug flag is restored when the test scope ends.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true);

    cl_mem memHandle = buffer;
    retVal = pKernel->setArg(0, sizeof(memHandle), &memHandle);
    ASSERT_EQ(CL_SUCCESS, retVal);

    const auto &patchInfoList = pKernel->getPatchInfoDataList();
    ASSERT_EQ(1u, patchInfoList.size());

    const auto expectedSource = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch();
    const auto expectedTarget = reinterpret_cast<uint64_t>(pKernel->getCrossThreadData());

    EXPECT_EQ(PatchInfoAllocationType::KernelArg, patchInfoList[0].sourceType);
    EXPECT_EQ(PatchInfoAllocationType::IndirectObjectHeap, patchInfoList[0].targetType);
    EXPECT_EQ(expectedSource, patchInfoList[0].sourceAllocation);
    EXPECT_EQ(expectedTarget, patchInfoList[0].targetAllocation);
    EXPECT_EQ(0u, patchInfoList[0].sourceAllocationOffset);
}
325
// Without touching the AddPatchInfoCommentsForAUBDump debug flag, setting a
// buffer argument must leave the kernel's patch-info list empty.
TEST_F(BufferSetArgTest, givenKernelArgBufferWhenAddPathInfoDataIsNotSetThenPatchInfoDataIsNotCollected) {
    cl_mem memHandle = buffer;

    retVal = pKernel->setArg(0, sizeof(memHandle), &memHandle);
    ASSERT_EQ(CL_SUCCESS, retVal);

    const auto collectedEntries = pKernel->getPatchInfoDataList().size();
    EXPECT_EQ(0u, collectedEntries);
}
337