/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */
7 
8 #include "shared/source/memory_manager/unified_memory_manager.h"
9 #include "shared/source/unified_memory/unified_memory.h"
10 #include "shared/test/common/fixtures/memory_management_fixture.h"
11 #include "shared/test/common/helpers/debug_manager_state_restore.h"
12 #include "shared/test/common/test_macros/test.h"
13 
14 #include "opencl/source/kernel/kernel.h"
15 #include "opencl/source/mem_obj/buffer.h"
16 #include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
17 #include "opencl/test/unit_test/fixtures/context_fixture.h"
18 #include "opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.h"
19 #include "opencl/test/unit_test/mocks/mock_buffer.h"
20 #include "opencl/test/unit_test/mocks/mock_context.h"
21 #include "opencl/test/unit_test/mocks/mock_kernel.h"
22 #include "opencl/test/unit_test/mocks/mock_program.h"
23 #include "opencl/test/unit_test/mocks/ult_cl_device_factory.h"
24 
25 #include "CL/cl.h"
26 #include "gtest/gtest.h"
27 #include "hw_cmds.h"
28 
29 #include <memory>
30 
31 using namespace NEO;
32 
33 struct KernelArgBufferTest : public Test<KernelArgBufferFixture> {
34     struct AllocationTypeHelper {
35         GraphicsAllocation::AllocationType allocationType;
36         bool compressed;
37     };
38 };
39 
// Setting a valid cl_mem buffer as a kernel arg patches the buffer's address
// into the kernel's cross-thread data at the arg's stateless offset.
TEST_F(KernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenBufferAddressIsCorrect) {
    Buffer *buffer = new MockBuffer();

    auto val = (cl_mem)buffer;
    auto pVal = &val;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Patched pointer lives in cross-thread data at the stateless offset of arg 0.
    auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() +
                                  this->pKernelInfo->argAsPtr(0).stateless);
    EXPECT_EQ(buffer->getCpuAddress(), *pKernelArg);

    delete buffer;
}
55 
56 struct MultiDeviceKernelArgBufferTest : public ::testing::Test {
57 
SetUpMultiDeviceKernelArgBufferTest58     void SetUp() override {
59         ClDeviceVector devicesForContext;
60         devicesForContext.push_back(deviceFactory.rootDevices[1]);
61         devicesForContext.push_back(deviceFactory.subDevices[4]);
62         devicesForContext.push_back(deviceFactory.subDevices[5]);
63         pContext = std::make_unique<MockContext>(devicesForContext);
64         kernelInfos.resize(3);
65         kernelInfos[0] = nullptr;
66         pKernelInfosStorage[0] = std::make_unique<MockKernelInfo>();
67         pKernelInfosStorage[1] = std::make_unique<MockKernelInfo>();
68         kernelInfos[1] = pKernelInfosStorage[0].get();
69         kernelInfos[2] = pKernelInfosStorage[1].get();
70 
71         auto &hwHelper = HwHelper::get(renderCoreFamily);
72 
73         for (auto i = 0u; i < 2; i++) {
74             pKernelInfosStorage[i]->heapInfo.pSsh = pSshLocal[i];
75             pKernelInfosStorage[i]->heapInfo.SurfaceStateHeapSize = sizeof(pSshLocal[i]);
76             pKernelInfosStorage[i]->kernelDescriptor.kernelAttributes.simdSize = hwHelper.getMinimalSIMDSize();
77 
78             auto crossThreadDataPointer = &pCrossThreadData[i];
79             memcpy_s(ptrOffset(&pCrossThreadData[i], i * sizeof(void *)), sizeof(void *), &crossThreadDataPointer, sizeof(void *));
80             pKernelInfosStorage[i]->crossThreadData = pCrossThreadData[i];
81 
82             pKernelInfosStorage[i]->addArgBuffer(0, static_cast<NEO::CrossThreadDataOffset>(i * sizeof(void *)), sizeof(void *));
83 
84             pKernelInfosStorage[i]->setCrossThreadDataSize(static_cast<uint16_t>((i + 1) * sizeof(void *)));
85         }
86 
87         auto retVal = CL_INVALID_PROGRAM;
88         pBuffer = std::unique_ptr<Buffer>(Buffer::create(pContext.get(), 0u, MemoryConstants::pageSize, nullptr, retVal));
89         EXPECT_EQ(CL_SUCCESS, retVal);
90         EXPECT_NE(nullptr, pBuffer);
91 
92         pProgram = std::make_unique<MockProgram>(pContext.get(), false, pContext->getDevices());
93     }
94 
TearDownMultiDeviceKernelArgBufferTest95     void TearDown() override {
96         for (auto i = 0u; i < 2; i++) {
97             pKernelInfosStorage[i]->crossThreadData = nullptr;
98         }
99     }
100 
101     UltClDeviceFactory deviceFactory{3, 2};
102     std::unique_ptr<MockContext> pContext;
103     std::unique_ptr<MockKernelInfo> pKernelInfosStorage[2];
104     char pCrossThreadData[2][64]{};
105     char pSshLocal[2][64]{};
106     KernelInfoContainer kernelInfos;
107     std::unique_ptr<Buffer> pBuffer;
108     std::unique_ptr<MockProgram> pProgram;
109 };
// On a multi-device kernel, setting a buffer arg patches each per-device
// kernel's cross-thread data with that device's GPU address to patch.
TEST_F(MultiDeviceKernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenBufferAddressIsCorrect) {
    int32_t retVal = CL_INVALID_VALUE;
    auto pMultiDeviceKernel = std::unique_ptr<MultiDeviceKernel>(MultiDeviceKernel::create<MockKernel>(pProgram.get(), kernelInfos, &retVal));

    EXPECT_EQ(CL_SUCCESS, retVal);
    cl_mem val = pBuffer.get();
    auto pVal = &val;

    retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
        auto pKernel = static_cast<MockKernel *>(pMultiDeviceKernel->getKernel(rootDeviceIndex));
        auto pKernelArg = reinterpret_cast<size_t *>(pKernel->getCrossThreadData() +
                                                     kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
        EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), *pKernelArg);
    }
}
128 
// With stateless buffer addressing, setting a non-coherent buffer arg succeeds
// and does not mark the kernel as requiring coherency.
TEST_F(KernelArgBufferTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    Buffer *buffer = new MockBuffer();

    auto val = (cl_mem)buffer;
    auto pVal = &val;

    pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_FALSE(pKernel->requiresCoherency());

    delete buffer;
}
143 
// With stateful (bindful) addressing, setting the buffer arg programs a
// RENDER_SURFACE_STATE in the SSH whose base address is the buffer's GPU address.
HWTEST_F(KernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    Buffer *buffer = new MockBuffer();

    auto val = (cl_mem)buffer;
    auto pVal = &val;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_FALSE(pKernel->requiresCoherency());

    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
        ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful));

    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
    EXPECT_EQ(buffer->getGraphicsAllocation(mockRootDeviceIndex)->getGpuAddress(), surfaceAddress);

    delete buffer;
}
165 
// Multi-device variant of the stateful case: each per-device kernel's SSH gets
// a surface state whose base address matches that device's allocation.
HWTEST_F(MultiDeviceKernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    cl_mem val = pBuffer.get();
    auto pVal = &val;

    int32_t retVal = CL_INVALID_VALUE;
    // Force bindful addressing for arg 0 on both kernel infos.
    for (auto &kernelInfo : pKernelInfosStorage) {
        kernelInfo->argAsPtr(0).bindful = 0;
    }
    auto pMultiDeviceKernel = std::unique_ptr<MultiDeviceKernel>(MultiDeviceKernel::create<MockKernel>(pProgram.get(), kernelInfos, &retVal));

    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
        auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
        EXPECT_FALSE(pKernel->requiresCoherency());
        EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
        typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
        auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
            ptrOffset(pKernel->getSurfaceStateHeap(), kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as<ArgDescPointer>().bindful));

        auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
        EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), surfaceAddress);
    }
}
193 
// A buffer whose allocation is marked coherent propagates the coherency
// requirement to the kernel when set as an arg.
HWTEST_F(KernelArgBufferTest, GivenBufferFromSvmPtrWhenSettingKernelArgThenArgumentsAreSetCorrectly) {

    Buffer *buffer = new MockBuffer();
    buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);

    auto val = (cl_mem)buffer;
    auto pVal = &val;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(pKernel->requiresCoherency());

    delete buffer;
}
208 
// Passing raw memory that is not a real cl_mem object is rejected with
// CL_INVALID_MEM_OBJECT.
TEST_F(KernelArgBufferTest, GivenInvalidBufferWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) {
    char *ptr = new char[sizeof(Buffer)];

    auto val = (cl_mem *)ptr;
    auto pVal = &val;
    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal);

    delete[] ptr;
}
219 
// A null cl_mem arg patches a null pointer into cross-thread data.
TEST_F(KernelArgBufferTest, GivenNullPtrWhenSettingKernelArgThenKernelArgIsNull) {
    auto val = (cl_mem *)nullptr;
    auto pVal = &val;
    this->pKernel->setArg(0, sizeof(cl_mem *), pVal);

    auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() +
                                  this->pKernelInfo->argAsPtr(0).stateless);

    EXPECT_EQ(nullptr, *pKernelArg);
}
230 
// Multi-device variant: a null arg is patched as null into every per-device
// kernel's cross-thread data.
TEST_F(MultiDeviceKernelArgBufferTest, GivenNullPtrWhenSettingKernelArgThenKernelArgIsNull) {
    int32_t retVal = CL_INVALID_VALUE;
    auto pMultiDeviceKernel = std::unique_ptr<MultiDeviceKernel>(MultiDeviceKernel::create<MockKernel>(pProgram.get(), kernelInfos, &retVal));

    EXPECT_EQ(CL_SUCCESS, retVal);

    auto val = nullptr;
    auto pVal = &val;
    pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
    for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
        auto pKernel = static_cast<MockKernel *>(pMultiDeviceKernel->getKernel(rootDeviceIndex));
        auto pKernelArg = reinterpret_cast<void **>(pKernel->getCrossThreadData() +
                                                    kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
        EXPECT_EQ(nullptr, *pKernelArg);
    }
}
247 
// With a 4-byte pointer size, patching a null arg clears only the low 32 bits
// of the slot; the pre-seeded high bits remain untouched.
TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPtrPassedIsNullThenOnly4BytesAreBeingPatched) {
    auto val = (cl_mem *)nullptr;
    auto pVal = &val;

    auto &argAsPtr = pKernelInfo->argAsPtr(0);
    argAsPtr.pointerSize = 4;

    auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData() + argAsPtr.stateless);
    auto pKernelArg32bit = (uint32_t *)pKernelArg64bit;

    // Seed all 64 bits so an over-wide patch would be detectable.
    *pKernelArg64bit = 0xffffffffffffffff;

    this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    uint64_t expValue = 0u;

    EXPECT_EQ(0u, *pKernelArg32bit);
    EXPECT_NE(expValue, *pKernelArg64bit);
}
266 
// Same as the previous test but passing a nullptr_t value directly: still only
// the low 32 bits of the slot are cleared.
TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPassedIsNullThenOnly4BytesAreBeingPatched) {
    auto pVal = nullptr;

    auto &argAsPtr = pKernelInfo->argAsPtr(0);
    argAsPtr.pointerSize = 4;

    auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData() + argAsPtr.stateless);
    auto pKernelArg32bit = (uint32_t *)pKernelArg64bit;

    // Seed all 64 bits so an over-wide patch would be detectable.
    *pKernelArg64bit = 0xffffffffffffffff;

    this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    uint64_t expValue = 0u;

    EXPECT_EQ(0u, *pKernelArg32bit);
    EXPECT_NE(expValue, *pKernelArg64bit);
}
284 
// A writable allocation with L3-flush not required is not tracked in the
// kernel's cache-flush vector.
TEST_F(KernelArgBufferTest, givenWritableBufferWhenSettingAsArgThenDoNotExpectAllocationInCacheFlushVector) {
    auto buffer = std::make_unique<MockBuffer>();
    buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(true);
    buffer->mockGfxAllocation.setFlushL3Required(false);

    auto val = static_cast<cl_mem>(buffer.get());
    auto pVal = &val;

    auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
}
297 
// A non-writable allocation that requires an L3 flush is recorded in the
// kernel's cache-flush vector.
TEST_F(KernelArgBufferTest, givenCacheFlushBufferWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    auto buffer = std::make_unique<MockBuffer>();
    buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(false);
    buffer->mockGfxAllocation.setFlushL3Required(true);

    auto val = static_cast<cl_mem>(buffer.get());
    auto pVal = &val;

    auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(&buffer->mockGfxAllocation, pKernel->kernelArgRequiresCacheFlush[0]);
}
310 
// A non-writable allocation with no L3-flush requirement is not tracked in the
// kernel's cache-flush vector.
TEST_F(KernelArgBufferTest, givenNoCacheFlushBufferWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) {
    auto buffer = std::make_unique<MockBuffer>();
    buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(false);
    buffer->mockGfxAllocation.setFlushL3Required(false);

    auto val = static_cast<cl_mem>(buffer.get());
    auto pVal = &val;

    auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
}
323 
// A device-local BUFFER allocation never counts as direct stateless host-memory
// access, regardless of whether the arg is purely stateful.
TEST_F(KernelArgBufferTest, givenBufferWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);

    auto val = (cl_mem)&buffer;
    auto pVal = &val;

    for (auto pureStatefulBufferAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);

        auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    }
}
340 
// A SHARED_BUFFER allocation counts as direct stateless shared-buffer access
// exactly when the arg is not purely stateful.
TEST_F(KernelArgBufferTest, givenSharedBufferWhenHasDirectStatelessAccessToSharedBufferIsCalledThenReturnCorrectValue) {
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::SHARED_BUFFER);

    auto val = (cl_mem)&buffer;
    auto pVal = &val;

    for (auto pureStatefulBufferAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);

        auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_EQ(!pureStatefulBufferAccess, pKernel->hasDirectStatelessAccessToSharedBuffer());
    }
}
357 
// A BUFFER_HOST_MEMORY allocation counts as direct stateless host-memory access
// exactly when the arg is not purely stateful.
TEST_F(KernelArgBufferTest, givenBufferInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) {
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    auto val = (cl_mem)&buffer;
    auto pVal = &val;

    for (auto pureStatefulBufferAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);

        auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_EQ(!pureStatefulBufferAccess, pKernel->hasDirectStatelessAccessToHostMemory());
    }
}
374 
// An SVM allocation of type BUFFER (device memory) never counts as direct
// stateless host-memory access.
TEST_F(KernelArgBufferTest, givenGfxAllocationWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER);

    for (auto pureStatefulBufferAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);

        auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    }
}
390 
// An SVM allocation of type BUFFER_HOST_MEMORY counts as direct stateless
// host-memory access exactly when the arg is not purely stateful.
TEST_F(KernelArgBufferTest, givenGfxAllocationInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) {
    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    for (auto pureStatefulBufferAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);

        auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_EQ(!pureStatefulBufferAccess, pKernel->hasDirectStatelessAccessToHostMemory());
    }
}
406 
// hasDirectStatelessAccessToHostMemory() is false for a kernel with no args and
// for each kernel-arg object type when no host-memory allocation was set.
TEST_F(KernelArgBufferTest, givenInvalidKernelObjWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
    KernelInfo kernelInfo;
    MockKernel emptyKernel(pProgram, kernelInfo, *pClDevice);
    EXPECT_FALSE(emptyKernel.hasDirectStatelessAccessToHostMemory());

    pKernel->kernelArguments.at(0).type = Kernel::NONE_OBJ;
    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());

    pKernel->kernelArguments.at(0).type = Kernel::BUFFER_OBJ;
    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());

    pKernel->kernelArguments.at(0).type = Kernel::SVM_ALLOC_OBJ;
    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
}
421 
// Indirect stateless host-memory access requires both the kernel-info flag and
// a unified-memory exec-info allocation of type BUFFER_HOST_MEMORY.
TEST_F(KernelArgBufferTest, givenKernelWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) {
    KernelInfo kernelInfo;
    EXPECT_FALSE(kernelInfo.hasIndirectStatelessAccess);

    MockKernel kernelWithNoIndirectStatelessAccess(pProgram, kernelInfo, *pClDevice);
    EXPECT_FALSE(kernelWithNoIndirectStatelessAccess.hasIndirectStatelessAccessToHostMemory());
    kernelInfo.hasIndirectStatelessAccess = true;

    MockKernel kernelWithNoIndirectHostAllocations(pProgram, kernelInfo, *pClDevice);
    EXPECT_FALSE(kernelWithNoIndirectHostAllocations.hasIndirectStatelessAccessToHostMemory());

    const auto allocationTypes = {GraphicsAllocation::AllocationType::BUFFER,
                                  GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY};

    MockKernel kernelWithIndirectUnifiedMemoryAllocation(pProgram, kernelInfo, *pClDevice);
    MockGraphicsAllocation gfxAllocation;
    for (const auto type : allocationTypes) {
        gfxAllocation.setAllocationType(type);
        kernelWithIndirectUnifiedMemoryAllocation.setUnifiedMemoryExecInfo(&gfxAllocation);
        // Only host-memory allocations should flip the result to true.
        if (type == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
            EXPECT_TRUE(kernelWithIndirectUnifiedMemoryAllocation.hasIndirectStatelessAccessToHostMemory());
        } else {
            EXPECT_FALSE(kernelWithIndirectUnifiedMemoryAllocation.hasIndirectStatelessAccessToHostMemory());
        }
        kernelWithIndirectUnifiedMemoryAllocation.clearUnifiedMemoryExecInfo();
    }
}
449 
// With indirect host allocations allowed, only the presence of a HOST unified
// memory allocation in the SVM manager makes indirect host access true.
TEST_F(KernelArgBufferTest, givenKernelExecInfoWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) {
    KernelInfo kernelInfo;
    kernelInfo.hasIndirectStatelessAccess = true;

    MockKernel mockKernel(pProgram, kernelInfo, *pClDevice);
    EXPECT_FALSE(mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory());

    auto svmAllocationsManager = mockKernel.getContext().getSVMAllocsManager();
    if (svmAllocationsManager == nullptr) {
        // Platform without SVM support: nothing further to verify.
        return;
    }

    mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed = true;
    EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory());

    // A device-memory USM allocation must not trigger host access.
    auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, mockKernel.getContext().getRootDeviceIndices(), mockKernel.getContext().getDeviceBitfields());
    deviceProperties.device = &pClDevice->getDevice();
    auto unifiedDeviceMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties);
    EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory());

    // A host-memory USM allocation must trigger host access.
    auto hostProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, mockKernel.getContext().getRootDeviceIndices(), mockKernel.getContext().getDeviceBitfields());
    auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, hostProperties);
    EXPECT_TRUE(mockKernel.hasIndirectStatelessAccessToHostMemory());

    svmAllocationsManager->freeSVMAlloc(unifiedDeviceMemoryAllocation);
    svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
}
478 
// setAuxTranslationRequired / isAuxTranslationRequired round-trip both values.
TEST_F(KernelArgBufferTest, whenSettingAuxTranslationRequiredThenIsAuxTranslationRequiredReturnsCorrectValue) {
    for (auto auxTranslationRequired : {false, true}) {
        pKernel->setAuxTranslationRequired(auxTranslationRequired);
        EXPECT_EQ(auxTranslationRequired, pKernel->isAuxTranslationRequired());
    }
}
485 
// With stateless compression enabled, direct stateless access to a shared
// buffer makes updateAuxTranslationRequired() flip the flag to true.
TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithDirectStatelessAccessToSharedBufferWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::SHARED_BUFFER);

    auto val = (cl_mem)&buffer;
    auto pVal = &val;

    auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_TRUE(pKernel->hasDirectStatelessAccessToSharedBuffer());

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}
507 
// With stateless compression enabled, direct stateless access to host memory
// makes updateAuxTranslationRequired() flip the flag to true.
TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    auto val = (cl_mem)&buffer;
    auto pVal = &val;

    auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}
529 
// Without direct stateless host-memory access, updateAuxTranslationRequired()
// leaves the flag false even with stateless compression enabled.
TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    MockBuffer buffer;

    auto val = (cl_mem)&buffer;
    auto pVal = &val;

    auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
550 
// setArgSvmAlloc with a host-memory allocation gives direct stateless host
// access, so updateAuxTranslationRequired() flips the flag to true.
TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}
571 
// setArgSvmAlloc with a non-host allocation gives no direct stateless host
// access, so updateAuxTranslationRequired() leaves the flag false.
TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);

    auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
591 
// Without the indirect-stateless-access flag, a host-memory exec-info
// allocation does not cause aux translation to become required.
TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithNoIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    pKernelInfo->hasIndirectStatelessAccess = false;

    MockGraphicsAllocation gfxAllocation;
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    pKernel->setUnifiedMemoryExecInfo(&gfxAllocation);

    EXPECT_FALSE(pKernel->hasIndirectStatelessAccessToHostMemory());

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
611 
// With the indirect-stateless-access flag set, only a BUFFER_HOST_MEMORY
// exec-info allocation makes updateAuxTranslationRequired() flip the flag.
TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrueForHostMemoryAllocation) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    pKernelInfo->hasIndirectStatelessAccess = true;

    const auto allocationTypes = {GraphicsAllocation::AllocationType::BUFFER,
                                  GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY};

    MockGraphicsAllocation gfxAllocation;

    for (const auto type : allocationTypes) {
        gfxAllocation.setAllocationType(type);

        pKernel->setUnifiedMemoryExecInfo(&gfxAllocation);

        if (type == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
            EXPECT_TRUE(pKernel->hasIndirectStatelessAccessToHostMemory());
        } else {
            EXPECT_FALSE(pKernel->hasIndirectStatelessAccessToHostMemory());
        }

        EXPECT_FALSE(pKernel->isAuxTranslationRequired());

        pKernel->updateAuxTranslationRequired();

        if (type == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
            EXPECT_TRUE(pKernel->isAuxTranslationRequired());
        } else {
            EXPECT_FALSE(pKernel->isAuxTranslationRequired());
        }

        // Reset state so each allocation type is evaluated independently.
        pKernel->clearUnifiedMemoryExecInfo();
        pKernel->setAuxTranslationRequired(false);
    }
}
648 
TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectStatelessAccessWhenFillWithKernelObjsForAuxTranslationIsCalledThenSetKernelObjectsForAuxTranslation) {
    // Only compressed unified-memory allocations should be collected as
    // kernel objects for aux translation, regardless of allocation type.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableStatelessCompression.set(1);

    pKernelInfo->hasIndirectStatelessAccess = true;

    constexpr std::array<AllocationTypeHelper, 4> testedAllocations = {{{GraphicsAllocation::AllocationType::BUFFER, false},
                                                                        {GraphicsAllocation::AllocationType::BUFFER, true},
                                                                        {GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false},
                                                                        {GraphicsAllocation::AllocationType::SVM_GPU, true}}};

    auto gmm = std::make_unique<Gmm>(pDevice->getRootDeviceEnvironment().getGmmClientContext(), nullptr, 0, 0, false);
    MockGraphicsAllocation allocation;
    allocation.setDefaultGmm(gmm.get());

    for (const auto &tested : testedAllocations) {
        allocation.setAllocationType(tested.allocationType);

        pKernel->setUnifiedMemoryExecInfo(&allocation);
        gmm->isCompressionEnabled = tested.compressed;

        KernelObjsForAuxTranslation auxTranslationObjs;
        pKernel->fillWithKernelObjsForAuxTranslation(auxTranslationObjs);

        if (!tested.compressed) {
            EXPECT_EQ(0u, auxTranslationObjs.size());
        } else {
            EXPECT_EQ(1u, auxTranslationObjs.size());
            auto kernelObj = *auxTranslationObjs.find({KernelObjForAuxTranslation::Type::GFX_ALLOC, &allocation});
            EXPECT_NE(nullptr, kernelObj.object);
            EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type);
            auxTranslationObjs.erase(kernelObj);
        }

        // Reset kernel state before the next allocation type is exercised.
        pKernel->clearUnifiedMemoryExecInfo();
        pKernel->setAuxTranslationRequired(false);
    }
}
687 
TEST_F(KernelArgBufferTest, givenSVMAllocsManagerWithCompressedSVMAllocationsWhenFillWithKernelObjsForAuxTranslationIsCalledThenSetKernelObjectsForAuxTranslation) {
    // SVM allocations registered with the SVM allocs manager should be picked
    // up for aux translation only when their gmm reports compression enabled.
    auto *svmAllocsManager = pContext->getSVMAllocsManager();
    if (svmAllocsManager == nullptr) {
        return;
    }

    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableStatelessCompression.set(1);

    constexpr std::array<AllocationTypeHelper, 4> testedAllocations = {{{GraphicsAllocation::AllocationType::BUFFER, false},
                                                                        {GraphicsAllocation::AllocationType::BUFFER, true},
                                                                        {GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false},
                                                                        {GraphicsAllocation::AllocationType::SVM_GPU, true}}};

    auto gmm = std::make_unique<Gmm>(pDevice->getRootDeviceEnvironment().getGmmClientContext(), nullptr, 0, 0, false);

    MockGraphicsAllocation svmAllocation;
    svmAllocation.setDefaultGmm(gmm.get());

    SvmAllocationData svmData(0);
    svmData.gpuAllocations.addAllocation(&svmAllocation);
    svmData.device = &pClDevice->getDevice();

    for (const auto &tested : testedAllocations) {
        svmAllocation.setAllocationType(tested.allocationType);

        gmm->isCompressionEnabled = tested.compressed;

        svmAllocsManager->insertSVMAlloc(svmData);

        KernelObjsForAuxTranslation auxTranslationObjs;
        pKernel->fillWithKernelObjsForAuxTranslation(auxTranslationObjs);

        if (!tested.compressed) {
            EXPECT_EQ(0u, auxTranslationObjs.size());
        } else {
            EXPECT_EQ(1u, auxTranslationObjs.size());
            auto kernelObj = *auxTranslationObjs.find({KernelObjForAuxTranslation::Type::GFX_ALLOC, &svmAllocation});
            EXPECT_NE(nullptr, kernelObj.object);
            EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type);
            auxTranslationObjs.erase(kernelObj);
        }

        // Remove the SVM allocation so the next iteration re-registers it
        // with its new allocation type / compression state.
        svmAllocsManager->removeSVMAlloc(svmData);
    }
}
733 
734 class KernelArgBufferFixtureBindless : public KernelArgBufferFixture {
735   public:
SetUp()736     void SetUp() {
737         DebugManager.flags.UseBindlessMode.set(1);
738         KernelArgBufferFixture::SetUp();
739 
740         pBuffer = new MockBuffer();
741         ASSERT_NE(nullptr, pBuffer);
742 
743         pKernelInfo->argAsPtr(0).bindless = bindlessOffset;
744         pKernelInfo->argAsPtr(0).stateless = undefined<CrossThreadDataOffset>;
745         pKernelInfo->argAsPtr(0).bindful = undefined<SurfaceStateHeapOffset>;
746     }
TearDown()747     void TearDown() override {
748         delete pBuffer;
749         KernelArgBufferFixture::TearDown();
750     }
751     DebugManagerStateRestore restorer;
752     MockBuffer *pBuffer;
753     const CrossThreadDataOffset bindlessOffset = 0x10;
754 };
755 
756 typedef Test<KernelArgBufferFixtureBindless> KernelArgBufferTestBindless;
757 
HWTEST_F(KernelArgBufferTestBindless, givenUsedBindlessBuffersWhenPatchingSurfaceStateOffsetsThenCorrectOffsetIsPatchedInCrossThreadData) {
    // NOTE(review): this alias is unused in the body; presumably it gates
    // compilation to families that define the type — confirm before removing.
    using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor;

    // Seed the bindless slot in cross-thread data with a sentinel, then verify
    // setArg overwrites it.
    auto *bindlessSlot = reinterpret_cast<uint32_t *>(ptrOffset(pKernel->getCrossThreadData(), bindlessOffset));
    *bindlessSlot = 0xdead;

    cl_mem bufferMemObj = pBuffer;
    retVal = pKernel->setArg(0, sizeof(bufferMemObj), &bufferMemObj);

    EXPECT_NE(0xdeadu, *bindlessSlot);
}
768