1 /*
2 * Copyright (C) 2018-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "shared/source/memory_manager/unified_memory_manager.h"
9 #include "shared/source/unified_memory/unified_memory.h"
10 #include "shared/test/common/fixtures/memory_management_fixture.h"
11 #include "shared/test/common/helpers/debug_manager_state_restore.h"
12 #include "shared/test/common/test_macros/test.h"
13
14 #include "opencl/source/kernel/kernel.h"
15 #include "opencl/source/mem_obj/buffer.h"
16 #include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
17 #include "opencl/test/unit_test/fixtures/context_fixture.h"
18 #include "opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.h"
19 #include "opencl/test/unit_test/mocks/mock_buffer.h"
20 #include "opencl/test/unit_test/mocks/mock_context.h"
21 #include "opencl/test/unit_test/mocks/mock_kernel.h"
22 #include "opencl/test/unit_test/mocks/mock_program.h"
23 #include "opencl/test/unit_test/mocks/ult_cl_device_factory.h"
24
25 #include "CL/cl.h"
26 #include "gtest/gtest.h"
27 #include "hw_cmds.h"
28
29 #include <memory>
30
31 using namespace NEO;
32
struct KernelArgBufferTest : public Test<KernelArgBufferFixture> {
    // Pairs an allocation type with whether its backing Gmm is compressed;
    // used by the aux-translation tests that iterate over several scenarios.
    struct AllocationTypeHelper {
        GraphicsAllocation::AllocationType allocationType;
        bool compressed;
    };
};
39
TEST_F(KernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenBufferAddressIsCorrect) {
    // Own the buffer via RAII instead of raw new/delete so it is released
    // on every exit path from the test.
    auto buffer = std::make_unique<MockBuffer>();

    auto val = static_cast<cl_mem>(buffer.get());
    auto pVal = &val;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // setArg patches the buffer address into cross-thread data at the
    // stateless offset declared for argument 0.
    auto pKernelArg = reinterpret_cast<cl_mem **>(this->pKernel->getCrossThreadData() +
                                                  this->pKernelInfo->argAsPtr(0).stateless);
    EXPECT_EQ(buffer->getCpuAddress(), *pKernelArg);
}
55
struct MultiDeviceKernelArgBufferTest : public ::testing::Test {

    // Builds a context over root device 1 plus two sub-devices of another
    // root device, and prepares two kernel-info descriptors. kernelInfos[0]
    // is deliberately nullptr (no kernel for root device 0); indices 1 and 2
    // map to the two storage entries. Each kernel info exposes one
    // buffer-pointer argument at an offset of i * sizeof(void *) in its
    // cross-thread data.
    void SetUp() override {
        ClDeviceVector devicesForContext;
        devicesForContext.push_back(deviceFactory.rootDevices[1]);
        devicesForContext.push_back(deviceFactory.subDevices[4]);
        devicesForContext.push_back(deviceFactory.subDevices[5]);
        pContext = std::make_unique<MockContext>(devicesForContext);
        kernelInfos.resize(3);
        kernelInfos[0] = nullptr;
        pKernelInfosStorage[0] = std::make_unique<MockKernelInfo>();
        pKernelInfosStorage[1] = std::make_unique<MockKernelInfo>();
        kernelInfos[1] = pKernelInfosStorage[0].get();
        kernelInfos[2] = pKernelInfosStorage[1].get();

        auto &hwHelper = HwHelper::get(renderCoreFamily);

        for (auto i = 0u; i < 2; i++) {
            pKernelInfosStorage[i]->heapInfo.pSsh = pSshLocal[i];
            pKernelInfosStorage[i]->heapInfo.SurfaceStateHeapSize = sizeof(pSshLocal[i]);
            pKernelInfosStorage[i]->kernelDescriptor.kernelAttributes.simdSize = hwHelper.getMinimalSIMDSize();

            // Store a self-referencing pointer inside the local cross-thread
            // data buffer so each device's data is distinguishable.
            auto crossThreadDataPointer = &pCrossThreadData[i];
            memcpy_s(ptrOffset(&pCrossThreadData[i], i * sizeof(void *)), sizeof(void *), &crossThreadDataPointer, sizeof(void *));
            pKernelInfosStorage[i]->crossThreadData = pCrossThreadData[i];

            pKernelInfosStorage[i]->addArgBuffer(0, static_cast<NEO::CrossThreadDataOffset>(i * sizeof(void *)), sizeof(void *));

            pKernelInfosStorage[i]->setCrossThreadDataSize(static_cast<uint16_t>((i + 1) * sizeof(void *)));
        }

        auto retVal = CL_INVALID_PROGRAM;
        pBuffer = std::unique_ptr<Buffer>(Buffer::create(pContext.get(), 0u, MemoryConstants::pageSize, nullptr, retVal));
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_NE(nullptr, pBuffer);

        pProgram = std::make_unique<MockProgram>(pContext.get(), false, pContext->getDevices());
    }

    // Detach the stack-owned cross-thread data before the kernel infos are
    // destroyed, so their destructors do not free memory they do not own.
    void TearDown() override {
        for (auto i = 0u; i < 2; i++) {
            pKernelInfosStorage[i]->crossThreadData = nullptr;
        }
    }

    UltClDeviceFactory deviceFactory{3, 2}; // 3 root devices, 2 sub-devices each
    std::unique_ptr<MockContext> pContext;
    std::unique_ptr<MockKernelInfo> pKernelInfosStorage[2];
    char pCrossThreadData[2][64]{};
    char pSshLocal[2][64]{};
    KernelInfoContainer kernelInfos;
    std::unique_ptr<Buffer> pBuffer;
    std::unique_ptr<MockProgram> pProgram;
};
TEST_F(MultiDeviceKernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenBufferAddressIsCorrect) {
    int32_t retVal = CL_INVALID_VALUE;
    auto pMultiDeviceKernel = std::unique_ptr<MultiDeviceKernel>(MultiDeviceKernel::create<MockKernel>(pProgram.get(), kernelInfos, &retVal));

    EXPECT_EQ(CL_SUCCESS, retVal);
    cl_mem val = pBuffer.get();
    auto pVal = &val;

    retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Each per-device kernel must have its own device's GPU address patched
    // at that device's stateless cross-thread-data offset.
    for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
        auto pKernel = static_cast<MockKernel *>(pMultiDeviceKernel->getKernel(rootDeviceIndex));
        auto pKernelArg = reinterpret_cast<size_t *>(pKernel->getCrossThreadData() +
                                                     kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
        EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), *pKernelArg);
    }
}
128
TEST_F(KernelArgBufferTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    // RAII ownership instead of raw new/delete, so the buffer is released
    // on every exit path from the test.
    auto buffer = std::make_unique<MockBuffer>();

    auto val = static_cast<cl_mem>(buffer.get());
    auto pVal = &val;

    // Force the stateless addressing path for buffer arguments.
    pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    // A non-coherent buffer must not force kernel coherency.
    EXPECT_FALSE(pKernel->requiresCoherency());
}
143
HWTEST_F(KernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    // RAII ownership instead of raw new/delete, so the buffer is released
    // on every exit path from the test.
    auto buffer = std::make_unique<MockBuffer>();

    auto val = static_cast<cl_mem>(buffer.get());
    auto pVal = &val;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_FALSE(pKernel->requiresCoherency());

    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

    // The bindful (stateful) path programs a surface state whose base
    // address must match the buffer's GPU address.
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
        ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful));

    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
    EXPECT_EQ(buffer->getGraphicsAllocation(mockRootDeviceIndex)->getGpuAddress(), surfaceAddress);
}
165
HWTEST_F(MultiDeviceKernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    cl_mem val = pBuffer.get();
    auto pVal = &val;

    int32_t retVal = CL_INVALID_VALUE;
    // Force the bindful (surface-state) path for argument 0 on every device.
    for (auto &kernelInfo : pKernelInfosStorage) {
        kernelInfo->argAsPtr(0).bindful = 0;
    }
    auto pMultiDeviceKernel = std::unique_ptr<MultiDeviceKernel>(MultiDeviceKernel::create<MockKernel>(pProgram.get(), kernelInfos, &retVal));

    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Each per-device kernel must program a surface state whose base address
    // matches that device's allocation of the shared buffer.
    for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
        auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
        EXPECT_FALSE(pKernel->requiresCoherency());
        EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
        typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
        auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
            ptrOffset(pKernel->getSurfaceStateHeap(), kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as<ArgDescPointer>().bindful));

        auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
        EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), surfaceAddress);
    }
}
193
HWTEST_F(KernelArgBufferTest, GivenBufferFromSvmPtrWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    // RAII ownership instead of raw new/delete.
    auto buffer = std::make_unique<MockBuffer>();
    // A coherent allocation (fine-grained SVM) must propagate coherency
    // to the kernel when set as an argument.
    buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);

    auto val = static_cast<cl_mem>(buffer.get());
    auto pVal = &val;

    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(pKernel->requiresCoherency());
}
208
TEST_F(KernelArgBufferTest, GivenInvalidBufferWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) {
    // Raw storage that is NOT a valid cl_mem object; std::vector provides
    // RAII ownership instead of the original new[]/delete[] pair.
    std::vector<char> storage(sizeof(Buffer));

    auto val = reinterpret_cast<cl_mem *>(storage.data());
    auto pVal = &val;
    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal);
}
219
TEST_F(KernelArgBufferTest, GivenNullPtrWhenSettingKernelArgThenKernelArgIsNull) {
    // Passing a null cl_mem handle is valid; the patched argument slot
    // must then read back as nullptr.
    cl_mem *nullMem = nullptr;
    this->pKernel->setArg(0, sizeof(cl_mem *), &nullMem);

    auto patchedArg = reinterpret_cast<cl_mem **>(this->pKernel->getCrossThreadData() +
                                                  this->pKernelInfo->argAsPtr(0).stateless);

    EXPECT_EQ(nullptr, *patchedArg);
}
230
TEST_F(MultiDeviceKernelArgBufferTest, GivenNullPtrWhenSettingKernelArgThenKernelArgIsNull) {
    int32_t retVal = CL_INVALID_VALUE;
    auto pMultiDeviceKernel = std::unique_ptr<MultiDeviceKernel>(MultiDeviceKernel::create<MockKernel>(pProgram.get(), kernelInfos, &retVal));

    EXPECT_EQ(CL_SUCCESS, retVal);

    // Setting a null cl_mem is legal; every device's patched slot must read
    // back as nullptr.
    auto val = nullptr;
    auto pVal = &val;
    pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
    for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
        auto pKernel = static_cast<MockKernel *>(pMultiDeviceKernel->getKernel(rootDeviceIndex));
        auto pKernelArg = reinterpret_cast<void **>(pKernel->getCrossThreadData() +
                                                    kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
        EXPECT_EQ(nullptr, *pKernelArg);
    }
}
247
TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPtrPassedIsNullThenOnly4BytesAreBeingPatched) {
    auto val = (cl_mem *)nullptr;
    auto pVal = &val;

    // A 4-byte pointer size emulates a 32-bit device's stateless argument.
    auto &argAsPtr = pKernelInfo->argAsPtr(0);
    argAsPtr.pointerSize = 4;

    auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData() + argAsPtr.stateless);
    auto pKernelArg32bit = (uint32_t *)pKernelArg64bit;

    // Pre-fill all 8 bytes so untouched bytes remain detectable.
    *pKernelArg64bit = 0xffffffffffffffff;

    this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    uint64_t expValue = 0u;

    // Only the first 4 bytes are zeroed; the remaining 4 keep their 0xff
    // fill, so the 64-bit read stays non-zero.
    EXPECT_EQ(0u, *pKernelArg32bit);
    EXPECT_NE(expValue, *pKernelArg64bit);
}
266
TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPassedIsNullThenOnly4BytesAreBeingPatched) {
    auto pVal = nullptr;

    // A 4-byte pointer size emulates a 32-bit device's stateless argument.
    auto &argAsPtr = pKernelInfo->argAsPtr(0);
    argAsPtr.pointerSize = 4;

    auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData() + argAsPtr.stateless);
    auto pKernelArg32bit = (uint32_t *)pKernelArg64bit;

    // Pre-fill all 8 bytes so untouched bytes remain detectable.
    *pKernelArg64bit = 0xffffffffffffffff;

    this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    uint64_t expValue = 0u;

    // Only the first 4 bytes are zeroed; the remaining 4 keep their 0xff
    // fill, so the 64-bit read stays non-zero.
    EXPECT_EQ(0u, *pKernelArg32bit);
    EXPECT_NE(expValue, *pKernelArg64bit);
}
284
TEST_F(KernelArgBufferTest, givenWritableBufferWhenSettingAsArgThenDoNotExpectAllocationInCacheFlushVector) {
    // A writable allocation that does not require an L3 flush must not be
    // registered in the kernel's cache-flush vector.
    auto buffer = std::make_unique<MockBuffer>();
    buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(true);
    buffer->mockGfxAllocation.setFlushL3Required(false);

    auto clMem = static_cast<cl_mem>(buffer.get());
    EXPECT_EQ(CL_SUCCESS, pKernel->setArg(0, sizeof(cl_mem *), &clMem));
    EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
}
297
TEST_F(KernelArgBufferTest, givenCacheFlushBufferWhenSettingAsArgThenExpectAllocationInCacheFlushVector) {
    // A non-writable allocation flagged as needing an L3 flush must be
    // tracked in the kernel's cache-flush vector.
    auto buffer = std::make_unique<MockBuffer>();
    buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(false);
    buffer->mockGfxAllocation.setFlushL3Required(true);

    auto clMem = static_cast<cl_mem>(buffer.get());
    EXPECT_EQ(CL_SUCCESS, pKernel->setArg(0, sizeof(cl_mem *), &clMem));
    EXPECT_EQ(&buffer->mockGfxAllocation, pKernel->kernelArgRequiresCacheFlush[0]);
}
310
TEST_F(KernelArgBufferTest, givenNoCacheFlushBufferWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) {
    // Neither writable nor L3-flush-required: the allocation must stay out
    // of the cache-flush vector.
    auto buffer = std::make_unique<MockBuffer>();
    buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(false);
    buffer->mockGfxAllocation.setFlushL3Required(false);

    auto clMem = static_cast<cl_mem>(buffer.get());
    EXPECT_EQ(CL_SUCCESS, pKernel->setArg(0, sizeof(cl_mem *), &clMem));
    EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
}
323
TEST_F(KernelArgBufferTest, givenBufferWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
    // A device-local BUFFER allocation never counts as direct stateless
    // access to host memory, whether the access is stateful or stateless.
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);

    auto clMem = static_cast<cl_mem>(&buffer);

    for (auto statefulAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, statefulAccess);

        EXPECT_EQ(CL_SUCCESS, pKernel->setArg(0, sizeof(cl_mem *), &clMem));
        EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    }
}
340
TEST_F(KernelArgBufferTest, givenSharedBufferWhenHasDirectStatelessAccessToSharedBufferIsCalledThenReturnCorrectValue) {
    // A SHARED_BUFFER allocation counts as direct stateless access to a
    // shared buffer only when the argument is not purely stateful.
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::SHARED_BUFFER);

    auto clMem = static_cast<cl_mem>(&buffer);

    for (auto statefulAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, statefulAccess);

        EXPECT_EQ(CL_SUCCESS, pKernel->setArg(0, sizeof(cl_mem *), &clMem));
        EXPECT_EQ(!statefulAccess, pKernel->hasDirectStatelessAccessToSharedBuffer());
    }
}
357
TEST_F(KernelArgBufferTest, givenBufferInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) {
    // A host-memory allocation counts as direct stateless access to host
    // memory only when the argument is not purely stateful.
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    auto clMem = static_cast<cl_mem>(&buffer);

    for (auto statefulAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, statefulAccess);

        EXPECT_EQ(CL_SUCCESS, pKernel->setArg(0, sizeof(cl_mem *), &clMem));
        EXPECT_EQ(!statefulAccess, pKernel->hasDirectStatelessAccessToHostMemory());
    }
}
374
TEST_F(KernelArgBufferTest, givenGfxAllocationWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
    // An SVM allocation of device type BUFFER never counts as host memory,
    // regardless of stateful/stateless access.
    char backing[128];
    MockGraphicsAllocation gfxAllocation(backing, sizeof(backing));
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER);

    for (auto statefulAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, statefulAccess);

        EXPECT_EQ(CL_SUCCESS, pKernel->setArgSvmAlloc(0, backing, &gfxAllocation));
        EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    }
}
390
TEST_F(KernelArgBufferTest, givenGfxAllocationInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) {
    // A host-memory SVM allocation counts as direct stateless host access
    // only when the argument is not purely stateful.
    char backing[128];
    MockGraphicsAllocation gfxAllocation(backing, sizeof(backing));
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    for (auto statefulAccess : {false, true}) {
        pKernelInfo->setBufferStateful(0, statefulAccess);

        EXPECT_EQ(CL_SUCCESS, pKernel->setArgSvmAlloc(0, backing, &gfxAllocation));
        EXPECT_EQ(!statefulAccess, pKernel->hasDirectStatelessAccessToHostMemory());
    }
}
406
TEST_F(KernelArgBufferTest, givenInvalidKernelObjWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
    // A kernel with no arguments at all reports no host-memory access.
    KernelInfo kernelInfo;
    MockKernel emptyKernel(pProgram, kernelInfo, *pClDevice);
    EXPECT_FALSE(emptyKernel.hasDirectStatelessAccessToHostMemory());

    // An argument slot with no object bound...
    pKernel->kernelArguments.at(0).type = Kernel::NONE_OBJ;
    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());

    // ...or marked as a buffer/SVM object without an actual allocation
    // attached, must also return false rather than crash or report true.
    pKernel->kernelArguments.at(0).type = Kernel::BUFFER_OBJ;
    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());

    pKernel->kernelArguments.at(0).type = Kernel::SVM_ALLOC_OBJ;
    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
}
421
TEST_F(KernelArgBufferTest, givenKernelWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) {
    KernelInfo kernelInfo;
    EXPECT_FALSE(kernelInfo.hasIndirectStatelessAccess);

    // Without the indirect-stateless flag the query is always false.
    MockKernel kernelWithNoIndirectStatelessAccess(pProgram, kernelInfo, *pClDevice);
    EXPECT_FALSE(kernelWithNoIndirectStatelessAccess.hasIndirectStatelessAccessToHostMemory());
    kernelInfo.hasIndirectStatelessAccess = true;

    // Flag set but no unified-memory allocations attached: still false.
    MockKernel kernelWithNoIndirectHostAllocations(pProgram, kernelInfo, *pClDevice);
    EXPECT_FALSE(kernelWithNoIndirectHostAllocations.hasIndirectStatelessAccessToHostMemory());

    const auto allocationTypes = {GraphicsAllocation::AllocationType::BUFFER,
                                  GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY};

    // Only a host-memory allocation attached via exec info makes it true.
    MockKernel kernelWithIndirectUnifiedMemoryAllocation(pProgram, kernelInfo, *pClDevice);
    MockGraphicsAllocation gfxAllocation;
    for (const auto type : allocationTypes) {
        gfxAllocation.setAllocationType(type);
        kernelWithIndirectUnifiedMemoryAllocation.setUnifiedMemoryExecInfo(&gfxAllocation);
        if (type == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
            EXPECT_TRUE(kernelWithIndirectUnifiedMemoryAllocation.hasIndirectStatelessAccessToHostMemory());
        } else {
            EXPECT_FALSE(kernelWithIndirectUnifiedMemoryAllocation.hasIndirectStatelessAccessToHostMemory());
        }
        kernelWithIndirectUnifiedMemoryAllocation.clearUnifiedMemoryExecInfo();
    }
}
449
TEST_F(KernelArgBufferTest, givenKernelExecInfoWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) {
    KernelInfo kernelInfo;
    kernelInfo.hasIndirectStatelessAccess = true;

    MockKernel mockKernel(pProgram, kernelInfo, *pClDevice);
    EXPECT_FALSE(mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory());

    // Platforms without SVM support have no allocations manager to test.
    auto svmAllocationsManager = mockKernel.getContext().getSVMAllocsManager();
    if (svmAllocationsManager == nullptr) {
        return;
    }

    // Allowing indirect host allocations alone is not enough...
    mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed = true;
    EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory());

    // ...a device-memory USM allocation still does not count...
    auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, mockKernel.getContext().getRootDeviceIndices(), mockKernel.getContext().getDeviceBitfields());
    deviceProperties.device = &pClDevice->getDevice();
    auto unifiedDeviceMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties);
    EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory());

    // ...only a host-memory USM allocation makes the query true.
    auto hostProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, mockKernel.getContext().getRootDeviceIndices(), mockKernel.getContext().getDeviceBitfields());
    auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, hostProperties);
    EXPECT_TRUE(mockKernel.hasIndirectStatelessAccessToHostMemory());

    svmAllocationsManager->freeSVMAlloc(unifiedDeviceMemoryAllocation);
    svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
}
478
TEST_F(KernelArgBufferTest, whenSettingAuxTranslationRequiredThenIsAuxTranslationRequiredReturnsCorrectValue) {
    // The setter and getter must round-trip both values.
    for (auto expected : {false, true}) {
        pKernel->setAuxTranslationRequired(expected);
        EXPECT_EQ(expected, pKernel->isAuxTranslationRequired());
    }
}
485
TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithDirectStatelessAccessToSharedBufferWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
    // With stateless compression enabled, direct stateless access to a
    // shared buffer must trigger aux translation after an update.
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::SHARED_BUFFER);

    auto clMem = static_cast<cl_mem>(&buffer);
    EXPECT_EQ(CL_SUCCESS, pKernel->setArg(0, sizeof(cl_mem *), &clMem));

    EXPECT_TRUE(pKernel->hasDirectStatelessAccessToSharedBuffer());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();
    EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}
507
TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
    // With stateless compression enabled, direct stateless access to host
    // memory must trigger aux translation after an update.
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    auto clMem = static_cast<cl_mem>(&buffer);
    EXPECT_EQ(CL_SUCCESS, pKernel->setArg(0, sizeof(cl_mem *), &clMem));

    EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();
    EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}
529
TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    // A plain buffer (no host-memory allocation type) must leave the
    // aux-translation flag untouched even with compression enabled.
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    MockBuffer buffer;

    auto clMem = static_cast<cl_mem>(&buffer);
    EXPECT_EQ(CL_SUCCESS, pKernel->setArg(0, sizeof(cl_mem *), &clMem));

    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
550
TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
    // A host-memory SVM argument combined with stateless compression must
    // make aux translation required once the update runs.
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    char backing[128];
    MockGraphicsAllocation gfxAllocation(backing, sizeof(backing));
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    EXPECT_EQ(CL_SUCCESS, pKernel->setArgSvmAlloc(0, backing, &gfxAllocation));

    EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();
    EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}
571
TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    // A default (non-host-memory) SVM allocation must not make aux
    // translation required even with stateless compression enabled.
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    char backing[128];
    MockGraphicsAllocation gfxAllocation(backing, sizeof(backing));

    EXPECT_EQ(CL_SUCCESS, pKernel->setArgSvmAlloc(0, backing, &gfxAllocation));

    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
591
TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithNoIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    // Without indirect stateless access, a host-memory exec-info allocation
    // must not flip the aux-translation flag.
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    pKernelInfo->hasIndirectStatelessAccess = false;

    MockGraphicsAllocation gfxAllocation;
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);
    pKernel->setUnifiedMemoryExecInfo(&gfxAllocation);

    EXPECT_FALSE(pKernel->hasIndirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
611
TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrueForHostMemoryAllocation) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    pKernelInfo->hasIndirectStatelessAccess = true;

    const auto allocationTypes = {GraphicsAllocation::AllocationType::BUFFER,
                                  GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY};

    MockGraphicsAllocation gfxAllocation;

    for (const auto type : allocationTypes) {
        gfxAllocation.setAllocationType(type);

        pKernel->setUnifiedMemoryExecInfo(&gfxAllocation);

        // Only the host-memory allocation type grants indirect host access.
        if (type == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
            EXPECT_TRUE(pKernel->hasIndirectStatelessAccessToHostMemory());
        } else {
            EXPECT_FALSE(pKernel->hasIndirectStatelessAccessToHostMemory());
        }

        // The flag flips only after an explicit update, and only for the
        // host-memory case.
        EXPECT_FALSE(pKernel->isAuxTranslationRequired());

        pKernel->updateAuxTranslationRequired();

        if (type == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
            EXPECT_TRUE(pKernel->isAuxTranslationRequired());
        } else {
            EXPECT_FALSE(pKernel->isAuxTranslationRequired());
        }

        // Reset kernel state so each allocation type is tested in isolation.
        pKernel->clearUnifiedMemoryExecInfo();
        pKernel->setAuxTranslationRequired(false);
    }
}
648
TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectStatelessAccessWhenFillWithKernelObjsForAuxTranslationIsCalledThenSetKernelObjectsForAuxTranslation) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    pKernelInfo->hasIndirectStatelessAccess = true;

    // (type, compressed) pairs: only compressed allocations should be
    // collected for aux translation.
    constexpr std::array<AllocationTypeHelper, 4> allocationTypes = {{{GraphicsAllocation::AllocationType::BUFFER, false},
                                                                      {GraphicsAllocation::AllocationType::BUFFER, true},
                                                                      {GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false},
                                                                      {GraphicsAllocation::AllocationType::SVM_GPU, true}}};

    auto gmm = std::make_unique<Gmm>(pDevice->getRootDeviceEnvironment().getGmmClientContext(), nullptr, 0, 0, false);
    MockGraphicsAllocation gfxAllocation;
    gfxAllocation.setDefaultGmm(gmm.get());

    for (const auto type : allocationTypes) {
        gfxAllocation.setAllocationType(type.allocationType);

        pKernel->setUnifiedMemoryExecInfo(&gfxAllocation);
        gmm->isCompressionEnabled = type.compressed;

        KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
        pKernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);

        if (type.compressed) {
            // Exactly this allocation, reported as a GFX_ALLOC object.
            EXPECT_EQ(1u, kernelObjsForAuxTranslation.size());
            auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::GFX_ALLOC, &gfxAllocation});
            EXPECT_NE(nullptr, kernelObj.object);
            EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type);
            kernelObjsForAuxTranslation.erase(kernelObj);
        } else {
            EXPECT_EQ(0u, kernelObjsForAuxTranslation.size());
        }

        // Reset kernel state so each scenario is tested in isolation.
        pKernel->clearUnifiedMemoryExecInfo();
        pKernel->setAuxTranslationRequired(false);
    }
}
687
TEST_F(KernelArgBufferTest, givenSVMAllocsManagerWithCompressedSVMAllocationsWhenFillWithKernelObjsForAuxTranslationIsCalledThenSetKernelObjectsForAuxTranslation) {
    // Platforms without SVM support have no allocations manager to test.
    if (pContext->getSVMAllocsManager() == nullptr) {
        return;
    }

    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    // (type, compressed) pairs: only compressed SVM allocations should be
    // collected for aux translation.
    constexpr std::array<AllocationTypeHelper, 4> allocationTypes = {{{GraphicsAllocation::AllocationType::BUFFER, false},
                                                                      {GraphicsAllocation::AllocationType::BUFFER, true},
                                                                      {GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false},
                                                                      {GraphicsAllocation::AllocationType::SVM_GPU, true}}};

    auto gmm = std::make_unique<Gmm>(pDevice->getRootDeviceEnvironment().getGmmClientContext(), nullptr, 0, 0, false);

    MockGraphicsAllocation gfxAllocation;
    gfxAllocation.setDefaultGmm(gmm.get());

    SvmAllocationData allocData(0);
    allocData.gpuAllocations.addAllocation(&gfxAllocation);
    allocData.device = &pClDevice->getDevice();

    for (const auto type : allocationTypes) {
        gfxAllocation.setAllocationType(type.allocationType);

        gmm->isCompressionEnabled = type.compressed;

        // Register the allocation with the SVM manager so the kernel's
        // indirect scan can discover it.
        pContext->getSVMAllocsManager()->insertSVMAlloc(allocData);

        KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
        pKernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);

        if (type.compressed) {
            // Exactly this allocation, reported as a GFX_ALLOC object.
            EXPECT_EQ(1u, kernelObjsForAuxTranslation.size());
            auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::GFX_ALLOC, &gfxAllocation});
            EXPECT_NE(nullptr, kernelObj.object);
            EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type);
            kernelObjsForAuxTranslation.erase(kernelObj);
        } else {
            EXPECT_EQ(0u, kernelObjsForAuxTranslation.size());
        }

        pContext->getSVMAllocsManager()->removeSVMAlloc(allocData);
    }
}
733
// Fixture variant that enables bindless mode and routes argument 0 through
// a bindless offset; stateless and bindful offsets are explicitly undefined
// so only the bindless path can be taken.
class KernelArgBufferFixtureBindless : public KernelArgBufferFixture {
  public:
    // NOTE(review): TearDown is marked override but SetUp is not — confirm
    // against KernelArgBufferFixture whether SetUp should carry override too.
    void SetUp() {
        DebugManager.flags.UseBindlessMode.set(1);
        KernelArgBufferFixture::SetUp();

        pBuffer = new MockBuffer();
        ASSERT_NE(nullptr, pBuffer);

        pKernelInfo->argAsPtr(0).bindless = bindlessOffset;
        pKernelInfo->argAsPtr(0).stateless = undefined<CrossThreadDataOffset>;
        pKernelInfo->argAsPtr(0).bindful = undefined<SurfaceStateHeapOffset>;
    }
    void TearDown() override {
        delete pBuffer;
        KernelArgBufferFixture::TearDown();
    }
    DebugManagerStateRestore restorer; // restores UseBindlessMode after the test
    MockBuffer *pBuffer;
    const CrossThreadDataOffset bindlessOffset = 0x10;
};
755
756 typedef Test<KernelArgBufferFixtureBindless> KernelArgBufferTestBindless;
757
HWTEST_F(KernelArgBufferTestBindless, givenUsedBindlessBuffersWhenPatchingSurfaceStateOffsetsThenCorrectOffsetIsPatchedInCrossThreadData) {
    using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor;
    // Seed the bindless patch slot with a sentinel value...
    auto patchLocation = reinterpret_cast<uint32_t *>(ptrOffset(pKernel->getCrossThreadData(), bindlessOffset));
    *patchLocation = 0xdead;

    cl_mem memObj = pBuffer;
    retVal = pKernel->setArg(0, sizeof(memObj), &memObj);

    // ...and verify setArg overwrote it (the sentinel must be gone).
    EXPECT_NE(0xdeadu, *patchLocation);
}
768