1 /*
2 * Copyright (C) 2020-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "shared/source/device_binary_format/patchtokens_decoder.h"
9 #include "shared/source/helpers/local_memory_access_modes.h"
10 #include "shared/source/helpers/ray_tracing_helper.h"
11 #include "shared/source/kernel/kernel_descriptor.h"
12 #include "shared/source/program/kernel_info.h"
13 #include "shared/source/program/kernel_info_from_patchtokens.h"
14 #include "shared/source/utilities/stackvec.h"
15 #include "shared/test/common/helpers/debug_manager_state_restore.h"
16 #include "shared/test/common/helpers/engine_descriptor_helper.h"
17 #include "shared/test/common/mocks/mock_device.h"
18 #include "shared/test/common/mocks/mock_graphics_allocation.h"
19 #include "shared/test/common/test_macros/test.h"
20 #include "shared/test/unit_test/compiler_interface/linker_mock.h"
21 #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h"
22
23 #include "level_zero/core/source/debugger/debugger_l0.h"
24 #include "level_zero/core/source/image/image_format_desc_helper.h"
25 #include "level_zero/core/source/image/image_hw.h"
26 #include "level_zero/core/source/kernel/kernel_hw.h"
27 #include "level_zero/core/source/module/module_imp.h"
28 #include "level_zero/core/source/printf_handler/printf_handler.h"
29 #include "level_zero/core/source/sampler/sampler_hw.h"
30 #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
31 #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
32 #include "level_zero/core/test/unit_tests/mocks/mock_device.h"
33 #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
34 #include "level_zero/core/test/unit_tests/mocks/mock_module.h"
35
36 namespace NEO {
37 void populatePointerKernelArg(ArgDescPointer &dst,
38 CrossThreadDataOffset stateless, uint8_t pointerSize, SurfaceStateHeapOffset bindful, CrossThreadDataOffset bindless,
39 KernelDescriptor::AddressingMode addressingMode);
40 }
41
42 namespace L0 {
43 namespace ult {
44
45 using KernelInitTest = Test<ModuleImmutableDataFixture>;
46
TEST_F(KernelInitTest,givenKernelToInitWhenItHasUnknownArgThenUnknowKernelArgHandlerAssigned)47 TEST_F(KernelInitTest, givenKernelToInitWhenItHasUnknownArgThenUnknowKernelArgHandlerAssigned) {
48 uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
49
50 std::unique_ptr<MockImmutableData> mockKernelImmData =
51 std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
52
53 createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, false, mockKernelImmData.get());
54 std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
55 kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
56 ze_kernel_desc_t desc = {};
57 desc.pKernelName = kernelName.c_str();
58 mockKernelImmData->resizeExplicitArgs(1);
59 kernel->initialize(&desc);
60 EXPECT_EQ(kernel->kernelArgHandlers[0], &KernelImp::setArgUnknown);
61 EXPECT_EQ(mockKernelImmData->getDescriptor().payloadMappings.explicitArgs[0].type, NEO::ArgDescriptor::ArgTUnknown);
62 }
63
// Calling setArgUnknown directly on a mock kernel is expected to report success.
TEST(KernelArgTest, givenKernelWhenSetArgUnknownCalledThenSuccessRteurned) {
    Mock<Kernel> mockKernel;
    const auto result = mockKernel.setArgUnknown(0, 0, nullptr);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);
}
68
69 using KernelImpSetGroupSizeTest = Test<DeviceFixture>;
70
TEST_F(KernelImpSetGroupSizeTest,WhenCalculatingLocalIdsThenGrfSizeIsTakenFromCapabilityTable)71 TEST_F(KernelImpSetGroupSizeTest, WhenCalculatingLocalIdsThenGrfSizeIsTakenFromCapabilityTable) {
72 Mock<Kernel> mockKernel;
73 Mock<Module> mockModule(this->device, nullptr);
74 mockKernel.descriptor.kernelAttributes.simdSize = 1;
75 mockKernel.descriptor.kernelAttributes.numLocalIdChannels = 3;
76 mockKernel.module = &mockModule;
77 auto grfSize = mockModule.getDevice()->getHwInfo().capabilityTable.grfSize;
78 uint32_t groupSize[3] = {2, 3, 5};
79 auto ret = mockKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]);
80 EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
81 EXPECT_EQ(groupSize[0] * groupSize[1] * groupSize[2], mockKernel.numThreadsPerThreadGroup);
82 EXPECT_EQ(grfSize * groupSize[0] * groupSize[1] * groupSize[2], mockKernel.perThreadDataSizeForWholeThreadGroup);
83 ASSERT_LE(grfSize * groupSize[0] * groupSize[1] * groupSize[2], mockKernel.perThreadDataSizeForWholeThreadGroup);
84 using LocalIdT = unsigned short;
85 auto threadOffsetInLocalIds = grfSize / sizeof(LocalIdT);
86 auto generatedLocalIds = reinterpret_cast<LocalIdT *>(mockKernel.perThreadDataForWholeThreadGroup);
87
88 uint32_t threadId = 0;
89 for (uint32_t z = 0; z < groupSize[2]; ++z) {
90 for (uint32_t y = 0; y < groupSize[1]; ++y) {
91 for (uint32_t x = 0; x < groupSize[0]; ++x) {
92 EXPECT_EQ(x, generatedLocalIds[0 + threadId * threadOffsetInLocalIds]) << " thread : " << threadId;
93 EXPECT_EQ(y, generatedLocalIds[1 + threadId * threadOffsetInLocalIds]) << " thread : " << threadId;
94 EXPECT_EQ(z, generatedLocalIds[2 + threadId * threadOffsetInLocalIds]) << " thread : " << threadId;
95 ++threadId;
96 }
97 }
98 }
99 }
100
// With kernelRequiresGenerationOfLocalIdsByRuntime cleared, setGroupSize must still
// succeed and report the thread count, but leave all per-thread data empty.
TEST_F(KernelImpSetGroupSizeTest, givenLocalIdGenerationByRuntimeDisabledWhenSettingGroupSizeThenLocalIdsAreNotGenerated) {
    Mock<Kernel> mockKernel;
    Mock<Module> mockModule(this->device, nullptr);
    mockKernel.module = &mockModule;
    mockKernel.descriptor.kernelAttributes.simdSize = 1;
    mockKernel.kernelRequiresGenerationOfLocalIdsByRuntime = false;

    const uint32_t groupSize[3] = {2, 3, 5};
    const auto ret = mockKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]);

    EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
    EXPECT_EQ(groupSize[0] * groupSize[1] * groupSize[2], mockKernel.numThreadsPerThreadGroup);
    EXPECT_EQ(0u, mockKernel.perThreadDataSizeForWholeThreadGroup);
    EXPECT_EQ(0u, mockKernel.perThreadDataSize);
    EXPECT_EQ(nullptr, mockKernel.perThreadDataForWholeThreadGroup);
}
116
// The kernel requires a 2x2x2 work-group; requesting 1x1x1 must be rejected with
// ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION.
TEST_F(KernelImpSetGroupSizeTest, givenIncorrectGroupSizeWhenSettingGroupSizeThenInvalidGroupSizeDimensionErrorIsReturned) {
    Mock<Kernel> mockKernel;
    Mock<Module> mockModule(this->device, nullptr);
    mockKernel.module = &mockModule;

    auto &kernelAttributes = mockKernel.descriptor.kernelAttributes;
    for (uint32_t dim = 0u; dim < 3u; ++dim) {
        kernelAttributes.requiredWorkgroupSize[dim] = 2;
    }

    const uint32_t mismatchedSize[3] = {1, 1, 1};
    const auto ret = mockKernel.setGroupSize(mismatchedSize[0], mismatchedSize[1], mismatchedSize[2]);
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, ret);
}
129
// A group size of 0x0x0 must be rejected with ZE_RESULT_ERROR_INVALID_ARGUMENT, even
// though the kernel also declares a required 2x2x2 work-group.
TEST_F(KernelImpSetGroupSizeTest, givenZeroGroupSizeWhenSettingGroupSizeThenInvalidArgumentErrorIsReturned) {
    Mock<Kernel> mockKernel;
    Mock<Module> mockModule(this->device, nullptr);
    mockKernel.module = &mockModule;

    auto &kernelAttributes = mockKernel.descriptor.kernelAttributes;
    for (uint32_t dim = 0u; dim < 3u; ++dim) {
        kernelAttributes.requiredWorkgroupSize[dim] = 2;
    }

    const uint32_t zeroSize[3] = {0, 0, 0};
    const auto ret = mockKernel.setGroupSize(zeroSize[0], zeroSize[1], zeroSize[2]);
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, ret);
}
142
143 using SetKernelArg = Test<ModuleFixture>;
144 using ImageSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
145
HWTEST2_F(SetKernelArg,givenImageAndKernelWhenSetArgImageThenCrossThreadDataIsSet,ImageSupport)146 HWTEST2_F(SetKernelArg, givenImageAndKernelWhenSetArgImageThenCrossThreadDataIsSet, ImageSupport) {
147 createKernel();
148
149 auto &imageArg = const_cast<NEO::ArgDescImage &>(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[3].as<NEO::ArgDescImage>());
150 imageArg.metadataPayload.imgWidth = 0x0;
151 imageArg.metadataPayload.imgHeight = 0x8;
152 imageArg.metadataPayload.imgDepth = 0x10;
153
154 imageArg.metadataPayload.arraySize = 0x18;
155 imageArg.metadataPayload.numSamples = 0x1c;
156 imageArg.metadataPayload.channelDataType = 0x20;
157 imageArg.metadataPayload.channelOrder = 0x24;
158 imageArg.metadataPayload.numMipLevels = 0x28;
159
160 imageArg.metadataPayload.flatWidth = 0x30;
161 imageArg.metadataPayload.flatHeight = 0x38;
162 imageArg.metadataPayload.flatPitch = 0x40;
163 imageArg.metadataPayload.flatBaseOffset = 0x48;
164
165 ze_image_desc_t desc = {};
166
167 desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
168 desc.type = ZE_IMAGE_TYPE_3D;
169 desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8;
170 desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT;
171 desc.width = 11;
172 desc.height = 13;
173 desc.depth = 17;
174
175 desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A;
176 desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0;
177 desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1;
178 desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X;
179
180 auto imageHW = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
181 auto ret = imageHW->initialize(device, &desc);
182 ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
183
184 auto handle = imageHW->toHandle();
185 auto imgInfo = imageHW->getImageInfo();
186 auto pixelSize = imgInfo.surfaceFormat->ImageElementSizeInBytes;
187
188 kernel->setArgImage(3, sizeof(imageHW.get()), &handle);
189
190 auto crossThreadData = kernel->getCrossThreadData();
191
192 auto pImgWidth = ptrOffset(crossThreadData, imageArg.metadataPayload.imgWidth);
193 EXPECT_EQ(imgInfo.imgDesc.imageWidth, *pImgWidth);
194
195 auto pImgHeight = ptrOffset(crossThreadData, imageArg.metadataPayload.imgHeight);
196 EXPECT_EQ(imgInfo.imgDesc.imageHeight, *pImgHeight);
197
198 auto pImgDepth = ptrOffset(crossThreadData, imageArg.metadataPayload.imgDepth);
199 EXPECT_EQ(imgInfo.imgDesc.imageDepth, *pImgDepth);
200
201 auto pArraySize = ptrOffset(crossThreadData, imageArg.metadataPayload.arraySize);
202 EXPECT_EQ(imgInfo.imgDesc.imageArraySize, *pArraySize);
203
204 auto pNumSamples = ptrOffset(crossThreadData, imageArg.metadataPayload.numSamples);
205 EXPECT_EQ(imgInfo.imgDesc.numSamples, *pNumSamples);
206
207 auto pNumMipLevels = ptrOffset(crossThreadData, imageArg.metadataPayload.numMipLevels);
208 EXPECT_EQ(imgInfo.imgDesc.numMipLevels, *pNumMipLevels);
209
210 auto pFlatBaseOffset = ptrOffset(crossThreadData, imageArg.metadataPayload.flatBaseOffset);
211 EXPECT_EQ(imageHW->getAllocation()->getGpuAddress(), *reinterpret_cast<const uint64_t *>(pFlatBaseOffset));
212
213 auto pFlatWidth = ptrOffset(crossThreadData, imageArg.metadataPayload.flatWidth);
214 EXPECT_EQ((imgInfo.imgDesc.imageWidth * pixelSize) - 1u, *pFlatWidth);
215
216 auto pFlatHeight = ptrOffset(crossThreadData, imageArg.metadataPayload.flatHeight);
217 EXPECT_EQ((imgInfo.imgDesc.imageHeight * pixelSize) - 1u, *pFlatHeight);
218
219 auto pFlatPitch = ptrOffset(crossThreadData, imageArg.metadataPayload.flatPitch);
220 EXPECT_EQ(imgInfo.imgDesc.imageRowPitch - 1u, *pFlatPitch);
221
222 auto pChannelDataType = ptrOffset(crossThreadData, imageArg.metadataPayload.channelDataType);
223 EXPECT_EQ(getClChannelDataType(desc.format), *reinterpret_cast<const cl_channel_type *>(pChannelDataType));
224
225 auto pChannelOrder = ptrOffset(crossThreadData, imageArg.metadataPayload.channelOrder);
226 EXPECT_EQ(getClChannelOrder(desc.format), *reinterpret_cast<const cl_channel_order *>(pChannelOrder));
227 }
228
// Binds a clamp / nearest / normalized sampler to explicit argument 5 and checks the
// three sampler metadata values the kernel writes into cross-thread data.
HWTEST2_F(SetKernelArg, givenSamplerAndKernelWhenSetArgSamplerThenCrossThreadDataIsSet, ImageSupport) {
    createKernel();

    // The descriptor is only reachable as const here, hence the const_cast to patch offsets.
    auto &samplerArg = const_cast<NEO::ArgDescSampler &>(
        kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[5].as<NEO::ArgDescSampler>());
    auto &payload = samplerArg.metadataPayload;
    payload.samplerAddressingMode = 0x0;
    payload.samplerNormalizedCoords = 0x4;
    payload.samplerSnapWa = 0x8;

    ze_sampler_desc_t desc = {};
    desc.addressMode = ZE_SAMPLER_ADDRESS_MODE_CLAMP;
    desc.filterMode = ZE_SAMPLER_FILTER_MODE_NEAREST;
    desc.isNormalized = true;

    auto sampler = std::make_unique<WhiteBox<::L0::SamplerCoreFamily<gfxCoreFamily>>>();
    auto ret = sampler->initialize(device, &desc);
    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);

    auto handle = sampler->toHandle();
    kernel->setArgSampler(5, sizeof(sampler.get()), &handle);

    auto crossThreadData = kernel->getCrossThreadData();

    EXPECT_EQ(std::numeric_limits<uint32_t>::max(),
              *reinterpret_cast<const uint32_t *>(ptrOffset(crossThreadData, payload.samplerSnapWa)));
    EXPECT_EQ(0x01, *ptrOffset(crossThreadData, payload.samplerAddressingMode));
    EXPECT_EQ(0x08, *ptrOffset(crossThreadData, payload.samplerNormalizedCoords));
}
263
264 using ArgSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
265
HWTEST2_F(SetKernelArg,givenBufferArgumentWhichHasNotBeenAllocatedByRuntimeThenInvalidArgumentIsReturned,ArgSupport)266 HWTEST2_F(SetKernelArg, givenBufferArgumentWhichHasNotBeenAllocatedByRuntimeThenInvalidArgumentIsReturned, ArgSupport) {
267 createKernel();
268
269 uint64_t hostAddress = 0x1234;
270
271 ze_result_t res = kernel->setArgBuffer(0, sizeof(hostAddress), &hostAddress);
272
273 EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res);
274 }
275
276 class KernelImmutableDataFixture : public ModuleImmutableDataFixture {
277 public:
SetUp()278 void SetUp() {
279 ModuleImmutableDataFixture::SetUp();
280 }
281
TearDown()282 void TearDown() {
283 ModuleImmutableDataFixture::TearDown();
284 }
285 };
286
287 using KernelImmutableDataTests = Test<KernelImmutableDataFixture>;
288
// When no per-thread private memory is requested, the created kernel must not hold a
// private memory allocation.
TEST_F(KernelImmutableDataTests, givenKernelInitializedWithNoPrivateMemoryThenPrivateMemoryIsNull) {
    const uint32_t privateMemorySize = 0u;
    const bool isInternal = false;

    auto immData = std::make_unique<MockImmutableData>(privateMemorySize);
    createModuleFromBinary(privateMemorySize, isInternal, immData.get());

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
    createKernel(kernel.get());

    EXPECT_EQ(nullptr, kernel->privateMemoryGraphicsAllocation);
}
304
// When 32 bytes of per-thread private memory are requested, the created kernel must
// own a private memory allocation sized as the per-thread request multiplied by the
// device's computeUnitsUsedForScratch.
TEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenPrivateMemoryIsCreated) {
    uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
    bool isInternal = false;

    std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);

    createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get());

    std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
    kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    createKernel(kernel.get());

    // Fatal check: the allocation is dereferenced below, so a missing allocation must
    // stop the test here instead of crashing the test binary.
    ASSERT_NE(nullptr, kernel->privateMemoryGraphicsAllocation);

    size_t expectedSize = perHwThreadPrivateMemorySizeRequested *
                          device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch;
    EXPECT_EQ(expectedSize, kernel->privateMemoryGraphicsAllocation->getUnderlyingBufferSize());
}
324
325 using KernelImmutableDataIsaCopyTests = KernelImmutableDataTests;
326
TEST_F(KernelImmutableDataIsaCopyTests,whenUserKernelIsCreatedThenIsaIsCopiedWhenModuleIsCreated)327 TEST_F(KernelImmutableDataIsaCopyTests, whenUserKernelIsCreatedThenIsaIsCopiedWhenModuleIsCreated) {
328 MockImmutableMemoryManager *mockMemoryManager =
329 static_cast<MockImmutableMemoryManager *>(device->getNEODevice()->getMemoryManager());
330
331 uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
332 bool isInternal = false;
333
334 size_t previouscopyMemoryToAllocationCalledTimes =
335 mockMemoryManager->copyMemoryToAllocationCalledTimes;
336
337 std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
338
339 createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get());
340
341 size_t copyForGlobalSurface = 1u;
342 auto copyForIsa = module->getKernelImmutableDataVector().size();
343 size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes +
344 copyForGlobalSurface + copyForIsa;
345 EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes,
346 mockMemoryManager->copyMemoryToAllocationCalledTimes);
347
348 std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
349 kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
350
351 createKernel(kernel.get());
352
353 EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes,
354 mockMemoryManager->copyMemoryToAllocationCalledTimes);
355 }
356
// Re-initializing the immutable data of a user (non-internal) kernel after the module
// has been created must not trigger any further copyMemoryToAllocation call.
TEST_F(KernelImmutableDataIsaCopyTests, whenImmutableDataIsInitializedForUserKernelThenIsaIsNotCopied) {
    MockImmutableMemoryManager *mockMemoryManager =
        static_cast<MockImmutableMemoryManager *>(device->getNEODevice()->getMemoryManager());

    uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
    bool isInternal = false;

    std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
    createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get());

    // Snapshot taken after module creation so only the initialize() call is measured.
    // Held as size_t, like the other counter snapshots in this file, so the value is
    // never narrowed.
    size_t previousCopyMemoryToAllocationCalledTimes =
        mockMemoryManager->copyMemoryToAllocationCalledTimes;

    mockKernelImmData->initialize(mockKernelImmData->mockKernelInfo, device,
                                  device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
                                  module.get()->translationUnit->globalConstBuffer,
                                  module.get()->translationUnit->globalVarBuffer,
                                  isInternal);

    EXPECT_EQ(previousCopyMemoryToAllocationCalledTimes,
              mockMemoryManager->copyMemoryToAllocationCalledTimes);
}
379
// Re-initializing the immutable data of an internal kernel after the module has been
// created must not trigger any further copyMemoryToAllocation call.
TEST_F(KernelImmutableDataIsaCopyTests, whenImmutableDataIsInitializedForInternalKernelThenIsaIsNotCopied) {
    MockImmutableMemoryManager *mockMemoryManager =
        static_cast<MockImmutableMemoryManager *>(device->getNEODevice()->getMemoryManager());

    uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
    bool isInternal = true;

    std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
    createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get());

    // Snapshot taken after module creation so only the initialize() call is measured.
    // Held as size_t, like the other counter snapshots in this file, so the value is
    // never narrowed.
    size_t previousCopyMemoryToAllocationCalledTimes =
        mockMemoryManager->copyMemoryToAllocationCalledTimes;

    mockKernelImmData->initialize(mockKernelImmData->mockKernelInfo, device,
                                  device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
                                  module.get()->translationUnit->globalConstBuffer,
                                  module.get()->translationUnit->globalVarBuffer,
                                  isInternal);

    EXPECT_EQ(previousCopyMemoryToAllocationCalledTimes,
              mockMemoryManager->copyMemoryToAllocationCalledTimes);
}
402
403 using KernelImmutableDataWithNullHeapTests = KernelImmutableDataTests;
404
TEST_F(KernelImmutableDataTests,givenInternalModuleWhenKernelIsCreatedThenIsaIsCopiedOnce)405 TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedThenIsaIsCopiedOnce) {
406 MockImmutableMemoryManager *mockMemoryManager =
407 static_cast<MockImmutableMemoryManager *>(device->getNEODevice()->getMemoryManager());
408
409 uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
410 bool isInternal = true;
411
412 std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
413 mockKernelImmData->getIsaGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL);
414
415 size_t previouscopyMemoryToAllocationCalledTimes =
416 mockMemoryManager->copyMemoryToAllocationCalledTimes;
417
418 createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get());
419
420 size_t copyForGlobalSurface = 1u;
421 size_t copyForPatchingIsa = 0u;
422 size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes +
423 copyForGlobalSurface + copyForPatchingIsa;
424 EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes,
425 mockMemoryManager->copyMemoryToAllocationCalledTimes);
426
427 std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
428 kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
429
430 expectedPreviouscopyMemoryToAllocationCalledTimes++;
431
432 createKernel(kernel.get());
433
434 EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes,
435 mockMemoryManager->copyMemoryToAllocationCalledTimes);
436 }
437
// Builds a builtin module whose linker input requires patching of the global variables
// buffer and checks that module initialization performs no copyMemoryToAllocation call
// and leaves every kernel's ISA uncopied; the one expected copy must happen when the
// kernel is initialized.
TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedIsaIsNotCopiedDuringLinking) {
    NEO::MockCompilerEnableGuard mock(true);
    // Handed straight to compilerInterface.reset() below.
    auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions();
    neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip);

    MockImmutableMemoryManager *mockMemoryManager = static_cast<MockImmutableMemoryManager *>(device->getNEODevice()->getMemoryManager());

    // Zero-initialized so the bytes passed as module input are never indeterminate.
    uint8_t binary[16] = {};
    ze_module_desc_t moduleDesc = {};
    moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
    moduleDesc.pInputModule = binary;
    moduleDesc.inputSize = 10;
    ModuleBuildLog *moduleBuildLog = nullptr;

    auto linkerInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
    linkerInput->traits.requiresPatchingOfGlobalVariablesBuffer = true;

    std::unique_ptr<L0::ult::MockModule> moduleMock = std::make_unique<L0::ult::MockModule>(device, moduleBuildLog, ModuleType::Builtin);
    moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
    moduleMock->translationUnit->programInfo.linkerInput = std::move(linkerInput);

    uint32_t kernelHeap = 0;
    // NOTE(review): stored as a raw pointer in programInfo.kernelInfos below; presumably
    // released by the program info - confirm ownership.
    auto kernelInfo = new KernelInfo();
    kernelInfo->heapInfo.KernelHeapSize = 1;
    kernelInfo->heapInfo.pKernelHeap = &kernelHeap;

    Mock<::L0::Kernel> kernelMock;
    kernelMock.module = moduleMock.get();
    kernelMock.immutableData.kernelInfo = kernelInfo;
    kernelMock.immutableData.surfaceStateHeapSize = 64;
    kernelMock.immutableData.surfaceStateHeapTemplate.reset(new uint8_t[64]);
    kernelMock.immutableData.getIsaGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL);
    kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = 0;

    moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
    moduleMock->kernelImmData = &kernelMock.immutableData;

    size_t previouscopyMemoryToAllocationCalledTimes = mockMemoryManager->copyMemoryToAllocationCalledTimes;
    auto result = moduleMock->initialize(&moduleDesc, neoDevice);
    // Fatal: the rest of the test indexes into state that initialization populates.
    ASSERT_TRUE(result);
    size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes;

    EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes);

    for (auto &ki : moduleMock->kernelImmDatas) {
        EXPECT_FALSE(ki->isIsaCopiedToAllocation());
    }

    // One copy is expected from the kernel initialization that follows.
    expectedPreviouscopyMemoryToAllocationCalledTimes++;

    ze_kernel_desc_t desc = {};
    desc.pKernelName = "";

    // Fatal: element 0 is accessed on the next line.
    ASSERT_NE(0u, moduleMock->kernelImmDatas.size());
    moduleMock->kernelImmData = moduleMock->kernelImmDatas[0].get();

    kernelMock.initialize(&desc);

    EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes);
}
497
// Builds the same native binary twice, once with 32 bytes of per-thread private memory
// and once with none, and expects the residency container of the first kernel to hold
// exactly one entry more than that of the second.
TEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenContainerHasOneExtraSpaceForAllocation) {
    std::string binaryPath;
    retrieveBinaryKernelFilenameNoRevision(binaryPath, binaryFilename + "_", ".bin");

    size_t binarySize = 0;
    auto binaryData = loadDataFromFile(binaryPath.c_str(), binarySize);
    ASSERT_NE(0u, binarySize);
    ASSERT_NE(nullptr, binaryData);

    ze_module_desc_t moduleDesc = {};
    moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
    moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(binaryData.get());
    moduleDesc.inputSize = binarySize;
    ModuleBuildLog *moduleBuildLog = nullptr;

    // Variant with private memory.
    const uint32_t privateMemoryEnabled = 32u;
    auto immDataWithPrivateMemory = std::make_unique<MockImmutableData>(privateMemoryEnabled);
    auto moduleWithPrivateMemory = std::make_unique<MockModule>(device,
                                                                moduleBuildLog,
                                                                ModuleType::User,
                                                                privateMemoryEnabled,
                                                                immDataWithPrivateMemory.get());
    bool initialized = moduleWithPrivateMemory->initialize(&moduleDesc, device->getNEODevice());
    EXPECT_TRUE(initialized);

    auto kernelWithPrivateMemory =
        std::make_unique<ModuleImmutableDataFixture::MockKernel>(moduleWithPrivateMemory.get());
    createKernel(kernelWithPrivateMemory.get());
    EXPECT_NE(nullptr, kernelWithPrivateMemory->privateMemoryGraphicsAllocation);

    const size_t entriesWithPrivateMemory = kernelWithPrivateMemory->getResidencyContainer().size();

    // Variant without private memory.
    const uint32_t privateMemoryDisabled = 0u;
    auto immDataWithoutPrivateMemory = std::make_unique<MockImmutableData>(privateMemoryDisabled);
    auto moduleWithoutPrivateMemory = std::make_unique<MockModule>(device,
                                                                   moduleBuildLog,
                                                                   ModuleType::User,
                                                                   privateMemoryDisabled,
                                                                   immDataWithoutPrivateMemory.get());
    initialized = moduleWithoutPrivateMemory->initialize(&moduleDesc, device->getNEODevice());
    EXPECT_TRUE(initialized);

    auto kernelWithoutPrivateMemory =
        std::make_unique<ModuleImmutableDataFixture::MockKernel>(moduleWithoutPrivateMemory.get());
    createKernel(kernelWithoutPrivateMemory.get());
    EXPECT_EQ(nullptr, kernelWithoutPrivateMemory->privateMemoryGraphicsAllocation);

    const size_t entriesWithoutPrivateMemory = kernelWithoutPrivateMemory->getResidencyContainer().size();

    EXPECT_EQ(entriesWithoutPrivateMemory + 1u, entriesWithPrivateMemory);
}
553
// Requests the largest representable per-thread private memory size and expects the
// module to report per-dispatch private memory allocation, with the created kernel
// holding no private memory allocation of its own.
TEST_F(KernelImmutableDataTests, givenKernelWithPrivateMemoryBiggerThanGlobalMemoryThenPrivateMemoryIsNotAllocated) {
    std::string binaryPath;
    retrieveBinaryKernelFilenameNoRevision(binaryPath, binaryFilename + "_", ".bin");

    size_t binarySize = 0;
    auto binaryData = loadDataFromFile(binaryPath.c_str(), binarySize);
    ASSERT_NE(0u, binarySize);
    ASSERT_NE(nullptr, binaryData);

    ze_module_desc_t moduleDesc = {};
    moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
    moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(binaryData.get());
    moduleDesc.inputSize = binarySize;
    ModuleBuildLog *moduleBuildLog = nullptr;

    const uint32_t hugePrivateMemorySize = std::numeric_limits<uint32_t>::max();
    auto immData = std::make_unique<MockImmutableData>(hugePrivateMemorySize);
    // Local module, distinct from the fixture's own module member.
    auto oversizedModule = std::make_unique<MockModule>(device,
                                                        moduleBuildLog,
                                                        ModuleType::User,
                                                        hugePrivateMemorySize,
                                                        immData.get());
    const bool initialized = oversizedModule->initialize(&moduleDesc, device->getNEODevice());
    EXPECT_TRUE(initialized);
    EXPECT_TRUE(oversizedModule->shouldAllocatePrivateMemoryPerDispatch());

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(oversizedModule.get());
    createKernel(kernel.get());

    EXPECT_EQ(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
}
588
589 class KernelDescriptorRTCallsTrue : public NEO::KernelDescriptor {
hasRTCalls() const590 bool hasRTCalls() const override {
591 return true;
592 }
593 };
594
595 class KernelDescriptorRTCallsFalse : public NEO::KernelDescriptor {
hasRTCalls() const596 bool hasRTCalls() const override {
597 return false;
598 }
599 };
600
// Initializes a kernel whose descriptor reports ray-tracing calls and whose
// rtDispatchGlobals implicit argument has a pointer size set. Expects the device to
// expose an RT memory-backed buffer and RT dispatch globals, and expects the dispatch
// globals allocation to be the last entry of the kernel's residency container.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized) {
    KernelDescriptorRTCallsTrue mockDescriptor = {};
    mockDescriptor.kernelMetadata.kernelName = "rt_test";
    for (auto i = 0u; i < 3u; i++) {
        mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0;
    }

    std::unique_ptr<MockImmutableData> mockKernelImmutableData =
        std::make_unique<MockImmutableData>(32u);
    mockKernelImmutableData->kernelDescriptor = &mockDescriptor;
    mockDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 4;

    ModuleBuildLog *moduleBuildLog = nullptr;
    module = std::make_unique<MockModule>(device,
                                          moduleBuildLog,
                                          ModuleType::User,
                                          32u,
                                          mockKernelImmutableData.get());
    module->maxGroupSize = 10;

    std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
    kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "rt_test";

    // The module only exposes its immutable-data vector as const; cast it away so the
    // mock entry can be appended and found by name during kernel initialization.
    auto immDataVector =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> *>(&module.get()->getKernelImmutableDataVector());

    immDataVector->push_back(std::move(mockKernelImmutableData));

    neoDevice->setRTDispatchGlobalsForceAllocation();

    auto result = kernel->initialize(&kernelDesc);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, module.get()->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());

    auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
    EXPECT_NE(nullptr, rtDispatchGlobals);

    size_t residencySize = kernel->getResidencyContainer().size();
    // Fatal: with an empty container the index below would wrap around and read out of
    // bounds.
    ASSERT_NE(0u, residencySize);

    EXPECT_EQ(kernel->getResidencyContainer()[residencySize - 1], rtDispatchGlobals);
}
646
// Same setup as the ray-tracing initialization test, but the rtDispatchGlobals implicit
// argument is left untouched and no allocation is forced: the RT memory-backed buffer
// is still expected, while the RT dispatch globals must stay null.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueButKernelDoesNotHaveRTDGAllocationTokenThenRayTracingStillEnabledWithoutAllocation) {
    KernelDescriptorRTCallsTrue rtDescriptor = {};
    rtDescriptor.kernelMetadata.kernelName = "rt_test";
    for (uint32_t dim = 0u; dim < 3u; ++dim) {
        rtDescriptor.kernelAttributes.requiredWorkgroupSize[dim] = 0;
    }

    auto immData = std::make_unique<MockImmutableData>(32u);
    immData->kernelDescriptor = &rtDescriptor;

    ModuleBuildLog *moduleBuildLog = nullptr;
    module = std::make_unique<MockModule>(device,
                                          moduleBuildLog,
                                          ModuleType::User,
                                          32u,
                                          immData.get());
    module->maxGroupSize = 10;

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "rt_test";

    // The module only exposes its immutable-data vector as const; cast it away so the
    // mock entry can be appended before the kernel is initialized.
    auto *immDataVector =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> *>(&module->getKernelImmutableDataVector());
    immDataVector->push_back(std::move(immData));

    const auto result = kernel->initialize(&kernelDesc);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, module->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());

    auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
    EXPECT_EQ(nullptr, rtDispatchGlobals);
}
684
// With a FailMemoryManager injected into the device right before initialization, a kernel
// whose descriptor declares a 4-byte rtDispatchGlobals pointer must fail to initialize with
// ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized) {
    KernelDescriptorRTCallsTrue rtDescriptor = {};
    rtDescriptor.kernelMetadata.kernelName = "rt_test";
    for (uint32_t dim = 0u; dim < 3u; ++dim) {
        rtDescriptor.kernelAttributes.requiredWorkgroupSize[dim] = 0;
    }
    rtDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 4;

    // Handed to the device through injectMemoryManager() further down.
    NEO::MemoryManager *failingMemoryManager = new NEO::FailMemoryManager(0, *neoDevice->executionEnvironment);

    auto immutableData = std::make_unique<MockImmutableData>(32u);
    immutableData->kernelDescriptor = &rtDescriptor;

    ModuleBuildLog *buildLog = nullptr;
    module = std::make_unique<MockModule>(device, buildLog, ModuleType::User, 32u, immutableData.get());
    module->maxGroupSize = 10;

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "rt_test";

    // Append the mock entry to the module's (const-exposed) immutable-data vector.
    auto &registeredImmData =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> &>(module->getKernelImmutableDataVector());
    registeredImmData.push_back(std::move(immutableData));

    neoDevice->injectMemoryManager(failingMemoryManager);

    const auto initResult = kernel->initialize(&kernelDesc);
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, initResult);
}
721
// Kernel built from a KernelDescriptorRTCallsFalse descriptor: initialization succeeds and the
// device ends up without an RT memory backed buffer.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitialized) {
    KernelDescriptorRTCallsFalse noRtDescriptor = {};
    noRtDescriptor.kernelMetadata.kernelName = "rt_test";
    for (uint32_t dim = 0u; dim < 3u; ++dim) {
        noRtDescriptor.kernelAttributes.requiredWorkgroupSize[dim] = 0;
    }

    auto immutableData = std::make_unique<MockImmutableData>(32u);
    immutableData->kernelDescriptor = &noRtDescriptor;

    ModuleBuildLog *buildLog = nullptr;
    module = std::make_unique<MockModule>(device, buildLog, ModuleType::User, 32u, immutableData.get());
    module->maxGroupSize = 10;

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "rt_test";

    // Append the mock entry to the module's (const-exposed) immutable-data vector.
    auto &registeredImmData =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> &>(module->getKernelImmutableDataVector());
    registeredImmData.push_back(std::move(immutableData));

    const auto initResult = kernel->initialize(&kernelDesc);
    EXPECT_EQ(ZE_RESULT_SUCCESS, initResult);
    EXPECT_EQ(nullptr, module->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());
}
755
// With RTDispatchGlobals allocation forced on the device, kernel initialization must write the
// dispatch-globals GPU address (getGpuAddressToPatch()) at the start of the cross-thread data.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatched) {
    KernelDescriptorRTCallsTrue rtDescriptor = {};
    rtDescriptor.kernelMetadata.kernelName = "rt_test";
    for (uint32_t dim = 0u; dim < 3u; ++dim) {
        rtDescriptor.kernelAttributes.requiredWorkgroupSize[dim] = 0;
    }
    rtDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 4;

    auto immutableData = std::make_unique<MockImmutableData>(32u);
    immutableData->kernelDescriptor = &rtDescriptor;

    ModuleBuildLog *buildLog = nullptr;
    module = std::make_unique<MockModule>(device, buildLog, ModuleType::User, 32u, immutableData.get());
    module->maxGroupSize = 10;

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "rt_test";

    // Append the mock entry to the module's (const-exposed) immutable-data vector.
    auto &registeredImmData =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> &>(module->getKernelImmutableDataVector());
    registeredImmData.push_back(std::move(immutableData));

    // The kernel only borrows this test-owned buffer; see the release() at the end.
    auto patchBuffer = std::make_unique<uint32_t[]>(4);
    kernel->crossThreadData.reset(reinterpret_cast<uint8_t *>(patchBuffer.get()));
    kernel->crossThreadDataSize = sizeof(uint32_t[4]);

    neoDevice->setRTDispatchGlobalsForceAllocation();

    const auto initResult = kernel->initialize(&kernelDesc);
    EXPECT_EQ(ZE_RESULT_SUCCESS, initResult);

    auto dispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
    EXPECT_NE(nullptr, dispatchGlobals);

    const auto expectedAddress = static_cast<uintptr_t>(dispatchGlobals->getGpuAddressToPatch());
    const auto patchedAddress = *reinterpret_cast<uintptr_t *>(patchBuffer.get());
    EXPECT_EQ(expectedAddress, patchedAddress);

    // Detach the borrowed pointer so the kernel does not delete memory patchBuffer still owns.
    kernel->crossThreadData.release();
}
805
806 using KernelIndirectPropertiesFromIGCTests = KernelImmutableDataTests;
807
TEST_F(KernelIndirectPropertiesFromIGCTests,whenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicThenHasIndirectAccessIsSetToFalse)808 TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicThenHasIndirectAccessIsSetToFalse) {
809 DebugManagerStateRestore restorer;
810 NEO::DebugManager.flags.DisableIndirectAccess.set(0);
811
812 uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
813 bool isInternal = false;
814
815 std::unique_ptr<MockImmutableData> mockKernelImmData =
816 std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
817
818 createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get());
819
820 std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
821 kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
822
823 ze_kernel_desc_t desc = {};
824 desc.pKernelName = kernelName.c_str();
825
826 module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
827 module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
828 module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
829
830 kernel->initialize(&desc);
831
832 EXPECT_FALSE(kernel->hasIndirectAccess());
833 }
834
// With DisableIndirectAccess set to 0, setting any single one of the hasNonKernelArgLoad /
// Store / Atomic attributes must make a freshly initialized kernel report indirect access.
TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoadStoreAtomicThenHasIndirectAccessIsSetToTrue) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(0);

    const uint32_t privateMemorySizePerHwThread = 32u;
    const bool internalModule = false;

    auto immutableData = std::make_unique<MockImmutableData>(privateMemorySizePerHwThread);

    createModuleFromBinary(privateMemorySizePerHwThread, internalModule, immutableData.get());

    // Creates a new kernel on the shared module, applies the given attribute combination,
    // initializes the kernel and expects indirect access to be reported.
    auto expectIndirectAccessWith = [this](bool hasLoad, bool hasStore, bool hasAtomic) {
        auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

        ze_kernel_desc_t desc = {};
        desc.pKernelName = kernelName.c_str();

        auto &attributes = module->mockKernelImmData->mockKernelDescriptor->kernelAttributes;
        attributes.hasNonKernelArgLoad = hasLoad;
        attributes.hasNonKernelArgStore = hasStore;
        attributes.hasNonKernelArgAtomic = hasAtomic;

        kernel->initialize(&desc);

        EXPECT_TRUE(kernel->hasIndirectAccess());
    };

    expectIndirectAccessWith(true, false, false);
    expectIndirectAccessWith(false, true, false);
    expectIndirectAccessWith(false, false, true);
}
895
896 class KernelPropertiesTests : public ModuleFixture, public ::testing::Test {
897 public:
898 class MockKernel : public KernelImp {
899 public:
900 using KernelImp::kernelHasIndirectAccess;
901 };
SetUp()902 void SetUp() override {
903 ModuleFixture::SetUp();
904
905 ze_kernel_desc_t kernelDesc = {};
906 kernelDesc.pKernelName = kernelName.c_str();
907
908 ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle);
909 EXPECT_EQ(ZE_RESULT_SUCCESS, res);
910
911 kernel = static_cast<MockKernel *>(L0::Kernel::fromHandle(kernelHandle));
912 kernel->kernelHasIndirectAccess = true;
913 }
914
TearDown()915 void TearDown() override {
916 Kernel::fromHandle(kernelHandle)->destroy();
917 ModuleFixture::TearDown();
918 }
919
920 ze_kernel_handle_t kernelHandle;
921 MockKernel *kernel = nullptr;
922 };
923
// getKernelName() must report the name length including the terminator when called without a
// buffer, correct an oversized requested size, and copy the kernel name into a provided buffer.
TEST_F(KernelPropertiesTests, givenKernelThenCorrectNameIsRetrieved) {
    size_t kernelSize = 0;
    ze_result_t res = kernel->getKernelName(&kernelSize, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_EQ(kernelSize, kernelName.length() + 1);

    // A size larger than needed is expected to be adjusted back to the real size.
    size_t alteredKernelSize = kernelSize * 2;
    res = kernel->getKernelName(&alteredKernelSize, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_EQ(alteredKernelSize, kernelSize);

    // Owned buffer instead of raw new[]/delete[], so it is released on every exit path.
    auto kernelNameRetrieved = std::make_unique<char[]>(kernelSize);
    res = kernel->getKernelName(&kernelSize, kernelNameRetrieved.get());
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    EXPECT_EQ(0, strncmp(kernelName.c_str(), kernelNameRetrieved.get(), kernelSize));
}
943
// Pre-fills every checked field of ze_kernel_properties_t with a poison value, calls
// getProperties() and verifies each field was overwritten with the expected value.
TEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {
    ze_kernel_properties_t kernelProperties = {};

    constexpr uint32_t poison = std::numeric_limits<uint32_t>::max();
    kernelProperties.requiredNumSubGroups = poison;
    kernelProperties.requiredSubgroupSize = poison;
    kernelProperties.maxSubgroupSize = poison;
    kernelProperties.maxNumSubgroups = poison;
    kernelProperties.localMemSize = poison;
    kernelProperties.privateMemSize = poison;
    kernelProperties.spillMemSize = poison;
    kernelProperties.numKernelArgs = poison;
    // memset() stores its value argument as an unsigned char, so spell the byte out explicitly.
    memset(&kernelProperties.uuid.kid, 0xFF, sizeof(kernelProperties.uuid.kid));
    memset(&kernelProperties.uuid.mid, 0xFF, sizeof(kernelProperties.uuid.mid));

    ze_result_t res = kernel->getProperties(&kernelProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    EXPECT_EQ(6U, kernelProperties.numKernelArgs);

    EXPECT_EQ(0U, kernelProperties.requiredNumSubGroups);
    EXPECT_EQ(0U, kernelProperties.requiredSubgroupSize);

    uint32_t maxSubgroupSize = this->kernel->getKernelDescriptor().kernelAttributes.simdSize;
    // Fatal: maxSubgroupSize is used as a divisor below.
    ASSERT_NE(0U, maxSubgroupSize);
    EXPECT_EQ(maxSubgroupSize, kernelProperties.maxSubgroupSize);

    uint32_t maxKernelWorkGroupSize = static_cast<uint32_t>(this->module->getDevice()->getNEODevice()->getDeviceInfo().maxWorkGroupSize);
    uint32_t maxNumSubgroups = maxKernelWorkGroupSize / maxSubgroupSize;
    EXPECT_EQ(maxNumSubgroups, kernelProperties.maxNumSubgroups);

    EXPECT_EQ(sizeof(float) * 16U, kernelProperties.localMemSize);
    EXPECT_EQ(0U, kernelProperties.privateMemSize);
    EXPECT_EQ(0U, kernelProperties.spillMemSize);

    // Both UUIDs are expected to come back zeroed.
    uint8_t zeroKid[ZE_MAX_KERNEL_UUID_SIZE] = {};
    uint8_t zeroMid[ZE_MAX_MODULE_UUID_SIZE] = {};
    EXPECT_EQ(0, memcmp(&kernelProperties.uuid.kid, &zeroKid,
                        sizeof(kernelProperties.uuid.kid)));
    EXPECT_EQ(0, memcmp(&kernelProperties.uuid.mid, &zeroMid,
                        sizeof(kernelProperties.uuid.mid)));
}
992
// Chains a preferred-group-size extension struct to the kernel properties and checks that
// preferredMultiple equals the kernel's max SIMD size, doubled when fused EU dispatch is enabled.
TEST_F(KernelPropertiesTests, whenPassingPreferredGroupSizeStructToGetPropertiesThenPreferredMultipleIsReturned) {
    ze_kernel_preferred_group_size_properties_t preferredGroupProperties = {};
    preferredGroupProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PREFERRED_GROUP_SIZE_PROPERTIES;

    ze_kernel_properties_t kernelProperties = {};
    kernelProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES;
    kernelProperties.pNext = &preferredGroupProperties;

    const ze_result_t res = kernel->getProperties(&kernelProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    const auto &hwInfo = module->getDevice()->getHwInfo();
    auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);

    const bool fusedEuDispatch = hwHelper.isFusedEuDispatchEnabled(hwInfo);
    auto expectedMultiple = static_cast<uint32_t>(kernel->getImmutableData()->getKernelInfo()->getMaxSimdSize());
    if (fusedEuDispatch) {
        expectedMultiple *= 2;
    }
    EXPECT_EQ(preferredGroupProperties.preferredMultiple, expectedMultiple);
}
1012
// An extension struct carrying an unrelated stype must be left untouched by getProperties(),
// which still returns success.
TEST_F(KernelPropertiesTests, whenPassingPreferredGroupSizeStructWithWrongStypeSuccessIsReturnedAndNoFieldsInPreferredGroupSizeStructAreSet) {
    const uint32_t sentinelMultiple = 101;

    ze_kernel_preferred_group_size_properties_t preferredGroupProperties = {};
    preferredGroupProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_WIN32;
    preferredGroupProperties.preferredMultiple = sentinelMultiple;

    ze_kernel_properties_t kernelProperties = {};
    kernelProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES;
    kernelProperties.pNext = &preferredGroupProperties;

    const ze_result_t res = kernel->getProperties(&kernelProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    // The sentinel surviving the call shows the struct was not written.
    EXPECT_EQ(preferredGroupProperties.preferredMultiple, sentinelMultiple);
}
1030
// getProfileInfo() must overwrite the poisoned flags and numTokens fields with zero.
TEST_F(KernelPropertiesTests, givenValidKernelThenProfilePropertiesAreRetrieved) {
    constexpr uint32_t poison = std::numeric_limits<uint32_t>::max();

    zet_profile_properties_t profileProperties = {};
    profileProperties.flags = poison;
    profileProperties.numTokens = poison;

    const ze_result_t res = kernel->getProfileInfo(&profileProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    EXPECT_EQ(0U, profileProperties.flags);
    EXPECT_EQ(0U, profileProperties.numTokens);
}
1043
// All three indirect-allocation controls start disabled and become enabled after
// setIndirectAccess() is called with the device, host and shared flags combined.
TEST_F(KernelPropertiesTests, whenSettingValidKernelIndirectAccessFlagsThenFlagsAreSetCorrectly) {
    UnifiedMemoryControls controls = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(controls.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(controls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(controls.indirectSharedAllocationsAllowed);

    const ze_kernel_indirect_access_flags_t allFlags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE |
                                                       ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST |
                                                       ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setIndirectAccess(allFlags));

    controls = kernel->getUnifiedMemoryControls();
    EXPECT_TRUE(controls.indirectDeviceAllocationsAllowed);
    EXPECT_TRUE(controls.indirectHostAllocationsAllowed);
    EXPECT_TRUE(controls.indirectSharedAllocationsAllowed);
}
1061
// After enabling only the device flag, getIndirectAccess() must return exactly that flag.
TEST_F(KernelPropertiesTests, whenCallingGetIndirectAccessAfterSetIndirectAccessWithDeviceFlagThenCorrectFlagIsReturned) {
    ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE;
    auto res = kernel->setIndirectAccess(flags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    // Zero-initialized so the checks below read a defined value even if the getter fails
    // without writing its output.
    ze_kernel_indirect_access_flags_t returnedFlags = 0;
    res = kernel->getIndirectAccess(&returnedFlags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_TRUE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED);
}
1074
// After enabling only the host flag, getIndirectAccess() must return exactly that flag.
TEST_F(KernelPropertiesTests, whenCallingGetIndirectAccessAfterSetIndirectAccessWithHostFlagThenCorrectFlagIsReturned) {
    ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST;
    auto res = kernel->setIndirectAccess(flags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    // Zero-initialized so the checks below read a defined value even if the getter fails
    // without writing its output.
    ze_kernel_indirect_access_flags_t returnedFlags = 0;
    res = kernel->getIndirectAccess(&returnedFlags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE);
    EXPECT_TRUE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED);
}
1087
// After enabling only the shared flag, getIndirectAccess() must return exactly that flag.
TEST_F(KernelPropertiesTests, whenCallingGetIndirectAccessAfterSetIndirectAccessWithSharedFlagThenCorrectFlagIsReturned) {
    ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    auto res = kernel->setIndirectAccess(flags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    // Zero-initialized so the checks below read a defined value even if the getter fails
    // without writing its output.
    ze_kernel_indirect_access_flags_t returnedFlags = 0;
    res = kernel->getIndirectAccess(&returnedFlags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST);
    EXPECT_TRUE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED);
}
// With the DisableIndirectAccess debug flag set to 0, setIndirectAccess() with all three flags
// must enable all three indirect-allocation controls.
TEST_F(KernelPropertiesTests, givenValidKernelWithIndirectAccessFlagsAndDisableIndirectAccessSetToZeroThenFlagsAreSet) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(0);

    UnifiedMemoryControls controls = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(controls.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(controls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(controls.indirectSharedAllocationsAllowed);

    const ze_kernel_indirect_access_flags_t allFlags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE |
                                                       ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST |
                                                       ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setIndirectAccess(allFlags));

    controls = kernel->getUnifiedMemoryControls();
    EXPECT_TRUE(controls.indirectDeviceAllocationsAllowed);
    EXPECT_TRUE(controls.indirectHostAllocationsAllowed);
    EXPECT_TRUE(controls.indirectSharedAllocationsAllowed);
}
1120
// A kernel whose immutable data points at a KernelDescriptorRTCallsTrue descriptor must
// report usesRayTracing() == true.
HWTEST2_F(KernelPropertiesTests, whenHasRTCallsIsTrueThenUsesRayTracingIsTrue, MatchAny) {
    WhiteBoxKernelHw<gfxCoreFamily> kernelUnderTest;
    KernelDescriptorRTCallsTrue rtDescriptor = {};
    WhiteBox<::L0::KernelImmutableData> immutableData = {};

    // Wire descriptor -> immutable data -> kernel by hand; no module is involved here.
    immutableData.kernelDescriptor = &rtDescriptor;
    kernelUnderTest.kernelImmData = &immutableData;

    const bool usesRayTracing = kernelUnderTest.usesRayTracing();
    EXPECT_TRUE(usesRayTracing);
}
1131
// A kernel whose immutable data points at a KernelDescriptorRTCallsFalse descriptor must
// report usesRayTracing() == false.
HWTEST2_F(KernelPropertiesTests, whenHasRTCallsIsFalseThenUsesRayTracingIsFalse, MatchAny) {
    WhiteBoxKernelHw<gfxCoreFamily> kernelUnderTest;
    KernelDescriptorRTCallsFalse noRtDescriptor = {};
    WhiteBox<::L0::KernelImmutableData> immutableData = {};

    // Wire descriptor -> immutable data -> kernel by hand; no module is involved here.
    immutableData.kernelDescriptor = &noRtDescriptor;
    kernelUnderTest.kernelImmData = &immutableData;

    const bool usesRayTracing = kernelUnderTest.usesRayTracing();
    EXPECT_FALSE(usesRayTracing);
}
1142
1143 using KernelIndirectPropertiesTests = KernelPropertiesTests;
1144
TEST_F(KernelIndirectPropertiesTests,whenCallingSetIndirectAccessWithKernelThatHasIndirectAccessThenIndirectAccessIsSet)1145 TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatHasIndirectAccessThenIndirectAccessIsSet) {
1146 DebugManagerStateRestore restorer;
1147 NEO::DebugManager.flags.DisableIndirectAccess.set(0);
1148 kernel->kernelHasIndirectAccess = true;
1149
1150 UnifiedMemoryControls unifiedMemoryControls = kernel->getUnifiedMemoryControls();
1151 EXPECT_EQ(false, unifiedMemoryControls.indirectDeviceAllocationsAllowed);
1152 EXPECT_EQ(false, unifiedMemoryControls.indirectHostAllocationsAllowed);
1153 EXPECT_EQ(false, unifiedMemoryControls.indirectSharedAllocationsAllowed);
1154
1155 ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE |
1156 ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST |
1157 ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
1158 auto res = kernel->setIndirectAccess(flags);
1159 EXPECT_EQ(ZE_RESULT_SUCCESS, res);
1160
1161 unifiedMemoryControls = kernel->getUnifiedMemoryControls();
1162 EXPECT_TRUE(unifiedMemoryControls.indirectDeviceAllocationsAllowed);
1163 EXPECT_TRUE(unifiedMemoryControls.indirectHostAllocationsAllowed);
1164 EXPECT_TRUE(unifiedMemoryControls.indirectSharedAllocationsAllowed);
1165 }
1166
// kernelHasIndirectAccess == true but DisableIndirectAccess == 1: setIndirectAccess() still
// returns success, yet none of the indirect-allocation controls may be enabled.
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatHasIndirectAccessButWithDisableIndirectAccessSetThenIndirectAccessIsNotSet) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(1);
    kernel->kernelHasIndirectAccess = true;

    UnifiedMemoryControls controls = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(controls.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(controls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(controls.indirectSharedAllocationsAllowed);

    const ze_kernel_indirect_access_flags_t allFlags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE |
                                                       ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST |
                                                       ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setIndirectAccess(allFlags));

    controls = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(controls.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(controls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(controls.indirectSharedAllocationsAllowed);
}
1188
// kernelHasIndirectAccess == true with DisableIndirectAccess explicitly 0: all three
// indirect-allocation controls must be enabled by setIndirectAccess().
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatHasIndirectAccessAndDisableIndirectAccessNotSetThenIndirectAccessIsSet) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(0);
    kernel->kernelHasIndirectAccess = true;

    UnifiedMemoryControls controls = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(controls.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(controls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(controls.indirectSharedAllocationsAllowed);

    const ze_kernel_indirect_access_flags_t allFlags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE |
                                                       ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST |
                                                       ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setIndirectAccess(allFlags));

    controls = kernel->getUnifiedMemoryControls();
    EXPECT_TRUE(controls.indirectDeviceAllocationsAllowed);
    EXPECT_TRUE(controls.indirectHostAllocationsAllowed);
    EXPECT_TRUE(controls.indirectSharedAllocationsAllowed);
}
1210
// kernelHasIndirectAccess == false: setIndirectAccess() returns success but leaves all three
// indirect-allocation controls disabled, even with DisableIndirectAccess == 0.
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatDoesNotHaveIndirectAccessThenIndirectAccessIsNotSet) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(0);
    kernel->kernelHasIndirectAccess = false;

    UnifiedMemoryControls controls = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(controls.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(controls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(controls.indirectSharedAllocationsAllowed);

    const ze_kernel_indirect_access_flags_t allFlags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE |
                                                       ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST |
                                                       ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setIndirectAccess(allFlags));

    controls = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(controls.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(controls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(controls.indirectSharedAllocationsAllowed);
}
1232
// hasIndirectAllocationsAllowed() flips from false to true once the device flag is set.
TEST_F(KernelPropertiesTests, givenValidKernelIndirectAccessFlagsSetThenExpectKernelIndirectAllocationsAllowedTrue) {
    EXPECT_FALSE(kernel->hasIndirectAllocationsAllowed());

    const ze_kernel_indirect_access_flags_t deviceOnly = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE;
    const auto setResult = kernel->setIndirectAccess(deviceOnly);

    EXPECT_EQ(ZE_RESULT_SUCCESS, setResult);
    EXPECT_TRUE(kernel->hasIndirectAllocationsAllowed());
}
1242
// getProperties() must report a spill memory size of zero for the fixture kernel.
TEST_F(KernelPropertiesTests, givenValidKernelAndNoMediavfestateThenSpillMemSizeIsZero) {
    ze_kernel_properties_t kernelProperties = {};

    // Poison the field so the final check proves getProperties() actually overwrote it.
    kernelProperties.spillMemSize = std::numeric_limits<uint32_t>::max();

    ze_result_t res = kernel->getProperties(&kernelProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    EXPECT_EQ(0u, kernelProperties.spillMemSize);
}
1265
// getProperties() must report a private memory size of zero for the fixture kernel.
TEST_F(KernelPropertiesTests, givenValidKernelAndNollocateStatelessPrivateSurfaceThenPrivateMemSizeIsZero) {
    ze_kernel_properties_t kernelProperties = {};

    // Poison the field that is asserted on. Poisoning spillMemSize here (as before) left
    // privateMemSize at its zero-initialized value, so the check passed even if
    // getProperties() never wrote it.
    kernelProperties.privateMemSize = std::numeric_limits<uint32_t>::max();

    ze_result_t res = kernel->getProperties(&kernelProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    EXPECT_EQ(0u, kernelProperties.privateMemSize);
}
1288
// setCacheConfig(LARGE_SLM) moves the kernel's SLM policy from SlmPolicyNone to SlmPolicyLargeSlm.
TEST_F(KernelPropertiesTests, givenValidKernelAndLargeSlmIsSetThenForceLargeSlmIsTrue) {
    const auto policyBefore = kernel->getSlmPolicy();
    EXPECT_EQ(NEO::SlmPolicy::SlmPolicyNone, policyBefore);

    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setCacheConfig(ZE_CACHE_CONFIG_FLAG_LARGE_SLM));

    const auto policyAfter = kernel->getSlmPolicy();
    EXPECT_EQ(NEO::SlmPolicy::SlmPolicyLargeSlm, policyAfter);
}
1295
// setCacheConfig(LARGE_DATA) moves the kernel's SLM policy from SlmPolicyNone to SlmPolicyLargeData.
TEST_F(KernelPropertiesTests, givenValidKernelAndLargeDataIsSetThenForceLargeDataIsTrue) {
    const auto policyBefore = kernel->getSlmPolicy();
    EXPECT_EQ(NEO::SlmPolicy::SlmPolicyNone, policyBefore);

    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setCacheConfig(ZE_CACHE_CONFIG_FLAG_LARGE_DATA));

    const auto policyAfter = kernel->getSlmPolicy();
    EXPECT_EQ(NEO::SlmPolicy::SlmPolicyLargeData, policyAfter);
}
1302
1303 using KernelLocalIdsTest = Test<ModuleFixture>;
1304
TEST_F(KernelLocalIdsTest,WhenKernelIsCreatedThenDefaultLocalIdGenerationbyRuntimeIsTrue)1305 TEST_F(KernelLocalIdsTest, WhenKernelIsCreatedThenDefaultLocalIdGenerationbyRuntimeIsTrue) {
1306 createKernel();
1307
1308 EXPECT_TRUE(kernel->requiresGenerationOfLocalIdsByRuntime());
1309 }
1310
1311 struct KernelIsaTests : Test<ModuleFixture> {
SetUpL0::ult::KernelIsaTests1312 void SetUp() override {
1313 Test<ModuleFixture>::SetUp();
1314
1315 auto &capabilityTable = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable;
1316 bool createBcsEngine = !capabilityTable.blitterOperationsSupported;
1317 capabilityTable.blitterOperationsSupported = true;
1318
1319 if (createBcsEngine) {
1320 auto &engine = device->getNEODevice()->getEngine(0);
1321 bcsOsContext.reset(OsContext::create(nullptr, 0,
1322 EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, device->getNEODevice()->getDeviceBitfield())));
1323 engine.osContext = bcsOsContext.get();
1324 engine.commandStreamReceiver->setupContext(*bcsOsContext);
1325 }
1326 }
1327
1328 std::unique_ptr<OsContext> bcsOsContext;
1329 };
1330
// With CPU access to local memory disallowed, initializing the immutable data is
// expected to bump the BCS task count by one when the ISA allocation landed in the
// local memory pool, and to leave it untouched otherwise.
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) {
    DebugManagerStateRestore restore;
    const auto accessMode = static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed);
    const auto kernelIsaPlacementMask = 1 << (static_cast<int64_t>(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA) - 1);
    DebugManager.flags.ForceLocalMemoryAccessMode.set(accessMode);
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(kernelIsaPlacementMask);

    // Minimal one-byte kernel heap backed by a stack variable.
    uint32_t isaStub = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.pKernelHeap = &isaStub;
    kernelInfo.heapInfo.KernelHeapSize = 1;

    KernelImmutableData kernelImmutableData(device);

    auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
    const auto taskCountBefore = bcsCsr->peekTaskCount();

    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);

    const bool isaInLocalMemory = kernelImmutableData.getIsaGraphicsAllocation()->isAllocatedInLocalMemoryPool();
    const auto expectedTaskCount = isaInLocalMemory ? taskCountBefore + 1 : taskCountBefore;
    EXPECT_EQ(expectedTaskCount, bcsCsr->peekTaskCount());

    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
1356
// With CPU access to local memory allowed, initializing the immutable data must not
// change the BCS task count.
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) {
    DebugManagerStateRestore restore;
    const auto accessMode = static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessAllowed);
    const auto kernelIsaPlacementMask = 1 << (static_cast<int64_t>(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA) - 1);
    DebugManager.flags.ForceLocalMemoryAccessMode.set(accessMode);
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(kernelIsaPlacementMask);

    // Minimal one-byte kernel heap backed by a stack variable.
    uint32_t isaStub = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.pKernelHeap = &isaStub;
    kernelInfo.heapInfo.KernelHeapSize = 1;

    KernelImmutableData kernelImmutableData(device);

    auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
    const auto taskCountBefore = bcsCsr->peekTaskCount();

    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);

    const auto taskCountAfter = bcsCsr->peekTaskCount();
    EXPECT_EQ(taskCountBefore, taskCountAfter);

    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
1378
// With CPU access disallowed but blitter support switched off again after fixture
// setup, initializing the immutable data must not change the BCS task count.
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) {
    DebugManagerStateRestore restore;
    const auto accessMode = static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed);
    const auto kernelIsaPlacementMask = 1 << (static_cast<int64_t>(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA) - 1);
    DebugManager.flags.ForceLocalMemoryAccessMode.set(accessMode);
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(kernelIsaPlacementMask);

    // Undo the blitter capability that the fixture's SetUp() forced on.
    auto &capabilityTable = device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable;
    capabilityTable.blitterOperationsSupported = false;

    // Minimal one-byte kernel heap backed by a stack variable.
    uint32_t isaStub = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.pKernelHeap = &isaStub;
    kernelInfo.heapInfo.KernelHeapSize = 1;

    KernelImmutableData kernelImmutableData(device);

    auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
    const auto taskCountBefore = bcsCsr->peekTaskCount();

    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);

    const auto taskCountAfter = bcsCsr->peekTaskCount();
    EXPECT_EQ(taskCountBefore, taskCountAfter);

    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
1402
// Initializing with the internal-ISA flag set must yield a KERNEL_ISA_INTERNAL allocation.
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithInternalIsaThenCorrectAllocationTypeIsUsed) {
    // Minimal one-byte kernel heap backed by a stack variable.
    uint32_t isaStub = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.pKernelHeap = &isaStub;
    kernelInfo.heapInfo.KernelHeapSize = 1;

    KernelImmutableData kernelImmutableData(device);
    const bool internalIsa = true;
    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, internalIsa);

    auto isaAllocation = kernelImmutableData.getIsaGraphicsAllocation();
    EXPECT_EQ(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, isaAllocation->getAllocationType());
}
1414
// Initializing with the internal-ISA flag cleared must yield a KERNEL_ISA allocation.
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithNonInternalIsaThenCorrectAllocationTypeIsUsed) {
    // Minimal one-byte kernel heap backed by a stack variable.
    uint32_t isaStub = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.pKernelHeap = &isaStub;
    kernelInfo.heapInfo.KernelHeapSize = 1;

    KernelImmutableData kernelImmutableData(device);
    const bool internalIsa = false;
    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, internalIsa);

    auto isaAllocation = kernelImmutableData.getIsaGraphicsAllocation();
    EXPECT_EQ(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA, isaAllocation->getAllocationType());
}
1426
// The ISA allocation must be as large as the kernel heap plus the padding reported
// by the hardware helper.
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithIsaThenPaddingIsAdded) {
    // Minimal one-byte kernel heap backed by a stack variable.
    uint32_t isaStub = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.pKernelHeap = &isaStub;
    kernelInfo.heapInfo.KernelHeapSize = 1;

    KernelImmutableData kernelImmutableData(device);
    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
    auto isaAllocation = kernelImmutableData.getIsaGraphicsAllocation();

    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    const size_t isaPadding = hwHelper.getPaddingForISAAllocation();
    const auto expectedSize = kernelInfo.heapInfo.KernelHeapSize + isaPadding;

    EXPECT_EQ(isaAllocation->getUnderlyingBufferSize(), expectedSize);
}
1440
// Global constant and global variable buffers handed to initialize() must each
// appear exactly once in the immutable data's residency container.
TEST_F(KernelIsaTests, givenGlobalBuffersWhenCreatingKernelImmutableDataThenBuffersAreAddedToResidencyContainer) {
    // Minimal one-byte kernel heap backed by a stack variable.
    uint32_t isaStub = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.pKernelHeap = &isaStub;
    kernelInfo.heapInfo.KernelHeapSize = 1;

    KernelImmutableData kernelImmutableData(device);

    // Both mock allocations share the same fake address and size.
    const uint64_t gpuAddress = 0x1200;
    const size_t size = 0x1100;
    void *buffer = reinterpret_cast<void *>(gpuAddress);
    NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, size);
    NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, size);

    kernelImmutableData.initialize(&kernelInfo, device, 0, &globalConstBuffer, &globalVarBuffer, false);

    auto &residency = kernelImmutableData.getResidencyContainer();
    const auto varBufferHits = std::count(residency.begin(), residency.end(), &globalVarBuffer);
    const auto constBufferHits = std::count(residency.begin(), residency.end(), &globalConstBuffer);
    EXPECT_EQ(1, varBufferHits);
    EXPECT_EQ(1, constBufferHits);
}
1461
// With a debugger installed and debug data present on the kernel, initialize() is
// expected to call registerElf(); the stub proves the call by writing 123 to vIsaSize.
TEST_F(KernelIsaTests, givenDebugONAndKernelDegugInfoWhenInitializingImmutableDataThenRegisterElf) {
    // Debugger stub: registerElf() stamps a marker into the debug data it is given.
    class MockDebugger : public DebuggerL0 {
      public:
        MockDebugger(NEO::Device *neodev) : DebuggerL0(neodev) {
        }
        void registerElf(NEO::DebugData *debugData, NEO::GraphicsAllocation *isaAllocation) override {
            debugData->vIsaSize = 123;
        }
        size_t getSbaTrackingCommandsSize(size_t trackedAddressCount) override { return static_cast<size_t>(0); }
        void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) override {}
    };

    // Minimal one-byte kernel heap backed by a stack variable.
    uint32_t isaStub = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.pKernelHeap = &isaStub;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.kernelDescriptor.external.debugData.reset(new DebugData);

    // Ownership of the stub is handed to the root device environment.
    MockDebugger *mockDebugger = new MockDebugger(neoDevice);
    auto &rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0];
    rootDeviceEnvironment->debugger.reset(static_cast<NEO::Debugger *>(mockDebugger));

    KernelImmutableData kernelImmutableData(device);
    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);

    EXPECT_EQ(kernelInfo.kernelDescriptor.external.debugData->vIsaSize, static_cast<uint32_t>(123));
}
1487
// With a debugger installed but no debug data on the kernel, the debug data pointer
// must still be null after initialize().
TEST_F(KernelIsaTests, givenDebugONAndNoKernelDegugInfoWhenInitializingImmutableDataThenDoNotRegisterElf) {
    // Debugger stub: registerElf() would write through the debug data pointer it is given.
    class MockDebugger : public DebuggerL0 {
      public:
        MockDebugger(NEO::Device *neodev) : DebuggerL0(neodev) {
        }
        void registerElf(NEO::DebugData *debugData, NEO::GraphicsAllocation *isaAllocation) override {
            debugData->vIsaSize = 123;
        }
        size_t getSbaTrackingCommandsSize(size_t trackedAddressCount) override { return static_cast<size_t>(0); }
        void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) override {}
    };

    // Minimal one-byte kernel heap backed by a stack variable; debug data explicitly cleared.
    uint32_t isaStub = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.pKernelHeap = &isaStub;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.kernelDescriptor.external.debugData.reset(nullptr);

    // Ownership of the stub is handed to the root device environment.
    MockDebugger *mockDebugger = new MockDebugger(neoDevice);
    auto &rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0];
    rootDeviceEnvironment->debugger.reset(static_cast<NEO::Debugger *>(mockDebugger));

    KernelImmutableData kernelImmutableData(device);
    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);

    EXPECT_EQ(kernelInfo.kernelDescriptor.external.debugData, nullptr);
}
1512
1513 using KernelImpPatchBindlessTest = Test<ModuleFixture>;
1514
TEST_F(KernelImpPatchBindlessTest,GivenKernelImpWhenPatchBindlessOffsetCalledThenOffsetPatchedCorrectly)1515 TEST_F(KernelImpPatchBindlessTest, GivenKernelImpWhenPatchBindlessOffsetCalledThenOffsetPatchedCorrectly) {
1516 Mock<Kernel> kernel;
1517 neoDevice->incRefInternal();
1518 neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
1519 neoDevice->getNumGenericSubDevices() > 1,
1520 neoDevice->getRootDeviceIndex(),
1521 neoDevice->getDeviceBitfield());
1522 Mock<Module> mockModule(device, nullptr);
1523 kernel.module = &mockModule;
1524 NEO::MockGraphicsAllocation alloc;
1525 uint32_t bindless = 0x40;
1526 auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
1527 size_t size = hwHelper.getRenderSurfaceStateSize();
1528 auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(size, &alloc, NEO::BindlessHeapsHelper::GLOBAL_SSH);
1529 auto patchLocation = ptrOffset(kernel.getCrossThreadData(), bindless);
1530 auto patchValue = hwHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(expectedSsInHeap.surfaceStateOffset));
1531
1532 auto ssPtr = kernel.patchBindlessSurfaceState(&alloc, bindless);
1533
1534 EXPECT_EQ(ssPtr, expectedSsInHeap.ssPtr);
1535 EXPECT_TRUE(memcmp(const_cast<uint8_t *>(patchLocation), &patchValue, sizeof(patchValue)) == 0);
1536 EXPECT_TRUE(std::find(kernel.getResidencyContainer().begin(), kernel.getResidencyContainer().end(), expectedSsInHeap.heapAllocation) != kernel.getResidencyContainer().end());
1537 neoDevice->decRefInternal();
1538 }
1539
// For an argument with a valid bindless offset, setBufferSurfaceState() must modify
// the surface state slot that was zeroed beforehand.
HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindlessThenSurfaceStateUpdated, MatchAny) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    // Force argument 0 into bindless addressing.
    auto &explicitArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(explicitArg.template as<NEO::ArgDescPointer>());
    pointerArg.bindless = 0x40;
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;

    auto &rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()];
    rootDeviceEnvironment->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
                                                     neoDevice->getNumGenericSubDevices() > 1,
                                                     neoDevice->getRootDeviceIndex(),
                                                     neoDevice->getDeviceBitfield());

    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    const size_t surfaceStateSize = hwHelper.getRenderSurfaceStateSize();
    const uint64_t gpuAddress = 0x2000;
    void *buffer = reinterpret_cast<void *>(gpuAddress);

    NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, surfaceStateSize);
    auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(surfaceStateSize, &mockAllocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);
    auto *surfaceStateInHeap = reinterpret_cast<RENDER_SURFACE_STATE *>(expectedSsInHeap.ssPtr);

    // Snapshot the zeroed slot, program the surface state, then snapshot again.
    memset(expectedSsInHeap.ssPtr, 0, surfaceStateSize);
    auto surfaceStateBefore = *surfaceStateInHeap;
    mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);
    auto surfaceStateAfter = *surfaceStateInHeap;

    EXPECT_NE(0, memcmp(&surfaceStateAfter, &surfaceStateBefore, surfaceStateSize));
}
1574
// For an argument with only a bindful offset, setBufferSurfaceState() must leave
// the zeroed slot in the bindless global SSH untouched.
HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindfulThenSurfaceStateNotUpdated, MatchAny) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    // Force argument 0 into bindful addressing.
    auto &explicitArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(explicitArg.template as<NEO::ArgDescPointer>());
    pointerArg.bindless = undefined<CrossThreadDataOffset>;
    pointerArg.bindful = 0x40;

    auto &rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()];
    rootDeviceEnvironment->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
                                                     neoDevice->getNumGenericSubDevices() > 1,
                                                     neoDevice->getRootDeviceIndex(),
                                                     neoDevice->getDeviceBitfield());

    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    const size_t surfaceStateSize = hwHelper.getRenderSurfaceStateSize();
    const uint64_t gpuAddress = 0x2000;
    void *buffer = reinterpret_cast<void *>(gpuAddress);

    NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, surfaceStateSize);
    auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(surfaceStateSize, &mockAllocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);
    auto *surfaceStateInHeap = reinterpret_cast<RENDER_SURFACE_STATE *>(expectedSsInHeap.ssPtr);

    // Snapshot the zeroed slot, program the surface state, then snapshot again.
    memset(expectedSsInHeap.ssPtr, 0, surfaceStateSize);
    auto surfaceStateBefore = *surfaceStateInHeap;
    mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);
    auto surfaceStateAfter = *surfaceStateInHeap;

    EXPECT_EQ(0, memcmp(&surfaceStateAfter, &surfaceStateBefore, surfaceStateSize));
}
1609
1610 using KernelImpL3CachingTests = Test<ModuleFixture>;
1611
HWTEST2_F(KernelImpL3CachingTests,GivenKernelImpWhenSetSurfaceStateWithUnalignedMemoryThenL3CachingIsDisabled,MatchAny)1612 HWTEST2_F(KernelImpL3CachingTests, GivenKernelImpWhenSetSurfaceStateWithUnalignedMemoryThenL3CachingIsDisabled, MatchAny) {
1613 using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
1614 ze_kernel_desc_t desc = {};
1615 desc.pKernelName = kernelName.c_str();
1616
1617 WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
1618 mockKernel.module = module.get();
1619 mockKernel.initialize(&desc);
1620
1621 auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
1622 arg.bindless = undefined<CrossThreadDataOffset>;
1623 arg.bindful = 0x40;
1624
1625 neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
1626 neoDevice->getNumGenericSubDevices() > 1,
1627 neoDevice->getRootDeviceIndex(),
1628 neoDevice->getDeviceBitfield());
1629 auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
1630 size_t size = hwHelper.getRenderSurfaceStateSize();
1631 uint64_t gpuAddress = 0x2000;
1632 void *buffer = reinterpret_cast<void *>(0x20123);
1633
1634 NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size);
1635 auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(size, &mockAllocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);
1636
1637 memset(expectedSsInHeap.ssPtr, 0, size);
1638 mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);
1639 EXPECT_EQ(mockKernel.getKernelRequiresQueueUncachedMocs(), true);
1640 }
1641
1642 struct MyMockKernel : public Mock<Kernel> {
setBufferSurfaceStateL0::ult::MyMockKernel1643 void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
1644 setSurfaceStateCalled = true;
1645 }
setArgBufferWithAllocL0::ult::MyMockKernel1646 ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) override {
1647 return KernelImp::setArgBufferWithAlloc(argIndex, argVal, allocation);
1648 }
1649 bool setSurfaceStateCalled = false;
1650 };
1651
// A valid bindless offset on argument 0 must make setArgBufferWithAlloc() call
// setBufferSurfaceState().
TEST_F(KernelImpPatchBindlessTest, GivenValidBindlessOffsetWhenSetArgBufferWithAllocThensetBufferSurfaceStateCalled) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    auto &explicitArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(explicitArg.as<NEO::ArgDescPointer>());
    pointerArg.bindless = 0x40;
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;

    NEO::MockGraphicsAllocation alloc;
    mockKernel.setArgBufferWithAlloc(0, 0x1234, &alloc);

    EXPECT_TRUE(mockKernel.setSurfaceStateCalled);
}
1670
// A valid bindful offset on argument 0 must make setArgBufferWithAlloc() call
// setBufferSurfaceState().
TEST_F(KernelImpPatchBindlessTest, GivenValidBindfulOffsetWhenSetArgBufferWithAllocThensetBufferSurfaceStateCalled) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    auto &explicitArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(explicitArg.as<NEO::ArgDescPointer>());
    pointerArg.bindless = undefined<CrossThreadDataOffset>;
    pointerArg.bindful = 0x40;

    NEO::MockGraphicsAllocation alloc;
    mockKernel.setArgBufferWithAlloc(0, 0x1234, &alloc);

    EXPECT_TRUE(mockKernel.setSurfaceStateCalled);
}
1689
// When both the bindful and the bindless offsets of argument 0 are undefined,
// setArgBufferWithAlloc() must not call setBufferSurfaceState().
TEST_F(KernelImpPatchBindlessTest, GivenUndefiedBidfulAndBindlesstOffsetWhenSetArgBufferWithAllocThenSetBufferSurfaceStateIsNotCalled) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    auto &explicitArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(explicitArg.as<NEO::ArgDescPointer>());
    pointerArg.bindless = undefined<CrossThreadDataOffset>;
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;

    NEO::MockGraphicsAllocation alloc;
    mockKernel.setArgBufferWithAlloc(0, 0x1234, &alloc);

    EXPECT_FALSE(mockKernel.setSurfaceStateCalled);
}
1708
1709 using KernelBindlessUncachedMemoryTests = Test<ModuleFixture>;
1710
TEST_F(KernelBindlessUncachedMemoryTests,givenBindlessKernelAndAllocDataNoTfoundThenKernelRequiresUncachedMocsIsSet)1711 TEST_F(KernelBindlessUncachedMemoryTests, givenBindlessKernelAndAllocDataNoTfoundThenKernelRequiresUncachedMocsIsSet) {
1712 ze_kernel_desc_t desc = {};
1713 desc.pKernelName = kernelName.c_str();
1714 MyMockKernel mockKernel;
1715
1716 mockKernel.module = module.get();
1717 mockKernel.initialize(&desc);
1718
1719 auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
1720 arg.bindless = undefined<CrossThreadDataOffset>;
1721 arg.bindful = undefined<SurfaceStateHeapOffset>;
1722
1723 NEO::MockGraphicsAllocation alloc;
1724
1725 mockKernel.setArgBufferWithAlloc(0, 0x1234, &alloc);
1726 EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs());
1727 }
1728
// Binding two default (cached) device allocations in a row to argument 0 must keep
// the kernel's uncached-MOCS requirement cleared after each bind.
TEST_F(KernelBindlessUncachedMemoryTests,
       givenNonUncachedAllocationSetAsArgumentFollowedByNonUncachedAllocationThenRequiresUncachedMocsIsCorrectlySet) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    auto &explicitArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(explicitArg.as<NEO::ArgDescPointer>());
    pointerArg.bindless = undefined<CrossThreadDataOffset>;
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;

    // Allocates device memory with the given flags, binds it as argument 0, checks
    // the uncached-MOCS requirement and frees the memory again.
    auto bindDeviceAllocAndCheck = [&](ze_device_mem_alloc_flags_t allocFlags, bool expectUncachedMocs) {
        void *devicePtr = nullptr;
        ze_device_mem_alloc_desc_t deviceDesc = {};
        deviceDesc.flags = allocFlags;
        ze_result_t res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);

        auto svmData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr);
        auto alloc = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        EXPECT_NE(nullptr, alloc);

        mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc);
        if (expectUncachedMocs) {
            EXPECT_TRUE(mockKernel.getKernelRequiresUncachedMocs());
        } else {
            EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs());
        }
        context->freeMem(devicePtr);
    };

    bindDeviceAllocAndCheck(0u, false);
    bindDeviceAllocAndCheck(0u, false);
}
1778
// Binding two BIAS_UNCACHED device allocations in a row to argument 0 must keep
// the kernel's uncached-MOCS requirement set after each bind.
TEST_F(KernelBindlessUncachedMemoryTests,
       givenUncachedAllocationSetAsArgumentFollowedByUncachedAllocationThenRequiresUncachedMocsIsCorrectlySet) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    auto &explicitArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(explicitArg.as<NEO::ArgDescPointer>());
    pointerArg.bindless = undefined<CrossThreadDataOffset>;
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;

    // Allocates device memory with the given flags, binds it as argument 0, checks
    // the uncached-MOCS requirement and frees the memory again.
    auto bindDeviceAllocAndCheck = [&](ze_device_mem_alloc_flags_t allocFlags, bool expectUncachedMocs) {
        void *devicePtr = nullptr;
        ze_device_mem_alloc_desc_t deviceDesc = {};
        deviceDesc.flags = allocFlags;
        ze_result_t res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);

        auto svmData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr);
        auto alloc = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        EXPECT_NE(nullptr, alloc);

        mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc);
        if (expectUncachedMocs) {
            EXPECT_TRUE(mockKernel.getKernelRequiresUncachedMocs());
        } else {
            EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs());
        }
        context->freeMem(devicePtr);
    };

    bindDeviceAllocAndCheck(ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED, true);
    bindDeviceAllocAndCheck(ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED, true);
}
1830
// Binding a BIAS_UNCACHED device allocation to argument 0 must set the kernel's
// uncached-MOCS requirement; rebinding a default allocation must clear it again.
TEST_F(KernelBindlessUncachedMemoryTests,
       givenUncachedAllocationSetAsArgumentFollowedByNonUncachedAllocationThenRequiresUncachedMocsIsCorrectlySet) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    auto &explicitArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(explicitArg.as<NEO::ArgDescPointer>());
    pointerArg.bindless = undefined<CrossThreadDataOffset>;
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;

    // Allocates device memory with the given flags, binds it as argument 0, checks
    // the uncached-MOCS requirement and frees the memory again.
    auto bindDeviceAllocAndCheck = [&](ze_device_mem_alloc_flags_t allocFlags, bool expectUncachedMocs) {
        void *devicePtr = nullptr;
        ze_device_mem_alloc_desc_t deviceDesc = {};
        deviceDesc.flags = allocFlags;
        ze_result_t res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);

        auto svmData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr);
        auto alloc = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        EXPECT_NE(nullptr, alloc);

        mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc);
        if (expectUncachedMocs) {
            EXPECT_TRUE(mockKernel.getKernelRequiresUncachedMocs());
        } else {
            EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs());
        }
        context->freeMem(devicePtr);
    };

    bindDeviceAllocAndCheck(ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED, true);
    bindDeviceAllocAndCheck(0u, false);
}
1881
// Binding a BIAS_UNCACHED host allocation to argument 0 must set the kernel's
// uncached-MOCS requirement; rebinding a default host allocation must clear it again.
TEST_F(KernelBindlessUncachedMemoryTests,
       givenUncachedHostAllocationSetAsArgumentFollowedByNonUncachedHostAllocationThenRequiresUncachedMocsIsCorrectlySet) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    auto &explicitArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(explicitArg.as<NEO::ArgDescPointer>());
    pointerArg.bindless = undefined<CrossThreadDataOffset>;
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;

    // Allocates host memory with the given flags, binds it as argument 0, checks
    // the uncached-MOCS requirement and frees the memory again.
    auto bindHostAllocAndCheck = [&](ze_host_mem_alloc_flags_t allocFlags, bool expectUncachedMocs) {
        void *ptr = nullptr;
        ze_host_mem_alloc_desc_t hostDesc = {};
        hostDesc.flags = allocFlags;
        ze_result_t res = context->allocHostMem(&hostDesc, 16384u, 0u, &ptr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);

        auto svmData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr);
        auto alloc = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        EXPECT_NE(nullptr, alloc);

        mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc);
        if (expectUncachedMocs) {
            EXPECT_TRUE(mockKernel.getKernelRequiresUncachedMocs());
        } else {
            EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs());
        }
        context->freeMem(ptr);
    };

    bindHostAllocAndCheck(ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED, true);
    bindHostAllocAndCheck(0u, false);
}
1930
1931 template <GFXCORE_FAMILY gfxCoreFamily>
1932 struct MyMockImage : public WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>> {
1933 //MyMockImage() : WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>();
copySurfaceStateToSSHL0::ult::MyMockImage1934 void copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset, bool isMediaBlockArg) override {
1935 passedSurfaceStateHeap = surfaceStateHeap;
1936 passedSurfaceStateOffset = surfaceStateOffset;
1937 }
1938 void *passedSurfaceStateHeap = nullptr;
1939 uint32_t passedSurfaceStateOffset = 0;
1940 };
1941
// With bindless image addressing on argument 3, setArgImage() must hand the image
// the surface state pointer allocated from the global SSH together with offset 0.
HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgImageThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {
    createKernel();

    auto &rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()];
    rootDeviceEnvironment->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
                                                     neoDevice->getNumGenericSubDevices() > 1,
                                                     neoDevice->getRootDeviceIndex(),
                                                     neoDevice->getDeviceBitfield());

    // Switch the kernel descriptor to bindless image addressing for argument 3.
    auto &explicitArg = kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[3];
    auto &imageArg = const_cast<NEO::ArgDescImage &>(explicitArg.template as<NEO::ArgDescImage>());
    auto &addressingMode = kernel->kernelImmData->getDescriptor().kernelAttributes.imageAddressingMode;
    const_cast<NEO::KernelDescriptor::AddressingMode &>(addressingMode) = NEO::KernelDescriptor::Bindless;
    imageArg.bindless = 0x0;
    imageArg.bindful = undefined<SurfaceStateHeapOffset>;

    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    auto &hwHelper = NEO::HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily);
    auto surfaceStateSize = hwHelper.getRenderSurfaceStateSize();

    auto imageHW = std::make_unique<MyMockImage<gfxCoreFamily>>();
    auto ret = imageHW->initialize(device, &imageDesc);
    auto handle = imageHW->toHandle();
    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);

    // Reserve the slot up front so the pointer passed to the image can be compared against it.
    auto expectedSsInHeap = rootDeviceEnvironment->getBindlessHeapsHelper()->allocateSSInHeap(surfaceStateSize, imageHW->getAllocation(), BindlessHeapsHelper::BindlesHeapType::GLOBAL_SSH);

    kernel->setArgImage(3, sizeof(imageHW.get()), &handle);

    EXPECT_EQ(imageHW->passedSurfaceStateHeap, expectedSsInHeap.ssPtr);
    EXPECT_EQ(imageHW->passedSurfaceStateOffset, 0u);
}
1971
// With a bindful image argument, setArgImage must hand the image the kernel's
// own surface state heap together with the argument's bindful offset.
HWTEST2_F(SetKernelArg, givenImageAndBindfulKernelWhenSetArgImageThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {
    createKernel();

    auto &imageArg = const_cast<NEO::ArgDescImage &>(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[3].template as<NEO::ArgDescImage>());
    // Bind by reference: a plain `auto` would deduce a value type and the
    // assignment below would only modify a local copy, leaving the kernel
    // descriptor untouched.
    auto &addressingMode = const_cast<NEO::KernelDescriptor::AddressingMode &>(kernel->kernelImmData->getDescriptor().kernelAttributes.imageAddressingMode);
    addressingMode = NEO::KernelDescriptor::Bindful;
    imageArg.bindless = undefined<CrossThreadDataOffset>;
    imageArg.bindful = 0x40;

    ze_image_desc_t desc = {};
    desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;

    auto imageHW = std::make_unique<MyMockImage<gfxCoreFamily>>();
    auto ret = imageHW->initialize(device, &desc);
    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
    auto handle = imageHW->toHandle();

    // The argument value is the image handle, so its size is the handle size.
    kernel->setArgImage(3, sizeof(handle), &handle);

    EXPECT_EQ(imageHW->passedSurfaceStateHeap, kernel->getSurfaceStateHeapData());
    EXPECT_EQ(imageHW->passedSurfaceStateOffset, imageArg.bindful);
}
1993
1994 template <GFXCORE_FAMILY gfxCoreFamily>
1995 struct MyMockImageMediaBlock : public WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>> {
copySurfaceStateToSSHL0::ult::MyMockImageMediaBlock1996 void copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset, bool isMediaBlockArg) override {
1997 isMediaBlockPassedValue = isMediaBlockArg;
1998 }
1999 bool isMediaBlockPassedValue = false;
2000 };
2001
// Walks every combination of "device supports media block" and "argument is a
// media block image" and checks the flag forwarded to copySurfaceStateToSSH.
// The flag is expected to be set only when both conditions hold.
HWTEST2_F(SetKernelArg, givenSupportsMediaBlockAndIsMediaBlockImageWhenSetArgImageIsCalledThenIsMediaBlockArgIsPassedCorrectly, ImageSupport) {
    auto hwInfo = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
    createKernel();

    const auto argIndex = 3u;
    auto &arg = const_cast<NEO::ArgDescriptor &>(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex]);

    ze_image_desc_t desc = {};
    desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    auto imageHW = std::make_unique<MyMockImageMediaBlock<gfxCoreFamily>>();
    auto ret = imageHW->initialize(device, &desc);
    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
    auto handle = imageHW->toHandle();

    struct MediaBlockCase {
        bool deviceSupportsMediaBlock;
        bool argIsMediaBlockImage;
        bool expectedPassedValue;
    };
    // Same order as the scenarios were originally exercised.
    const MediaBlockCase mediaBlockCases[] = {
        {true, true, true},
        {false, true, false},
        {true, false, false},
        {false, false, false},
    };

    for (const auto &mediaBlockCase : mediaBlockCases) {
        hwInfo->capabilityTable.supportsMediaBlock = mediaBlockCase.deviceSupportsMediaBlock;
        arg.getExtendedTypeInfo().isMediaBlockImage = mediaBlockCase.argIsMediaBlockImage;
        kernel->setArgImage(argIndex, sizeof(handle), &handle);
        EXPECT_EQ(mediaBlockCase.expectedPassedValue, imageHW->isMediaBlockPassedValue);
    }
}
2039
2040 using ImportHostPointerSetKernelArg = Test<ImportHostPointerModuleFixture>;
TEST_F(ImportHostPointerSetKernelArg,givenHostPointerImportedWhenSettingKernelArgThenUseHostPointerAllocation)2041 TEST_F(ImportHostPointerSetKernelArg, givenHostPointerImportedWhenSettingKernelArgThenUseHostPointerAllocation) {
2042 createKernel();
2043
2044 auto ret = driverHandle->importExternalPointer(hostPointer, MemoryConstants::pageSize);
2045 EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
2046
2047 ret = kernel->setArgBuffer(0, sizeof(hostPointer), &hostPointer);
2048 EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
2049
2050 ret = driverHandle->releaseImportedPointer(hostPointer);
2051 EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
2052 }
2053
2054 class KernelGlobalWorkOffsetTests : public ModuleFixture, public ::testing::Test {
2055 public:
SetUp()2056 void SetUp() override {
2057 ModuleFixture::SetUp();
2058
2059 ze_kernel_desc_t kernelDesc = {};
2060 kernelDesc.pKernelName = kernelName.c_str();
2061
2062 ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle);
2063 EXPECT_EQ(ZE_RESULT_SUCCESS, res);
2064
2065 kernel = L0::Kernel::fromHandle(kernelHandle);
2066 }
2067
TearDown()2068 void TearDown() override {
2069 Kernel::fromHandle(kernelHandle)->destroy();
2070 ModuleFixture::TearDown();
2071 }
2072
2073 ze_kernel_handle_t kernelHandle;
2074 L0::Kernel *kernel = nullptr;
2075 };
2076
// setGlobalOffsetExp must store the three offsets so that getGlobalOffsets
// reports them back in x, y, z order.
TEST_F(KernelGlobalWorkOffsetTests, givenCallToSetGlobalWorkOffsetThenOffsetsAreSet) {
    const uint32_t expectedOffsets[3] = {10, 20, 30};

    ze_result_t res = kernel->setGlobalOffsetExp(expectedOffsets[0], expectedOffsets[1], expectedOffsets[2]);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    auto *kernelImp = static_cast<KernelImp *>(kernel);
    for (auto dimension = 0u; dimension < 3u; ++dimension) {
        EXPECT_EQ(expectedOffsets[dimension], kernelImp->getGlobalOffsets()[dimension]);
    }
}
2090
// After setGlobalOffsetExp + patchGlobalOffset, the cross-thread data must
// hold the three offsets at the locations named by the kernel descriptor's
// dispatchTraits.globalWorkOffset entries.
TEST_F(KernelGlobalWorkOffsetTests, whenSettingGlobalOffsetThenCrossThreadDataIsPatched) {
    uint32_t globalOffsetx = 10;
    uint32_t globalOffsety = 20;
    uint32_t globalOffsetz = 30;

    ze_result_t res = kernel->setGlobalOffsetExp(globalOffsetx, globalOffsety, globalOffsetz);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
    kernelImp->patchGlobalOffset();

    const NEO::KernelDescriptor &desc = kernelImp->getImmutableData()->getDescriptor();
    auto dst = ArrayRef<const uint8_t>(kernelImp->getCrossThreadData(), kernelImp->getCrossThreadDataSize());

    // Read the whole 32-bit value. Dereferencing the uint8_t iterator directly
    // would compare only a single byte of each patched offset, so a wrong
    // value in the remaining bytes would go unnoticed.
    auto readPatchedOffset = [&dst](auto crossThreadOffset) {
        return *reinterpret_cast<const uint32_t *>(dst.begin() + crossThreadOffset);
    };

    const auto &globalWorkOffset = desc.payloadMappings.dispatchTraits.globalWorkOffset;
    EXPECT_EQ(readPatchedOffset(globalWorkOffset[0]), globalOffsetx);
    EXPECT_EQ(readPatchedOffset(globalWorkOffset[1]), globalOffsety);
    EXPECT_EQ(readPatchedOffset(globalWorkOffset[2]), globalOffsetz);
}
2108
2109 using KernelWorkDimTests = Test<ModuleImmutableDataFixture>;
2110
TEST_F(KernelWorkDimTests,givenGroupCountsWhenPatchingWorkDimThenCrossThreadDataIsPatched)2111 TEST_F(KernelWorkDimTests, givenGroupCountsWhenPatchingWorkDimThenCrossThreadDataIsPatched) {
2112 uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
2113
2114 std::unique_ptr<MockImmutableData> mockKernelImmData =
2115 std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
2116
2117 createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, false, mockKernelImmData.get());
2118 auto kernel = std::make_unique<MockKernel>(module.get());
2119 createKernel(kernel.get());
2120 kernel->setCrossThreadData(sizeof(uint32_t));
2121
2122 mockKernelImmData->mockKernelDescriptor->payloadMappings.dispatchTraits.workDim = 0x0u;
2123
2124 auto destinationBuffer = ArrayRef<const uint8_t>(kernel->getCrossThreadData(), kernel->getCrossThreadDataSize());
2125 auto &kernelDescriptor = mockKernelImmData->getDescriptor();
2126 auto workDimInCrossThreadDataPtr = destinationBuffer.begin() + kernelDescriptor.payloadMappings.dispatchTraits.workDim;
2127 EXPECT_EQ(*workDimInCrossThreadDataPtr, 0u);
2128
2129 std::array<std::array<uint32_t, 7>, 8> sizesCountsWorkDim = {{{2, 1, 1, 1, 1, 1, 1},
2130 {1, 1, 1, 1, 1, 1, 1},
2131 {1, 2, 1, 2, 1, 1, 2},
2132 {1, 2, 1, 1, 1, 1, 2},
2133 {1, 1, 1, 1, 2, 1, 2},
2134 {1, 1, 1, 2, 2, 2, 3},
2135 {1, 1, 2, 1, 1, 1, 3},
2136 {1, 1, 1, 1, 1, 2, 3}}};
2137
2138 for (auto &[groupSizeX, groupSizeY, groupSizeZ, groupCountX, groupCountY, groupCountZ, expectedWorkDim] : sizesCountsWorkDim) {
2139 ze_result_t res = kernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
2140 EXPECT_EQ(res, ZE_RESULT_SUCCESS);
2141 kernel->setGroupCount(groupCountX, groupCountY, groupCountZ);
2142 EXPECT_EQ(*workDimInCrossThreadDataPtr, expectedWorkDim);
2143 }
2144 }
2145
2146 using KernelPrintHandlerTest = Test<ModuleFixture>;
2147 struct MyPrintfHandler : public PrintfHandler {
getPrintfSurfaceInitialDataSizeL0::ult::MyPrintfHandler2148 static uint32_t getPrintfSurfaceInitialDataSize() {
2149 return PrintfHandler::printfSurfaceInitialDataSize;
2150 }
2151 };
2152
// After printPrintfOutput, the first 32-bit word of the kernel's printf buffer
// must equal the value reported by
// MyPrintfHandler::getPrintfSurfaceInitialDataSize().
TEST_F(KernelPrintHandlerTest, whenPrintPrintfOutputIsCalledThenPrintfBufferIsUsed) {
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();

    kernel = std::make_unique<WhiteBox<::L0::Kernel>>();
    kernel->module = module.get();
    kernel->initialize(&desc);

    // Fatal check: the buffer is dereferenced below, so a missing printf
    // buffer must stop the test instead of crashing it.
    ASSERT_NE(nullptr, kernel->printfBuffer);
    kernel->printPrintfOutput();
    auto buffer = *reinterpret_cast<uint32_t *>(kernel->printfBuffer->getUnderlyingBuffer());
    EXPECT_EQ(buffer, MyPrintfHandler::getPrintfSurfaceInitialDataSize());
}
2166
2167 using PrintfTest = Test<DeviceFixture>;
2168
TEST_F(PrintfTest,givenKernelWithPrintfThenPrintfBufferIsCreated)2169 TEST_F(PrintfTest, givenKernelWithPrintfThenPrintfBufferIsCreated) {
2170 Mock<Module> mockModule(this->device, nullptr);
2171 Mock<Kernel> mockKernel;
2172 mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
2173 mockKernel.module = &mockModule;
2174
2175 EXPECT_TRUE(mockKernel.getImmutableData()->getDescriptor().kernelAttributes.flags.usesPrintf);
2176
2177 ze_kernel_desc_t kernelDesc = {};
2178 kernelDesc.pKernelName = "mock";
2179 mockKernel.createPrintfBuffer();
2180 EXPECT_NE(nullptr, mockKernel.getPrintfBufferAllocation());
2181 }
2182
// A kernel whose descriptor clears usesPrintf must not get a printf buffer
// allocation from createPrintfBuffer.
TEST_F(PrintfTest, GivenKernelNotUsingPrintfWhenCreatingPrintfBufferThenAllocationIsNotCreated) {
    Mock<Module> mockModule(this->device, nullptr);
    Mock<Kernel> mockKernel;
    mockKernel.descriptor.kernelAttributes.flags.usesPrintf = false;
    mockKernel.module = &mockModule;

    mockKernel.createPrintfBuffer();
    EXPECT_EQ(nullptr, mockKernel.getPrintfBufferAllocation());
}
2194
// createPrintfBuffer must append the printf buffer allocation to the kernel's
// residency container.
TEST_F(PrintfTest, WhenCreatingPrintfBufferThenAllocationAddedToResidencyContainer) {
    Mock<Module> mockModule(this->device, nullptr);
    Mock<Kernel> mockKernel;
    mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
    mockKernel.module = &mockModule;

    mockKernel.createPrintfBuffer();

    auto printfBufferAllocation = mockKernel.getPrintfBufferAllocation();
    EXPECT_NE(nullptr, printfBufferAllocation);

    // Fatal check: reading the last element of an empty container is invalid,
    // so stop here rather than index out of range.
    ASSERT_NE(0u, mockKernel.residencyContainer.size());
    EXPECT_EQ(mockKernel.residencyContainer.back(), printfBufferAllocation);
}
2211
// createPrintfBuffer must write the printf buffer's GPU address into the
// cross-thread data at the printfSurfaceAddress stateless offset.
TEST_F(PrintfTest, WhenCreatingPrintfBufferThenCrossThreadDataIsPatched) {
    Mock<Module> mockModule(this->device, nullptr);
    Mock<Kernel> mockKernel;
    mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
    mockKernel.module = &mockModule;

    // Backing storage owned by the test; the kernel only borrows it (see the
    // release() at the end).
    auto crossThreadData = std::make_unique<uint32_t[]>(4);

    mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless = 0;
    mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.pointerSize = sizeof(uintptr_t);
    mockKernel.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData.get()));
    mockKernel.crossThreadDataSize = sizeof(uint32_t[4]);

    mockKernel.createPrintfBuffer();

    auto printfBufferAllocation = mockKernel.getPrintfBufferAllocation();
    EXPECT_NE(nullptr, printfBufferAllocation);

    // Guarded instead of a fatal assertion so the release() below always
    // runs; an early return would leave two owners for the same memory.
    if (printfBufferAllocation != nullptr) {
        auto printfBufferAddressPatched = *reinterpret_cast<uintptr_t *>(crossThreadData.get());
        auto printfBufferGpuAddressOffset = static_cast<uintptr_t>(printfBufferAllocation->getGpuAddressToPatch());
        EXPECT_EQ(printfBufferGpuAddressOffset, printfBufferAddressPatched);
    }

    // Hand ownership back to the local unique_ptr to avoid a double free.
    mockKernel.crossThreadData.release();
}
2239
2240 using KernelImplicitArgTests = Test<ModuleImmutableDataFixture>;
2241
TEST_F(KernelImplicitArgTests,givenKernelWithImplicitArgsWhenInitializeThenPrintfSurfaceIsCreatedAndProperlyPatchedInImplicitArgs)2242 TEST_F(KernelImplicitArgTests, givenKernelWithImplicitArgsWhenInitializeThenPrintfSurfaceIsCreatedAndProperlyPatchedInImplicitArgs) {
2243 std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
2244 mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;
2245 mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesPrintf = false;
2246
2247 createModuleFromBinary(0u, false, mockKernelImmData.get());
2248
2249 auto kernel = std::make_unique<MockKernel>(module.get());
2250
2251 ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
2252 kernel->initialize(&kernelDesc);
2253
2254 EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
2255 auto pImplicitArgs = kernel->getImplicitArgs();
2256 ASSERT_NE(nullptr, pImplicitArgs);
2257
2258 auto printfSurface = kernel->getPrintfBufferAllocation();
2259 ASSERT_NE(nullptr, printfSurface);
2260
2261 EXPECT_NE(0u, pImplicitArgs->printfBufferPtr);
2262 EXPECT_EQ(printfSurface->getGpuAddress(), pImplicitArgs->printfBufferPtr);
2263 }
2264
// A kernel that requires implicit args must expose an ImplicitArgs structure
// after initialize, with structSize and structVersion filled in.
TEST_F(KernelImplicitArgTests, givenImplicitArgsRequiredWhenCreatingKernelThenImplicitArgsAreCreated) {
    auto mockKernelImmData = std::make_unique<MockImmutableData>(0u);
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;

    createModuleFromBinary(0u, false, mockKernelImmData.get());

    auto kernel = std::make_unique<MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.stype = ZE_STRUCTURE_TYPE_KERNEL_DESC;
    kernel->initialize(&kernelDesc);

    EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);

    auto implicitArgs = kernel->getImplicitArgs();
    ASSERT_NE(nullptr, implicitArgs);

    EXPECT_EQ(sizeof(ImplicitArgs), implicitArgs->structSize);
    EXPECT_EQ(0u, implicitArgs->structVersion);
}
2284
// Setting group size, group count and global offset on a kernel with implicit
// args must be reflected field-for-field in its ImplicitArgs structure.
TEST_F(KernelImplicitArgTests, givenKernelWithImplicitArgsWhenSettingKernelParamsThenImplicitArgsAreUpdated) {
    auto mockKernelImmData = std::make_unique<MockImmutableData>(0u);
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;
    auto simd = mockKernelImmData->kernelDescriptor->kernelAttributes.simdSize;

    createModuleFromBinary(0u, false, mockKernelImmData.get());

    auto kernel = std::make_unique<MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.stype = ZE_STRUCTURE_TYPE_KERNEL_DESC;
    kernel->initialize(&kernelDesc);

    EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
    auto actualImplicitArgs = kernel->getImplicitArgs();
    ASSERT_NE(nullptr, actualImplicitArgs);

    // Expected image of the structure: global size is local size times group
    // count per dimension (4*3, 5*2, 6*1).
    ImplicitArgs expectedImplicitArgs{sizeof(ImplicitArgs)};
    expectedImplicitArgs.numWorkDim = 3;
    expectedImplicitArgs.simdWidth = simd;

    expectedImplicitArgs.localSizeX = 4;
    expectedImplicitArgs.localSizeY = 5;
    expectedImplicitArgs.localSizeZ = 6;

    expectedImplicitArgs.groupCountX = 3;
    expectedImplicitArgs.groupCountY = 2;
    expectedImplicitArgs.groupCountZ = 1;

    expectedImplicitArgs.globalSizeX = 12;
    expectedImplicitArgs.globalSizeY = 10;
    expectedImplicitArgs.globalSizeZ = 6;

    expectedImplicitArgs.globalOffsetX = 1;
    expectedImplicitArgs.globalOffsetY = 2;
    expectedImplicitArgs.globalOffsetZ = 3;

    expectedImplicitArgs.printfBufferPtr = kernel->getPrintfBufferAllocation()->getGpuAddress();

    kernel->setGroupSize(4, 5, 6);
    kernel->setGroupCount(3, 2, 1);
    kernel->setGlobalOffsetExp(1, 2, 3);
    kernel->patchGlobalOffset();

    const auto comparison = memcmp(actualImplicitArgs, &expectedImplicitArgs, sizeof(ImplicitArgs));
    EXPECT_EQ(0, comparison);
}
2324
// With implicit args enabled and a string registered in printfStringsMap,
// printPrintfOutput must emit exactly that string on stdout.
TEST_F(KernelImplicitArgTests, givenKernelWithImplicitArgsAndPrintfStringsMapWhenPrintOutputThenProperStringIsPrinted) {
    auto mockKernelImmData = std::make_unique<MockImmutableData>(0u);

    const std::string expectedString("test123");
    auto descriptor = mockKernelImmData->kernelDescriptor;
    descriptor->kernelAttributes.flags.requiresImplicitArgs = true;
    descriptor->kernelAttributes.flags.usesPrintf = false;
    descriptor->kernelAttributes.flags.usesStringMapForPrintf = false;
    descriptor->kernelMetadata.printfStringsMap.insert(std::make_pair(0u, expectedString));

    createModuleFromBinary(0u, false, mockKernelImmData.get());

    auto kernel = std::make_unique<MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.stype = ZE_STRUCTURE_TYPE_KERNEL_DESC;
    kernel->initialize(&kernelDesc);

    // Hand-craft the printf buffer contents.
    // NOTE(review): presumably word 0 is the number of bytes in use and word 1
    // is the index of the string to print (0, the key registered above) —
    // confirm against the printf handler's buffer layout.
    auto printfBufferWords = reinterpret_cast<uint32_t *>(kernel->getPrintfBufferAllocation()->getUnderlyingBuffer());
    printfBufferWords[0] = 8;
    printfBufferWords[1] = 0;

    EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
    ASSERT_NE(nullptr, kernel->getImplicitArgs());

    testing::internal::CaptureStdout();
    kernel->printPrintfOutput();
    const std::string capturedOutput = testing::internal::GetCapturedStdout();
    EXPECT_STREQ(expectedString.c_str(), capturedOutput.c_str());
}
2354
// For a kernel without implicit args, patchImplicitArgs must neither move the
// output pointer nor touch the destination bytes, and the patching size must
// be reported as zero.
TEST_F(KernelImplicitArgTests, givenKernelWithoutImplicitArgsWhenPatchingImplicitArgsThenNothingHappens) {
    auto mockKernelImmData = std::make_unique<MockImmutableData>(0u);
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false;

    createModuleFromBinary(0u, false, mockKernelImmData.get());

    auto kernel = std::make_unique<MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.stype = ZE_STRUCTURE_TYPE_KERNEL_DESC;
    kernel->initialize(&kernelDesc);
    EXPECT_EQ(nullptr, kernel->getImplicitArgs());

    // Two buffers filled with the same pattern: one is offered for patching,
    // the other stays as the untouched reference.
    const int fillPattern = 0xcd;
    uint8_t data[64]{};
    uint8_t initData[64]{};
    memset(data, fillPattern, sizeof(data));
    memset(initData, fillPattern, sizeof(initData));

    EXPECT_EQ(0u, kernel->getSizeForImplicitArgsPatching());

    void *dataPtr = data;
    kernel->patchImplicitArgs(dataPtr);

    EXPECT_EQ(dataPtr, data);
    EXPECT_EQ(0, memcmp(data, initData, sizeof(data)));
}
2381
2382 } // namespace ult
2383 } // namespace L0
2384