1 /*
2  * Copyright (C) 2020-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "shared/source/device_binary_format/patchtokens_decoder.h"
9 #include "shared/source/helpers/local_memory_access_modes.h"
10 #include "shared/source/helpers/ray_tracing_helper.h"
11 #include "shared/source/kernel/kernel_descriptor.h"
12 #include "shared/source/program/kernel_info.h"
13 #include "shared/source/program/kernel_info_from_patchtokens.h"
14 #include "shared/source/utilities/stackvec.h"
15 #include "shared/test/common/helpers/debug_manager_state_restore.h"
16 #include "shared/test/common/helpers/engine_descriptor_helper.h"
17 #include "shared/test/common/mocks/mock_device.h"
18 #include "shared/test/common/mocks/mock_graphics_allocation.h"
19 #include "shared/test/common/test_macros/test.h"
20 #include "shared/test/unit_test/compiler_interface/linker_mock.h"
21 #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h"
22 
23 #include "level_zero/core/source/debugger/debugger_l0.h"
24 #include "level_zero/core/source/image/image_format_desc_helper.h"
25 #include "level_zero/core/source/image/image_hw.h"
26 #include "level_zero/core/source/kernel/kernel_hw.h"
27 #include "level_zero/core/source/module/module_imp.h"
28 #include "level_zero/core/source/printf_handler/printf_handler.h"
29 #include "level_zero/core/source/sampler/sampler_hw.h"
30 #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
31 #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
32 #include "level_zero/core/test/unit_tests/mocks/mock_device.h"
33 #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
34 #include "level_zero/core/test/unit_tests/mocks/mock_module.h"
35 
36 namespace NEO {
37 void populatePointerKernelArg(ArgDescPointer &dst,
38                               CrossThreadDataOffset stateless, uint8_t pointerSize, SurfaceStateHeapOffset bindful, CrossThreadDataOffset bindless,
39                               KernelDescriptor::AddressingMode addressingMode);
40 }
41 
42 namespace L0 {
43 namespace ult {
44 
45 using KernelInitTest = Test<ModuleImmutableDataFixture>;
46 
TEST_F(KernelInitTest,givenKernelToInitWhenItHasUnknownArgThenUnknowKernelArgHandlerAssigned)47 TEST_F(KernelInitTest, givenKernelToInitWhenItHasUnknownArgThenUnknowKernelArgHandlerAssigned) {
48     uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
49 
50     std::unique_ptr<MockImmutableData> mockKernelImmData =
51         std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
52 
53     createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, false, mockKernelImmData.get());
54     std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
55     kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
56     ze_kernel_desc_t desc = {};
57     desc.pKernelName = kernelName.c_str();
58     mockKernelImmData->resizeExplicitArgs(1);
59     kernel->initialize(&desc);
60     EXPECT_EQ(kernel->kernelArgHandlers[0], &KernelImp::setArgUnknown);
61     EXPECT_EQ(mockKernelImmData->getDescriptor().payloadMappings.explicitArgs[0].type, NEO::ArgDescriptor::ArgTUnknown);
62 }
63 
// Calling setArgUnknown with a null value on a mocked kernel must report success.
TEST(KernelArgTest, givenKernelWhenSetArgUnknownCalledThenSuccessRteurned) {
    Mock<Kernel> kernel;
    const auto status = kernel.setArgUnknown(0, 0, nullptr);
    EXPECT_EQ(status, ZE_RESULT_SUCCESS);
}
68 
69 using KernelImpSetGroupSizeTest = Test<DeviceFixture>;
70 
TEST_F(KernelImpSetGroupSizeTest,WhenCalculatingLocalIdsThenGrfSizeIsTakenFromCapabilityTable)71 TEST_F(KernelImpSetGroupSizeTest, WhenCalculatingLocalIdsThenGrfSizeIsTakenFromCapabilityTable) {
72     Mock<Kernel> mockKernel;
73     Mock<Module> mockModule(this->device, nullptr);
74     mockKernel.descriptor.kernelAttributes.simdSize = 1;
75     mockKernel.descriptor.kernelAttributes.numLocalIdChannels = 3;
76     mockKernel.module = &mockModule;
77     auto grfSize = mockModule.getDevice()->getHwInfo().capabilityTable.grfSize;
78     uint32_t groupSize[3] = {2, 3, 5};
79     auto ret = mockKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]);
80     EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
81     EXPECT_EQ(groupSize[0] * groupSize[1] * groupSize[2], mockKernel.numThreadsPerThreadGroup);
82     EXPECT_EQ(grfSize * groupSize[0] * groupSize[1] * groupSize[2], mockKernel.perThreadDataSizeForWholeThreadGroup);
83     ASSERT_LE(grfSize * groupSize[0] * groupSize[1] * groupSize[2], mockKernel.perThreadDataSizeForWholeThreadGroup);
84     using LocalIdT = unsigned short;
85     auto threadOffsetInLocalIds = grfSize / sizeof(LocalIdT);
86     auto generatedLocalIds = reinterpret_cast<LocalIdT *>(mockKernel.perThreadDataForWholeThreadGroup);
87 
88     uint32_t threadId = 0;
89     for (uint32_t z = 0; z < groupSize[2]; ++z) {
90         for (uint32_t y = 0; y < groupSize[1]; ++y) {
91             for (uint32_t x = 0; x < groupSize[0]; ++x) {
92                 EXPECT_EQ(x, generatedLocalIds[0 + threadId * threadOffsetInLocalIds]) << " thread : " << threadId;
93                 EXPECT_EQ(y, generatedLocalIds[1 + threadId * threadOffsetInLocalIds]) << " thread : " << threadId;
94                 EXPECT_EQ(z, generatedLocalIds[2 + threadId * threadOffsetInLocalIds]) << " thread : " << threadId;
95                 ++threadId;
96             }
97         }
98     }
99 }
100 
// With runtime local-id generation switched off, setGroupSize must still
// record the thread count but leave all per-thread data empty.
TEST_F(KernelImpSetGroupSizeTest, givenLocalIdGenerationByRuntimeDisabledWhenSettingGroupSizeThenLocalIdsAreNotGenerated) {
    Mock<Kernel> kernel;
    Mock<Module> owningModule(this->device, nullptr);
    kernel.descriptor.kernelAttributes.simdSize = 1;
    kernel.module = &owningModule;
    kernel.kernelRequiresGenerationOfLocalIdsByRuntime = false;

    uint32_t dims[3] = {2, 3, 5};
    const auto threadCount = dims[0] * dims[1] * dims[2];

    auto status = kernel.setGroupSize(dims[0], dims[1], dims[2]);

    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(threadCount, kernel.numThreadsPerThreadGroup);
    EXPECT_EQ(0u, kernel.perThreadDataSizeForWholeThreadGroup);
    EXPECT_EQ(0u, kernel.perThreadDataSize);
    EXPECT_EQ(nullptr, kernel.perThreadDataForWholeThreadGroup);
}
116 
// The kernel requires a 2x2x2 work-group; requesting 1x1x1 must be rejected
// with ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION.
TEST_F(KernelImpSetGroupSizeTest, givenIncorrectGroupSizeWhenSettingGroupSizeThenInvalidGroupSizeDimensionErrorIsReturned) {
    Mock<Kernel> kernel;
    Mock<Module> owningModule(this->device, nullptr);

    auto &attributes = kernel.descriptor.kernelAttributes;
    for (uint32_t dim = 0u; dim < 3u; ++dim) {
        attributes.requiredWorkgroupSize[dim] = 2;
    }
    kernel.module = &owningModule;

    uint32_t requested[3] = {1, 1, 1};
    auto status = kernel.setGroupSize(requested[0], requested[1], requested[2]);

    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, status);
}
129 
// A group size of 0x0x0 must be rejected with ZE_RESULT_ERROR_INVALID_ARGUMENT,
// even though the kernel declares a required 2x2x2 work-group.
TEST_F(KernelImpSetGroupSizeTest, givenZeroGroupSizeWhenSettingGroupSizeThenInvalidArgumentErrorIsReturned) {
    Mock<Kernel> kernel;
    Mock<Module> owningModule(this->device, nullptr);

    auto &attributes = kernel.descriptor.kernelAttributes;
    for (uint32_t dim = 0u; dim < 3u; ++dim) {
        attributes.requiredWorkgroupSize[dim] = 2;
    }
    kernel.module = &owningModule;

    uint32_t requested[3] = {0, 0, 0};
    auto status = kernel.setGroupSize(requested[0], requested[1], requested[2]);

    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, status);
}
142 
143 using SetKernelArg = Test<ModuleFixture>;
144 using ImageSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
145 
HWTEST2_F(SetKernelArg,givenImageAndKernelWhenSetArgImageThenCrossThreadDataIsSet,ImageSupport)146 HWTEST2_F(SetKernelArg, givenImageAndKernelWhenSetArgImageThenCrossThreadDataIsSet, ImageSupport) {
147     createKernel();
148 
149     auto &imageArg = const_cast<NEO::ArgDescImage &>(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[3].as<NEO::ArgDescImage>());
150     imageArg.metadataPayload.imgWidth = 0x0;
151     imageArg.metadataPayload.imgHeight = 0x8;
152     imageArg.metadataPayload.imgDepth = 0x10;
153 
154     imageArg.metadataPayload.arraySize = 0x18;
155     imageArg.metadataPayload.numSamples = 0x1c;
156     imageArg.metadataPayload.channelDataType = 0x20;
157     imageArg.metadataPayload.channelOrder = 0x24;
158     imageArg.metadataPayload.numMipLevels = 0x28;
159 
160     imageArg.metadataPayload.flatWidth = 0x30;
161     imageArg.metadataPayload.flatHeight = 0x38;
162     imageArg.metadataPayload.flatPitch = 0x40;
163     imageArg.metadataPayload.flatBaseOffset = 0x48;
164 
165     ze_image_desc_t desc = {};
166 
167     desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
168     desc.type = ZE_IMAGE_TYPE_3D;
169     desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8;
170     desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT;
171     desc.width = 11;
172     desc.height = 13;
173     desc.depth = 17;
174 
175     desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A;
176     desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0;
177     desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1;
178     desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X;
179 
180     auto imageHW = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
181     auto ret = imageHW->initialize(device, &desc);
182     ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
183 
184     auto handle = imageHW->toHandle();
185     auto imgInfo = imageHW->getImageInfo();
186     auto pixelSize = imgInfo.surfaceFormat->ImageElementSizeInBytes;
187 
188     kernel->setArgImage(3, sizeof(imageHW.get()), &handle);
189 
190     auto crossThreadData = kernel->getCrossThreadData();
191 
192     auto pImgWidth = ptrOffset(crossThreadData, imageArg.metadataPayload.imgWidth);
193     EXPECT_EQ(imgInfo.imgDesc.imageWidth, *pImgWidth);
194 
195     auto pImgHeight = ptrOffset(crossThreadData, imageArg.metadataPayload.imgHeight);
196     EXPECT_EQ(imgInfo.imgDesc.imageHeight, *pImgHeight);
197 
198     auto pImgDepth = ptrOffset(crossThreadData, imageArg.metadataPayload.imgDepth);
199     EXPECT_EQ(imgInfo.imgDesc.imageDepth, *pImgDepth);
200 
201     auto pArraySize = ptrOffset(crossThreadData, imageArg.metadataPayload.arraySize);
202     EXPECT_EQ(imgInfo.imgDesc.imageArraySize, *pArraySize);
203 
204     auto pNumSamples = ptrOffset(crossThreadData, imageArg.metadataPayload.numSamples);
205     EXPECT_EQ(imgInfo.imgDesc.numSamples, *pNumSamples);
206 
207     auto pNumMipLevels = ptrOffset(crossThreadData, imageArg.metadataPayload.numMipLevels);
208     EXPECT_EQ(imgInfo.imgDesc.numMipLevels, *pNumMipLevels);
209 
210     auto pFlatBaseOffset = ptrOffset(crossThreadData, imageArg.metadataPayload.flatBaseOffset);
211     EXPECT_EQ(imageHW->getAllocation()->getGpuAddress(), *reinterpret_cast<const uint64_t *>(pFlatBaseOffset));
212 
213     auto pFlatWidth = ptrOffset(crossThreadData, imageArg.metadataPayload.flatWidth);
214     EXPECT_EQ((imgInfo.imgDesc.imageWidth * pixelSize) - 1u, *pFlatWidth);
215 
216     auto pFlatHeight = ptrOffset(crossThreadData, imageArg.metadataPayload.flatHeight);
217     EXPECT_EQ((imgInfo.imgDesc.imageHeight * pixelSize) - 1u, *pFlatHeight);
218 
219     auto pFlatPitch = ptrOffset(crossThreadData, imageArg.metadataPayload.flatPitch);
220     EXPECT_EQ(imgInfo.imgDesc.imageRowPitch - 1u, *pFlatPitch);
221 
222     auto pChannelDataType = ptrOffset(crossThreadData, imageArg.metadataPayload.channelDataType);
223     EXPECT_EQ(getClChannelDataType(desc.format), *reinterpret_cast<const cl_channel_type *>(pChannelDataType));
224 
225     auto pChannelOrder = ptrOffset(crossThreadData, imageArg.metadataPayload.channelOrder);
226     EXPECT_EQ(getClChannelOrder(desc.format), *reinterpret_cast<const cl_channel_order *>(pChannelOrder));
227 }
228 
// Binds a clamp/nearest/normalized sampler to explicit argument 5 and checks
// the three sampler metadata fields written into cross-thread data.
HWTEST2_F(SetKernelArg, givenSamplerAndKernelWhenSetArgSamplerThenCrossThreadDataIsSet, ImageSupport) {
    createKernel();

    // Override the metadata offsets so each field has its own known slot.
    auto &samplerArg = const_cast<NEO::ArgDescSampler &>(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[5].as<NEO::ArgDescSampler>());
    auto &payload = samplerArg.metadataPayload;
    payload.samplerAddressingMode = 0x0;
    payload.samplerNormalizedCoords = 0x4;
    payload.samplerSnapWa = 0x8;

    ze_sampler_desc_t desc = {};
    desc.addressMode = ZE_SAMPLER_ADDRESS_MODE_CLAMP;
    desc.filterMode = ZE_SAMPLER_FILTER_MODE_NEAREST;
    desc.isNormalized = true;

    auto sampler = std::make_unique<WhiteBox<::L0::SamplerCoreFamily<gfxCoreFamily>>>();
    auto initStatus = sampler->initialize(device, &desc);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initStatus);

    auto handle = sampler->toHandle();
    kernel->setArgSampler(5, sizeof(sampler.get()), &handle);

    auto crossThreadData = kernel->getCrossThreadData();
    // Resolves a metadata offset to its location inside cross-thread data.
    auto fieldAt = [&](auto offset) { return ptrOffset(crossThreadData, offset); };

    auto pSamplerSnapWa = fieldAt(payload.samplerSnapWa);
    EXPECT_EQ(std::numeric_limits<uint32_t>::max(), *reinterpret_cast<const uint32_t *>(pSamplerSnapWa));

    auto pSamplerAddressingMode = fieldAt(payload.samplerAddressingMode);
    EXPECT_EQ(0x01, *pSamplerAddressingMode);

    auto pSamplerNormalizedCoords = fieldAt(payload.samplerNormalizedCoords);
    EXPECT_EQ(0x08, *pSamplerNormalizedCoords);
}
263 
264 using ArgSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
265 
HWTEST2_F(SetKernelArg,givenBufferArgumentWhichHasNotBeenAllocatedByRuntimeThenInvalidArgumentIsReturned,ArgSupport)266 HWTEST2_F(SetKernelArg, givenBufferArgumentWhichHasNotBeenAllocatedByRuntimeThenInvalidArgumentIsReturned, ArgSupport) {
267     createKernel();
268 
269     uint64_t hostAddress = 0x1234;
270 
271     ze_result_t res = kernel->setArgBuffer(0, sizeof(hostAddress), &hostAddress);
272 
273     EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res);
274 }
275 
276 class KernelImmutableDataFixture : public ModuleImmutableDataFixture {
277   public:
SetUp()278     void SetUp() {
279         ModuleImmutableDataFixture::SetUp();
280     }
281 
TearDown()282     void TearDown() {
283         ModuleImmutableDataFixture::TearDown();
284     }
285 };
286 
287 using KernelImmutableDataTests = Test<KernelImmutableDataFixture>;
288 
TEST_F(KernelImmutableDataTests,givenKernelInitializedWithNoPrivateMemoryThenPrivateMemoryIsNull)289 TEST_F(KernelImmutableDataTests, givenKernelInitializedWithNoPrivateMemoryThenPrivateMemoryIsNull) {
290     uint32_t perHwThreadPrivateMemorySizeRequested = 0u;
291     bool isInternal = false;
292 
293     std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
294 
295     createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get());
296 
297     std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
298     kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
299 
300     createKernel(kernel.get());
301 
302     EXPECT_EQ(nullptr, kernel->privateMemoryGraphicsAllocation);
303 }
304 
// A kernel requesting 32 bytes of private memory per HW thread must receive an
// allocation sized as that request multiplied by computeUnitsUsedForScratch.
TEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenPrivateMemoryIsCreated) {
    uint32_t privateMemorySizePerHwThread = 32u;
    const bool internalModule = false;

    auto immData = std::make_unique<MockImmutableData>(privateMemorySizePerHwThread);
    createModuleFromBinary(privateMemorySizePerHwThread, internalModule, immData.get());

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
    createKernel(kernel.get());

    EXPECT_NE(nullptr, kernel->privateMemoryGraphicsAllocation);

    size_t expectedAllocationSize = privateMemorySizePerHwThread *
                                    device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch;
    EXPECT_EQ(expectedAllocationSize, kernel->privateMemoryGraphicsAllocation->getUnderlyingBufferSize());
}
324 
325 using KernelImmutableDataIsaCopyTests = KernelImmutableDataTests;
326 
TEST_F(KernelImmutableDataIsaCopyTests,whenUserKernelIsCreatedThenIsaIsCopiedWhenModuleIsCreated)327 TEST_F(KernelImmutableDataIsaCopyTests, whenUserKernelIsCreatedThenIsaIsCopiedWhenModuleIsCreated) {
328     MockImmutableMemoryManager *mockMemoryManager =
329         static_cast<MockImmutableMemoryManager *>(device->getNEODevice()->getMemoryManager());
330 
331     uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
332     bool isInternal = false;
333 
334     size_t previouscopyMemoryToAllocationCalledTimes =
335         mockMemoryManager->copyMemoryToAllocationCalledTimes;
336 
337     std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
338 
339     createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get());
340 
341     size_t copyForGlobalSurface = 1u;
342     auto copyForIsa = module->getKernelImmutableDataVector().size();
343     size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes +
344                                                                copyForGlobalSurface + copyForIsa;
345     EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes,
346               mockMemoryManager->copyMemoryToAllocationCalledTimes);
347 
348     std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
349     kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
350 
351     createKernel(kernel.get());
352 
353     EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes,
354               mockMemoryManager->copyMemoryToAllocationCalledTimes);
355 }
356 
// Re-initializing the immutable data of a user (non-internal) kernel after the
// module has been created must not trigger any additional
// copyMemoryToAllocation call.
TEST_F(KernelImmutableDataIsaCopyTests, whenImmutableDataIsInitializedForUserKernelThenIsaIsNotCopied) {
    auto *mockMemoryManager =
        static_cast<MockImmutableMemoryManager *>(device->getNEODevice()->getMemoryManager());

    uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
    bool isInternal = false;

    auto mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
    createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get());

    // `auto` keeps the snapshot in the counter's own type; sibling tests hold
    // the same counter in size_t, so a fixed uint32_t risked implicit narrowing.
    const auto copiesBeforeInitialize = mockMemoryManager->copyMemoryToAllocationCalledTimes;

    mockKernelImmData->initialize(mockKernelImmData->mockKernelInfo, device,
                                  device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
                                  module->translationUnit->globalConstBuffer,
                                  module->translationUnit->globalVarBuffer,
                                  isInternal);

    EXPECT_EQ(copiesBeforeInitialize, mockMemoryManager->copyMemoryToAllocationCalledTimes);
}
379 
// Re-initializing the immutable data of an internal kernel after the module
// has been created must not trigger any additional copyMemoryToAllocation
// call.
TEST_F(KernelImmutableDataIsaCopyTests, whenImmutableDataIsInitializedForInternalKernelThenIsaIsNotCopied) {
    auto *mockMemoryManager =
        static_cast<MockImmutableMemoryManager *>(device->getNEODevice()->getMemoryManager());

    uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
    bool isInternal = true;

    auto mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
    createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get());

    // `auto` keeps the snapshot in the counter's own type; sibling tests hold
    // the same counter in size_t, so a fixed uint32_t risked implicit narrowing.
    const auto copiesBeforeInitialize = mockMemoryManager->copyMemoryToAllocationCalledTimes;

    mockKernelImmData->initialize(mockKernelImmData->mockKernelInfo, device,
                                  device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch,
                                  module->translationUnit->globalConstBuffer,
                                  module->translationUnit->globalVarBuffer,
                                  isInternal);

    EXPECT_EQ(copiesBeforeInitialize, mockMemoryManager->copyMemoryToAllocationCalledTimes);
}
402 
403 using KernelImmutableDataWithNullHeapTests = KernelImmutableDataTests;
404 
TEST_F(KernelImmutableDataTests,givenInternalModuleWhenKernelIsCreatedThenIsaIsCopiedOnce)405 TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedThenIsaIsCopiedOnce) {
406     MockImmutableMemoryManager *mockMemoryManager =
407         static_cast<MockImmutableMemoryManager *>(device->getNEODevice()->getMemoryManager());
408 
409     uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
410     bool isInternal = true;
411 
412     std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
413     mockKernelImmData->getIsaGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL);
414 
415     size_t previouscopyMemoryToAllocationCalledTimes =
416         mockMemoryManager->copyMemoryToAllocationCalledTimes;
417 
418     createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get());
419 
420     size_t copyForGlobalSurface = 1u;
421     size_t copyForPatchingIsa = 0u;
422     size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes +
423                                                                copyForGlobalSurface + copyForPatchingIsa;
424     EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes,
425               mockMemoryManager->copyMemoryToAllocationCalledTimes);
426 
427     std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
428     kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
429 
430     expectedPreviouscopyMemoryToAllocationCalledTimes++;
431 
432     createKernel(kernel.get());
433 
434     EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes,
435               mockMemoryManager->copyMemoryToAllocationCalledTimes);
436 }
437 
TEST_F(KernelImmutableDataTests,givenInternalModuleWhenKernelIsCreatedIsaIsNotCopiedDuringLinking)438 TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedIsaIsNotCopiedDuringLinking) {
439     NEO::MockCompilerEnableGuard mock(true);
440     auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions();
441     neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip);
442 
443     MockImmutableMemoryManager *mockMemoryManager = static_cast<MockImmutableMemoryManager *>(device->getNEODevice()->getMemoryManager());
444 
445     uint8_t binary[16];
446     ze_module_desc_t moduleDesc = {};
447     moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
448     moduleDesc.pInputModule = binary;
449     moduleDesc.inputSize = 10;
450     ModuleBuildLog *moduleBuildLog = nullptr;
451 
452     auto linkerInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
453     linkerInput->traits.requiresPatchingOfGlobalVariablesBuffer = true;
454 
455     std::unique_ptr<L0::ult::MockModule> moduleMock = std::make_unique<L0::ult::MockModule>(device, moduleBuildLog, ModuleType::Builtin);
456     moduleMock->translationUnit = std::make_unique<MockModuleTranslationUnit>(device);
457     moduleMock->translationUnit->programInfo.linkerInput = std::move(linkerInput);
458 
459     uint32_t kernelHeap = 0;
460     auto kernelInfo = new KernelInfo();
461     kernelInfo->heapInfo.KernelHeapSize = 1;
462     kernelInfo->heapInfo.pKernelHeap = &kernelHeap;
463 
464     Mock<::L0::Kernel> kernelMock;
465     kernelMock.module = moduleMock.get();
466     kernelMock.immutableData.kernelInfo = kernelInfo;
467     kernelMock.immutableData.surfaceStateHeapSize = 64;
468     kernelMock.immutableData.surfaceStateHeapTemplate.reset(new uint8_t[64]);
469     kernelMock.immutableData.getIsaGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL);
470     kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = 0;
471 
472     moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
473     moduleMock->kernelImmData = &kernelMock.immutableData;
474 
475     size_t previouscopyMemoryToAllocationCalledTimes = mockMemoryManager->copyMemoryToAllocationCalledTimes;
476     auto result = moduleMock->initialize(&moduleDesc, neoDevice);
477     EXPECT_TRUE(result);
478     size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes;
479 
480     EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes);
481 
482     for (auto &ki : moduleMock->kernelImmDatas) {
483         EXPECT_FALSE(ki->isIsaCopiedToAllocation());
484     }
485 
486     expectedPreviouscopyMemoryToAllocationCalledTimes++;
487 
488     ze_kernel_desc_t desc = {};
489     desc.pKernelName = "";
490 
491     moduleMock->kernelImmData = moduleMock->kernelImmDatas[0].get();
492 
493     kernelMock.initialize(&desc);
494 
495     EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes);
496 }
497 
TEST_F(KernelImmutableDataTests,givenKernelInitializedWithPrivateMemoryThenContainerHasOneExtraSpaceForAllocation)498 TEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenContainerHasOneExtraSpaceForAllocation) {
499     std::string testFile;
500     retrieveBinaryKernelFilenameNoRevision(testFile, binaryFilename + "_", ".bin");
501 
502     size_t size = 0;
503     auto src = loadDataFromFile(
504         testFile.c_str(),
505         size);
506     ASSERT_NE(0u, size);
507     ASSERT_NE(nullptr, src);
508 
509     ze_module_desc_t moduleDesc = {};
510     moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
511     moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.get());
512     moduleDesc.inputSize = size;
513     ModuleBuildLog *moduleBuildLog = nullptr;
514 
515     uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
516     std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
517     std::unique_ptr<MockModule> moduleWithPrivateMemory = std::make_unique<MockModule>(device,
518                                                                                        moduleBuildLog,
519                                                                                        ModuleType::User,
520                                                                                        perHwThreadPrivateMemorySizeRequested,
521                                                                                        mockKernelImmData.get());
522     bool result = moduleWithPrivateMemory->initialize(&moduleDesc, device->getNEODevice());
523     EXPECT_TRUE(result);
524 
525     std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernelWithPrivateMemory;
526     kernelWithPrivateMemory = std::make_unique<ModuleImmutableDataFixture::MockKernel>(moduleWithPrivateMemory.get());
527 
528     createKernel(kernelWithPrivateMemory.get());
529     EXPECT_NE(nullptr, kernelWithPrivateMemory->privateMemoryGraphicsAllocation);
530 
531     size_t sizeContainerWithPrivateMemory = kernelWithPrivateMemory->getResidencyContainer().size();
532 
533     perHwThreadPrivateMemorySizeRequested = 0u;
534     std::unique_ptr<MockImmutableData> mockKernelImmDataForModuleWithoutPrivateMemory = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
535     std::unique_ptr<MockModule> moduleWithoutPrivateMemory = std::make_unique<MockModule>(device,
536                                                                                           moduleBuildLog,
537                                                                                           ModuleType::User,
538                                                                                           perHwThreadPrivateMemorySizeRequested,
539                                                                                           mockKernelImmDataForModuleWithoutPrivateMemory.get());
540     result = moduleWithoutPrivateMemory->initialize(&moduleDesc, device->getNEODevice());
541     EXPECT_TRUE(result);
542 
543     std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernelWithoutPrivateMemory;
544     kernelWithoutPrivateMemory = std::make_unique<ModuleImmutableDataFixture::MockKernel>(moduleWithoutPrivateMemory.get());
545 
546     createKernel(kernelWithoutPrivateMemory.get());
547     EXPECT_EQ(nullptr, kernelWithoutPrivateMemory->privateMemoryGraphicsAllocation);
548 
549     size_t sizeContainerWithoutPrivateMemory = kernelWithoutPrivateMemory->getResidencyContainer().size();
550 
551     EXPECT_EQ(sizeContainerWithoutPrivateMemory + 1u, sizeContainerWithPrivateMemory);
552 }
553 
// Requests the maximum uint32_t amount of private memory per HW thread; the
// module must then report per-dispatch private-memory allocation and the
// kernel must not own a private-memory allocation.
TEST_F(KernelImmutableDataTests, givenKernelWithPrivateMemoryBiggerThanGlobalMemoryThenPrivateMemoryIsNotAllocated) {
    std::string binaryPath;
    retrieveBinaryKernelFilenameNoRevision(binaryPath, binaryFilename + "_", ".bin");

    size_t binarySize = 0;
    auto binaryData = loadDataFromFile(binaryPath.c_str(), binarySize);
    ASSERT_NE(0u, binarySize);
    ASSERT_NE(nullptr, binaryData);

    ze_module_desc_t moduleDesc = {};
    moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
    moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(binaryData.get());
    moduleDesc.inputSize = binarySize;
    ModuleBuildLog *buildLog = nullptr;

    uint32_t privateMemorySizePerHwThread = std::numeric_limits<uint32_t>::max();
    auto immData = std::make_unique<MockImmutableData>(privateMemorySizePerHwThread);
    // Local module; the fixture's own `module` member is not used by this test.
    auto oversizedModule = std::make_unique<MockModule>(device,
                                                        buildLog,
                                                        ModuleType::User,
                                                        privateMemorySizePerHwThread,
                                                        immData.get());
    bool initialized = oversizedModule->initialize(&moduleDesc, device->getNEODevice());
    EXPECT_TRUE(initialized);
    EXPECT_TRUE(oversizedModule->shouldAllocatePrivateMemoryPerDispatch());

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(oversizedModule.get());
    createKernel(kernel.get());

    EXPECT_EQ(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
}
588 
589 class KernelDescriptorRTCallsTrue : public NEO::KernelDescriptor {
hasRTCalls() const590     bool hasRTCalls() const override {
591         return true;
592     }
593 };
594 
595 class KernelDescriptorRTCallsFalse : public NEO::KernelDescriptor {
hasRTCalls() const596     bool hasRTCalls() const override {
597         return false;
598     }
599 };
600 
// Initializes a kernel whose descriptor reports ray-tracing calls and checks that, afterwards,
// the device exposes an RT memory backed buffer and RT dispatch globals, and that the dispatch
// globals object is the last entry of the kernel's residency container.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized) {
    KernelDescriptorRTCallsTrue mockDescriptor = {};
    mockDescriptor.kernelMetadata.kernelName = "rt_test";
    for (auto i = 0u; i < 3u; i++) {
        mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0;
    }

    std::unique_ptr<MockImmutableData> mockKernelImmutableData =
        std::make_unique<MockImmutableData>(32u);
    // The immutable data only points at the stack descriptor; it does not copy it.
    mockKernelImmutableData->kernelDescriptor = &mockDescriptor;
    mockDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 4;

    ModuleBuildLog *moduleBuildLog = nullptr;
    module = std::make_unique<MockModule>(device,
                                          moduleBuildLog,
                                          ModuleType::User,
                                          32u,
                                          mockKernelImmutableData.get());
    module->maxGroupSize = 10;

    std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
    kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "rt_test";

    // The module only exposes a const view of its immutable data, so constness is cast away
    // to register the mock under the "rt_test" name looked up by kernel->initialize().
    auto immDataVector =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> *>(&module.get()->getKernelImmutableDataVector());

    immDataVector->push_back(std::move(mockKernelImmutableData));

    neoDevice->setRTDispatchGlobalsForceAllocation();

    auto result = kernel->initialize(&kernelDesc);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, module.get()->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());

    auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
    EXPECT_NE(nullptr, rtDispatchGlobals);

    size_t residencySize = kernel->getResidencyContainer().size();
    // Fatal on purpose: with an empty container "residencySize - 1" wraps around and the
    // element access below would read out of bounds instead of reporting a clean failure.
    ASSERT_NE(0u, residencySize);

    EXPECT_EQ(kernel->getResidencyContainer()[residencySize - 1], rtDispatchGlobals);
}
646 
// Same setup as the ray-tracing initialization test, except that the rtDispatchGlobals implicit
// argument is left at its default and dispatch-globals allocation is not forced: the kernel must
// still initialize and the device must have an RT memory backed buffer, but no RT dispatch
// globals are expected to exist.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueButKernelDoesNotHaveRTDGAllocationTokenThenRayTracingStillEnabledWithoutAllocation) {
    KernelDescriptorRTCallsTrue mockDescriptor = {};
    mockDescriptor.kernelMetadata.kernelName = "rt_test";
    for (auto dim = 0u; dim < 3u; ++dim) {
        mockDescriptor.kernelAttributes.requiredWorkgroupSize[dim] = 0;
    }

    auto mockKernelImmutableData = std::make_unique<MockImmutableData>(32u);
    // Point the immutable data at the stack descriptor defined above.
    mockKernelImmutableData->kernelDescriptor = &mockDescriptor;

    ModuleBuildLog *moduleBuildLog = nullptr;
    module = std::make_unique<MockModule>(
        device, moduleBuildLog, ModuleType::User, 32u, mockKernelImmutableData.get());
    module->maxGroupSize = 10;

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "rt_test";

    // Only a const accessor is available, so cast constness away to register the mock data.
    auto &immDataVector =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> &>(module->getKernelImmutableDataVector());
    immDataVector.push_back(std::move(mockKernelImmutableData));

    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->initialize(&kernelDesc));
    EXPECT_NE(nullptr, module->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());

    EXPECT_EQ(nullptr, neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels));
}
684 
// Injects a FailMemoryManager into the device before initializing a kernel whose descriptor
// reports ray-tracing calls, and expects initialization to return
// ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized) {
    KernelDescriptorRTCallsTrue mockDescriptor = {};
    mockDescriptor.kernelMetadata.kernelName = "rt_test";
    for (auto dim = 0u; dim < 3u; ++dim) {
        mockDescriptor.kernelAttributes.requiredWorkgroupSize[dim] = 0;
    }
    mockDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 4;

    // Raw pointer on purpose: it is handed to injectMemoryManager() below.
    // NOTE(review): presumably the device takes ownership there - confirm.
    NEO::MemoryManager *failingMemoryManager = new NEO::FailMemoryManager(0, *neoDevice->executionEnvironment);

    auto mockKernelImmutableData = std::make_unique<MockImmutableData>(32u);
    mockKernelImmutableData->kernelDescriptor = &mockDescriptor;

    ModuleBuildLog *moduleBuildLog = nullptr;
    module = std::make_unique<MockModule>(
        device, moduleBuildLog, ModuleType::User, 32u, mockKernelImmutableData.get());
    module->maxGroupSize = 10;

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "rt_test";

    // Only a const accessor is available, so cast constness away to register the mock data.
    auto &immDataVector =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> &>(module->getKernelImmutableDataVector());
    immDataVector.push_back(std::move(mockKernelImmutableData));

    // Swap in the failing manager only after the module and kernel objects exist.
    neoDevice->injectMemoryManager(failingMemoryManager);

    const auto result = kernel->initialize(&kernelDesc);
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, result);
}
721 
// Initializes a kernel whose descriptor reports no ray-tracing calls and checks that the
// device ends up without an RT memory backed buffer.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitialized) {
    KernelDescriptorRTCallsFalse mockDescriptor = {};
    mockDescriptor.kernelMetadata.kernelName = "rt_test";
    for (auto dim = 0u; dim < 3u; ++dim) {
        mockDescriptor.kernelAttributes.requiredWorkgroupSize[dim] = 0;
    }

    auto mockKernelImmutableData = std::make_unique<MockImmutableData>(32u);
    // Point the immutable data at the stack descriptor defined above.
    mockKernelImmutableData->kernelDescriptor = &mockDescriptor;

    ModuleBuildLog *moduleBuildLog = nullptr;
    module = std::make_unique<MockModule>(
        device, moduleBuildLog, ModuleType::User, 32u, mockKernelImmutableData.get());
    module->maxGroupSize = 10;

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "rt_test";

    // Only a const accessor is available, so cast constness away to register the mock data.
    auto &immDataVector =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> &>(module->getKernelImmutableDataVector());
    immDataVector.push_back(std::move(mockKernelImmutableData));

    const auto result = kernel->initialize(&kernelDesc);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(nullptr, module->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());
}
755 
// Initializes a ray-tracing kernel on top of a test-owned cross-thread data buffer and checks
// that the value found at the start of that buffer equals the GPU address to patch reported
// by the device's RT dispatch globals.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatched) {
    KernelDescriptorRTCallsTrue mockDescriptor = {};
    mockDescriptor.kernelMetadata.kernelName = "rt_test";
    for (auto i = 0u; i < 3u; i++) {
        mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0;
    }

    std::unique_ptr<MockImmutableData> mockKernelImmutableData =
        std::make_unique<MockImmutableData>(32u);
    mockKernelImmutableData->kernelDescriptor = &mockDescriptor;
    mockDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 4;

    ModuleBuildLog *moduleBuildLog = nullptr;
    module = std::make_unique<MockModule>(device,
                                          moduleBuildLog,
                                          ModuleType::User,
                                          32u,
                                          mockKernelImmutableData.get());
    module->maxGroupSize = 10;

    std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
    kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "rt_test";

    // The module only exposes a const view of its immutable data, so constness is cast away
    // to register the mock under the "rt_test" name looked up by kernel->initialize().
    auto immDataVector =
        const_cast<std::vector<std::unique_ptr<KernelImmutableData>> *>(&module.get()->getKernelImmutableDataVector());

    immDataVector->push_back(std::move(mockKernelImmutableData));

    // The buffer stays owned by this local; the kernel is only given an aliasing pointer to it.
    auto crossThreadData = std::make_unique<uint32_t[]>(4);
    kernel->crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData.get()));
    kernel->crossThreadDataSize = sizeof(uint32_t[4]);

    neoDevice->setRTDispatchGlobalsForceAllocation();

    auto result = kernel->initialize(&kernelDesc);
    // Drop the kernel's alias as soon as initialization is done so that the fatal assertion
    // below cannot leave two owners of the same buffer behind (which would double-free it).
    kernel->crossThreadData.release();
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
    // Fatal on purpose: rtDispatchGlobals is dereferenced right below.
    ASSERT_NE(nullptr, rtDispatchGlobals);

    // NOTE(review): this reads sizeof(uintptr_t) bytes although pointerSize was set to 4 above;
    // the buffer is zero-initialized, so this matches only while the address fits the patched width - confirm intent.
    auto dispatchGlobalsAddressPatched = *reinterpret_cast<uintptr_t *>(crossThreadData.get());
    auto dispatchGlobalsGpuAddressOffset = static_cast<uintptr_t>(rtDispatchGlobals->getGpuAddressToPatch());
    EXPECT_EQ(dispatchGlobalsGpuAddressOffset, dispatchGlobalsAddressPatched);
}
805 
// Alias of the KernelImmutableDataTests fixture, giving the indirect-access tests that follow
// their own test-suite name.
using KernelIndirectPropertiesFromIGCTests = KernelImmutableDataTests;
807 
// With DisableIndirectAccess set to 0 and all three hasNonKernelArg* attributes cleared,
// an initialized kernel must report no indirect access.
TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicThenHasIndirectAccessIsSetToFalse) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(0);

    uint32_t requestedPrivateMemorySize = 32u;
    bool isInternal = false;

    auto mockKernelImmData = std::make_unique<MockImmutableData>(requestedPrivateMemorySize);
    createModuleFromBinary(requestedPrivateMemorySize, isInternal, mockKernelImmData.get());

    auto kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());

    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();

    // Attributes are written before initialize() so the kernel is initialized with them in place.
    auto &attributes = module->mockKernelImmData->mockKernelDescriptor->kernelAttributes;
    attributes.hasNonKernelArgLoad = false;
    attributes.hasNonKernelArgStore = false;
    attributes.hasNonKernelArgAtomic = false;

    kernel->initialize(&desc);

    EXPECT_FALSE(kernel->hasIndirectAccess());
}
834 
TEST_F(KernelIndirectPropertiesFromIGCTests,whenInitializingKernelWithKernelLoadStoreAtomicThenHasIndirectAccessIsSetToTrue)835 TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoadStoreAtomicThenHasIndirectAccessIsSetToTrue) {
836     DebugManagerStateRestore restorer;
837     NEO::DebugManager.flags.DisableIndirectAccess.set(0);
838 
839     uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
840     bool isInternal = false;
841 
842     std::unique_ptr<MockImmutableData> mockKernelImmData =
843         std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
844 
845     createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get());
846 
847     {
848         std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
849         kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
850 
851         ze_kernel_desc_t desc = {};
852         desc.pKernelName = kernelName.c_str();
853 
854         module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = true;
855         module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
856         module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
857 
858         kernel->initialize(&desc);
859 
860         EXPECT_TRUE(kernel->hasIndirectAccess());
861     }
862 
863     {
864         std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
865         kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
866 
867         ze_kernel_desc_t desc = {};
868         desc.pKernelName = kernelName.c_str();
869 
870         module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
871         module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = true;
872         module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false;
873 
874         kernel->initialize(&desc);
875 
876         EXPECT_TRUE(kernel->hasIndirectAccess());
877     }
878 
879     {
880         std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
881         kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
882 
883         ze_kernel_desc_t desc = {};
884         desc.pKernelName = kernelName.c_str();
885 
886         module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false;
887         module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
888         module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = true;
889 
890         kernel->initialize(&desc);
891 
892         EXPECT_TRUE(kernel->hasIndirectAccess());
893     }
894 }
895 
896 class KernelPropertiesTests : public ModuleFixture, public ::testing::Test {
897   public:
898     class MockKernel : public KernelImp {
899       public:
900         using KernelImp::kernelHasIndirectAccess;
901     };
SetUp()902     void SetUp() override {
903         ModuleFixture::SetUp();
904 
905         ze_kernel_desc_t kernelDesc = {};
906         kernelDesc.pKernelName = kernelName.c_str();
907 
908         ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle);
909         EXPECT_EQ(ZE_RESULT_SUCCESS, res);
910 
911         kernel = static_cast<MockKernel *>(L0::Kernel::fromHandle(kernelHandle));
912         kernel->kernelHasIndirectAccess = true;
913     }
914 
TearDown()915     void TearDown() override {
916         Kernel::fromHandle(kernelHandle)->destroy();
917         ModuleFixture::TearDown();
918     }
919 
920     ze_kernel_handle_t kernelHandle;
921     MockKernel *kernel = nullptr;
922 };
923 
// Checks getKernelName(): with a null output buffer it reports the name length plus the
// terminator (also overwriting a larger size passed in), and with a buffer it returns the
// fixture's kernel name.
TEST_F(KernelPropertiesTests, givenKernelThenCorrectNameIsRetrieved) {
    size_t kernelSize = 0;
    ze_result_t res = kernel->getKernelName(&kernelSize, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_EQ(kernelSize, kernelName.length() + 1);

    // A size larger than needed must be replaced by the required size.
    size_t alteredKernelSize = kernelSize * 2;
    res = kernel->getKernelName(&alteredKernelSize, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_EQ(alteredKernelSize, kernelSize);

    // Owning buffer instead of new[]/delete[], so it is released on every exit path.
    auto kernelNameRetrieved = std::make_unique<char[]>(kernelSize);
    res = kernel->getKernelName(&kernelSize, kernelNameRetrieved.get());
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    EXPECT_EQ(0, strncmp(kernelName.c_str(), kernelNameRetrieved.get(), kernelSize));
}
943 
// Pre-fills a ze_kernel_properties_t with non-zero sentinel values, calls getProperties()
// and checks that every inspected field was overwritten with the value expected for the
// fixture's kernel.
TEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {
    ze_kernel_properties_t kernelProperties = {};

    // Sentinels: any field left untouched by getProperties() fails the checks below.
    kernelProperties.requiredNumSubGroups = std::numeric_limits<uint32_t>::max();
    kernelProperties.requiredSubgroupSize = std::numeric_limits<uint32_t>::max();
    kernelProperties.maxSubgroupSize = std::numeric_limits<uint32_t>::max();
    kernelProperties.maxNumSubgroups = std::numeric_limits<uint32_t>::max();
    kernelProperties.localMemSize = std::numeric_limits<uint32_t>::max();
    kernelProperties.privateMemSize = std::numeric_limits<uint32_t>::max();
    kernelProperties.spillMemSize = std::numeric_limits<uint32_t>::max();
    kernelProperties.numKernelArgs = std::numeric_limits<uint32_t>::max();
    // memset() converts its value argument to unsigned char, so the fill byte is spelled out.
    memset(&kernelProperties.uuid.kid, 0xFF, sizeof(kernelProperties.uuid.kid));
    memset(&kernelProperties.uuid.mid, 0xFF, sizeof(kernelProperties.uuid.mid));

    ze_result_t res = kernel->getProperties(&kernelProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    EXPECT_EQ(6U, kernelProperties.numKernelArgs);

    EXPECT_EQ(0U, kernelProperties.requiredNumSubGroups);
    EXPECT_EQ(0U, kernelProperties.requiredSubgroupSize);

    uint32_t maxSubgroupSize = this->kernel->getKernelDescriptor().kernelAttributes.simdSize;
    // Fatal on purpose: the value is used as a divisor below.
    ASSERT_NE(0U, maxSubgroupSize);
    EXPECT_EQ(maxSubgroupSize, kernelProperties.maxSubgroupSize);

    uint32_t maxKernelWorkGroupSize = static_cast<uint32_t>(this->module->getDevice()->getNEODevice()->getDeviceInfo().maxWorkGroupSize);
    uint32_t maxNumSubgroups = maxKernelWorkGroupSize / maxSubgroupSize;
    EXPECT_EQ(maxNumSubgroups, kernelProperties.maxNumSubgroups);

    EXPECT_EQ(sizeof(float) * 16U, kernelProperties.localMemSize);
    EXPECT_EQ(0U, kernelProperties.privateMemSize);
    EXPECT_EQ(0U, kernelProperties.spillMemSize);

    // Both UUIDs are expected to come back zeroed.
    uint8_t zeroKid[ZE_MAX_KERNEL_UUID_SIZE] = {};
    uint8_t zeroMid[ZE_MAX_MODULE_UUID_SIZE] = {};
    EXPECT_EQ(0, memcmp(&kernelProperties.uuid.kid, &zeroKid,
                        sizeof(kernelProperties.uuid.kid)));
    EXPECT_EQ(0, memcmp(&kernelProperties.uuid.mid, &zeroMid,
                        sizeof(kernelProperties.uuid.mid)));
}
992 
// Chains a preferred-group-size struct onto the kernel properties via pNext and checks the
// returned preferredMultiple: the kernel's max SIMD size, doubled when the HW helper reports
// fused EU dispatch as enabled.
TEST_F(KernelPropertiesTests, whenPassingPreferredGroupSizeStructToGetPropertiesThenPreferredMultipleIsReturned) {
    ze_kernel_preferred_group_size_properties_t preferredGroupProperties = {};
    preferredGroupProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PREFERRED_GROUP_SIZE_PROPERTIES;

    ze_kernel_properties_t kernelProperties = {};
    kernelProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES;
    kernelProperties.pNext = &preferredGroupProperties;

    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->getProperties(&kernelProperties));

    const auto &hwInfo = module->getDevice()->getHwInfo();
    auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);

    uint32_t expectedMultiple = static_cast<uint32_t>(kernel->getImmutableData()->getKernelInfo()->getMaxSimdSize());
    if (hwHelper.isFusedEuDispatchEnabled(hwInfo)) {
        expectedMultiple *= 2;
    }

    EXPECT_EQ(preferredGroupProperties.preferredMultiple, expectedMultiple);
}
1012 
// Chains a preferred-group-size struct carrying an unrelated stype onto the kernel properties
// and checks that getProperties() still succeeds and leaves preferredMultiple untouched.
TEST_F(KernelPropertiesTests, whenPassingPreferredGroupSizeStructWithWrongStypeSuccessIsReturnedAndNoFieldsInPreferredGroupSizeStructAreSet) {
    const uint32_t dummyPreferredMultiple = 101;

    ze_kernel_preferred_group_size_properties_t preferredGroupProperties = {};
    // Deliberately mismatched structure type.
    preferredGroupProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_WIN32;
    preferredGroupProperties.preferredMultiple = dummyPreferredMultiple;

    ze_kernel_properties_t kernelProperties = {};
    kernelProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES;
    kernelProperties.pNext = &preferredGroupProperties;

    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->getProperties(&kernelProperties));

    EXPECT_EQ(preferredGroupProperties.preferredMultiple, dummyPreferredMultiple);
}
1030 
// Pre-fills the profile properties with non-zero sentinels and checks that getProfileInfo()
// succeeds and reports zero for both flags and numTokens.
TEST_F(KernelPropertiesTests, givenValidKernelThenProfilePropertiesAreRetrieved) {
    const auto sentinel = std::numeric_limits<uint32_t>::max();

    zet_profile_properties_t profileProperties = {};
    profileProperties.flags = sentinel;
    profileProperties.numTokens = sentinel;

    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->getProfileInfo(&profileProperties));

    EXPECT_EQ(0U, profileProperties.flags);
    EXPECT_EQ(0U, profileProperties.numTokens);
}
1043 
// All three indirect-allocation controls start out disabled; after setIndirectAccess() with the
// device, host and shared flags combined, all three must be enabled.
TEST_F(KernelPropertiesTests, whenSettingValidKernelIndirectAccessFlagsThenFlagsAreSetCorrectly) {
    const auto controlsBefore = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(controlsBefore.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(controlsBefore.indirectHostAllocationsAllowed);
    EXPECT_FALSE(controlsBefore.indirectSharedAllocationsAllowed);

    ze_kernel_indirect_access_flags_t allFlags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE |
                                                 ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST |
                                                 ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setIndirectAccess(allFlags));

    const auto controlsAfter = kernel->getUnifiedMemoryControls();
    EXPECT_TRUE(controlsAfter.indirectDeviceAllocationsAllowed);
    EXPECT_TRUE(controlsAfter.indirectHostAllocationsAllowed);
    EXPECT_TRUE(controlsAfter.indirectSharedAllocationsAllowed);
}
1061 
// After setIndirectAccess() with only the DEVICE flag, getIndirectAccess() must report the
// DEVICE flag and neither the HOST nor the SHARED flag.
TEST_F(KernelPropertiesTests, whenCallingGetIndirectAccessAfterSetIndirectAccessWithDeviceFlagThenCorrectFlagIsReturned) {
    ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE;
    auto res = kernel->setIndirectAccess(flags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    // Zero-initialized so the checks below never read an indeterminate value if the getter
    // fails without writing its output.
    ze_kernel_indirect_access_flags_t returnedFlags = 0;
    res = kernel->getIndirectAccess(&returnedFlags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_TRUE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED);
}
1074 
// After setIndirectAccess() with only the HOST flag, getIndirectAccess() must report the
// HOST flag and neither the DEVICE nor the SHARED flag.
TEST_F(KernelPropertiesTests, whenCallingGetIndirectAccessAfterSetIndirectAccessWithHostFlagThenCorrectFlagIsReturned) {
    ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST;
    auto res = kernel->setIndirectAccess(flags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    // Zero-initialized so the checks below never read an indeterminate value if the getter
    // fails without writing its output.
    ze_kernel_indirect_access_flags_t returnedFlags = 0;
    res = kernel->getIndirectAccess(&returnedFlags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE);
    EXPECT_TRUE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED);
}
1087 
// After setIndirectAccess() with only the SHARED flag, getIndirectAccess() must report the
// SHARED flag and neither the DEVICE nor the HOST flag.
TEST_F(KernelPropertiesTests, whenCallingGetIndirectAccessAfterSetIndirectAccessWithSharedFlagThenCorrectFlagIsReturned) {
    ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    auto res = kernel->setIndirectAccess(flags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    // Zero-initialized so the checks below never read an indeterminate value if the getter
    // fails without writing its output.
    ze_kernel_indirect_access_flags_t returnedFlags = 0;
    res = kernel->getIndirectAccess(&returnedFlags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST);
    EXPECT_TRUE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED);
}
// With the DisableIndirectAccess debug flag set to 0, setIndirectAccess() with the device, host
// and shared flags combined must enable all three indirect-allocation controls.
TEST_F(KernelPropertiesTests, givenValidKernelWithIndirectAccessFlagsAndDisableIndirectAccessSetToZeroThenFlagsAreSet) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(0);

    const auto controlsBefore = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(controlsBefore.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(controlsBefore.indirectHostAllocationsAllowed);
    EXPECT_FALSE(controlsBefore.indirectSharedAllocationsAllowed);

    ze_kernel_indirect_access_flags_t allFlags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE |
                                                 ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST |
                                                 ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setIndirectAccess(allFlags));

    const auto controlsAfter = kernel->getUnifiedMemoryControls();
    EXPECT_TRUE(controlsAfter.indirectDeviceAllocationsAllowed);
    EXPECT_TRUE(controlsAfter.indirectHostAllocationsAllowed);
    EXPECT_TRUE(controlsAfter.indirectSharedAllocationsAllowed);
}
1120 
// A kernel whose immutable data points at a descriptor reporting RT calls must report that it
// uses ray tracing.
HWTEST2_F(KernelPropertiesTests, whenHasRTCallsIsTrueThenUsesRayTracingIsTrue, MatchAny) {
    // Declaration order is kept as-is because it determines the destruction order of these objects.
    WhiteBoxKernelHw<gfxCoreFamily> kernelUnderTest;
    KernelDescriptorRTCallsTrue rtDescriptor = {};
    WhiteBox<::L0::KernelImmutableData> immutableData = {};

    // Wire descriptor -> immutable data -> kernel.
    immutableData.kernelDescriptor = &rtDescriptor;
    kernelUnderTest.kernelImmData = &immutableData;

    const bool usesRayTracing = kernelUnderTest.usesRayTracing();
    EXPECT_TRUE(usesRayTracing);
}
1131 
// A kernel whose immutable data points at a descriptor reporting no RT calls must report that
// it does not use ray tracing.
HWTEST2_F(KernelPropertiesTests, whenHasRTCallsIsFalseThenUsesRayTracingIsFalse, MatchAny) {
    // Declaration order is kept as-is because it determines the destruction order of these objects.
    WhiteBoxKernelHw<gfxCoreFamily> kernelUnderTest;
    KernelDescriptorRTCallsFalse nonRtDescriptor = {};
    WhiteBox<::L0::KernelImmutableData> immutableData = {};

    // Wire descriptor -> immutable data -> kernel.
    immutableData.kernelDescriptor = &nonRtDescriptor;
    kernelUnderTest.kernelImmData = &immutableData;

    const bool usesRayTracing = kernelUnderTest.usesRayTracing();
    EXPECT_FALSE(usesRayTracing);
}
1142 
// Alias of the KernelPropertiesTests fixture, giving the setIndirectAccess tests that follow
// their own test-suite name.
using KernelIndirectPropertiesTests = KernelPropertiesTests;
1144 
// Kernel flagged as having indirect access, DisableIndirectAccess set to 0: setIndirectAccess()
// with all three flags must enable all three indirect-allocation controls.
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatHasIndirectAccessThenIndirectAccessIsSet) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(0);
    kernel->kernelHasIndirectAccess = true;

    const auto controlsBefore = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(controlsBefore.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(controlsBefore.indirectHostAllocationsAllowed);
    EXPECT_FALSE(controlsBefore.indirectSharedAllocationsAllowed);

    ze_kernel_indirect_access_flags_t allFlags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE |
                                                 ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST |
                                                 ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setIndirectAccess(allFlags));

    const auto controlsAfter = kernel->getUnifiedMemoryControls();
    EXPECT_TRUE(controlsAfter.indirectDeviceAllocationsAllowed);
    EXPECT_TRUE(controlsAfter.indirectHostAllocationsAllowed);
    EXPECT_TRUE(controlsAfter.indirectSharedAllocationsAllowed);
}
1166 
// Kernel flagged as having indirect access, but DisableIndirectAccess set to 1:
// setIndirectAccess() still succeeds, yet all three indirect-allocation controls stay disabled.
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatHasIndirectAccessButWithDisableIndirectAccessSetThenIndirectAccessIsNotSet) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(1);
    kernel->kernelHasIndirectAccess = true;

    const auto controlsBefore = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(controlsBefore.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(controlsBefore.indirectHostAllocationsAllowed);
    EXPECT_FALSE(controlsBefore.indirectSharedAllocationsAllowed);

    ze_kernel_indirect_access_flags_t allFlags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE |
                                                 ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST |
                                                 ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setIndirectAccess(allFlags));

    const auto controlsAfter = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(controlsAfter.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(controlsAfter.indirectHostAllocationsAllowed);
    EXPECT_FALSE(controlsAfter.indirectSharedAllocationsAllowed);
}
1188 
// With DisableIndirectAccess set to 0 and a kernel marked as using indirect
// access, requesting device, host and shared access must enable all three
// unified-memory controls.
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatHasIndirectAccessAndDisableIndirectAccessNotSetThenIndirectAccessIsSet) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(0);
    kernel->kernelHasIndirectAccess = true;

    // Baseline: nothing is allowed before setIndirectAccess is called.
    const UnifiedMemoryControls initialControls = kernel->getUnifiedMemoryControls();
    EXPECT_EQ(false, initialControls.indirectDeviceAllocationsAllowed);
    EXPECT_EQ(false, initialControls.indirectHostAllocationsAllowed);
    EXPECT_EQ(false, initialControls.indirectSharedAllocationsAllowed);

    const ze_kernel_indirect_access_flags_t requestedFlags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE |
                                                             ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST |
                                                             ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    const auto status = kernel->setIndirectAccess(requestedFlags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    const UnifiedMemoryControls updatedControls = kernel->getUnifiedMemoryControls();
    EXPECT_TRUE(updatedControls.indirectDeviceAllocationsAllowed);
    EXPECT_TRUE(updatedControls.indirectHostAllocationsAllowed);
    EXPECT_TRUE(updatedControls.indirectSharedAllocationsAllowed);
}
1210 
// A kernel that is not marked as using indirect access must keep every
// unified-memory control disabled even when all access flags are requested
// and the DisableIndirectAccess debug flag is 0.
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatDoesNotHaveIndirectAccessThenIndirectAccessIsNotSet) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(0);
    kernel->kernelHasIndirectAccess = false;

    // Baseline: nothing is allowed before setIndirectAccess is called.
    const UnifiedMemoryControls beforeCall = kernel->getUnifiedMemoryControls();
    EXPECT_EQ(false, beforeCall.indirectDeviceAllocationsAllowed);
    EXPECT_EQ(false, beforeCall.indirectHostAllocationsAllowed);
    EXPECT_EQ(false, beforeCall.indirectSharedAllocationsAllowed);

    const ze_kernel_indirect_access_flags_t everyAccessFlag = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE |
                                                              ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST |
                                                              ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    const auto callResult = kernel->setIndirectAccess(everyAccessFlag);
    EXPECT_EQ(ZE_RESULT_SUCCESS, callResult);

    const UnifiedMemoryControls afterCall = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(afterCall.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(afterCall.indirectHostAllocationsAllowed);
    EXPECT_FALSE(afterCall.indirectSharedAllocationsAllowed);
}
1232 
// hasIndirectAllocationsAllowed() must flip from false to true once the
// device indirect-access flag has been set successfully.
TEST_F(KernelPropertiesTests, givenValidKernelIndirectAccessFlagsSetThenExpectKernelIndirectAllocationsAllowedTrue) {
    EXPECT_EQ(false, kernel->hasIndirectAllocationsAllowed());

    const ze_kernel_indirect_access_flags_t deviceAccessOnly = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE;
    const auto status = kernel->setIndirectAccess(deviceAccessOnly);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    EXPECT_EQ(true, kernel->hasIndirectAllocationsAllowed());
}
1242 
// getProperties() must report a spill memory size of zero for the fixture
// kernel. The field is pre-loaded with a non-zero sentinel so the assertion
// only passes if getProperties() actually overwrites it.
TEST_F(KernelPropertiesTests, givenValidKernelAndNoMediavfestateThenSpillMemSizeIsZero) {
    ze_kernel_properties_t kernelProperties = {};
    kernelProperties.spillMemSize = std::numeric_limits<uint32_t>::max();

    ze_result_t res = kernel->getProperties(&kernelProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    EXPECT_EQ(0u, kernelProperties.spillMemSize);
}
1265 
// getProperties() must report a private memory size of zero for the fixture
// kernel. The sentinel is written to privateMemSize (the field under test)
// so the assertion cannot pass merely because the struct was zero-initialized;
// it only passes if getProperties() overwrites the field with zero.
// NOTE(review): relies on getProperties() always writing privateMemSize -
// confirm against KernelImp::getProperties.
TEST_F(KernelPropertiesTests, givenValidKernelAndNollocateStatelessPrivateSurfaceThenPrivateMemSizeIsZero) {
    ze_kernel_properties_t kernelProperties = {};
    kernelProperties.privateMemSize = std::numeric_limits<uint32_t>::max();

    ze_result_t res = kernel->getProperties(&kernelProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    EXPECT_EQ(0u, kernelProperties.privateMemSize);
}
1288 
// Setting the LARGE_SLM cache config must switch the kernel's SLM policy from
// SlmPolicyNone to SlmPolicyLargeSlm.
TEST_F(KernelPropertiesTests, givenValidKernelAndLargeSlmIsSetThenForceLargeSlmIsTrue) {
    EXPECT_EQ(NEO::SlmPolicy::SlmPolicyNone, kernel->getSlmPolicy());

    const ze_result_t status = kernel->setCacheConfig(ZE_CACHE_CONFIG_FLAG_LARGE_SLM);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    EXPECT_EQ(NEO::SlmPolicy::SlmPolicyLargeSlm, kernel->getSlmPolicy());
}
1295 
// Setting the LARGE_DATA cache config must switch the kernel's SLM policy from
// SlmPolicyNone to SlmPolicyLargeData.
TEST_F(KernelPropertiesTests, givenValidKernelAndLargeDataIsSetThenForceLargeDataIsTrue) {
    EXPECT_EQ(NEO::SlmPolicy::SlmPolicyNone, kernel->getSlmPolicy());

    const ze_result_t status = kernel->setCacheConfig(ZE_CACHE_CONFIG_FLAG_LARGE_DATA);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    EXPECT_EQ(NEO::SlmPolicy::SlmPolicyLargeData, kernel->getSlmPolicy());
}
1302 
1303 using KernelLocalIdsTest = Test<ModuleFixture>;
1304 
TEST_F(KernelLocalIdsTest,WhenKernelIsCreatedThenDefaultLocalIdGenerationbyRuntimeIsTrue)1305 TEST_F(KernelLocalIdsTest, WhenKernelIsCreatedThenDefaultLocalIdGenerationbyRuntimeIsTrue) {
1306     createKernel();
1307 
1308     EXPECT_TRUE(kernel->requiresGenerationOfLocalIdsByRuntime());
1309 }
1310 
1311 struct KernelIsaTests : Test<ModuleFixture> {
SetUpL0::ult::KernelIsaTests1312     void SetUp() override {
1313         Test<ModuleFixture>::SetUp();
1314 
1315         auto &capabilityTable = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable;
1316         bool createBcsEngine = !capabilityTable.blitterOperationsSupported;
1317         capabilityTable.blitterOperationsSupported = true;
1318 
1319         if (createBcsEngine) {
1320             auto &engine = device->getNEODevice()->getEngine(0);
1321             bcsOsContext.reset(OsContext::create(nullptr, 0,
1322                                                  EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, device->getNEODevice()->getDeviceBitfield())));
1323             engine.osContext = bcsOsContext.get();
1324             engine.commandStreamReceiver->setupContext(*bcsOsContext);
1325         }
1326     }
1327 
1328     std::unique_ptr<OsContext> bcsOsContext;
1329 };
1330 
// With CPU access to local memory disallowed, initializing the immutable data
// is expected to bump the BCS CSR task count by one when the ISA allocation
// lands in the local memory pool, and to leave it untouched otherwise.
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) {
    DebugManagerStateRestore restore;
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA) - 1));

    // Minimal kernel description: a one-byte heap backed by a local variable.
    uint32_t isaStorage = 0;
    KernelInfo info;
    info.heapInfo.pKernelHeap = &isaStorage;
    info.heapInfo.KernelHeapSize = 1;

    KernelImmutableData immutableData(device);

    auto *blitterCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
    const auto taskCountBefore = blitterCsr->peekTaskCount();

    immutableData.initialize(&info, device, 0, nullptr, nullptr, false);

    const bool isaInLocalMemory = immutableData.getIsaGraphicsAllocation()->isAllocatedInLocalMemoryPool();
    if (!isaInLocalMemory) {
        EXPECT_EQ(taskCountBefore, blitterCsr->peekTaskCount());
    } else {
        EXPECT_EQ(taskCountBefore + 1, blitterCsr->peekTaskCount());
    }

    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(info.kernelAllocation);
}
1356 
// With CPU access to local memory allowed, initializing the immutable data
// must not change the BCS CSR task count.
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) {
    DebugManagerStateRestore restore;
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessAllowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA) - 1));

    // Minimal kernel description: a one-byte heap backed by a local variable.
    uint32_t isaStorage = 0;
    KernelInfo info;
    info.heapInfo.pKernelHeap = &isaStorage;
    info.heapInfo.KernelHeapSize = 1;

    KernelImmutableData immutableData(device);

    auto *blitterCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
    const auto taskCountBefore = blitterCsr->peekTaskCount();

    immutableData.initialize(&info, device, 0, nullptr, nullptr, false);

    EXPECT_EQ(taskCountBefore, blitterCsr->peekTaskCount());

    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(info.kernelAllocation);
}
1378 
// With CPU access disallowed but blitter operations reported as unsupported,
// initializing the immutable data must not change the BCS CSR task count.
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) {
    DebugManagerStateRestore restore;
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA) - 1));

    // Undo the fixture's override of the blitter capability for this test.
    auto &caps = device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable;
    caps.blitterOperationsSupported = false;

    // Minimal kernel description: a one-byte heap backed by a local variable.
    uint32_t isaStorage = 0;
    KernelInfo info;
    info.heapInfo.pKernelHeap = &isaStorage;
    info.heapInfo.KernelHeapSize = 1;

    KernelImmutableData immutableData(device);

    auto *blitterCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
    const auto taskCountBefore = blitterCsr->peekTaskCount();

    immutableData.initialize(&info, device, 0, nullptr, nullptr, false);

    EXPECT_EQ(taskCountBefore, blitterCsr->peekTaskCount());

    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(info.kernelAllocation);
}
1402 
// Initializing with the last argument set to true must yield an ISA
// allocation of type KERNEL_ISA_INTERNAL.
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithInternalIsaThenCorrectAllocationTypeIsUsed) {
    // Minimal kernel description: a one-byte heap backed by a local variable.
    uint32_t isaStorage = 0;
    KernelInfo info;
    info.heapInfo.pKernelHeap = &isaStorage;
    info.heapInfo.KernelHeapSize = 1;

    KernelImmutableData immutableData(device);
    immutableData.initialize(&info, device, 0, nullptr, nullptr, true);

    const auto allocationType = immutableData.getIsaGraphicsAllocation()->getAllocationType();
    EXPECT_EQ(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, allocationType);
}
1414 
// Initializing with the last argument set to false must yield an ISA
// allocation of type KERNEL_ISA.
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithNonInternalIsaThenCorrectAllocationTypeIsUsed) {
    // Minimal kernel description: a one-byte heap backed by a local variable.
    uint32_t isaStorage = 0;
    KernelInfo info;
    info.heapInfo.pKernelHeap = &isaStorage;
    info.heapInfo.KernelHeapSize = 1;

    KernelImmutableData immutableData(device);
    immutableData.initialize(&info, device, 0, nullptr, nullptr, false);

    const auto allocationType = immutableData.getIsaGraphicsAllocation()->getAllocationType();
    EXPECT_EQ(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA, allocationType);
}
1426 
// The ISA allocation's underlying buffer size must equal the kernel heap size
// plus the padding reported by HwHelper::getPaddingForISAAllocation().
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithIsaThenPaddingIsAdded) {
    // Minimal kernel description: a one-byte heap backed by a local variable.
    uint32_t isaStorage = 0;
    KernelInfo info;
    info.heapInfo.pKernelHeap = &isaStorage;
    info.heapInfo.KernelHeapSize = 1;

    KernelImmutableData immutableData(device);
    immutableData.initialize(&info, device, 0, nullptr, nullptr, false);

    auto *isaAllocation = immutableData.getIsaGraphicsAllocation();
    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    const size_t padding = hwHelper.getPaddingForISAAllocation();
    const size_t expectedSize = info.heapInfo.KernelHeapSize + padding;

    EXPECT_EQ(isaAllocation->getUnderlyingBufferSize(), expectedSize);
}
1440 
// Global constant and global variable buffers passed to initialize() must each
// appear exactly once in the immutable data's residency container.
TEST_F(KernelIsaTests, givenGlobalBuffersWhenCreatingKernelImmutableDataThenBuffersAreAddedToResidencyContainer) {
    // Minimal kernel description: a one-byte heap backed by a local variable.
    uint32_t isaStorage = 0;
    KernelInfo info;
    info.heapInfo.pKernelHeap = &isaStorage;
    info.heapInfo.KernelHeapSize = 1;

    KernelImmutableData immutableData(device);

    // Both mock allocations share the same fake address and size; only their
    // identity (pointer value) matters for the residency lookup below.
    const uint64_t fakeGpuAddress = 0x1200;
    void *fakeCpuPtr = reinterpret_cast<void *>(fakeGpuAddress);
    const size_t allocationSize = 0x1100;
    NEO::MockGraphicsAllocation globalVarBuffer(fakeCpuPtr, fakeGpuAddress, allocationSize);
    NEO::MockGraphicsAllocation globalConstBuffer(fakeCpuPtr, fakeGpuAddress, allocationSize);

    immutableData.initialize(&info, device, 0, &globalConstBuffer, &globalVarBuffer, false);

    auto &residency = immutableData.getResidencyContainer();
    const auto varBufferHits = std::count(residency.begin(), residency.end(), &globalVarBuffer);
    const auto constBufferHits = std::count(residency.begin(), residency.end(), &globalConstBuffer);
    EXPECT_EQ(1, varBufferHits);
    EXPECT_EQ(1, constBufferHits);
}
1461 
// When a debugger is installed and the kernel carries debug data,
// initialize() must call registerElf(); the mock records the call by writing
// 123 into debugData->vIsaSize.
TEST_F(KernelIsaTests, givenDebugONAndKernelDegugInfoWhenInitializingImmutableDataThenRegisterElf) {
    // Debugger stub whose registerElf() leaves an observable marker.
    class MockDebugger : public DebuggerL0 {
      public:
        MockDebugger(NEO::Device *neodev) : DebuggerL0(neodev) {
        }
        void registerElf(NEO::DebugData *debugData, NEO::GraphicsAllocation *isaAllocation) override {
            debugData->vIsaSize = 123;
        }
        size_t getSbaTrackingCommandsSize(size_t trackedAddressCount) override {
            return static_cast<size_t>(0);
        }
        void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) override {
        }
    };

    // Minimal kernel description with debug data attached.
    uint32_t isaStorage = 0;
    KernelInfo info;
    info.heapInfo.pKernelHeap = &isaStorage;
    info.heapInfo.KernelHeapSize = 1;
    info.kernelDescriptor.external.debugData.reset(new DebugData);

    // Ownership of the mock is handed to the root device environment.
    MockDebugger *mockDebugger = new MockDebugger(neoDevice);
    neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->debugger.reset(static_cast<NEO::Debugger *>(mockDebugger));

    KernelImmutableData immutableData(device);
    immutableData.initialize(&info, device, 0, nullptr, nullptr, false);

    EXPECT_EQ(info.kernelDescriptor.external.debugData->vIsaSize, static_cast<uint32_t>(123));
}
1487 
// When a debugger is installed but the kernel has no debug data,
// the debug data pointer must still be null after initialize().
TEST_F(KernelIsaTests, givenDebugONAndNoKernelDegugInfoWhenInitializingImmutableDataThenDoNotRegisterElf) {
    // Debugger stub; its registerElf() would dereference the debug data pointer.
    class MockDebugger : public DebuggerL0 {
      public:
        MockDebugger(NEO::Device *neodev) : DebuggerL0(neodev) {
        }
        void registerElf(NEO::DebugData *debugData, NEO::GraphicsAllocation *isaAllocation) override {
            debugData->vIsaSize = 123;
        }
        size_t getSbaTrackingCommandsSize(size_t trackedAddressCount) override {
            return static_cast<size_t>(0);
        }
        void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) override {
        }
    };

    // Minimal kernel description with debug data explicitly cleared.
    uint32_t isaStorage = 0;
    KernelInfo info;
    info.heapInfo.pKernelHeap = &isaStorage;
    info.heapInfo.KernelHeapSize = 1;
    info.kernelDescriptor.external.debugData.reset(nullptr);

    // Ownership of the mock is handed to the root device environment.
    MockDebugger *mockDebugger = new MockDebugger(neoDevice);
    neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->debugger.reset(static_cast<NEO::Debugger *>(mockDebugger));

    KernelImmutableData immutableData(device);
    immutableData.initialize(&info, device, 0, nullptr, nullptr, false);

    EXPECT_EQ(info.kernelDescriptor.external.debugData, nullptr);
}
1512 
1513 using KernelImpPatchBindlessTest = Test<ModuleFixture>;
1514 
TEST_F(KernelImpPatchBindlessTest,GivenKernelImpWhenPatchBindlessOffsetCalledThenOffsetPatchedCorrectly)1515 TEST_F(KernelImpPatchBindlessTest, GivenKernelImpWhenPatchBindlessOffsetCalledThenOffsetPatchedCorrectly) {
1516     Mock<Kernel> kernel;
1517     neoDevice->incRefInternal();
1518     neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
1519                                                                                                                              neoDevice->getNumGenericSubDevices() > 1,
1520                                                                                                                              neoDevice->getRootDeviceIndex(),
1521                                                                                                                              neoDevice->getDeviceBitfield());
1522     Mock<Module> mockModule(device, nullptr);
1523     kernel.module = &mockModule;
1524     NEO::MockGraphicsAllocation alloc;
1525     uint32_t bindless = 0x40;
1526     auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
1527     size_t size = hwHelper.getRenderSurfaceStateSize();
1528     auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(size, &alloc, NEO::BindlessHeapsHelper::GLOBAL_SSH);
1529     auto patchLocation = ptrOffset(kernel.getCrossThreadData(), bindless);
1530     auto patchValue = hwHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(expectedSsInHeap.surfaceStateOffset));
1531 
1532     auto ssPtr = kernel.patchBindlessSurfaceState(&alloc, bindless);
1533 
1534     EXPECT_EQ(ssPtr, expectedSsInHeap.ssPtr);
1535     EXPECT_TRUE(memcmp(const_cast<uint8_t *>(patchLocation), &patchValue, sizeof(patchValue)) == 0);
1536     EXPECT_TRUE(std::find(kernel.getResidencyContainer().begin(), kernel.getResidencyContainer().end(), expectedSsInHeap.heapAllocation) != kernel.getResidencyContainer().end());
1537     neoDevice->decRefInternal();
1538 }
1539 
// With a valid bindless offset and an undefined bindful offset on argument 0,
// setBufferSurfaceState() must change the surface state that allocateSSInHeap()
// returns for the mock allocation (it is zeroed before the call).
HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindlessThenSurfaceStateUpdated, MatchAny) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    WhiteBoxKernelHw<gfxCoreFamily> kernelHw;
    kernelHw.module = module.get();
    kernelHw.initialize(&kernelDesc);

    // The descriptor is immutable by design; the test patches it in place.
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(kernelHw.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
    pointerArg.bindless = 0x40;
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;

    auto &rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()];
    rootDeviceEnvironment->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
                                                     neoDevice->getNumGenericSubDevices() > 1,
                                                     neoDevice->getRootDeviceIndex(),
                                                     neoDevice->getDeviceBitfield());

    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    size_t surfaceStateSize = hwHelper.getRenderSurfaceStateSize();
    uint64_t fakeGpuAddress = 0x2000;
    void *fakeBuffer = reinterpret_cast<void *>(fakeGpuAddress);

    NEO::MockGraphicsAllocation mockAllocation(fakeBuffer, fakeGpuAddress, surfaceStateSize);
    auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(surfaceStateSize, &mockAllocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);

    // Snapshot the zeroed surface state, run the call, then snapshot again.
    memset(expectedSsInHeap.ssPtr, 0, surfaceStateSize);
    auto *surfaceStateInHeap = reinterpret_cast<RENDER_SURFACE_STATE *>(expectedSsInHeap.ssPtr);
    auto stateBefore = *surfaceStateInHeap;
    kernelHw.setBufferSurfaceState(0, fakeBuffer, &mockAllocation);
    auto stateAfter = *surfaceStateInHeap;

    EXPECT_FALSE(memcmp(&stateAfter, &stateBefore, surfaceStateSize) == 0);
}
1574 
// With a valid bindful offset and an undefined bindless offset on argument 0,
// setBufferSurfaceState() must leave the surface state returned by
// allocateSSInHeap() for the mock allocation untouched (still all zeros).
HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindfulThenSurfaceStateNotUpdated, MatchAny) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    WhiteBoxKernelHw<gfxCoreFamily> kernelHw;
    kernelHw.module = module.get();
    kernelHw.initialize(&kernelDesc);

    // The descriptor is immutable by design; the test patches it in place.
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(kernelHw.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
    pointerArg.bindless = undefined<CrossThreadDataOffset>;
    pointerArg.bindful = 0x40;

    auto &rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()];
    rootDeviceEnvironment->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
                                                     neoDevice->getNumGenericSubDevices() > 1,
                                                     neoDevice->getRootDeviceIndex(),
                                                     neoDevice->getDeviceBitfield());

    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    size_t surfaceStateSize = hwHelper.getRenderSurfaceStateSize();
    uint64_t fakeGpuAddress = 0x2000;
    void *fakeBuffer = reinterpret_cast<void *>(fakeGpuAddress);

    NEO::MockGraphicsAllocation mockAllocation(fakeBuffer, fakeGpuAddress, surfaceStateSize);
    auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(surfaceStateSize, &mockAllocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);

    // Snapshot the zeroed surface state, run the call, then snapshot again.
    memset(expectedSsInHeap.ssPtr, 0, surfaceStateSize);
    auto *surfaceStateInHeap = reinterpret_cast<RENDER_SURFACE_STATE *>(expectedSsInHeap.ssPtr);
    auto stateBefore = *surfaceStateInHeap;
    kernelHw.setBufferSurfaceState(0, fakeBuffer, &mockAllocation);
    auto stateAfter = *surfaceStateInHeap;

    EXPECT_TRUE(memcmp(&stateAfter, &stateBefore, surfaceStateSize) == 0);
}
1609 
1610 using KernelImpL3CachingTests = Test<ModuleFixture>;
1611 
HWTEST2_F(KernelImpL3CachingTests,GivenKernelImpWhenSetSurfaceStateWithUnalignedMemoryThenL3CachingIsDisabled,MatchAny)1612 HWTEST2_F(KernelImpL3CachingTests, GivenKernelImpWhenSetSurfaceStateWithUnalignedMemoryThenL3CachingIsDisabled, MatchAny) {
1613     using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
1614     ze_kernel_desc_t desc = {};
1615     desc.pKernelName = kernelName.c_str();
1616 
1617     WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
1618     mockKernel.module = module.get();
1619     mockKernel.initialize(&desc);
1620 
1621     auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
1622     arg.bindless = undefined<CrossThreadDataOffset>;
1623     arg.bindful = 0x40;
1624 
1625     neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
1626                                                                                                                              neoDevice->getNumGenericSubDevices() > 1,
1627                                                                                                                              neoDevice->getRootDeviceIndex(),
1628                                                                                                                              neoDevice->getDeviceBitfield());
1629     auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
1630     size_t size = hwHelper.getRenderSurfaceStateSize();
1631     uint64_t gpuAddress = 0x2000;
1632     void *buffer = reinterpret_cast<void *>(0x20123);
1633 
1634     NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size);
1635     auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(size, &mockAllocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);
1636 
1637     memset(expectedSsInHeap.ssPtr, 0, size);
1638     mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);
1639     EXPECT_EQ(mockKernel.getKernelRequiresQueueUncachedMocs(), true);
1640 }
1641 
1642 struct MyMockKernel : public Mock<Kernel> {
setBufferSurfaceStateL0::ult::MyMockKernel1643     void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
1644         setSurfaceStateCalled = true;
1645     }
setArgBufferWithAllocL0::ult::MyMockKernel1646     ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) override {
1647         return KernelImp::setArgBufferWithAlloc(argIndex, argVal, allocation);
1648     }
1649     bool setSurfaceStateCalled = false;
1650 };
1651 
// With a valid bindless offset (and undefined bindful offset) on argument 0,
// setArgBufferWithAlloc() must invoke setBufferSurfaceState().
TEST_F(KernelImpPatchBindlessTest, GivenValidBindlessOffsetWhenSetArgBufferWithAllocThensetBufferSurfaceStateCalled) {
    MyMockKernel kernelUnderTest;
    kernelUnderTest.module = module.get();

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();
    kernelUnderTest.initialize(&kernelDesc);

    // The descriptor is immutable by design; the test patches it in place.
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(kernelUnderTest.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;
    pointerArg.bindless = 0x40;

    NEO::MockGraphicsAllocation mockAllocation;
    kernelUnderTest.setArgBufferWithAlloc(0, 0x1234, &mockAllocation);

    EXPECT_TRUE(kernelUnderTest.setSurfaceStateCalled);
}
1670 
// With a valid bindful offset (and undefined bindless offset) on argument 0,
// setArgBufferWithAlloc() must invoke setBufferSurfaceState().
TEST_F(KernelImpPatchBindlessTest, GivenValidBindfulOffsetWhenSetArgBufferWithAllocThensetBufferSurfaceStateCalled) {
    MyMockKernel kernelUnderTest;
    kernelUnderTest.module = module.get();

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();
    kernelUnderTest.initialize(&kernelDesc);

    // The descriptor is immutable by design; the test patches it in place.
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(kernelUnderTest.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
    pointerArg.bindful = 0x40;
    pointerArg.bindless = undefined<CrossThreadDataOffset>;

    NEO::MockGraphicsAllocation mockAllocation;
    kernelUnderTest.setArgBufferWithAlloc(0, 0x1234, &mockAllocation);

    EXPECT_TRUE(kernelUnderTest.setSurfaceStateCalled);
}
1689 
// When both the bindful and bindless offsets of argument 0 are undefined,
// setArgBufferWithAlloc() must not invoke setBufferSurfaceState().
TEST_F(KernelImpPatchBindlessTest, GivenUndefiedBidfulAndBindlesstOffsetWhenSetArgBufferWithAllocThenSetBufferSurfaceStateIsNotCalled) {
    MyMockKernel kernelUnderTest;
    kernelUnderTest.module = module.get();

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();
    kernelUnderTest.initialize(&kernelDesc);

    // The descriptor is immutable by design; the test patches it in place.
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(kernelUnderTest.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;
    pointerArg.bindless = undefined<CrossThreadDataOffset>;

    NEO::MockGraphicsAllocation mockAllocation;
    kernelUnderTest.setArgBufferWithAlloc(0, 0x1234, &mockAllocation);

    EXPECT_FALSE(kernelUnderTest.setSurfaceStateCalled);
}
1708 
1709 using KernelBindlessUncachedMemoryTests = Test<ModuleFixture>;
1710 
TEST_F(KernelBindlessUncachedMemoryTests,givenBindlessKernelAndAllocDataNoTfoundThenKernelRequiresUncachedMocsIsSet)1711 TEST_F(KernelBindlessUncachedMemoryTests, givenBindlessKernelAndAllocDataNoTfoundThenKernelRequiresUncachedMocsIsSet) {
1712     ze_kernel_desc_t desc = {};
1713     desc.pKernelName = kernelName.c_str();
1714     MyMockKernel mockKernel;
1715 
1716     mockKernel.module = module.get();
1717     mockKernel.initialize(&desc);
1718 
1719     auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
1720     arg.bindless = undefined<CrossThreadDataOffset>;
1721     arg.bindful = undefined<SurfaceStateHeapOffset>;
1722 
1723     NEO::MockGraphicsAllocation alloc;
1724 
1725     mockKernel.setArgBufferWithAlloc(0, 0x1234, &alloc);
1726     EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs());
1727 }
1728 
// Binds two device allocations created with default flags to argument 0, one after the
// other, and expects getKernelRequiresUncachedMocs() to report false after each of them.
TEST_F(KernelBindlessUncachedMemoryTests,
       givenNonUncachedAllocationSetAsArgumentFollowedByNonUncachedAllocationThenRequiresUncachedMocsIsCorrectlySet) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    const auto &kernelDescriptor = mockKernel.kernelImmData->getDescriptor();
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(kernelDescriptor.payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
    pointerArg.bindless = undefined<CrossThreadDataOffset>;
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;

    // One round: allocate device memory with default flags, set it as argument 0,
    // check the uncached-MOCS state and free the memory again.
    auto bindDefaultDeviceAllocation = [&]() {
        ze_device_mem_alloc_desc_t memDesc = {};
        void *deviceMem = nullptr;
        const ze_result_t allocResult = context->allocDeviceMem(device->toHandle(), &memDesc, 16384u, 0u, &deviceMem);
        EXPECT_EQ(ZE_RESULT_SUCCESS, allocResult);

        auto svmData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceMem);
        auto graphicsAllocation = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        EXPECT_NE(nullptr, graphicsAllocation);

        mockKernel.setArgBufferWithAlloc(0, 0x1234, graphicsAllocation);
        EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs());
        context->freeMem(deviceMem);
    };

    bindDefaultDeviceAllocation();
    bindDefaultDeviceAllocation();
}
1778 
// Binds two device allocations created with ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED to
// argument 0, one after the other, and expects getKernelRequiresUncachedMocs() to report
// true after each of them.
TEST_F(KernelBindlessUncachedMemoryTests,
       givenUncachedAllocationSetAsArgumentFollowedByUncachedAllocationThenRequiresUncachedMocsIsCorrectlySet) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    const auto &kernelDescriptor = mockKernel.kernelImmData->getDescriptor();
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(kernelDescriptor.payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
    pointerArg.bindless = undefined<CrossThreadDataOffset>;
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;

    // One round: allocate uncached-biased device memory, set it as argument 0,
    // check the uncached-MOCS state and free the memory again.
    auto bindUncachedDeviceAllocation = [&]() {
        ze_device_mem_alloc_desc_t memDesc = {};
        memDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED;
        void *deviceMem = nullptr;
        const ze_result_t allocResult = context->allocDeviceMem(device->toHandle(), &memDesc, 16384u, 0u, &deviceMem);
        EXPECT_EQ(ZE_RESULT_SUCCESS, allocResult);

        auto svmData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceMem);
        auto graphicsAllocation = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        EXPECT_NE(nullptr, graphicsAllocation);

        mockKernel.setArgBufferWithAlloc(0, 0x1234, graphicsAllocation);
        EXPECT_TRUE(mockKernel.getKernelRequiresUncachedMocs());
        context->freeMem(deviceMem);
    };

    bindUncachedDeviceAllocation();
    bindUncachedDeviceAllocation();
}
1830 
// Binds an uncached-biased device allocation to argument 0 and expects
// getKernelRequiresUncachedMocs() to report true; then replaces it with a default device
// allocation and expects the value to drop back to false.
TEST_F(KernelBindlessUncachedMemoryTests,
       givenUncachedAllocationSetAsArgumentFollowedByNonUncachedAllocationThenRequiresUncachedMocsIsCorrectlySet) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    const auto &kernelDescriptor = mockKernel.kernelImmData->getDescriptor();
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(kernelDescriptor.payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
    pointerArg.bindless = undefined<CrossThreadDataOffset>;
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;

    // One round: allocate device memory with the given flags, set it as argument 0,
    // compare the uncached-MOCS state with the expectation and free the memory again.
    auto bindDeviceAllocation = [&](ze_device_mem_alloc_flags_t allocFlags, bool expectUncachedMocs) {
        ze_device_mem_alloc_desc_t memDesc = {};
        memDesc.flags = allocFlags;
        void *deviceMem = nullptr;
        const ze_result_t allocResult = context->allocDeviceMem(device->toHandle(), &memDesc, 16384u, 0u, &deviceMem);
        EXPECT_EQ(ZE_RESULT_SUCCESS, allocResult);

        auto svmData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceMem);
        auto graphicsAllocation = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        EXPECT_NE(nullptr, graphicsAllocation);

        mockKernel.setArgBufferWithAlloc(0, 0x1234, graphicsAllocation);
        EXPECT_EQ(expectUncachedMocs, mockKernel.getKernelRequiresUncachedMocs());
        context->freeMem(deviceMem);
    };

    bindDeviceAllocation(ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED, true);
    bindDeviceAllocation(0u, false);
}
1881 
// Host-memory variant: binds an uncached-biased host allocation to argument 0 and expects
// getKernelRequiresUncachedMocs() to report true; then replaces it with a default host
// allocation and expects the value to drop back to false.
TEST_F(KernelBindlessUncachedMemoryTests,
       givenUncachedHostAllocationSetAsArgumentFollowedByNonUncachedHostAllocationThenRequiresUncachedMocsIsCorrectlySet) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    const auto &kernelDescriptor = mockKernel.kernelImmData->getDescriptor();
    auto &pointerArg = const_cast<NEO::ArgDescPointer &>(kernelDescriptor.payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
    pointerArg.bindless = undefined<CrossThreadDataOffset>;
    pointerArg.bindful = undefined<SurfaceStateHeapOffset>;

    // One round: allocate host memory with the given flags, set it as argument 0,
    // compare the uncached-MOCS state with the expectation and free the memory again.
    auto bindHostAllocation = [&](ze_host_mem_alloc_flags_t allocFlags, bool expectUncachedMocs) {
        ze_host_mem_alloc_desc_t memDesc = {};
        memDesc.flags = allocFlags;
        void *hostMem = nullptr;
        const ze_result_t allocResult = context->allocHostMem(&memDesc, 16384u, 0u, &hostMem);
        EXPECT_EQ(ZE_RESULT_SUCCESS, allocResult);

        auto svmData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(hostMem);
        auto graphicsAllocation = svmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        EXPECT_NE(nullptr, graphicsAllocation);

        mockKernel.setArgBufferWithAlloc(0, 0x1234, graphicsAllocation);
        EXPECT_EQ(expectUncachedMocs, mockKernel.getKernelRequiresUncachedMocs());
        context->freeMem(hostMem);
    };

    bindHostAllocation(ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED, true);
    bindHostAllocation(0u, false);
}
1930 
1931 template <GFXCORE_FAMILY gfxCoreFamily>
1932 struct MyMockImage : public WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>> {
1933     //MyMockImage() : WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>();
copySurfaceStateToSSHL0::ult::MyMockImage1934     void copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset, bool isMediaBlockArg) override {
1935         passedSurfaceStateHeap = surfaceStateHeap;
1936         passedSurfaceStateOffset = surfaceStateOffset;
1937     }
1938     void *passedSurfaceStateHeap = nullptr;
1939     uint32_t passedSurfaceStateOffset = 0;
1940 };
1941 
// Forces the kernel's image addressing mode to bindless and expects setArgImage() to call
// copySurfaceStateToSSH() with the surface-state pointer returned by the bindless heaps
// helper and with offset 0.
HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgImageThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {
    createKernel();

    const auto rootDeviceIndex = neoDevice->getRootDeviceIndex();
    auto &rootDeviceEnvironment = *neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex];
    rootDeviceEnvironment.createBindlessHeapsHelper(neoDevice->getMemoryManager(),
                                                    neoDevice->getNumGenericSubDevices() > 1,
                                                    rootDeviceIndex,
                                                    neoDevice->getDeviceBitfield());

    // The descriptor is only reachable as const; constness is cast away to rewrite argument 3
    // and the image addressing mode.
    const auto &kernelDescriptor = kernel->kernelImmData->getDescriptor();
    auto &imageArg = const_cast<NEO::ArgDescImage &>(kernelDescriptor.payloadMappings.explicitArgs[3].template as<NEO::ArgDescImage>());
    const_cast<NEO::KernelDescriptor::AddressingMode &>(kernelDescriptor.kernelAttributes.imageAddressingMode) = NEO::KernelDescriptor::Bindless;
    imageArg.bindless = 0x0;
    imageArg.bindful = undefined<SurfaceStateHeapOffset>;

    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    auto &hwHelper = NEO::HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily);
    const auto surfaceStateSize = hwHelper.getRenderSurfaceStateSize();

    auto mockImage = std::make_unique<MyMockImage<gfxCoreFamily>>();
    const auto initResult = mockImage->initialize(device, &imageDesc);
    auto imageHandle = mockImage->toHandle();
    ASSERT_EQ(ZE_RESULT_SUCCESS, initResult);

    // Request the surface state for this image up front; its pointer is the expected heap argument.
    auto expectedSsInHeap = rootDeviceEnvironment.getBindlessHeapsHelper()->allocateSSInHeap(surfaceStateSize, mockImage->getAllocation(), BindlessHeapsHelper::BindlesHeapType::GLOBAL_SSH);

    kernel->setArgImage(3, sizeof(mockImage.get()), &imageHandle);

    EXPECT_EQ(mockImage->passedSurfaceStateHeap, expectedSsInHeap.ssPtr);
    EXPECT_EQ(mockImage->passedSurfaceStateOffset, 0u);
}
1971 
// Forces the kernel's image addressing mode to bindful and expects setArgImage() to call
// copySurfaceStateToSSH() with the kernel's own surface state heap and the bindful offset
// of the image argument.
HWTEST2_F(SetKernelArg, givenImageAndBindfulKernelWhenSetArgImageThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {
    createKernel();

    auto &imageArg = const_cast<NEO::ArgDescImage &>(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[3].template as<NEO::ArgDescImage>());
    // Must be bound by reference: a plain `auto` would deduce a value type, and the
    // assignment below would then only change a local copy instead of the descriptor.
    auto &addressingMode = const_cast<NEO::KernelDescriptor::AddressingMode &>(kernel->kernelImmData->getDescriptor().kernelAttributes.imageAddressingMode);
    addressingMode = NEO::KernelDescriptor::Bindful;
    imageArg.bindless = undefined<CrossThreadDataOffset>;
    imageArg.bindful = 0x40;
    ze_image_desc_t desc = {};
    desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;

    auto imageHW = std::make_unique<MyMockImage<gfxCoreFamily>>();
    auto ret = imageHW->initialize(device, &desc);
    auto handle = imageHW->toHandle();
    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);

    kernel->setArgImage(3, sizeof(imageHW.get()), &handle);

    EXPECT_EQ(imageHW->passedSurfaceStateHeap, kernel->getSurfaceStateHeapData());
    EXPECT_EQ(imageHW->passedSurfaceStateOffset, imageArg.bindful);
}
1993 
1994 template <GFXCORE_FAMILY gfxCoreFamily>
1995 struct MyMockImageMediaBlock : public WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>> {
copySurfaceStateToSSHL0::ult::MyMockImageMediaBlock1996     void copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset, bool isMediaBlockArg) override {
1997         isMediaBlockPassedValue = isMediaBlockArg;
1998     }
1999     bool isMediaBlockPassedValue = false;
2000 };
2001 
// Walks through all four combinations of the device capability supportsMediaBlock and the
// argument's isMediaBlockImage flag, and checks the media-block value that setArgImage()
// forwards to copySurfaceStateToSSH(): true only when both are true.
HWTEST2_F(SetKernelArg, givenSupportsMediaBlockAndIsMediaBlockImageWhenSetArgImageIsCalledThenIsMediaBlockArgIsPassedCorrectly, ImageSupport) {
    auto hwInfo = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
    createKernel();

    const auto argIndex = 3u;
    auto &imageArgDescriptor = const_cast<NEO::ArgDescriptor &>(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex]);

    auto mockImage = std::make_unique<MyMockImageMediaBlock<gfxCoreFamily>>();
    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    const auto initResult = mockImage->initialize(device, &imageDesc);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initResult);
    auto imageHandle = mockImage->toHandle();

    struct MediaBlockCase {
        bool deviceSupportsMediaBlock;
        bool argIsMediaBlockImage;
        bool expectedPassedValue;
    };
    const MediaBlockCase mediaBlockCases[] = {
        {true, true, true},
        {false, true, false},
        {true, false, false},
        {false, false, false},
    };

    for (const auto &mediaBlockCase : mediaBlockCases) {
        hwInfo->capabilityTable.supportsMediaBlock = mediaBlockCase.deviceSupportsMediaBlock;
        imageArgDescriptor.getExtendedTypeInfo().isMediaBlockImage = mediaBlockCase.argIsMediaBlockImage;
        kernel->setArgImage(argIndex, sizeof(mockImage.get()), &imageHandle);
        EXPECT_EQ(mediaBlockCase.expectedPassedValue, mockImage->isMediaBlockPassedValue);
    }
}
2039 
2040 using ImportHostPointerSetKernelArg = Test<ImportHostPointerModuleFixture>;
TEST_F(ImportHostPointerSetKernelArg,givenHostPointerImportedWhenSettingKernelArgThenUseHostPointerAllocation)2041 TEST_F(ImportHostPointerSetKernelArg, givenHostPointerImportedWhenSettingKernelArgThenUseHostPointerAllocation) {
2042     createKernel();
2043 
2044     auto ret = driverHandle->importExternalPointer(hostPointer, MemoryConstants::pageSize);
2045     EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
2046 
2047     ret = kernel->setArgBuffer(0, sizeof(hostPointer), &hostPointer);
2048     EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
2049 
2050     ret = driverHandle->releaseImportedPointer(hostPointer);
2051     EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
2052 }
2053 
2054 class KernelGlobalWorkOffsetTests : public ModuleFixture, public ::testing::Test {
2055   public:
SetUp()2056     void SetUp() override {
2057         ModuleFixture::SetUp();
2058 
2059         ze_kernel_desc_t kernelDesc = {};
2060         kernelDesc.pKernelName = kernelName.c_str();
2061 
2062         ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle);
2063         EXPECT_EQ(ZE_RESULT_SUCCESS, res);
2064 
2065         kernel = L0::Kernel::fromHandle(kernelHandle);
2066     }
2067 
TearDown()2068     void TearDown() override {
2069         Kernel::fromHandle(kernelHandle)->destroy();
2070         ModuleFixture::TearDown();
2071     }
2072 
2073     ze_kernel_handle_t kernelHandle;
2074     L0::Kernel *kernel = nullptr;
2075 };
2076 
// Sets a global offset of (10, 20, 30) and expects getGlobalOffsets() to return the same
// three values in x, y, z order.
TEST_F(KernelGlobalWorkOffsetTests, givenCallToSetGlobalWorkOffsetThenOffsetsAreSet) {
    const uint32_t expectedOffsets[3] = {10, 20, 30};

    const ze_result_t setResult = kernel->setGlobalOffsetExp(expectedOffsets[0], expectedOffsets[1], expectedOffsets[2]);
    EXPECT_EQ(ZE_RESULT_SUCCESS, setResult);

    auto *kernelImp = static_cast<KernelImp *>(kernel);
    for (auto dim = 0u; dim < 3u; dim++) {
        EXPECT_EQ(expectedOffsets[dim], kernelImp->getGlobalOffsets()[dim]);
    }
}
2090 
// Sets a global offset of (10, 20, 30), calls patchGlobalOffset() and checks the cross
// thread data at the globalWorkOffset positions given by the kernel descriptor.
TEST_F(KernelGlobalWorkOffsetTests, whenSettingGlobalOffsetThenCrossThreadDataIsPatched) {
    const uint32_t expectedOffsets[3] = {10, 20, 30};

    const ze_result_t setResult = kernel->setGlobalOffsetExp(expectedOffsets[0], expectedOffsets[1], expectedOffsets[2]);
    EXPECT_EQ(ZE_RESULT_SUCCESS, setResult);

    auto *kernelImp = static_cast<KernelImp *>(kernel);
    kernelImp->patchGlobalOffset();

    const NEO::KernelDescriptor &kernelDescriptor = kernelImp->getImmutableData()->getDescriptor();
    auto crossThreadView = ArrayRef<const uint8_t>(kernelImp->getCrossThreadData(), kernelImp->getCrossThreadDataSize());
    const auto &patchPositions = kernelDescriptor.payloadMappings.dispatchTraits.globalWorkOffset;

    // The view is byte-typed, so each check compares the single byte at the patch position.
    for (auto dim = 0u; dim < 3u; dim++) {
        EXPECT_EQ(*(crossThreadView.begin() + patchPositions[dim]), expectedOffsets[dim]);
    }
}
2108 
2109 using KernelWorkDimTests = Test<ModuleImmutableDataFixture>;
2110 
TEST_F(KernelWorkDimTests,givenGroupCountsWhenPatchingWorkDimThenCrossThreadDataIsPatched)2111 TEST_F(KernelWorkDimTests, givenGroupCountsWhenPatchingWorkDimThenCrossThreadDataIsPatched) {
2112     uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
2113 
2114     std::unique_ptr<MockImmutableData> mockKernelImmData =
2115         std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
2116 
2117     createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, false, mockKernelImmData.get());
2118     auto kernel = std::make_unique<MockKernel>(module.get());
2119     createKernel(kernel.get());
2120     kernel->setCrossThreadData(sizeof(uint32_t));
2121 
2122     mockKernelImmData->mockKernelDescriptor->payloadMappings.dispatchTraits.workDim = 0x0u;
2123 
2124     auto destinationBuffer = ArrayRef<const uint8_t>(kernel->getCrossThreadData(), kernel->getCrossThreadDataSize());
2125     auto &kernelDescriptor = mockKernelImmData->getDescriptor();
2126     auto workDimInCrossThreadDataPtr = destinationBuffer.begin() + kernelDescriptor.payloadMappings.dispatchTraits.workDim;
2127     EXPECT_EQ(*workDimInCrossThreadDataPtr, 0u);
2128 
2129     std::array<std::array<uint32_t, 7>, 8> sizesCountsWorkDim = {{{2, 1, 1, 1, 1, 1, 1},
2130                                                                   {1, 1, 1, 1, 1, 1, 1},
2131                                                                   {1, 2, 1, 2, 1, 1, 2},
2132                                                                   {1, 2, 1, 1, 1, 1, 2},
2133                                                                   {1, 1, 1, 1, 2, 1, 2},
2134                                                                   {1, 1, 1, 2, 2, 2, 3},
2135                                                                   {1, 1, 2, 1, 1, 1, 3},
2136                                                                   {1, 1, 1, 1, 1, 2, 3}}};
2137 
2138     for (auto &[groupSizeX, groupSizeY, groupSizeZ, groupCountX, groupCountY, groupCountZ, expectedWorkDim] : sizesCountsWorkDim) {
2139         ze_result_t res = kernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
2140         EXPECT_EQ(res, ZE_RESULT_SUCCESS);
2141         kernel->setGroupCount(groupCountX, groupCountY, groupCountZ);
2142         EXPECT_EQ(*workDimInCrossThreadDataPtr, expectedWorkDim);
2143     }
2144 }
2145 
2146 using KernelPrintHandlerTest = Test<ModuleFixture>;
2147 struct MyPrintfHandler : public PrintfHandler {
getPrintfSurfaceInitialDataSizeL0::ult::MyPrintfHandler2148     static uint32_t getPrintfSurfaceInitialDataSize() {
2149         return PrintfHandler::printfSurfaceInitialDataSize;
2150     }
2151 };
2152 
// Initializes a white-box kernel, calls printPrintfOutput() and expects the first 32-bit
// word of the printf buffer to equal PrintfHandler's initial data size.
TEST_F(KernelPrintHandlerTest, whenPrintPrintfOutputIsCalledThenPrintfBufferIsUsed) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    kernel = std::make_unique<WhiteBox<::L0::Kernel>>();
    kernel->module = module.get();
    kernel->initialize(&kernelDesc);

    EXPECT_FALSE(kernel->printfBuffer == nullptr);
    kernel->printPrintfOutput();

    const auto *printfBufferStart = reinterpret_cast<uint32_t *>(kernel->printfBuffer->getUnderlyingBuffer());
    const auto firstWord = *printfBufferStart;
    EXPECT_EQ(firstWord, MyPrintfHandler::getPrintfSurfaceInitialDataSize());
}
2166 
2167 using PrintfTest = Test<DeviceFixture>;
2168 
TEST_F(PrintfTest,givenKernelWithPrintfThenPrintfBufferIsCreated)2169 TEST_F(PrintfTest, givenKernelWithPrintfThenPrintfBufferIsCreated) {
2170     Mock<Module> mockModule(this->device, nullptr);
2171     Mock<Kernel> mockKernel;
2172     mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
2173     mockKernel.module = &mockModule;
2174 
2175     EXPECT_TRUE(mockKernel.getImmutableData()->getDescriptor().kernelAttributes.flags.usesPrintf);
2176 
2177     ze_kernel_desc_t kernelDesc = {};
2178     kernelDesc.pKernelName = "mock";
2179     mockKernel.createPrintfBuffer();
2180     EXPECT_NE(nullptr, mockKernel.getPrintfBufferAllocation());
2181 }
2182 
// With usesPrintf cleared in the kernel descriptor, createPrintfBuffer() is expected to
// leave the printf buffer allocation null.
TEST_F(PrintfTest, GivenKernelNotUsingPrintfWhenCreatingPrintfBufferThenAllocationIsNotCreated) {
    Mock<Module> mockModule(this->device, nullptr);
    Mock<Kernel> mockKernel;
    mockKernel.descriptor.kernelAttributes.flags.usesPrintf = false;
    mockKernel.module = &mockModule;

    mockKernel.createPrintfBuffer();
    EXPECT_EQ(nullptr, mockKernel.getPrintfBufferAllocation());
}
2194 
// After createPrintfBuffer() on a kernel that uses printf, the printf buffer allocation
// is expected to be the last entry of the kernel's residency container.
TEST_F(PrintfTest, WhenCreatingPrintfBufferThenAllocationAddedToResidencyContainer) {
    Mock<Module> mockModule(this->device, nullptr);
    Mock<Kernel> mockKernel;
    mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
    mockKernel.module = &mockModule;

    mockKernel.createPrintfBuffer();

    auto printfBufferAllocation = mockKernel.getPrintfBufferAllocation();
    EXPECT_NE(nullptr, printfBufferAllocation);

    EXPECT_NE(0u, mockKernel.residencyContainer.size());
    EXPECT_EQ(mockKernel.residencyContainer[mockKernel.residencyContainer.size() - 1], printfBufferAllocation);
}
2211 
// Points the kernel's cross thread data at a local buffer, declares the printf surface
// address to live at stateless offset 0, and expects createPrintfBuffer() to write the
// allocation's getGpuAddressToPatch() value there.
TEST_F(PrintfTest, WhenCreatingPrintfBufferThenCrossThreadDataIsPatched) {
    Mock<Module> mockModule(this->device, nullptr);
    Mock<Kernel> mockKernel;
    mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
    mockKernel.module = &mockModule;

    auto crossThreadData = std::make_unique<uint32_t[]>(4);

    mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless = 0;
    mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.pointerSize = sizeof(uintptr_t);
    // The kernel is handed the local buffer without a copy; see the release() call at the end.
    mockKernel.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData.get()));
    mockKernel.crossThreadDataSize = sizeof(uint32_t[4]);

    mockKernel.createPrintfBuffer();

    auto printfBufferAllocation = mockKernel.getPrintfBufferAllocation();
    EXPECT_NE(nullptr, printfBufferAllocation);

    auto printfBufferAddressPatched = *reinterpret_cast<uintptr_t *>(crossThreadData.get());
    auto printfBufferGpuAddressOffset = static_cast<uintptr_t>(printfBufferAllocation->getGpuAddressToPatch());
    EXPECT_EQ(printfBufferGpuAddressOffset, printfBufferAddressPatched);

    // Detach the borrowed buffer from the kernel so that only the local unique_ptr frees it.
    mockKernel.crossThreadData.release();
}
2239 
2240 using KernelImplicitArgTests = Test<ModuleImmutableDataFixture>;
2241 
TEST_F(KernelImplicitArgTests,givenKernelWithImplicitArgsWhenInitializeThenPrintfSurfaceIsCreatedAndProperlyPatchedInImplicitArgs)2242 TEST_F(KernelImplicitArgTests, givenKernelWithImplicitArgsWhenInitializeThenPrintfSurfaceIsCreatedAndProperlyPatchedInImplicitArgs) {
2243     std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
2244     mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;
2245     mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesPrintf = false;
2246 
2247     createModuleFromBinary(0u, false, mockKernelImmData.get());
2248 
2249     auto kernel = std::make_unique<MockKernel>(module.get());
2250 
2251     ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
2252     kernel->initialize(&kernelDesc);
2253 
2254     EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
2255     auto pImplicitArgs = kernel->getImplicitArgs();
2256     ASSERT_NE(nullptr, pImplicitArgs);
2257 
2258     auto printfSurface = kernel->getPrintfBufferAllocation();
2259     ASSERT_NE(nullptr, printfSurface);
2260 
2261     EXPECT_NE(0u, pImplicitArgs->printfBufferPtr);
2262     EXPECT_EQ(printfSurface->getGpuAddress(), pImplicitArgs->printfBufferPtr);
2263 }
2264 
// With requiresImplicitArgs set, kernel initialization is expected to create an
// ImplicitArgs structure whose structSize is sizeof(ImplicitArgs) and structVersion is 0.
TEST_F(KernelImplicitArgTests, givenImplicitArgsRequiredWhenCreatingKernelThenImplicitArgsAreCreated) {
    auto mockKernelImmData = std::make_unique<MockImmutableData>(0u);
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;

    createModuleFromBinary(0u, false, mockKernelImmData.get());

    auto mockKernel = std::make_unique<MockKernel>(module.get());
    ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
    mockKernel->initialize(&kernelDesc);

    EXPECT_TRUE(mockKernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);

    // Dereferenced below, hence the fatal assertion.
    auto implicitArgs = mockKernel->getImplicitArgs();
    ASSERT_NE(nullptr, implicitArgs);

    EXPECT_EQ(sizeof(ImplicitArgs), implicitArgs->structSize);
    EXPECT_EQ(0u, implicitArgs->structVersion);
}
2284 
// Sets group size (4, 5, 6), group count (3, 2, 1) and global offset (1, 2, 3) on a kernel
// that requires implicit args, then compares the kernel's ImplicitArgs byte-for-byte with
// a hand-built expectation.
TEST_F(KernelImplicitArgTests, givenKernelWithImplicitArgsWhenSettingKernelParamsThenImplicitArgsAreUpdated) {
    auto mockKernelImmData = std::make_unique<MockImmutableData>(0u);
    auto &kernelAttributes = mockKernelImmData->kernelDescriptor->kernelAttributes;
    kernelAttributes.flags.requiresImplicitArgs = true;
    const auto simdSize = kernelAttributes.simdSize;

    createModuleFromBinary(0u, false, mockKernelImmData.get());

    auto mockKernel = std::make_unique<MockKernel>(module.get());
    ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
    mockKernel->initialize(&kernelDesc);

    EXPECT_TRUE(mockKernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
    auto actualImplicitArgs = mockKernel->getImplicitArgs();
    ASSERT_NE(nullptr, actualImplicitArgs);

    // Expected contents: the global sizes are the products of group size and group count.
    ImplicitArgs expected{sizeof(ImplicitArgs)};
    expected.numWorkDim = 3;
    expected.simdWidth = simdSize;
    expected.localSizeX = 4;
    expected.localSizeY = 5;
    expected.localSizeZ = 6;
    expected.globalSizeX = 12;
    expected.globalSizeY = 10;
    expected.globalSizeZ = 6;
    expected.globalOffsetX = 1;
    expected.globalOffsetY = 2;
    expected.globalOffsetZ = 3;
    expected.groupCountX = 3;
    expected.groupCountY = 2;
    expected.groupCountZ = 1;
    expected.printfBufferPtr = mockKernel->getPrintfBufferAllocation()->getGpuAddress();

    mockKernel->setGroupSize(4, 5, 6);
    mockKernel->setGroupCount(3, 2, 1);
    mockKernel->setGlobalOffsetExp(1, 2, 3);
    mockKernel->patchGlobalOffset();

    EXPECT_EQ(0, memcmp(actualImplicitArgs, &expected, sizeof(ImplicitArgs)));
}
2324 
// Checks that a kernel which requires implicit args (with usesPrintf and
// usesStringMapForPrintf both cleared) still prints the string registered in the
// descriptor's printfStringsMap when printPrintfOutput() is called.
TEST_F(KernelImplicitArgTests, givenKernelWithImplicitArgsAndPrintfStringsMapWhenPrintOutputThenProperStringIsPrinted) {
    std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);

    auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
    kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;
    kernelDescriptor->kernelAttributes.flags.usesPrintf = false;
    kernelDescriptor->kernelAttributes.flags.usesStringMapForPrintf = false;
    std::string expectedString("test123");
    kernelDescriptor->kernelMetadata.printfStringsMap.insert(std::make_pair(0u, expectedString));

    createModuleFromBinary(0u, false, mockKernelImmData.get());

    auto kernel = std::make_unique<MockKernel>(module.get());

    ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
    kernel->initialize(&kernelDesc);

    // Fail cleanly (instead of crashing on a null dereference) if the kernel did not
    // create a printf buffer or the buffer has no CPU-visible storage to write to.
    auto printfBufferAllocation = kernel->getPrintfBufferAllocation();
    ASSERT_NE(nullptr, printfBufferAllocation);
    auto printfAllocation = reinterpret_cast<uint32_t *>(printfBufferAllocation->getUnderlyingBuffer());
    ASSERT_NE(nullptr, printfAllocation);

    // NOTE(review): presumably dword 0 is the number of bytes used in the buffer and
    // dword 1 is the format-string index (0, matching the map key) - confirm against
    // the printf handler's buffer layout.
    printfAllocation[0] = 8;
    printfAllocation[1] = 0;

    EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
    ASSERT_NE(nullptr, kernel->getImplicitArgs());

    testing::internal::CaptureStdout();
    kernel->printPrintfOutput();
    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_STREQ(expectedString.c_str(), output.c_str());
}
2354 
// Checks that for a kernel without implicit args, patchImplicitArgs() neither
// moves the destination pointer nor modifies the destination memory, and that
// the reported patching size is zero.
TEST_F(KernelImplicitArgTests, givenKernelWithoutImplicitArgsWhenPatchingImplicitArgsThenNothingHappens) {
    auto immData = std::make_unique<MockImmutableData>(0u);
    immData->kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false;

    createModuleFromBinary(0u, false, immData.get());

    ze_kernel_desc_t desc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
    auto mockKernel = std::make_unique<MockKernel>(module.get());
    mockKernel->initialize(&desc);
    EXPECT_EQ(nullptr, mockKernel->getImplicitArgs());

    // Two buffers filled with the same byte pattern: one is handed to the kernel,
    // the other is kept untouched as the reference for the final comparison.
    const int fillPattern = 0xcd;
    uint8_t reference[64]{};
    uint8_t patched[64]{};
    memset(patched, fillPattern, sizeof(patched));
    memset(reference, fillPattern, sizeof(reference));

    EXPECT_EQ(0u, mockKernel->getSizeForImplicitArgsPatching());
    void *cursor = patched;
    mockKernel->patchImplicitArgs(cursor);

    // The pointer must still reference the start of the buffer.
    EXPECT_EQ(cursor, patched);

    EXPECT_EQ(0, memcmp(patched, reference, sizeof(patched)));
}
2381 
2382 } // namespace ult
2383 } // namespace L0
2384