1 /*
2  * Copyright (C) 2019-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "shared/source/command_stream/command_stream_receiver.h"
9 #include "shared/source/device/device.h"
10 #include "shared/source/gmm_helper/gmm_helper.h"
11 #include "shared/source/helpers/state_base_address.h"
12 #include "shared/test/common/cmd_parse/hw_parse.h"
13 #include "shared/test/common/helpers/debug_manager_state_restore.h"
14 #include "shared/test/common/test_macros/test.h"
15 #include "shared/test/unit_test/utilities/base_object_utils.h"
16 
17 #include "opencl/extensions/public/cl_ext_private.h"
18 #include "opencl/source/api/api.h"
19 #include "opencl/source/command_queue/command_queue_hw.h"
20 #include "opencl/source/kernel/kernel.h"
21 #include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
22 
23 #include "hw_cmds.h"
24 
25 using namespace NEO;
26 
27 namespace clMemLocallyUncachedResourceTests {
28 
29 template <typename FamilyType>
argMocs(Kernel & kernel,size_t argIndex)30 uint32_t argMocs(Kernel &kernel, size_t argIndex) {
31     using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
32     auto surfaceStateHeapAddress = kernel.getSurfaceStateHeap();
33     auto surfaceStateHeapAddressOffset = static_cast<size_t>(kernel.getKernelInfo().getArgDescriptorAt(static_cast<uint32_t>(argIndex)).as<ArgDescPointer>().bindful);
34     auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset));
35     return surfaceState->getMemoryObjectControlState();
36 }
37 
38 template <typename FamilyType>
cmdQueueMocs(CommandQueue * pCmdQ)39 uint32_t cmdQueueMocs(CommandQueue *pCmdQ) {
40     using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
41     auto pCmdQHw = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
42     auto &csr = pCmdQHw->getGpgpuCommandStreamReceiver();
43     HardwareParse hwParse;
44     hwParse.parseCommands<FamilyType>(csr.getCS(0), 0);
45     auto itorCmd = reverse_find<STATE_BASE_ADDRESS *>(hwParse.cmdList.rbegin(), hwParse.cmdList.rend());
46     EXPECT_NE(hwParse.cmdList.rend(), itorCmd);
47     auto sba = genCmdCast<STATE_BASE_ADDRESS *>(*itorCmd);
48     EXPECT_NE(nullptr, sba);
49 
50     return sba->getStatelessDataPortAccessMemoryObjectControlState();
51 }
52 
53 const size_t n = 512;
54 [[maybe_unused]] const size_t globalWorkSize[3] = {n, 1, 1};
55 [[maybe_unused]] const size_t localWorkSize[3] = {256, 1, 1};
56 
57 [[maybe_unused]] const cl_mem_properties_intel *propertiesCacheable = nullptr;
58 [[maybe_unused]] const cl_mem_properties_intel propertiesUncacheable[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0};
59 [[maybe_unused]] const cl_mem_properties_intel propertiesUncacheableInSurfaceState[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, 0};
60 
61 using clMemLocallyUncachedResourceFixture = Test<HelloWorldFixture<HelloWorldFixtureFactory>>;
62 
HWCMDTEST_F(IGFX_GEN8_CORE,clMemLocallyUncachedResourceFixture,GivenAtLeastOneLocallyUncacheableResourceWhenSettingKernelArgumentsThenKernelIsUncacheable)63 HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, GivenAtLeastOneLocallyUncacheableResourceWhenSettingKernelArgumentsThenKernelIsUncacheable) {
64     cl_int retVal = CL_SUCCESS;
65     MockKernelWithInternals mockKernel(*this->pClDevice, context, true);
66 
67     auto kernel = mockKernel.mockKernel;
68     auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel;
69 
70     auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
71     auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
72     auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
73     auto pBufferCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));
74 
75     auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr);
76     auto pBufferUncacheable1 = clUniquePtr(castToObject<Buffer>(bufferUncacheable1));
77     auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr);
78     auto pBufferUncacheable2 = clUniquePtr(castToObject<Buffer>(bufferUncacheable2));
79 
80     auto mocsCacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
81     auto mocsUncacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
82 
83     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
84     EXPECT_EQ(CL_SUCCESS, retVal);
85     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
86 
87     retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
88     EXPECT_EQ(CL_SUCCESS, retVal);
89     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
90 
91     EXPECT_TRUE(kernel->isPatched());
92     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
93     EXPECT_EQ(CL_SUCCESS, retVal);
94     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
95     EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
96 
97     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable1);
98     EXPECT_EQ(CL_SUCCESS, retVal);
99     EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
100 
101     EXPECT_TRUE(kernel->isPatched());
102     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
103     EXPECT_EQ(CL_SUCCESS, retVal);
104     EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
105     EXPECT_TRUE(kernel->hasUncacheableStatelessArgs());
106 
107     retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferUncacheable2);
108     EXPECT_EQ(CL_SUCCESS, retVal);
109     EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
110 
111     EXPECT_TRUE(kernel->isPatched());
112     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
113     EXPECT_EQ(CL_SUCCESS, retVal);
114     EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
115     EXPECT_TRUE(kernel->hasUncacheableStatelessArgs());
116 
117     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
118     EXPECT_EQ(CL_SUCCESS, retVal);
119     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
120 
121     EXPECT_TRUE(kernel->isPatched());
122     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
123     EXPECT_EQ(CL_SUCCESS, retVal);
124     EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
125     EXPECT_TRUE(kernel->hasUncacheableStatelessArgs());
126 
127     retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
128     EXPECT_EQ(CL_SUCCESS, retVal);
129     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
130 
131     EXPECT_TRUE(kernel->isPatched());
132     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
133     EXPECT_EQ(CL_SUCCESS, retVal);
134     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
135     EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
136 }
137 
HWCMDTEST_F(IGFX_GEN8_CORE,clMemLocallyUncachedResourceFixture,givenBuffersThatAreUncachedInSurfaceStateWhenStatelessIsProgrammedThenItIsCached)138 HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateWhenStatelessIsProgrammedThenItIsCached) {
139     cl_int retVal = CL_SUCCESS;
140 
141     MockKernelWithInternals mockKernel(*this->pClDevice, context, true);
142     auto kernel = mockKernel.mockKernel;
143     auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel;
144 
145     EXPECT_EQ(CL_SUCCESS, retVal);
146 
147     auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
148     auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
149     auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
150     auto pBufferCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));
151 
152     auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, 0, n * sizeof(float), nullptr, nullptr);
153     auto pBufferUncacheable1 = clUniquePtr(castToObject<Buffer>(bufferUncacheable1));
154     auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, 0, n * sizeof(float), nullptr, nullptr);
155     auto pBufferUncacheable2 = clUniquePtr(castToObject<Buffer>(bufferUncacheable2));
156 
157     auto mocsCacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
158     auto mocsUncacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
159 
160     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
161     EXPECT_EQ(CL_SUCCESS, retVal);
162     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
163 
164     retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
165     EXPECT_EQ(CL_SUCCESS, retVal);
166     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
167 
168     EXPECT_TRUE(kernel->isPatched());
169     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
170     EXPECT_EQ(CL_SUCCESS, retVal);
171     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
172 
173     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable1);
174     EXPECT_EQ(CL_SUCCESS, retVal);
175     EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
176     EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
177 
178     EXPECT_TRUE(kernel->isPatched());
179     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
180     EXPECT_EQ(CL_SUCCESS, retVal);
181     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
182 
183     retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferUncacheable2);
184     EXPECT_EQ(CL_SUCCESS, retVal);
185     EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
186     EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
187 
188     EXPECT_TRUE(kernel->isPatched());
189     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
190     EXPECT_EQ(CL_SUCCESS, retVal);
191     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
192 
193     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
194     EXPECT_EQ(CL_SUCCESS, retVal);
195     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
196 
197     EXPECT_TRUE(kernel->isPatched());
198     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
199     EXPECT_EQ(CL_SUCCESS, retVal);
200     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
201 
202     retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
203     EXPECT_EQ(CL_SUCCESS, retVal);
204     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
205 
206     EXPECT_TRUE(kernel->isPatched());
207     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
208     EXPECT_EQ(CL_SUCCESS, retVal);
209     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
210 }
211 
HWCMDTEST_F(IGFX_GEN8_CORE,clMemLocallyUncachedResourceFixture,givenBuffersThatAreUncachedButKernelDoesntHaveAnyStatelessAccessessThenSurfacesAreNotRecordedAsUncacheable)212 HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedButKernelDoesntHaveAnyStatelessAccessessThenSurfacesAreNotRecordedAsUncacheable) {
213     cl_int retVal = CL_SUCCESS;
214 
215     MockKernelWithInternals mockKernel(*this->pClDevice, context, true);
216     auto kernel = mockKernel.mockKernel;
217     auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel;
218     mockKernel.kernelInfo.setBufferStateful(0);
219     mockKernel.kernelInfo.setBufferStateful(1);
220 
221     auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
222     auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
223     auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
224     auto pBufferCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));
225 
226     auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr);
227     auto pBufferUncacheable1 = clUniquePtr(castToObject<Buffer>(bufferUncacheable1));
228     auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr);
229     auto pBufferUncacheable2 = clUniquePtr(castToObject<Buffer>(bufferUncacheable2));
230 
231     auto mocsCacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
232     auto mocsUncacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
233 
234     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
235     EXPECT_EQ(CL_SUCCESS, retVal);
236     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
237 
238     retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
239     EXPECT_EQ(CL_SUCCESS, retVal);
240     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
241 
242     EXPECT_TRUE(kernel->isPatched());
243     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
244     EXPECT_EQ(CL_SUCCESS, retVal);
245     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
246 
247     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable1);
248     EXPECT_EQ(CL_SUCCESS, retVal);
249     EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
250     EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
251 
252     EXPECT_TRUE(kernel->isPatched());
253     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
254     EXPECT_EQ(CL_SUCCESS, retVal);
255     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
256 
257     retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferUncacheable2);
258     EXPECT_EQ(CL_SUCCESS, retVal);
259     EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
260     EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
261 
262     EXPECT_TRUE(kernel->isPatched());
263     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
264     EXPECT_EQ(CL_SUCCESS, retVal);
265     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
266 
267     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
268     EXPECT_EQ(CL_SUCCESS, retVal);
269     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
270     EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
271 
272     EXPECT_TRUE(kernel->isPatched());
273     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
274     EXPECT_EQ(CL_SUCCESS, retVal);
275     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
276 
277     retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
278     EXPECT_EQ(CL_SUCCESS, retVal);
279     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
280     EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
281 
282     EXPECT_TRUE(kernel->isPatched());
283     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
284     EXPECT_EQ(CL_SUCCESS, retVal);
285     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
286     EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
287 }
288 
HWCMDTEST_F(IGFX_GEN8_CORE,clMemLocallyUncachedResourceFixture,WhenUnsettingUncacheableResourceFromKernelThenKernelContinuesToCorrectlySetMocs)289 HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFromKernelThenKernelContinuesToCorrectlySetMocs) {
290     cl_int retVal = CL_SUCCESS;
291     MockKernelWithInternals mockKernel(*this->pClDevice, context, true);
292     auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel;
293     auto kernel = mockKernel.mockKernel;
294 
295     EXPECT_EQ(CL_SUCCESS, retVal);
296 
297     auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
298     auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
299     auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
300     auto pBufferCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));
301 
302     auto bufferUncacheable = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr);
303     auto pBufferUncacheable = clUniquePtr(castToObject<Buffer>(bufferUncacheable));
304 
305     auto mocsCacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
306     auto mocsUncacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
307 
308     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
309     EXPECT_EQ(CL_SUCCESS, retVal);
310     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
311 
312     retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
313     EXPECT_EQ(CL_SUCCESS, retVal);
314     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
315 
316     EXPECT_TRUE(kernel->isPatched());
317     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
318     EXPECT_EQ(CL_SUCCESS, retVal);
319     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
320 
321     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable);
322     EXPECT_EQ(CL_SUCCESS, retVal);
323     EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
324 
325     EXPECT_TRUE(kernel->isPatched());
326     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
327     EXPECT_EQ(CL_SUCCESS, retVal);
328     EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
329 
330     kernel->unsetArg(0);
331 
332     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
333     EXPECT_EQ(CL_SUCCESS, retVal);
334     EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
335 
336     EXPECT_TRUE(kernel->isPatched());
337     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
338     EXPECT_EQ(CL_SUCCESS, retVal);
339     EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
340 
341     kernel->unsetArg(0);
342 
343     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable);
344     EXPECT_EQ(CL_SUCCESS, retVal);
345     EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
346 
347     EXPECT_TRUE(kernel->isPatched());
348     retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
349     EXPECT_EQ(CL_SUCCESS, retVal);
350     EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
351 }
352 
HWCMDTEST_F(IGFX_GEN8_CORE,clMemLocallyUncachedResourceFixture,givenBuffersThatAreUncachedInSurfaceStateAndAreNotUsedInStatelessFashionThenThoseResourcesAreNotRegistredAsResourcesForCacheFlush)353 HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateAndAreNotUsedInStatelessFashionThenThoseResourcesAreNotRegistredAsResourcesForCacheFlush) {
354     DebugManagerStateRestore restorer;
355     DebugManager.flags.CreateMultipleSubDevices.set(2);
356     auto context = std::make_unique<MockContext>();
357     cl_int retVal = CL_SUCCESS;
358     MockKernelWithInternals mockKernel(*context->getDevice(0), context.get(), true);
359     auto kernel = mockKernel.mockKernel;
360     auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel;
361     mockKernel.kernelInfo.setBufferStateful(0);
362     mockKernel.kernelInfo.setBufferStateful(1);
363 
364     auto bufferCacheable = clCreateBufferWithPropertiesINTEL(context.get(), propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
365 
366     auto bufferUncacheableInSurfaceState = clCreateBufferWithPropertiesINTEL(context.get(), propertiesUncacheableInSurfaceState, 0, n * sizeof(float), nullptr, nullptr);
367     auto bufferUncacheable = clCreateBufferWithPropertiesINTEL(context.get(), propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr);
368 
369     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheableInSurfaceState);
370     EXPECT_EQ(CL_SUCCESS, retVal);
371 
372     EXPECT_EQ(nullptr, kernel->kernelArgRequiresCacheFlush[0]);
373 
374     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable);
375     EXPECT_EQ(CL_SUCCESS, retVal);
376 
377     EXPECT_NE(nullptr, kernel->kernelArgRequiresCacheFlush[0]);
378 
379     retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable);
380     EXPECT_EQ(CL_SUCCESS, retVal);
381 
382     EXPECT_EQ(nullptr, kernel->kernelArgRequiresCacheFlush[0]);
383 
384     clReleaseMemObject(bufferUncacheableInSurfaceState);
385     clReleaseMemObject(bufferUncacheable);
386     clReleaseMemObject(bufferCacheable);
387 }
388 
389 } // namespace clMemLocallyUncachedResourceTests
390