1 /*
2 * Copyright (C) 2019-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #include "shared/source/command_stream/command_stream_receiver.h"
9 #include "shared/source/device/device.h"
10 #include "shared/source/gmm_helper/gmm_helper.h"
11 #include "shared/source/helpers/state_base_address.h"
12 #include "shared/test/common/cmd_parse/hw_parse.h"
13 #include "shared/test/common/helpers/debug_manager_state_restore.h"
14 #include "shared/test/common/test_macros/test.h"
15 #include "shared/test/unit_test/utilities/base_object_utils.h"
16
17 #include "opencl/extensions/public/cl_ext_private.h"
18 #include "opencl/source/api/api.h"
19 #include "opencl/source/command_queue/command_queue_hw.h"
20 #include "opencl/source/kernel/kernel.h"
21 #include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
22
23 #include "hw_cmds.h"
24
25 using namespace NEO;
26
27 namespace clMemLocallyUncachedResourceTests {
28
29 template <typename FamilyType>
argMocs(Kernel & kernel,size_t argIndex)30 uint32_t argMocs(Kernel &kernel, size_t argIndex) {
31 using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
32 auto surfaceStateHeapAddress = kernel.getSurfaceStateHeap();
33 auto surfaceStateHeapAddressOffset = static_cast<size_t>(kernel.getKernelInfo().getArgDescriptorAt(static_cast<uint32_t>(argIndex)).as<ArgDescPointer>().bindful);
34 auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset));
35 return surfaceState->getMemoryObjectControlState();
36 }
37
38 template <typename FamilyType>
cmdQueueMocs(CommandQueue * pCmdQ)39 uint32_t cmdQueueMocs(CommandQueue *pCmdQ) {
40 using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
41 auto pCmdQHw = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
42 auto &csr = pCmdQHw->getGpgpuCommandStreamReceiver();
43 HardwareParse hwParse;
44 hwParse.parseCommands<FamilyType>(csr.getCS(0), 0);
45 auto itorCmd = reverse_find<STATE_BASE_ADDRESS *>(hwParse.cmdList.rbegin(), hwParse.cmdList.rend());
46 EXPECT_NE(hwParse.cmdList.rend(), itorCmd);
47 auto sba = genCmdCast<STATE_BASE_ADDRESS *>(*itorCmd);
48 EXPECT_NE(nullptr, sba);
49
50 return sba->getStatelessDataPortAccessMemoryObjectControlState();
51 }
52
53 const size_t n = 512;
54 [[maybe_unused]] const size_t globalWorkSize[3] = {n, 1, 1};
55 [[maybe_unused]] const size_t localWorkSize[3] = {256, 1, 1};
56
57 [[maybe_unused]] const cl_mem_properties_intel *propertiesCacheable = nullptr;
58 [[maybe_unused]] const cl_mem_properties_intel propertiesUncacheable[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0};
59 [[maybe_unused]] const cl_mem_properties_intel propertiesUncacheableInSurfaceState[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, 0};
60
61 using clMemLocallyUncachedResourceFixture = Test<HelloWorldFixture<HelloWorldFixtureFactory>>;
62
HWCMDTEST_F(IGFX_GEN8_CORE,clMemLocallyUncachedResourceFixture,GivenAtLeastOneLocallyUncacheableResourceWhenSettingKernelArgumentsThenKernelIsUncacheable)63 HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, GivenAtLeastOneLocallyUncacheableResourceWhenSettingKernelArgumentsThenKernelIsUncacheable) {
64 cl_int retVal = CL_SUCCESS;
65 MockKernelWithInternals mockKernel(*this->pClDevice, context, true);
66
67 auto kernel = mockKernel.mockKernel;
68 auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel;
69
70 auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
71 auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
72 auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
73 auto pBufferCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));
74
75 auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr);
76 auto pBufferUncacheable1 = clUniquePtr(castToObject<Buffer>(bufferUncacheable1));
77 auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr);
78 auto pBufferUncacheable2 = clUniquePtr(castToObject<Buffer>(bufferUncacheable2));
79
80 auto mocsCacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
81 auto mocsUncacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
82
83 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
84 EXPECT_EQ(CL_SUCCESS, retVal);
85 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
86
87 retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
88 EXPECT_EQ(CL_SUCCESS, retVal);
89 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
90
91 EXPECT_TRUE(kernel->isPatched());
92 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
93 EXPECT_EQ(CL_SUCCESS, retVal);
94 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
95 EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
96
97 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable1);
98 EXPECT_EQ(CL_SUCCESS, retVal);
99 EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
100
101 EXPECT_TRUE(kernel->isPatched());
102 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
103 EXPECT_EQ(CL_SUCCESS, retVal);
104 EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
105 EXPECT_TRUE(kernel->hasUncacheableStatelessArgs());
106
107 retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferUncacheable2);
108 EXPECT_EQ(CL_SUCCESS, retVal);
109 EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
110
111 EXPECT_TRUE(kernel->isPatched());
112 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
113 EXPECT_EQ(CL_SUCCESS, retVal);
114 EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
115 EXPECT_TRUE(kernel->hasUncacheableStatelessArgs());
116
117 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
118 EXPECT_EQ(CL_SUCCESS, retVal);
119 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
120
121 EXPECT_TRUE(kernel->isPatched());
122 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
123 EXPECT_EQ(CL_SUCCESS, retVal);
124 EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
125 EXPECT_TRUE(kernel->hasUncacheableStatelessArgs());
126
127 retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
128 EXPECT_EQ(CL_SUCCESS, retVal);
129 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
130
131 EXPECT_TRUE(kernel->isPatched());
132 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
133 EXPECT_EQ(CL_SUCCESS, retVal);
134 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
135 EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
136 }
137
HWCMDTEST_F(IGFX_GEN8_CORE,clMemLocallyUncachedResourceFixture,givenBuffersThatAreUncachedInSurfaceStateWhenStatelessIsProgrammedThenItIsCached)138 HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateWhenStatelessIsProgrammedThenItIsCached) {
139 cl_int retVal = CL_SUCCESS;
140
141 MockKernelWithInternals mockKernel(*this->pClDevice, context, true);
142 auto kernel = mockKernel.mockKernel;
143 auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel;
144
145 EXPECT_EQ(CL_SUCCESS, retVal);
146
147 auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
148 auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
149 auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
150 auto pBufferCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));
151
152 auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, 0, n * sizeof(float), nullptr, nullptr);
153 auto pBufferUncacheable1 = clUniquePtr(castToObject<Buffer>(bufferUncacheable1));
154 auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, 0, n * sizeof(float), nullptr, nullptr);
155 auto pBufferUncacheable2 = clUniquePtr(castToObject<Buffer>(bufferUncacheable2));
156
157 auto mocsCacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
158 auto mocsUncacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
159
160 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
161 EXPECT_EQ(CL_SUCCESS, retVal);
162 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
163
164 retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
165 EXPECT_EQ(CL_SUCCESS, retVal);
166 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
167
168 EXPECT_TRUE(kernel->isPatched());
169 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
170 EXPECT_EQ(CL_SUCCESS, retVal);
171 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
172
173 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable1);
174 EXPECT_EQ(CL_SUCCESS, retVal);
175 EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
176 EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
177
178 EXPECT_TRUE(kernel->isPatched());
179 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
180 EXPECT_EQ(CL_SUCCESS, retVal);
181 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
182
183 retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferUncacheable2);
184 EXPECT_EQ(CL_SUCCESS, retVal);
185 EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
186 EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
187
188 EXPECT_TRUE(kernel->isPatched());
189 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
190 EXPECT_EQ(CL_SUCCESS, retVal);
191 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
192
193 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
194 EXPECT_EQ(CL_SUCCESS, retVal);
195 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
196
197 EXPECT_TRUE(kernel->isPatched());
198 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
199 EXPECT_EQ(CL_SUCCESS, retVal);
200 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
201
202 retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
203 EXPECT_EQ(CL_SUCCESS, retVal);
204 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
205
206 EXPECT_TRUE(kernel->isPatched());
207 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
208 EXPECT_EQ(CL_SUCCESS, retVal);
209 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
210 }
211
HWCMDTEST_F(IGFX_GEN8_CORE,clMemLocallyUncachedResourceFixture,givenBuffersThatAreUncachedButKernelDoesntHaveAnyStatelessAccessessThenSurfacesAreNotRecordedAsUncacheable)212 HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedButKernelDoesntHaveAnyStatelessAccessessThenSurfacesAreNotRecordedAsUncacheable) {
213 cl_int retVal = CL_SUCCESS;
214
215 MockKernelWithInternals mockKernel(*this->pClDevice, context, true);
216 auto kernel = mockKernel.mockKernel;
217 auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel;
218 mockKernel.kernelInfo.setBufferStateful(0);
219 mockKernel.kernelInfo.setBufferStateful(1);
220
221 auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
222 auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
223 auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
224 auto pBufferCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));
225
226 auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr);
227 auto pBufferUncacheable1 = clUniquePtr(castToObject<Buffer>(bufferUncacheable1));
228 auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr);
229 auto pBufferUncacheable2 = clUniquePtr(castToObject<Buffer>(bufferUncacheable2));
230
231 auto mocsCacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
232 auto mocsUncacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
233
234 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
235 EXPECT_EQ(CL_SUCCESS, retVal);
236 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
237
238 retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
239 EXPECT_EQ(CL_SUCCESS, retVal);
240 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
241
242 EXPECT_TRUE(kernel->isPatched());
243 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
244 EXPECT_EQ(CL_SUCCESS, retVal);
245 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
246
247 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable1);
248 EXPECT_EQ(CL_SUCCESS, retVal);
249 EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
250 EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
251
252 EXPECT_TRUE(kernel->isPatched());
253 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
254 EXPECT_EQ(CL_SUCCESS, retVal);
255 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
256
257 retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferUncacheable2);
258 EXPECT_EQ(CL_SUCCESS, retVal);
259 EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
260 EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
261
262 EXPECT_TRUE(kernel->isPatched());
263 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
264 EXPECT_EQ(CL_SUCCESS, retVal);
265 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
266
267 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
268 EXPECT_EQ(CL_SUCCESS, retVal);
269 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
270 EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
271
272 EXPECT_TRUE(kernel->isPatched());
273 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
274 EXPECT_EQ(CL_SUCCESS, retVal);
275 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
276
277 retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
278 EXPECT_EQ(CL_SUCCESS, retVal);
279 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
280 EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
281
282 EXPECT_TRUE(kernel->isPatched());
283 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
284 EXPECT_EQ(CL_SUCCESS, retVal);
285 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
286 EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
287 }
288
HWCMDTEST_F(IGFX_GEN8_CORE,clMemLocallyUncachedResourceFixture,WhenUnsettingUncacheableResourceFromKernelThenKernelContinuesToCorrectlySetMocs)289 HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFromKernelThenKernelContinuesToCorrectlySetMocs) {
290 cl_int retVal = CL_SUCCESS;
291 MockKernelWithInternals mockKernel(*this->pClDevice, context, true);
292 auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel;
293 auto kernel = mockKernel.mockKernel;
294
295 EXPECT_EQ(CL_SUCCESS, retVal);
296
297 auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
298 auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
299 auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
300 auto pBufferCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));
301
302 auto bufferUncacheable = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr);
303 auto pBufferUncacheable = clUniquePtr(castToObject<Buffer>(bufferUncacheable));
304
305 auto mocsCacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
306 auto mocsUncacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
307
308 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
309 EXPECT_EQ(CL_SUCCESS, retVal);
310 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
311
312 retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2);
313 EXPECT_EQ(CL_SUCCESS, retVal);
314 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
315
316 EXPECT_TRUE(kernel->isPatched());
317 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
318 EXPECT_EQ(CL_SUCCESS, retVal);
319 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
320
321 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable);
322 EXPECT_EQ(CL_SUCCESS, retVal);
323 EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
324
325 EXPECT_TRUE(kernel->isPatched());
326 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
327 EXPECT_EQ(CL_SUCCESS, retVal);
328 EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
329
330 kernel->unsetArg(0);
331
332 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1);
333 EXPECT_EQ(CL_SUCCESS, retVal);
334 EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
335
336 EXPECT_TRUE(kernel->isPatched());
337 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
338 EXPECT_EQ(CL_SUCCESS, retVal);
339 EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
340
341 kernel->unsetArg(0);
342
343 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable);
344 EXPECT_EQ(CL_SUCCESS, retVal);
345 EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
346
347 EXPECT_TRUE(kernel->isPatched());
348 retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
349 EXPECT_EQ(CL_SUCCESS, retVal);
350 EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
351 }
352
HWCMDTEST_F(IGFX_GEN8_CORE,clMemLocallyUncachedResourceFixture,givenBuffersThatAreUncachedInSurfaceStateAndAreNotUsedInStatelessFashionThenThoseResourcesAreNotRegistredAsResourcesForCacheFlush)353 HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateAndAreNotUsedInStatelessFashionThenThoseResourcesAreNotRegistredAsResourcesForCacheFlush) {
354 DebugManagerStateRestore restorer;
355 DebugManager.flags.CreateMultipleSubDevices.set(2);
356 auto context = std::make_unique<MockContext>();
357 cl_int retVal = CL_SUCCESS;
358 MockKernelWithInternals mockKernel(*context->getDevice(0), context.get(), true);
359 auto kernel = mockKernel.mockKernel;
360 auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel;
361 mockKernel.kernelInfo.setBufferStateful(0);
362 mockKernel.kernelInfo.setBufferStateful(1);
363
364 auto bufferCacheable = clCreateBufferWithPropertiesINTEL(context.get(), propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr);
365
366 auto bufferUncacheableInSurfaceState = clCreateBufferWithPropertiesINTEL(context.get(), propertiesUncacheableInSurfaceState, 0, n * sizeof(float), nullptr, nullptr);
367 auto bufferUncacheable = clCreateBufferWithPropertiesINTEL(context.get(), propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr);
368
369 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheableInSurfaceState);
370 EXPECT_EQ(CL_SUCCESS, retVal);
371
372 EXPECT_EQ(nullptr, kernel->kernelArgRequiresCacheFlush[0]);
373
374 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable);
375 EXPECT_EQ(CL_SUCCESS, retVal);
376
377 EXPECT_NE(nullptr, kernel->kernelArgRequiresCacheFlush[0]);
378
379 retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable);
380 EXPECT_EQ(CL_SUCCESS, retVal);
381
382 EXPECT_EQ(nullptr, kernel->kernelArgRequiresCacheFlush[0]);
383
384 clReleaseMemObject(bufferUncacheableInSurfaceState);
385 clReleaseMemObject(bufferUncacheable);
386 clReleaseMemObject(bufferCacheable);
387 }
388
389 } // namespace clMemLocallyUncachedResourceTests
390