1 /*
2  * Copyright (C) 2018-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "shared/source/helpers/local_work_size.h"
9 #include "shared/source/memory_manager/unified_memory_manager.h"
10 #include "shared/test/common/helpers/debug_manager_state_restore.h"
11 
12 #include "opencl/source/event/user_event.h"
13 #include "opencl/test/unit_test/context/driver_diagnostics_tests.h"
14 #include "opencl/test/unit_test/fixtures/buffer_fixture.h"
15 #include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
16 
17 using namespace NEO;
18 
// Blocking read into host memory allocated by the test, with CPU copy not forcibly
// disallowed: the READ_BUFFER_REQUIRES_COPY_DATA hint must be reported for (buffer, host pointer).
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesProperHint) {
    const auto cacheLine = MemoryConstants::cacheLineSize;
    buffer->forceDisallowCPUCopy = false;

    void *hostPtr = alignedMalloc(2 * cacheLine, cacheLine);
    pCmdQ->enqueueReadBuffer(buffer, CL_TRUE, 0, cacheLine, hostPtr, nullptr, 0, nullptr, nullptr);

    // Build the exact hint text the context is expected to have emitted.
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer), hostPtr);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
37 
// Driven by the fixture's alignedAddress / alignedSize flags: issues a non-blocking read and
// checks that the copy-required hint is always present, while the alignment hint is present
// only when the host address or the size was pushed off its aligned value.
TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadBufferIsCallingThenContextProvidesHintsAboutAlignments) {
    void *allocation = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);

    // Shift the address up by one and/or shrink the size by one when misalignment is requested.
    const uintptr_t addressForReadBuffer = reinterpret_cast<uintptr_t>(allocation) + (alignedAddress ? 0u : 1u);
    const size_t sizeForReadBuffer = MemoryConstants::cacheLineSize - (alignedSize ? 0u : 1u);
    const bool expectAlignmentHint = !alignedSize || !alignedAddress;

    pCmdQ->enqueueReadBuffer(buffer, CL_FALSE, 0, sizeForReadBuffer, reinterpret_cast<void *>(addressForReadBuffer), nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer), addressForReadBuffer);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForReadBuffer, sizeForReadBuffer, MemoryConstants::pageSize, MemoryConstants::pageSize);
    EXPECT_EQ(expectAlignmentHint, containsHint(expectedHint, userData));

    alignedFree(allocation);
}
63 
// Rect variant of the alignment test: a blocking rect read whose host address and row size
// are optionally misaligned by one, according to the fixture's alignedAddress / alignedSize flags.
// The copy-required hint is always expected; the alignment hint only for a misaligned input.
TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadBufferRectIsCallingThenContextProvidesHintsAboutAlignments) {
    void *allocation = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);

    const uintptr_t addressForReadBufferRect = reinterpret_cast<uintptr_t>(allocation) + (alignedAddress ? 0u : 1u);
    const size_t sizeForReadBufferRect = MemoryConstants::cacheLineSize - (alignedSize ? 0u : 1u);
    const bool expectAlignmentHint = !alignedSize || !alignedAddress;

    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {sizeForReadBufferRect, 1, 1};

    // All four pitch arguments are passed as 0.
    pCmdQ->enqueueReadBufferRect(buffer, CL_TRUE, bufferOrigin, hostOrigin, region,
                                 0, 0, 0, 0,
                                 reinterpret_cast<void *>(addressForReadBufferRect), 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer), addressForReadBufferRect);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForReadBufferRect, sizeForReadBufferRect, MemoryConstants::pageSize, MemoryConstants::pageSize);
    EXPECT_EQ(expectAlignmentHint, containsHint(expectedHint, userData));

    alignedFree(allocation);
}
99 
// Non-blocking rect read into host memory allocated by the test itself: expects the
// READ_BUFFER_RECT_REQUIRES_COPY_DATA hint for (buffer, host pointer).
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndNotSharedMemWhenEnqueueReadBufferRectIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 2, 1};
    void *hostPtr = alignedMalloc(2 * pitch, pitch);

    // Every row/slice pitch is one cache line.
    pCmdQ->enqueueReadBufferRect(buffer, CL_FALSE, bufferOrigin, hostOrigin, region,
                                 pitch, pitch, pitch, pitch,
                                 hostPtr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer), hostPtr);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(hostPtr);
}
126 
// Non-blocking rect read targeting the fixture-provided `address`: expects the
// READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA hint for (buffer, address).
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndSharedMemWhenEnqueueReadBufferRectIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 2, 1};

    pCmdQ->enqueueReadBufferRect(buffer, CL_FALSE, bufferOrigin, hostOrigin, region,
                                 pitch, pitch, pitch, pitch,
                                 address, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer), address);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
151 
// Non-blocking write from test-allocated host memory with CPU copy forcibly disallowed:
// expects the WRITE_BUFFER_REQUIRES_COPY_DATA hint for the buffer.
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndBufferDoesntShareMemWithCPUWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesRequiedCopyHint) {
    const auto cacheLine = MemoryConstants::cacheLineSize;
    buffer->forceDisallowCPUCopy = true;

    void *srcPtr = alignedMalloc(2 * cacheLine, cacheLine);
    pCmdQ->enqueueWriteBuffer(buffer, CL_FALSE, 0, cacheLine, srcPtr, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer));
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(srcPtr);
}
170 
// Non-blocking write from the fixture-provided `address` with CPU copy forcibly disallowed:
// expects the WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA hint.
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndBufferSharesMemWithCPUWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
    buffer->forceDisallowCPUCopy = true;

    const auto writeSize = MemoryConstants::cacheLineSize;
    pCmdQ->enqueueWriteBuffer(buffer, CL_FALSE, 0, writeSize, address, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast<cl_mem>(buffer), address);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
187 
// Blocking write from test-allocated host memory with CPU copy not forcibly disallowed:
// expects the WRITE_BUFFER_REQUIRES_COPY_DATA hint for the buffer.
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteAndBufferDoesntShareMemWithCPUWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesRequiedCopyHint) {
    const auto cacheLine = MemoryConstants::cacheLineSize;
    buffer->forceDisallowCPUCopy = false;

    void *srcPtr = alignedMalloc(2 * cacheLine, cacheLine);
    pCmdQ->enqueueWriteBuffer(buffer, CL_TRUE, 0, cacheLine, srcPtr, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer));
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(srcPtr);
}
206 
// Blocking write from the fixture-provided `address` with CPU copy not forcibly disallowed:
// expects the WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA hint.
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteAndBufferSharesMemWithCPUWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
    buffer->forceDisallowCPUCopy = false;

    const auto writeSize = MemoryConstants::cacheLineSize;
    pCmdQ->enqueueWriteBuffer(buffer, CL_TRUE, 0, writeSize, address, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast<cl_mem>(buffer), address);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
223 
// Non-blocking read into test-allocated host memory with CPU copy forcibly disallowed:
// expects the READ_BUFFER_REQUIRES_COPY_DATA hint for (buffer, host pointer).
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndBufferDoesntShareMemWithCPUWhenEnqueueReadBufferIsCallingWithoutCPUCopyThenContextProvidesRequiedCopyHint) {
    const auto cacheLine = MemoryConstants::cacheLineSize;
    buffer->forceDisallowCPUCopy = true;

    void *dstPtr = alignedMalloc(2 * cacheLine, cacheLine);
    pCmdQ->enqueueReadBuffer(buffer, CL_FALSE, 0, cacheLine, dstPtr, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer), dstPtr);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(dstPtr);
}
242 
// Non-blocking read into the fixture-provided `address` with CPU copy forcibly disallowed:
// expects the READ_BUFFER_DOESNT_REQUIRE_COPY_DATA hint.
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndBufferSharesMemWithCPUWhenEnqueueReadBufferIsCallingWithoutCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
    buffer->forceDisallowCPUCopy = true;

    const auto readSize = MemoryConstants::cacheLineSize;
    pCmdQ->enqueueReadBuffer(buffer, CL_FALSE, 0, readSize, address, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast<cl_mem>(buffer), address);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
259 
// Blocking read into test-allocated host memory with CPU copy not forcibly disallowed:
// expects the READ_BUFFER_REQUIRES_COPY_DATA hint for (buffer, host pointer).
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadAndBufferDoesntShareMemWithCPUWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesRequiedCopyHint) {
    const auto cacheLine = MemoryConstants::cacheLineSize;
    buffer->forceDisallowCPUCopy = false;

    void *dstPtr = alignedMalloc(2 * cacheLine, cacheLine);
    pCmdQ->enqueueReadBuffer(buffer, CL_TRUE, 0, cacheLine, dstPtr, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer), dstPtr);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(dstPtr);
}
278 
// Blocking read into the fixture-provided `address` with CPU copy not forcibly disallowed:
// expects the READ_BUFFER_DOESNT_REQUIRE_COPY_DATA hint.
TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadAndBufferSharesMemWithCPUWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesCopyDoenstRequiedHint) {
    buffer->forceDisallowCPUCopy = false;

    const auto readSize = MemoryConstants::cacheLineSize;
    pCmdQ->enqueueReadBuffer(buffer, CL_TRUE, 0, readSize, address, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast<cl_mem>(buffer), address);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
295 
// Non-blocking rect write from host memory allocated by the test itself: expects the
// WRITE_BUFFER_RECT_REQUIRES_COPY_DATA hint for the buffer.
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndNotSharedMemWhenEnqueueWriteBufferRectIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 2, 1};
    void *srcPtr = alignedMalloc(2 * pitch, pitch);

    // Every row/slice pitch is one cache line.
    pCmdQ->enqueueWriteBufferRect(buffer, CL_FALSE, bufferOrigin, hostOrigin, region,
                                  pitch, pitch, pitch, pitch,
                                  srcPtr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer));
    EXPECT_TRUE(containsHint(expectedHint, userData));

    alignedFree(srcPtr);
}
322 
// Non-blocking rect write from the fixture-provided `address`: expects the
// WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA hint for the buffer.
TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndSharedMemWhenEnqueueWriteBufferRectIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 2, 1};

    pCmdQ->enqueueWriteBufferRect(buffer, CL_FALSE, bufferOrigin, hostOrigin, region,
                                  pitch, pitch, pitch, pitch,
                                  address, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA], static_cast<cl_mem>(buffer));
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
347 
// Driven by the fixture's alignedAddress / alignedSize flags: issues a non-blocking image read
// whose host address and row width are optionally misaligned by one, then checks that the
// READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS hint is present exactly when something was misaligned.
TEST_P(PerformanceHintEnqueueReadImageTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadImageIsCallingThenContextProvidesHintsAboutAlignments) {
    REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);

    size_t hostOrigin[] = {0, 0, 0};
    // Owned by a smart pointer: the fatal ASSERT_EQ below returns from the test body on failure,
    // which would otherwise skip the release of this allocation.
    std::unique_ptr<void, void (*)(void *)> hostMemory(alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize),
                                                       [](void *memory) { alignedFree(memory); });
    uintptr_t addressForReadImage = reinterpret_cast<uintptr_t>(hostMemory.get());
    size_t sizeForReadImageInPixels = MemoryConstants::cacheLineSize;
    bool hintWithMisalignment = !(alignedAddress && alignedSize);
    if (!alignedAddress) {
        addressForReadImage++;
    }
    if (!alignedSize) {
        sizeForReadImageInPixels--;
    }
    size_t region[] = {sizeForReadImageInPixels, 1, 1};
    pCmdQ->enqueueReadImage(image,
                            CL_FALSE,
                            hostOrigin,
                            region,
                            0,
                            0,
                            reinterpret_cast<void *>(addressForReadImage),
                            nullptr,
                            0,
                            nullptr,
                            nullptr);

    // The hint reports the size in bytes, so scale the pixel count by the image's element size
    // and make sure the scaled size still has the alignment this parameterization asks for.
    size_t sizeForReadImage = sizeForReadImageInPixels * image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes;
    ASSERT_EQ(alignedSize, isAligned<MemoryConstants::cacheLineSize>(sizeForReadImage));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForReadImage, sizeForReadImage, MemoryConstants::pageSize, MemoryConstants::pageSize);
    EXPECT_EQ(hintWithMisalignment, containsHint(expectedHint, userData));
}
381 
// Non-blocking image write from the fixture-provided `address`: expects the
// WRITE_IMAGE_REQUIRES_COPY_DATA hint for the fixture image.
TEST_F(PerformanceHintEnqueueImageTest, GivenNonBlockingWriteWhenEnqueueWriteImageIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};

    pCmdQ->enqueueWriteImage(image, CL_FALSE, hostOrigin, region, pitch, pitch, address, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA], static_cast<cl_mem>(image));
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
403 
// Non-blocking image write whose host pointer is the zero-copy image's own CPU transfer address:
// expects the WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA hint for that image.
TEST_F(PerformanceHintEnqueueImageTest, GivenNonBlockingWriteImageSharesStorageWithDstPtrWhenEnqueueWriteImageIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};

    void *imageStorage = zeroCopyImage->getCpuAddressForMemoryTransfer();
    pCmdQ->enqueueWriteImage(zeroCopyImage.get(), CL_FALSE, hostOrigin, region, pitch, pitch, imageStorage, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA], static_cast<cl_mem>(zeroCopyImage.get()));
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
426 
// Non-blocking image read whose destination is the zero-copy image's own CPU transfer address:
// expects the READ_IMAGE_DOESNT_REQUIRES_COPY_DATA hint for that image.
TEST_F(PerformanceHintEnqueueImageTest, GivenNonBlockingReadImageSharesStorageWithDstPtrWhenEnqueueReadImageIsCallingThenContextProvidesProperHint) {
    const auto pitch = MemoryConstants::cacheLineSize;
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};

    void *imageStorage = zeroCopyImage->getCpuAddressForMemoryTransfer();
    pCmdQ->enqueueReadImage(zeroCopyImage.get(), CL_FALSE, hostOrigin, region, pitch, pitch, imageStorage, nullptr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA], static_cast<cl_mem>(zeroCopyImage.get()));
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
449 
// Parameterized on whether the buffer is expected to be zero-copy. A CL_MEM_USE_HOST_PTR buffer is
// created over cache-line-aligned host memory (its size bumped by one byte for the non-zero-copy
// case), mapped non-blocking, and exactly one of the "requires copy" / "doesn't require copy"
// map hints must be reported, matching the parameter.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapBufferIsCallingThenContextProvidesProperHint) {

    bool zeroCopyBuffer = GetParam();
    size_t sizeForBuffer = MemoryConstants::cacheLineSize;
    if (!zeroCopyBuffer) {
        sizeForBuffer++;
    }

    void *address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal));

    pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 0, nullptr, nullptr, retVal);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast<cl_mem>(buffer.get()));
    EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer.get()));
    EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));

    // Host memory handed over with CL_MEM_USE_HOST_PTR has to stay valid for the buffer's whole
    // lifetime, so destroy the buffer first and release the host allocation afterwards.
    buffer.reset();
    alignedFree(address);
}
// Same scenario as the plain map-buffer hint test, but the map is enqueued behind an incomplete
// user event: the queue must report itself blocked until the event is completed, after which the
// hint matching the zero-copy parameter must have been reported.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnqueueMapBufferIsCallingThenContextProvidesProperHint) {

    bool zeroCopyBuffer = GetParam();
    UserEvent userEvent(context);
    cl_event blockedEvent = &userEvent;
    size_t sizeForBuffer = MemoryConstants::cacheLineSize;
    if (!zeroCopyBuffer) {
        sizeForBuffer++;
    }

    void *address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal));

    EXPECT_EQ(buffer->isMemObjZeroCopy(), zeroCopyBuffer);

    pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 1, &blockedEvent, nullptr, retVal);
    EXPECT_TRUE(pCmdQ->isQueueBlocked());
    // Completing the user event releases the blocked map command.
    userEvent.setStatus(CL_COMPLETE);
    EXPECT_FALSE(pCmdQ->isQueueBlocked());

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast<cl_mem>(buffer.get()));
    EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer.get()));
    EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));

    // Host memory handed over with CL_MEM_USE_HOST_PTR has to stay valid for the buffer's whole
    // lifetime, so destroy the buffer explicitly before releasing the host allocation.
    buffer.reset();
    alignedFree(address);
}
503 
// Parameterized on whether the image should be zero-copy: a read-only 1D image is used for the
// zero-copy case and a use-host-ptr 1D image otherwise. After a non-blocking map, exactly one of
// the "requires copy" / "doesn't require copy" map-image hints must match the parameter.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapImageIsCallingThenContextProvidesProperHint) {
    const bool isZeroCopyImage = GetParam();

    size_t origin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};

    // Released automatically when the test body ends.
    std::unique_ptr<Image> image;
    if (isZeroCopyImage) {
        image.reset(ImageHelper<ImageReadOnly<Image1dDefaults>>::create(context));
    } else {
        image.reset(ImageHelper<ImageUseHostPtr<Image1dDefaults>>::create(context));
    }
    EXPECT_EQ(isZeroCopyImage, image->isMemObjZeroCopy());

    pCmdQ->enqueueMapImage(image.get(), CL_FALSE, 0, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA], static_cast<cl_mem>(image.get()));
    EXPECT_EQ(isZeroCopyImage, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA], static_cast<cl_mem>(image.get()));
    EXPECT_EQ(!isZeroCopyImage, containsHint(expectedHint, userData));
}
541 
// Map-image hint test with the map enqueued behind an incomplete user event. The queue must be
// blocked after the enqueue; once the event completes the image is unmapped and the hint that
// matches the zero-copy parameter must have been reported.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnqueueMapImageIsCallingThenContextProvidesProperHint) {
    // Start from a read-only image and swap in a use-host-ptr one for the non-zero-copy case.
    std::unique_ptr<Image> image(ImageHelper<ImageReadOnly<Image1dDefaults>>::create(context));
    const bool isZeroCopyImage = GetParam();

    size_t origin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};

    if (!isZeroCopyImage) {
        image.reset(ImageHelper<ImageUseHostPtr<Image1dDefaults>>::create(context));
    }
    EXPECT_EQ(isZeroCopyImage, image->isMemObjZeroCopy());

    UserEvent userEvent(context);
    cl_event blockedEvent = &userEvent;
    void *mappedPtr = pCmdQ->enqueueMapImage(image.get(), CL_FALSE, 0, origin, region, nullptr, nullptr, 1, &blockedEvent, nullptr, retVal);
    EXPECT_TRUE(pCmdQ->isQueueBlocked());

    userEvent.setStatus(CL_COMPLETE);
    pCmdQ->enqueueUnmapMemObject(image.get(), mappedPtr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA], static_cast<cl_mem>(image.get()));
    EXPECT_EQ(isZeroCopyImage, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA], static_cast<cl_mem>(image.get()));
    EXPECT_EQ(!isZeroCopyImage, containsHint(expectedHint, userData));
}
579 
// Parameterized on whether the buffer is expected to be zero-copy. A CL_MEM_USE_HOST_PTR buffer is
// mapped and immediately unmapped; exactly one of the "requires copy" / "doesn't require copy"
// unmap hints must be reported for the mapped pointer, matching the parameter.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithBufferThenContextProvidesProperHint) {

    bool zeroCopyBuffer = GetParam();
    size_t sizeForBuffer = MemoryConstants::cacheLineSize;
    if (!zeroCopyBuffer) {
        sizeForBuffer++;
    }

    void *address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal));

    void *mapPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 0, nullptr, nullptr, retVal);
    pCmdQ->enqueueUnmapMemObject(buffer.get(), mapPtr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA], mapPtr, static_cast<cl_mem>(buffer.get()));
    EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA], mapPtr);
    EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));

    // Host memory handed over with CL_MEM_USE_HOST_PTR has to stay valid for the buffer's whole
    // lifetime, so destroy the buffer first and release the host allocation afterwards.
    buffer.reset();
    alignedFree(address);
}
605 
// Unmap hint test with the map enqueued behind an incomplete user event. The unmap is enqueued
// while the queue is still blocked; after the event completes, the queue must be unblocked and
// the unmap hint matching the zero-copy parameter must have been reported.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyAndBlockedEventFlagWhenEnqueueUnmapIsCallingWithBufferThenContextProvidesProperHint) {

    bool zeroCopyBuffer = GetParam();
    UserEvent userEvent(context);
    cl_event blockedEvent = &userEvent;
    size_t sizeForBuffer = MemoryConstants::cacheLineSize;
    if (!zeroCopyBuffer) {
        sizeForBuffer++;
    }

    void *address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal));
    EXPECT_EQ(buffer->isMemObjZeroCopy(), zeroCopyBuffer);

    void *mapPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 1, &blockedEvent, nullptr, retVal);
    EXPECT_TRUE(pCmdQ->isQueueBlocked());

    pCmdQ->enqueueUnmapMemObject(buffer.get(), mapPtr, 0, nullptr, nullptr);
    // Completing the user event releases both queued commands.
    userEvent.setStatus(CL_COMPLETE);
    EXPECT_FALSE(pCmdQ->isQueueBlocked());

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA], mapPtr, static_cast<cl_mem>(buffer.get()));
    EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA], mapPtr);
    EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));

    // Host memory handed over with CL_MEM_USE_HOST_PTR has to stay valid for the buffer's whole
    // lifetime, so destroy the buffer explicitly before releasing the host allocation.
    buffer.reset();
    alignedFree(address);
}
636 
// Parameterized on whether the image should be zero-copy (read-only 1D image) or not
// (use-host-ptr 1D image). The image is mapped and unmapped, and exactly one of the
// "requires copy" / "doesn't require copy" unmap hints must match the parameter.
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithImageThenContextProvidesProperHint) {
    const bool isZeroCopyImage = GetParam();

    size_t origin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};

    // Released automatically when the test body ends.
    std::unique_ptr<Image> image;
    if (isZeroCopyImage) {
        image.reset(ImageHelper<ImageReadOnly<Image1dDefaults>>::create(context));
    } else {
        image.reset(ImageHelper<ImageUseHostPtr<Image1dDefaults>>::create(context));
    }
    EXPECT_EQ(isZeroCopyImage, image->isMemObjZeroCopy());

    void *mappedPtr = pCmdQ->enqueueMapImage(image.get(), CL_FALSE, 0, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal);
    pCmdQ->enqueueUnmapMemObject(image.get(), mappedPtr, 0, nullptr, nullptr);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA], mappedPtr, static_cast<cl_mem>(image.get()));
    EXPECT_EQ(!isZeroCopyImage, containsHint(expectedHint, userData));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA], mappedPtr);
    EXPECT_EQ(isZeroCopyImage, containsHint(expectedHint, userData));
}
666 
// Maps a 256-byte SVM allocation and expects the
// CL_ENQUEUE_SVM_MAP_DOESNT_REQUIRE_COPY_DATA hint, formatted with the SVM
// pointer, to be present in userData. Skipped when the device lacks SVM.
TEST_F(PerformanceHintEnqueueTest, GivenSVMPointerWhenEnqueueSVMMapIsCallingThenContextProvidesProperHint) {
    REQUIRE_SVM_OR_SKIP(pPlatform->getClDevice(0));

    constexpr size_t svmAllocationSize = 256;
    auto svmManager = context->getSVMAllocsManager();
    void *svmAllocation = svmManager->createSVMAlloc(svmAllocationSize, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());

    pCmdQ->enqueueSVMMap(CL_FALSE, 0, svmAllocation, svmAllocationSize, 0, nullptr, nullptr, false);

    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[CL_ENQUEUE_SVM_MAP_DOESNT_REQUIRE_COPY_DATA],
             svmAllocation);
    EXPECT_TRUE(containsHint(expectedHint, userData));

    svmManager->freeSVMAlloc(svmAllocation);
}
678 
// Enqueues the fixture kernel with a null local work size, leaving the debug
// flags untouched, and expects the NULL_LOCAL_WORKGROUP_SIZE hint formatted
// with the kernel name and the local work size values read back from the kernel.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeNDIsDefaultWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {

    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Read after the enqueue so the values reflect what the kernel holds afterwards.
    auto chosenLws = kernel->getLocalWorkSizeValues();
    const auto &kernelName = kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;

    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
             kernelName.c_str(),
             *chosenLws[0], *chosenLws[1], *chosenLws[2]);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
690 
// Enqueues the fixture kernel with a null local work size while
// EnableComputeWorkSizeND is forced to true, and expects the
// NULL_LOCAL_WORKGROUP_SIZE hint formatted with the kernel name and the local
// work size values read back from the kernel.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeNDIsTrueWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {

    // Scoped restore of the debug flags: replaces the manual save/restore so the
    // flag is put back on every exit path, as the sibling tests in this file do.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeND.set(true);

    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto localWorkSize = kernel->getLocalWorkSizeValues();
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
             kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
             *localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
705 
// Enqueues the fixture kernel with a null local work size while
// EnableComputeWorkSizeND is forced to false, and expects the
// NULL_LOCAL_WORKGROUP_SIZE hint formatted with the kernel name and the local
// work size values read back from the kernel.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeNDIsFalseWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {

    // Scoped restore of the debug flags: replaces the manual save/restore so the
    // flag is put back on every exit path, as the sibling tests in this file do.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto localWorkSize = kernel->getLocalWorkSizeValues();
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
             kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(),
             *localWorkSize[0], *localWorkSize[1], *localWorkSize[2]);

    EXPECT_TRUE(containsHint(expectedHint, userData));
}
721 
// Enqueues the fixture kernel with a null local work size without touching any
// debug flag, and expects the NULL_LOCAL_WORKGROUP_SIZE hint formatted with the
// kernel name and the local work size values read back from the kernel.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsDefaultWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {

    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto chosenLws = kernel->getLocalWorkSizeValues();
    const auto &kernelName = kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;

    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
             kernelName.c_str(),
             *chosenLws[0], *chosenLws[1], *chosenLws[2]);

    EXPECT_TRUE(containsHint(expectedHint, userData));
}
734 
// Enqueues the fixture kernel with a null local work size while
// EnableComputeWorkSizeSquared is true and EnableComputeWorkSizeND is false,
// and expects the NULL_LOCAL_WORKGROUP_SIZE hint formatted with the kernel name
// and the local work size values read back from the kernel.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsTrueWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {

    // Debug flags changed below are restored when this object goes out of scope.
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto chosenLws = kernel->getLocalWorkSizeValues();
    const auto &kernelName = kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;

    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
             kernelName.c_str(),
             *chosenLws[0], *chosenLws[1], *chosenLws[2]);

    EXPECT_TRUE(containsHint(expectedHint, userData));
}
750 
// Enqueues the fixture kernel with a null local work size while both
// EnableComputeWorkSizeSquared and EnableComputeWorkSizeND are false, and
// expects the NULL_LOCAL_WORKGROUP_SIZE hint formatted with the kernel name and
// the local work size values read back from the kernel.
TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsFalseWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {

    // Debug flags changed below are restored when this object goes out of scope.
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);

    retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto chosenLws = kernel->getLocalWorkSizeValues();
    const auto &kernelName = kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;

    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE],
             kernelName.c_str(),
             *chosenLws[0], *chosenLws[1], *chosenLws[2]);

    EXPECT_TRUE(containsHint(expectedHint, userData));
}
766 
// Computes a local work-group size for the fixture's global size via
// computeWorkgroupSize(), halves one of its dimensions, enqueues the kernel
// with that modified size and expects the BAD_LOCAL_WORKGROUP_SIZE hint, which
// is formatted with the size that was passed, the kernel name and the
// originally computed size. The int test parameter picks the dimension to halve.
TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {
    // Work dimension derived from the global size: a trailing extent of 1
    // means that dimension is not counted.
    uint32_t workDim = 3;
    if (globalWorkGroupSize[1] == 1) {
        workDim = 1;
    } else if (globalWorkGroupSize[2] == 1) {
        workDim = 2;
    }

    DispatchInfo dispatchInfo(&pCmdQ->getClDevice(), kernel, workDim, Vec3<size_t>(globalWorkGroupSize), Vec3<size_t>(0u, 0u, 0u), Vec3<size_t>(0u, 0u, 0u));
    auto computedLws = computeWorkgroupSize(dispatchInfo);

    size_t localWorkGroupSize[3] = {computedLws.x, computedLws.y, computedLws.z};

    // Halve the requested dimension when it is larger than 1; otherwise fall
    // back to halving dimension 0.
    const int badSizeDimension = GetParam();
    size_t &extentToHalve = (localWorkGroupSize[badSizeDimension] > 1)
                                ? localWorkGroupSize[badSizeDimension]
                                : localWorkGroupSize[0];
    extentToHalve /= 2;

    retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, localWorkGroupSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const auto &kernelName = kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[BAD_LOCAL_WORKGROUP_SIZE],
             localWorkGroupSize[0], localWorkGroupSize[1], localWorkGroupSize[2],
             kernelName.c_str(),
             computedLws.x, computedLws.y, computedLws.z);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
796 
// Enqueues the fixture kernel with work dimension 3 and a local size produced
// by the work-group size helpers, then expects the PRINTF_DETECTED_IN_KERNEL
// hint formatted with the kernel name to be present in userData.
TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueKernelIsCalledWithWorkDim3ThenContextProvidesProperHint) {
    size_t preferredWorkGroupSize[3];
    auto maxWorkGroupSize = static_cast<uint32_t>(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize);

    // Use whichever helper matches the current EnableComputeWorkSizeND setting.
    if (!DebugManager.flags.EnableComputeWorkSizeND.get()) {
        computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32);
    } else {
        WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false);
        computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2);
    }

    retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const auto &kernelName = kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName;
    snprintf(expectedHint,
             DriverDiagnostics::maxHintStringSize,
             DriverDiagnostics::hintFormat[PRINTF_DETECTED_IN_KERNEL],
             kernelName.c_str());
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
811 
// Builds a mock kernel whose single argument is a buffer with a graphics
// allocation marked coherent, enqueues it with work dimension 2 and expects
// the KERNEL_REQUIRES_COHERENCY hint formatted with the kernel name to be
// present in userData.
TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIsCalledWithWorkDim2ThenContextProvidesProperHint) {
    size_t preferredWorkGroupSize[3];
    size_t globalWorkGroupSize[3] = {1, 1, 1};
    auto maxWorkGroupSize = static_cast<uint32_t>(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize);
    MockKernelWithInternals mockKernel(*pPlatform->getClDevice(0), context);
    Kernel::SimpleKernelArgInfo kernelArgInfo;

    // Use whichever helper matches the current EnableComputeWorkSizeND setting.
    if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
        WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false);
        computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2);
    } else {
        computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32);
    }

    // The buffer is owned by a smart pointer so it is released on every exit
    // path instead of relying on a trailing manual delete.
    auto buffer = std::make_unique<MockBuffer>();
    buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);
    auto clBuffer = static_cast<cl_mem>(buffer.get());

    kernelArgInfo.object = clBuffer;
    kernelArgInfo.type = Kernel::kernelArgType::BUFFER_OBJ;

    std::vector<Kernel::SimpleKernelArgInfo> kernelArguments;
    kernelArguments.resize(1);
    kernelArguments[0] = kernelArgInfo;
    mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1);
    mockKernel.mockKernel->setKernelArguments(kernelArguments);

    retVal = pCmdQ->enqueueKernel(mockKernel.mockKernel, 2, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str());
    EXPECT_TRUE(containsHint(expectedHint, userData));
}
846 
847 const int validDimensions[] = {0, 1, 2};
848 
849 INSTANTIATE_TEST_CASE_P(
850     DriverDiagnosticsTests,
851     PerformanceHintEnqueueReadBufferTest,
852     testing::Combine(
853         ::testing::Bool(),
854         ::testing::Bool()));
855 
856 INSTANTIATE_TEST_CASE_P(
857     DriverDiagnosticsTests,
858     PerformanceHintEnqueueReadImageTest,
859     testing::Combine(
860         ::testing::Bool(),
861         ::testing::Bool()));
862 
863 INSTANTIATE_TEST_CASE_P(
864     DriverDiagnosticsTests,
865     PerformanceHintEnqueueMapTest,
866     testing::Bool());
867 
868 INSTANTIATE_TEST_CASE_P(
869     DriverDiagnosticsTests,
870     PerformanceHintEnqueueKernelBadSizeTest,
871     testing::ValuesIn(validDimensions));
872